/*
 * libxfs/xfs_alloc.c - XFS free space allocation (by-block and by-size
 * btrees, AGFL management, extent allocation policies).
 */
37b3b4d6 1// SPDX-License-Identifier: GPL-2.0
2bd0ea18 2/*
5e656dbb 3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
da23017d 4 * All Rights Reserved.
2bd0ea18 5 */
9c799827 6#include "libxfs_priv.h"
b626fb59
DC
7#include "xfs_fs.h"
8#include "xfs_format.h"
9#include "xfs_log_format.h"
10#include "xfs_shared.h"
11#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_sb.h"
14#include "xfs_mount.h"
f944d3d0 15#include "xfs_defer.h"
b626fb59 16#include "xfs_btree.h"
631ac87a 17#include "xfs_rmap.h"
b626fb59
DC
18#include "xfs_alloc_btree.h"
19#include "xfs_alloc.h"
2cf10e4c 20#include "xfs_errortag.h"
b626fb59
DC
21#include "xfs_trace.h"
22#include "xfs_trans.h"
cf8ce220 23#include "xfs_ag_resv.h"
d5c1b462
BF
24#include "xfs_bmap.h"
25
26extern kmem_zone_t *xfs_bmap_free_item_zone;
2bd0ea18 27
ff105f75
DC
28struct workqueue_struct *xfs_alloc_wq;
29
2bd0ea18 30#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
5e656dbb
BN
31
32#define XFSA_FIXUP_BNO_OK 1
33#define XFSA_FIXUP_CNT_OK 2
34
5e656dbb
BN
35STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
36STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
37STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
2bd0ea18 38
b8165508
DC
39/*
40 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
41 * the beginning of the block for a proper header with the location information
42 * and CRC.
43 */
44unsigned int
45xfs_agfl_size(
46 struct xfs_mount *mp)
47{
48 unsigned int size = mp->m_sb.sb_sectsize;
49
50 if (xfs_sb_version_hascrc(&mp->m_sb))
51 size -= sizeof(struct xfs_agfl);
52
53 return size / sizeof(xfs_agblock_t);
54}
55
2a96beb9
DW
56unsigned int
57xfs_refc_block(
58 struct xfs_mount *mp)
59{
60 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
61 return XFS_RMAP_BLOCK(mp) + 1;
62 if (xfs_sb_version_hasfinobt(&mp->m_sb))
63 return XFS_FIBT_BLOCK(mp) + 1;
64 return XFS_IBT_BLOCK(mp) + 1;
65}
66
ef5340cd
DW
67xfs_extlen_t
68xfs_prealloc_blocks(
69 struct xfs_mount *mp)
70{
2a96beb9
DW
71 if (xfs_sb_version_hasreflink(&mp->m_sb))
72 return xfs_refc_block(mp) + 1;
ef5340cd
DW
73 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
74 return XFS_RMAP_BLOCK(mp) + 1;
75 if (xfs_sb_version_hasfinobt(&mp->m_sb))
76 return XFS_FIBT_BLOCK(mp) + 1;
77 return XFS_IBT_BLOCK(mp) + 1;
78}
79
b8a8d6e5
DW
80/*
81 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
82 * AGF buffer (PV 947395), we place constraints on the relationship among
83 * actual allocations for data blocks, freelist blocks, and potential file data
84 * bmap btree blocks. However, these restrictions may result in no actual space
85 * allocated for a delayed extent, for example, a data block in a certain AG is
86 * allocated but there is no additional block for the additional bmap btree
87 * block due to a split of the bmap btree of the file. The result of this may
88 * lead to an infinite loop when the file gets flushed to disk and all delayed
89 * extents need to be actually allocated. To get around this, we explicitly set
90 * aside a few blocks which will not be reserved in delayed allocation.
91 *
cf8ce220
DW
92 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
93 * potential split of the file's bmap btree.
b8a8d6e5
DW
94 */
95unsigned int
96xfs_alloc_set_aside(
97 struct xfs_mount *mp)
98{
8eeb15ea 99 return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
b8a8d6e5
DW
100}
101
102/*
103 * When deciding how much space to allocate out of an AG, we limit the
104 * allocation maximum size to the size the AG. However, we cannot use all the
105 * blocks in the AG - some are permanently used by metadata. These
106 * blocks are generally:
107 * - the AG superblock, AGF, AGI and AGFL
108 * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
109 * the AGI free inode and rmap btree root blocks.
110 * - blocks on the AGFL according to xfs_alloc_set_aside() limits
111 * - the rmapbt root block
112 *
113 * The AG headers are sector sized, so the amount of space they take up is
114 * dependent on filesystem geometry. The others are all single blocks.
115 */
116unsigned int
117xfs_alloc_ag_max_usable(
118 struct xfs_mount *mp)
119{
120 unsigned int blocks;
121
122 blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
123 blocks += XFS_ALLOC_AGFL_RESERVE;
124 blocks += 3; /* AGF, AGI btree root blocks */
125 if (xfs_sb_version_hasfinobt(&mp->m_sb))
126 blocks++; /* finobt root block */
127 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
128 blocks++; /* rmap root block */
868c70e3
DW
129 if (xfs_sb_version_hasreflink(&mp->m_sb))
130 blocks++; /* refcount root block */
b8a8d6e5
DW
131
132 return mp->m_sb.sb_agblocks - blocks;
133}
134
b194c7d8
BN
135/*
136 * Lookup the record equal to [bno, len] in the btree given by cur.
137 */
138STATIC int /* error */
139xfs_alloc_lookup_eq(
140 struct xfs_btree_cur *cur, /* btree cursor */
141 xfs_agblock_t bno, /* starting block of extent */
142 xfs_extlen_t len, /* length of extent */
143 int *stat) /* success/failure */
144{
7777d8c6
BF
145 int error;
146
b194c7d8
BN
147 cur->bc_rec.a.ar_startblock = bno;
148 cur->bc_rec.a.ar_blockcount = len;
7777d8c6
BF
149 error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
150 cur->bc_private.a.priv.abt.active = (*stat == 1);
151 return error;
b194c7d8
BN
152}
153
154/*
155 * Lookup the first record greater than or equal to [bno, len]
156 * in the btree given by cur.
157 */
a2ceac1f 158int /* error */
b194c7d8
BN
159xfs_alloc_lookup_ge(
160 struct xfs_btree_cur *cur, /* btree cursor */
161 xfs_agblock_t bno, /* starting block of extent */
162 xfs_extlen_t len, /* length of extent */
163 int *stat) /* success/failure */
164{
7777d8c6
BF
165 int error;
166
b194c7d8
BN
167 cur->bc_rec.a.ar_startblock = bno;
168 cur->bc_rec.a.ar_blockcount = len;
7777d8c6
BF
169 error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
170 cur->bc_private.a.priv.abt.active = (*stat == 1);
171 return error;
b194c7d8
BN
172}
173
174/*
175 * Lookup the first record less than or equal to [bno, len]
176 * in the btree given by cur.
177 */
1fe41a73 178int /* error */
b194c7d8
BN
179xfs_alloc_lookup_le(
180 struct xfs_btree_cur *cur, /* btree cursor */
181 xfs_agblock_t bno, /* starting block of extent */
182 xfs_extlen_t len, /* length of extent */
183 int *stat) /* success/failure */
184{
7777d8c6 185 int error;
b194c7d8
BN
186 cur->bc_rec.a.ar_startblock = bno;
187 cur->bc_rec.a.ar_blockcount = len;
7777d8c6
BF
188 error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
189 cur->bc_private.a.priv.abt.active = (*stat == 1);
190 return error;
191}
192
193static inline bool
194xfs_alloc_cur_active(
195 struct xfs_btree_cur *cur)
196{
197 return cur && cur->bc_private.a.priv.abt.active;
b194c7d8
BN
198}
199
200/*
201 * Update the record referred to by cur to the value given
202 * by [bno, len].
203 * This either works (return 0) or gets an EFSCORRUPTED error.
204 */
205STATIC int /* error */
206xfs_alloc_update(
207 struct xfs_btree_cur *cur, /* btree cursor */
208 xfs_agblock_t bno, /* starting block of extent */
209 xfs_extlen_t len) /* length of extent */
210{
211 union xfs_btree_rec rec;
212
213 rec.alloc.ar_startblock = cpu_to_be32(bno);
214 rec.alloc.ar_blockcount = cpu_to_be32(len);
215 return xfs_btree_update(cur, &rec);
216}
217
218/*
219 * Get the data from the pointed-to record.
220 */
a2ceac1f 221int /* error */
b194c7d8
BN
222xfs_alloc_get_rec(
223 struct xfs_btree_cur *cur, /* btree cursor */
224 xfs_agblock_t *bno, /* output: starting block of extent */
225 xfs_extlen_t *len, /* output: length of extent */
226 int *stat) /* output: success/failure */
227{
ec291989
DC
228 struct xfs_mount *mp = cur->bc_mp;
229 xfs_agnumber_t agno = cur->bc_private.a.agno;
b194c7d8
BN
230 union xfs_btree_rec *rec;
231 int error;
232
233 error = xfs_btree_get_rec(cur, &rec, stat);
3741f971
DW
234 if (error || !(*stat))
235 return error;
3741f971
DW
236
237 *bno = be32_to_cpu(rec->alloc.ar_startblock);
238 *len = be32_to_cpu(rec->alloc.ar_blockcount);
239
c25e4305
CM
240 if (*len == 0)
241 goto out_bad_rec;
242
ec291989
DC
243 /* check for valid extent range, including overflow */
244 if (!xfs_verify_agbno(mp, agno, *bno))
245 goto out_bad_rec;
246 if (*bno > *bno + *len)
247 goto out_bad_rec;
248 if (!xfs_verify_agbno(mp, agno, *bno + *len - 1))
249 goto out_bad_rec;
250
251 return 0;
252
253out_bad_rec:
254 xfs_warn(mp,
255 "%s Freespace BTree record corruption in AG %d detected!",
256 cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno);
257 xfs_warn(mp,
258 "start block 0x%x block count 0x%x", *bno, *len);
259 return -EFSCORRUPTED;
b194c7d8
BN
260}
261
2bd0ea18
NS
262/*
263 * Compute aligned version of the found extent.
264 * Takes alignment and min length into account.
265 */
cd80de04 266STATIC bool
2bd0ea18 267xfs_alloc_compute_aligned(
a2ceac1f 268 xfs_alloc_arg_t *args, /* allocation argument structure */
2bd0ea18
NS
269 xfs_agblock_t foundbno, /* starting block in found extent */
270 xfs_extlen_t foundlen, /* length in found extent */
2bd0ea18 271 xfs_agblock_t *resbno, /* result block number */
cd80de04
CH
272 xfs_extlen_t *reslen, /* result length */
273 unsigned *busy_gen)
2bd0ea18 274{
cd80de04
CH
275 xfs_agblock_t bno = foundbno;
276 xfs_extlen_t len = foundlen;
ff3263dd 277 xfs_extlen_t diff;
cd80de04 278 bool busy;
2bd0ea18 279
a2ceac1f 280 /* Trim busy sections out of found extent */
cd80de04 281 busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
a2ceac1f 282
ff3263dd
BF
283 /*
284 * If we have a largish extent that happens to start before min_agbno,
285 * see if we can shift it into range...
286 */
287 if (bno < args->min_agbno && bno + len > args->min_agbno) {
288 diff = args->min_agbno - bno;
289 if (len > diff) {
290 bno += diff;
291 len -= diff;
292 }
293 }
294
a2ceac1f
DC
295 if (args->alignment > 1 && len >= args->minlen) {
296 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
ff3263dd
BF
297
298 diff = aligned_bno - bno;
a2ceac1f
DC
299
300 *resbno = aligned_bno;
301 *reslen = diff >= len ? 0 : len - diff;
2bd0ea18 302 } else {
a2ceac1f
DC
303 *resbno = bno;
304 *reslen = len;
2bd0ea18 305 }
cd80de04
CH
306
307 return busy;
2bd0ea18
NS
308}
309
310/*
311 * Compute best start block and diff for "near" allocations.
312 * freelen >= wantlen already checked by caller.
313 */
314STATIC xfs_extlen_t /* difference value (absolute) */
315xfs_alloc_compute_diff(
316 xfs_agblock_t wantbno, /* target starting block */
317 xfs_extlen_t wantlen, /* target length */
318 xfs_extlen_t alignment, /* target alignment */
1fccd5c8 319 int datatype, /* are we allocating data? */
2bd0ea18
NS
320 xfs_agblock_t freebno, /* freespace's starting block */
321 xfs_extlen_t freelen, /* freespace's length */
322 xfs_agblock_t *newbnop) /* result: best start block from free */
323{
324 xfs_agblock_t freeend; /* end of freespace extent */
325 xfs_agblock_t newbno1; /* return block number */
326 xfs_agblock_t newbno2; /* other new block number */
0e266570
NS
327 xfs_extlen_t newlen1=0; /* length with newbno1 */
328 xfs_extlen_t newlen2=0; /* length with newbno2 */
2bd0ea18 329 xfs_agblock_t wantend; /* end of target extent */
a85522b6 330 bool userdata = datatype & XFS_ALLOC_USERDATA;
2bd0ea18
NS
331
332 ASSERT(freelen >= wantlen);
333 freeend = freebno + freelen;
334 wantend = wantbno + wantlen;
84a62eea
DC
335 /*
336 * We want to allocate from the start of a free extent if it is past
337 * the desired block or if we are allocating user data and the free
338 * extent is before desired block. The second case is there to allow
339 * for contiguous allocation from the remaining free space if the file
340 * grows in the short term.
341 */
342 if (freebno >= wantbno || (userdata && freeend < wantend)) {
2bd0ea18
NS
343 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
344 newbno1 = NULLAGBLOCK;
345 } else if (freeend >= wantend && alignment > 1) {
346 newbno1 = roundup(wantbno, alignment);
347 newbno2 = newbno1 - alignment;
348 if (newbno1 >= freeend)
349 newbno1 = NULLAGBLOCK;
350 else
351 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
352 if (newbno2 < freebno)
353 newbno2 = NULLAGBLOCK;
354 else
355 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
356 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
357 if (newlen1 < newlen2 ||
358 (newlen1 == newlen2 &&
359 XFS_ABSDIFF(newbno1, wantbno) >
360 XFS_ABSDIFF(newbno2, wantbno)))
361 newbno1 = newbno2;
362 } else if (newbno2 != NULLAGBLOCK)
363 newbno1 = newbno2;
364 } else if (freeend >= wantend) {
365 newbno1 = wantbno;
366 } else if (alignment > 1) {
367 newbno1 = roundup(freeend - wantlen, alignment);
368 if (newbno1 > freeend - wantlen &&
369 newbno1 - alignment >= freebno)
370 newbno1 -= alignment;
371 else if (newbno1 >= freeend)
372 newbno1 = NULLAGBLOCK;
373 } else
374 newbno1 = freeend - wantlen;
375 *newbnop = newbno1;
376 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
377}
378
379/*
380 * Fix up the length, based on mod and prod.
381 * len should be k * prod + mod for some k.
382 * If len is too small it is returned unchanged.
383 * If len hits maxlen it is left alone.
384 */
385STATIC void
386xfs_alloc_fix_len(
dfc130f3 387 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18
NS
388{
389 xfs_extlen_t k;
390 xfs_extlen_t rlen;
391
392 ASSERT(args->mod < args->prod);
393 rlen = args->len;
394 ASSERT(rlen >= args->minlen);
395 ASSERT(rlen <= args->maxlen);
396 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
397 (args->mod == 0 && rlen < args->prod))
398 return;
399 k = rlen % args->prod;
400 if (k == args->mod)
401 return;
ff105f75
DC
402 if (k > args->mod)
403 rlen = rlen - (k - args->mod);
404 else
405 rlen = rlen - args->prod + (args->mod - k);
19ebedcf 406 /* casts to (int) catch length underflows */
ff105f75
DC
407 if ((int)rlen < (int)args->minlen)
408 return;
409 ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
410 ASSERT(rlen % args->prod == args->mod);
2c003dc2
CH
411 ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
412 rlen + args->minleft);
2bd0ea18
NS
413 args->len = rlen;
414}
415
2bd0ea18
NS
416/*
417 * Update the two btrees, logically removing from freespace the extent
418 * starting at rbno, rlen blocks. The extent is contained within the
419 * actual (current) free extent fbno for flen blocks.
420 * Flags are passed in indicating whether the cursors are set to the
421 * relevant records.
422 */
423STATIC int /* error code */
424xfs_alloc_fixup_trees(
dfc130f3
RC
425 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
426 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
2bd0ea18
NS
427 xfs_agblock_t fbno, /* starting block of free extent */
428 xfs_extlen_t flen, /* length of free extent */
429 xfs_agblock_t rbno, /* starting block of returned extent */
430 xfs_extlen_t rlen, /* length of returned extent */
431 int flags) /* flags, XFSA_FIXUP_... */
432{
433 int error; /* error code */
434 int i; /* operation results */
435 xfs_agblock_t nfbno1; /* first new free startblock */
436 xfs_agblock_t nfbno2; /* second new free startblock */
0e266570
NS
437 xfs_extlen_t nflen1=0; /* first new free length */
438 xfs_extlen_t nflen2=0; /* second new free length */
19ebedcf
DC
439 struct xfs_mount *mp;
440
441 mp = cnt_cur->bc_mp;
2bd0ea18
NS
442
443 /*
444 * Look up the record in the by-size tree if necessary.
445 */
446 if (flags & XFSA_FIXUP_CNT_OK) {
447#ifdef DEBUG
0e266570 448 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
2bd0ea18 449 return error;
fbb4fa7f
DW
450 if (XFS_IS_CORRUPT(mp,
451 i != 1 ||
452 nfbno1 != fbno ||
453 nflen1 != flen))
454 return -EFSCORRUPTED;
2bd0ea18
NS
455#endif
456 } else {
0e266570 457 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
2bd0ea18 458 return error;
fbb4fa7f
DW
459 if (XFS_IS_CORRUPT(mp, i != 1))
460 return -EFSCORRUPTED;
2bd0ea18
NS
461 }
462 /*
463 * Look up the record in the by-block tree if necessary.
464 */
465 if (flags & XFSA_FIXUP_BNO_OK) {
466#ifdef DEBUG
0e266570 467 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
2bd0ea18 468 return error;
fbb4fa7f
DW
469 if (XFS_IS_CORRUPT(mp,
470 i != 1 ||
471 nfbno1 != fbno ||
472 nflen1 != flen))
473 return -EFSCORRUPTED;
2bd0ea18
NS
474#endif
475 } else {
0e266570 476 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
2bd0ea18 477 return error;
fbb4fa7f
DW
478 if (XFS_IS_CORRUPT(mp, i != 1))
479 return -EFSCORRUPTED;
2bd0ea18 480 }
b3563c19 481
2bd0ea18 482#ifdef DEBUG
b3563c19
BN
483 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
484 struct xfs_btree_block *bnoblock;
485 struct xfs_btree_block *cntblock;
486
487 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
488 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
2bd0ea18 489
fbb4fa7f
DW
490 if (XFS_IS_CORRUPT(mp,
491 bnoblock->bb_numrecs !=
492 cntblock->bb_numrecs))
493 return -EFSCORRUPTED;
2bd0ea18
NS
494 }
495#endif
b3563c19 496
2bd0ea18
NS
497 /*
498 * Deal with all four cases: the allocated record is contained
499 * within the freespace record, so we can have new freespace
500 * at either (or both) end, or no freespace remaining.
501 */
502 if (rbno == fbno && rlen == flen)
503 nfbno1 = nfbno2 = NULLAGBLOCK;
504 else if (rbno == fbno) {
505 nfbno1 = rbno + rlen;
506 nflen1 = flen - rlen;
507 nfbno2 = NULLAGBLOCK;
508 } else if (rbno + rlen == fbno + flen) {
509 nfbno1 = fbno;
510 nflen1 = flen - rlen;
511 nfbno2 = NULLAGBLOCK;
512 } else {
513 nfbno1 = fbno;
514 nflen1 = rbno - fbno;
515 nfbno2 = rbno + rlen;
516 nflen2 = (fbno + flen) - nfbno2;
517 }
518 /*
519 * Delete the entry from the by-size btree.
520 */
b194c7d8 521 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 522 return error;
fbb4fa7f
DW
523 if (XFS_IS_CORRUPT(mp, i != 1))
524 return -EFSCORRUPTED;
2bd0ea18
NS
525 /*
526 * Add new by-size btree entry(s).
527 */
528 if (nfbno1 != NULLAGBLOCK) {
0e266570 529 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
2bd0ea18 530 return error;
fbb4fa7f
DW
531 if (XFS_IS_CORRUPT(mp, i != 0))
532 return -EFSCORRUPTED;
b194c7d8 533 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 534 return error;
fbb4fa7f
DW
535 if (XFS_IS_CORRUPT(mp, i != 1))
536 return -EFSCORRUPTED;
2bd0ea18
NS
537 }
538 if (nfbno2 != NULLAGBLOCK) {
0e266570 539 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
2bd0ea18 540 return error;
fbb4fa7f
DW
541 if (XFS_IS_CORRUPT(mp, i != 0))
542 return -EFSCORRUPTED;
b194c7d8 543 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 544 return error;
fbb4fa7f
DW
545 if (XFS_IS_CORRUPT(mp, i != 1))
546 return -EFSCORRUPTED;
2bd0ea18
NS
547 }
548 /*
549 * Fix up the by-block btree entry(s).
550 */
551 if (nfbno1 == NULLAGBLOCK) {
552 /*
553 * No remaining freespace, just delete the by-block tree entry.
554 */
b194c7d8 555 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18 556 return error;
fbb4fa7f
DW
557 if (XFS_IS_CORRUPT(mp, i != 1))
558 return -EFSCORRUPTED;
2bd0ea18
NS
559 } else {
560 /*
561 * Update the by-block entry to start later|be shorter.
562 */
0e266570 563 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
2bd0ea18
NS
564 return error;
565 }
566 if (nfbno2 != NULLAGBLOCK) {
567 /*
568 * 2 resulting free entries, need to add one.
569 */
0e266570 570 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
2bd0ea18 571 return error;
fbb4fa7f
DW
572 if (XFS_IS_CORRUPT(mp, i != 0))
573 return -EFSCORRUPTED;
b194c7d8 574 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18 575 return error;
fbb4fa7f
DW
576 if (XFS_IS_CORRUPT(mp, i != 1))
577 return -EFSCORRUPTED;
2bd0ea18
NS
578 }
579 return 0;
580}
581
bc01119d 582static xfs_failaddr_t
a2ceac1f
DC
583xfs_agfl_verify(
584 struct xfs_buf *bp)
585{
7861ef77 586 struct xfs_mount *mp = bp->b_mount;
a2ceac1f 587 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
a2ceac1f
DC
588 int i;
589
95d9582b
DW
590 /*
591 * There is no verification of non-crc AGFLs because mkfs does not
592 * initialise the AGFL to zero or NULL. Hence the only valid part of the
593 * AGFL is what the AGF says is active. We can't get to the AGF, so we
594 * can't verify just those entries are valid.
595 */
596 if (!xfs_sb_version_hascrc(&mp->m_sb))
597 return NULL;
598
68dbe77f 599 if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
bc01119d 600 return __this_address;
68dbe77f 601 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
bc01119d 602 return __this_address;
dd5b876e
DC
603 /*
604 * during growfs operations, the perag is not fully initialised,
605 * so we can't use it for any useful checking. growfs ensures we can't
606 * use it by using uncached buffers that don't have the perag attached
607 * so we can detect and avoid this problem.
608 */
609 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
bc01119d 610 return __this_address;
dd5b876e 611
b8165508 612 for (i = 0; i < xfs_agfl_size(mp); i++) {
dd5b876e 613 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
a2ceac1f 614 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
bc01119d 615 return __this_address;
a2ceac1f 616 }
a65d8d29 617
bc01119d
DW
618 if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
619 return __this_address;
620 return NULL;
dd5b876e
DC
621}
622
623static void
624xfs_agfl_read_verify(
625 struct xfs_buf *bp)
626{
7861ef77 627 struct xfs_mount *mp = bp->b_mount;
1e697959 628 xfs_failaddr_t fa;
dd5b876e
DC
629
630 /*
631 * There is no verification of non-crc AGFLs because mkfs does not
632 * initialise the AGFL to zero or NULL. Hence the only valid part of the
633 * AGFL is what the AGF says is active. We can't get to the AGF, so we
634 * can't verify just those entries are valid.
635 */
636 if (!xfs_sb_version_hascrc(&mp->m_sb))
637 return;
638
45922933 639 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
1e697959
DW
640 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
641 else {
642 fa = xfs_agfl_verify(bp);
643 if (fa)
644 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
645 }
a2ceac1f
DC
646}
647
648static void
649xfs_agfl_write_verify(
650 struct xfs_buf *bp)
651{
7861ef77 652 struct xfs_mount *mp = bp->b_mount;
37d086ca 653 struct xfs_buf_log_item *bip = bp->b_log_item;
1e697959 654 xfs_failaddr_t fa;
a2ceac1f 655
dd5b876e
DC
656 /* no verification of non-crc AGFLs */
657 if (!xfs_sb_version_hascrc(&mp->m_sb))
658 return;
659
1e697959
DW
660 fa = xfs_agfl_verify(bp);
661 if (fa) {
662 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
dd5b876e
DC
663 return;
664 }
665
666 if (bip)
667 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
668
43b5aeed 669 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
a2ceac1f
DC
670}
671
672const struct xfs_buf_ops xfs_agfl_buf_ops = {
a3fac935 673 .name = "xfs_agfl",
68dbe77f 674 .magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
a2ceac1f
DC
675 .verify_read = xfs_agfl_read_verify,
676 .verify_write = xfs_agfl_write_verify,
95d9582b 677 .verify_struct = xfs_agfl_verify,
a2ceac1f
DC
678};
679
2bd0ea18
NS
680/*
681 * Read in the allocation group free block array.
682 */
50bb67d6 683int /* error */
2bd0ea18
NS
684xfs_alloc_read_agfl(
685 xfs_mount_t *mp, /* mount point structure */
686 xfs_trans_t *tp, /* transaction pointer */
687 xfs_agnumber_t agno, /* allocation group number */
688 xfs_buf_t **bpp) /* buffer for the ag free block array */
689{
690 xfs_buf_t *bp; /* return value */
2bd0ea18
NS
691 int error;
692
693 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
694 error = xfs_trans_read_buf(
695 mp, tp, mp->m_ddev_targp,
696 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
a2ceac1f 697 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
9440d84d 698 if (error)
2bd0ea18 699 return error;
a2ceac1f 700 xfs_buf_set_ref(bp, XFS_AGFL_REF);
2bd0ea18
NS
701 *bpp = bp;
702 return 0;
703}
704
a2ceac1f
DC
705STATIC int
706xfs_alloc_update_counters(
707 struct xfs_trans *tp,
708 struct xfs_perag *pag,
709 struct xfs_buf *agbp,
710 long len)
711{
712 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
713
714 pag->pagf_freeblks += len;
715 be32_add_cpu(&agf->agf_freeblks, len);
716
717 xfs_trans_agblocks_delta(tp, len);
718 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
a0264b73
DW
719 be32_to_cpu(agf->agf_length))) {
720 xfs_buf_corruption_error(agbp);
12b53197 721 return -EFSCORRUPTED;
a0264b73 722 }
a2ceac1f
DC
723
724 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
725 return 0;
726}
727
2bd0ea18 728/*
2950989c 729 * Block allocation algorithm and data structures.
2bd0ea18 730 */
2950989c
BF
731struct xfs_alloc_cur {
732 struct xfs_btree_cur *cnt; /* btree cursors */
733 struct xfs_btree_cur *bnolt;
734 struct xfs_btree_cur *bnogt;
4d66edb1 735 xfs_extlen_t cur_len;/* current search length */
19fe42e3
BF
736 xfs_agblock_t rec_bno;/* extent startblock */
737 xfs_extlen_t rec_len;/* extent length */
738 xfs_agblock_t bno; /* alloc bno */
739 xfs_extlen_t len; /* alloc len */
740 xfs_extlen_t diff; /* diff from search bno */
e055d59e
BF
741 unsigned int busy_gen;/* busy state */
742 bool busy;
2950989c
BF
743};
744
745/*
746 * Set up cursors, etc. in the extent allocation cursor. This function can be
747 * called multiple times to reset an initialized structure without having to
748 * reallocate cursors.
749 */
750static int
751xfs_alloc_cur_setup(
752 struct xfs_alloc_arg *args,
753 struct xfs_alloc_cur *acur)
754{
755 int error;
756 int i;
757
758 ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO);
759
4d66edb1 760 acur->cur_len = args->maxlen;
19fe42e3
BF
761 acur->rec_bno = 0;
762 acur->rec_len = 0;
763 acur->bno = 0;
764 acur->len = 0;
04ea0ac1 765 acur->diff = -1;
e055d59e
BF
766 acur->busy = false;
767 acur->busy_gen = 0;
768
2950989c
BF
769 /*
770 * Perform an initial cntbt lookup to check for availability of maxlen
771 * extents. If this fails, we'll return -ENOSPC to signal the caller to
772 * attempt a small allocation.
773 */
774 if (!acur->cnt)
775 acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
776 args->agbp, args->agno, XFS_BTNUM_CNT);
777 error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
778 if (error)
779 return error;
780
781 /*
782 * Allocate the bnobt left and right search cursors.
783 */
784 if (!acur->bnolt)
785 acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
786 args->agbp, args->agno, XFS_BTNUM_BNO);
787 if (!acur->bnogt)
788 acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
789 args->agbp, args->agno, XFS_BTNUM_BNO);
790 return i == 1 ? 0 : -ENOSPC;
791}
792
793static void
794xfs_alloc_cur_close(
795 struct xfs_alloc_cur *acur,
796 bool error)
797{
798 int cur_error = XFS_BTREE_NOERROR;
799
800 if (error)
801 cur_error = XFS_BTREE_ERROR;
802
803 if (acur->cnt)
804 xfs_btree_del_cursor(acur->cnt, cur_error);
805 if (acur->bnolt)
806 xfs_btree_del_cursor(acur->bnolt, cur_error);
807 if (acur->bnogt)
808 xfs_btree_del_cursor(acur->bnogt, cur_error);
809 acur->cnt = acur->bnolt = acur->bnogt = NULL;
810}
2bd0ea18 811
04ea0ac1
BF
812/*
813 * Check an extent for allocation and track the best available candidate in the
814 * allocation structure. The cursor is deactivated if it has entered an out of
815 * range state based on allocation arguments. Optionally return the extent
816 * extent geometry and allocation status if requested by the caller.
817 */
818static int
819xfs_alloc_cur_check(
820 struct xfs_alloc_arg *args,
821 struct xfs_alloc_cur *acur,
822 struct xfs_btree_cur *cur,
823 int *new)
824{
825 int error, i;
826 xfs_agblock_t bno, bnoa, bnew;
827 xfs_extlen_t len, lena, diff = -1;
828 bool busy;
829 unsigned busy_gen = 0;
830 bool deactivate = false;
3ca39168 831 bool isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;
04ea0ac1
BF
832
833 *new = 0;
834
835 error = xfs_alloc_get_rec(cur, &bno, &len, &i);
836 if (error)
837 return error;
fbb4fa7f
DW
838 if (XFS_IS_CORRUPT(args->mp, i != 1))
839 return -EFSCORRUPTED;
04ea0ac1
BF
840
841 /*
842 * Check minlen and deactivate a cntbt cursor if out of acceptable size
843 * range (i.e., walking backwards looking for a minlen extent).
844 */
845 if (len < args->minlen) {
3ca39168 846 deactivate = !isbnobt;
04ea0ac1
BF
847 goto out;
848 }
849
850 busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
851 &busy_gen);
852 acur->busy |= busy;
853 if (busy)
854 acur->busy_gen = busy_gen;
855 /* deactivate a bnobt cursor outside of locality range */
3ca39168
BF
856 if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
857 deactivate = isbnobt;
04ea0ac1 858 goto out;
3ca39168 859 }
04ea0ac1
BF
860 if (lena < args->minlen)
861 goto out;
862
863 args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
864 xfs_alloc_fix_len(args);
865 ASSERT(args->len >= args->minlen);
866 if (args->len < acur->len)
867 goto out;
868
869 /*
870 * We have an aligned record that satisfies minlen and beats or matches
871 * the candidate extent size. Compare locality for near allocation mode.
872 */
873 ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
874 diff = xfs_alloc_compute_diff(args->agbno, args->len,
875 args->alignment, args->datatype,
876 bnoa, lena, &bnew);
877 if (bnew == NULLAGBLOCK)
878 goto out;
3ca39168
BF
879
880 /*
881 * Deactivate a bnobt cursor with worse locality than the current best.
882 */
883 if (diff > acur->diff) {
884 deactivate = isbnobt;
04ea0ac1 885 goto out;
3ca39168 886 }
04ea0ac1
BF
887
888 ASSERT(args->len > acur->len ||
889 (args->len == acur->len && diff <= acur->diff));
890 acur->rec_bno = bno;
891 acur->rec_len = len;
892 acur->bno = bnew;
893 acur->len = args->len;
894 acur->diff = diff;
895 *new = 1;
896
4f2eee5a
BF
897 /*
898 * We're done if we found a perfect allocation. This only deactivates
899 * the current cursor, but this is just an optimization to terminate a
900 * cntbt search that otherwise runs to the edge of the tree.
901 */
902 if (acur->diff == 0 && acur->len == args->maxlen)
903 deactivate = true;
04ea0ac1
BF
904out:
905 if (deactivate)
906 cur->bc_private.a.priv.abt.active = false;
907 trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
908 *new);
909 return 0;
910}
911
dacde37d
BF
912/*
913 * Complete an allocation of a candidate extent. Remove the extent from both
914 * trees and update the args structure.
915 */
916STATIC int
917xfs_alloc_cur_finish(
918 struct xfs_alloc_arg *args,
919 struct xfs_alloc_cur *acur)
920{
921 int error;
922
923 ASSERT(acur->cnt && acur->bnolt);
924 ASSERT(acur->bno >= acur->rec_bno);
925 ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
926 ASSERT(acur->rec_bno + acur->rec_len <=
927 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
928
929 error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
930 acur->rec_len, acur->bno, acur->len, 0);
931 if (error)
932 return error;
933
934 args->agbno = acur->bno;
935 args->len = acur->len;
936 args->wasfromfl = 0;
937
938 trace_xfs_alloc_cur(args);
939 return 0;
940}
941
4d66edb1
BF
/*
 * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
 * bno optimized lookup to search for extents with ideal size and locality.
 *
 * Called repeatedly from the NEAR-mode search loop; each call checks at most
 * two records and then grows acur->cur_len so the next call looks at larger
 * extents. Returns 0 (possibly after deactivating the cursor) or a negative
 * error.
 */
STATIC int
xfs_alloc_cntbt_iter(
	struct xfs_alloc_arg		*args,
	struct xfs_alloc_cur		*acur)
{
	struct xfs_btree_cur	*cur = acur->cnt;
	xfs_agblock_t		bno;
	xfs_extlen_t		len, cur_len;
	int			error;
	int			i;

	/* Nothing to do once the cntbt cursor has been deactivated. */
	if (!xfs_alloc_cur_active(cur))
		return 0;

	/* locality optimized lookup */
	cur_len = acur->cur_len;
	error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
	if (error)
		return error;
	if (i == 0)
		return 0;
	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
	if (error)
		return error;

	/* check the current record and update search length from it */
	error = xfs_alloc_cur_check(args, acur, cur, &i);
	if (error)
		return error;
	/* lookup was >= cur_len, so the record cannot be shorter */
	ASSERT(len >= acur->cur_len);
	acur->cur_len = len;

	/*
	 * We looked up the first record >= [agbno, len] above. The agbno is a
	 * secondary key and so the current record may lie just before or after
	 * agbno. If it is past agbno, check the previous record too so long as
	 * the length matches as it may be closer. Don't check a smaller record
	 * because that could deactivate our cursor.
	 */
	if (bno > args->agbno) {
		error = xfs_btree_decrement(cur, 0, &i);
		if (!error && i) {
			error = xfs_alloc_get_rec(cur, &bno, &len, &i);
			if (!error && i && len == acur->cur_len)
				error = xfs_alloc_cur_check(args, acur, cur,
							    &i);
		}
		if (error)
			return error;
	}

	/*
	 * Increment the search key until we find at least one allocation
	 * candidate or if the extent we found was larger. Otherwise, double the
	 * search key to optimize the search. Efficiency is more important here
	 * than absolute best locality.
	 */
	cur_len <<= 1;
	if (!acur->len || acur->cur_len >= cur_len)
		acur->cur_len++;
	else
		acur->cur_len = cur_len;

	/* error is zero on every path that reaches here */
	return error;
}
1011
ba02381c
BF
/*
 * Deal with the case where only small freespaces remain. Either return the
 * contents of the last freespace record, or allocate space from the freelist if
 * there is nothing in the tree.
 *
 * On success *stat distinguishes the two outcomes: 0 means a single block was
 * taken from the AGFL (and args->agbno/len are already set), 1 means a normal
 * btree record was returned in *fbnop/*flenp (which may still be too short —
 * the caller must check against minlen).
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_small(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	struct xfs_btree_cur	*ccur,	/* optional by-size cursor */
	xfs_agblock_t		*fbnop,	/* result block number */
	xfs_extlen_t		*flenp,	/* result length */
	int			*stat)	/* status: 0-freelist, 1-normal/none */
{
	int			error = 0;
	xfs_agblock_t		fbno = NULLAGBLOCK;
	xfs_extlen_t		flen = 0;
	int			i = 0;

	/*
	 * If a cntbt cursor is provided, try to allocate the largest record in
	 * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
	 * allocation. Make sure to respect minleft even when pulling from the
	 * freelist.
	 */
	if (ccur)
		error = xfs_btree_decrement(ccur, 0, &i);
	if (error)
		goto error;
	if (i) {
		/* found the last (largest) record in the cntbt */
		error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
		if (error)
			goto error;
		if (XFS_IS_CORRUPT(args->mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error;
		}
		goto out;
	}

	/*
	 * AGFL blocks are single blocks with no alignment, so only a
	 * one-block unaligned request can be satisfied from the freelist,
	 * and only if doing so leaves enough freelist blocks for minleft.
	 */
	if (args->minlen != 1 || args->alignment != 1 ||
	    args->resv == XFS_AG_RESV_AGFL ||
	    (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <=
	     args->minleft))
		goto out;

	error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
	if (error)
		goto error;
	if (fbno == NULLAGBLOCK)
		goto out;

	xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
			      (args->datatype & XFS_ALLOC_NOBUSY));

	if (args->datatype & XFS_ALLOC_USERDATA) {
		struct xfs_buf	*bp;

		/*
		 * Invalidate any stale metadata buffer over this block before
		 * handing it out as user data.
		 */
		error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
				XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
				args->mp->m_bsize, 0, &bp);
		if (error)
			goto error;
		xfs_trans_binval(args->tp, bp);
	}
	*fbnop = args->agbno = fbno;
	*flenp = args->len = 1;
	if (XFS_IS_CORRUPT(args->mp,
			   fbno >= be32_to_cpu(
				   XFS_BUF_TO_AGF(args->agbp)->agf_length))) {
		error = -EFSCORRUPTED;
		goto error;
	}
	args->wasfromfl = 1;
	trace_xfs_alloc_small_freelist(args);

	/*
	 * If we're feeding an AGFL block to something that doesn't live in the
	 * free space, we need to clear out the OWN_AG rmap.
	 */
	error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1,
			      &XFS_RMAP_OINFO_AG);
	if (error)
		goto error;

	*stat = 0;
	return 0;

out:
	/*
	 * Can't do the allocation, give up.
	 */
	if (flen < args->minlen) {
		args->agbno = NULLAGBLOCK;
		trace_xfs_alloc_small_notenough(args);
		flen = 0;
	}
	*fbnop = fbno;
	*flenp = flen;
	*stat = 1;
	trace_xfs_alloc_small_done(args);
	return 0;

error:
	trace_xfs_alloc_small_error(args);
	return error;
}
1118
2bd0ea18
NS
/*
 * Allocate a variable extent in the allocation group agno.
 * Type and bno are used to determine where in the allocation group the
 * extent will start.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 *
 * This is the common AG-level entry point: it dispatches to the size/near/
 * exact strategy and then performs the shared post-allocation bookkeeping
 * (rmap insert, free-block counters, per-AG reservation, stats).
 */
STATIC int			/* error */
xfs_alloc_ag_vextent(
	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
{
	int		error=0;

	ASSERT(args->minlen > 0);
	ASSERT(args->maxlen > 0);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->mod < args->prod);
	ASSERT(args->alignment > 0);

	/*
	 * Branch to correct routine based on the type.
	 */
	args->wasfromfl = 0;
	switch (args->type) {
	case XFS_ALLOCTYPE_THIS_AG:
		error = xfs_alloc_ag_vextent_size(args);
		break;
	case XFS_ALLOCTYPE_NEAR_BNO:
		error = xfs_alloc_ag_vextent_near(args);
		break;
	case XFS_ALLOCTYPE_THIS_BNO:
		error = xfs_alloc_ag_vextent_exact(args);
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}

	/* Nothing allocated (or an error) - skip the accounting below. */
	if (error || args->agbno == NULLAGBLOCK)
		return error;

	ASSERT(args->len >= args->minlen);
	ASSERT(args->len <= args->maxlen);
	ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
	ASSERT(args->agbno % args->alignment == 0);

	/* if not file data, insert new block into the reverse map btree */
	if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
		error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
				       args->agbno, args->len, &args->oinfo);
		if (error)
			return error;
	}

	/*
	 * AGFL blocks are already accounted as allocated; only non-freelist
	 * allocations need the free-block counters updated.
	 */
	if (!args->wasfromfl) {
		error = xfs_alloc_update_counters(args->tp, args->pag,
						  args->agbp,
						  -((long)(args->len)));
		if (error)
			return error;

		ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
					       args->agbno, args->len));
	}

	xfs_ag_resv_alloc_extent(args->pag, args->resv, args);

	XFS_STATS_INC(args->mp, xs_allocx);
	XFS_STATS_ADD(args->mp, xs_allocb, args->len);
	return error;
}
1191
/*
 * Allocate a variable extent at exactly agno/bno.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_exact(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur;/* by block-number btree cursor */
	xfs_btree_cur_t	*cnt_cur;/* by count btree cursor */
	int		error;
	xfs_agblock_t	fbno;	/* start block of found extent */
	xfs_extlen_t	flen;	/* length of found extent */
	xfs_agblock_t	tbno;	/* start block of busy extent */
	xfs_extlen_t	tlen;	/* length of busy extent */
	xfs_agblock_t	tend;	/* end block of busy extent */
	int		i;	/* success/failure of operation */
	unsigned	busy_gen;

	/* exact-bno allocations never carry an alignment requirement */
	ASSERT(args->alignment == 1);

	/*
	 * Allocate/initialize a cursor for the by-number freespace btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->agno, XFS_BTNUM_BNO);

	/*
	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
	 * Look for the closest free block <= bno, it must contain bno
	 * if any free block does.
	 */
	error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
	if (error)
		goto error0;
	if (!i)
		goto not_found;

	/*
	 * Grab the freespace record.
	 */
	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
	if (error)
		goto error0;
	if (XFS_IS_CORRUPT(args->mp, i != 1)) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(fbno <= args->agbno);

	/*
	 * Check for overlapping busy extents.
	 */
	tbno = fbno;
	tlen = flen;
	xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);

	/*
	 * Give up if the start of the extent is busy, or the freespace isn't
	 * long enough for the minimum request.
	 */
	if (tbno > args->agbno)
		goto not_found;
	if (tlen < args->minlen)
		goto not_found;
	tend = tbno + tlen;
	if (tend < args->agbno + args->minlen)
		goto not_found;

	/*
	 * End of extent will be smaller of the freespace end and the
	 * maximal requested end.
	 *
	 * Fix the length according to mod and prod if given.
	 */
	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
						- args->agbno;
	xfs_alloc_fix_len(args);
	ASSERT(args->agbno + args->len <= tend);

	/*
	 * We are allocating agbno for args->len
	 * Allocate/initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);
	ASSERT(args->agbno + args->len <=
		be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
				      args->len, XFSA_FIXUP_BNO_OK);
	if (error) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
		goto error0;
	}

	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

	args->wasfromfl = 0;
	trace_xfs_alloc_exact_done(args);
	return 0;

not_found:
	/* Didn't find it, return null. */
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	args->agbno = NULLAGBLOCK;
	trace_xfs_alloc_exact_notfound(args);
	return 0;

error0:
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	trace_xfs_alloc_exact_error(args);
	return error;
}
1308
1309/*
4f2eee5a
BF
1310 * Search a given number of btree records in a given direction. Check each
1311 * record against the good extent we've already found.
56b2de80
DC
1312 */
1313STATIC int
4f2eee5a 1314xfs_alloc_walk_iter(
3ca39168
BF
1315 struct xfs_alloc_arg *args,
1316 struct xfs_alloc_cur *acur,
1317 struct xfs_btree_cur *cur,
4f2eee5a
BF
1318 bool increment,
1319 bool find_one, /* quit on first candidate */
1320 int count, /* rec count (-1 for infinite) */
1321 int *stat)
56b2de80 1322{
56b2de80
DC
1323 int error;
1324 int i;
56b2de80 1325
4f2eee5a
BF
1326 *stat = 0;
1327
56b2de80 1328 /*
3ca39168
BF
1329 * Search so long as the cursor is active or we find a better extent.
1330 * The cursor is deactivated if it extends beyond the range of the
1331 * current allocation candidate.
56b2de80 1332 */
4f2eee5a 1333 while (xfs_alloc_cur_active(cur) && count) {
3ca39168 1334 error = xfs_alloc_cur_check(args, acur, cur, &i);
56b2de80 1335 if (error)
3ca39168 1336 return error;
4f2eee5a
BF
1337 if (i == 1) {
1338 *stat = 1;
1339 if (find_one)
1340 break;
1341 }
3ca39168
BF
1342 if (!xfs_alloc_cur_active(cur))
1343 break;
56b2de80 1344
3ca39168
BF
1345 if (increment)
1346 error = xfs_btree_increment(cur, 0, &i);
56b2de80 1347 else
3ca39168 1348 error = xfs_btree_decrement(cur, 0, &i);
56b2de80 1349 if (error)
3ca39168
BF
1350 return error;
1351 if (i == 0)
1352 cur->bc_private.a.priv.abt.active = false;
4f2eee5a
BF
1353
1354 if (count > 0)
1355 count--;
3ca39168 1356 }
56b2de80 1357
56b2de80 1358 return 0;
2bd0ea18
NS
1359}
1360
/*
 * Search the by-bno and by-size btrees in parallel in search of an extent with
 * ideal locality based on the NEAR mode ->agbno locality hint.
 *
 * On return *stat is 1 if a candidate was recorded in acur, 0 otherwise.
 */
STATIC int
xfs_alloc_ag_vextent_locality(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	int			*stat)
{
	struct xfs_btree_cur	*fbcur = NULL;
	int			error;
	int			i;
	bool			fbinc;

	ASSERT(acur->len == 0);
	ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);

	*stat = 0;

	/* position all three cursors around the locality hint */
	error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
	if (error)
		return error;
	error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
	if (error)
		return error;
	error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
	if (error)
		return error;

	/*
	 * Search the bnobt and cntbt in parallel. Search the bnobt left and
	 * right and lookup the closest extent to the locality hint for each
	 * extent size key in the cntbt. The entire search terminates
	 * immediately on a bnobt hit because that means we've found best case
	 * locality. Otherwise the search continues until the cntbt cursor runs
	 * off the end of the tree. If no allocation candidate is found at this
	 * point, give up on locality, walk backwards from the end of the cntbt
	 * and take the first available extent.
	 *
	 * The parallel tree searches balance each other out to provide fairly
	 * consistent performance for various situations. The bnobt search can
	 * have pathological behavior in the worst case scenario of larger
	 * allocation requests and fragmented free space. On the other hand, the
	 * bnobt is able to satisfy most smaller allocation requests much more
	 * quickly than the cntbt. The cntbt search can sift through fragmented
	 * free space and sets of free extents for larger allocation requests
	 * more quickly than the bnobt. Since the locality hint is just a hint
	 * and we don't want to scan the entire bnobt for perfect locality, the
	 * cntbt search essentially bounds the bnobt search such that we can
	 * find good enough locality at reasonable performance in most cases.
	 */
	while (xfs_alloc_cur_active(acur->bnolt) ||
	       xfs_alloc_cur_active(acur->bnogt) ||
	       xfs_alloc_cur_active(acur->cnt)) {

		trace_xfs_alloc_cur_lookup(args);

		/*
		 * Search the bnobt left and right. In the case of a hit, finish
		 * the search in the opposite direction and we're done.
		 */
		error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
					    true, 1, &i);
		if (error)
			return error;
		if (i == 1) {
			trace_xfs_alloc_cur_left(args);
			fbcur = acur->bnogt;
			fbinc = true;
			break;
		}
		error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
					    1, &i);
		if (error)
			return error;
		if (i == 1) {
			trace_xfs_alloc_cur_right(args);
			fbcur = acur->bnolt;
			fbinc = false;
			break;
		}

		/*
		 * Check the extent with best locality based on the current
		 * extent size search key and keep track of the best candidate.
		 */
		error = xfs_alloc_cntbt_iter(args, acur);
		if (error)
			return error;
		if (!xfs_alloc_cur_active(acur->cnt)) {
			trace_xfs_alloc_cur_lookup_done(args);
			break;
		}
	}

	/*
	 * If we failed to find anything due to busy extents, return empty
	 * handed so the caller can flush and retry. If no busy extents were
	 * found, walk backwards from the end of the cntbt as a last resort.
	 */
	if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
		error = xfs_btree_decrement(acur->cnt, 0, &i);
		if (error)
			return error;
		if (i) {
			/* reactivate the cursor for the last-resort walk */
			acur->cnt->bc_private.a.priv.abt.active = true;
			fbcur = acur->cnt;
			fbinc = false;
		}
	}

	/*
	 * Search in the opposite direction for a better entry in the case of
	 * a bnobt hit or walk backwards from the end of the cntbt.
	 */
	if (fbcur) {
		error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
					    &i);
		if (error)
			return error;
	}

	if (acur->len)
		*stat = 1;

	return 0;
}
1489
0bb673ce
DW
1490/* Check the last block of the cnt btree for allocations. */
1491static int
1492xfs_alloc_ag_vextent_lastblock(
1493 struct xfs_alloc_arg *args,
1494 struct xfs_alloc_cur *acur,
1495 xfs_agblock_t *bno,
1496 xfs_extlen_t *len,
1497 bool *allocated)
1498{
1499 int error;
1500 int i;
1501
1502#ifdef DEBUG
1503 /* Randomly don't execute the first algorithm. */
1504 if (prandom_u32() & 1)
1505 return 0;
1506#endif
1507
1508 /*
1509 * Start from the entry that lookup found, sequence through all larger
1510 * free blocks. If we're actually pointing at a record smaller than
1511 * maxlen, go to the start of this block, and skip all those smaller
1512 * than minlen.
1513 */
1514 if (len || args->alignment > 1) {
1515 acur->cnt->bc_ptrs[0] = 1;
1516 do {
1517 error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
1518 if (error)
1519 return error;
fbb4fa7f
DW
1520 if (XFS_IS_CORRUPT(args->mp, i != 1))
1521 return -EFSCORRUPTED;
0bb673ce
DW
1522 if (*len >= args->minlen)
1523 break;
1524 error = xfs_btree_increment(acur->cnt, 0, &i);
1525 if (error)
1526 return error;
1527 } while (i);
1528 ASSERT(*len >= args->minlen);
1529 if (!i)
1530 return 0;
1531 }
1532
1533 error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
1534 if (error)
1535 return error;
1536
1537 /*
1538 * It didn't work. We COULD be in a case where there's a good record
1539 * somewhere, so try again.
1540 */
1541 if (acur->len == 0)
1542 return 0;
1543
1544 trace_xfs_alloc_near_first(args);
1545 *allocated = true;
1546 return 0;
1547}
1548
2bd0ea18
NS
/*
 * Allocate a variable extent near bno in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int
xfs_alloc_ag_vextent_near(
	struct xfs_alloc_arg	*args)
{
	struct xfs_alloc_cur	acur = {};
	int			error;		/* error code */
	int			i;		/* result code, temporary */
	xfs_agblock_t		bno;
	xfs_extlen_t		len;

	/* handle uninitialized agbno range so caller doesn't have to */
	if (!args->min_agbno && !args->max_agbno)
		args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
	ASSERT(args->min_agbno <= args->max_agbno);

	/* clamp agbno to the range if it's outside */
	if (args->agbno < args->min_agbno)
		args->agbno = args->min_agbno;
	if (args->agbno > args->max_agbno)
		args->agbno = args->max_agbno;

restart:
	len = 0;

	/*
	 * Set up cursors and see if there are any free extents as big as
	 * maxlen. If not, pick the last entry in the tree unless the tree is
	 * empty.
	 */
	error = xfs_alloc_cur_setup(args, &acur);
	if (error == -ENOSPC) {
		/* no maxlen-sized extent; fall back to the small allocator */
		error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
				&len, &i);
		if (error)
			goto out;
		if (i == 0 || len == 0) {
			trace_xfs_alloc_near_noentry(args);
			goto out;
		}
		ASSERT(i == 1);
	} else if (error) {
		goto out;
	}

	/*
	 * First algorithm.
	 * If the requested extent is large wrt the freespaces available
	 * in this a.g., then the cursor will be pointing to a btree entry
	 * near the right edge of the tree. If it's in the last btree leaf
	 * block, then we just examine all the entries in that block
	 * that are big enough, and pick the best one.
	 */
	if (xfs_btree_islastblock(acur.cnt, 0)) {
		bool		allocated = false;

		error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
				&allocated);
		if (error)
			goto out;
		if (allocated)
			goto alloc_finish;
	}

	/*
	 * Second algorithm. Combined cntbt and bnobt search to find ideal
	 * locality.
	 */
	error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
	if (error)
		goto out;

	/*
	 * If we couldn't get anything, give up.
	 */
	if (!acur.len) {
		if (acur.busy) {
			/* only busy extents blocked us: flush and retry */
			trace_xfs_alloc_near_busy(args);
			xfs_extent_busy_flush(args->mp, args->pag,
					      acur.busy_gen);
			goto restart;
		}
		trace_xfs_alloc_size_neither(args);
		args->agbno = NULLAGBLOCK;
		goto out;
	}

alloc_finish:
	/* fix up btrees on a successful allocation */
	error = xfs_alloc_cur_finish(args, &acur);

out:
	xfs_alloc_cur_close(&acur, error);
	return error;
}
1649
/*
 * Allocate a variable extent anywhere in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_size(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur;	/* cursor for bno btree */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for cnt btree */
	int		error;		/* error result */
	xfs_agblock_t	fbno;		/* start of found freespace */
	xfs_extlen_t	flen;		/* length of found freespace */
	int		i;		/* temp status variable */
	xfs_agblock_t	rbno;		/* returned block number */
	xfs_extlen_t	rlen;		/* length of returned extent */
	bool		busy;
	unsigned	busy_gen;

restart:
	/*
	 * Allocate and initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);
	bno_cur = NULL;
	busy = false;

	/*
	 * Look for an entry >= maxlen+alignment-1 blocks.
	 */
	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
			args->maxlen + args->alignment - 1, &i)))
		goto error0;

	/*
	 * If none then we have to settle for a smaller extent. In the case that
	 * there are no large extents, this will return the last entry in the
	 * tree unless the tree is empty. In the case that there are only busy
	 * large extents, this will return the largest small extent unless there
	 * are no smaller extents available.
	 */
	if (!i) {
		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
						   &fbno, &flen, &i);
		if (error)
			goto error0;
		if (i == 0 || flen == 0) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_noentry(args);
			return 0;
		}
		ASSERT(i == 1);
		busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
				&rlen, &busy_gen);
	} else {
		/*
		 * Search for a non-busy extent that is large enough.
		 */
		for (;;) {
			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
			if (error)
				goto error0;
			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
				error = -EFSCORRUPTED;
				goto error0;
			}

			busy = xfs_alloc_compute_aligned(args, fbno, flen,
					&rbno, &rlen, &busy_gen);

			if (rlen >= args->maxlen)
				break;

			error = xfs_btree_increment(cnt_cur, 0, &i);
			if (error)
				goto error0;
			if (i == 0) {
				/*
				 * Our only valid extents must have been busy.
				 * Make it unbusy by forcing the log out and
				 * retrying.
				 */
				xfs_btree_del_cursor(cnt_cur,
						     XFS_BTREE_NOERROR);
				trace_xfs_alloc_size_busy(args);
				xfs_extent_busy_flush(args->mp,
							args->pag, busy_gen);
				goto restart;
			}
		}
	}

	/*
	 * In the first case above, we got the last entry in the
	 * by-size btree. Now we check to see if the space hits maxlen
	 * once aligned; if not, we search left for something better.
	 * This can't happen in the second case above.
	 */
	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
	if (XFS_IS_CORRUPT(args->mp,
			   rlen != 0 &&
			   (rlen > flen ||
			    rbno + rlen > fbno + flen))) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	if (rlen < args->maxlen) {
		xfs_agblock_t	bestfbno;
		xfs_extlen_t	bestflen;
		xfs_agblock_t	bestrbno;
		xfs_extlen_t	bestrlen;

		/* walk left through same-or-larger records for a better fit */
		bestrlen = rlen;
		bestrbno = rbno;
		bestflen = flen;
		bestfbno = fbno;
		for (;;) {
			if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
				goto error0;
			if (i == 0)
				break;
			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
					&i)))
				goto error0;
			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
				error = -EFSCORRUPTED;
				goto error0;
			}
			/* records only get shorter leftwards; stop early */
			if (flen < bestrlen)
				break;
			busy = xfs_alloc_compute_aligned(args, fbno, flen,
					&rbno, &rlen, &busy_gen);
			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
			if (XFS_IS_CORRUPT(args->mp,
					   rlen != 0 &&
					   (rlen > flen ||
					    rbno + rlen > fbno + flen))) {
				error = -EFSCORRUPTED;
				goto error0;
			}
			if (rlen > bestrlen) {
				bestrlen = rlen;
				bestrbno = rbno;
				bestflen = flen;
				bestfbno = fbno;
				if (rlen == args->maxlen)
					break;
			}
		}
		/* reposition the cursor on the best record found */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
				&i)))
			goto error0;
		if (XFS_IS_CORRUPT(args->mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		rlen = bestrlen;
		rbno = bestrbno;
		flen = bestflen;
		fbno = bestfbno;
	}
	args->wasfromfl = 0;
	/*
	 * Fix up the length.
	 */
	args->len = rlen;
	if (rlen < args->minlen) {
		if (busy) {
			/* busy trimming shrank the extent: flush and retry */
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_busy(args);
			xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
			goto restart;
		}
		goto out_nominleft;
	}
	xfs_alloc_fix_len(args);

	rlen = args->len;
	if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	/*
	 * Allocate and initialize a cursor for the by-block tree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_BNO);
	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
			rbno, rlen, XFSA_FIXUP_CNT_OK)))
		goto error0;
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	cnt_cur = bno_cur = NULL;
	args->len = rlen;
	args->agbno = rbno;
	if (XFS_IS_CORRUPT(args->mp,
			   args->agbno + args->len >
			   be32_to_cpu(
				   XFS_BUF_TO_AGF(args->agbp)->agf_length))) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	trace_xfs_alloc_size_done(args);
	return 0;

error0:
	trace_xfs_alloc_size_error(args);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	return error;

out_nominleft:
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	trace_xfs_alloc_size_nominleft(args);
	args->agbno = NULLAGBLOCK;
	return 0;
}
1872
2bd0ea18
NS
1873/*
1874 * Free the extent starting at agno/bno for length.
1875 */
85aec44f 1876STATIC int
2bd0ea18 1877xfs_free_ag_extent(
5837e73b
DW
1878 struct xfs_trans *tp,
1879 struct xfs_buf *agbp,
1880 xfs_agnumber_t agno,
1881 xfs_agblock_t bno,
1882 xfs_extlen_t len,
1883 const struct xfs_owner_info *oinfo,
1884 enum xfs_ag_resv_type type)
2bd0ea18 1885{
5837e73b
DW
1886 struct xfs_mount *mp;
1887 struct xfs_perag *pag;
1888 struct xfs_btree_cur *bno_cur;
1889 struct xfs_btree_cur *cnt_cur;
1890 xfs_agblock_t gtbno; /* start of right neighbor */
1891 xfs_extlen_t gtlen; /* length of right neighbor */
1892 xfs_agblock_t ltbno; /* start of left neighbor */
1893 xfs_extlen_t ltlen; /* length of left neighbor */
1894 xfs_agblock_t nbno; /* new starting block of freesp */
1895 xfs_extlen_t nlen; /* new length of freespace */
1896 int haveleft; /* have a left neighbor */
1897 int haveright; /* have a right neighbor */
1898 int i;
1899 int error;
2bd0ea18 1900
631ac87a 1901 bno_cur = cnt_cur = NULL;
2bd0ea18 1902 mp = tp->t_mountp;
631ac87a 1903
3ee858aa 1904 if (!xfs_rmap_should_skip_owner_update(oinfo)) {
631ac87a
DW
1905 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1906 if (error)
1907 goto error0;
1908 }
1909
5000d01d 1910 /*
2bd0ea18
NS
1911 * Allocate and initialize a cursor for the by-block btree.
1912 */
b194c7d8 1913 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
5000d01d 1914 /*
2bd0ea18
NS
1915 * Look for a neighboring block on the left (lower block numbers)
1916 * that is contiguous with this space.
1917 */
0e266570 1918 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
2bd0ea18
NS
1919 goto error0;
1920 if (haveleft) {
1921 /*
1922 * There is a block to our left.
1923 */
0e266570 1924 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
2bd0ea18 1925 goto error0;
fbb4fa7f
DW
1926 if (XFS_IS_CORRUPT(mp, i != 1)) {
1927 error = -EFSCORRUPTED;
1928 goto error0;
1929 }
2bd0ea18
NS
1930 /*
1931 * It's not contiguous, though.
1932 */
1933 if (ltbno + ltlen < bno)
1934 haveleft = 0;
1935 else {
1936 /*
1937 * If this failure happens the request to free this
1938 * space was invalid, it's (partly) already free.
1939 * Very bad.
1940 */
fbb4fa7f
DW
1941 if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
1942 error = -EFSCORRUPTED;
1943 goto error0;
1944 }
2bd0ea18
NS
1945 }
1946 }
5000d01d 1947 /*
2bd0ea18
NS
1948 * Look for a neighboring block on the right (higher block numbers)
1949 * that is contiguous with this space.
1950 */
b194c7d8 1951 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
2bd0ea18
NS
1952 goto error0;
1953 if (haveright) {
1954 /*
1955 * There is a block to our right.
1956 */
0e266570 1957 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
2bd0ea18 1958 goto error0;
fbb4fa7f
DW
1959 if (XFS_IS_CORRUPT(mp, i != 1)) {
1960 error = -EFSCORRUPTED;
1961 goto error0;
1962 }
2bd0ea18
NS
1963 /*
1964 * It's not contiguous, though.
1965 */
1966 if (bno + len < gtbno)
1967 haveright = 0;
1968 else {
1969 /*
1970 * If this failure happens the request to free this
1971 * space was invalid, it's (partly) already free.
1972 * Very bad.
1973 */
fbb4fa7f
DW
1974 if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
1975 error = -EFSCORRUPTED;
1976 goto error0;
1977 }
2bd0ea18
NS
1978 }
1979 }
1980 /*
1981 * Now allocate and initialize a cursor for the by-size tree.
1982 */
b194c7d8 1983 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
2bd0ea18
NS
1984 /*
1985 * Have both left and right contiguous neighbors.
1986 * Merge all three into a single free block.
1987 */
1988 if (haveleft && haveright) {
1989 /*
1990 * Delete the old by-size entry on the left.
1991 */
0e266570 1992 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18 1993 goto error0;
fbb4fa7f
DW
1994 if (XFS_IS_CORRUPT(mp, i != 1)) {
1995 error = -EFSCORRUPTED;
1996 goto error0;
1997 }
b194c7d8 1998 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1999 goto error0;
fbb4fa7f
DW
2000 if (XFS_IS_CORRUPT(mp, i != 1)) {
2001 error = -EFSCORRUPTED;
2002 goto error0;
2003 }
2bd0ea18
NS
2004 /*
2005 * Delete the old by-size entry on the right.
2006 */
0e266570 2007 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18 2008 goto error0;
fbb4fa7f
DW
2009 if (XFS_IS_CORRUPT(mp, i != 1)) {
2010 error = -EFSCORRUPTED;
2011 goto error0;
2012 }
b194c7d8 2013 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 2014 goto error0;
fbb4fa7f
DW
2015 if (XFS_IS_CORRUPT(mp, i != 1)) {
2016 error = -EFSCORRUPTED;
2017 goto error0;
2018 }
2bd0ea18
NS
2019 /*
2020 * Delete the old by-block entry for the right block.
2021 */
b194c7d8 2022 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18 2023 goto error0;
fbb4fa7f
DW
2024 if (XFS_IS_CORRUPT(mp, i != 1)) {
2025 error = -EFSCORRUPTED;
2026 goto error0;
2027 }
2bd0ea18
NS
2028 /*
2029 * Move the by-block cursor back to the left neighbor.
2030 */
b194c7d8 2031 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18 2032 goto error0;
fbb4fa7f
DW
2033 if (XFS_IS_CORRUPT(mp, i != 1)) {
2034 error = -EFSCORRUPTED;
2035 goto error0;
2036 }
2bd0ea18
NS
2037#ifdef DEBUG
2038 /*
2039 * Check that this is the right record: delete didn't
2040 * mangle the cursor.
2041 */
2042 {
2043 xfs_agblock_t xxbno;
2044 xfs_extlen_t xxlen;
2045
0e266570
NS
2046 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
2047 &i)))
2bd0ea18 2048 goto error0;
fbb4fa7f
DW
2049 if (XFS_IS_CORRUPT(mp,
2050 i != 1 ||
2051 xxbno != ltbno ||
2052 xxlen != ltlen)) {
2053 error = -EFSCORRUPTED;
2054 goto error0;
2055 }
2bd0ea18
NS
2056 }
2057#endif
2058 /*
2059 * Update remaining by-block entry to the new, joined block.
2060 */
2061 nbno = ltbno;
2062 nlen = len + ltlen + gtlen;
0e266570 2063 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
2064 goto error0;
2065 }
2066 /*
2067 * Have only a left contiguous neighbor.
2068 * Merge it together with the new freespace.
2069 */
2070 else if (haveleft) {
2071 /*
2072 * Delete the old by-size entry on the left.
2073 */
0e266570 2074 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18 2075 goto error0;
fbb4fa7f
DW
2076 if (XFS_IS_CORRUPT(mp, i != 1)) {
2077 error = -EFSCORRUPTED;
2078 goto error0;
2079 }
b194c7d8 2080 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 2081 goto error0;
fbb4fa7f
DW
2082 if (XFS_IS_CORRUPT(mp, i != 1)) {
2083 error = -EFSCORRUPTED;
2084 goto error0;
2085 }
2bd0ea18
NS
2086 /*
2087 * Back up the by-block cursor to the left neighbor, and
2088 * update its length.
2089 */
b194c7d8 2090 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18 2091 goto error0;
fbb4fa7f
DW
2092 if (XFS_IS_CORRUPT(mp, i != 1)) {
2093 error = -EFSCORRUPTED;
2094 goto error0;
2095 }
2bd0ea18
NS
2096 nbno = ltbno;
2097 nlen = len + ltlen;
0e266570 2098 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
2099 goto error0;
2100 }
2101 /*
2102 * Have only a right contiguous neighbor.
2103 * Merge it together with the new freespace.
2104 */
2105 else if (haveright) {
2106 /*
2107 * Delete the old by-size entry on the right.
2108 */
0e266570 2109 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18 2110 goto error0;
fbb4fa7f
DW
2111 if (XFS_IS_CORRUPT(mp, i != 1)) {
2112 error = -EFSCORRUPTED;
2113 goto error0;
2114 }
b194c7d8 2115 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 2116 goto error0;
fbb4fa7f
DW
2117 if (XFS_IS_CORRUPT(mp, i != 1)) {
2118 error = -EFSCORRUPTED;
2119 goto error0;
2120 }
2bd0ea18 2121 /*
5000d01d 2122 * Update the starting block and length of the right
2bd0ea18
NS
2123 * neighbor in the by-block tree.
2124 */
2125 nbno = bno;
2126 nlen = len + gtlen;
0e266570 2127 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
2128 goto error0;
2129 }
2130 /*
2131 * No contiguous neighbors.
2132 * Insert the new freespace into the by-block tree.
2133 */
2134 else {
2135 nbno = bno;
2136 nlen = len;
b194c7d8 2137 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18 2138 goto error0;
fbb4fa7f
DW
2139 if (XFS_IS_CORRUPT(mp, i != 1)) {
2140 error = -EFSCORRUPTED;
2141 goto error0;
2142 }
2bd0ea18
NS
2143 }
2144 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
2145 bno_cur = NULL;
2146 /*
2147 * In all cases we need to insert the new freespace in the by-size tree.
2148 */
0e266570 2149 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2bd0ea18 2150 goto error0;
fbb4fa7f
DW
2151 if (XFS_IS_CORRUPT(mp, i != 0)) {
2152 error = -EFSCORRUPTED;
2153 goto error0;
2154 }
b194c7d8 2155 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 2156 goto error0;
fbb4fa7f
DW
2157 if (XFS_IS_CORRUPT(mp, i != 1)) {
2158 error = -EFSCORRUPTED;
2159 goto error0;
2160 }
2bd0ea18
NS
2161 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
2162 cnt_cur = NULL;
a2ceac1f 2163
2bd0ea18
NS
2164 /*
2165 * Update the freespace totals in the ag and superblock.
2166 */
a2ceac1f
DC
2167 pag = xfs_perag_get(mp, agno);
2168 error = xfs_alloc_update_counters(tp, pag, agbp, len);
cf8ce220 2169 xfs_ag_resv_free_extent(pag, type, tp, len);
a2ceac1f
DC
2170 xfs_perag_put(pag);
2171 if (error)
2172 goto error0;
2173
79896434
BD
2174 XFS_STATS_INC(mp, xs_freex);
2175 XFS_STATS_ADD(mp, xs_freeb, len);
56b2de80 2176
65a15e06 2177 trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
3e535bba 2178
2bd0ea18
NS
2179 return 0;
2180
2181 error0:
65a15e06 2182 trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
2bd0ea18
NS
2183 if (bno_cur)
2184 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
2185 if (cnt_cur)
2186 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
2187 return error;
2188}
2189
5000d01d 2190/*
2bd0ea18
NS
2191 * Visible (exported) allocation/free functions.
2192 * Some of these are used just by xfs_alloc_btree.c and this file.
2193 */
2194
/*
 * Compute and fill in value of m_ag_maxlevels.
 *
 * This is the maximum height of a free space btree for this filesystem,
 * derived from the per-level minimum record counts (m_alloc_mnr) and the
 * worst case of (agblocks + 1) / 2 records in a single btree.
 */
void
xfs_alloc_compute_maxlevels(
	xfs_mount_t	*mp)	/* file system mount structure */
{
	mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
			(mp->m_sb.sb_agblocks + 1) / 2);
}
2205
/*
 * Find the length of the longest extent in an AG.  The 'need' parameter
 * specifies how much space we're going to need for the AGFL and the
 * 'reserved' parameter tells us how many blocks in this AG are reserved for
 * other callers.
 */
xfs_extlen_t
xfs_alloc_longest_free_extent(
	struct xfs_perag	*pag,
	xfs_extlen_t		need,
	xfs_extlen_t		reserved)
{
	xfs_extlen_t		delta = 0;	/* blocks unusable by this caller */

	/*
	 * If the AGFL needs a recharge, we'll have to subtract that from the
	 * longest extent.
	 */
	if (need > pag->pagf_flcount)
		delta = need - pag->pagf_flcount;

	/*
	 * If we cannot maintain others' reservations with space from the
	 * not-longest freesp extents, we'll have to subtract /that/ from
	 * the longest extent too.
	 */
	if (pag->pagf_freeblks - pag->pagf_longest < reserved)
		delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);

	/*
	 * If the longest extent is long enough to satisfy all the
	 * reservations and AGFL rules in place, we can return this extent.
	 * Cap the result at the per-AG usable space limit.
	 */
	if (pag->pagf_longest > delta)
		return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
				pag->pagf_longest - delta);

	/* Otherwise, let the caller try for 1 block if there's space. */
	return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
}
2246
/*
 * Compute the minimum length of the AGFL in the given AG.  If @pag is NULL,
 * return the largest possible minimum length.
 *
 * A full split of each AGF btree can consume one block per level plus a new
 * root, hence the "level + 1" terms below, clamped to the maximum possible
 * tree height.
 */
unsigned int
xfs_alloc_min_freelist(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag)
{
	/* AG btrees have at least 1 level. */
	static const uint8_t	fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
	const uint8_t		*levels = pag ? pag->pagf_levels : fake_levels;
	unsigned int		min_free;

	ASSERT(mp->m_ag_maxlevels > 0);

	/* space needed by-bno freespace btree */
	min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
				       mp->m_ag_maxlevels);
	/* space needed by-size freespace btree */
	min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
					mp->m_ag_maxlevels);
	/* space needed reverse mapping used space btree */
	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
		min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
						mp->m_rmap_maxlevels);

	return min_free;
}
2276
/*
 * Check if the operation we are fixing up the freelist for should go ahead or
 * not. If we are freeing blocks, we always allow it, otherwise the allocation
 * is dependent on whether the size and shape of free space available will
 * permit the requested allocation to take place.
 *
 * Returns true if the allocation can proceed; may also shrink args->maxlen to
 * what is actually available (unless XFS_ALLOC_FLAG_CHECK is set, which makes
 * this a pure query with no side effects on @args).
 */
static bool
xfs_alloc_space_available(
	struct xfs_alloc_arg	*args,
	xfs_extlen_t		min_free,
	int			flags)
{
	struct xfs_perag	*pag = args->pag;
	xfs_extlen_t		alloc_len, longest;
	xfs_extlen_t		reservation; /* blocks that are still reserved */
	int			available;
	xfs_extlen_t		agflcount;

	if (flags & XFS_ALLOC_FLAG_FREEING)
		return true;

	reservation = xfs_ag_resv_needed(pag, args->resv);

	/* do we have enough contiguous free space for the allocation? */
	alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
	longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
	if (longest < alloc_len)
		return false;

	/*
	 * Do we have enough free space remaining for the allocation? Don't
	 * account extra agfl blocks because we are about to defer free them,
	 * making them unavailable until the current transaction commits.
	 */
	agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
	available = (int)(pag->pagf_freeblks + agflcount -
			  reservation - min_free - args->minleft);
	if (available < (int)max(args->total, alloc_len))
		return false;

	/*
	 * Clamp maxlen to the amount of free space available for the actual
	 * extent allocation.
	 */
	if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
		args->maxlen = available;
		ASSERT(args->maxlen > 0);
		ASSERT(args->maxlen >= args->minlen);
	}

	return true;
}
2329
/*
 * Free an AGFL block back to the free space btrees, then invalidate any
 * buffer backing that block so stale contents cannot be written back.
 * The extent is accounted against the XFS_AG_RESV_AGFL reservation.
 */
int
xfs_free_agfl_block(
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	struct xfs_buf		*agbp,
	struct xfs_owner_info	*oinfo)
{
	int			error;
	struct xfs_buf		*bp;

	error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
				   XFS_AG_RESV_AGFL);
	if (error)
		return error;

	/* Grab (or create) the buffer for the block so we can invalidate it. */
	error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
			XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
			tp->t_mountp->m_bsize, 0, &bp);
	if (error)
		return error;
	xfs_trans_binval(tp, bp);

	return 0;
}
2355
/*
 * Check the agfl fields of the agf for inconsistency or corruption. The purpose
 * is to detect an agfl header padding mismatch between current and early v5
 * kernels. This problem manifests as a 1-slot size difference between the
 * on-disk flcount and the active [first, last] range of a wrapped agfl. This
 * may also catch variants of agfl count corruption unrelated to padding. Either
 * way, we'll reset the agfl and warn the user.
 *
 * Return true if a reset is required before the agfl can be used, false
 * otherwise.
 */
static bool
xfs_agfl_needs_reset(
	struct xfs_mount	*mp,
	struct xfs_agf		*agf)
{
	uint32_t		f = be32_to_cpu(agf->agf_flfirst);
	uint32_t		l = be32_to_cpu(agf->agf_fllast);
	uint32_t		c = be32_to_cpu(agf->agf_flcount);
	int			agfl_size = xfs_agfl_size(mp);
	int			active;

	/* no agfl header on v4 supers */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return false;

	/*
	 * The agf read verifier catches severe corruption of these fields.
	 * Repeat some sanity checks to cover a packed -> unpacked mismatch if
	 * the verifier allows it.
	 */
	if (f >= agfl_size || l >= agfl_size)
		return true;
	if (c > agfl_size)
		return true;

	/*
	 * Check consistency between the on-disk count and the active range. An
	 * agfl padding mismatch manifests as an inconsistent flcount.  The
	 * second branch handles a list that wraps around the end of the AGFL.
	 */
	if (c && l >= f)
		active = l - f + 1;
	else if (c)
		active = agfl_size - f + l + 1;
	else
		active = 0;

	return active != c;
}
2405
/*
 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
 * agfl content cannot be trusted. Warn the user that a repair is required to
 * recover leaked blocks.
 *
 * The purpose of this mechanism is to handle filesystems affected by the agfl
 * header padding mismatch problem. A reset keeps the filesystem online with a
 * relatively minor free space accounting inconsistency rather than suffer the
 * inevitable crash from use of an invalid agfl block.
 */
static void
xfs_agfl_reset(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_perag	*pag)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);

	ASSERT(pag->pagf_agflreset);
	trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);

	xfs_warn(mp,
	       "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
	       "Please unmount and run xfs_repair.",
	         pag->pag_agno, pag->pagf_flcount);

	/* Empty list: first at slot 0, last at the final slot, count 0. */
	agf->agf_flfirst = 0;
	agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
	agf->agf_flcount = 0;
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
				    XFS_AGF_FLCOUNT);

	/* Keep the in-core perag in sync and clear the reset-needed flag. */
	pag->pagf_flcount = 0;
	pag->pagf_agflreset = false;
}
2442
/*
 * Defer an AGFL block free. This is effectively equivalent to
 * xfs_bmap_add_free() with some special handling particular to AGFL blocks.
 *
 * Deferring AGFL frees helps prevent log reservation overruns due to too many
 * allocation operations in a transaction. AGFL frees are prone to this problem
 * because for one they are always freed one at a time. Further, an immediate
 * AGFL block free can cause a btree join and require another block free before
 * the real allocation can proceed. Deferring the free disconnects freeing up
 * the AGFL slot from freeing the block.
 *
 * Ownership of the freshly allocated xfs_extent_free_item passes to the defer
 * ops machinery via xfs_defer_add().
 */
STATIC void
xfs_defer_agfl_block(
	struct xfs_trans		*tp,
	xfs_agnumber_t			agno,
	xfs_fsblock_t			agbno,
	struct xfs_owner_info		*oinfo)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_extent_free_item	*new;		/* new element */

	ASSERT(xfs_bmap_free_item_zone != NULL);
	ASSERT(oinfo != NULL);

	new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0);
	new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
	new->xefi_blockcount = 1;
	new->xefi_oinfo = *oinfo;

	trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);

	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
}
2476
/*
 * Decide whether to use this allocation group for this allocation.
 * If so, fix up the btree freelist's size.
 *
 * On success, args->agbp holds the locked AGF buffer; on failure (or when this
 * AG is rejected) args->agbp is NULL and a zero return with a NULL agbp means
 * "try another AG".
 */
int			/* error */
xfs_alloc_fix_freelist(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	int			flags)	/* XFS_ALLOC_FLAG_... */
{
	struct xfs_mount	*mp = args->mp;
	struct xfs_perag	*pag = args->pag;
	struct xfs_trans	*tp = args->tp;
	struct xfs_buf		*agbp = NULL;
	struct xfs_buf		*agflbp = NULL;
	struct xfs_alloc_arg	targs;	/* local allocation arguments */
	xfs_agblock_t		bno;	/* freelist block */
	xfs_extlen_t		need;	/* total blocks needed in freelist */
	int			error = 0;

	/* deferred ops (AGFL block frees) require permanent transactions */
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);

	/* Read the AGF if the perag freespace data isn't initialised yet. */
	if (!pag->pagf_init) {
		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
		if (error)
			goto out_no_agbp;
		if (!pag->pagf_init) {
			/* Couldn't init: only legal in trylock mode. */
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			goto out_agbp_relse;
		}
	}

	/*
	 * If this is a metadata preferred pag and we are user data then try
	 * somewhere else if we are not being asked to try harder at this
	 * point
	 */
	if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) &&
	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
		goto out_agbp_relse;
	}

	/* Cheap first pass using only the in-core perag numbers. */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, flags |
			XFS_ALLOC_FLAG_CHECK))
		goto out_agbp_relse;

	/*
	 * Get the a.g. freespace buffer.
	 * Can fail if we're not blocking on locks, and it's held.
	 */
	if (!agbp) {
		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
		if (error)
			goto out_no_agbp;
		if (!agbp) {
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			goto out_no_agbp;
		}
	}

	/* reset a padding mismatched agfl before final free space check */
	if (pag->pagf_agflreset)
		xfs_agfl_reset(tp, agbp, pag);

	/* If there isn't enough total space or single-extent, reject it. */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, flags))
		goto out_agbp_relse;

	/*
	 * Make the freelist shorter if it's too long.
	 *
	 * Note that from this point onwards, we will always release the agf and
	 * agfl buffers on error. This handles the case where we error out and
	 * the buffers are clean or may not have been joined to the transaction
	 * and hence need to be released manually. If they have been joined to
	 * the transaction, then xfs_trans_brelse() will handle them
	 * appropriately based on the recursion count and dirty state of the
	 * buffer.
	 *
	 * XXX (dgc): When we have lots of free space, does this buy us
	 * anything other than extra overhead when we need to put more blocks
	 * back on the free list? Maybe we should only do this when space is
	 * getting low or the AGFL is more than half full?
	 *
	 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
	 * big; the NORMAP flag prevents AGFL expand/shrink operations from
	 * updating the rmapbt. Both flags are used in xfs_repair while we're
	 * rebuilding the rmapbt, and neither are used by the kernel. They're
	 * both required to ensure that rmaps are correctly recorded for the
	 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
	 * repair/rmap.c in xfsprogs for details.
	 */
	memset(&targs, 0, sizeof(targs));
	/* struct copy below */
	if (flags & XFS_ALLOC_FLAG_NORMAP)
		targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
	else
		targs.oinfo = XFS_RMAP_OINFO_AG;
	while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
		if (error)
			goto out_agbp_relse;

		/* defer agfl frees */
		xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
	}

	targs.tp = tp;
	targs.mp = mp;
	targs.agbp = agbp;
	targs.agno = args->agno;
	targs.alignment = targs.minlen = targs.prod = 1;
	targs.type = XFS_ALLOCTYPE_THIS_AG;
	targs.pag = pag;
	error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
	if (error)
		goto out_agbp_relse;

	/* Make the freelist longer if it's too short. */
	while (pag->pagf_flcount < need) {
		targs.agbno = 0;
		targs.maxlen = need - pag->pagf_flcount;
		targs.resv = XFS_AG_RESV_AGFL;

		/* Allocate as many blocks as possible at once. */
		error = xfs_alloc_ag_vextent(&targs);
		if (error)
			goto out_agflbp_relse;

		/*
		 * Stop if we run out.  Won't happen if callers are obeying
		 * the restrictions correctly.  Can happen for free calls
		 * on a completely full ag.
		 */
		if (targs.agbno == NULLAGBLOCK) {
			if (flags & XFS_ALLOC_FLAG_FREEING)
				break;
			goto out_agflbp_relse;
		}
		/*
		 * Put each allocated block on the list.
		 */
		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
			error = xfs_alloc_put_freelist(tp, agbp,
							agflbp, bno, 0);
			if (error)
				goto out_agflbp_relse;
		}
	}
	xfs_trans_brelse(tp, agflbp);
	args->agbp = agbp;
	return 0;

out_agflbp_relse:
	xfs_trans_brelse(tp, agflbp);
out_agbp_relse:
	if (agbp)
		xfs_trans_brelse(tp, agbp);
out_no_agbp:
	args->agbp = NULL;
	return error;
}
2644
/*
 * Get a block from the freelist.
 * Returns with the buffer for the block gotten.
 *
 * Sets *bnop to NULLAGBLOCK (without error) when the freelist is empty.
 * If @btreeblk is set the block is destined for an AGF btree, so the
 * btreeblks counters are bumped as well.
 */
int				/* error */
xfs_alloc_get_freelist(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
	int		btreeblk) /* destination is a AGF btree */
{
	xfs_agf_t	*agf;	/* a.g. freespace structure */
	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
	xfs_agblock_t	bno;	/* block number returned */
	__be32		*agfl_bno;
	int		error;
	int		logflags;
	xfs_mount_t	*mp = tp->t_mountp;
	xfs_perag_t	*pag;	/* per allocation group data */

	/*
	 * Freelist is empty, give up.
	 */
	agf = XFS_BUF_TO_AGF(agbp);
	if (!agf->agf_flcount) {
		*bnop = NULLAGBLOCK;
		return 0;
	}
	/*
	 * Read the array of free blocks.
	 */
	error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
				    &agflbp);
	if (error)
		return error;

	/*
	 * Get the block number and update the data structures.
	 * flfirst wraps back to slot 0 when it walks off the end of the AGFL.
	 */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
	be32_add_cpu(&agf->agf_flfirst, 1);
	xfs_trans_brelse(tp, agflbp);
	if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
		agf->agf_flfirst = 0;

	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
	ASSERT(!pag->pagf_agflreset);
	be32_add_cpu(&agf->agf_flcount, -1);
	xfs_trans_agflist_delta(tp, -1);
	pag->pagf_flcount--;

	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		be32_add_cpu(&agf->agf_btreeblks, 1);
		pag->pagf_btreeblks++;
		logflags |= XFS_AGF_BTREEBLKS;
	}
	xfs_perag_put(pag);

	xfs_alloc_log_agf(tp, agbp, logflags);
	*bnop = bno;

	return 0;
}
2711
/*
 * Log the given fields from the agf structure.
 *
 * The offsets table maps each XFS_AGF_* field bit to its byte offset in the
 * on-disk AGF; xfs_btree_offsets() converts the field mask into a [first,
 * last] byte range for logging.  The table must stay in the same order as
 * the XFS_AGF_* flag definitions (XFS_AGF_NUM_BITS entries plus the final
 * sizeof terminator).
 */
void
xfs_alloc_log_agf(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
{
	int	first;		/* first byte offset */
	int	last;		/* last byte offset */
	static const short	offsets[] = {
		offsetof(xfs_agf_t, agf_magicnum),
		offsetof(xfs_agf_t, agf_versionnum),
		offsetof(xfs_agf_t, agf_seqno),
		offsetof(xfs_agf_t, agf_length),
		offsetof(xfs_agf_t, agf_roots[0]),
		offsetof(xfs_agf_t, agf_levels[0]),
		offsetof(xfs_agf_t, agf_flfirst),
		offsetof(xfs_agf_t, agf_fllast),
		offsetof(xfs_agf_t, agf_flcount),
		offsetof(xfs_agf_t, agf_freeblks),
		offsetof(xfs_agf_t, agf_longest),
		offsetof(xfs_agf_t, agf_btreeblks),
		offsetof(xfs_agf_t, agf_uuid),
		offsetof(xfs_agf_t, agf_rmap_blocks),
		offsetof(xfs_agf_t, agf_refcount_blocks),
		offsetof(xfs_agf_t, agf_refcount_root),
		offsetof(xfs_agf_t, agf_refcount_level),
		/* needed so that we don't log the whole rest of the structure: */
		offsetof(xfs_agf_t, agf_spare64),
		sizeof(xfs_agf_t)
	};

	trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);

	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);

	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}
2753
2754/*
2755 * Interface for inode allocation to force the pag data to be initialized.
2756 */
2757int /* error */
2758xfs_alloc_pagf_init(
2759 xfs_mount_t *mp, /* file system mount structure */
2760 xfs_trans_t *tp, /* transaction pointer */
2761 xfs_agnumber_t agno, /* allocation group number */
2762 int flags) /* XFS_ALLOC_FLAGS_... */
2763{
7a3bffe4 2764 xfs_buf_t *bp;
2bd0ea18
NS
2765 int error;
2766
0e266570 2767 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2bd0ea18
NS
2768 return error;
2769 if (bp)
2770 xfs_trans_brelse(tp, bp);
2771 return 0;
2772}
2773
/*
 * Put the block on the freelist for the allocation group.
 *
 * @agflbp may be NULL, in which case the AGFL buffer is read here.  If
 * @btreeblk is set the block came from an AGF btree, so the btreeblks
 * counters are decremented to match.
 */
int				/* error */
xfs_alloc_put_freelist(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*agbp,	/* buffer for a.g. freelist header */
	xfs_buf_t	*agflbp,/* buffer for a.g. free block array */
	xfs_agblock_t	bno,	/* block being freed */
	int		btreeblk) /* block came from a AGF btree */
{
	xfs_agf_t	*agf;	/* a.g. freespace structure */
	__be32		*blockp;/* pointer to array entry */
	int		error;
	int		logflags;
	xfs_mount_t	*mp;	/* mount structure */
	xfs_perag_t	*pag;	/* per allocation group data */
	__be32		*agfl_bno;
	int		startoff;

	agf = XFS_BUF_TO_AGF(agbp);
	mp = tp->t_mountp;

	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
			be32_to_cpu(agf->agf_seqno), &agflbp)))
		return error;
	/* Advance fllast, wrapping back to slot 0 at the end of the AGFL. */
	be32_add_cpu(&agf->agf_fllast, 1);
	if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
		agf->agf_fllast = 0;

	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
	ASSERT(!pag->pagf_agflreset);
	be32_add_cpu(&agf->agf_flcount, 1);
	xfs_trans_agflist_delta(tp, 1);
	pag->pagf_flcount++;

	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		be32_add_cpu(&agf->agf_btreeblks, -1);
		pag->pagf_btreeblks--;
		logflags |= XFS_AGF_BTREEBLKS;
	}
	xfs_perag_put(pag);

	xfs_alloc_log_agf(tp, agbp, logflags);

	ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));

	/* Store the freed block in the slot fllast now points at. */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
	*blockp = cpu_to_be32(bno);
	startoff = (char *)blockp - (char *)agflbp->b_addr;

	/*
	 * NOTE(review): the AGF is logged a second time here with the same
	 * logflags; the earlier call already covered these fields.  Looks
	 * redundant but harmless — confirm against upstream history before
	 * removing.
	 */
	xfs_alloc_log_agf(tp, agbp, logflags);

	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
	/* Log only the one modified AGFL slot, not the whole block. */
	xfs_trans_log_buf(tp, agflbp, startoff,
			  startoff + sizeof(xfs_agblock_t) - 1);
	return 0;
}
2834
/*
 * Verify the on-disk AGF is internally consistent.  Returns the address of
 * the failing check (for error reporting) or NULL if the AGF looks sane.
 * Used by both the read and write buffer verifiers.
 */
static xfs_failaddr_t
xfs_agf_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_agf		*agf = XFS_BUF_TO_AGF(bp);

	/* v5 (CRC) filesystems carry a uuid and LSN to validate as well. */
	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (!xfs_log_check_lsn(mp,
				be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
			return __this_address;
	}

	if (!xfs_verify_magic(bp, agf->agf_magicnum))
		return __this_address;

	/* Version, free block count and AGFL pointers must be in range. */
	if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
	      be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
	      be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
	      be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
	      be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
		return __this_address;

	/* Free space btree levels: at least 1, no taller than the maximum. */
	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
		return __this_address;

	if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
	    (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
	     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
		return __this_address;

	/*
	 * during growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
		return __this_address;

	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
		return __this_address;

	if (xfs_sb_version_hasreflink(&mp->m_sb) &&
	    (be32_to_cpu(agf->agf_refcount_level) < 1 ||
	     be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
		return __this_address;

	return NULL;

}
2892
2893static void
2894xfs_agf_read_verify(
2895 struct xfs_buf *bp)
2896{
7861ef77 2897 struct xfs_mount *mp = bp->b_mount;
1e697959 2898 xfs_failaddr_t fa;
dd5b876e 2899
45922933
DC
2900 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2901 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
1e697959
DW
2902 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
2903 else {
95d9582b 2904 fa = xfs_agf_verify(bp);
1e697959
DW
2905 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
2906 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2907 }
a2ceac1f
DC
2908}
2909
2910static void
2911xfs_agf_write_verify(
2912 struct xfs_buf *bp)
2913{
7861ef77 2914 struct xfs_mount *mp = bp->b_mount;
37d086ca 2915 struct xfs_buf_log_item *bip = bp->b_log_item;
1e697959 2916 xfs_failaddr_t fa;
dd5b876e 2917
95d9582b 2918 fa = xfs_agf_verify(bp);
1e697959
DW
2919 if (fa) {
2920 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
dd5b876e
DC
2921 return;
2922 }
2923
2924 if (!xfs_sb_version_hascrc(&mp->m_sb))
2925 return;
2926
2927 if (bip)
2928 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2929
43b5aeed 2930 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
a2ceac1f
DC
2931}
2932
2933const struct xfs_buf_ops xfs_agf_buf_ops = {
a3fac935 2934 .name = "xfs_agf",
68dbe77f 2935 .magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
a2ceac1f
DC
2936 .verify_read = xfs_agf_read_verify,
2937 .verify_write = xfs_agf_write_verify,
95d9582b 2938 .verify_struct = xfs_agf_verify,
a2ceac1f
DC
2939};
2940
2bd0ea18
NS
2941/*
2942 * Read in the allocation group header (free/alloc section).
2943 */
2944int /* error */
56b2de80
DC
2945xfs_read_agf(
2946 struct xfs_mount *mp, /* mount point structure */
2947 struct xfs_trans *tp, /* transaction pointer */
2948 xfs_agnumber_t agno, /* allocation group number */
2949 int flags, /* XFS_BUF_ */
2950 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2bd0ea18 2951{
9440d84d 2952 int error;
2bd0ea18 2953
ff105f75
DC
2954 trace_xfs_read_agf(mp, agno);
2955
2bd0ea18 2956 ASSERT(agno != NULLAGNUMBER);
3e6069a1 2957 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
9440d84d 2958 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
a2ceac1f 2959 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
3e6069a1
DW
2960 /*
2961 * Callers of xfs_read_agf() currently interpret a NULL bpp as EAGAIN
2962 * and need to be converted to check for EAGAIN specifically.
2963 */
2964 if (error == -EAGAIN)
2965 return 0;
9440d84d 2966 if (error)
2bd0ea18 2967 return error;
56b2de80 2968
a2ceac1f
DC
2969 ASSERT(!(*bpp)->b_error);
2970 xfs_buf_set_ref(*bpp, XFS_AGF_REF);
56b2de80
DC
2971 return 0;
2972}
2973
2974/*
2975 * Read in the allocation group header (free/alloc section).
2976 */
2977int /* error */
2978xfs_alloc_read_agf(
2979 struct xfs_mount *mp, /* mount point structure */
2980 struct xfs_trans *tp, /* transaction pointer */
2981 xfs_agnumber_t agno, /* allocation group number */
2982 int flags, /* XFS_ALLOC_FLAG_... */
2983 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2984{
2985 struct xfs_agf *agf; /* ag freelist header */
2986 struct xfs_perag *pag; /* per allocation group data */
2987 int error;
2988
ff105f75 2989 trace_xfs_alloc_read_agf(mp, agno);
56b2de80 2990
ff105f75 2991 ASSERT(agno != NULLAGNUMBER);
56b2de80
DC
2992 error = xfs_read_agf(mp, tp, agno,
2993 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2994 bpp);
2995 if (error)
2996 return error;
2997 if (!*bpp)
2998 return 0;
a2ceac1f 2999 ASSERT(!(*bpp)->b_error);
56b2de80
DC
3000
3001 agf = XFS_BUF_TO_AGF(*bpp);
3002 pag = xfs_perag_get(mp, agno);
2bd0ea18 3003 if (!pag->pagf_init) {
6e3140c7 3004 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
cdded3d8 3005 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
6e3140c7
NS
3006 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
3007 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2bd0ea18 3008 pag->pagf_levels[XFS_BTNUM_BNOi] =
6e3140c7 3009 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2bd0ea18 3010 pag->pagf_levels[XFS_BTNUM_CNTi] =
6e3140c7 3011 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
e37838e5
DW
3012 pag->pagf_levels[XFS_BTNUM_RMAPi] =
3013 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
88ce0792 3014 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
2bd0ea18 3015 pag->pagf_init = 1;
8dbee8f5 3016 pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
2bd0ea18
NS
3017 }
3018#ifdef DEBUG
3019 else if (!XFS_FORCED_SHUTDOWN(mp)) {
6e3140c7 3020 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
cdded3d8 3021 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
6e3140c7
NS
3022 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
3023 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2bd0ea18 3024 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
6e3140c7 3025 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2bd0ea18 3026 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
6e3140c7 3027 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2bd0ea18
NS
3028 }
3029#endif
56b2de80 3030 xfs_perag_put(pag);
2bd0ea18
NS
3031 return 0;
3032}
3033
3034/*
3035 * Allocate an extent (variable-size).
3036 * Depending on the allocation type, we either look in a single allocation
3037 * group or loop over the allocation groups to find the result.
3038 */
3039int /* error */
3040xfs_alloc_vextent(
03aeb29d 3041 struct xfs_alloc_arg *args) /* allocation argument structure */
2bd0ea18 3042{
03aeb29d
BF
3043 xfs_agblock_t agsize; /* allocation group size */
3044 int error;
3045 int flags; /* XFS_ALLOC_FLAG_... locking flags */
3046 struct xfs_mount *mp; /* mount structure pointer */
3047 xfs_agnumber_t sagno; /* starting allocation group number */
3048 xfs_alloctype_t type; /* input allocation type */
3049 int bump_rotor = 0;
3050 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2bd0ea18
NS
3051
3052 mp = args->mp;
3053 type = args->otype = args->type;
3054 args->agbno = NULLAGBLOCK;
3055 /*
3056 * Just fix this up, for the case where the last a.g. is shorter
3057 * (or there's only one a.g.) and the caller couldn't easily figure
3058 * that out (xfs_bmap_alloc).
3059 */
3060 agsize = mp->m_sb.sb_agblocks;
3061 if (args->maxlen > agsize)
3062 args->maxlen = agsize;
3063 if (args->alignment == 0)
3064 args->alignment = 1;
3065 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
3066 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
3067 ASSERT(args->minlen <= args->maxlen);
3068 ASSERT(args->minlen <= agsize);
3069 ASSERT(args->mod < args->prod);
3070 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
3071 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
3072 args->minlen > args->maxlen || args->minlen > agsize ||
3073 args->mod >= args->prod) {
3074 args->fsbno = NULLFSBLOCK;
56b2de80 3075 trace_xfs_alloc_vextent_badargs(args);
2bd0ea18
NS
3076 return 0;
3077 }
9baa549b 3078
2bd0ea18
NS
3079 switch (type) {
3080 case XFS_ALLOCTYPE_THIS_AG:
3081 case XFS_ALLOCTYPE_NEAR_BNO:
3082 case XFS_ALLOCTYPE_THIS_BNO:
3083 /*
3084 * These three force us into a single a.g.
3085 */
3086 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
56b2de80 3087 args->pag = xfs_perag_get(mp, args->agno);
2bd0ea18 3088 error = xfs_alloc_fix_freelist(args, 0);
2bd0ea18 3089 if (error) {
56b2de80 3090 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
3091 goto error0;
3092 }
3093 if (!args->agbp) {
56b2de80 3094 trace_xfs_alloc_vextent_noagbp(args);
2bd0ea18
NS
3095 break;
3096 }
3097 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
0e266570 3098 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 3099 goto error0;
2bd0ea18
NS
3100 break;
3101 case XFS_ALLOCTYPE_START_BNO:
3102 /*
3103 * Try near allocation first, then anywhere-in-ag after
3104 * the first a.g. fails.
3105 */
1fccd5c8 3106 if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
34317449 3107 (mp->m_flags & XFS_MOUNT_32BITINODES)) {
46eca962
NS
3108 args->fsbno = XFS_AGB_TO_FSB(mp,
3109 ((mp->m_agfrotor / rotorstep) %
3110 mp->m_sb.sb_agcount), 0);
34317449
NS
3111 bump_rotor = 1;
3112 }
2bd0ea18
NS
3113 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
3114 args->type = XFS_ALLOCTYPE_NEAR_BNO;
3115 /* FALLTHROUGH */
2bd0ea18
NS
3116 case XFS_ALLOCTYPE_FIRST_AG:
3117 /*
3118 * Rotate through the allocation groups looking for a winner.
3119 */
f3eda3a5 3120 if (type == XFS_ALLOCTYPE_FIRST_AG) {
2bd0ea18
NS
3121 /*
3122 * Start with allocation group given by bno.
3123 */
3124 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
3125 args->type = XFS_ALLOCTYPE_THIS_AG;
3126 sagno = 0;
3127 flags = 0;
3128 } else {
2bd0ea18
NS
3129 /*
3130 * Start with the given allocation group.
3131 */
3132 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
3133 flags = XFS_ALLOC_FLAG_TRYLOCK;
3134 }
3135 /*
3136 * Loop over allocation groups twice; first time with
3137 * trylock set, second time without.
3138 */
3139 for (;;) {
56b2de80 3140 args->pag = xfs_perag_get(mp, args->agno);
9baa549b 3141 error = xfs_alloc_fix_freelist(args, flags);
9baa549b 3142 if (error) {
56b2de80 3143 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
3144 goto error0;
3145 }
3146 /*
3147 * If we get a buffer back then the allocation will fly.
3148 */
3149 if (args->agbp) {
0e266570 3150 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 3151 goto error0;
2bd0ea18
NS
3152 break;
3153 }
56b2de80
DC
3154
3155 trace_xfs_alloc_vextent_loopfailed(args);
3156
2bd0ea18
NS
3157 /*
3158 * Didn't work, figure out the next iteration.
3159 */
3160 if (args->agno == sagno &&
3161 type == XFS_ALLOCTYPE_START_BNO)
3162 args->type = XFS_ALLOCTYPE_THIS_AG;
5e656dbb
BN
3163 /*
3164 * For the first allocation, we can try any AG to get
3165 * space. However, if we already have allocated a
3166 * block, we don't want to try AGs whose number is below
3167 * sagno. Otherwise, we may end up with out-of-order
3168 * locking of AGF, which might cause deadlock.
3169 */
3170 if (++(args->agno) == mp->m_sb.sb_agcount) {
03aeb29d 3171 if (args->tp->t_firstblock != NULLFSBLOCK)
5e656dbb
BN
3172 args->agno = sagno;
3173 else
3174 args->agno = 0;
3175 }
5000d01d 3176 /*
2bd0ea18
NS
3177 * Reached the starting a.g., must either be done
3178 * or switch to non-trylock mode.
3179 */
3180 if (args->agno == sagno) {
a3b4a951 3181 if (flags == 0) {
2bd0ea18 3182 args->agbno = NULLAGBLOCK;
56b2de80 3183 trace_xfs_alloc_vextent_allfailed(args);
2bd0ea18
NS
3184 break;
3185 }
a3b4a951
CH
3186
3187 flags = 0;
3188 if (type == XFS_ALLOCTYPE_START_BNO) {
3189 args->agbno = XFS_FSB_TO_AGBNO(mp,
3190 args->fsbno);
3191 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2bd0ea18
NS
3192 }
3193 }
56b2de80 3194 xfs_perag_put(args->pag);
2bd0ea18 3195 }
f3eda3a5 3196 if (bump_rotor) {
46eca962
NS
3197 if (args->agno == sagno)
3198 mp->m_agfrotor = (mp->m_agfrotor + 1) %
3199 (mp->m_sb.sb_agcount * rotorstep);
3200 else
3201 mp->m_agfrotor = (args->agno * rotorstep + 1) %
3202 (mp->m_sb.sb_agcount * rotorstep);
3203 }
2bd0ea18
NS
3204 break;
3205 default:
3206 ASSERT(0);
3207 /* NOTREACHED */
3208 }
3209 if (args->agbno == NULLAGBLOCK)
3210 args->fsbno = NULLFSBLOCK;
3211 else {
3212 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
3213#ifdef DEBUG
3214 ASSERT(args->len >= args->minlen);
3215 ASSERT(args->len <= args->maxlen);
3216 ASSERT(args->agbno % args->alignment == 0);
3217 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
3218 args->len);
3219#endif
9542ae13 3220
2bd0ea18 3221 }
56b2de80 3222 xfs_perag_put(args->pag);
2bd0ea18
NS
3223 return 0;
3224error0:
56b2de80 3225 xfs_perag_put(args->pag);
2bd0ea18
NS
3226 return error;
3227}
3228
2a6da3b8
DC
3229/* Ensure that the freelist is at full capacity. */
3230int
3231xfs_free_extent_fix_freelist(
3232 struct xfs_trans *tp,
3233 xfs_agnumber_t agno,
3234 struct xfs_buf **agbp)
2bd0ea18 3235{
2a6da3b8
DC
3236 struct xfs_alloc_arg args;
3237 int error;
2bd0ea18 3238
2a6da3b8 3239 memset(&args, 0, sizeof(struct xfs_alloc_arg));
2bd0ea18
NS
3240 args.tp = tp;
3241 args.mp = tp->t_mountp;
2a6da3b8 3242 args.agno = agno;
a2ceac1f
DC
3243
3244 /*
3245 * validate that the block number is legal - the enables us to detect
3246 * and handle a silent filesystem corruption rather than crashing.
3247 */
a2ceac1f 3248 if (args.agno >= args.mp->m_sb.sb_agcount)
12b53197 3249 return -EFSCORRUPTED;
a2ceac1f 3250
56b2de80 3251 args.pag = xfs_perag_get(args.mp, args.agno);
a2ceac1f
DC
3252 ASSERT(args.pag);
3253
3254 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
3255 if (error)
2a6da3b8
DC
3256 goto out;
3257
3258 *agbp = args.agbp;
3259out:
3260 xfs_perag_put(args.pag);
3261 return error;
3262}
3263
3264/*
3265 * Free an extent.
3266 * Just break up the extent address and hand off to xfs_free_ag_extent
3267 * after fixing up the freelist.
3268 */
5837e73b 3269int
3a13f959 3270__xfs_free_extent(
5837e73b
DW
3271 struct xfs_trans *tp,
3272 xfs_fsblock_t bno,
3273 xfs_extlen_t len,
3274 const struct xfs_owner_info *oinfo,
3275 enum xfs_ag_resv_type type,
3276 bool skip_discard)
2a6da3b8 3277{
5837e73b
DW
3278 struct xfs_mount *mp = tp->t_mountp;
3279 struct xfs_buf *agbp;
3280 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
3281 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
3282 int error;
3283 unsigned int busy_flags = 0;
2a6da3b8
DC
3284
3285 ASSERT(len != 0);
9760cac2 3286 ASSERT(type != XFS_AG_RESV_AGFL);
2a6da3b8 3287
a9da40de 3288 if (XFS_TEST_ERROR(false, mp,
e2a190dd 3289 XFS_ERRTAG_FREE_EXTENT))
a9da40de
DW
3290 return -EIO;
3291
2a6da3b8
DC
3292 error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
3293 if (error)
3294 return error;
3295
fbb4fa7f
DW
3296 if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
3297 error = -EFSCORRUPTED;
3298 goto err;
3299 }
a2ceac1f
DC
3300
3301 /* validate the extent size is legal now we have the agf locked */
fbb4fa7f
DW
3302 if (XFS_IS_CORRUPT(mp,
3303 agbno + len >
3304 be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length))) {
3305 error = -EFSCORRUPTED;
3306 goto err;
3307 }
a2ceac1f 3308
cf8ce220 3309 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
2a6da3b8
DC
3310 if (error)
3311 goto err;
3312
3a13f959
BF
3313 if (skip_discard)
3314 busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
3315 xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags);
2a6da3b8
DC
3316 return 0;
3317
3318err:
3319 xfs_trans_brelse(tp, agbp);
2bd0ea18
NS
3320 return error;
3321}
b3d83fa6
DW
3322
3323struct xfs_alloc_query_range_info {
3324 xfs_alloc_query_range_fn fn;
3325 void *priv;
3326};
3327
3328/* Format btree record and pass to our callback. */
3329STATIC int
3330xfs_alloc_query_range_helper(
3331 struct xfs_btree_cur *cur,
3332 union xfs_btree_rec *rec,
3333 void *priv)
3334{
3335 struct xfs_alloc_query_range_info *query = priv;
3336 struct xfs_alloc_rec_incore irec;
3337
3338 irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
3339 irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
3340 return query->fn(cur, &irec, query->priv);
3341}
3342
3343/* Find all free space within a given range of blocks. */
3344int
3345xfs_alloc_query_range(
3346 struct xfs_btree_cur *cur,
3347 struct xfs_alloc_rec_incore *low_rec,
3348 struct xfs_alloc_rec_incore *high_rec,
3349 xfs_alloc_query_range_fn fn,
3350 void *priv)
3351{
3352 union xfs_btree_irec low_brec;
3353 union xfs_btree_irec high_brec;
3354 struct xfs_alloc_query_range_info query;
3355
3356 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3357 low_brec.a = *low_rec;
3358 high_brec.a = *high_rec;
3359 query.priv = priv;
3360 query.fn = fn;
3361 return xfs_btree_query_range(cur, &low_brec, &high_brec,
3362 xfs_alloc_query_range_helper, &query);
3363}
7e05e856
DW
3364
3365/* Find all free space records. */
3366int
3367xfs_alloc_query_all(
3368 struct xfs_btree_cur *cur,
3369 xfs_alloc_query_range_fn fn,
3370 void *priv)
3371{
3372 struct xfs_alloc_query_range_info query;
3373
3374 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3375 query.priv = priv;
3376 query.fn = fn;
3377 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3378}
9bef6258 3379
1fe41a73
DW
3380/* Is there a record covering a given extent? */
3381int
3382xfs_alloc_has_record(
3383 struct xfs_btree_cur *cur,
3384 xfs_agblock_t bno,
3385 xfs_extlen_t len,
3386 bool *exists)
3387{
3388 union xfs_btree_irec low;
3389 union xfs_btree_irec high;
3390
3391 memset(&low, 0, sizeof(low));
3392 low.a.ar_startblock = bno;
3393 memset(&high, 0xFF, sizeof(high));
3394 high.a.ar_startblock = bno + len - 1;
3395
3396 return xfs_btree_has_record(cur, &low, &high, exists);
3397}
71a98c66
DW
3398
3399/*
3400 * Walk all the blocks in the AGFL. The @walk_fn can return any negative
4a509d6d 3401 * error code or XFS_ITER_*.
71a98c66
DW
3402 */
3403int
3404xfs_agfl_walk(
3405 struct xfs_mount *mp,
3406 struct xfs_agf *agf,
3407 struct xfs_buf *agflbp,
3408 xfs_agfl_walk_fn walk_fn,
3409 void *priv)
3410{
3411 __be32 *agfl_bno;
3412 unsigned int i;
3413 int error;
3414
3415 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
3416 i = be32_to_cpu(agf->agf_flfirst);
3417
3418 /* Nothing to walk in an empty AGFL. */
3419 if (agf->agf_flcount == cpu_to_be32(0))
3420 return 0;
3421
3422 /* Otherwise, walk from first to last, wrapping as needed. */
3423 for (;;) {
3424 error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
3425 if (error)
3426 return error;
3427 if (i == be32_to_cpu(agf->agf_fllast))
3428 break;
3429 if (++i == xfs_agfl_size(mp))
3430 i = 0;
3431 }
3432
3433 return 0;
3434}