]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxfs/xfs_alloc.c
xfs: make the btree cursor union members named structure
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_alloc.c
CommitLineData
37b3b4d6 1// SPDX-License-Identifier: GPL-2.0
2bd0ea18 2/*
5e656dbb 3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
da23017d 4 * All Rights Reserved.
2bd0ea18 5 */
9c799827 6#include "libxfs_priv.h"
b626fb59
DC
7#include "xfs_fs.h"
8#include "xfs_format.h"
9#include "xfs_log_format.h"
10#include "xfs_shared.h"
11#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_sb.h"
14#include "xfs_mount.h"
f944d3d0 15#include "xfs_defer.h"
b626fb59 16#include "xfs_btree.h"
631ac87a 17#include "xfs_rmap.h"
b626fb59
DC
18#include "xfs_alloc_btree.h"
19#include "xfs_alloc.h"
2cf10e4c 20#include "xfs_errortag.h"
b626fb59
DC
21#include "xfs_trace.h"
22#include "xfs_trans.h"
cf8ce220 23#include "xfs_ag_resv.h"
d5c1b462
BF
24#include "xfs_bmap.h"
25
26extern kmem_zone_t *xfs_bmap_free_item_zone;
2bd0ea18 27
ff105f75
DC
28struct workqueue_struct *xfs_alloc_wq;
29
2bd0ea18 30#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
5e656dbb
BN
31
32#define XFSA_FIXUP_BNO_OK 1
33#define XFSA_FIXUP_CNT_OK 2
34
5e656dbb
BN
35STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
36STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
37STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
2bd0ea18 38
b8165508
DC
39/*
40 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
41 * the beginning of the block for a proper header with the location information
42 * and CRC.
43 */
44unsigned int
45xfs_agfl_size(
46 struct xfs_mount *mp)
47{
48 unsigned int size = mp->m_sb.sb_sectsize;
49
50 if (xfs_sb_version_hascrc(&mp->m_sb))
51 size -= sizeof(struct xfs_agfl);
52
53 return size / sizeof(xfs_agblock_t);
54}
55
2a96beb9
DW
56unsigned int
57xfs_refc_block(
58 struct xfs_mount *mp)
59{
60 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
61 return XFS_RMAP_BLOCK(mp) + 1;
62 if (xfs_sb_version_hasfinobt(&mp->m_sb))
63 return XFS_FIBT_BLOCK(mp) + 1;
64 return XFS_IBT_BLOCK(mp) + 1;
65}
66
ef5340cd
DW
67xfs_extlen_t
68xfs_prealloc_blocks(
69 struct xfs_mount *mp)
70{
2a96beb9
DW
71 if (xfs_sb_version_hasreflink(&mp->m_sb))
72 return xfs_refc_block(mp) + 1;
ef5340cd
DW
73 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
74 return XFS_RMAP_BLOCK(mp) + 1;
75 if (xfs_sb_version_hasfinobt(&mp->m_sb))
76 return XFS_FIBT_BLOCK(mp) + 1;
77 return XFS_IBT_BLOCK(mp) + 1;
78}
79
b8a8d6e5
DW
80/*
81 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
82 * AGF buffer (PV 947395), we place constraints on the relationship among
83 * actual allocations for data blocks, freelist blocks, and potential file data
84 * bmap btree blocks. However, these restrictions may result in no actual space
85 * allocated for a delayed extent, for example, a data block in a certain AG is
86 * allocated but there is no additional block for the additional bmap btree
87 * block due to a split of the bmap btree of the file. The result of this may
88 * lead to an infinite loop when the file gets flushed to disk and all delayed
89 * extents need to be actually allocated. To get around this, we explicitly set
90 * aside a few blocks which will not be reserved in delayed allocation.
91 *
cf8ce220
DW
92 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
93 * potential split of the file's bmap btree.
b8a8d6e5
DW
94 */
95unsigned int
96xfs_alloc_set_aside(
97 struct xfs_mount *mp)
98{
8eeb15ea 99 return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
b8a8d6e5
DW
100}
101
102/*
103 * When deciding how much space to allocate out of an AG, we limit the
104 * allocation maximum size to the size the AG. However, we cannot use all the
105 * blocks in the AG - some are permanently used by metadata. These
106 * blocks are generally:
107 * - the AG superblock, AGF, AGI and AGFL
108 * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
109 * the AGI free inode and rmap btree root blocks.
110 * - blocks on the AGFL according to xfs_alloc_set_aside() limits
111 * - the rmapbt root block
112 *
113 * The AG headers are sector sized, so the amount of space they take up is
114 * dependent on filesystem geometry. The others are all single blocks.
115 */
116unsigned int
117xfs_alloc_ag_max_usable(
118 struct xfs_mount *mp)
119{
120 unsigned int blocks;
121
122 blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
123 blocks += XFS_ALLOC_AGFL_RESERVE;
124 blocks += 3; /* AGF, AGI btree root blocks */
125 if (xfs_sb_version_hasfinobt(&mp->m_sb))
126 blocks++; /* finobt root block */
127 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
128 blocks++; /* rmap root block */
868c70e3
DW
129 if (xfs_sb_version_hasreflink(&mp->m_sb))
130 blocks++; /* refcount root block */
b8a8d6e5
DW
131
132 return mp->m_sb.sb_agblocks - blocks;
133}
134
b194c7d8
BN
135/*
136 * Lookup the record equal to [bno, len] in the btree given by cur.
137 */
138STATIC int /* error */
139xfs_alloc_lookup_eq(
140 struct xfs_btree_cur *cur, /* btree cursor */
141 xfs_agblock_t bno, /* starting block of extent */
142 xfs_extlen_t len, /* length of extent */
143 int *stat) /* success/failure */
144{
7777d8c6
BF
145 int error;
146
b194c7d8
BN
147 cur->bc_rec.a.ar_startblock = bno;
148 cur->bc_rec.a.ar_blockcount = len;
7777d8c6 149 error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
aa6fa37f 150 cur->bc_ag.priv.abt.active = (*stat == 1);
7777d8c6 151 return error;
b194c7d8
BN
152}
153
154/*
155 * Lookup the first record greater than or equal to [bno, len]
156 * in the btree given by cur.
157 */
a2ceac1f 158int /* error */
b194c7d8
BN
159xfs_alloc_lookup_ge(
160 struct xfs_btree_cur *cur, /* btree cursor */
161 xfs_agblock_t bno, /* starting block of extent */
162 xfs_extlen_t len, /* length of extent */
163 int *stat) /* success/failure */
164{
7777d8c6
BF
165 int error;
166
b194c7d8
BN
167 cur->bc_rec.a.ar_startblock = bno;
168 cur->bc_rec.a.ar_blockcount = len;
7777d8c6 169 error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
aa6fa37f 170 cur->bc_ag.priv.abt.active = (*stat == 1);
7777d8c6 171 return error;
b194c7d8
BN
172}
173
174/*
175 * Lookup the first record less than or equal to [bno, len]
176 * in the btree given by cur.
177 */
1fe41a73 178int /* error */
b194c7d8
BN
179xfs_alloc_lookup_le(
180 struct xfs_btree_cur *cur, /* btree cursor */
181 xfs_agblock_t bno, /* starting block of extent */
182 xfs_extlen_t len, /* length of extent */
183 int *stat) /* success/failure */
184{
7777d8c6 185 int error;
b194c7d8
BN
186 cur->bc_rec.a.ar_startblock = bno;
187 cur->bc_rec.a.ar_blockcount = len;
7777d8c6 188 error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
aa6fa37f 189 cur->bc_ag.priv.abt.active = (*stat == 1);
7777d8c6
BF
190 return error;
191}
192
193static inline bool
194xfs_alloc_cur_active(
195 struct xfs_btree_cur *cur)
196{
aa6fa37f 197 return cur && cur->bc_ag.priv.abt.active;
b194c7d8
BN
198}
199
200/*
201 * Update the record referred to by cur to the value given
202 * by [bno, len].
203 * This either works (return 0) or gets an EFSCORRUPTED error.
204 */
205STATIC int /* error */
206xfs_alloc_update(
207 struct xfs_btree_cur *cur, /* btree cursor */
208 xfs_agblock_t bno, /* starting block of extent */
209 xfs_extlen_t len) /* length of extent */
210{
211 union xfs_btree_rec rec;
212
213 rec.alloc.ar_startblock = cpu_to_be32(bno);
214 rec.alloc.ar_blockcount = cpu_to_be32(len);
215 return xfs_btree_update(cur, &rec);
216}
217
218/*
219 * Get the data from the pointed-to record.
220 */
a2ceac1f 221int /* error */
b194c7d8
BN
222xfs_alloc_get_rec(
223 struct xfs_btree_cur *cur, /* btree cursor */
224 xfs_agblock_t *bno, /* output: starting block of extent */
225 xfs_extlen_t *len, /* output: length of extent */
226 int *stat) /* output: success/failure */
227{
ec291989 228 struct xfs_mount *mp = cur->bc_mp;
aa6fa37f 229 xfs_agnumber_t agno = cur->bc_ag.agno;
b194c7d8
BN
230 union xfs_btree_rec *rec;
231 int error;
232
233 error = xfs_btree_get_rec(cur, &rec, stat);
3741f971
DW
234 if (error || !(*stat))
235 return error;
3741f971
DW
236
237 *bno = be32_to_cpu(rec->alloc.ar_startblock);
238 *len = be32_to_cpu(rec->alloc.ar_blockcount);
239
c25e4305
CM
240 if (*len == 0)
241 goto out_bad_rec;
242
ec291989
DC
243 /* check for valid extent range, including overflow */
244 if (!xfs_verify_agbno(mp, agno, *bno))
245 goto out_bad_rec;
246 if (*bno > *bno + *len)
247 goto out_bad_rec;
248 if (!xfs_verify_agbno(mp, agno, *bno + *len - 1))
249 goto out_bad_rec;
250
251 return 0;
252
253out_bad_rec:
254 xfs_warn(mp,
255 "%s Freespace BTree record corruption in AG %d detected!",
256 cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno);
257 xfs_warn(mp,
258 "start block 0x%x block count 0x%x", *bno, *len);
259 return -EFSCORRUPTED;
b194c7d8
BN
260}
261
2bd0ea18
NS
262/*
263 * Compute aligned version of the found extent.
264 * Takes alignment and min length into account.
265 */
cd80de04 266STATIC bool
2bd0ea18 267xfs_alloc_compute_aligned(
a2ceac1f 268 xfs_alloc_arg_t *args, /* allocation argument structure */
2bd0ea18
NS
269 xfs_agblock_t foundbno, /* starting block in found extent */
270 xfs_extlen_t foundlen, /* length in found extent */
2bd0ea18 271 xfs_agblock_t *resbno, /* result block number */
cd80de04
CH
272 xfs_extlen_t *reslen, /* result length */
273 unsigned *busy_gen)
2bd0ea18 274{
cd80de04
CH
275 xfs_agblock_t bno = foundbno;
276 xfs_extlen_t len = foundlen;
ff3263dd 277 xfs_extlen_t diff;
cd80de04 278 bool busy;
2bd0ea18 279
a2ceac1f 280 /* Trim busy sections out of found extent */
cd80de04 281 busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
a2ceac1f 282
ff3263dd
BF
283 /*
284 * If we have a largish extent that happens to start before min_agbno,
285 * see if we can shift it into range...
286 */
287 if (bno < args->min_agbno && bno + len > args->min_agbno) {
288 diff = args->min_agbno - bno;
289 if (len > diff) {
290 bno += diff;
291 len -= diff;
292 }
293 }
294
a2ceac1f
DC
295 if (args->alignment > 1 && len >= args->minlen) {
296 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
ff3263dd
BF
297
298 diff = aligned_bno - bno;
a2ceac1f
DC
299
300 *resbno = aligned_bno;
301 *reslen = diff >= len ? 0 : len - diff;
2bd0ea18 302 } else {
a2ceac1f
DC
303 *resbno = bno;
304 *reslen = len;
2bd0ea18 305 }
cd80de04
CH
306
307 return busy;
2bd0ea18
NS
308}
309
310/*
311 * Compute best start block and diff for "near" allocations.
312 * freelen >= wantlen already checked by caller.
313 */
314STATIC xfs_extlen_t /* difference value (absolute) */
315xfs_alloc_compute_diff(
316 xfs_agblock_t wantbno, /* target starting block */
317 xfs_extlen_t wantlen, /* target length */
318 xfs_extlen_t alignment, /* target alignment */
1fccd5c8 319 int datatype, /* are we allocating data? */
2bd0ea18
NS
320 xfs_agblock_t freebno, /* freespace's starting block */
321 xfs_extlen_t freelen, /* freespace's length */
322 xfs_agblock_t *newbnop) /* result: best start block from free */
323{
324 xfs_agblock_t freeend; /* end of freespace extent */
325 xfs_agblock_t newbno1; /* return block number */
326 xfs_agblock_t newbno2; /* other new block number */
0e266570
NS
327 xfs_extlen_t newlen1=0; /* length with newbno1 */
328 xfs_extlen_t newlen2=0; /* length with newbno2 */
2bd0ea18 329 xfs_agblock_t wantend; /* end of target extent */
a85522b6 330 bool userdata = datatype & XFS_ALLOC_USERDATA;
2bd0ea18
NS
331
332 ASSERT(freelen >= wantlen);
333 freeend = freebno + freelen;
334 wantend = wantbno + wantlen;
84a62eea
DC
335 /*
336 * We want to allocate from the start of a free extent if it is past
337 * the desired block or if we are allocating user data and the free
338 * extent is before desired block. The second case is there to allow
339 * for contiguous allocation from the remaining free space if the file
340 * grows in the short term.
341 */
342 if (freebno >= wantbno || (userdata && freeend < wantend)) {
2bd0ea18
NS
343 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
344 newbno1 = NULLAGBLOCK;
345 } else if (freeend >= wantend && alignment > 1) {
346 newbno1 = roundup(wantbno, alignment);
347 newbno2 = newbno1 - alignment;
348 if (newbno1 >= freeend)
349 newbno1 = NULLAGBLOCK;
350 else
351 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
352 if (newbno2 < freebno)
353 newbno2 = NULLAGBLOCK;
354 else
355 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
356 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
357 if (newlen1 < newlen2 ||
358 (newlen1 == newlen2 &&
359 XFS_ABSDIFF(newbno1, wantbno) >
360 XFS_ABSDIFF(newbno2, wantbno)))
361 newbno1 = newbno2;
362 } else if (newbno2 != NULLAGBLOCK)
363 newbno1 = newbno2;
364 } else if (freeend >= wantend) {
365 newbno1 = wantbno;
366 } else if (alignment > 1) {
367 newbno1 = roundup(freeend - wantlen, alignment);
368 if (newbno1 > freeend - wantlen &&
369 newbno1 - alignment >= freebno)
370 newbno1 -= alignment;
371 else if (newbno1 >= freeend)
372 newbno1 = NULLAGBLOCK;
373 } else
374 newbno1 = freeend - wantlen;
375 *newbnop = newbno1;
376 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
377}
378
379/*
380 * Fix up the length, based on mod and prod.
381 * len should be k * prod + mod for some k.
382 * If len is too small it is returned unchanged.
383 * If len hits maxlen it is left alone.
384 */
385STATIC void
386xfs_alloc_fix_len(
dfc130f3 387 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18
NS
388{
389 xfs_extlen_t k;
390 xfs_extlen_t rlen;
391
392 ASSERT(args->mod < args->prod);
393 rlen = args->len;
394 ASSERT(rlen >= args->minlen);
395 ASSERT(rlen <= args->maxlen);
396 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
397 (args->mod == 0 && rlen < args->prod))
398 return;
399 k = rlen % args->prod;
400 if (k == args->mod)
401 return;
ff105f75
DC
402 if (k > args->mod)
403 rlen = rlen - (k - args->mod);
404 else
405 rlen = rlen - args->prod + (args->mod - k);
19ebedcf 406 /* casts to (int) catch length underflows */
ff105f75
DC
407 if ((int)rlen < (int)args->minlen)
408 return;
409 ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
410 ASSERT(rlen % args->prod == args->mod);
2c003dc2
CH
411 ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
412 rlen + args->minleft);
2bd0ea18
NS
413 args->len = rlen;
414}
415
2bd0ea18
NS
416/*
417 * Update the two btrees, logically removing from freespace the extent
418 * starting at rbno, rlen blocks. The extent is contained within the
419 * actual (current) free extent fbno for flen blocks.
420 * Flags are passed in indicating whether the cursors are set to the
421 * relevant records.
422 */
423STATIC int /* error code */
424xfs_alloc_fixup_trees(
dfc130f3
RC
425 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
426 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
2bd0ea18
NS
427 xfs_agblock_t fbno, /* starting block of free extent */
428 xfs_extlen_t flen, /* length of free extent */
429 xfs_agblock_t rbno, /* starting block of returned extent */
430 xfs_extlen_t rlen, /* length of returned extent */
431 int flags) /* flags, XFSA_FIXUP_... */
432{
433 int error; /* error code */
434 int i; /* operation results */
435 xfs_agblock_t nfbno1; /* first new free startblock */
436 xfs_agblock_t nfbno2; /* second new free startblock */
0e266570
NS
437 xfs_extlen_t nflen1=0; /* first new free length */
438 xfs_extlen_t nflen2=0; /* second new free length */
19ebedcf
DC
439 struct xfs_mount *mp;
440
441 mp = cnt_cur->bc_mp;
2bd0ea18
NS
442
443 /*
444 * Look up the record in the by-size tree if necessary.
445 */
446 if (flags & XFSA_FIXUP_CNT_OK) {
447#ifdef DEBUG
0e266570 448 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
2bd0ea18 449 return error;
fbb4fa7f
DW
450 if (XFS_IS_CORRUPT(mp,
451 i != 1 ||
452 nfbno1 != fbno ||
453 nflen1 != flen))
454 return -EFSCORRUPTED;
2bd0ea18
NS
455#endif
456 } else {
0e266570 457 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
2bd0ea18 458 return error;
fbb4fa7f
DW
459 if (XFS_IS_CORRUPT(mp, i != 1))
460 return -EFSCORRUPTED;
2bd0ea18
NS
461 }
462 /*
463 * Look up the record in the by-block tree if necessary.
464 */
465 if (flags & XFSA_FIXUP_BNO_OK) {
466#ifdef DEBUG
0e266570 467 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
2bd0ea18 468 return error;
fbb4fa7f
DW
469 if (XFS_IS_CORRUPT(mp,
470 i != 1 ||
471 nfbno1 != fbno ||
472 nflen1 != flen))
473 return -EFSCORRUPTED;
2bd0ea18
NS
474#endif
475 } else {
0e266570 476 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
2bd0ea18 477 return error;
fbb4fa7f
DW
478 if (XFS_IS_CORRUPT(mp, i != 1))
479 return -EFSCORRUPTED;
2bd0ea18 480 }
b3563c19 481
2bd0ea18 482#ifdef DEBUG
b3563c19
BN
483 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
484 struct xfs_btree_block *bnoblock;
485 struct xfs_btree_block *cntblock;
486
487 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
488 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
2bd0ea18 489
fbb4fa7f
DW
490 if (XFS_IS_CORRUPT(mp,
491 bnoblock->bb_numrecs !=
492 cntblock->bb_numrecs))
493 return -EFSCORRUPTED;
2bd0ea18
NS
494 }
495#endif
b3563c19 496
2bd0ea18
NS
497 /*
498 * Deal with all four cases: the allocated record is contained
499 * within the freespace record, so we can have new freespace
500 * at either (or both) end, or no freespace remaining.
501 */
502 if (rbno == fbno && rlen == flen)
503 nfbno1 = nfbno2 = NULLAGBLOCK;
504 else if (rbno == fbno) {
505 nfbno1 = rbno + rlen;
506 nflen1 = flen - rlen;
507 nfbno2 = NULLAGBLOCK;
508 } else if (rbno + rlen == fbno + flen) {
509 nfbno1 = fbno;
510 nflen1 = flen - rlen;
511 nfbno2 = NULLAGBLOCK;
512 } else {
513 nfbno1 = fbno;
514 nflen1 = rbno - fbno;
515 nfbno2 = rbno + rlen;
516 nflen2 = (fbno + flen) - nfbno2;
517 }
518 /*
519 * Delete the entry from the by-size btree.
520 */
b194c7d8 521 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 522 return error;
fbb4fa7f
DW
523 if (XFS_IS_CORRUPT(mp, i != 1))
524 return -EFSCORRUPTED;
2bd0ea18
NS
525 /*
526 * Add new by-size btree entry(s).
527 */
528 if (nfbno1 != NULLAGBLOCK) {
0e266570 529 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
2bd0ea18 530 return error;
fbb4fa7f
DW
531 if (XFS_IS_CORRUPT(mp, i != 0))
532 return -EFSCORRUPTED;
b194c7d8 533 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 534 return error;
fbb4fa7f
DW
535 if (XFS_IS_CORRUPT(mp, i != 1))
536 return -EFSCORRUPTED;
2bd0ea18
NS
537 }
538 if (nfbno2 != NULLAGBLOCK) {
0e266570 539 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
2bd0ea18 540 return error;
fbb4fa7f
DW
541 if (XFS_IS_CORRUPT(mp, i != 0))
542 return -EFSCORRUPTED;
b194c7d8 543 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 544 return error;
fbb4fa7f
DW
545 if (XFS_IS_CORRUPT(mp, i != 1))
546 return -EFSCORRUPTED;
2bd0ea18
NS
547 }
548 /*
549 * Fix up the by-block btree entry(s).
550 */
551 if (nfbno1 == NULLAGBLOCK) {
552 /*
553 * No remaining freespace, just delete the by-block tree entry.
554 */
b194c7d8 555 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18 556 return error;
fbb4fa7f
DW
557 if (XFS_IS_CORRUPT(mp, i != 1))
558 return -EFSCORRUPTED;
2bd0ea18
NS
559 } else {
560 /*
561 * Update the by-block entry to start later|be shorter.
562 */
0e266570 563 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
2bd0ea18
NS
564 return error;
565 }
566 if (nfbno2 != NULLAGBLOCK) {
567 /*
568 * 2 resulting free entries, need to add one.
569 */
0e266570 570 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
2bd0ea18 571 return error;
fbb4fa7f
DW
572 if (XFS_IS_CORRUPT(mp, i != 0))
573 return -EFSCORRUPTED;
b194c7d8 574 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18 575 return error;
fbb4fa7f
DW
576 if (XFS_IS_CORRUPT(mp, i != 1))
577 return -EFSCORRUPTED;
2bd0ea18
NS
578 }
579 return 0;
580}
581
bc01119d 582static xfs_failaddr_t
a2ceac1f
DC
583xfs_agfl_verify(
584 struct xfs_buf *bp)
585{
7861ef77 586 struct xfs_mount *mp = bp->b_mount;
a2ceac1f 587 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
b134e771 588 __be32 *agfl_bno = xfs_buf_to_agfl_bno(bp);
a2ceac1f
DC
589 int i;
590
95d9582b
DW
591 /*
592 * There is no verification of non-crc AGFLs because mkfs does not
593 * initialise the AGFL to zero or NULL. Hence the only valid part of the
594 * AGFL is what the AGF says is active. We can't get to the AGF, so we
595 * can't verify just those entries are valid.
596 */
597 if (!xfs_sb_version_hascrc(&mp->m_sb))
598 return NULL;
599
68dbe77f 600 if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
bc01119d 601 return __this_address;
68dbe77f 602 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
bc01119d 603 return __this_address;
dd5b876e
DC
604 /*
605 * during growfs operations, the perag is not fully initialised,
606 * so we can't use it for any useful checking. growfs ensures we can't
607 * use it by using uncached buffers that don't have the perag attached
608 * so we can detect and avoid this problem.
609 */
610 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
bc01119d 611 return __this_address;
dd5b876e 612
b8165508 613 for (i = 0; i < xfs_agfl_size(mp); i++) {
b134e771
CH
614 if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
615 be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
bc01119d 616 return __this_address;
a2ceac1f 617 }
a65d8d29 618
bc01119d
DW
619 if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
620 return __this_address;
621 return NULL;
dd5b876e
DC
622}
623
624static void
625xfs_agfl_read_verify(
626 struct xfs_buf *bp)
627{
7861ef77 628 struct xfs_mount *mp = bp->b_mount;
1e697959 629 xfs_failaddr_t fa;
dd5b876e
DC
630
631 /*
632 * There is no verification of non-crc AGFLs because mkfs does not
633 * initialise the AGFL to zero or NULL. Hence the only valid part of the
634 * AGFL is what the AGF says is active. We can't get to the AGF, so we
635 * can't verify just those entries are valid.
636 */
637 if (!xfs_sb_version_hascrc(&mp->m_sb))
638 return;
639
45922933 640 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
1e697959
DW
641 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
642 else {
643 fa = xfs_agfl_verify(bp);
644 if (fa)
645 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
646 }
a2ceac1f
DC
647}
648
649static void
650xfs_agfl_write_verify(
651 struct xfs_buf *bp)
652{
7861ef77 653 struct xfs_mount *mp = bp->b_mount;
37d086ca 654 struct xfs_buf_log_item *bip = bp->b_log_item;
1e697959 655 xfs_failaddr_t fa;
a2ceac1f 656
dd5b876e
DC
657 /* no verification of non-crc AGFLs */
658 if (!xfs_sb_version_hascrc(&mp->m_sb))
659 return;
660
1e697959
DW
661 fa = xfs_agfl_verify(bp);
662 if (fa) {
663 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
dd5b876e
DC
664 return;
665 }
666
667 if (bip)
668 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
669
43b5aeed 670 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
a2ceac1f
DC
671}
672
673const struct xfs_buf_ops xfs_agfl_buf_ops = {
a3fac935 674 .name = "xfs_agfl",
68dbe77f 675 .magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
a2ceac1f
DC
676 .verify_read = xfs_agfl_read_verify,
677 .verify_write = xfs_agfl_write_verify,
95d9582b 678 .verify_struct = xfs_agfl_verify,
a2ceac1f
DC
679};
680
2bd0ea18
NS
681/*
682 * Read in the allocation group free block array.
683 */
50bb67d6 684int /* error */
2bd0ea18
NS
685xfs_alloc_read_agfl(
686 xfs_mount_t *mp, /* mount point structure */
687 xfs_trans_t *tp, /* transaction pointer */
688 xfs_agnumber_t agno, /* allocation group number */
689 xfs_buf_t **bpp) /* buffer for the ag free block array */
690{
691 xfs_buf_t *bp; /* return value */
2bd0ea18
NS
692 int error;
693
694 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
695 error = xfs_trans_read_buf(
696 mp, tp, mp->m_ddev_targp,
697 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
a2ceac1f 698 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
9440d84d 699 if (error)
2bd0ea18 700 return error;
a2ceac1f 701 xfs_buf_set_ref(bp, XFS_AGFL_REF);
2bd0ea18
NS
702 *bpp = bp;
703 return 0;
704}
705
a2ceac1f
DC
706STATIC int
707xfs_alloc_update_counters(
708 struct xfs_trans *tp,
709 struct xfs_perag *pag,
710 struct xfs_buf *agbp,
711 long len)
712{
0bc284c2 713 struct xfs_agf *agf = agbp->b_addr;
a2ceac1f
DC
714
715 pag->pagf_freeblks += len;
716 be32_add_cpu(&agf->agf_freeblks, len);
717
718 xfs_trans_agblocks_delta(tp, len);
719 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
a0264b73 720 be32_to_cpu(agf->agf_length))) {
1be76d11 721 xfs_buf_mark_corrupt(agbp);
12b53197 722 return -EFSCORRUPTED;
a0264b73 723 }
a2ceac1f
DC
724
725 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
726 return 0;
727}
728
2bd0ea18 729/*
2950989c 730 * Block allocation algorithm and data structures.
2bd0ea18 731 */
2950989c
BF
732struct xfs_alloc_cur {
733 struct xfs_btree_cur *cnt; /* btree cursors */
734 struct xfs_btree_cur *bnolt;
735 struct xfs_btree_cur *bnogt;
4d66edb1 736 xfs_extlen_t cur_len;/* current search length */
19fe42e3
BF
737 xfs_agblock_t rec_bno;/* extent startblock */
738 xfs_extlen_t rec_len;/* extent length */
739 xfs_agblock_t bno; /* alloc bno */
740 xfs_extlen_t len; /* alloc len */
741 xfs_extlen_t diff; /* diff from search bno */
e055d59e
BF
742 unsigned int busy_gen;/* busy state */
743 bool busy;
2950989c
BF
744};
745
746/*
747 * Set up cursors, etc. in the extent allocation cursor. This function can be
748 * called multiple times to reset an initialized structure without having to
749 * reallocate cursors.
750 */
751static int
752xfs_alloc_cur_setup(
753 struct xfs_alloc_arg *args,
754 struct xfs_alloc_cur *acur)
755{
756 int error;
757 int i;
758
759 ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO);
760
4d66edb1 761 acur->cur_len = args->maxlen;
19fe42e3
BF
762 acur->rec_bno = 0;
763 acur->rec_len = 0;
764 acur->bno = 0;
765 acur->len = 0;
04ea0ac1 766 acur->diff = -1;
e055d59e
BF
767 acur->busy = false;
768 acur->busy_gen = 0;
769
2950989c
BF
770 /*
771 * Perform an initial cntbt lookup to check for availability of maxlen
772 * extents. If this fails, we'll return -ENOSPC to signal the caller to
773 * attempt a small allocation.
774 */
775 if (!acur->cnt)
776 acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
777 args->agbp, args->agno, XFS_BTNUM_CNT);
778 error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
779 if (error)
780 return error;
781
782 /*
783 * Allocate the bnobt left and right search cursors.
784 */
785 if (!acur->bnolt)
786 acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
787 args->agbp, args->agno, XFS_BTNUM_BNO);
788 if (!acur->bnogt)
789 acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
790 args->agbp, args->agno, XFS_BTNUM_BNO);
791 return i == 1 ? 0 : -ENOSPC;
792}
793
794static void
795xfs_alloc_cur_close(
796 struct xfs_alloc_cur *acur,
797 bool error)
798{
799 int cur_error = XFS_BTREE_NOERROR;
800
801 if (error)
802 cur_error = XFS_BTREE_ERROR;
803
804 if (acur->cnt)
805 xfs_btree_del_cursor(acur->cnt, cur_error);
806 if (acur->bnolt)
807 xfs_btree_del_cursor(acur->bnolt, cur_error);
808 if (acur->bnogt)
809 xfs_btree_del_cursor(acur->bnogt, cur_error);
810 acur->cnt = acur->bnolt = acur->bnogt = NULL;
811}
2bd0ea18 812
04ea0ac1
BF
813/*
814 * Check an extent for allocation and track the best available candidate in the
815 * allocation structure. The cursor is deactivated if it has entered an out of
816 * range state based on allocation arguments. Optionally return the extent
817 * extent geometry and allocation status if requested by the caller.
818 */
819static int
820xfs_alloc_cur_check(
821 struct xfs_alloc_arg *args,
822 struct xfs_alloc_cur *acur,
823 struct xfs_btree_cur *cur,
824 int *new)
825{
826 int error, i;
827 xfs_agblock_t bno, bnoa, bnew;
828 xfs_extlen_t len, lena, diff = -1;
829 bool busy;
830 unsigned busy_gen = 0;
831 bool deactivate = false;
3ca39168 832 bool isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;
04ea0ac1
BF
833
834 *new = 0;
835
836 error = xfs_alloc_get_rec(cur, &bno, &len, &i);
837 if (error)
838 return error;
fbb4fa7f
DW
839 if (XFS_IS_CORRUPT(args->mp, i != 1))
840 return -EFSCORRUPTED;
04ea0ac1
BF
841
842 /*
843 * Check minlen and deactivate a cntbt cursor if out of acceptable size
844 * range (i.e., walking backwards looking for a minlen extent).
845 */
846 if (len < args->minlen) {
3ca39168 847 deactivate = !isbnobt;
04ea0ac1
BF
848 goto out;
849 }
850
851 busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
852 &busy_gen);
853 acur->busy |= busy;
854 if (busy)
855 acur->busy_gen = busy_gen;
856 /* deactivate a bnobt cursor outside of locality range */
3ca39168
BF
857 if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
858 deactivate = isbnobt;
04ea0ac1 859 goto out;
3ca39168 860 }
04ea0ac1
BF
861 if (lena < args->minlen)
862 goto out;
863
864 args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
865 xfs_alloc_fix_len(args);
866 ASSERT(args->len >= args->minlen);
867 if (args->len < acur->len)
868 goto out;
869
870 /*
871 * We have an aligned record that satisfies minlen and beats or matches
872 * the candidate extent size. Compare locality for near allocation mode.
873 */
874 ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
875 diff = xfs_alloc_compute_diff(args->agbno, args->len,
876 args->alignment, args->datatype,
877 bnoa, lena, &bnew);
878 if (bnew == NULLAGBLOCK)
879 goto out;
3ca39168
BF
880
881 /*
882 * Deactivate a bnobt cursor with worse locality than the current best.
883 */
884 if (diff > acur->diff) {
885 deactivate = isbnobt;
04ea0ac1 886 goto out;
3ca39168 887 }
04ea0ac1
BF
888
889 ASSERT(args->len > acur->len ||
890 (args->len == acur->len && diff <= acur->diff));
891 acur->rec_bno = bno;
892 acur->rec_len = len;
893 acur->bno = bnew;
894 acur->len = args->len;
895 acur->diff = diff;
896 *new = 1;
897
4f2eee5a
BF
898 /*
899 * We're done if we found a perfect allocation. This only deactivates
900 * the current cursor, but this is just an optimization to terminate a
901 * cntbt search that otherwise runs to the edge of the tree.
902 */
903 if (acur->diff == 0 && acur->len == args->maxlen)
904 deactivate = true;
04ea0ac1
BF
905out:
906 if (deactivate)
aa6fa37f 907 cur->bc_ag.priv.abt.active = false;
04ea0ac1
BF
908 trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
909 *new);
910 return 0;
911}
912
dacde37d
BF
913/*
914 * Complete an allocation of a candidate extent. Remove the extent from both
915 * trees and update the args structure.
916 */
917STATIC int
918xfs_alloc_cur_finish(
919 struct xfs_alloc_arg *args,
920 struct xfs_alloc_cur *acur)
921{
0bc284c2 922 struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
dacde37d
BF
923 int error;
924
925 ASSERT(acur->cnt && acur->bnolt);
926 ASSERT(acur->bno >= acur->rec_bno);
927 ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
0bc284c2 928 ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));
dacde37d
BF
929
930 error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
931 acur->rec_len, acur->bno, acur->len, 0);
932 if (error)
933 return error;
934
935 args->agbno = acur->bno;
936 args->len = acur->len;
937 args->wasfromfl = 0;
938
939 trace_xfs_alloc_cur(args);
940 return 0;
941}
942
4d66edb1
BF
943/*
944 * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
945 * bno optimized lookup to search for extents with ideal size and locality.
946 */
947STATIC int
948xfs_alloc_cntbt_iter(
949 struct xfs_alloc_arg *args,
950 struct xfs_alloc_cur *acur)
951{
952 struct xfs_btree_cur *cur = acur->cnt;
953 xfs_agblock_t bno;
954 xfs_extlen_t len, cur_len;
955 int error;
956 int i;
957
958 if (!xfs_alloc_cur_active(cur))
959 return 0;
960
961 /* locality optimized lookup */
962 cur_len = acur->cur_len;
963 error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
964 if (error)
965 return error;
966 if (i == 0)
967 return 0;
968 error = xfs_alloc_get_rec(cur, &bno, &len, &i);
969 if (error)
970 return error;
971
972 /* check the current record and update search length from it */
973 error = xfs_alloc_cur_check(args, acur, cur, &i);
974 if (error)
975 return error;
976 ASSERT(len >= acur->cur_len);
977 acur->cur_len = len;
978
979 /*
980 * We looked up the first record >= [agbno, len] above. The agbno is a
981 * secondary key and so the current record may lie just before or after
982 * agbno. If it is past agbno, check the previous record too so long as
983 * the length matches as it may be closer. Don't check a smaller record
984 * because that could deactivate our cursor.
985 */
986 if (bno > args->agbno) {
987 error = xfs_btree_decrement(cur, 0, &i);
988 if (!error && i) {
989 error = xfs_alloc_get_rec(cur, &bno, &len, &i);
990 if (!error && i && len == acur->cur_len)
991 error = xfs_alloc_cur_check(args, acur, cur,
992 &i);
993 }
994 if (error)
995 return error;
996 }
997
998 /*
999 * Increment the search key until we find at least one allocation
1000 * candidate or if the extent we found was larger. Otherwise, double the
1001 * search key to optimize the search. Efficiency is more important here
1002 * than absolute best locality.
1003 */
1004 cur_len <<= 1;
1005 if (!acur->len || acur->cur_len >= cur_len)
1006 acur->cur_len++;
1007 else
1008 acur->cur_len = cur_len;
1009
1010 return error;
1011}
1012
ba02381c
BF
1013/*
1014 * Deal with the case where only small freespaces remain. Either return the
1015 * contents of the last freespace record, or allocate space from the freelist if
1016 * there is nothing in the tree.
1017 */
1018STATIC int /* error */
1019xfs_alloc_ag_vextent_small(
1020 struct xfs_alloc_arg *args, /* allocation argument structure */
1021 struct xfs_btree_cur *ccur, /* optional by-size cursor */
1022 xfs_agblock_t *fbnop, /* result block number */
1023 xfs_extlen_t *flenp, /* result length */
1024 int *stat) /* status: 0-freelist, 1-normal/none */
1025{
0bc284c2 1026 struct xfs_agf *agf = args->agbp->b_addr;
ba02381c
BF
1027 int error = 0;
1028 xfs_agblock_t fbno = NULLAGBLOCK;
1029 xfs_extlen_t flen = 0;
9e1862f0 1030 int i = 0;
ba02381c 1031
9e1862f0
BF
1032 /*
1033 * If a cntbt cursor is provided, try to allocate the largest record in
1034 * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
1035 * allocation. Make sure to respect minleft even when pulling from the
1036 * freelist.
1037 */
1038 if (ccur)
1039 error = xfs_btree_decrement(ccur, 0, &i);
ba02381c
BF
1040 if (error)
1041 goto error;
1042 if (i) {
1043 error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
1044 if (error)
1045 goto error;
fbb4fa7f
DW
1046 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1047 error = -EFSCORRUPTED;
1048 goto error;
1049 }
ba02381c
BF
1050 goto out;
1051 }
1052
1053 if (args->minlen != 1 || args->alignment != 1 ||
1054 args->resv == XFS_AG_RESV_AGFL ||
0bc284c2 1055 be32_to_cpu(agf->agf_flcount) <= args->minleft)
ba02381c
BF
1056 goto out;
1057
1058 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1059 if (error)
1060 goto error;
1061 if (fbno == NULLAGBLOCK)
1062 goto out;
1063
1064 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
a85522b6 1065 (args->datatype & XFS_ALLOC_NOBUSY));
ba02381c 1066
a85522b6 1067 if (args->datatype & XFS_ALLOC_USERDATA) {
ba02381c
BF
1068 struct xfs_buf *bp;
1069
78fcd346
DW
1070 error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
1071 XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
1072 args->mp->m_bsize, 0, &bp);
1073 if (error)
ba02381c 1074 goto error;
ba02381c
BF
1075 xfs_trans_binval(args->tp, bp);
1076 }
2d7ea81f
BF
1077 *fbnop = args->agbno = fbno;
1078 *flenp = args->len = 1;
0bc284c2 1079 if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
fbb4fa7f
DW
1080 error = -EFSCORRUPTED;
1081 goto error;
1082 }
ba02381c
BF
1083 args->wasfromfl = 1;
1084 trace_xfs_alloc_small_freelist(args);
1085
1086 /*
1087 * If we're feeding an AGFL block to something that doesn't live in the
1088 * free space, we need to clear out the OWN_AG rmap.
1089 */
1090 error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1,
1091 &XFS_RMAP_OINFO_AG);
1092 if (error)
1093 goto error;
1094
1095 *stat = 0;
1096 return 0;
1097
1098out:
1099 /*
1100 * Can't do the allocation, give up.
1101 */
1102 if (flen < args->minlen) {
1103 args->agbno = NULLAGBLOCK;
1104 trace_xfs_alloc_small_notenough(args);
1105 flen = 0;
1106 }
1107 *fbnop = fbno;
1108 *flenp = flen;
1109 *stat = 1;
1110 trace_xfs_alloc_small_done(args);
1111 return 0;
1112
1113error:
1114 trace_xfs_alloc_small_error(args);
1115 return error;
1116}
1117
2bd0ea18
NS
1118/*
1119 * Allocate a variable extent in the allocation group agno.
1120 * Type and bno are used to determine where in the allocation group the
1121 * extent will start.
1122 * Extent's length (returned in *len) will be between minlen and maxlen,
1123 * and of the form k * prod + mod unless there's nothing that large.
1124 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1125 */
1126STATIC int /* error */
1127xfs_alloc_ag_vextent(
dfc130f3 1128 xfs_alloc_arg_t *args) /* argument structure for allocation */
2bd0ea18 1129{
0e266570 1130 int error=0;
2bd0ea18
NS
1131
1132 ASSERT(args->minlen > 0);
1133 ASSERT(args->maxlen > 0);
1134 ASSERT(args->minlen <= args->maxlen);
1135 ASSERT(args->mod < args->prod);
1136 ASSERT(args->alignment > 0);
cf8ce220 1137
2bd0ea18
NS
1138 /*
1139 * Branch to correct routine based on the type.
1140 */
1141 args->wasfromfl = 0;
1142 switch (args->type) {
1143 case XFS_ALLOCTYPE_THIS_AG:
1144 error = xfs_alloc_ag_vextent_size(args);
1145 break;
1146 case XFS_ALLOCTYPE_NEAR_BNO:
1147 error = xfs_alloc_ag_vextent_near(args);
1148 break;
1149 case XFS_ALLOCTYPE_THIS_BNO:
1150 error = xfs_alloc_ag_vextent_exact(args);
1151 break;
1152 default:
1153 ASSERT(0);
1154 /* NOTREACHED */
1155 }
a2ceac1f
DC
1156
1157 if (error || args->agbno == NULLAGBLOCK)
2bd0ea18 1158 return error;
2bd0ea18 1159
a2ceac1f
DC
1160 ASSERT(args->len >= args->minlen);
1161 ASSERT(args->len <= args->maxlen);
9760cac2 1162 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
a2ceac1f
DC
1163 ASSERT(args->agbno % args->alignment == 0);
1164
631ac87a 1165 /* if not file data, insert new block into the reverse map btree */
3ee858aa 1166 if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
631ac87a
DW
1167 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
1168 args->agbno, args->len, &args->oinfo);
1169 if (error)
1170 return error;
1171 }
1172
a2ceac1f
DC
1173 if (!args->wasfromfl) {
1174 error = xfs_alloc_update_counters(args->tp, args->pag,
1175 args->agbp,
1176 -((long)(args->len)));
1177 if (error)
1178 return error;
1179
1180 ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
1181 args->agbno, args->len));
2bd0ea18 1182 }
a2ceac1f 1183
cf8ce220 1184 xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
a2ceac1f 1185
79896434
BD
1186 XFS_STATS_INC(args->mp, xs_allocx);
1187 XFS_STATS_ADD(args->mp, xs_allocb, args->len);
a2ceac1f 1188 return error;
2bd0ea18
NS
1189}
1190
1191/*
1192 * Allocate a variable extent at exactly agno/bno.
1193 * Extent's length (returned in *len) will be between minlen and maxlen,
1194 * and of the form k * prod + mod unless there's nothing that large.
1195 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
1196 */
1197STATIC int /* error */
1198xfs_alloc_ag_vextent_exact(
dfc130f3 1199 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 1200{
0bc284c2 1201 struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
dfc130f3
RC
1202 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
1203 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
2bd0ea18
NS
1204 int error;
1205 xfs_agblock_t fbno; /* start block of found extent */
2bd0ea18 1206 xfs_extlen_t flen; /* length of found extent */
cd80de04
CH
1207 xfs_agblock_t tbno; /* start block of busy extent */
1208 xfs_extlen_t tlen; /* length of busy extent */
1209 xfs_agblock_t tend; /* end block of busy extent */
2bd0ea18 1210 int i; /* success/failure of operation */
cd80de04 1211 unsigned busy_gen;
2bd0ea18
NS
1212
1213 ASSERT(args->alignment == 1);
a2ceac1f 1214
2bd0ea18
NS
1215 /*
1216 * Allocate/initialize a cursor for the by-number freespace btree.
1217 */
b194c7d8 1218 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
56b2de80
DC
1219 args->agno, XFS_BTNUM_BNO);
1220
2bd0ea18
NS
1221 /*
1222 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
1223 * Look for the closest free block <= bno, it must contain bno
1224 * if any free block does.
1225 */
56b2de80
DC
1226 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
1227 if (error)
2bd0ea18 1228 goto error0;
56b2de80
DC
1229 if (!i)
1230 goto not_found;
1231
2bd0ea18
NS
1232 /*
1233 * Grab the freespace record.
1234 */
56b2de80
DC
1235 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
1236 if (error)
2bd0ea18 1237 goto error0;
fbb4fa7f
DW
1238 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1239 error = -EFSCORRUPTED;
1240 goto error0;
1241 }
2bd0ea18 1242 ASSERT(fbno <= args->agbno);
56b2de80 1243
5000d01d 1244 /*
a2ceac1f
DC
1245 * Check for overlapping busy extents.
1246 */
cd80de04
CH
1247 tbno = fbno;
1248 tlen = flen;
1249 xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
a2ceac1f
DC
1250
1251 /*
1252 * Give up if the start of the extent is busy, or the freespace isn't
1253 * long enough for the minimum request.
2bd0ea18 1254 */
a2ceac1f
DC
1255 if (tbno > args->agbno)
1256 goto not_found;
1257 if (tlen < args->minlen)
1258 goto not_found;
1259 tend = tbno + tlen;
1260 if (tend < args->agbno + args->minlen)
56b2de80
DC
1261 goto not_found;
1262
2bd0ea18
NS
1263 /*
1264 * End of extent will be smaller of the freespace end and the
1265 * maximal requested end.
56b2de80 1266 *
2bd0ea18
NS
1267 * Fix the length according to mod and prod if given.
1268 */
a2ceac1f
DC
1269 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
1270 - args->agbno;
2bd0ea18 1271 xfs_alloc_fix_len(args);
a2ceac1f 1272 ASSERT(args->agbno + args->len <= tend);
56b2de80 1273
2bd0ea18 1274 /*
a2ceac1f 1275 * We are allocating agbno for args->len
2bd0ea18
NS
1276 * Allocate/initialize a cursor for the by-size btree.
1277 */
b194c7d8
BN
1278 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1279 args->agno, XFS_BTNUM_CNT);
0bc284c2 1280 ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
56b2de80
DC
1281 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
1282 args->len, XFSA_FIXUP_BNO_OK);
1283 if (error) {
2bd0ea18
NS
1284 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1285 goto error0;
1286 }
a2ceac1f 1287
2bd0ea18
NS
1288 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1289 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
a2ceac1f 1290
2bd0ea18 1291 args->wasfromfl = 0;
56b2de80
DC
1292 trace_xfs_alloc_exact_done(args);
1293 return 0;
1294
1295not_found:
1296 /* Didn't find it, return null. */
1297 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1298 args->agbno = NULLAGBLOCK;
1299 trace_xfs_alloc_exact_notfound(args);
2bd0ea18
NS
1300 return 0;
1301
1302error0:
1303 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
56b2de80
DC
1304 trace_xfs_alloc_exact_error(args);
1305 return error;
1306}
1307
1308/*
4f2eee5a
BF
1309 * Search a given number of btree records in a given direction. Check each
1310 * record against the good extent we've already found.
56b2de80
DC
1311 */
1312STATIC int
4f2eee5a 1313xfs_alloc_walk_iter(
3ca39168
BF
1314 struct xfs_alloc_arg *args,
1315 struct xfs_alloc_cur *acur,
1316 struct xfs_btree_cur *cur,
4f2eee5a
BF
1317 bool increment,
1318 bool find_one, /* quit on first candidate */
1319 int count, /* rec count (-1 for infinite) */
1320 int *stat)
56b2de80 1321{
56b2de80
DC
1322 int error;
1323 int i;
56b2de80 1324
4f2eee5a
BF
1325 *stat = 0;
1326
56b2de80 1327 /*
3ca39168
BF
1328 * Search so long as the cursor is active or we find a better extent.
1329 * The cursor is deactivated if it extends beyond the range of the
1330 * current allocation candidate.
56b2de80 1331 */
4f2eee5a 1332 while (xfs_alloc_cur_active(cur) && count) {
3ca39168 1333 error = xfs_alloc_cur_check(args, acur, cur, &i);
56b2de80 1334 if (error)
3ca39168 1335 return error;
4f2eee5a
BF
1336 if (i == 1) {
1337 *stat = 1;
1338 if (find_one)
1339 break;
1340 }
3ca39168
BF
1341 if (!xfs_alloc_cur_active(cur))
1342 break;
56b2de80 1343
3ca39168
BF
1344 if (increment)
1345 error = xfs_btree_increment(cur, 0, &i);
56b2de80 1346 else
3ca39168 1347 error = xfs_btree_decrement(cur, 0, &i);
56b2de80 1348 if (error)
3ca39168
BF
1349 return error;
1350 if (i == 0)
aa6fa37f 1351 cur->bc_ag.priv.abt.active = false;
4f2eee5a
BF
1352
1353 if (count > 0)
1354 count--;
3ca39168 1355 }
56b2de80 1356
56b2de80 1357 return 0;
2bd0ea18
NS
1358}
1359
d0ebd9ee 1360/*
4d66edb1
BF
1361 * Search the by-bno and by-size btrees in parallel in search of an extent with
1362 * ideal locality based on the NEAR mode ->agbno locality hint.
d0ebd9ee
BF
1363 */
1364STATIC int
4d66edb1 1365xfs_alloc_ag_vextent_locality(
d0ebd9ee
BF
1366 struct xfs_alloc_arg *args,
1367 struct xfs_alloc_cur *acur,
1368 int *stat)
1369{
1370 struct xfs_btree_cur *fbcur = NULL;
1371 int error;
1372 int i;
1373 bool fbinc;
1374
1375 ASSERT(acur->len == 0);
1376 ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
1377
1378 *stat = 0;
1379
4d66edb1
BF
1380 error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
1381 if (error)
1382 return error;
d0ebd9ee
BF
1383 error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
1384 if (error)
1385 return error;
1386 error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
1387 if (error)
1388 return error;
1389
1390 /*
4d66edb1
BF
1391 * Search the bnobt and cntbt in parallel. Search the bnobt left and
1392 * right and lookup the closest extent to the locality hint for each
1393 * extent size key in the cntbt. The entire search terminates
1394 * immediately on a bnobt hit because that means we've found best case
1395 * locality. Otherwise the search continues until the cntbt cursor runs
1396 * off the end of the tree. If no allocation candidate is found at this
1397 * point, give up on locality, walk backwards from the end of the cntbt
1398 * and take the first available extent.
1399 *
1400 * The parallel tree searches balance each other out to provide fairly
1401 * consistent performance for various situations. The bnobt search can
1402 * have pathological behavior in the worst case scenario of larger
1403 * allocation requests and fragmented free space. On the other hand, the
1404 * bnobt is able to satisfy most smaller allocation requests much more
1405 * quickly than the cntbt. The cntbt search can sift through fragmented
1406 * free space and sets of free extents for larger allocation requests
1407 * more quickly than the bnobt. Since the locality hint is just a hint
1408 * and we don't want to scan the entire bnobt for perfect locality, the
1409 * cntbt search essentially bounds the bnobt search such that we can
1410 * find good enough locality at reasonable performance in most cases.
d0ebd9ee
BF
1411 */
1412 while (xfs_alloc_cur_active(acur->bnolt) ||
4d66edb1
BF
1413 xfs_alloc_cur_active(acur->bnogt) ||
1414 xfs_alloc_cur_active(acur->cnt)) {
1415
1416 trace_xfs_alloc_cur_lookup(args);
1417
1418 /*
1419 * Search the bnobt left and right. In the case of a hit, finish
1420 * the search in the opposite direction and we're done.
1421 */
d0ebd9ee
BF
1422 error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
1423 true, 1, &i);
1424 if (error)
1425 return error;
1426 if (i == 1) {
1427 trace_xfs_alloc_cur_left(args);
1428 fbcur = acur->bnogt;
1429 fbinc = true;
1430 break;
1431 }
d0ebd9ee
BF
1432 error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
1433 1, &i);
1434 if (error)
1435 return error;
1436 if (i == 1) {
1437 trace_xfs_alloc_cur_right(args);
1438 fbcur = acur->bnolt;
1439 fbinc = false;
1440 break;
1441 }
4d66edb1
BF
1442
1443 /*
1444 * Check the extent with best locality based on the current
1445 * extent size search key and keep track of the best candidate.
1446 */
1447 error = xfs_alloc_cntbt_iter(args, acur);
1448 if (error)
1449 return error;
1450 if (!xfs_alloc_cur_active(acur->cnt)) {
1451 trace_xfs_alloc_cur_lookup_done(args);
1452 break;
1453 }
1454 }
1455
1456 /*
1457 * If we failed to find anything due to busy extents, return empty
1458 * handed so the caller can flush and retry. If no busy extents were
1459 * found, walk backwards from the end of the cntbt as a last resort.
1460 */
1461 if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
1462 error = xfs_btree_decrement(acur->cnt, 0, &i);
1463 if (error)
1464 return error;
1465 if (i) {
aa6fa37f 1466 acur->cnt->bc_ag.priv.abt.active = true;
4d66edb1
BF
1467 fbcur = acur->cnt;
1468 fbinc = false;
1469 }
d0ebd9ee
BF
1470 }
1471
4d66edb1
BF
1472 /*
1473 * Search in the opposite direction for a better entry in the case of
1474 * a bnobt hit or walk backwards from the end of the cntbt.
1475 */
d0ebd9ee
BF
1476 if (fbcur) {
1477 error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
1478 &i);
1479 if (error)
1480 return error;
1481 }
1482
1483 if (acur->len)
1484 *stat = 1;
1485
1486 return 0;
1487}
1488
0bb673ce
DW
1489/* Check the last block of the cnt btree for allocations. */
1490static int
1491xfs_alloc_ag_vextent_lastblock(
1492 struct xfs_alloc_arg *args,
1493 struct xfs_alloc_cur *acur,
1494 xfs_agblock_t *bno,
1495 xfs_extlen_t *len,
1496 bool *allocated)
1497{
1498 int error;
1499 int i;
1500
1501#ifdef DEBUG
1502 /* Randomly don't execute the first algorithm. */
1503 if (prandom_u32() & 1)
1504 return 0;
1505#endif
1506
1507 /*
1508 * Start from the entry that lookup found, sequence through all larger
1509 * free blocks. If we're actually pointing at a record smaller than
1510 * maxlen, go to the start of this block, and skip all those smaller
1511 * than minlen.
1512 */
1513 if (len || args->alignment > 1) {
1514 acur->cnt->bc_ptrs[0] = 1;
1515 do {
1516 error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
1517 if (error)
1518 return error;
fbb4fa7f
DW
1519 if (XFS_IS_CORRUPT(args->mp, i != 1))
1520 return -EFSCORRUPTED;
0bb673ce
DW
1521 if (*len >= args->minlen)
1522 break;
1523 error = xfs_btree_increment(acur->cnt, 0, &i);
1524 if (error)
1525 return error;
1526 } while (i);
1527 ASSERT(*len >= args->minlen);
1528 if (!i)
1529 return 0;
1530 }
1531
1532 error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
1533 if (error)
1534 return error;
1535
1536 /*
1537 * It didn't work. We COULD be in a case where there's a good record
1538 * somewhere, so try again.
1539 */
1540 if (acur->len == 0)
1541 return 0;
1542
1543 trace_xfs_alloc_near_first(args);
1544 *allocated = true;
1545 return 0;
1546}
1547
2bd0ea18
NS
1548/*
1549 * Allocate a variable extent near bno in the allocation group agno.
1550 * Extent's length (returned in len) will be between minlen and maxlen,
1551 * and of the form k * prod + mod unless there's nothing that large.
1552 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1553 */
2950989c 1554STATIC int
2bd0ea18 1555xfs_alloc_ag_vextent_near(
2950989c 1556 struct xfs_alloc_arg *args)
2bd0ea18 1557{
2950989c 1558 struct xfs_alloc_cur acur = {};
3ca39168
BF
1559 int error; /* error code */
1560 int i; /* result code, temporary */
3ca39168
BF
1561 xfs_agblock_t bno;
1562 xfs_extlen_t len;
a2ceac1f 1563
9bef1574 1564 /* handle uninitialized agbno range so caller doesn't have to */
ff3263dd
BF
1565 if (!args->min_agbno && !args->max_agbno)
1566 args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
1567 ASSERT(args->min_agbno <= args->max_agbno);
1568
1569 /* clamp agbno to the range if it's outside */
1570 if (args->agbno < args->min_agbno)
1571 args->agbno = args->min_agbno;
1572 if (args->agbno > args->max_agbno)
1573 args->agbno = args->max_agbno;
1574
a2ceac1f 1575restart:
3ca39168 1576 len = 0;
a2ceac1f 1577
2bd0ea18 1578 /*
2950989c
BF
1579 * Set up cursors and see if there are any free extents as big as
1580 * maxlen. If not, pick the last entry in the tree unless the tree is
1581 * empty.
5000d01d 1582 */
2950989c
BF
1583 error = xfs_alloc_cur_setup(args, &acur);
1584 if (error == -ENOSPC) {
3ca39168
BF
1585 error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
1586 &len, &i);
2950989c
BF
1587 if (error)
1588 goto out;
3ca39168 1589 if (i == 0 || len == 0) {
a2ceac1f 1590 trace_xfs_alloc_near_noentry(args);
2950989c 1591 goto out;
2bd0ea18
NS
1592 }
1593 ASSERT(i == 1);
2950989c
BF
1594 } else if (error) {
1595 goto out;
2bd0ea18 1596 }
a2ceac1f 1597
5000d01d 1598 /*
2bd0ea18
NS
1599 * First algorithm.
1600 * If the requested extent is large wrt the freespaces available
1601 * in this a.g., then the cursor will be pointing to a btree entry
1602 * near the right edge of the tree. If it's in the last btree leaf
1603 * block, then we just examine all the entries in that block
1604 * that are big enough, and pick the best one.
2bd0ea18 1605 */
0bb673ce
DW
1606 if (xfs_btree_islastblock(acur.cnt, 0)) {
1607 bool allocated = false;
4f2eee5a 1608
0bb673ce
DW
1609 error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
1610 &allocated);
4f2eee5a
BF
1611 if (error)
1612 goto out;
0bb673ce
DW
1613 if (allocated)
1614 goto alloc_finish;
2bd0ea18 1615 }
2950989c 1616
2bd0ea18 1617 /*
4d66edb1
BF
1618 * Second algorithm. Combined cntbt and bnobt search to find ideal
1619 * locality.
2bd0ea18 1620 */
4d66edb1 1621 error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
2950989c
BF
1622 if (error)
1623 goto out;
1624
2bd0ea18
NS
1625 /*
1626 * If we couldn't get anything, give up.
1627 */
3ca39168 1628 if (!acur.len) {
e055d59e 1629 if (acur.busy) {
a2ceac1f 1630 trace_xfs_alloc_near_busy(args);
e055d59e
BF
1631 xfs_extent_busy_flush(args->mp, args->pag,
1632 acur.busy_gen);
a2ceac1f
DC
1633 goto restart;
1634 }
56b2de80 1635 trace_xfs_alloc_size_neither(args);
2bd0ea18 1636 args->agbno = NULLAGBLOCK;
2950989c 1637 goto out;
2bd0ea18 1638 }
56b2de80 1639
0bb673ce 1640alloc_finish:
dacde37d
BF
1641 /* fix up btrees on a successful allocation */
1642 error = xfs_alloc_cur_finish(args, &acur);
56b2de80 1643
2950989c
BF
1644out:
1645 xfs_alloc_cur_close(&acur, error);
2bd0ea18
NS
1646 return error;
1647}
1648
1649/*
1650 * Allocate a variable extent anywhere in the allocation group agno.
1651 * Extent's length (returned in len) will be between minlen and maxlen,
1652 * and of the form k * prod + mod unless there's nothing that large.
1653 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1654 */
1655STATIC int /* error */
1656xfs_alloc_ag_vextent_size(
dfc130f3 1657 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 1658{
0bc284c2 1659 struct xfs_agf *agf = args->agbp->b_addr;
dfc130f3
RC
1660 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */
1661 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */
2bd0ea18
NS
1662 int error; /* error result */
1663 xfs_agblock_t fbno; /* start of found freespace */
1664 xfs_extlen_t flen; /* length of found freespace */
2bd0ea18
NS
1665 int i; /* temp status variable */
1666 xfs_agblock_t rbno; /* returned block number */
1667 xfs_extlen_t rlen; /* length of returned extent */
cd80de04
CH
1668 bool busy;
1669 unsigned busy_gen;
2bd0ea18 1670
a2ceac1f 1671restart:
2bd0ea18
NS
1672 /*
1673 * Allocate and initialize a cursor for the by-size btree.
1674 */
b194c7d8
BN
1675 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1676 args->agno, XFS_BTNUM_CNT);
2bd0ea18 1677 bno_cur = NULL;
cd80de04 1678 busy = false;
a2ceac1f 1679
2bd0ea18
NS
1680 /*
1681 * Look for an entry >= maxlen+alignment-1 blocks.
1682 */
0e266570
NS
1683 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1684 args->maxlen + args->alignment - 1, &i)))
2bd0ea18 1685 goto error0;
a2ceac1f 1686
2bd0ea18 1687 /*
cd80de04
CH
1688 * If none then we have to settle for a smaller extent. In the case that
1689 * there are no large extents, this will return the last entry in the
1690 * tree unless the tree is empty. In the case that there are only busy
1691 * large extents, this will return the largest small extent unless there
a2ceac1f 1692 * are no smaller extents available.
5000d01d 1693 */
cd80de04 1694 if (!i) {
a2ceac1f
DC
1695 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1696 &fbno, &flen, &i);
1697 if (error)
2bd0ea18
NS
1698 goto error0;
1699 if (i == 0 || flen == 0) {
1700 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
56b2de80 1701 trace_xfs_alloc_size_noentry(args);
2bd0ea18
NS
1702 return 0;
1703 }
1704 ASSERT(i == 1);
cd80de04
CH
1705 busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1706 &rlen, &busy_gen);
a2ceac1f
DC
1707 } else {
1708 /*
1709 * Search for a non-busy extent that is large enough.
a2ceac1f
DC
1710 */
1711 for (;;) {
1712 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1713 if (error)
1714 goto error0;
fbb4fa7f
DW
1715 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1716 error = -EFSCORRUPTED;
1717 goto error0;
1718 }
a2ceac1f 1719
cd80de04
CH
1720 busy = xfs_alloc_compute_aligned(args, fbno, flen,
1721 &rbno, &rlen, &busy_gen);
a2ceac1f
DC
1722
1723 if (rlen >= args->maxlen)
1724 break;
1725
1726 error = xfs_btree_increment(cnt_cur, 0, &i);
1727 if (error)
1728 goto error0;
1729 if (i == 0) {
1730 /*
1731 * Our only valid extents must have been busy.
1732 * Make it unbusy by forcing the log out and
cd80de04 1733 * retrying.
a2ceac1f
DC
1734 */
1735 xfs_btree_del_cursor(cnt_cur,
1736 XFS_BTREE_NOERROR);
1737 trace_xfs_alloc_size_busy(args);
cd80de04
CH
1738 xfs_extent_busy_flush(args->mp,
1739 args->pag, busy_gen);
a2ceac1f
DC
1740 goto restart;
1741 }
1742 }
2bd0ea18 1743 }
a2ceac1f 1744
2bd0ea18
NS
1745 /*
1746 * In the first case above, we got the last entry in the
1747 * by-size btree. Now we check to see if the space hits maxlen
1748 * once aligned; if not, we search left for something better.
1749 * This can't happen in the second case above.
1750 */
2bd0ea18 1751 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
fbb4fa7f
DW
1752 if (XFS_IS_CORRUPT(args->mp,
1753 rlen != 0 &&
1754 (rlen > flen ||
1755 rbno + rlen > fbno + flen))) {
1756 error = -EFSCORRUPTED;
1757 goto error0;
1758 }
2bd0ea18
NS
1759 if (rlen < args->maxlen) {
1760 xfs_agblock_t bestfbno;
1761 xfs_extlen_t bestflen;
1762 xfs_agblock_t bestrbno;
1763 xfs_extlen_t bestrlen;
1764
1765 bestrlen = rlen;
1766 bestrbno = rbno;
1767 bestflen = flen;
1768 bestfbno = fbno;
1769 for (;;) {
b194c7d8 1770 if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
2bd0ea18
NS
1771 goto error0;
1772 if (i == 0)
1773 break;
0e266570
NS
1774 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1775 &i)))
2bd0ea18 1776 goto error0;
fbb4fa7f
DW
1777 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1778 error = -EFSCORRUPTED;
1779 goto error0;
1780 }
2bd0ea18
NS
1781 if (flen < bestrlen)
1782 break;
cd80de04
CH
1783 busy = xfs_alloc_compute_aligned(args, fbno, flen,
1784 &rbno, &rlen, &busy_gen);
2bd0ea18 1785 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
fbb4fa7f
DW
1786 if (XFS_IS_CORRUPT(args->mp,
1787 rlen != 0 &&
1788 (rlen > flen ||
1789 rbno + rlen > fbno + flen))) {
1790 error = -EFSCORRUPTED;
1791 goto error0;
1792 }
2bd0ea18
NS
1793 if (rlen > bestrlen) {
1794 bestrlen = rlen;
1795 bestrbno = rbno;
1796 bestflen = flen;
1797 bestfbno = fbno;
1798 if (rlen == args->maxlen)
1799 break;
1800 }
5000d01d 1801 }
0e266570
NS
1802 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1803 &i)))
2bd0ea18 1804 goto error0;
fbb4fa7f
DW
1805 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1806 error = -EFSCORRUPTED;
1807 goto error0;
1808 }
2bd0ea18
NS
1809 rlen = bestrlen;
1810 rbno = bestrbno;
1811 flen = bestflen;
1812 fbno = bestfbno;
1813 }
1814 args->wasfromfl = 0;
1815 /*
1816 * Fix up the length.
1817 */
1818 args->len = rlen;
a2ceac1f 1819 if (rlen < args->minlen) {
cd80de04 1820 if (busy) {
a2ceac1f
DC
1821 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1822 trace_xfs_alloc_size_busy(args);
cd80de04 1823 xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
a2ceac1f
DC
1824 goto restart;
1825 }
1826 goto out_nominleft;
2bd0ea18 1827 }
a2ceac1f
DC
1828 xfs_alloc_fix_len(args);
1829
2bd0ea18 1830 rlen = args->len;
fbb4fa7f
DW
1831 if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
1832 error = -EFSCORRUPTED;
1833 goto error0;
1834 }
2bd0ea18
NS
1835 /*
1836 * Allocate and initialize a cursor for the by-block tree.
1837 */
b194c7d8
BN
1838 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1839 args->agno, XFS_BTNUM_BNO);
0e266570
NS
1840 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1841 rbno, rlen, XFSA_FIXUP_CNT_OK)))
2bd0ea18
NS
1842 goto error0;
1843 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1844 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1845 cnt_cur = bno_cur = NULL;
1846 args->len = rlen;
1847 args->agbno = rbno;
fbb4fa7f
DW
1848 if (XFS_IS_CORRUPT(args->mp,
1849 args->agbno + args->len >
0bc284c2 1850 be32_to_cpu(agf->agf_length))) {
fbb4fa7f
DW
1851 error = -EFSCORRUPTED;
1852 goto error0;
1853 }
56b2de80 1854 trace_xfs_alloc_size_done(args);
2bd0ea18
NS
1855 return 0;
1856
1857error0:
56b2de80 1858 trace_xfs_alloc_size_error(args);
2bd0ea18
NS
1859 if (cnt_cur)
1860 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1861 if (bno_cur)
1862 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1863 return error;
a2ceac1f
DC
1864
1865out_nominleft:
1866 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1867 trace_xfs_alloc_size_nominleft(args);
1868 args->agbno = NULLAGBLOCK;
1869 return 0;
2bd0ea18
NS
1870}
1871
2bd0ea18
NS
1872/*
1873 * Free the extent starting at agno/bno for length.
1874 */
85aec44f 1875STATIC int
2bd0ea18 1876xfs_free_ag_extent(
5837e73b
DW
1877 struct xfs_trans *tp,
1878 struct xfs_buf *agbp,
1879 xfs_agnumber_t agno,
1880 xfs_agblock_t bno,
1881 xfs_extlen_t len,
1882 const struct xfs_owner_info *oinfo,
1883 enum xfs_ag_resv_type type)
2bd0ea18 1884{
5837e73b
DW
1885 struct xfs_mount *mp;
1886 struct xfs_perag *pag;
1887 struct xfs_btree_cur *bno_cur;
1888 struct xfs_btree_cur *cnt_cur;
1889 xfs_agblock_t gtbno; /* start of right neighbor */
1890 xfs_extlen_t gtlen; /* length of right neighbor */
1891 xfs_agblock_t ltbno; /* start of left neighbor */
1892 xfs_extlen_t ltlen; /* length of left neighbor */
1893 xfs_agblock_t nbno; /* new starting block of freesp */
1894 xfs_extlen_t nlen; /* new length of freespace */
1895 int haveleft; /* have a left neighbor */
1896 int haveright; /* have a right neighbor */
1897 int i;
1898 int error;
2bd0ea18 1899
631ac87a 1900 bno_cur = cnt_cur = NULL;
2bd0ea18 1901 mp = tp->t_mountp;
631ac87a 1902
3ee858aa 1903 if (!xfs_rmap_should_skip_owner_update(oinfo)) {
631ac87a
DW
1904 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1905 if (error)
1906 goto error0;
1907 }
1908
5000d01d 1909 /*
2bd0ea18
NS
1910 * Allocate and initialize a cursor for the by-block btree.
1911 */
b194c7d8 1912 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
5000d01d 1913 /*
2bd0ea18
NS
1914 * Look for a neighboring block on the left (lower block numbers)
1915 * that is contiguous with this space.
1916 */
0e266570 1917 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
2bd0ea18
NS
1918 goto error0;
1919 if (haveleft) {
1920 /*
1921 * There is a block to our left.
1922 */
0e266570 1923 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
2bd0ea18 1924 goto error0;
fbb4fa7f
DW
1925 if (XFS_IS_CORRUPT(mp, i != 1)) {
1926 error = -EFSCORRUPTED;
1927 goto error0;
1928 }
2bd0ea18
NS
1929 /*
1930 * It's not contiguous, though.
1931 */
1932 if (ltbno + ltlen < bno)
1933 haveleft = 0;
1934 else {
1935 /*
1936 * If this failure happens the request to free this
1937 * space was invalid, it's (partly) already free.
1938 * Very bad.
1939 */
fbb4fa7f
DW
1940 if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
1941 error = -EFSCORRUPTED;
1942 goto error0;
1943 }
2bd0ea18
NS
1944 }
1945 }
5000d01d 1946 /*
2bd0ea18
NS
1947 * Look for a neighboring block on the right (higher block numbers)
1948 * that is contiguous with this space.
1949 */
b194c7d8 1950 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
2bd0ea18
NS
1951 goto error0;
1952 if (haveright) {
1953 /*
1954 * There is a block to our right.
1955 */
0e266570 1956 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
2bd0ea18 1957 goto error0;
fbb4fa7f
DW
1958 if (XFS_IS_CORRUPT(mp, i != 1)) {
1959 error = -EFSCORRUPTED;
1960 goto error0;
1961 }
2bd0ea18
NS
1962 /*
1963 * It's not contiguous, though.
1964 */
1965 if (bno + len < gtbno)
1966 haveright = 0;
1967 else {
1968 /*
1969 * If this failure happens the request to free this
1970 * space was invalid, it's (partly) already free.
1971 * Very bad.
1972 */
fbb4fa7f
DW
1973 if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
1974 error = -EFSCORRUPTED;
1975 goto error0;
1976 }
2bd0ea18
NS
1977 }
1978 }
1979 /*
1980 * Now allocate and initialize a cursor for the by-size tree.
1981 */
b194c7d8 1982 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
2bd0ea18
NS
1983 /*
1984 * Have both left and right contiguous neighbors.
1985 * Merge all three into a single free block.
1986 */
1987 if (haveleft && haveright) {
1988 /*
1989 * Delete the old by-size entry on the left.
1990 */
0e266570 1991 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18 1992 goto error0;
fbb4fa7f
DW
1993 if (XFS_IS_CORRUPT(mp, i != 1)) {
1994 error = -EFSCORRUPTED;
1995 goto error0;
1996 }
b194c7d8 1997 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1998 goto error0;
fbb4fa7f
DW
1999 if (XFS_IS_CORRUPT(mp, i != 1)) {
2000 error = -EFSCORRUPTED;
2001 goto error0;
2002 }
2bd0ea18
NS
2003 /*
2004 * Delete the old by-size entry on the right.
2005 */
0e266570 2006 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18 2007 goto error0;
fbb4fa7f
DW
2008 if (XFS_IS_CORRUPT(mp, i != 1)) {
2009 error = -EFSCORRUPTED;
2010 goto error0;
2011 }
b194c7d8 2012 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 2013 goto error0;
fbb4fa7f
DW
2014 if (XFS_IS_CORRUPT(mp, i != 1)) {
2015 error = -EFSCORRUPTED;
2016 goto error0;
2017 }
2bd0ea18
NS
2018 /*
2019 * Delete the old by-block entry for the right block.
2020 */
b194c7d8 2021 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18 2022 goto error0;
fbb4fa7f
DW
2023 if (XFS_IS_CORRUPT(mp, i != 1)) {
2024 error = -EFSCORRUPTED;
2025 goto error0;
2026 }
2bd0ea18
NS
2027 /*
2028 * Move the by-block cursor back to the left neighbor.
2029 */
b194c7d8 2030 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18 2031 goto error0;
fbb4fa7f
DW
2032 if (XFS_IS_CORRUPT(mp, i != 1)) {
2033 error = -EFSCORRUPTED;
2034 goto error0;
2035 }
2bd0ea18
NS
2036#ifdef DEBUG
2037 /*
2038 * Check that this is the right record: delete didn't
2039 * mangle the cursor.
2040 */
2041 {
2042 xfs_agblock_t xxbno;
2043 xfs_extlen_t xxlen;
2044
0e266570
NS
2045 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
2046 &i)))
2bd0ea18 2047 goto error0;
fbb4fa7f
DW
2048 if (XFS_IS_CORRUPT(mp,
2049 i != 1 ||
2050 xxbno != ltbno ||
2051 xxlen != ltlen)) {
2052 error = -EFSCORRUPTED;
2053 goto error0;
2054 }
2bd0ea18
NS
2055 }
2056#endif
2057 /*
2058 * Update remaining by-block entry to the new, joined block.
2059 */
2060 nbno = ltbno;
2061 nlen = len + ltlen + gtlen;
0e266570 2062 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
2063 goto error0;
2064 }
2065 /*
2066 * Have only a left contiguous neighbor.
2067 * Merge it together with the new freespace.
2068 */
2069 else if (haveleft) {
2070 /*
2071 * Delete the old by-size entry on the left.
2072 */
0e266570 2073 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18 2074 goto error0;
fbb4fa7f
DW
2075 if (XFS_IS_CORRUPT(mp, i != 1)) {
2076 error = -EFSCORRUPTED;
2077 goto error0;
2078 }
b194c7d8 2079 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 2080 goto error0;
fbb4fa7f
DW
2081 if (XFS_IS_CORRUPT(mp, i != 1)) {
2082 error = -EFSCORRUPTED;
2083 goto error0;
2084 }
2bd0ea18
NS
2085 /*
2086 * Back up the by-block cursor to the left neighbor, and
2087 * update its length.
2088 */
b194c7d8 2089 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18 2090 goto error0;
fbb4fa7f
DW
2091 if (XFS_IS_CORRUPT(mp, i != 1)) {
2092 error = -EFSCORRUPTED;
2093 goto error0;
2094 }
2bd0ea18
NS
2095 nbno = ltbno;
2096 nlen = len + ltlen;
0e266570 2097 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
2098 goto error0;
2099 }
2100 /*
2101 * Have only a right contiguous neighbor.
2102 * Merge it together with the new freespace.
2103 */
2104 else if (haveright) {
2105 /*
2106 * Delete the old by-size entry on the right.
2107 */
0e266570 2108 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18 2109 goto error0;
fbb4fa7f
DW
2110 if (XFS_IS_CORRUPT(mp, i != 1)) {
2111 error = -EFSCORRUPTED;
2112 goto error0;
2113 }
b194c7d8 2114 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 2115 goto error0;
fbb4fa7f
DW
2116 if (XFS_IS_CORRUPT(mp, i != 1)) {
2117 error = -EFSCORRUPTED;
2118 goto error0;
2119 }
2bd0ea18 2120 /*
5000d01d 2121 * Update the starting block and length of the right
2bd0ea18
NS
2122 * neighbor in the by-block tree.
2123 */
2124 nbno = bno;
2125 nlen = len + gtlen;
0e266570 2126 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
2127 goto error0;
2128 }
2129 /*
2130 * No contiguous neighbors.
2131 * Insert the new freespace into the by-block tree.
2132 */
2133 else {
2134 nbno = bno;
2135 nlen = len;
b194c7d8 2136 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18 2137 goto error0;
fbb4fa7f
DW
2138 if (XFS_IS_CORRUPT(mp, i != 1)) {
2139 error = -EFSCORRUPTED;
2140 goto error0;
2141 }
2bd0ea18
NS
2142 }
2143 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
2144 bno_cur = NULL;
2145 /*
2146 * In all cases we need to insert the new freespace in the by-size tree.
2147 */
0e266570 2148 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2bd0ea18 2149 goto error0;
fbb4fa7f
DW
2150 if (XFS_IS_CORRUPT(mp, i != 0)) {
2151 error = -EFSCORRUPTED;
2152 goto error0;
2153 }
b194c7d8 2154 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 2155 goto error0;
fbb4fa7f
DW
2156 if (XFS_IS_CORRUPT(mp, i != 1)) {
2157 error = -EFSCORRUPTED;
2158 goto error0;
2159 }
2bd0ea18
NS
2160 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
2161 cnt_cur = NULL;
a2ceac1f 2162
2bd0ea18
NS
2163 /*
2164 * Update the freespace totals in the ag and superblock.
2165 */
a2ceac1f
DC
2166 pag = xfs_perag_get(mp, agno);
2167 error = xfs_alloc_update_counters(tp, pag, agbp, len);
cf8ce220 2168 xfs_ag_resv_free_extent(pag, type, tp, len);
a2ceac1f
DC
2169 xfs_perag_put(pag);
2170 if (error)
2171 goto error0;
2172
79896434
BD
2173 XFS_STATS_INC(mp, xs_freex);
2174 XFS_STATS_ADD(mp, xs_freeb, len);
56b2de80 2175
65a15e06 2176 trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
3e535bba 2177
2bd0ea18
NS
2178 return 0;
2179
2180 error0:
65a15e06 2181 trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
2bd0ea18
NS
2182 if (bno_cur)
2183 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
2184 if (cnt_cur)
2185 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
2186 return error;
2187}
2188
5000d01d 2189/*
2bd0ea18
NS
2190 * Visible (exported) allocation/free functions.
2191 * Some of these are used just by xfs_alloc_btree.c and this file.
2192 */
2193
2194/*
2195 * Compute and fill in value of m_ag_maxlevels.
2196 */
2197void
2198xfs_alloc_compute_maxlevels(
2199 xfs_mount_t *mp) /* file system mount structure */
2200{
1421de38 2201 mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
730e2a19 2202 (mp->m_sb.sb_agblocks + 1) / 2);
2bd0ea18
NS
2203}
2204
56b2de80 2205/*
cf8ce220
DW
2206 * Find the length of the longest extent in an AG. The 'need' parameter
2207 * specifies how much space we're going to need for the AGFL and the
2208 * 'reserved' parameter tells us how many blocks in this AG are reserved for
2209 * other callers.
56b2de80
DC
2210 */
2211xfs_extlen_t
2212xfs_alloc_longest_free_extent(
72bda06d 2213 struct xfs_perag *pag,
cf8ce220
DW
2214 xfs_extlen_t need,
2215 xfs_extlen_t reserved)
56b2de80 2216{
72bda06d 2217 xfs_extlen_t delta = 0;
56b2de80 2218
cf8ce220
DW
2219 /*
2220 * If the AGFL needs a recharge, we'll have to subtract that from the
2221 * longest extent.
2222 */
56b2de80
DC
2223 if (need > pag->pagf_flcount)
2224 delta = need - pag->pagf_flcount;
2225
cf8ce220
DW
2226 /*
2227 * If we cannot maintain others' reservations with space from the
2228 * not-longest freesp extents, we'll have to subtract /that/ from
2229 * the longest extent too.
2230 */
2231 if (pag->pagf_freeblks - pag->pagf_longest < reserved)
2232 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
2233
2234 /*
2235 * If the longest extent is long enough to satisfy all the
2236 * reservations and AGFL rules in place, we can return this extent.
2237 */
56b2de80 2238 if (pag->pagf_longest > delta)
14058d94
DC
2239 return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
2240 pag->pagf_longest - delta);
cf8ce220
DW
2241
2242 /* Otherwise, let the caller try for 1 block if there's space. */
56b2de80
DC
2243 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
2244}
2245
b75bb1bd
DW
2246/*
2247 * Compute the minimum length of the AGFL in the given AG. If @pag is NULL,
2248 * return the largest possible minimum length.
2249 */
de046644
DC
2250unsigned int
2251xfs_alloc_min_freelist(
2252 struct xfs_mount *mp,
2253 struct xfs_perag *pag)
2254{
b75bb1bd
DW
2255 /* AG btrees have at least 1 level. */
2256 static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
2257 const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
de046644
DC
2258 unsigned int min_free;
2259
b75bb1bd
DW
2260 ASSERT(mp->m_ag_maxlevels > 0);
2261
de046644 2262 /* space needed by-bno freespace btree */
b75bb1bd 2263 min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
de046644
DC
2264 mp->m_ag_maxlevels);
2265 /* space needed by-size freespace btree */
b75bb1bd 2266 min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
de046644 2267 mp->m_ag_maxlevels);
b8a8d6e5
DW
2268 /* space needed reverse mapping used space btree */
2269 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
b75bb1bd
DW
2270 min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
2271 mp->m_rmap_maxlevels);
de046644
DC
2272
2273 return min_free;
2274}
2275
5515b7c1
DC
2276/*
2277 * Check if the operation we are fixing up the freelist for should go ahead or
2278 * not. If we are freeing blocks, we always allow it, otherwise the allocation
2279 * is dependent on whether the size and shape of free space available will
2280 * permit the requested allocation to take place.
2281 */
2282static bool
2283xfs_alloc_space_available(
2284 struct xfs_alloc_arg *args,
2285 xfs_extlen_t min_free,
2286 int flags)
2287{
2288 struct xfs_perag *pag = args->pag;
3fe4a6dd 2289 xfs_extlen_t alloc_len, longest;
cf8ce220 2290 xfs_extlen_t reservation; /* blocks that are still reserved */
5515b7c1 2291 int available;
580a4380 2292 xfs_extlen_t agflcount;
5515b7c1
DC
2293
2294 if (flags & XFS_ALLOC_FLAG_FREEING)
2295 return true;
2296
cf8ce220
DW
2297 reservation = xfs_ag_resv_needed(pag, args->resv);
2298
5515b7c1 2299 /* do we have enough contiguous free space for the allocation? */
3fe4a6dd 2300 alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
1421de38 2301 longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
3fe4a6dd 2302 if (longest < alloc_len)
5515b7c1
DC
2303 return false;
2304
580a4380
BF
2305 /*
2306 * Do we have enough free space remaining for the allocation? Don't
2307 * account extra agfl blocks because we are about to defer free them,
2308 * making them unavailable until the current transaction commits.
2309 */
2310 agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
2311 available = (int)(pag->pagf_freeblks + agflcount -
2c003dc2 2312 reservation - min_free - args->minleft);
3fe4a6dd 2313 if (available < (int)max(args->total, alloc_len))
5515b7c1
DC
2314 return false;
2315
2c003dc2
CH
2316 /*
2317 * Clamp maxlen to the amount of free space available for the actual
2318 * extent allocation.
2319 */
2320 if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
2321 args->maxlen = available;
2322 ASSERT(args->maxlen > 0);
2323 ASSERT(args->maxlen >= args->minlen);
2324 }
2325
5515b7c1
DC
2326 return true;
2327}
2328
30c8be8a
BF
2329int
2330xfs_free_agfl_block(
2331 struct xfs_trans *tp,
2332 xfs_agnumber_t agno,
2333 xfs_agblock_t agbno,
2334 struct xfs_buf *agbp,
2335 struct xfs_owner_info *oinfo)
2336{
2337 int error;
2338 struct xfs_buf *bp;
2339
2340 error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
2341 XFS_AG_RESV_AGFL);
2342 if (error)
2343 return error;
2344
78fcd346
DW
2345 error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
2346 XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
2347 tp->t_mountp->m_bsize, 0, &bp);
2348 if (error)
2349 return error;
30c8be8a
BF
2350 xfs_trans_binval(tp, bp);
2351
2352 return 0;
2353}
2354
8dbee8f5
BF
2355/*
2356 * Check the agfl fields of the agf for inconsistency or corruption. The purpose
2357 * is to detect an agfl header padding mismatch between current and early v5
2358 * kernels. This problem manifests as a 1-slot size difference between the
2359 * on-disk flcount and the active [first, last] range of a wrapped agfl. This
2360 * may also catch variants of agfl count corruption unrelated to padding. Either
2361 * way, we'll reset the agfl and warn the user.
2362 *
2363 * Return true if a reset is required before the agfl can be used, false
2364 * otherwise.
2365 */
2366static bool
2367xfs_agfl_needs_reset(
2368 struct xfs_mount *mp,
2369 struct xfs_agf *agf)
2370{
2371 uint32_t f = be32_to_cpu(agf->agf_flfirst);
2372 uint32_t l = be32_to_cpu(agf->agf_fllast);
2373 uint32_t c = be32_to_cpu(agf->agf_flcount);
2374 int agfl_size = xfs_agfl_size(mp);
2375 int active;
2376
2377 /* no agfl header on v4 supers */
2378 if (!xfs_sb_version_hascrc(&mp->m_sb))
2379 return false;
2380
2381 /*
2382 * The agf read verifier catches severe corruption of these fields.
2383 * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2384 * the verifier allows it.
2385 */
2386 if (f >= agfl_size || l >= agfl_size)
2387 return true;
2388 if (c > agfl_size)
2389 return true;
2390
2391 /*
2392 * Check consistency between the on-disk count and the active range. An
2393 * agfl padding mismatch manifests as an inconsistent flcount.
2394 */
2395 if (c && l >= f)
2396 active = l - f + 1;
2397 else if (c)
2398 active = agfl_size - f + l + 1;
2399 else
2400 active = 0;
2401
2402 return active != c;
2403}
2404
2405/*
2406 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
2407 * agfl content cannot be trusted. Warn the user that a repair is required to
2408 * recover leaked blocks.
2409 *
2410 * The purpose of this mechanism is to handle filesystems affected by the agfl
2411 * header padding mismatch problem. A reset keeps the filesystem online with a
2412 * relatively minor free space accounting inconsistency rather than suffer the
2413 * inevitable crash from use of an invalid agfl block.
2414 */
2415static void
2416xfs_agfl_reset(
2417 struct xfs_trans *tp,
2418 struct xfs_buf *agbp,
2419 struct xfs_perag *pag)
2420{
2421 struct xfs_mount *mp = tp->t_mountp;
0bc284c2 2422 struct xfs_agf *agf = agbp->b_addr;
8dbee8f5
BF
2423
2424 ASSERT(pag->pagf_agflreset);
2425 trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
2426
2427 xfs_warn(mp,
2428 "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
2429 "Please unmount and run xfs_repair.",
2430 pag->pag_agno, pag->pagf_flcount);
2431
2432 agf->agf_flfirst = 0;
2433 agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
2434 agf->agf_flcount = 0;
2435 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
2436 XFS_AGF_FLCOUNT);
2437
2438 pag->pagf_flcount = 0;
2439 pag->pagf_agflreset = false;
2440}
2441
d5c1b462
BF
2442/*
2443 * Defer an AGFL block free. This is effectively equivalent to
2444 * xfs_bmap_add_free() with some special handling particular to AGFL blocks.
2445 *
2446 * Deferring AGFL frees helps prevent log reservation overruns due to too many
2447 * allocation operations in a transaction. AGFL frees are prone to this problem
2448 * because for one they are always freed one at a time. Further, an immediate
2449 * AGFL block free can cause a btree join and require another block free before
2450 * the real allocation can proceed. Deferring the free disconnects freeing up
2451 * the AGFL slot from freeing the block.
2452 */
2453STATIC void
2454xfs_defer_agfl_block(
21375e5d 2455 struct xfs_trans *tp,
d5c1b462
BF
2456 xfs_agnumber_t agno,
2457 xfs_fsblock_t agbno,
2458 struct xfs_owner_info *oinfo)
2459{
21375e5d 2460 struct xfs_mount *mp = tp->t_mountp;
d5c1b462
BF
2461 struct xfs_extent_free_item *new; /* new element */
2462
2463 ASSERT(xfs_bmap_free_item_zone != NULL);
2464 ASSERT(oinfo != NULL);
2465
6cd1e6db 2466 new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0);
d5c1b462
BF
2467 new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
2468 new->xefi_blockcount = 1;
2469 new->xefi_oinfo = *oinfo;
2470
2471 trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
2472
21375e5d 2473 xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
d5c1b462
BF
2474}
2475
2bd0ea18
NS
2476/*
2477 * Decide whether to use this allocation group for this allocation.
2478 * If so, fix up the btree freelist's size.
2bd0ea18 2479 */
ff105f75 2480int /* error */
2bd0ea18 2481xfs_alloc_fix_freelist(
c98e644e
DC
2482 struct xfs_alloc_arg *args, /* allocation argument structure */
2483 int flags) /* XFS_ALLOC_FLAG_... */
2bd0ea18 2484{
c98e644e
DC
2485 struct xfs_mount *mp = args->mp;
2486 struct xfs_perag *pag = args->pag;
2487 struct xfs_trans *tp = args->tp;
2488 struct xfs_buf *agbp = NULL;
2489 struct xfs_buf *agflbp = NULL;
2490 struct xfs_alloc_arg targs; /* local allocation arguments */
2491 xfs_agblock_t bno; /* freelist block */
2492 xfs_extlen_t need; /* total blocks needed in freelist */
fcdd428c 2493 int error = 0;
c98e644e 2494
d85d3424
BF
2495 /* deferred ops (AGFL block frees) require permanent transactions */
2496 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
2497
2bd0ea18 2498 if (!pag->pagf_init) {
c98e644e 2499 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
475f184c
DW
2500 if (error) {
2501 /* Couldn't lock the AGF so skip this AG. */
2502 if (error == -EAGAIN)
2503 error = 0;
c98e644e 2504 goto out_no_agbp;
2bd0ea18 2505 }
c98e644e 2506 }
34317449 2507
5e656dbb 2508 /*
c98e644e
DC
2509 * If this is a metadata preferred pag and we are user data then try
2510 * somewhere else if we are not being asked to try harder at this
2511 * point
34317449 2512 */
a85522b6 2513 if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) &&
5e656dbb
BN
2514 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
2515 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
c98e644e 2516 goto out_agbp_relse;
34317449
NS
2517 }
2518
de046644 2519 need = xfs_alloc_min_freelist(mp, pag);
2c003dc2
CH
2520 if (!xfs_alloc_space_available(args, need, flags |
2521 XFS_ALLOC_FLAG_CHECK))
c98e644e 2522 goto out_agbp_relse;
5e656dbb 2523
2bd0ea18
NS
2524 /*
2525 * Get the a.g. freespace buffer.
2526 * Can fail if we're not blocking on locks, and it's held.
2527 */
c98e644e
DC
2528 if (!agbp) {
2529 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
475f184c
DW
2530 if (error) {
2531 /* Couldn't lock the AGF so skip this AG. */
2532 if (error == -EAGAIN)
2533 error = 0;
c98e644e 2534 goto out_no_agbp;
2bd0ea18
NS
2535 }
2536 }
72bda06d 2537
8dbee8f5
BF
2538 /* reset a padding mismatched agfl before final free space check */
2539 if (pag->pagf_agflreset)
2540 xfs_agfl_reset(tp, agbp, pag);
2541
72bda06d 2542 /* If there isn't enough total space or single-extent, reject it. */
de046644 2543 need = xfs_alloc_min_freelist(mp, pag);
c98e644e
DC
2544 if (!xfs_alloc_space_available(args, need, flags))
2545 goto out_agbp_relse;
5515b7c1 2546
2bd0ea18
NS
2547 /*
2548 * Make the freelist shorter if it's too long.
72bda06d 2549 *
c98e644e
DC
2550 * Note that from this point onwards, we will always release the agf and
2551 * agfl buffers on error. This handles the case where we error out and
2552 * the buffers are clean or may not have been joined to the transaction
2553 * and hence need to be released manually. If they have been joined to
2554 * the transaction, then xfs_trans_brelse() will handle them
2555 * appropriately based on the recursion count and dirty state of the
2556 * buffer.
2557 *
72bda06d
DC
2558 * XXX (dgc): When we have lots of free space, does this buy us
2559 * anything other than extra overhead when we need to put more blocks
2560 * back on the free list? Maybe we should only do this when space is
2561 * getting low or the AGFL is more than half full?
e365af6f
DW
2562 *
2563 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
2564 * big; the NORMAP flag prevents AGFL expand/shrink operations from
2565 * updating the rmapbt. Both flags are used in xfs_repair while we're
2566 * rebuilding the rmapbt, and neither are used by the kernel. They're
2567 * both required to ensure that rmaps are correctly recorded for the
2568 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
2569 * repair/rmap.c in xfsprogs for details.
2bd0ea18 2570 */
e365af6f 2571 memset(&targs, 0, sizeof(targs));
007347e3 2572 /* struct copy below */
e365af6f 2573 if (flags & XFS_ALLOC_FLAG_NORMAP)
007347e3 2574 targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
e365af6f 2575 else
007347e3 2576 targs.oinfo = XFS_RMAP_OINFO_AG;
e365af6f 2577 while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
5e656dbb
BN
2578 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2579 if (error)
c98e644e 2580 goto out_agbp_relse;
30c8be8a 2581
6dc24128
BF
2582 /* defer agfl frees */
2583 xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
2bd0ea18 2584 }
72bda06d 2585
2bd0ea18
NS
2586 targs.tp = tp;
2587 targs.mp = mp;
2588 targs.agbp = agbp;
2589 targs.agno = args->agno;
cf8ce220 2590 targs.alignment = targs.minlen = targs.prod = 1;
2bd0ea18
NS
2591 targs.type = XFS_ALLOCTYPE_THIS_AG;
2592 targs.pag = pag;
72bda06d
DC
2593 error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2594 if (error)
c98e644e 2595 goto out_agbp_relse;
72bda06d
DC
2596
2597 /* Make the freelist longer if it's too short. */
2598 while (pag->pagf_flcount < need) {
2bd0ea18 2599 targs.agbno = 0;
72bda06d 2600 targs.maxlen = need - pag->pagf_flcount;
9760cac2 2601 targs.resv = XFS_AG_RESV_AGFL;
72bda06d
DC
2602
2603 /* Allocate as many blocks as possible at once. */
2604 error = xfs_alloc_ag_vextent(&targs);
c98e644e
DC
2605 if (error)
2606 goto out_agflbp_relse;
2607
2bd0ea18 2608 /*
dfc130f3
RC
2609 * Stop if we run out. Won't happen if callers are obeying
2610 * the restrictions correctly. Can happen for free calls
2bd0ea18
NS
2611 * on a completely full ag.
2612 */
5e656dbb
BN
2613 if (targs.agbno == NULLAGBLOCK) {
2614 if (flags & XFS_ALLOC_FLAG_FREEING)
2615 break;
c98e644e 2616 goto out_agflbp_relse;
5e656dbb 2617 }
2bd0ea18
NS
2618 /*
2619 * Put each allocated block on the list.
2620 */
2621 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
5e656dbb
BN
2622 error = xfs_alloc_put_freelist(tp, agbp,
2623 agflbp, bno, 0);
2624 if (error)
c98e644e 2625 goto out_agflbp_relse;
2bd0ea18
NS
2626 }
2627 }
cb4deb22 2628 xfs_trans_brelse(tp, agflbp);
2bd0ea18
NS
2629 args->agbp = agbp;
2630 return 0;
c98e644e
DC
2631
2632out_agflbp_relse:
2633 xfs_trans_brelse(tp, agflbp);
2634out_agbp_relse:
2635 if (agbp)
2636 xfs_trans_brelse(tp, agbp);
2637out_no_agbp:
2638 args->agbp = NULL;
2639 return error;
2bd0ea18
NS
2640}
2641
2642/*
2643 * Get a block from the freelist.
2644 * Returns with the buffer for the block gotten.
2645 */
2646int /* error */
2647xfs_alloc_get_freelist(
2648 xfs_trans_t *tp, /* transaction pointer */
2649 xfs_buf_t *agbp, /* buffer containing the agf structure */
cdded3d8
DC
2650 xfs_agblock_t *bnop, /* block address retrieved from freelist */
2651 int btreeblk) /* destination is a AGF btree */
2bd0ea18 2652{
0bc284c2 2653 struct xfs_agf *agf = agbp->b_addr;
2bd0ea18
NS
2654 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
2655 xfs_agblock_t bno; /* block number returned */
dd5b876e 2656 __be32 *agfl_bno;
2bd0ea18 2657 int error;
cdded3d8 2658 int logflags;
dd5b876e 2659 xfs_mount_t *mp = tp->t_mountp;
2bd0ea18
NS
2660 xfs_perag_t *pag; /* per allocation group data */
2661
2bd0ea18
NS
2662 /*
2663 * Freelist is empty, give up.
2664 */
46eca962 2665 if (!agf->agf_flcount) {
2bd0ea18
NS
2666 *bnop = NULLAGBLOCK;
2667 return 0;
2668 }
2669 /*
2670 * Read the array of free blocks.
2671 */
dd5b876e
DC
2672 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2673 &agflbp);
2674 if (error)
2bd0ea18 2675 return error;
dd5b876e
DC
2676
2677
2bd0ea18
NS
2678 /*
2679 * Get the block number and update the data structures.
2680 */
b134e771 2681 agfl_bno = xfs_buf_to_agfl_bno(agflbp);
dd5b876e 2682 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
5e656dbb 2683 be32_add_cpu(&agf->agf_flfirst, 1);
2bd0ea18 2684 xfs_trans_brelse(tp, agflbp);
b8165508 2685 if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
46eca962 2686 agf->agf_flfirst = 0;
56b2de80
DC
2687
2688 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
8dbee8f5 2689 ASSERT(!pag->pagf_agflreset);
5e656dbb 2690 be32_add_cpu(&agf->agf_flcount, -1);
2bd0ea18
NS
2691 xfs_trans_agflist_delta(tp, -1);
2692 pag->pagf_flcount--;
cdded3d8
DC
2693
2694 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2695 if (btreeblk) {
5e656dbb 2696 be32_add_cpu(&agf->agf_btreeblks, 1);
cdded3d8
DC
2697 pag->pagf_btreeblks++;
2698 logflags |= XFS_AGF_BTREEBLKS;
2699 }
8648192f 2700 xfs_perag_put(pag);
cdded3d8 2701
cdded3d8 2702 xfs_alloc_log_agf(tp, agbp, logflags);
2bd0ea18 2703 *bnop = bno;
3e535bba 2704
2bd0ea18
NS
2705 return 0;
2706}
2707
2708/*
2709 * Log the given fields from the agf structure.
2710 */
2711void
2712xfs_alloc_log_agf(
2713 xfs_trans_t *tp, /* transaction pointer */
2714 xfs_buf_t *bp, /* buffer for a.g. freelist header */
dfc130f3 2715 int fields) /* mask of fields to be logged (XFS_AGF_...) */
2bd0ea18
NS
2716{
2717 int first; /* first byte offset */
2718 int last; /* last byte offset */
2719 static const short offsets[] = {
2720 offsetof(xfs_agf_t, agf_magicnum),
2721 offsetof(xfs_agf_t, agf_versionnum),
2722 offsetof(xfs_agf_t, agf_seqno),
2723 offsetof(xfs_agf_t, agf_length),
2724 offsetof(xfs_agf_t, agf_roots[0]),
2725 offsetof(xfs_agf_t, agf_levels[0]),
2726 offsetof(xfs_agf_t, agf_flfirst),
2727 offsetof(xfs_agf_t, agf_fllast),
2728 offsetof(xfs_agf_t, agf_flcount),
2729 offsetof(xfs_agf_t, agf_freeblks),
2730 offsetof(xfs_agf_t, agf_longest),
cdded3d8 2731 offsetof(xfs_agf_t, agf_btreeblks),
dd5b876e 2732 offsetof(xfs_agf_t, agf_uuid),
8511b71a 2733 offsetof(xfs_agf_t, agf_rmap_blocks),
bc859611
DW
2734 offsetof(xfs_agf_t, agf_refcount_blocks),
2735 offsetof(xfs_agf_t, agf_refcount_root),
2736 offsetof(xfs_agf_t, agf_refcount_level),
8511b71a
DW
2737 /* needed so that we don't log the whole rest of the structure: */
2738 offsetof(xfs_agf_t, agf_spare64),
2bd0ea18
NS
2739 sizeof(xfs_agf_t)
2740 };
2741
0bc284c2 2742 trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);
56b2de80 2743
bdc16ee5 2744 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
dd5b876e 2745
2bd0ea18
NS
2746 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2747 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2748}
2749
2750/*
2751 * Interface for inode allocation to force the pag data to be initialized.
2752 */
2753int /* error */
2754xfs_alloc_pagf_init(
2755 xfs_mount_t *mp, /* file system mount structure */
2756 xfs_trans_t *tp, /* transaction pointer */
2757 xfs_agnumber_t agno, /* allocation group number */
2758 int flags) /* XFS_ALLOC_FLAGS_... */
2759{
7a3bffe4 2760 xfs_buf_t *bp;
2bd0ea18
NS
2761 int error;
2762
475f184c
DW
2763 error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp);
2764 if (!error)
2bd0ea18 2765 xfs_trans_brelse(tp, bp);
475f184c 2766 return error;
2bd0ea18
NS
2767}
2768
2769/*
2770 * Put the block on the freelist for the allocation group.
2771 */
2772int /* error */
2773xfs_alloc_put_freelist(
2774 xfs_trans_t *tp, /* transaction pointer */
2775 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2776 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
cdded3d8
DC
2777 xfs_agblock_t bno, /* block being freed */
2778 int btreeblk) /* block came from a AGF btree */
2bd0ea18 2779{
0bc284c2
CH
2780 struct xfs_mount *mp = tp->t_mountp;
2781 struct xfs_agf *agf = agbp->b_addr;
5e656dbb 2782 __be32 *blockp;/* pointer to array entry */
2bd0ea18 2783 int error;
cdded3d8 2784 int logflags;
2bd0ea18 2785 xfs_perag_t *pag; /* per allocation group data */
dd5b876e
DC
2786 __be32 *agfl_bno;
2787 int startoff;
2bd0ea18 2788
2bd0ea18 2789 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
6e3140c7 2790 be32_to_cpu(agf->agf_seqno), &agflbp)))
2bd0ea18 2791 return error;
5e656dbb 2792 be32_add_cpu(&agf->agf_fllast, 1);
b8165508 2793 if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
46eca962 2794 agf->agf_fllast = 0;
56b2de80
DC
2795
2796 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
8dbee8f5 2797 ASSERT(!pag->pagf_agflreset);
5e656dbb 2798 be32_add_cpu(&agf->agf_flcount, 1);
2bd0ea18
NS
2799 xfs_trans_agflist_delta(tp, 1);
2800 pag->pagf_flcount++;
cdded3d8
DC
2801
2802 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2803 if (btreeblk) {
5e656dbb 2804 be32_add_cpu(&agf->agf_btreeblks, -1);
cdded3d8
DC
2805 pag->pagf_btreeblks--;
2806 logflags |= XFS_AGF_BTREEBLKS;
2807 }
56b2de80 2808 xfs_perag_put(pag);
cdded3d8 2809
5e656dbb
BN
2810 xfs_alloc_log_agf(tp, agbp, logflags);
2811
b8165508 2812 ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
dd5b876e 2813
b134e771 2814 agfl_bno = xfs_buf_to_agfl_bno(agflbp);
dd5b876e 2815 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
5e656dbb 2816 *blockp = cpu_to_be32(bno);
dd5b876e
DC
2817 startoff = (char *)blockp - (char *)agflbp->b_addr;
2818
cdded3d8 2819 xfs_alloc_log_agf(tp, agbp, logflags);
dd5b876e 2820
bdc16ee5 2821 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
dd5b876e
DC
2822 xfs_trans_log_buf(tp, agflbp, startoff,
2823 startoff + sizeof(xfs_agblock_t) - 1);
2bd0ea18
NS
2824 return 0;
2825}
2826
bc01119d 2827static xfs_failaddr_t
a2ceac1f 2828xfs_agf_verify(
95d9582b
DW
2829 struct xfs_buf *bp)
2830{
7861ef77 2831 struct xfs_mount *mp = bp->b_mount;
0bc284c2 2832 struct xfs_agf *agf = bp->b_addr;
a2ceac1f 2833
a65d8d29
BF
2834 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2835 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
bc01119d 2836 return __this_address;
0bc284c2 2837 if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
bc01119d 2838 return __this_address;
a65d8d29 2839 }
a2ceac1f 2840
68dbe77f
BF
2841 if (!xfs_verify_magic(bp, agf->agf_magicnum))
2842 return __this_address;
2843
2844 if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
dd5b876e 2845 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
b8165508
DC
2846 be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
2847 be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
2848 be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
bc01119d 2849 return __this_address;
a2ceac1f 2850
0f24228d
ZB
2851 if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks)
2852 return __this_address;
2853
2854 if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
2855 be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))
2856 return __this_address;
2857
00795aae
DW
2858 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
2859 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
2860 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
5a35bf2c 2861 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
bc01119d 2862 return __this_address;
5a35bf2c 2863
e37838e5 2864 if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
00795aae
DW
2865 (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
2866 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
bc01119d 2867 return __this_address;
e37838e5 2868
0f24228d
ZB
2869 if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
2870 be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length))
2871 return __this_address;
2872
a2ceac1f
DC
2873 /*
2874 * during growfs operations, the perag is not fully initialised,
2875 * so we can't use it for any useful checking. growfs ensures we can't
2876 * use it by using uncached buffers that don't have the perag attached
2877 * so we can detect and avoid this problem.
2878 */
dd5b876e 2879 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
bc01119d 2880 return __this_address;
a2ceac1f 2881
dd5b876e
DC
2882 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2883 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
bc01119d 2884 return __this_address;
dd5b876e 2885
0f24228d
ZB
2886 if (xfs_sb_version_hasreflink(&mp->m_sb) &&
2887 be32_to_cpu(agf->agf_refcount_blocks) >
2888 be32_to_cpu(agf->agf_length))
2889 return __this_address;
2890
88ce0792 2891 if (xfs_sb_version_hasreflink(&mp->m_sb) &&
00795aae
DW
2892 (be32_to_cpu(agf->agf_refcount_level) < 1 ||
2893 be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
bc01119d 2894 return __this_address;
88ce0792 2895
bc01119d 2896 return NULL;
a2ceac1f 2897
a2ceac1f
DC
2898}
2899
2900static void
2901xfs_agf_read_verify(
2902 struct xfs_buf *bp)
2903{
7861ef77 2904 struct xfs_mount *mp = bp->b_mount;
1e697959 2905 xfs_failaddr_t fa;
dd5b876e 2906
45922933
DC
2907 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2908 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
1e697959
DW
2909 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
2910 else {
95d9582b 2911 fa = xfs_agf_verify(bp);
1e697959
DW
2912 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
2913 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2914 }
a2ceac1f
DC
2915}
2916
2917static void
2918xfs_agf_write_verify(
2919 struct xfs_buf *bp)
2920{
7861ef77 2921 struct xfs_mount *mp = bp->b_mount;
37d086ca 2922 struct xfs_buf_log_item *bip = bp->b_log_item;
0bc284c2 2923 struct xfs_agf *agf = bp->b_addr;
1e697959 2924 xfs_failaddr_t fa;
dd5b876e 2925
95d9582b 2926 fa = xfs_agf_verify(bp);
1e697959
DW
2927 if (fa) {
2928 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
dd5b876e
DC
2929 return;
2930 }
2931
2932 if (!xfs_sb_version_hascrc(&mp->m_sb))
2933 return;
2934
2935 if (bip)
0bc284c2 2936 agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
dd5b876e 2937
43b5aeed 2938 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
a2ceac1f
DC
2939}
2940
2941const struct xfs_buf_ops xfs_agf_buf_ops = {
a3fac935 2942 .name = "xfs_agf",
68dbe77f 2943 .magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
a2ceac1f
DC
2944 .verify_read = xfs_agf_read_verify,
2945 .verify_write = xfs_agf_write_verify,
95d9582b 2946 .verify_struct = xfs_agf_verify,
a2ceac1f
DC
2947};
2948
2bd0ea18
NS
2949/*
2950 * Read in the allocation group header (free/alloc section).
2951 */
2952int /* error */
56b2de80
DC
2953xfs_read_agf(
2954 struct xfs_mount *mp, /* mount point structure */
2955 struct xfs_trans *tp, /* transaction pointer */
2956 xfs_agnumber_t agno, /* allocation group number */
2957 int flags, /* XFS_BUF_ */
2958 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2bd0ea18 2959{
9440d84d 2960 int error;
2bd0ea18 2961
ff105f75
DC
2962 trace_xfs_read_agf(mp, agno);
2963
2bd0ea18 2964 ASSERT(agno != NULLAGNUMBER);
3e6069a1 2965 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
9440d84d 2966 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
a2ceac1f 2967 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
9440d84d 2968 if (error)
2bd0ea18 2969 return error;
56b2de80 2970
a2ceac1f
DC
2971 ASSERT(!(*bpp)->b_error);
2972 xfs_buf_set_ref(*bpp, XFS_AGF_REF);
56b2de80
DC
2973 return 0;
2974}
2975
2976/*
2977 * Read in the allocation group header (free/alloc section).
2978 */
2979int /* error */
2980xfs_alloc_read_agf(
2981 struct xfs_mount *mp, /* mount point structure */
2982 struct xfs_trans *tp, /* transaction pointer */
2983 xfs_agnumber_t agno, /* allocation group number */
2984 int flags, /* XFS_ALLOC_FLAG_... */
2985 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2986{
2987 struct xfs_agf *agf; /* ag freelist header */
2988 struct xfs_perag *pag; /* per allocation group data */
2989 int error;
2990
ff105f75 2991 trace_xfs_alloc_read_agf(mp, agno);
56b2de80 2992
475f184c
DW
2993 /* We don't support trylock when freeing. */
2994 ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
2995 (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
ff105f75 2996 ASSERT(agno != NULLAGNUMBER);
56b2de80
DC
2997 error = xfs_read_agf(mp, tp, agno,
2998 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2999 bpp);
3000 if (error)
3001 return error;
a2ceac1f 3002 ASSERT(!(*bpp)->b_error);
56b2de80 3003
0bc284c2 3004 agf = (*bpp)->b_addr;
56b2de80 3005 pag = xfs_perag_get(mp, agno);
2bd0ea18 3006 if (!pag->pagf_init) {
6e3140c7 3007 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
cdded3d8 3008 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
6e3140c7
NS
3009 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
3010 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2bd0ea18 3011 pag->pagf_levels[XFS_BTNUM_BNOi] =
6e3140c7 3012 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2bd0ea18 3013 pag->pagf_levels[XFS_BTNUM_CNTi] =
6e3140c7 3014 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
e37838e5
DW
3015 pag->pagf_levels[XFS_BTNUM_RMAPi] =
3016 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
88ce0792 3017 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
2bd0ea18 3018 pag->pagf_init = 1;
8dbee8f5 3019 pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
2bd0ea18
NS
3020 }
3021#ifdef DEBUG
3022 else if (!XFS_FORCED_SHUTDOWN(mp)) {
6e3140c7 3023 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
cdded3d8 3024 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
6e3140c7
NS
3025 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
3026 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2bd0ea18 3027 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
6e3140c7 3028 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2bd0ea18 3029 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
6e3140c7 3030 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2bd0ea18
NS
3031 }
3032#endif
56b2de80 3033 xfs_perag_put(pag);
2bd0ea18
NS
3034 return 0;
3035}
3036
3037/*
3038 * Allocate an extent (variable-size).
3039 * Depending on the allocation type, we either look in a single allocation
3040 * group or loop over the allocation groups to find the result.
3041 */
3042int /* error */
3043xfs_alloc_vextent(
03aeb29d 3044 struct xfs_alloc_arg *args) /* allocation argument structure */
2bd0ea18 3045{
03aeb29d
BF
3046 xfs_agblock_t agsize; /* allocation group size */
3047 int error;
3048 int flags; /* XFS_ALLOC_FLAG_... locking flags */
3049 struct xfs_mount *mp; /* mount structure pointer */
3050 xfs_agnumber_t sagno; /* starting allocation group number */
3051 xfs_alloctype_t type; /* input allocation type */
3052 int bump_rotor = 0;
3053 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2bd0ea18
NS
3054
3055 mp = args->mp;
3056 type = args->otype = args->type;
3057 args->agbno = NULLAGBLOCK;
3058 /*
3059 * Just fix this up, for the case where the last a.g. is shorter
3060 * (or there's only one a.g.) and the caller couldn't easily figure
3061 * that out (xfs_bmap_alloc).
3062 */
3063 agsize = mp->m_sb.sb_agblocks;
3064 if (args->maxlen > agsize)
3065 args->maxlen = agsize;
3066 if (args->alignment == 0)
3067 args->alignment = 1;
3068 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
3069 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
3070 ASSERT(args->minlen <= args->maxlen);
3071 ASSERT(args->minlen <= agsize);
3072 ASSERT(args->mod < args->prod);
3073 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
3074 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
3075 args->minlen > args->maxlen || args->minlen > agsize ||
3076 args->mod >= args->prod) {
3077 args->fsbno = NULLFSBLOCK;
56b2de80 3078 trace_xfs_alloc_vextent_badargs(args);
2bd0ea18
NS
3079 return 0;
3080 }
9baa549b 3081
2bd0ea18
NS
3082 switch (type) {
3083 case XFS_ALLOCTYPE_THIS_AG:
3084 case XFS_ALLOCTYPE_NEAR_BNO:
3085 case XFS_ALLOCTYPE_THIS_BNO:
3086 /*
3087 * These three force us into a single a.g.
3088 */
3089 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
56b2de80 3090 args->pag = xfs_perag_get(mp, args->agno);
2bd0ea18 3091 error = xfs_alloc_fix_freelist(args, 0);
2bd0ea18 3092 if (error) {
56b2de80 3093 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
3094 goto error0;
3095 }
3096 if (!args->agbp) {
56b2de80 3097 trace_xfs_alloc_vextent_noagbp(args);
2bd0ea18
NS
3098 break;
3099 }
3100 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
0e266570 3101 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 3102 goto error0;
2bd0ea18
NS
3103 break;
3104 case XFS_ALLOCTYPE_START_BNO:
3105 /*
3106 * Try near allocation first, then anywhere-in-ag after
3107 * the first a.g. fails.
3108 */
1fccd5c8 3109 if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
34317449 3110 (mp->m_flags & XFS_MOUNT_32BITINODES)) {
46eca962
NS
3111 args->fsbno = XFS_AGB_TO_FSB(mp,
3112 ((mp->m_agfrotor / rotorstep) %
3113 mp->m_sb.sb_agcount), 0);
34317449
NS
3114 bump_rotor = 1;
3115 }
2bd0ea18
NS
3116 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
3117 args->type = XFS_ALLOCTYPE_NEAR_BNO;
3118 /* FALLTHROUGH */
2bd0ea18
NS
3119 case XFS_ALLOCTYPE_FIRST_AG:
3120 /*
3121 * Rotate through the allocation groups looking for a winner.
3122 */
f3eda3a5 3123 if (type == XFS_ALLOCTYPE_FIRST_AG) {
2bd0ea18
NS
3124 /*
3125 * Start with allocation group given by bno.
3126 */
3127 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
3128 args->type = XFS_ALLOCTYPE_THIS_AG;
3129 sagno = 0;
3130 flags = 0;
3131 } else {
2bd0ea18
NS
3132 /*
3133 * Start with the given allocation group.
3134 */
3135 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
3136 flags = XFS_ALLOC_FLAG_TRYLOCK;
3137 }
3138 /*
3139 * Loop over allocation groups twice; first time with
3140 * trylock set, second time without.
3141 */
3142 for (;;) {
56b2de80 3143 args->pag = xfs_perag_get(mp, args->agno);
9baa549b 3144 error = xfs_alloc_fix_freelist(args, flags);
9baa549b 3145 if (error) {
56b2de80 3146 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
3147 goto error0;
3148 }
3149 /*
3150 * If we get a buffer back then the allocation will fly.
3151 */
3152 if (args->agbp) {
0e266570 3153 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 3154 goto error0;
2bd0ea18
NS
3155 break;
3156 }
56b2de80
DC
3157
3158 trace_xfs_alloc_vextent_loopfailed(args);
3159
2bd0ea18
NS
3160 /*
3161 * Didn't work, figure out the next iteration.
3162 */
3163 if (args->agno == sagno &&
3164 type == XFS_ALLOCTYPE_START_BNO)
3165 args->type = XFS_ALLOCTYPE_THIS_AG;
5e656dbb
BN
3166 /*
3167 * For the first allocation, we can try any AG to get
3168 * space. However, if we already have allocated a
3169 * block, we don't want to try AGs whose number is below
3170 * sagno. Otherwise, we may end up with out-of-order
3171 * locking of AGF, which might cause deadlock.
3172 */
3173 if (++(args->agno) == mp->m_sb.sb_agcount) {
03aeb29d 3174 if (args->tp->t_firstblock != NULLFSBLOCK)
5e656dbb
BN
3175 args->agno = sagno;
3176 else
3177 args->agno = 0;
3178 }
5000d01d 3179 /*
2bd0ea18
NS
3180 * Reached the starting a.g., must either be done
3181 * or switch to non-trylock mode.
3182 */
3183 if (args->agno == sagno) {
a3b4a951 3184 if (flags == 0) {
2bd0ea18 3185 args->agbno = NULLAGBLOCK;
56b2de80 3186 trace_xfs_alloc_vextent_allfailed(args);
2bd0ea18
NS
3187 break;
3188 }
a3b4a951
CH
3189
3190 flags = 0;
3191 if (type == XFS_ALLOCTYPE_START_BNO) {
3192 args->agbno = XFS_FSB_TO_AGBNO(mp,
3193 args->fsbno);
3194 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2bd0ea18
NS
3195 }
3196 }
56b2de80 3197 xfs_perag_put(args->pag);
2bd0ea18 3198 }
f3eda3a5 3199 if (bump_rotor) {
46eca962
NS
3200 if (args->agno == sagno)
3201 mp->m_agfrotor = (mp->m_agfrotor + 1) %
3202 (mp->m_sb.sb_agcount * rotorstep);
3203 else
3204 mp->m_agfrotor = (args->agno * rotorstep + 1) %
3205 (mp->m_sb.sb_agcount * rotorstep);
3206 }
2bd0ea18
NS
3207 break;
3208 default:
3209 ASSERT(0);
3210 /* NOTREACHED */
3211 }
3212 if (args->agbno == NULLAGBLOCK)
3213 args->fsbno = NULLFSBLOCK;
3214 else {
3215 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
3216#ifdef DEBUG
3217 ASSERT(args->len >= args->minlen);
3218 ASSERT(args->len <= args->maxlen);
3219 ASSERT(args->agbno % args->alignment == 0);
3220 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
3221 args->len);
3222#endif
9542ae13 3223
2bd0ea18 3224 }
56b2de80 3225 xfs_perag_put(args->pag);
2bd0ea18
NS
3226 return 0;
3227error0:
56b2de80 3228 xfs_perag_put(args->pag);
2bd0ea18
NS
3229 return error;
3230}
3231
2a6da3b8
DC
3232/* Ensure that the freelist is at full capacity. */
3233int
3234xfs_free_extent_fix_freelist(
3235 struct xfs_trans *tp,
3236 xfs_agnumber_t agno,
3237 struct xfs_buf **agbp)
2bd0ea18 3238{
2a6da3b8
DC
3239 struct xfs_alloc_arg args;
3240 int error;
2bd0ea18 3241
2a6da3b8 3242 memset(&args, 0, sizeof(struct xfs_alloc_arg));
2bd0ea18
NS
3243 args.tp = tp;
3244 args.mp = tp->t_mountp;
2a6da3b8 3245 args.agno = agno;
a2ceac1f
DC
3246
3247 /*
3248 * validate that the block number is legal - the enables us to detect
3249 * and handle a silent filesystem corruption rather than crashing.
3250 */
a2ceac1f 3251 if (args.agno >= args.mp->m_sb.sb_agcount)
12b53197 3252 return -EFSCORRUPTED;
a2ceac1f 3253
56b2de80 3254 args.pag = xfs_perag_get(args.mp, args.agno);
a2ceac1f
DC
3255 ASSERT(args.pag);
3256
3257 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
3258 if (error)
2a6da3b8
DC
3259 goto out;
3260
3261 *agbp = args.agbp;
3262out:
3263 xfs_perag_put(args.pag);
3264 return error;
3265}
3266
3267/*
3268 * Free an extent.
3269 * Just break up the extent address and hand off to xfs_free_ag_extent
3270 * after fixing up the freelist.
3271 */
5837e73b 3272int
3a13f959 3273__xfs_free_extent(
5837e73b
DW
3274 struct xfs_trans *tp,
3275 xfs_fsblock_t bno,
3276 xfs_extlen_t len,
3277 const struct xfs_owner_info *oinfo,
3278 enum xfs_ag_resv_type type,
3279 bool skip_discard)
2a6da3b8 3280{
5837e73b
DW
3281 struct xfs_mount *mp = tp->t_mountp;
3282 struct xfs_buf *agbp;
3283 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
3284 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
0bc284c2 3285 struct xfs_agf *agf;
5837e73b
DW
3286 int error;
3287 unsigned int busy_flags = 0;
2a6da3b8
DC
3288
3289 ASSERT(len != 0);
9760cac2 3290 ASSERT(type != XFS_AG_RESV_AGFL);
2a6da3b8 3291
a9da40de 3292 if (XFS_TEST_ERROR(false, mp,
e2a190dd 3293 XFS_ERRTAG_FREE_EXTENT))
a9da40de
DW
3294 return -EIO;
3295
2a6da3b8
DC
3296 error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
3297 if (error)
3298 return error;
0bc284c2 3299 agf = agbp->b_addr;
2a6da3b8 3300
fbb4fa7f
DW
3301 if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
3302 error = -EFSCORRUPTED;
3303 goto err;
3304 }
a2ceac1f
DC
3305
3306 /* validate the extent size is legal now we have the agf locked */
0bc284c2 3307 if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
fbb4fa7f
DW
3308 error = -EFSCORRUPTED;
3309 goto err;
3310 }
a2ceac1f 3311
cf8ce220 3312 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
2a6da3b8
DC
3313 if (error)
3314 goto err;
3315
3a13f959
BF
3316 if (skip_discard)
3317 busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
3318 xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags);
2a6da3b8
DC
3319 return 0;
3320
3321err:
3322 xfs_trans_brelse(tp, agbp);
2bd0ea18
NS
3323 return error;
3324}
b3d83fa6
DW
3325
3326struct xfs_alloc_query_range_info {
3327 xfs_alloc_query_range_fn fn;
3328 void *priv;
3329};
3330
3331/* Format btree record and pass to our callback. */
3332STATIC int
3333xfs_alloc_query_range_helper(
3334 struct xfs_btree_cur *cur,
3335 union xfs_btree_rec *rec,
3336 void *priv)
3337{
3338 struct xfs_alloc_query_range_info *query = priv;
3339 struct xfs_alloc_rec_incore irec;
3340
3341 irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
3342 irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
3343 return query->fn(cur, &irec, query->priv);
3344}
3345
3346/* Find all free space within a given range of blocks. */
3347int
3348xfs_alloc_query_range(
3349 struct xfs_btree_cur *cur,
3350 struct xfs_alloc_rec_incore *low_rec,
3351 struct xfs_alloc_rec_incore *high_rec,
3352 xfs_alloc_query_range_fn fn,
3353 void *priv)
3354{
3355 union xfs_btree_irec low_brec;
3356 union xfs_btree_irec high_brec;
3357 struct xfs_alloc_query_range_info query;
3358
3359 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3360 low_brec.a = *low_rec;
3361 high_brec.a = *high_rec;
3362 query.priv = priv;
3363 query.fn = fn;
3364 return xfs_btree_query_range(cur, &low_brec, &high_brec,
3365 xfs_alloc_query_range_helper, &query);
3366}
7e05e856
DW
3367
3368/* Find all free space records. */
3369int
3370xfs_alloc_query_all(
3371 struct xfs_btree_cur *cur,
3372 xfs_alloc_query_range_fn fn,
3373 void *priv)
3374{
3375 struct xfs_alloc_query_range_info query;
3376
3377 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3378 query.priv = priv;
3379 query.fn = fn;
3380 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3381}
9bef6258 3382
1fe41a73
DW
3383/* Is there a record covering a given extent? */
3384int
3385xfs_alloc_has_record(
3386 struct xfs_btree_cur *cur,
3387 xfs_agblock_t bno,
3388 xfs_extlen_t len,
3389 bool *exists)
3390{
3391 union xfs_btree_irec low;
3392 union xfs_btree_irec high;
3393
3394 memset(&low, 0, sizeof(low));
3395 low.a.ar_startblock = bno;
3396 memset(&high, 0xFF, sizeof(high));
3397 high.a.ar_startblock = bno + len - 1;
3398
3399 return xfs_btree_has_record(cur, &low, &high, exists);
3400}
71a98c66
DW
3401
3402/*
3403 * Walk all the blocks in the AGFL. The @walk_fn can return any negative
4a509d6d 3404 * error code or XFS_ITER_*.
71a98c66
DW
3405 */
3406int
3407xfs_agfl_walk(
3408 struct xfs_mount *mp,
3409 struct xfs_agf *agf,
3410 struct xfs_buf *agflbp,
3411 xfs_agfl_walk_fn walk_fn,
3412 void *priv)
3413{
3414 __be32 *agfl_bno;
3415 unsigned int i;
3416 int error;
3417
b134e771 3418 agfl_bno = xfs_buf_to_agfl_bno(agflbp);
71a98c66
DW
3419 i = be32_to_cpu(agf->agf_flfirst);
3420
3421 /* Nothing to walk in an empty AGFL. */
3422 if (agf->agf_flcount == cpu_to_be32(0))
3423 return 0;
3424
3425 /* Otherwise, walk from first to last, wrapping as needed. */
3426 for (;;) {
3427 error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
3428 if (error)
3429 return error;
3430 if (i == be32_to_cpu(agf->agf_fllast))
3431 break;
3432 if (++i == xfs_agfl_size(mp))
3433 i = 0;
3434 }
3435
3436 return 0;
3437}