]> git.ipfire.org Git - people/ms/linux.git/blame - fs/xfs/libxfs/xfs_alloc.c
xfs: pass perag to xfs_read_agf
[people/ms/linux.git] / fs / xfs / libxfs / xfs_alloc.c
CommitLineData
0b61f8a4 1// SPDX-License-Identifier: GPL-2.0
1da177e4 2/*
7b718769
NS
3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
1da177e4 5 */
1da177e4 6#include "xfs.h"
a844f451 7#include "xfs_fs.h"
70a9883c 8#include "xfs_format.h"
239880ef 9#include "xfs_log_format.h"
70a9883c 10#include "xfs_shared.h"
239880ef 11#include "xfs_trans_resv.h"
a844f451 12#include "xfs_bit.h"
1da177e4 13#include "xfs_mount.h"
3ab78df2 14#include "xfs_defer.h"
1da177e4 15#include "xfs_btree.h"
673930c3 16#include "xfs_rmap.h"
a4fbe6ab 17#include "xfs_alloc_btree.h"
1da177e4 18#include "xfs_alloc.h"
efc27b52 19#include "xfs_extent_busy.h"
e9e899a2 20#include "xfs_errortag.h"
1da177e4 21#include "xfs_error.h"
0b1b213f 22#include "xfs_trace.h"
239880ef 23#include "xfs_trans.h"
4e0e6040 24#include "xfs_buf_item.h"
239880ef 25#include "xfs_log.h"
9bbafc71 26#include "xfs_ag.h"
3fd129b6 27#include "xfs_ag_resv.h"
f8f2835a
BF
28#include "xfs_bmap.h"
29
c201d9ca 30struct kmem_cache *xfs_extfree_item_cache;
1da177e4 31
c999a223 32struct workqueue_struct *xfs_alloc_wq;
1da177e4
LT
33
34#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
35
36#define XFSA_FIXUP_BNO_OK 1
37#define XFSA_FIXUP_CNT_OK 2
38
1da177e4
LT
39STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
40STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
41STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
1da177e4 42
a78ee256
DC
43/*
44 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
45 * the beginning of the block for a proper header with the location information
46 * and CRC.
47 */
48unsigned int
49xfs_agfl_size(
50 struct xfs_mount *mp)
51{
52 unsigned int size = mp->m_sb.sb_sectsize;
53
38c26bfd 54 if (xfs_has_crc(mp))
a78ee256
DC
55 size -= sizeof(struct xfs_agfl);
56
57 return size / sizeof(xfs_agblock_t);
58}
59
af30dfa1
DW
60unsigned int
61xfs_refc_block(
62 struct xfs_mount *mp)
63{
38c26bfd 64 if (xfs_has_rmapbt(mp))
af30dfa1 65 return XFS_RMAP_BLOCK(mp) + 1;
38c26bfd 66 if (xfs_has_finobt(mp))
af30dfa1
DW
67 return XFS_FIBT_BLOCK(mp) + 1;
68 return XFS_IBT_BLOCK(mp) + 1;
69}
70
8018026e
DW
71xfs_extlen_t
72xfs_prealloc_blocks(
73 struct xfs_mount *mp)
74{
38c26bfd 75 if (xfs_has_reflink(mp))
af30dfa1 76 return xfs_refc_block(mp) + 1;
38c26bfd 77 if (xfs_has_rmapbt(mp))
8018026e 78 return XFS_RMAP_BLOCK(mp) + 1;
38c26bfd 79 if (xfs_has_finobt(mp))
8018026e
DW
80 return XFS_FIBT_BLOCK(mp) + 1;
81 return XFS_IBT_BLOCK(mp) + 1;
82}
83
52548852 84/*
93defd5a
DW
85 * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
86 * guarantee that we can refill the AGFL prior to allocating space in a nearly
87 * full AG. Although the space described by the free space btrees, the
88 * blocks used by the freesp btrees themselves, and the blocks owned by the
89 * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk
90 * free space in the AG drop so low that the free space btrees cannot refill an
91 * empty AGFL up to the minimum level. Rather than grind through empty AGs
92 * until the fs goes down, we subtract this many AG blocks from the incore
93 * fdblocks to ensure user allocation does not overcommit the space the
94 * filesystem needs for the AGFLs. The rmap btree uses a per-AG reservation to
95 * withhold space from xfs_mod_fdblocks, so we do not account for that here.
96 */
97#define XFS_ALLOCBT_AGFL_RESERVE 4
98
99/*
100 * Compute the number of blocks that we set aside to guarantee the ability to
101 * refill the AGFL and handle a full bmap btree split.
102 *
52548852
DW
103 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
104 * AGF buffer (PV 947395), we place constraints on the relationship among
105 * actual allocations for data blocks, freelist blocks, and potential file data
106 * bmap btree blocks. However, these restrictions may result in no actual space
107 * allocated for a delayed extent, for example, a data block in a certain AG is
108 * allocated but there is no additional block for the additional bmap btree
109 * block due to a split of the bmap btree of the file. The result of this may
110 * lead to an infinite loop when the file gets flushed to disk and all delayed
111 * extents need to be actually allocated. To get around this, we explicitly set
112 * aside a few blocks which will not be reserved in delayed allocation.
113 *
93defd5a
DW
114 * For each AG, we need to reserve enough blocks to replenish a totally empty
115 * AGFL and 4 more to handle a potential split of the file's bmap btree.
52548852
DW
116 */
117unsigned int
118xfs_alloc_set_aside(
119 struct xfs_mount *mp)
120{
93defd5a 121 return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4);
52548852
DW
122}
123
124/*
125 * When deciding how much space to allocate out of an AG, we limit the
126 * allocation maximum size to the size the AG. However, we cannot use all the
127 * blocks in the AG - some are permanently used by metadata. These
128 * blocks are generally:
129 * - the AG superblock, AGF, AGI and AGFL
130 * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
131 * the AGI free inode and rmap btree root blocks.
132 * - blocks on the AGFL according to xfs_alloc_set_aside() limits
133 * - the rmapbt root block
134 *
135 * The AG headers are sector sized, so the amount of space they take up is
136 * dependent on filesystem geometry. The others are all single blocks.
137 */
138unsigned int
139xfs_alloc_ag_max_usable(
140 struct xfs_mount *mp)
141{
142 unsigned int blocks;
143
144 blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
93defd5a 145 blocks += XFS_ALLOCBT_AGFL_RESERVE;
52548852 146 blocks += 3; /* AGF, AGI btree root blocks */
38c26bfd 147 if (xfs_has_finobt(mp))
52548852 148 blocks++; /* finobt root block */
38c26bfd 149 if (xfs_has_rmapbt(mp))
93defd5a 150 blocks++; /* rmap root block */
38c26bfd 151 if (xfs_has_reflink(mp))
d0e853f3 152 blocks++; /* refcount root block */
52548852
DW
153
154 return mp->m_sb.sb_agblocks - blocks;
155}
156
fe033cc8
CH
157/*
158 * Lookup the record equal to [bno, len] in the btree given by cur.
159 */
160STATIC int /* error */
161xfs_alloc_lookup_eq(
162 struct xfs_btree_cur *cur, /* btree cursor */
163 xfs_agblock_t bno, /* starting block of extent */
164 xfs_extlen_t len, /* length of extent */
165 int *stat) /* success/failure */
166{
f6b428a4
BF
167 int error;
168
fe033cc8
CH
169 cur->bc_rec.a.ar_startblock = bno;
170 cur->bc_rec.a.ar_blockcount = len;
f6b428a4 171 error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
c4aa10d0 172 cur->bc_ag.abt.active = (*stat == 1);
f6b428a4 173 return error;
fe033cc8
CH
174}
175
176/*
177 * Lookup the first record greater than or equal to [bno, len]
178 * in the btree given by cur.
179 */
a66d6363 180int /* error */
fe033cc8
CH
181xfs_alloc_lookup_ge(
182 struct xfs_btree_cur *cur, /* btree cursor */
183 xfs_agblock_t bno, /* starting block of extent */
184 xfs_extlen_t len, /* length of extent */
185 int *stat) /* success/failure */
186{
f6b428a4
BF
187 int error;
188
fe033cc8
CH
189 cur->bc_rec.a.ar_startblock = bno;
190 cur->bc_rec.a.ar_blockcount = len;
f6b428a4 191 error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
c4aa10d0 192 cur->bc_ag.abt.active = (*stat == 1);
f6b428a4 193 return error;
fe033cc8
CH
194}
195
196/*
197 * Lookup the first record less than or equal to [bno, len]
198 * in the btree given by cur.
199 */
ce1d802e 200int /* error */
fe033cc8
CH
201xfs_alloc_lookup_le(
202 struct xfs_btree_cur *cur, /* btree cursor */
203 xfs_agblock_t bno, /* starting block of extent */
204 xfs_extlen_t len, /* length of extent */
205 int *stat) /* success/failure */
206{
f6b428a4 207 int error;
fe033cc8
CH
208 cur->bc_rec.a.ar_startblock = bno;
209 cur->bc_rec.a.ar_blockcount = len;
f6b428a4 210 error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
c4aa10d0 211 cur->bc_ag.abt.active = (*stat == 1);
f6b428a4
BF
212 return error;
213}
214
215static inline bool
216xfs_alloc_cur_active(
217 struct xfs_btree_cur *cur)
218{
c4aa10d0 219 return cur && cur->bc_ag.abt.active;
fe033cc8
CH
220}
221
278d0ca1
CH
222/*
223 * Update the record referred to by cur to the value given
224 * by [bno, len].
225 * This either works (return 0) or gets an EFSCORRUPTED error.
226 */
227STATIC int /* error */
228xfs_alloc_update(
229 struct xfs_btree_cur *cur, /* btree cursor */
230 xfs_agblock_t bno, /* starting block of extent */
231 xfs_extlen_t len) /* length of extent */
232{
233 union xfs_btree_rec rec;
234
235 rec.alloc.ar_startblock = cpu_to_be32(bno);
236 rec.alloc.ar_blockcount = cpu_to_be32(len);
237 return xfs_btree_update(cur, &rec);
238}
fe033cc8 239
8cc938fe
CH
240/*
241 * Get the data from the pointed-to record.
242 */
a46db608 243int /* error */
8cc938fe
CH
244xfs_alloc_get_rec(
245 struct xfs_btree_cur *cur, /* btree cursor */
246 xfs_agblock_t *bno, /* output: starting block of extent */
247 xfs_extlen_t *len, /* output: length of extent */
248 int *stat) /* output: success/failure */
249{
9e6c08d4 250 struct xfs_mount *mp = cur->bc_mp;
50f02fe3 251 xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno;
8cc938fe
CH
252 union xfs_btree_rec *rec;
253 int error;
254
255 error = xfs_btree_get_rec(cur, &rec, stat);
a37f7b12
DW
256 if (error || !(*stat))
257 return error;
a37f7b12
DW
258
259 *bno = be32_to_cpu(rec->alloc.ar_startblock);
260 *len = be32_to_cpu(rec->alloc.ar_blockcount);
261
efe80327
CM
262 if (*len == 0)
263 goto out_bad_rec;
264
9e6c08d4
DC
265 /* check for valid extent range, including overflow */
266 if (!xfs_verify_agbno(mp, agno, *bno))
267 goto out_bad_rec;
268 if (*bno > *bno + *len)
269 goto out_bad_rec;
270 if (!xfs_verify_agbno(mp, agno, *bno + *len - 1))
271 goto out_bad_rec;
272
273 return 0;
274
275out_bad_rec:
276 xfs_warn(mp,
277 "%s Freespace BTree record corruption in AG %d detected!",
278 cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno);
279 xfs_warn(mp,
280 "start block 0x%x block count 0x%x", *bno, *len);
281 return -EFSCORRUPTED;
8cc938fe
CH
282}
283
1da177e4
LT
284/*
285 * Compute aligned version of the found extent.
286 * Takes alignment and min length into account.
287 */
ebf55872 288STATIC bool
1da177e4 289xfs_alloc_compute_aligned(
86fa8af6 290 xfs_alloc_arg_t *args, /* allocation argument structure */
1da177e4
LT
291 xfs_agblock_t foundbno, /* starting block in found extent */
292 xfs_extlen_t foundlen, /* length in found extent */
1da177e4 293 xfs_agblock_t *resbno, /* result block number */
ebf55872
CH
294 xfs_extlen_t *reslen, /* result length */
295 unsigned *busy_gen)
1da177e4 296{
ebf55872
CH
297 xfs_agblock_t bno = foundbno;
298 xfs_extlen_t len = foundlen;
bfe46d4e 299 xfs_extlen_t diff;
ebf55872 300 bool busy;
1da177e4 301
e26f0501 302 /* Trim busy sections out of found extent */
ebf55872 303 busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
e26f0501 304
bfe46d4e
BF
305 /*
306 * If we have a largish extent that happens to start before min_agbno,
307 * see if we can shift it into range...
308 */
309 if (bno < args->min_agbno && bno + len > args->min_agbno) {
310 diff = args->min_agbno - bno;
311 if (len > diff) {
312 bno += diff;
313 len -= diff;
314 }
315 }
316
e26f0501
CH
317 if (args->alignment > 1 && len >= args->minlen) {
318 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
bfe46d4e
BF
319
320 diff = aligned_bno - bno;
e26f0501
CH
321
322 *resbno = aligned_bno;
323 *reslen = diff >= len ? 0 : len - diff;
1da177e4 324 } else {
e26f0501
CH
325 *resbno = bno;
326 *reslen = len;
1da177e4 327 }
ebf55872
CH
328
329 return busy;
1da177e4
LT
330}
331
332/*
333 * Compute best start block and diff for "near" allocations.
334 * freelen >= wantlen already checked by caller.
335 */
336STATIC xfs_extlen_t /* difference value (absolute) */
337xfs_alloc_compute_diff(
338 xfs_agblock_t wantbno, /* target starting block */
339 xfs_extlen_t wantlen, /* target length */
340 xfs_extlen_t alignment, /* target alignment */
292378ed 341 int datatype, /* are we allocating data? */
1da177e4
LT
342 xfs_agblock_t freebno, /* freespace's starting block */
343 xfs_extlen_t freelen, /* freespace's length */
344 xfs_agblock_t *newbnop) /* result: best start block from free */
345{
346 xfs_agblock_t freeend; /* end of freespace extent */
347 xfs_agblock_t newbno1; /* return block number */
348 xfs_agblock_t newbno2; /* other new block number */
349 xfs_extlen_t newlen1=0; /* length with newbno1 */
350 xfs_extlen_t newlen2=0; /* length with newbno2 */
351 xfs_agblock_t wantend; /* end of target extent */
c34d570d 352 bool userdata = datatype & XFS_ALLOC_USERDATA;
1da177e4
LT
353
354 ASSERT(freelen >= wantlen);
355 freeend = freebno + freelen;
356 wantend = wantbno + wantlen;
211d022c
JK
357 /*
358 * We want to allocate from the start of a free extent if it is past
359 * the desired block or if we are allocating user data and the free
360 * extent is before desired block. The second case is there to allow
361 * for contiguous allocation from the remaining free space if the file
362 * grows in the short term.
363 */
364 if (freebno >= wantbno || (userdata && freeend < wantend)) {
1da177e4
LT
365 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
366 newbno1 = NULLAGBLOCK;
367 } else if (freeend >= wantend && alignment > 1) {
368 newbno1 = roundup(wantbno, alignment);
369 newbno2 = newbno1 - alignment;
370 if (newbno1 >= freeend)
371 newbno1 = NULLAGBLOCK;
372 else
373 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
374 if (newbno2 < freebno)
375 newbno2 = NULLAGBLOCK;
376 else
377 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
378 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
379 if (newlen1 < newlen2 ||
380 (newlen1 == newlen2 &&
381 XFS_ABSDIFF(newbno1, wantbno) >
382 XFS_ABSDIFF(newbno2, wantbno)))
383 newbno1 = newbno2;
384 } else if (newbno2 != NULLAGBLOCK)
385 newbno1 = newbno2;
386 } else if (freeend >= wantend) {
387 newbno1 = wantbno;
388 } else if (alignment > 1) {
389 newbno1 = roundup(freeend - wantlen, alignment);
390 if (newbno1 > freeend - wantlen &&
391 newbno1 - alignment >= freebno)
392 newbno1 -= alignment;
393 else if (newbno1 >= freeend)
394 newbno1 = NULLAGBLOCK;
395 } else
396 newbno1 = freeend - wantlen;
397 *newbnop = newbno1;
398 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
399}
400
401/*
402 * Fix up the length, based on mod and prod.
403 * len should be k * prod + mod for some k.
404 * If len is too small it is returned unchanged.
405 * If len hits maxlen it is left alone.
406 */
407STATIC void
408xfs_alloc_fix_len(
409 xfs_alloc_arg_t *args) /* allocation argument structure */
410{
411 xfs_extlen_t k;
412 xfs_extlen_t rlen;
413
414 ASSERT(args->mod < args->prod);
415 rlen = args->len;
416 ASSERT(rlen >= args->minlen);
417 ASSERT(rlen <= args->maxlen);
418 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
419 (args->mod == 0 && rlen < args->prod))
420 return;
421 k = rlen % args->prod;
422 if (k == args->mod)
423 return;
30265117
JK
424 if (k > args->mod)
425 rlen = rlen - (k - args->mod);
426 else
427 rlen = rlen - args->prod + (args->mod - k);
3790a8cd 428 /* casts to (int) catch length underflows */
30265117
JK
429 if ((int)rlen < (int)args->minlen)
430 return;
431 ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
432 ASSERT(rlen % args->prod == args->mod);
54fee133
CH
433 ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
434 rlen + args->minleft);
1da177e4
LT
435 args->len = rlen;
436}
437
1da177e4
LT
438/*
439 * Update the two btrees, logically removing from freespace the extent
440 * starting at rbno, rlen blocks. The extent is contained within the
441 * actual (current) free extent fbno for flen blocks.
442 * Flags are passed in indicating whether the cursors are set to the
443 * relevant records.
444 */
445STATIC int /* error code */
446xfs_alloc_fixup_trees(
ae127f08
DW
447 struct xfs_btree_cur *cnt_cur, /* cursor for by-size btree */
448 struct xfs_btree_cur *bno_cur, /* cursor for by-block btree */
1da177e4
LT
449 xfs_agblock_t fbno, /* starting block of free extent */
450 xfs_extlen_t flen, /* length of free extent */
451 xfs_agblock_t rbno, /* starting block of returned extent */
452 xfs_extlen_t rlen, /* length of returned extent */
453 int flags) /* flags, XFSA_FIXUP_... */
454{
455 int error; /* error code */
456 int i; /* operation results */
457 xfs_agblock_t nfbno1; /* first new free startblock */
458 xfs_agblock_t nfbno2; /* second new free startblock */
459 xfs_extlen_t nflen1=0; /* first new free length */
460 xfs_extlen_t nflen2=0; /* second new free length */
5fb5aeee
ES
461 struct xfs_mount *mp;
462
463 mp = cnt_cur->bc_mp;
1da177e4
LT
464
465 /*
466 * Look up the record in the by-size tree if necessary.
467 */
468 if (flags & XFSA_FIXUP_CNT_OK) {
469#ifdef DEBUG
470 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
471 return error;
f9e03706
DW
472 if (XFS_IS_CORRUPT(mp,
473 i != 1 ||
474 nfbno1 != fbno ||
475 nflen1 != flen))
476 return -EFSCORRUPTED;
1da177e4
LT
477#endif
478 } else {
479 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
480 return error;
f9e03706
DW
481 if (XFS_IS_CORRUPT(mp, i != 1))
482 return -EFSCORRUPTED;
1da177e4
LT
483 }
484 /*
485 * Look up the record in the by-block tree if necessary.
486 */
487 if (flags & XFSA_FIXUP_BNO_OK) {
488#ifdef DEBUG
489 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
490 return error;
f9e03706
DW
491 if (XFS_IS_CORRUPT(mp,
492 i != 1 ||
493 nfbno1 != fbno ||
494 nflen1 != flen))
495 return -EFSCORRUPTED;
1da177e4
LT
496#endif
497 } else {
498 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
499 return error;
f9e03706
DW
500 if (XFS_IS_CORRUPT(mp, i != 1))
501 return -EFSCORRUPTED;
1da177e4 502 }
7cc95a82 503
1da177e4 504#ifdef DEBUG
7cc95a82
CH
505 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
506 struct xfs_btree_block *bnoblock;
507 struct xfs_btree_block *cntblock;
508
6ca444cf
DW
509 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
510 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);
1da177e4 511
f9e03706
DW
512 if (XFS_IS_CORRUPT(mp,
513 bnoblock->bb_numrecs !=
514 cntblock->bb_numrecs))
515 return -EFSCORRUPTED;
1da177e4
LT
516 }
517#endif
7cc95a82 518
1da177e4
LT
519 /*
520 * Deal with all four cases: the allocated record is contained
521 * within the freespace record, so we can have new freespace
522 * at either (or both) end, or no freespace remaining.
523 */
524 if (rbno == fbno && rlen == flen)
525 nfbno1 = nfbno2 = NULLAGBLOCK;
526 else if (rbno == fbno) {
527 nfbno1 = rbno + rlen;
528 nflen1 = flen - rlen;
529 nfbno2 = NULLAGBLOCK;
530 } else if (rbno + rlen == fbno + flen) {
531 nfbno1 = fbno;
532 nflen1 = flen - rlen;
533 nfbno2 = NULLAGBLOCK;
534 } else {
535 nfbno1 = fbno;
536 nflen1 = rbno - fbno;
537 nfbno2 = rbno + rlen;
538 nflen2 = (fbno + flen) - nfbno2;
539 }
540 /*
541 * Delete the entry from the by-size btree.
542 */
91cca5df 543 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 544 return error;
f9e03706
DW
545 if (XFS_IS_CORRUPT(mp, i != 1))
546 return -EFSCORRUPTED;
1da177e4
LT
547 /*
548 * Add new by-size btree entry(s).
549 */
550 if (nfbno1 != NULLAGBLOCK) {
551 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
552 return error;
f9e03706
DW
553 if (XFS_IS_CORRUPT(mp, i != 0))
554 return -EFSCORRUPTED;
4b22a571 555 if ((error = xfs_btree_insert(cnt_cur, &i)))
1da177e4 556 return error;
f9e03706
DW
557 if (XFS_IS_CORRUPT(mp, i != 1))
558 return -EFSCORRUPTED;
1da177e4
LT
559 }
560 if (nfbno2 != NULLAGBLOCK) {
561 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
562 return error;
f9e03706
DW
563 if (XFS_IS_CORRUPT(mp, i != 0))
564 return -EFSCORRUPTED;
4b22a571 565 if ((error = xfs_btree_insert(cnt_cur, &i)))
1da177e4 566 return error;
f9e03706
DW
567 if (XFS_IS_CORRUPT(mp, i != 1))
568 return -EFSCORRUPTED;
1da177e4
LT
569 }
570 /*
571 * Fix up the by-block btree entry(s).
572 */
573 if (nfbno1 == NULLAGBLOCK) {
574 /*
575 * No remaining freespace, just delete the by-block tree entry.
576 */
91cca5df 577 if ((error = xfs_btree_delete(bno_cur, &i)))
1da177e4 578 return error;
f9e03706
DW
579 if (XFS_IS_CORRUPT(mp, i != 1))
580 return -EFSCORRUPTED;
1da177e4
LT
581 } else {
582 /*
583 * Update the by-block entry to start later|be shorter.
584 */
585 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
586 return error;
587 }
588 if (nfbno2 != NULLAGBLOCK) {
589 /*
590 * 2 resulting free entries, need to add one.
591 */
592 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
593 return error;
f9e03706
DW
594 if (XFS_IS_CORRUPT(mp, i != 0))
595 return -EFSCORRUPTED;
4b22a571 596 if ((error = xfs_btree_insert(bno_cur, &i)))
1da177e4 597 return error;
f9e03706
DW
598 if (XFS_IS_CORRUPT(mp, i != 1))
599 return -EFSCORRUPTED;
1da177e4
LT
600 }
601 return 0;
602}
603
a6a781a5 604static xfs_failaddr_t
612cfbfe 605xfs_agfl_verify(
bb80c6d7
DC
606 struct xfs_buf *bp)
607{
dbd329f1 608 struct xfs_mount *mp = bp->b_mount;
bb80c6d7 609 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
183606d8 610 __be32 *agfl_bno = xfs_buf_to_agfl_bno(bp);
bb80c6d7
DC
611 int i;
612
b5572597
DW
613 /*
614 * There is no verification of non-crc AGFLs because mkfs does not
615 * initialise the AGFL to zero or NULL. Hence the only valid part of the
616 * AGFL is what the AGF says is active. We can't get to the AGF, so we
617 * can't verify just those entries are valid.
618 */
38c26bfd 619 if (!xfs_has_crc(mp))
b5572597
DW
620 return NULL;
621
39708c20 622 if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
a6a781a5 623 return __this_address;
39708c20 624 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
a6a781a5 625 return __this_address;
77c95bba
CH
626 /*
627 * during growfs operations, the perag is not fully initialised,
628 * so we can't use it for any useful checking. growfs ensures we can't
629 * use it by using uncached buffers that don't have the perag attached
630 * so we can detect and avoid this problem.
631 */
632 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
a6a781a5 633 return __this_address;
77c95bba 634
a78ee256 635 for (i = 0; i < xfs_agfl_size(mp); i++) {
183606d8
CH
636 if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
637 be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
a6a781a5 638 return __this_address;
bb80c6d7 639 }
a45086e2 640
a6a781a5
DW
641 if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
642 return __this_address;
643 return NULL;
77c95bba
CH
644}
645
646static void
647xfs_agfl_read_verify(
648 struct xfs_buf *bp)
649{
dbd329f1 650 struct xfs_mount *mp = bp->b_mount;
bc1a09b8 651 xfs_failaddr_t fa;
77c95bba
CH
652
653 /*
654 * There is no verification of non-crc AGFLs because mkfs does not
655 * initialise the AGFL to zero or NULL. Hence the only valid part of the
656 * AGFL is what the AGF says is active. We can't get to the AGF, so we
657 * can't verify just those entries are valid.
658 */
38c26bfd 659 if (!xfs_has_crc(mp))
77c95bba
CH
660 return;
661
ce5028cf 662 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
bc1a09b8
DW
663 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
664 else {
665 fa = xfs_agfl_verify(bp);
666 if (fa)
667 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
668 }
612cfbfe
DC
669}
670
1813dd64 671static void
612cfbfe
DC
672xfs_agfl_write_verify(
673 struct xfs_buf *bp)
674{
dbd329f1 675 struct xfs_mount *mp = bp->b_mount;
fb1755a6 676 struct xfs_buf_log_item *bip = bp->b_log_item;
bc1a09b8 677 xfs_failaddr_t fa;
612cfbfe 678
77c95bba 679 /* no verification of non-crc AGFLs */
38c26bfd 680 if (!xfs_has_crc(mp))
77c95bba
CH
681 return;
682
bc1a09b8
DW
683 fa = xfs_agfl_verify(bp);
684 if (fa) {
685 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
77c95bba
CH
686 return;
687 }
688
689 if (bip)
690 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
691
f1dbcd7e 692 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
bb80c6d7
DC
693}
694
1813dd64 695const struct xfs_buf_ops xfs_agfl_buf_ops = {
233135b7 696 .name = "xfs_agfl",
39708c20 697 .magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
1813dd64
DC
698 .verify_read = xfs_agfl_read_verify,
699 .verify_write = xfs_agfl_write_verify,
b5572597 700 .verify_struct = xfs_agfl_verify,
1813dd64
DC
701};
702
1da177e4
LT
703/*
704 * Read in the allocation group free block array.
705 */
26788097 706int /* error */
1da177e4
LT
707xfs_alloc_read_agfl(
708 xfs_mount_t *mp, /* mount point structure */
709 xfs_trans_t *tp, /* transaction pointer */
710 xfs_agnumber_t agno, /* allocation group number */
e8222613 711 struct xfs_buf **bpp) /* buffer for the ag free block array */
1da177e4 712{
e8222613 713 struct xfs_buf *bp; /* return value */
1da177e4
LT
714 int error;
715
716 ASSERT(agno != NULLAGNUMBER);
717 error = xfs_trans_read_buf(
718 mp, tp, mp->m_ddev_targp,
719 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
1813dd64 720 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
1da177e4
LT
721 if (error)
722 return error;
38f23232 723 xfs_buf_set_ref(bp, XFS_AGFL_REF);
1da177e4
LT
724 *bpp = bp;
725 return 0;
726}
727
ecb6928f
CH
728STATIC int
729xfs_alloc_update_counters(
730 struct xfs_trans *tp,
ecb6928f
CH
731 struct xfs_buf *agbp,
732 long len)
733{
9798f615 734 struct xfs_agf *agf = agbp->b_addr;
ecb6928f 735
92a00544 736 agbp->b_pag->pagf_freeblks += len;
ecb6928f
CH
737 be32_add_cpu(&agf->agf_freeblks, len);
738
ecb6928f 739 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
a5155b87 740 be32_to_cpu(agf->agf_length))) {
8d57c216 741 xfs_buf_mark_corrupt(agbp);
2451337d 742 return -EFSCORRUPTED;
a5155b87 743 }
ecb6928f
CH
744
745 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
746 return 0;
747}
748
1da177e4 749/*
f5e7dbea 750 * Block allocation algorithm and data structures.
1da177e4 751 */
f5e7dbea
BF
752struct xfs_alloc_cur {
753 struct xfs_btree_cur *cnt; /* btree cursors */
754 struct xfs_btree_cur *bnolt;
755 struct xfs_btree_cur *bnogt;
dc8e69bd 756 xfs_extlen_t cur_len;/* current search length */
c62321a2
BF
757 xfs_agblock_t rec_bno;/* extent startblock */
758 xfs_extlen_t rec_len;/* extent length */
759 xfs_agblock_t bno; /* alloc bno */
760 xfs_extlen_t len; /* alloc len */
761 xfs_extlen_t diff; /* diff from search bno */
d6d3aff2
BF
762 unsigned int busy_gen;/* busy state */
763 bool busy;
f5e7dbea
BF
764};
765
766/*
767 * Set up cursors, etc. in the extent allocation cursor. This function can be
768 * called multiple times to reset an initialized structure without having to
769 * reallocate cursors.
770 */
771static int
772xfs_alloc_cur_setup(
773 struct xfs_alloc_arg *args,
774 struct xfs_alloc_cur *acur)
775{
776 int error;
777 int i;
778
779 ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO);
780
dc8e69bd 781 acur->cur_len = args->maxlen;
c62321a2
BF
782 acur->rec_bno = 0;
783 acur->rec_len = 0;
784 acur->bno = 0;
785 acur->len = 0;
396bbf3c 786 acur->diff = -1;
d6d3aff2
BF
787 acur->busy = false;
788 acur->busy_gen = 0;
789
f5e7dbea
BF
790 /*
791 * Perform an initial cntbt lookup to check for availability of maxlen
792 * extents. If this fails, we'll return -ENOSPC to signal the caller to
793 * attempt a small allocation.
794 */
795 if (!acur->cnt)
796 acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
289d38d2 797 args->agbp, args->pag, XFS_BTNUM_CNT);
f5e7dbea
BF
798 error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
799 if (error)
800 return error;
801
802 /*
803 * Allocate the bnobt left and right search cursors.
804 */
805 if (!acur->bnolt)
806 acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
289d38d2 807 args->agbp, args->pag, XFS_BTNUM_BNO);
f5e7dbea
BF
808 if (!acur->bnogt)
809 acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
289d38d2 810 args->agbp, args->pag, XFS_BTNUM_BNO);
f5e7dbea
BF
811 return i == 1 ? 0 : -ENOSPC;
812}
813
814static void
815xfs_alloc_cur_close(
816 struct xfs_alloc_cur *acur,
817 bool error)
818{
819 int cur_error = XFS_BTREE_NOERROR;
820
821 if (error)
822 cur_error = XFS_BTREE_ERROR;
823
824 if (acur->cnt)
825 xfs_btree_del_cursor(acur->cnt, cur_error);
826 if (acur->bnolt)
827 xfs_btree_del_cursor(acur->bnolt, cur_error);
828 if (acur->bnogt)
829 xfs_btree_del_cursor(acur->bnogt, cur_error);
830 acur->cnt = acur->bnolt = acur->bnogt = NULL;
831}
1da177e4 832
396bbf3c
BF
833/*
834 * Check an extent for allocation and track the best available candidate in the
835 * allocation structure. The cursor is deactivated if it has entered an out of
836 * range state based on allocation arguments. Optionally return the extent
837 * extent geometry and allocation status if requested by the caller.
838 */
839static int
840xfs_alloc_cur_check(
841 struct xfs_alloc_arg *args,
842 struct xfs_alloc_cur *acur,
843 struct xfs_btree_cur *cur,
844 int *new)
845{
846 int error, i;
847 xfs_agblock_t bno, bnoa, bnew;
848 xfs_extlen_t len, lena, diff = -1;
849 bool busy;
850 unsigned busy_gen = 0;
851 bool deactivate = false;
fec0afda 852 bool isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;
396bbf3c
BF
853
854 *new = 0;
855
856 error = xfs_alloc_get_rec(cur, &bno, &len, &i);
857 if (error)
858 return error;
f9e03706
DW
859 if (XFS_IS_CORRUPT(args->mp, i != 1))
860 return -EFSCORRUPTED;
396bbf3c
BF
861
862 /*
863 * Check minlen and deactivate a cntbt cursor if out of acceptable size
864 * range (i.e., walking backwards looking for a minlen extent).
865 */
866 if (len < args->minlen) {
fec0afda 867 deactivate = !isbnobt;
396bbf3c
BF
868 goto out;
869 }
870
871 busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
872 &busy_gen);
873 acur->busy |= busy;
874 if (busy)
875 acur->busy_gen = busy_gen;
876 /* deactivate a bnobt cursor outside of locality range */
fec0afda
BF
877 if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
878 deactivate = isbnobt;
396bbf3c 879 goto out;
fec0afda 880 }
396bbf3c
BF
881 if (lena < args->minlen)
882 goto out;
883
884 args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
885 xfs_alloc_fix_len(args);
886 ASSERT(args->len >= args->minlen);
887 if (args->len < acur->len)
888 goto out;
889
890 /*
891 * We have an aligned record that satisfies minlen and beats or matches
892 * the candidate extent size. Compare locality for near allocation mode.
893 */
894 ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
895 diff = xfs_alloc_compute_diff(args->agbno, args->len,
896 args->alignment, args->datatype,
897 bnoa, lena, &bnew);
898 if (bnew == NULLAGBLOCK)
899 goto out;
fec0afda
BF
900
901 /*
902 * Deactivate a bnobt cursor with worse locality than the current best.
903 */
904 if (diff > acur->diff) {
905 deactivate = isbnobt;
396bbf3c 906 goto out;
fec0afda 907 }
396bbf3c
BF
908
909 ASSERT(args->len > acur->len ||
910 (args->len == acur->len && diff <= acur->diff));
911 acur->rec_bno = bno;
912 acur->rec_len = len;
913 acur->bno = bnew;
914 acur->len = args->len;
915 acur->diff = diff;
916 *new = 1;
917
78d7aabd
BF
918 /*
919 * We're done if we found a perfect allocation. This only deactivates
920 * the current cursor, but this is just an optimization to terminate a
921 * cntbt search that otherwise runs to the edge of the tree.
922 */
923 if (acur->diff == 0 && acur->len == args->maxlen)
924 deactivate = true;
396bbf3c
BF
925out:
926 if (deactivate)
c4aa10d0 927 cur->bc_ag.abt.active = false;
396bbf3c
BF
928 trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
929 *new);
930 return 0;
931}
932
d2968825
BF
933/*
934 * Complete an allocation of a candidate extent. Remove the extent from both
935 * trees and update the args structure.
936 */
937STATIC int
938xfs_alloc_cur_finish(
939 struct xfs_alloc_arg *args,
940 struct xfs_alloc_cur *acur)
941{
9798f615 942 struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
d2968825
BF
943 int error;
944
945 ASSERT(acur->cnt && acur->bnolt);
946 ASSERT(acur->bno >= acur->rec_bno);
947 ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
9798f615 948 ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));
d2968825
BF
949
950 error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
951 acur->rec_len, acur->bno, acur->len, 0);
952 if (error)
953 return error;
954
955 args->agbno = acur->bno;
956 args->len = acur->len;
957 args->wasfromfl = 0;
958
959 trace_xfs_alloc_cur(args);
960 return 0;
961}
962
dc8e69bd
BF
/*
 * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
 * bno optimized lookup to search for extents with ideal size and locality.
 * Each call checks at most two records and bumps acur->cur_len so successive
 * calls probe progressively larger extent sizes.
 */
STATIC int
xfs_alloc_cntbt_iter(
	struct xfs_alloc_arg		*args,
	struct xfs_alloc_cur		*acur)
{
	struct xfs_btree_cur	*cur = acur->cnt;
	xfs_agblock_t		bno;
	xfs_extlen_t		len, cur_len;
	int			error;
	int			i;

	/* nothing to do once the cntbt cursor has been deactivated */
	if (!xfs_alloc_cur_active(cur))
		return 0;

	/* locality optimized lookup */
	cur_len = acur->cur_len;
	error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
	if (error)
		return error;
	if (i == 0)
		return 0;
	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
	if (error)
		return error;

	/* check the current record and update search length from it */
	error = xfs_alloc_cur_check(args, acur, cur, &i);
	if (error)
		return error;
	ASSERT(len >= acur->cur_len);
	acur->cur_len = len;

	/*
	 * We looked up the first record >= [agbno, len] above. The agbno is a
	 * secondary key and so the current record may lie just before or after
	 * agbno. If it is past agbno, check the previous record too so long as
	 * the length matches as it may be closer. Don't check a smaller record
	 * because that could deactivate our cursor.
	 */
	if (bno > args->agbno) {
		error = xfs_btree_decrement(cur, 0, &i);
		if (!error && i) {
			error = xfs_alloc_get_rec(cur, &bno, &len, &i);
			if (!error && i && len == acur->cur_len)
				error = xfs_alloc_cur_check(args, acur, cur,
							&i);
		}
		if (error)
			return error;
	}

	/*
	 * Increment the search key until we find at least one allocation
	 * candidate or if the extent we found was larger. Otherwise, double the
	 * search key to optimize the search. Efficiency is more important here
	 * than absolute best locality.
	 */
	cur_len <<= 1;
	if (!acur->len || acur->cur_len >= cur_len)
		acur->cur_len++;
	else
		acur->cur_len = cur_len;

	return error;
}
1032
c63cdd4f
BF
/*
 * Deal with the case where only small freespaces remain. Either return the
 * contents of the last freespace record, or allocate space from the freelist if
 * there is nothing in the tree.
 *
 * On success *stat distinguishes the two outcomes: 0 means a single block was
 * taken from the AGFL (and *fbnop/*flenp describe it), 1 means a normal btree
 * record (possibly none, if *flenp is 0) is being returned to the caller.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_small(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	struct xfs_btree_cur	*ccur,	/* optional by-size cursor */
	xfs_agblock_t		*fbnop,	/* result block number */
	xfs_extlen_t		*flenp,	/* result length */
	int			*stat)	/* status: 0-freelist, 1-normal/none */
{
	struct xfs_agf		*agf = args->agbp->b_addr;
	int			error = 0;
	xfs_agblock_t		fbno = NULLAGBLOCK;
	xfs_extlen_t		flen = 0;
	int			i = 0;

	/*
	 * If a cntbt cursor is provided, try to allocate the largest record in
	 * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
	 * allocation. Make sure to respect minleft even when pulling from the
	 * freelist.
	 */
	if (ccur)
		error = xfs_btree_decrement(ccur, 0, &i);
	if (error)
		goto error;
	if (i) {
		error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
		if (error)
			goto error;
		if (XFS_IS_CORRUPT(args->mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error;
		}
		goto out;
	}

	/*
	 * The AGFL can only satisfy a single unaligned block, and must not be
	 * drained below minleft or used for AGFL-reserved allocations.
	 */
	if (args->minlen != 1 || args->alignment != 1 ||
	    args->resv == XFS_AG_RESV_AGFL ||
	    be32_to_cpu(agf->agf_flcount) <= args->minleft)
		goto out;

	error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
	if (error)
		goto error;
	if (fbno == NULLAGBLOCK)
		goto out;

	xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
			      (args->datatype & XFS_ALLOC_NOBUSY));

	if (args->datatype & XFS_ALLOC_USERDATA) {
		struct xfs_buf	*bp;

		/*
		 * Invalidate any buffer for the former AGFL block in this
		 * transaction before handing the block over to user data.
		 */
		error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
				XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
				args->mp->m_bsize, 0, &bp);
		if (error)
			goto error;
		xfs_trans_binval(args->tp, bp);
	}
	*fbnop = args->agbno = fbno;
	*flenp = args->len = 1;
	if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
		error = -EFSCORRUPTED;
		goto error;
	}
	args->wasfromfl = 1;
	trace_xfs_alloc_small_freelist(args);

	/*
	 * If we're feeding an AGFL block to something that doesn't live in the
	 * free space, we need to clear out the OWN_AG rmap.
	 */
	error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1,
			      &XFS_RMAP_OINFO_AG);
	if (error)
		goto error;

	*stat = 0;
	return 0;

out:
	/*
	 * Can't do the allocation, give up.
	 */
	if (flen < args->minlen) {
		args->agbno = NULLAGBLOCK;
		trace_xfs_alloc_small_notenough(args);
		flen = 0;
	}
	*fbnop = fbno;
	*flenp = flen;
	*stat = 1;
	trace_xfs_alloc_small_done(args);
	return 0;

error:
	trace_xfs_alloc_small_error(args);
	return error;
}
1137
1da177e4
LT
1138/*
1139 * Allocate a variable extent in the allocation group agno.
1140 * Type and bno are used to determine where in the allocation group the
1141 * extent will start.
1142 * Extent's length (returned in *len) will be between minlen and maxlen,
1143 * and of the form k * prod + mod unless there's nothing that large.
1144 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1145 */
1146STATIC int /* error */
1147xfs_alloc_ag_vextent(
1148 xfs_alloc_arg_t *args) /* argument structure for allocation */
1149{
1150 int error=0;
1da177e4
LT
1151
1152 ASSERT(args->minlen > 0);
1153 ASSERT(args->maxlen > 0);
1154 ASSERT(args->minlen <= args->maxlen);
1155 ASSERT(args->mod < args->prod);
1156 ASSERT(args->alignment > 0);
3fd129b6 1157
1da177e4
LT
1158 /*
1159 * Branch to correct routine based on the type.
1160 */
1161 args->wasfromfl = 0;
1162 switch (args->type) {
1163 case XFS_ALLOCTYPE_THIS_AG:
1164 error = xfs_alloc_ag_vextent_size(args);
1165 break;
1166 case XFS_ALLOCTYPE_NEAR_BNO:
1167 error = xfs_alloc_ag_vextent_near(args);
1168 break;
1169 case XFS_ALLOCTYPE_THIS_BNO:
1170 error = xfs_alloc_ag_vextent_exact(args);
1171 break;
1172 default:
1173 ASSERT(0);
1174 /* NOTREACHED */
1175 }
ecb6928f
CH
1176
1177 if (error || args->agbno == NULLAGBLOCK)
1da177e4 1178 return error;
ecb6928f
CH
1179
1180 ASSERT(args->len >= args->minlen);
1181 ASSERT(args->len <= args->maxlen);
0ab32086 1182 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
ecb6928f
CH
1183 ASSERT(args->agbno % args->alignment == 0);
1184
673930c3 1185 /* if not file data, insert new block into the reverse map btree */
33df3a9c 1186 if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
fa9c3c19 1187 error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
673930c3
DW
1188 args->agbno, args->len, &args->oinfo);
1189 if (error)
1190 return error;
1191 }
1192
ecb6928f 1193 if (!args->wasfromfl) {
92a00544 1194 error = xfs_alloc_update_counters(args->tp, args->agbp,
ecb6928f
CH
1195 -((long)(args->len)));
1196 if (error)
1197 return error;
1198
45d06621 1199 ASSERT(!xfs_extent_busy_search(args->mp, args->pag,
e26f0501 1200 args->agbno, args->len));
1da177e4 1201 }
ecb6928f 1202
3fd129b6 1203 xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
ecb6928f 1204
ff6d6af2
BD
1205 XFS_STATS_INC(args->mp, xs_allocx);
1206 XFS_STATS_ADD(args->mp, xs_allocb, args->len);
ecb6928f 1207 return error;
1da177e4
LT
1208}
1209
/*
 * Allocate a variable extent at exactly agno/bno.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 *
 * Note that "failure to allocate" is not an error: the not_found path returns
 * 0 with args->agbno set to NULLAGBLOCK and the caller checks that.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_exact(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
	struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
	struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
	int		error;
	xfs_agblock_t	fbno;	/* start block of found extent */
	xfs_extlen_t	flen;	/* length of found extent */
	xfs_agblock_t	tbno;	/* start block of busy extent */
	xfs_extlen_t	tlen;	/* length of busy extent */
	xfs_agblock_t	tend;	/* end block of busy extent */
	int		i;	/* success/failure of operation */
	unsigned	busy_gen;

	ASSERT(args->alignment == 1);

	/*
	 * Allocate/initialize a cursor for the by-number freespace btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->pag, XFS_BTNUM_BNO);

	/*
	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
	 * Look for the closest free block <= bno, it must contain bno
	 * if any free block does.
	 */
	error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
	if (error)
		goto error0;
	if (!i)
		goto not_found;

	/*
	 * Grab the freespace record.
	 */
	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
	if (error)
		goto error0;
	if (XFS_IS_CORRUPT(args->mp, i != 1)) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(fbno <= args->agbno);

	/*
	 * Check for overlapping busy extents. busy_gen is required by the
	 * trim API but unused here; this path simply gives up if the busy
	 * trim cuts into the requested range.
	 */
	tbno = fbno;
	tlen = flen;
	xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);

	/*
	 * Give up if the start of the extent is busy, or the freespace isn't
	 * long enough for the minimum request.
	 */
	if (tbno > args->agbno)
		goto not_found;
	if (tlen < args->minlen)
		goto not_found;
	tend = tbno + tlen;
	if (tend < args->agbno + args->minlen)
		goto not_found;

	/*
	 * End of extent will be smaller of the freespace end and the
	 * maximal requested end.
	 *
	 * Fix the length according to mod and prod if given.
	 */
	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
						- args->agbno;
	xfs_alloc_fix_len(args);
	ASSERT(args->agbno + args->len <= tend);

	/*
	 * We are allocating agbno for args->len
	 * Allocate/initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->pag, XFS_BTNUM_CNT);
	ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
				      args->len, XFSA_FIXUP_BNO_OK);
	if (error) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
		goto error0;
	}

	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

	args->wasfromfl = 0;
	trace_xfs_alloc_exact_done(args);
	return 0;

not_found:
	/* Didn't find it, return null. */
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	args->agbno = NULLAGBLOCK;
	trace_xfs_alloc_exact_notfound(args);
	return 0;

error0:
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	trace_xfs_alloc_exact_error(args);
	return error;
}
1326
489a150f 1327/*
78d7aabd
BF
1328 * Search a given number of btree records in a given direction. Check each
1329 * record against the good extent we've already found.
489a150f
CH
1330 */
1331STATIC int
78d7aabd 1332xfs_alloc_walk_iter(
fec0afda
BF
1333 struct xfs_alloc_arg *args,
1334 struct xfs_alloc_cur *acur,
1335 struct xfs_btree_cur *cur,
78d7aabd
BF
1336 bool increment,
1337 bool find_one, /* quit on first candidate */
1338 int count, /* rec count (-1 for infinite) */
1339 int *stat)
489a150f 1340{
489a150f
CH
1341 int error;
1342 int i;
489a150f 1343
78d7aabd
BF
1344 *stat = 0;
1345
489a150f 1346 /*
fec0afda
BF
1347 * Search so long as the cursor is active or we find a better extent.
1348 * The cursor is deactivated if it extends beyond the range of the
1349 * current allocation candidate.
489a150f 1350 */
78d7aabd 1351 while (xfs_alloc_cur_active(cur) && count) {
fec0afda 1352 error = xfs_alloc_cur_check(args, acur, cur, &i);
489a150f 1353 if (error)
fec0afda 1354 return error;
78d7aabd
BF
1355 if (i == 1) {
1356 *stat = 1;
1357 if (find_one)
1358 break;
1359 }
fec0afda
BF
1360 if (!xfs_alloc_cur_active(cur))
1361 break;
489a150f 1362
fec0afda
BF
1363 if (increment)
1364 error = xfs_btree_increment(cur, 0, &i);
489a150f 1365 else
fec0afda 1366 error = xfs_btree_decrement(cur, 0, &i);
489a150f 1367 if (error)
fec0afda
BF
1368 return error;
1369 if (i == 0)
c4aa10d0 1370 cur->bc_ag.abt.active = false;
78d7aabd
BF
1371
1372 if (count > 0)
1373 count--;
fec0afda 1374 }
489a150f 1375
489a150f 1376 return 0;
489a150f
CH
1377}
1378
/*
 * Search the by-bno and by-size btrees in parallel in search of an extent with
 * ideal locality based on the NEAR mode ->agbno locality hint.
 *
 * On return *stat is 1 if a candidate was recorded in @acur, 0 otherwise.
 */
STATIC int
xfs_alloc_ag_vextent_locality(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	int			*stat)
{
	struct xfs_btree_cur	*fbcur = NULL;
	int			error;
	int			i;
	bool			fbinc;

	ASSERT(acur->len == 0);
	ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);

	*stat = 0;

	/* position all three cursors around the locality hint */
	error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
	if (error)
		return error;
	error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
	if (error)
		return error;
	error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
	if (error)
		return error;

	/*
	 * Search the bnobt and cntbt in parallel. Search the bnobt left and
	 * right and lookup the closest extent to the locality hint for each
	 * extent size key in the cntbt. The entire search terminates
	 * immediately on a bnobt hit because that means we've found best case
	 * locality. Otherwise the search continues until the cntbt cursor runs
	 * off the end of the tree. If no allocation candidate is found at this
	 * point, give up on locality, walk backwards from the end of the cntbt
	 * and take the first available extent.
	 *
	 * The parallel tree searches balance each other out to provide fairly
	 * consistent performance for various situations. The bnobt search can
	 * have pathological behavior in the worst case scenario of larger
	 * allocation requests and fragmented free space. On the other hand, the
	 * bnobt is able to satisfy most smaller allocation requests much more
	 * quickly than the cntbt. The cntbt search can sift through fragmented
	 * free space and sets of free extents for larger allocation requests
	 * more quickly than the bnobt. Since the locality hint is just a hint
	 * and we don't want to scan the entire bnobt for perfect locality, the
	 * cntbt search essentially bounds the bnobt search such that we can
	 * find good enough locality at reasonable performance in most cases.
	 */
	while (xfs_alloc_cur_active(acur->bnolt) ||
	       xfs_alloc_cur_active(acur->bnogt) ||
	       xfs_alloc_cur_active(acur->cnt)) {

		trace_xfs_alloc_cur_lookup(args);

		/*
		 * Search the bnobt left and right. In the case of a hit, finish
		 * the search in the opposite direction and we're done.
		 */
		error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
					    true, 1, &i);
		if (error)
			return error;
		if (i == 1) {
			trace_xfs_alloc_cur_left(args);
			fbcur = acur->bnogt;
			fbinc = true;
			break;
		}
		error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
					    1, &i);
		if (error)
			return error;
		if (i == 1) {
			trace_xfs_alloc_cur_right(args);
			fbcur = acur->bnolt;
			fbinc = false;
			break;
		}

		/*
		 * Check the extent with best locality based on the current
		 * extent size search key and keep track of the best candidate.
		 */
		error = xfs_alloc_cntbt_iter(args, acur);
		if (error)
			return error;
		if (!xfs_alloc_cur_active(acur->cnt)) {
			trace_xfs_alloc_cur_lookup_done(args);
			break;
		}
	}

	/*
	 * If we failed to find anything due to busy extents, return empty
	 * handed so the caller can flush and retry. If no busy extents were
	 * found, walk backwards from the end of the cntbt as a last resort.
	 */
	if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
		error = xfs_btree_decrement(acur->cnt, 0, &i);
		if (error)
			return error;
		if (i) {
			/* reactivate the cursor for the fallback walk below */
			acur->cnt->bc_ag.abt.active = true;
			fbcur = acur->cnt;
			fbinc = false;
		}
	}

	/*
	 * Search in the opposite direction for a better entry in the case of
	 * a bnobt hit or walk backwards from the end of the cntbt.
	 */
	if (fbcur) {
		error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
					    &i);
		if (error)
			return error;
	}

	if (acur->len)
		*stat = 1;

	return 0;
}
1507
5113f8ec
DW
/*
 * Check the last block of the cnt btree for allocations.
 *
 * Scans the last (rightmost) cntbt leaf for the best candidate and, on
 * success, finishes the allocation and sets *allocated. Returns 0 with
 * *allocated left false when nothing suitable was found so the caller can
 * fall back to the locality search.
 */
static int
xfs_alloc_ag_vextent_lastblock(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	xfs_agblock_t		*bno,
	xfs_extlen_t		*len,
	bool			*allocated)
{
	int			error;
	int			i;

#ifdef DEBUG
	/* Randomly don't execute the first algorithm. */
	if (prandom_u32() & 1)
		return 0;
#endif

	/*
	 * Start from the entry that lookup found, sequence through all larger
	 * free blocks. If we're actually pointing at a record smaller than
	 * maxlen, go to the start of this block, and skip all those smaller
	 * than minlen.
	 */
	if (*len || args->alignment > 1) {
		/* rewind to the first record of the current leaf block */
		acur->cnt->bc_levels[0].ptr = 1;
		do {
			error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
			if (error)
				return error;
			if (XFS_IS_CORRUPT(args->mp, i != 1))
				return -EFSCORRUPTED;
			if (*len >= args->minlen)
				break;
			error = xfs_btree_increment(acur->cnt, 0, &i);
			if (error)
				return error;
		} while (i);
		ASSERT(*len >= args->minlen);
		if (!i)
			return 0;
	}

	error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
	if (error)
		return error;

	/*
	 * It didn't work. We COULD be in a case where there's a good record
	 * somewhere, so try again.
	 */
	if (acur->len == 0)
		return 0;

	trace_xfs_alloc_near_first(args);
	*allocated = true;
	return 0;
}
1566
1da177e4
LT
/*
 * Allocate a variable extent near bno in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int
xfs_alloc_ag_vextent_near(
	struct xfs_alloc_arg	*args)
{
	struct xfs_alloc_cur	acur = {};
	int			error;		/* error code */
	int			i;		/* result code, temporary */
	xfs_agblock_t		bno;
	xfs_extlen_t		len;

	/* handle uninitialized agbno range so caller doesn't have to */
	if (!args->min_agbno && !args->max_agbno)
		args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
	ASSERT(args->min_agbno <= args->max_agbno);

	/* clamp agbno to the range if it's outside */
	if (args->agbno < args->min_agbno)
		args->agbno = args->min_agbno;
	if (args->agbno > args->max_agbno)
		args->agbno = args->max_agbno;

restart:
	len = 0;

	/*
	 * Set up cursors and see if there are any free extents as big as
	 * maxlen. If not, pick the last entry in the tree unless the tree is
	 * empty.
	 */
	error = xfs_alloc_cur_setup(args, &acur);
	if (error == -ENOSPC) {
		/* no maxlen-sized extents; fall back to small allocation */
		error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
				&len, &i);
		if (error)
			goto out;
		if (i == 0 || len == 0) {
			trace_xfs_alloc_near_noentry(args);
			goto out;
		}
		ASSERT(i == 1);
	} else if (error) {
		goto out;
	}

	/*
	 * First algorithm.
	 * If the requested extent is large wrt the freespaces available
	 * in this a.g., then the cursor will be pointing to a btree entry
	 * near the right edge of the tree. If it's in the last btree leaf
	 * block, then we just examine all the entries in that block
	 * that are big enough, and pick the best one.
	 */
	if (xfs_btree_islastblock(acur.cnt, 0)) {
		bool		allocated = false;

		error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
				&allocated);
		if (error)
			goto out;
		if (allocated)
			goto alloc_finish;
	}

	/*
	 * Second algorithm. Combined cntbt and bnobt search to find ideal
	 * locality.
	 */
	error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
	if (error)
		goto out;

	/*
	 * If we couldn't get anything, give up.
	 */
	if (!acur.len) {
		if (acur.busy) {
			/* flush busy extents out of the log and retry */
			trace_xfs_alloc_near_busy(args);
			xfs_extent_busy_flush(args->mp, args->pag,
					      acur.busy_gen);
			goto restart;
		}
		trace_xfs_alloc_size_neither(args);
		args->agbno = NULLAGBLOCK;
		goto out;
	}

alloc_finish:
	/* fix up btrees on a successful allocation */
	error = xfs_alloc_cur_finish(args, &acur);

out:
	/* always tear down the cursors, on success or failure */
	xfs_alloc_cur_close(&acur, error);
	return error;
}
1667
/*
 * Allocate a variable extent anywhere in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_size(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	struct xfs_agf	*agf = args->agbp->b_addr;
	struct xfs_btree_cur *bno_cur;	/* cursor for bno btree */
	struct xfs_btree_cur *cnt_cur;	/* cursor for cnt btree */
	int		error;		/* error result */
	xfs_agblock_t	fbno;		/* start of found freespace */
	xfs_extlen_t	flen;		/* length of found freespace */
	int		i;		/* temp status variable */
	xfs_agblock_t	rbno;		/* returned block number */
	xfs_extlen_t	rlen;		/* length of returned extent */
	bool		busy;
	unsigned	busy_gen;

restart:
	/*
	 * Allocate and initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->pag, XFS_BTNUM_CNT);
	bno_cur = NULL;

	/*
	 * Look for an entry >= maxlen+alignment-1 blocks.
	 */
	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
			args->maxlen + args->alignment - 1, &i)))
		goto error0;

	/*
	 * If none then we have to settle for a smaller extent. In the case that
	 * there are no large extents, this will return the last entry in the
	 * tree unless the tree is empty. In the case that there are only busy
	 * large extents, this will return the largest small extent unless there
	 * are no smaller extents available.
	 */
	if (!i) {
		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
						   &fbno, &flen, &i);
		if (error)
			goto error0;
		if (i == 0 || flen == 0) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_noentry(args);
			return 0;
		}
		ASSERT(i == 1);
		busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
				&rlen, &busy_gen);
	} else {
		/*
		 * Search for a non-busy extent that is large enough.
		 */
		for (;;) {
			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
			if (error)
				goto error0;
			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
				error = -EFSCORRUPTED;
				goto error0;
			}

			busy = xfs_alloc_compute_aligned(args, fbno, flen,
					&rbno, &rlen, &busy_gen);

			if (rlen >= args->maxlen)
				break;

			error = xfs_btree_increment(cnt_cur, 0, &i);
			if (error)
				goto error0;
			if (i == 0) {
				/*
				 * Our only valid extents must have been busy.
				 * Make it unbusy by forcing the log out and
				 * retrying.
				 */
				xfs_btree_del_cursor(cnt_cur,
						     XFS_BTREE_NOERROR);
				trace_xfs_alloc_size_busy(args);
				xfs_extent_busy_flush(args->mp,
							args->pag, busy_gen);
				goto restart;
			}
		}
	}

	/*
	 * In the first case above, we got the last entry in the
	 * by-size btree. Now we check to see if the space hits maxlen
	 * once aligned; if not, we search left for something better.
	 * This can't happen in the second case above.
	 */
	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
	if (XFS_IS_CORRUPT(args->mp,
			   rlen != 0 &&
			   (rlen > flen ||
			    rbno + rlen > fbno + flen))) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	if (rlen < args->maxlen) {
		xfs_agblock_t	bestfbno;
		xfs_extlen_t	bestflen;
		xfs_agblock_t	bestrbno;
		xfs_extlen_t	bestrlen;

		/*
		 * Walk left through smaller records, remembering the best
		 * aligned candidate seen so far.
		 */
		bestrlen = rlen;
		bestrbno = rbno;
		bestflen = flen;
		bestfbno = fbno;
		for (;;) {
			if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
				goto error0;
			if (i == 0)
				break;
			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
					&i)))
				goto error0;
			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
				error = -EFSCORRUPTED;
				goto error0;
			}
			if (flen < bestrlen)
				break;
			busy = xfs_alloc_compute_aligned(args, fbno, flen,
					&rbno, &rlen, &busy_gen);
			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
			if (XFS_IS_CORRUPT(args->mp,
					   rlen != 0 &&
					   (rlen > flen ||
					    rbno + rlen > fbno + flen))) {
				error = -EFSCORRUPTED;
				goto error0;
			}
			if (rlen > bestrlen) {
				bestrlen = rlen;
				bestrbno = rbno;
				bestflen = flen;
				bestfbno = fbno;
				if (rlen == args->maxlen)
					break;
			}
		}
		/* reposition the cursor on the best record found */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
				&i)))
			goto error0;
		if (XFS_IS_CORRUPT(args->mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}
		rlen = bestrlen;
		rbno = bestrbno;
		flen = bestflen;
		fbno = bestfbno;
	}
	args->wasfromfl = 0;
	/*
	 * Fix up the length.
	 */
	args->len = rlen;
	if (rlen < args->minlen) {
		if (busy) {
			/* busy extents were in the way; flush and retry */
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_busy(args);
			xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
			goto restart;
		}
		goto out_nominleft;
	}
	xfs_alloc_fix_len(args);

	rlen = args->len;
	if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	/*
	 * Allocate and initialize a cursor for the by-block tree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->pag, XFS_BTNUM_BNO);
	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
			rbno, rlen, XFSA_FIXUP_CNT_OK)))
		goto error0;
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	cnt_cur = bno_cur = NULL;
	args->len = rlen;
	args->agbno = rbno;
	if (XFS_IS_CORRUPT(args->mp,
			   args->agbno + args->len >
			   be32_to_cpu(agf->agf_length))) {
		error = -EFSCORRUPTED;
		goto error0;
	}
	trace_xfs_alloc_size_done(args);
	return 0;

error0:
	trace_xfs_alloc_size_error(args);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	return error;

out_nominleft:
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	trace_xfs_alloc_size_nominleft(args);
	args->agbno = NULLAGBLOCK;
	return 0;
}
1889
1da177e4
LT
1890/*
1891 * Free the extent starting at agno/bno for length.
1892 */
340785cc 1893STATIC int
1da177e4 1894xfs_free_ag_extent(
66e3237e
DW
1895 struct xfs_trans *tp,
1896 struct xfs_buf *agbp,
1897 xfs_agnumber_t agno,
1898 xfs_agblock_t bno,
1899 xfs_extlen_t len,
1900 const struct xfs_owner_info *oinfo,
1901 enum xfs_ag_resv_type type)
1da177e4 1902{
66e3237e 1903 struct xfs_mount *mp;
66e3237e
DW
1904 struct xfs_btree_cur *bno_cur;
1905 struct xfs_btree_cur *cnt_cur;
1906 xfs_agblock_t gtbno; /* start of right neighbor */
1907 xfs_extlen_t gtlen; /* length of right neighbor */
1908 xfs_agblock_t ltbno; /* start of left neighbor */
1909 xfs_extlen_t ltlen; /* length of left neighbor */
1910 xfs_agblock_t nbno; /* new starting block of freesp */
1911 xfs_extlen_t nlen; /* new length of freespace */
1912 int haveleft; /* have a left neighbor */
1913 int haveright; /* have a right neighbor */
1914 int i;
1915 int error;
fa9c3c19 1916 struct xfs_perag *pag = agbp->b_pag;
1da177e4 1917
673930c3 1918 bno_cur = cnt_cur = NULL;
1da177e4 1919 mp = tp->t_mountp;
673930c3 1920
33df3a9c 1921 if (!xfs_rmap_should_skip_owner_update(oinfo)) {
fa9c3c19 1922 error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo);
673930c3
DW
1923 if (error)
1924 goto error0;
1925 }
1926
1da177e4
LT
1927 /*
1928 * Allocate and initialize a cursor for the by-block btree.
1929 */
289d38d2 1930 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
1da177e4
LT
1931 /*
1932 * Look for a neighboring block on the left (lower block numbers)
1933 * that is contiguous with this space.
1934 */
1935 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1936 goto error0;
1937 if (haveleft) {
1938 /*
1939 * There is a block to our left.
1940 */
1941 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1942 goto error0;
f9e03706
DW
1943 if (XFS_IS_CORRUPT(mp, i != 1)) {
1944 error = -EFSCORRUPTED;
1945 goto error0;
1946 }
1da177e4
LT
1947 /*
1948 * It's not contiguous, though.
1949 */
1950 if (ltbno + ltlen < bno)
1951 haveleft = 0;
1952 else {
1953 /*
1954 * If this failure happens the request to free this
1955 * space was invalid, it's (partly) already free.
1956 * Very bad.
1957 */
f9e03706
DW
1958 if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
1959 error = -EFSCORRUPTED;
1960 goto error0;
1961 }
1da177e4
LT
1962 }
1963 }
1964 /*
1965 * Look for a neighboring block on the right (higher block numbers)
1966 * that is contiguous with this space.
1967 */
637aa50f 1968 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1da177e4
LT
1969 goto error0;
1970 if (haveright) {
1971 /*
1972 * There is a block to our right.
1973 */
1974 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1975 goto error0;
f9e03706
DW
1976 if (XFS_IS_CORRUPT(mp, i != 1)) {
1977 error = -EFSCORRUPTED;
1978 goto error0;
1979 }
1da177e4
LT
1980 /*
1981 * It's not contiguous, though.
1982 */
1983 if (bno + len < gtbno)
1984 haveright = 0;
1985 else {
1986 /*
1987 * If this failure happens the request to free this
1988 * space was invalid, it's (partly) already free.
1989 * Very bad.
1990 */
f9e03706
DW
1991 if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
1992 error = -EFSCORRUPTED;
1993 goto error0;
1994 }
1da177e4
LT
1995 }
1996 }
1997 /*
1998 * Now allocate and initialize a cursor for the by-size tree.
1999 */
289d38d2 2000 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
1da177e4
LT
2001 /*
2002 * Have both left and right contiguous neighbors.
2003 * Merge all three into a single free block.
2004 */
2005 if (haveleft && haveright) {
2006 /*
2007 * Delete the old by-size entry on the left.
2008 */
2009 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2010 goto error0;
f9e03706
DW
2011 if (XFS_IS_CORRUPT(mp, i != 1)) {
2012 error = -EFSCORRUPTED;
2013 goto error0;
2014 }
91cca5df 2015 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 2016 goto error0;
f9e03706
DW
2017 if (XFS_IS_CORRUPT(mp, i != 1)) {
2018 error = -EFSCORRUPTED;
2019 goto error0;
2020 }
1da177e4
LT
2021 /*
2022 * Delete the old by-size entry on the right.
2023 */
2024 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2025 goto error0;
f9e03706
DW
2026 if (XFS_IS_CORRUPT(mp, i != 1)) {
2027 error = -EFSCORRUPTED;
2028 goto error0;
2029 }
91cca5df 2030 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 2031 goto error0;
f9e03706
DW
2032 if (XFS_IS_CORRUPT(mp, i != 1)) {
2033 error = -EFSCORRUPTED;
2034 goto error0;
2035 }
1da177e4
LT
2036 /*
2037 * Delete the old by-block entry for the right block.
2038 */
91cca5df 2039 if ((error = xfs_btree_delete(bno_cur, &i)))
1da177e4 2040 goto error0;
f9e03706
DW
2041 if (XFS_IS_CORRUPT(mp, i != 1)) {
2042 error = -EFSCORRUPTED;
2043 goto error0;
2044 }
1da177e4
LT
2045 /*
2046 * Move the by-block cursor back to the left neighbor.
2047 */
8df4da4a 2048 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1da177e4 2049 goto error0;
f9e03706
DW
2050 if (XFS_IS_CORRUPT(mp, i != 1)) {
2051 error = -EFSCORRUPTED;
2052 goto error0;
2053 }
1da177e4
LT
2054#ifdef DEBUG
2055 /*
2056 * Check that this is the right record: delete didn't
2057 * mangle the cursor.
2058 */
2059 {
2060 xfs_agblock_t xxbno;
2061 xfs_extlen_t xxlen;
2062
2063 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
2064 &i)))
2065 goto error0;
f9e03706
DW
2066 if (XFS_IS_CORRUPT(mp,
2067 i != 1 ||
2068 xxbno != ltbno ||
2069 xxlen != ltlen)) {
2070 error = -EFSCORRUPTED;
2071 goto error0;
2072 }
1da177e4
LT
2073 }
2074#endif
2075 /*
2076 * Update remaining by-block entry to the new, joined block.
2077 */
2078 nbno = ltbno;
2079 nlen = len + ltlen + gtlen;
2080 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2081 goto error0;
2082 }
2083 /*
2084 * Have only a left contiguous neighbor.
2085 * Merge it together with the new freespace.
2086 */
2087 else if (haveleft) {
2088 /*
2089 * Delete the old by-size entry on the left.
2090 */
2091 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2092 goto error0;
f9e03706
DW
2093 if (XFS_IS_CORRUPT(mp, i != 1)) {
2094 error = -EFSCORRUPTED;
2095 goto error0;
2096 }
91cca5df 2097 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 2098 goto error0;
f9e03706
DW
2099 if (XFS_IS_CORRUPT(mp, i != 1)) {
2100 error = -EFSCORRUPTED;
2101 goto error0;
2102 }
1da177e4
LT
2103 /*
2104 * Back up the by-block cursor to the left neighbor, and
2105 * update its length.
2106 */
8df4da4a 2107 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1da177e4 2108 goto error0;
f9e03706
DW
2109 if (XFS_IS_CORRUPT(mp, i != 1)) {
2110 error = -EFSCORRUPTED;
2111 goto error0;
2112 }
1da177e4
LT
2113 nbno = ltbno;
2114 nlen = len + ltlen;
2115 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2116 goto error0;
2117 }
2118 /*
2119 * Have only a right contiguous neighbor.
2120 * Merge it together with the new freespace.
2121 */
2122 else if (haveright) {
2123 /*
2124 * Delete the old by-size entry on the right.
2125 */
2126 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2127 goto error0;
f9e03706
DW
2128 if (XFS_IS_CORRUPT(mp, i != 1)) {
2129 error = -EFSCORRUPTED;
2130 goto error0;
2131 }
91cca5df 2132 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 2133 goto error0;
f9e03706
DW
2134 if (XFS_IS_CORRUPT(mp, i != 1)) {
2135 error = -EFSCORRUPTED;
2136 goto error0;
2137 }
1da177e4
LT
2138 /*
2139 * Update the starting block and length of the right
2140 * neighbor in the by-block tree.
2141 */
2142 nbno = bno;
2143 nlen = len + gtlen;
2144 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2145 goto error0;
2146 }
2147 /*
2148 * No contiguous neighbors.
2149 * Insert the new freespace into the by-block tree.
2150 */
2151 else {
2152 nbno = bno;
2153 nlen = len;
4b22a571 2154 if ((error = xfs_btree_insert(bno_cur, &i)))
1da177e4 2155 goto error0;
f9e03706
DW
2156 if (XFS_IS_CORRUPT(mp, i != 1)) {
2157 error = -EFSCORRUPTED;
2158 goto error0;
2159 }
1da177e4
LT
2160 }
2161 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
2162 bno_cur = NULL;
2163 /*
2164 * In all cases we need to insert the new freespace in the by-size tree.
2165 */
2166 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2167 goto error0;
f9e03706
DW
2168 if (XFS_IS_CORRUPT(mp, i != 0)) {
2169 error = -EFSCORRUPTED;
2170 goto error0;
2171 }
4b22a571 2172 if ((error = xfs_btree_insert(cnt_cur, &i)))
1da177e4 2173 goto error0;
f9e03706
DW
2174 if (XFS_IS_CORRUPT(mp, i != 1)) {
2175 error = -EFSCORRUPTED;
2176 goto error0;
2177 }
1da177e4
LT
2178 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
2179 cnt_cur = NULL;
ecb6928f 2180
1da177e4
LT
2181 /*
2182 * Update the freespace totals in the ag and superblock.
2183 */
92a00544
GX
2184 error = xfs_alloc_update_counters(tp, agbp, len);
2185 xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
ecb6928f
CH
2186 if (error)
2187 goto error0;
2188
ff6d6af2
BD
2189 XFS_STATS_INC(mp, xs_freex);
2190 XFS_STATS_ADD(mp, xs_freeb, len);
0b1b213f 2191
21592863 2192 trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
1da177e4 2193
1da177e4
LT
2194 return 0;
2195
2196 error0:
21592863 2197 trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
1da177e4
LT
2198 if (bno_cur)
2199 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
2200 if (cnt_cur)
2201 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
2202 return error;
2203}
2204
2205/*
2206 * Visible (exported) allocation/free functions.
2207 * Some of these are used just by xfs_alloc_btree.c and this file.
2208 */
2209
2210/*
7cb3efb4 2211 * Compute and fill in value of m_alloc_maxlevels.
1da177e4
LT
2212 */
2213void
2214xfs_alloc_compute_maxlevels(
2215 xfs_mount_t *mp) /* file system mount structure */
2216{
7cb3efb4 2217 mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
19b54ee6 2218 (mp->m_sb.sb_agblocks + 1) / 2);
0ed5f735 2219 ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
1da177e4
LT
2220}
2221
6cc87645 2222/*
3fd129b6
DW
2223 * Find the length of the longest extent in an AG. The 'need' parameter
2224 * specifies how much space we're going to need for the AGFL and the
2225 * 'reserved' parameter tells us how many blocks in this AG are reserved for
2226 * other callers.
6cc87645
DC
2227 */
2228xfs_extlen_t
2229xfs_alloc_longest_free_extent(
50adbcb4 2230 struct xfs_perag *pag,
3fd129b6
DW
2231 xfs_extlen_t need,
2232 xfs_extlen_t reserved)
6cc87645 2233{
50adbcb4 2234 xfs_extlen_t delta = 0;
6cc87645 2235
3fd129b6
DW
2236 /*
2237 * If the AGFL needs a recharge, we'll have to subtract that from the
2238 * longest extent.
2239 */
6cc87645
DC
2240 if (need > pag->pagf_flcount)
2241 delta = need - pag->pagf_flcount;
2242
3fd129b6
DW
2243 /*
2244 * If we cannot maintain others' reservations with space from the
2245 * not-longest freesp extents, we'll have to subtract /that/ from
2246 * the longest extent too.
2247 */
2248 if (pag->pagf_freeblks - pag->pagf_longest < reserved)
2249 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
2250
2251 /*
2252 * If the longest extent is long enough to satisfy all the
2253 * reservations and AGFL rules in place, we can return this extent.
2254 */
6cc87645 2255 if (pag->pagf_longest > delta)
1c743574
DC
2256 return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
2257 pag->pagf_longest - delta);
3fd129b6
DW
2258
2259 /* Otherwise, let the caller try for 1 block if there's space. */
6cc87645
DC
2260 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
2261}
2262
1cac233c
DW
2263/*
2264 * Compute the minimum length of the AGFL in the given AG. If @pag is NULL,
2265 * return the largest possible minimum length.
2266 */
496817b4
DC
2267unsigned int
2268xfs_alloc_min_freelist(
2269 struct xfs_mount *mp,
2270 struct xfs_perag *pag)
2271{
1cac233c
DW
2272 /* AG btrees have at least 1 level. */
2273 static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
2274 const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
496817b4
DC
2275 unsigned int min_free;
2276
7cb3efb4 2277 ASSERT(mp->m_alloc_maxlevels > 0);
1cac233c 2278
496817b4 2279 /* space needed by-bno freespace btree */
1cac233c 2280 min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
7cb3efb4 2281 mp->m_alloc_maxlevels);
496817b4 2282 /* space needed by-size freespace btree */
1cac233c 2283 min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
7cb3efb4 2284 mp->m_alloc_maxlevels);
52548852 2285 /* space needed reverse mapping used space btree */
ebd9027d 2286 if (xfs_has_rmapbt(mp))
1cac233c
DW
2287 min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
2288 mp->m_rmap_maxlevels);
496817b4
DC
2289
2290 return min_free;
2291}
2292
72d55285
DC
2293/*
2294 * Check if the operation we are fixing up the freelist for should go ahead or
2295 * not. If we are freeing blocks, we always allow it, otherwise the allocation
2296 * is dependent on whether the size and shape of free space available will
2297 * permit the requested allocation to take place.
2298 */
2299static bool
2300xfs_alloc_space_available(
2301 struct xfs_alloc_arg *args,
2302 xfs_extlen_t min_free,
2303 int flags)
2304{
2305 struct xfs_perag *pag = args->pag;
12ef8301 2306 xfs_extlen_t alloc_len, longest;
3fd129b6 2307 xfs_extlen_t reservation; /* blocks that are still reserved */
72d55285 2308 int available;
1ca89fbc 2309 xfs_extlen_t agflcount;
72d55285
DC
2310
2311 if (flags & XFS_ALLOC_FLAG_FREEING)
2312 return true;
2313
3fd129b6
DW
2314 reservation = xfs_ag_resv_needed(pag, args->resv);
2315
72d55285 2316 /* do we have enough contiguous free space for the allocation? */
12ef8301 2317 alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
a1f69417 2318 longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
12ef8301 2319 if (longest < alloc_len)
72d55285
DC
2320 return false;
2321
1ca89fbc
BF
2322 /*
2323 * Do we have enough free space remaining for the allocation? Don't
2324 * account extra agfl blocks because we are about to defer free them,
2325 * making them unavailable until the current transaction commits.
2326 */
2327 agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
2328 available = (int)(pag->pagf_freeblks + agflcount -
54fee133 2329 reservation - min_free - args->minleft);
12ef8301 2330 if (available < (int)max(args->total, alloc_len))
72d55285
DC
2331 return false;
2332
54fee133
CH
2333 /*
2334 * Clamp maxlen to the amount of free space available for the actual
2335 * extent allocation.
2336 */
2337 if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
2338 args->maxlen = available;
2339 ASSERT(args->maxlen > 0);
2340 ASSERT(args->maxlen >= args->minlen);
2341 }
2342
72d55285
DC
2343 return true;
2344}
2345
4223f659
BF
2346int
2347xfs_free_agfl_block(
2348 struct xfs_trans *tp,
2349 xfs_agnumber_t agno,
2350 xfs_agblock_t agbno,
2351 struct xfs_buf *agbp,
2352 struct xfs_owner_info *oinfo)
2353{
2354 int error;
2355 struct xfs_buf *bp;
2356
2357 error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
2358 XFS_AG_RESV_AGFL);
2359 if (error)
2360 return error;
2361
ee647f85
DW
2362 error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
2363 XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
2364 tp->t_mountp->m_bsize, 0, &bp);
2365 if (error)
2366 return error;
4223f659
BF
2367 xfs_trans_binval(tp, bp);
2368
2369 return 0;
2370}
2371
a27ba260
BF
2372/*
2373 * Check the agfl fields of the agf for inconsistency or corruption. The purpose
2374 * is to detect an agfl header padding mismatch between current and early v5
2375 * kernels. This problem manifests as a 1-slot size difference between the
2376 * on-disk flcount and the active [first, last] range of a wrapped agfl. This
2377 * may also catch variants of agfl count corruption unrelated to padding. Either
2378 * way, we'll reset the agfl and warn the user.
2379 *
2380 * Return true if a reset is required before the agfl can be used, false
2381 * otherwise.
2382 */
2383static bool
2384xfs_agfl_needs_reset(
2385 struct xfs_mount *mp,
2386 struct xfs_agf *agf)
2387{
2388 uint32_t f = be32_to_cpu(agf->agf_flfirst);
2389 uint32_t l = be32_to_cpu(agf->agf_fllast);
2390 uint32_t c = be32_to_cpu(agf->agf_flcount);
2391 int agfl_size = xfs_agfl_size(mp);
2392 int active;
2393
2394 /* no agfl header on v4 supers */
38c26bfd 2395 if (!xfs_has_crc(mp))
a27ba260
BF
2396 return false;
2397
2398 /*
2399 * The agf read verifier catches severe corruption of these fields.
2400 * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2401 * the verifier allows it.
2402 */
2403 if (f >= agfl_size || l >= agfl_size)
2404 return true;
2405 if (c > agfl_size)
2406 return true;
2407
2408 /*
2409 * Check consistency between the on-disk count and the active range. An
2410 * agfl padding mismatch manifests as an inconsistent flcount.
2411 */
2412 if (c && l >= f)
2413 active = l - f + 1;
2414 else if (c)
2415 active = agfl_size - f + l + 1;
2416 else
2417 active = 0;
2418
2419 return active != c;
2420}
2421
2422/*
2423 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
2424 * agfl content cannot be trusted. Warn the user that a repair is required to
2425 * recover leaked blocks.
2426 *
2427 * The purpose of this mechanism is to handle filesystems affected by the agfl
2428 * header padding mismatch problem. A reset keeps the filesystem online with a
2429 * relatively minor free space accounting inconsistency rather than suffer the
2430 * inevitable crash from use of an invalid agfl block.
2431 */
2432static void
2433xfs_agfl_reset(
2434 struct xfs_trans *tp,
2435 struct xfs_buf *agbp,
2436 struct xfs_perag *pag)
2437{
2438 struct xfs_mount *mp = tp->t_mountp;
9798f615 2439 struct xfs_agf *agf = agbp->b_addr;
a27ba260
BF
2440
2441 ASSERT(pag->pagf_agflreset);
2442 trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
2443
2444 xfs_warn(mp,
2445 "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
2446 "Please unmount and run xfs_repair.",
2447 pag->pag_agno, pag->pagf_flcount);
2448
2449 agf->agf_flfirst = 0;
2450 agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
2451 agf->agf_flcount = 0;
2452 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
2453 XFS_AGF_FLCOUNT);
2454
2455 pag->pagf_flcount = 0;
2456 pag->pagf_agflreset = false;
2457}
2458
f8f2835a
BF
2459/*
2460 * Defer an AGFL block free. This is effectively equivalent to
c201d9ca 2461 * xfs_free_extent_later() with some special handling particular to AGFL blocks.
f8f2835a
BF
2462 *
2463 * Deferring AGFL frees helps prevent log reservation overruns due to too many
2464 * allocation operations in a transaction. AGFL frees are prone to this problem
2465 * because for one they are always freed one at a time. Further, an immediate
2466 * AGFL block free can cause a btree join and require another block free before
2467 * the real allocation can proceed. Deferring the free disconnects freeing up
2468 * the AGFL slot from freeing the block.
2469 */
2470STATIC void
2471xfs_defer_agfl_block(
0f37d178 2472 struct xfs_trans *tp,
f8f2835a
BF
2473 xfs_agnumber_t agno,
2474 xfs_fsblock_t agbno,
2475 struct xfs_owner_info *oinfo)
2476{
0f37d178 2477 struct xfs_mount *mp = tp->t_mountp;
f8f2835a
BF
2478 struct xfs_extent_free_item *new; /* new element */
2479
c201d9ca 2480 ASSERT(xfs_extfree_item_cache != NULL);
f8f2835a
BF
2481 ASSERT(oinfo != NULL);
2482
b3b5ff41 2483 new = kmem_cache_zalloc(xfs_extfree_item_cache,
3050bd0b 2484 GFP_KERNEL | __GFP_NOFAIL);
f8f2835a
BF
2485 new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
2486 new->xefi_blockcount = 1;
b3b5ff41 2487 new->xefi_owner = oinfo->oi_owner;
f8f2835a
BF
2488
2489 trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
2490
0f37d178 2491 xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
f8f2835a
BF
2492}
2493
c201d9ca
DW
2494/*
2495 * Add the extent to the list of extents to be free at transaction end.
2496 * The list is maintained sorted (by block number).
2497 */
2498void
2499__xfs_free_extent_later(
2500 struct xfs_trans *tp,
2501 xfs_fsblock_t bno,
2502 xfs_filblks_t len,
2503 const struct xfs_owner_info *oinfo,
2504 bool skip_discard)
2505{
2506 struct xfs_extent_free_item *new; /* new element */
2507#ifdef DEBUG
2508 struct xfs_mount *mp = tp->t_mountp;
2509 xfs_agnumber_t agno;
2510 xfs_agblock_t agbno;
2511
2512 ASSERT(bno != NULLFSBLOCK);
2513 ASSERT(len > 0);
95f0b95e 2514 ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
c201d9ca
DW
2515 ASSERT(!isnullstartblock(bno));
2516 agno = XFS_FSB_TO_AGNO(mp, bno);
2517 agbno = XFS_FSB_TO_AGBNO(mp, bno);
2518 ASSERT(agno < mp->m_sb.sb_agcount);
2519 ASSERT(agbno < mp->m_sb.sb_agblocks);
2520 ASSERT(len < mp->m_sb.sb_agblocks);
2521 ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
2522#endif
2523 ASSERT(xfs_extfree_item_cache != NULL);
2524
b3b5ff41 2525 new = kmem_cache_zalloc(xfs_extfree_item_cache,
c201d9ca
DW
2526 GFP_KERNEL | __GFP_NOFAIL);
2527 new->xefi_startblock = bno;
2528 new->xefi_blockcount = (xfs_extlen_t)len;
b3b5ff41
DW
2529 if (skip_discard)
2530 new->xefi_flags |= XFS_EFI_SKIP_DISCARD;
2531 if (oinfo) {
2532 ASSERT(oinfo->oi_offset == 0);
2533
2534 if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
2535 new->xefi_flags |= XFS_EFI_ATTR_FORK;
2536 if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
2537 new->xefi_flags |= XFS_EFI_BMBT_BLOCK;
2538 new->xefi_owner = oinfo->oi_owner;
2539 } else {
2540 new->xefi_owner = XFS_RMAP_OWN_NULL;
2541 }
c201d9ca
DW
2542 trace_xfs_bmap_free_defer(tp->t_mountp,
2543 XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
2544 XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
2545 xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
2546}
2547
30151967
CB
2548#ifdef DEBUG
2549/*
2550 * Check if an AGF has a free extent record whose length is equal to
2551 * args->minlen.
2552 */
2553STATIC int
2554xfs_exact_minlen_extent_available(
2555 struct xfs_alloc_arg *args,
2556 struct xfs_buf *agbp,
2557 int *stat)
2558{
2559 struct xfs_btree_cur *cnt_cur;
2560 xfs_agblock_t fbno;
2561 xfs_extlen_t flen;
2562 int error = 0;
2563
2564 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
289d38d2 2565 args->pag, XFS_BTNUM_CNT);
30151967
CB
2566 error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
2567 if (error)
2568 goto out;
2569
2570 if (*stat == 0) {
2571 error = -EFSCORRUPTED;
2572 goto out;
2573 }
2574
2575 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat);
2576 if (error)
2577 goto out;
2578
2579 if (*stat == 1 && flen != args->minlen)
2580 *stat = 0;
2581
2582out:
2583 xfs_btree_del_cursor(cnt_cur, error);
2584
2585 return error;
2586}
2587#endif
2588
1da177e4
LT
2589/*
2590 * Decide whether to use this allocation group for this allocation.
2591 * If so, fix up the btree freelist's size.
2592 */
2e9101da 2593int /* error */
1da177e4 2594xfs_alloc_fix_freelist(
396503fc
DC
2595 struct xfs_alloc_arg *args, /* allocation argument structure */
2596 int flags) /* XFS_ALLOC_FLAG_... */
1da177e4 2597{
396503fc
DC
2598 struct xfs_mount *mp = args->mp;
2599 struct xfs_perag *pag = args->pag;
2600 struct xfs_trans *tp = args->tp;
2601 struct xfs_buf *agbp = NULL;
2602 struct xfs_buf *agflbp = NULL;
2603 struct xfs_alloc_arg targs; /* local allocation arguments */
2604 xfs_agblock_t bno; /* freelist block */
2605 xfs_extlen_t need; /* total blocks needed in freelist */
c184f855 2606 int error = 0;
396503fc 2607
362f5e74
BF
2608 /* deferred ops (AGFL block frees) require permanent transactions */
2609 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
2610
1da177e4 2611 if (!pag->pagf_init) {
08d3e84f 2612 error = xfs_alloc_read_agf(pag, tp, flags, &agbp);
f48e2df8
DW
2613 if (error) {
2614 /* Couldn't lock the AGF so skip this AG. */
2615 if (error == -EAGAIN)
2616 error = 0;
396503fc 2617 goto out_no_agbp;
1da177e4 2618 }
396503fc 2619 }
1da177e4 2620
0e1edbd9 2621 /*
396503fc
DC
2622 * If this is a metadata preferred pag and we are user data then try
2623 * somewhere else if we are not being asked to try harder at this
2624 * point
1da177e4 2625 */
c34d570d 2626 if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) &&
0e1edbd9
NS
2627 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
2628 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
396503fc 2629 goto out_agbp_relse;
1da177e4
LT
2630 }
2631
496817b4 2632 need = xfs_alloc_min_freelist(mp, pag);
54fee133
CH
2633 if (!xfs_alloc_space_available(args, need, flags |
2634 XFS_ALLOC_FLAG_CHECK))
396503fc 2635 goto out_agbp_relse;
0e1edbd9 2636
1da177e4
LT
2637 /*
2638 * Get the a.g. freespace buffer.
2639 * Can fail if we're not blocking on locks, and it's held.
2640 */
396503fc 2641 if (!agbp) {
08d3e84f 2642 error = xfs_alloc_read_agf(pag, tp, flags, &agbp);
f48e2df8
DW
2643 if (error) {
2644 /* Couldn't lock the AGF so skip this AG. */
2645 if (error == -EAGAIN)
2646 error = 0;
396503fc 2647 goto out_no_agbp;
0e1edbd9 2648 }
1da177e4 2649 }
50adbcb4 2650
a27ba260
BF
2651 /* reset a padding mismatched agfl before final free space check */
2652 if (pag->pagf_agflreset)
2653 xfs_agfl_reset(tp, agbp, pag);
2654
50adbcb4 2655 /* If there isn't enough total space or single-extent, reject it. */
496817b4 2656 need = xfs_alloc_min_freelist(mp, pag);
396503fc
DC
2657 if (!xfs_alloc_space_available(args, need, flags))
2658 goto out_agbp_relse;
72d55285 2659
30151967
CB
2660#ifdef DEBUG
2661 if (args->alloc_minlen_only) {
2662 int stat;
2663
2664 error = xfs_exact_minlen_extent_available(args, agbp, &stat);
2665 if (error || !stat)
2666 goto out_agbp_relse;
2667 }
2668#endif
1da177e4
LT
2669 /*
2670 * Make the freelist shorter if it's too long.
50adbcb4 2671 *
396503fc
DC
2672 * Note that from this point onwards, we will always release the agf and
2673 * agfl buffers on error. This handles the case where we error out and
2674 * the buffers are clean or may not have been joined to the transaction
2675 * and hence need to be released manually. If they have been joined to
2676 * the transaction, then xfs_trans_brelse() will handle them
2677 * appropriately based on the recursion count and dirty state of the
2678 * buffer.
2679 *
50adbcb4
DC
2680 * XXX (dgc): When we have lots of free space, does this buy us
2681 * anything other than extra overhead when we need to put more blocks
2682 * back on the free list? Maybe we should only do this when space is
2683 * getting low or the AGFL is more than half full?
04f13060
DW
2684 *
2685 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
2686 * big; the NORMAP flag prevents AGFL expand/shrink operations from
2687 * updating the rmapbt. Both flags are used in xfs_repair while we're
2688 * rebuilding the rmapbt, and neither are used by the kernel. They're
2689 * both required to ensure that rmaps are correctly recorded for the
2690 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
2691 * repair/rmap.c in xfsprogs for details.
1da177e4 2692 */
04f13060 2693 memset(&targs, 0, sizeof(targs));
7280feda 2694 /* struct copy below */
04f13060 2695 if (flags & XFS_ALLOC_FLAG_NORMAP)
7280feda 2696 targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
04f13060 2697 else
7280feda 2698 targs.oinfo = XFS_RMAP_OINFO_AG;
04f13060 2699 while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
92821e2b
DC
2700 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2701 if (error)
396503fc 2702 goto out_agbp_relse;
4223f659 2703
c03edc9e
BF
2704 /* defer agfl frees */
2705 xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
1da177e4 2706 }
50adbcb4 2707
1da177e4
LT
2708 targs.tp = tp;
2709 targs.mp = mp;
2710 targs.agbp = agbp;
2711 targs.agno = args->agno;
3fd129b6 2712 targs.alignment = targs.minlen = targs.prod = 1;
1da177e4
LT
2713 targs.type = XFS_ALLOCTYPE_THIS_AG;
2714 targs.pag = pag;
50adbcb4
DC
2715 error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2716 if (error)
396503fc 2717 goto out_agbp_relse;
50adbcb4
DC
2718
2719 /* Make the freelist longer if it's too short. */
2720 while (pag->pagf_flcount < need) {
1da177e4 2721 targs.agbno = 0;
50adbcb4 2722 targs.maxlen = need - pag->pagf_flcount;
0ab32086 2723 targs.resv = XFS_AG_RESV_AGFL;
50adbcb4
DC
2724
2725 /* Allocate as many blocks as possible at once. */
2726 error = xfs_alloc_ag_vextent(&targs);
396503fc
DC
2727 if (error)
2728 goto out_agflbp_relse;
2729
1da177e4
LT
2730 /*
2731 * Stop if we run out. Won't happen if callers are obeying
2732 * the restrictions correctly. Can happen for free calls
2733 * on a completely full ag.
2734 */
d210a28c 2735 if (targs.agbno == NULLAGBLOCK) {
0e1edbd9
NS
2736 if (flags & XFS_ALLOC_FLAG_FREEING)
2737 break;
396503fc 2738 goto out_agflbp_relse;
d210a28c 2739 }
1da177e4
LT
2740 /*
2741 * Put each allocated block on the list.
2742 */
2743 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
92821e2b
DC
2744 error = xfs_alloc_put_freelist(tp, agbp,
2745 agflbp, bno, 0);
2746 if (error)
396503fc 2747 goto out_agflbp_relse;
1da177e4
LT
2748 }
2749 }
e63a3690 2750 xfs_trans_brelse(tp, agflbp);
1da177e4
LT
2751 args->agbp = agbp;
2752 return 0;
396503fc
DC
2753
2754out_agflbp_relse:
2755 xfs_trans_brelse(tp, agflbp);
2756out_agbp_relse:
2757 if (agbp)
2758 xfs_trans_brelse(tp, agbp);
2759out_no_agbp:
2760 args->agbp = NULL;
2761 return error;
1da177e4
LT
2762}
2763
2764/*
2765 * Get a block from the freelist.
2766 * Returns with the buffer for the block gotten.
2767 */
50920116 2768int
1da177e4 2769xfs_alloc_get_freelist(
50920116
DC
2770 struct xfs_trans *tp,
2771 struct xfs_buf *agbp,
2772 xfs_agblock_t *bnop,
2773 int btreeblk)
1da177e4 2774{
50920116
DC
2775 struct xfs_agf *agf = agbp->b_addr;
2776 struct xfs_buf *agflbp;
2777 xfs_agblock_t bno;
2778 __be32 *agfl_bno;
2779 int error;
f53dde11 2780 uint32_t logflags;
50920116
DC
2781 struct xfs_mount *mp = tp->t_mountp;
2782 struct xfs_perag *pag;
1da177e4 2783
1da177e4
LT
2784 /*
2785 * Freelist is empty, give up.
2786 */
2787 if (!agf->agf_flcount) {
2788 *bnop = NULLAGBLOCK;
2789 return 0;
2790 }
2791 /*
2792 * Read the array of free blocks.
2793 */
77c95bba
CH
2794 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2795 &agflbp);
2796 if (error)
1da177e4 2797 return error;
77c95bba
CH
2798
2799
1da177e4
LT
2800 /*
2801 * Get the block number and update the data structures.
2802 */
183606d8 2803 agfl_bno = xfs_buf_to_agfl_bno(agflbp);
77c95bba 2804 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
413d57c9 2805 be32_add_cpu(&agf->agf_flfirst, 1);
1da177e4 2806 xfs_trans_brelse(tp, agflbp);
a78ee256 2807 if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
1da177e4 2808 agf->agf_flfirst = 0;
a862e0fd 2809
92a00544 2810 pag = agbp->b_pag;
a27ba260 2811 ASSERT(!pag->pagf_agflreset);
413d57c9 2812 be32_add_cpu(&agf->agf_flcount, -1);
1da177e4 2813 pag->pagf_flcount--;
92821e2b
DC
2814
2815 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2816 if (btreeblk) {
413d57c9 2817 be32_add_cpu(&agf->agf_btreeblks, 1);
92821e2b
DC
2818 pag->pagf_btreeblks++;
2819 logflags |= XFS_AGF_BTREEBLKS;
2820 }
2821
92821e2b 2822 xfs_alloc_log_agf(tp, agbp, logflags);
1da177e4
LT
2823 *bnop = bno;
2824
1da177e4
LT
2825 return 0;
2826}
2827
2828/*
2829 * Log the given fields from the agf structure.
2830 */
2831void
2832xfs_alloc_log_agf(
f53dde11
DC
2833 struct xfs_trans *tp,
2834 struct xfs_buf *bp,
2835 uint32_t fields)
1da177e4
LT
2836{
2837 int first; /* first byte offset */
2838 int last; /* last byte offset */
2839 static const short offsets[] = {
2840 offsetof(xfs_agf_t, agf_magicnum),
2841 offsetof(xfs_agf_t, agf_versionnum),
2842 offsetof(xfs_agf_t, agf_seqno),
2843 offsetof(xfs_agf_t, agf_length),
2844 offsetof(xfs_agf_t, agf_roots[0]),
2845 offsetof(xfs_agf_t, agf_levels[0]),
2846 offsetof(xfs_agf_t, agf_flfirst),
2847 offsetof(xfs_agf_t, agf_fllast),
2848 offsetof(xfs_agf_t, agf_flcount),
2849 offsetof(xfs_agf_t, agf_freeblks),
2850 offsetof(xfs_agf_t, agf_longest),
92821e2b 2851 offsetof(xfs_agf_t, agf_btreeblks),
4e0e6040 2852 offsetof(xfs_agf_t, agf_uuid),
f32866fd 2853 offsetof(xfs_agf_t, agf_rmap_blocks),
bdf28630
DW
2854 offsetof(xfs_agf_t, agf_refcount_blocks),
2855 offsetof(xfs_agf_t, agf_refcount_root),
2856 offsetof(xfs_agf_t, agf_refcount_level),
da1f039d
DW
2857 /* needed so that we don't log the whole rest of the structure: */
2858 offsetof(xfs_agf_t, agf_spare64),
1da177e4
LT
2859 sizeof(xfs_agf_t)
2860 };
2861
9798f615 2862 trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);
0b1b213f 2863
61fe135c 2864 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
4e0e6040 2865
1da177e4
LT
2866 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2867 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2868}
2869
1da177e4
LT
/*
 * Put the block on the freelist for the allocation group.
 *
 * @agflbp may be NULL, in which case the AGFL buffer is read here.
 * @btreeblk indicates the block is being returned by the free space btrees,
 * so the per-AG btree block accounting is decremented as well.
 *
 * Returns 0 or a negative errno from reading the AGFL.
 */
int
xfs_alloc_put_freelist(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_buf		*agflbp,
	xfs_agblock_t		bno,
	int			btreeblk)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_perag	*pag;
	__be32			*blockp;
	int			error;
	uint32_t		logflags;
	__be32			*agfl_bno;
	int			startoff;

	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
			be32_to_cpu(agf->agf_seqno), &agflbp)))
		return error;
	/* Advance the tail index, wrapping at the end of the AGFL. */
	be32_add_cpu(&agf->agf_fllast, 1);
	if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
		agf->agf_fllast = 0;

	pag = agbp->b_pag;
	ASSERT(!pag->pagf_agflreset);
	/* Keep the on-disk and in-core freelist counts in sync. */
	be32_add_cpu(&agf->agf_flcount, 1);
	pag->pagf_flcount++;

	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		/* Block came back from a btree; it no longer counts there. */
		be32_add_cpu(&agf->agf_btreeblks, -1);
		pag->pagf_btreeblks--;
		logflags |= XFS_AGF_BTREEBLKS;
	}

	xfs_alloc_log_agf(tp, agbp, logflags);

	ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));

	/* Store the freed block number in the new tail slot of the AGFL. */
	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
	*blockp = cpu_to_be32(bno);
	startoff = (char *)blockp - (char *)agflbp->b_addr;

	/*
	 * NOTE(review): this second xfs_alloc_log_agf() call logs the same
	 * @logflags as the one above and looks redundant — presumably a
	 * historical leftover; harmless, but worth confirming upstream.
	 */
	xfs_alloc_log_agf(tp, agbp, logflags);

	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
	xfs_trans_log_buf(tp, agflbp, startoff,
			  startoff + sizeof(xfs_agblock_t) - 1);
	return 0;
}
2925
/*
 * Structural verifier for the AGF.  Returns the address of the failing
 * check (__this_address) on corruption, or NULL if the AGF looks sane.
 * Does not check the CRC; that is the read verifier's job.
 */
static xfs_failaddr_t
xfs_agf_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_agf		*agf = bp->b_addr;

	/* v5 filesystems carry a UUID and an LSN that must both be valid. */
	if (xfs_has_crc(mp)) {
		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
			return __this_address;
	}

	if (!xfs_verify_magic(bp, agf->agf_magicnum))
		return __this_address;

	/* Version and freelist indices/counts must be within the AGFL. */
	if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
	      be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
	      be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
	      be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
	      be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
		return __this_address;

	if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks)
		return __this_address;

	/* Free space counters must be internally consistent. */
	if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
	    be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))
		return __this_address;

	/* Free space btree heights must be sane. */
	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
						mp->m_alloc_maxlevels ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
						mp->m_alloc_maxlevels)
		return __this_address;

	if (xfs_has_rmapbt(mp) &&
	    (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
	     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
						mp->m_rmap_maxlevels))
		return __this_address;

	if (xfs_has_rmapbt(mp) &&
	    be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length))
		return __this_address;

	/*
	 * during growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
		return __this_address;

	if (xfs_has_lazysbcount(mp) &&
	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
		return __this_address;

	if (xfs_has_reflink(mp) &&
	    be32_to_cpu(agf->agf_refcount_blocks) >
	    be32_to_cpu(agf->agf_length))
		return __this_address;

	if (xfs_has_reflink(mp) &&
	    (be32_to_cpu(agf->agf_refcount_level) < 1 ||
	     be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels))
		return __this_address;

	return NULL;

}
3001
1813dd64
DC
3002static void
3003xfs_agf_read_verify(
612cfbfe
DC
3004 struct xfs_buf *bp)
3005{
dbd329f1 3006 struct xfs_mount *mp = bp->b_mount;
bc1a09b8 3007 xfs_failaddr_t fa;
4e0e6040 3008
38c26bfd 3009 if (xfs_has_crc(mp) &&
ce5028cf 3010 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
bc1a09b8
DW
3011 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
3012 else {
b5572597 3013 fa = xfs_agf_verify(bp);
bc1a09b8
DW
3014 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
3015 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
3016 }
612cfbfe 3017}
5d5f527d 3018
/*
 * Write-side verifier for AGF buffers: reject structurally-bad AGFs, then
 * (on v5 filesystems) stamp the buffer's LSN and recompute the CRC just
 * before it goes to disk.
 */
static void
xfs_agf_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	struct xfs_agf		*agf = bp->b_addr;
	xfs_failaddr_t		fa;

	fa = xfs_agf_verify(bp);
	if (fa) {
		/* Never write a corrupt AGF. */
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	/* Pre-v5 filesystems have no LSN or CRC in the AGF. */
	if (!xfs_has_crc(mp))
		return;

	/* Record the last-modification LSN for log recovery ordering. */
	if (bip)
		agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
}
3042
/* Buffer ops wiring the AGF verifiers into the buffer cache. */
const struct xfs_buf_ops xfs_agf_buf_ops = {
	.name = "xfs_agf",
	.magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
	.verify_read = xfs_agf_read_verify,
	.verify_write = xfs_agf_write_verify,
	.verify_struct = xfs_agf_verify,
};
3050
1da177e4
LT
/*
 * Read in the allocation group header (free/alloc section).
 *
 * On success *@agfbpp holds the locked, verified AGF buffer with its
 * reclaim reference count raised to XFS_AGF_REF.  @flags is passed
 * through to xfs_trans_read_buf() (e.g. XBF_TRYLOCK).
 */
int
xfs_read_agf(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	int			flags,
	struct xfs_buf		**agfbpp)
{
	struct xfs_mount	*mp = pag->pag_mount;
	int			error;

	trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);

	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
	if (error)
		return error;

	/* Keep AGF buffers around longer in the LRU than ordinary data. */
	xfs_buf_set_ref(*agfbpp, XFS_AGF_REF);
	return 0;
}
3075
/*
 * Read in the allocation group header (free/alloc section) and initialise the
 * perag structure if necessary. If the caller provides @agfbpp, then return the
 * locked buffer to the caller, otherwise free it.
 */
int
xfs_alloc_read_agf(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	int			flags,	/* XFS_ALLOC_FLAG_... */
	struct xfs_buf		**agfbpp)
{
	struct xfs_buf		*agfbp;
	struct xfs_agf		*agf;
	int			error;
	int			allocbt_blks;

	trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);

	/* We don't support trylock when freeing. */
	ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
			(XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
	error = xfs_read_agf(pag, tp,
			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
			&agfbp);
	if (error)
		return error;

	agf = agfbp->b_addr;
	if (!pag->pagf_init) {
		/* First read of this AGF: seed the in-core perag counters. */
		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
		pag->pagf_levels[XFS_BTNUM_BNOi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
		pag->pagf_levels[XFS_BTNUM_RMAPi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
		pag->pagf_init = 1;
		pag->pagf_agflreset = xfs_agfl_needs_reset(pag->pag_mount, agf);

		/*
		 * Update the in-core allocbt counter. Filter out the rmapbt
		 * subset of the btreeblks counter because the rmapbt is managed
		 * by perag reservation. Subtract one for the rmapbt root block
		 * because the rmap counter includes it while the btreeblks
		 * counter only tracks non-root blocks.
		 */
		allocbt_blks = pag->pagf_btreeblks;
		if (xfs_has_rmapbt(pag->pag_mount))
			allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
		if (allocbt_blks > 0)
			atomic64_add(allocbt_blks,
					&pag->pag_mount->m_allocbt_blks);
	}
#ifdef DEBUG
	/* On re-reads, cross-check the cached perag against the disk AGF. */
	else if (!xfs_is_shutdown(pag->pag_mount)) {
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
	}
#endif
	if (agfbpp)
		*agfbpp = agfbp;
	else
		xfs_trans_brelse(tp, agfbp);
	return 0;
}
3152
/*
 * Allocate an extent (variable-size).
 * Depending on the allocation type, we either look in a single allocation
 * group or loop over the allocation groups to find the result.
 *
 * Returns 0 even when no space was found; in that case args->fsbno is set
 * to NULLFSBLOCK.  A non-zero return indicates a real error.
 */
int				/* error */
xfs_alloc_vextent(
	struct xfs_alloc_arg	*args)	/* allocation argument structure */
{
	xfs_agblock_t		agsize;	/* allocation group size */
	int			error;
	int			flags;	/* XFS_ALLOC_FLAG_... locking flags */
	struct xfs_mount	*mp;	/* mount structure pointer */
	xfs_agnumber_t		sagno;	/* starting allocation group number */
	xfs_alloctype_t		type;	/* input allocation type */
	int			bump_rotor = 0;
	xfs_agnumber_t		rotorstep = xfs_rotorstep; /* inode32 agf stepper */

	mp = args->mp;
	type = args->otype = args->type;
	args->agbno = NULLAGBLOCK;
	/*
	 * Just fix this up, for the case where the last a.g. is shorter
	 * (or there's only one a.g.) and the caller couldn't easily figure
	 * that out (xfs_bmap_alloc).
	 */
	agsize = mp->m_sb.sb_agblocks;
	if (args->maxlen > agsize)
		args->maxlen = agsize;
	if (args->alignment == 0)
		args->alignment = 1;
	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->minlen <= agsize);
	ASSERT(args->mod < args->prod);
	/* Same checks as the ASSERTs above, but fail soft in production. */
	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
	    args->minlen > args->maxlen || args->minlen > agsize ||
	    args->mod >= args->prod) {
		args->fsbno = NULLFSBLOCK;
		trace_xfs_alloc_vextent_badargs(args);
		return 0;
	}

	switch (type) {
	case XFS_ALLOCTYPE_THIS_AG:
	case XFS_ALLOCTYPE_NEAR_BNO:
	case XFS_ALLOCTYPE_THIS_BNO:
		/*
		 * These three force us into a single a.g.
		 */
		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
		args->pag = xfs_perag_get(mp, args->agno);
		error = xfs_alloc_fix_freelist(args, 0);
		if (error) {
			trace_xfs_alloc_vextent_nofix(args);
			goto error0;
		}
		/* No AGF buffer means the AG can't satisfy the request. */
		if (!args->agbp) {
			trace_xfs_alloc_vextent_noagbp(args);
			break;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		if ((error = xfs_alloc_ag_vextent(args)))
			goto error0;
		break;
	case XFS_ALLOCTYPE_START_BNO:
		/*
		 * Try near allocation first, then anywhere-in-ag after
		 * the first a.g. fails.
		 */
		if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
		    xfs_is_inode32(mp)) {
			/* inode32: spread initial data via the AGF rotor. */
			args->fsbno = XFS_AGB_TO_FSB(mp,
					((mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount), 0);
			bump_rotor = 1;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		args->type = XFS_ALLOCTYPE_NEAR_BNO;
		fallthrough;
	case XFS_ALLOCTYPE_FIRST_AG:
		/*
		 * Rotate through the allocation groups looking for a winner.
		 */
		if (type == XFS_ALLOCTYPE_FIRST_AG) {
			/*
			 * Start with allocation group given by bno.
			 */
			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			args->type = XFS_ALLOCTYPE_THIS_AG;
			sagno = 0;
			flags = 0;
		} else {
			/*
			 * Start with the given allocation group.
			 */
			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			flags = XFS_ALLOC_FLAG_TRYLOCK;
		}
		/*
		 * Loop over allocation groups twice; first time with
		 * trylock set, second time without.
		 */
		for (;;) {
			args->pag = xfs_perag_get(mp, args->agno);
			error = xfs_alloc_fix_freelist(args, flags);
			if (error) {
				trace_xfs_alloc_vextent_nofix(args);
				goto error0;
			}
			/*
			 * If we get a buffer back then the allocation will fly.
			 */
			if (args->agbp) {
				if ((error = xfs_alloc_ag_vextent(args)))
					goto error0;
				break;
			}

			trace_xfs_alloc_vextent_loopfailed(args);

			/*
			 * Didn't work, figure out the next iteration.
			 */
			if (args->agno == sagno &&
			    type == XFS_ALLOCTYPE_START_BNO)
				args->type = XFS_ALLOCTYPE_THIS_AG;
			/*
			 * For the first allocation, we can try any AG to get
			 * space. However, if we already have allocated a
			 * block, we don't want to try AGs whose number is below
			 * sagno. Otherwise, we may end up with out-of-order
			 * locking of AGF, which might cause deadlock.
			 */
			if (++(args->agno) == mp->m_sb.sb_agcount) {
				if (args->tp->t_firstblock != NULLFSBLOCK)
					args->agno = sagno;
				else
					args->agno = 0;
			}
			/*
			 * Reached the starting a.g., must either be done
			 * or switch to non-trylock mode.
			 */
			if (args->agno == sagno) {
				if (flags == 0) {
					args->agbno = NULLAGBLOCK;
					trace_xfs_alloc_vextent_allfailed(args);
					break;
				}

				flags = 0;
				if (type == XFS_ALLOCTYPE_START_BNO) {
					args->agbno = XFS_FSB_TO_AGBNO(mp,
						args->fsbno);
					args->type = XFS_ALLOCTYPE_NEAR_BNO;
				}
			}
			xfs_perag_put(args->pag);
		}
		if (bump_rotor) {
			/* Advance the inode32 rotor past the AG we used. */
			if (args->agno == sagno)
				mp->m_agfrotor = (mp->m_agfrotor + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
			else
				mp->m_agfrotor = (args->agno * rotorstep + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
		}
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}
	if (args->agbno == NULLAGBLOCK)
		args->fsbno = NULLFSBLOCK;
	else {
		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
#ifdef DEBUG
		ASSERT(args->len >= args->minlen);
		ASSERT(args->len <= args->maxlen);
		ASSERT(args->agbno % args->alignment == 0);
		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
			args->len);
#endif

	}
	xfs_perag_put(args->pag);
	return 0;
error0:
	xfs_perag_put(args->pag);
	return error;
}
3347
4d89e20b
DC
/*
 * Ensure that the freelist is at full capacity.
 *
 * Builds a minimal xfs_alloc_arg for @pag and calls
 * xfs_alloc_fix_freelist() in FREEING mode; on success *@agbp holds the
 * locked AGF buffer.
 */
int
xfs_free_extent_fix_freelist(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	struct xfs_buf		**agbp)
{
	struct xfs_alloc_arg	args;
	int			error;

	memset(&args, 0, sizeof(struct xfs_alloc_arg));
	args.tp = tp;
	args.mp = tp->t_mountp;
	args.agno = pag->pag_agno;
	args.pag = pag;

	/*
	 * validate that the block number is legal - this enables us to detect
	 * and handle a silent filesystem corruption rather than crashing.
	 */
	if (args.agno >= args.mp->m_sb.sb_agcount)
		return -EFSCORRUPTED;

	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
	if (error)
		return error;

	*agbp = args.agbp;
	return 0;
}
3378
/*
 * Free an extent.
 * Just break up the extent address and hand off to xfs_free_ag_extent
 * after fixing up the freelist.
 *
 * @oinfo identifies the reverse-mapping owner of the extent, @type the
 * per-AG reservation it belongs to, and @skip_discard suppresses the
 * online-discard of the busy extent.
 */
int
__xfs_free_extent(
	struct xfs_trans	*tp,
	xfs_fsblock_t		bno,
	xfs_extlen_t		len,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type	type,
	bool			skip_discard)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_buf		*agbp;
	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, bno);
	xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(mp, bno);
	struct xfs_agf		*agf;
	int			error;
	unsigned int		busy_flags = 0;
	struct xfs_perag	*pag;

	ASSERT(len != 0);
	/* AGFL blocks are freed through xfs_alloc_put_freelist() instead. */
	ASSERT(type != XFS_AG_RESV_AGFL);

	/* Error-injection hook for testing the free-extent failure path. */
	if (XFS_TEST_ERROR(false, mp,
			XFS_ERRTAG_FREE_EXTENT))
		return -EIO;

	pag = xfs_perag_get(mp, agno);
	error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
	if (error)
		goto err;
	agf = agbp->b_addr;

	if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
		error = -EFSCORRUPTED;
		goto err_release;
	}

	/* validate the extent size is legal now we have the agf locked */
	if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
		error = -EFSCORRUPTED;
		goto err_release;
	}

	error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
	if (error)
		goto err_release;

	/* Mark the extent busy so it isn't reused before the free commits. */
	if (skip_discard)
		busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
	xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
	xfs_perag_put(pag);
	return 0;

err_release:
	xfs_trans_brelse(tp, agbp);
err:
	xfs_perag_put(pag);
	return error;
}
2d520bfa
DW
3442
/* Caller context threaded through xfs_btree_query_range(). */
struct xfs_alloc_query_range_info {
	xfs_alloc_query_range_fn	fn;	/* user callback */
	void				*priv;	/* user callback data */
};

/* Format btree record and pass to our callback. */
STATIC int
xfs_alloc_query_range_helper(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{
	struct xfs_alloc_query_range_info	*query = priv;
	struct xfs_alloc_rec_incore		irec;

	/* Convert the on-disk big-endian record to in-core form. */
	irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
	irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
	return query->fn(cur, &irec, query->priv);
}
3462
3463/* Find all free space within a given range of blocks. */
3464int
3465xfs_alloc_query_range(
3466 struct xfs_btree_cur *cur,
04dcb474
DW
3467 const struct xfs_alloc_rec_incore *low_rec,
3468 const struct xfs_alloc_rec_incore *high_rec,
2d520bfa
DW
3469 xfs_alloc_query_range_fn fn,
3470 void *priv)
3471{
3472 union xfs_btree_irec low_brec;
3473 union xfs_btree_irec high_brec;
3474 struct xfs_alloc_query_range_info query;
3475
3476 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3477 low_brec.a = *low_rec;
3478 high_brec.a = *high_rec;
3479 query.priv = priv;
3480 query.fn = fn;
3481 return xfs_btree_query_range(cur, &low_brec, &high_brec,
3482 xfs_alloc_query_range_helper, &query);
3483}
e9a2599a
DW
3484
3485/* Find all free space records. */
3486int
3487xfs_alloc_query_all(
3488 struct xfs_btree_cur *cur,
3489 xfs_alloc_query_range_fn fn,
3490 void *priv)
3491{
3492 struct xfs_alloc_query_range_info query;
3493
3494 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3495 query.priv = priv;
3496 query.fn = fn;
3497 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3498}
21ec5416 3499
ce1d802e
DW
/* Is there a record covering a given extent? */
int
xfs_alloc_has_record(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	bool			*exists)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	/* Low key: everything zero except the starting block. */
	memset(&low, 0, sizeof(low));
	low.a.ar_startblock = bno;
	/* High key: all-ones pattern sorts after any real field value. */
	memset(&high, 0xFF, sizeof(high));
	high.a.ar_startblock = bno + len - 1;

	return xfs_btree_has_record(cur, &low, &high, exists);
}
9f3a080e
DW
3518
3519/*
3520 * Walk all the blocks in the AGFL. The @walk_fn can return any negative
5bb46e3e 3521 * error code or XFS_ITER_*.
9f3a080e
DW
3522 */
3523int
3524xfs_agfl_walk(
3525 struct xfs_mount *mp,
3526 struct xfs_agf *agf,
3527 struct xfs_buf *agflbp,
3528 xfs_agfl_walk_fn walk_fn,
3529 void *priv)
3530{
3531 __be32 *agfl_bno;
3532 unsigned int i;
3533 int error;
3534
183606d8 3535 agfl_bno = xfs_buf_to_agfl_bno(agflbp);
9f3a080e
DW
3536 i = be32_to_cpu(agf->agf_flfirst);
3537
3538 /* Nothing to walk in an empty AGFL. */
3539 if (agf->agf_flcount == cpu_to_be32(0))
3540 return 0;
3541
3542 /* Otherwise, walk from first to last, wrapping as needed. */
3543 for (;;) {
3544 error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
3545 if (error)
3546 return error;
3547 if (i == be32_to_cpu(agf->agf_fllast))
3548 break;
3549 if (++i == xfs_agfl_size(mp))
3550 i = 0;
3551 }
3552
3553 return 0;
3554}
c201d9ca
DW
3555
3556int __init
3557xfs_extfree_intent_init_cache(void)
3558{
3559 xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
3560 sizeof(struct xfs_extent_free_item),
3561 0, 0, NULL);
3562
3563 return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
3564}
3565
/*
 * Tear down the extent-free intent cache.  Clearing the pointer guards
 * against accidental use of the stale cache after destruction.
 */
void
xfs_extfree_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_extfree_item_cache);
	xfs_extfree_item_cache = NULL;
}