]> git.ipfire.org Git - people/ms/linux.git/blame - fs/xfs/libxfs/xfs_alloc.c
Merge branch 'for-6.0/dax' into libnvdimm-fixes
[people/ms/linux.git] / fs / xfs / libxfs / xfs_alloc.c
CommitLineData
0b61f8a4 1// SPDX-License-Identifier: GPL-2.0
1da177e4 2/*
7b718769
NS
3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
1da177e4 5 */
1da177e4 6#include "xfs.h"
a844f451 7#include "xfs_fs.h"
70a9883c 8#include "xfs_format.h"
239880ef 9#include "xfs_log_format.h"
70a9883c 10#include "xfs_shared.h"
239880ef 11#include "xfs_trans_resv.h"
a844f451 12#include "xfs_bit.h"
1da177e4 13#include "xfs_mount.h"
3ab78df2 14#include "xfs_defer.h"
1da177e4 15#include "xfs_btree.h"
673930c3 16#include "xfs_rmap.h"
a4fbe6ab 17#include "xfs_alloc_btree.h"
1da177e4 18#include "xfs_alloc.h"
efc27b52 19#include "xfs_extent_busy.h"
e9e899a2 20#include "xfs_errortag.h"
1da177e4 21#include "xfs_error.h"
0b1b213f 22#include "xfs_trace.h"
239880ef 23#include "xfs_trans.h"
4e0e6040 24#include "xfs_buf_item.h"
239880ef 25#include "xfs_log.h"
9bbafc71 26#include "xfs_ag.h"
3fd129b6 27#include "xfs_ag_resv.h"
f8f2835a
BF
28#include "xfs_bmap.h"
29
c201d9ca 30struct kmem_cache *xfs_extfree_item_cache;
1da177e4 31
c999a223 32struct workqueue_struct *xfs_alloc_wq;
1da177e4
LT
33
34#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
35
36#define XFSA_FIXUP_BNO_OK 1
37#define XFSA_FIXUP_CNT_OK 2
38
1da177e4
LT
39STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
40STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
41STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
1da177e4 42
a78ee256
DC
43/*
44 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
45 * the beginning of the block for a proper header with the location information
46 * and CRC.
47 */
48unsigned int
49xfs_agfl_size(
50 struct xfs_mount *mp)
51{
52 unsigned int size = mp->m_sb.sb_sectsize;
53
38c26bfd 54 if (xfs_has_crc(mp))
a78ee256
DC
55 size -= sizeof(struct xfs_agfl);
56
57 return size / sizeof(xfs_agblock_t);
58}
59
af30dfa1
DW
60unsigned int
61xfs_refc_block(
62 struct xfs_mount *mp)
63{
38c26bfd 64 if (xfs_has_rmapbt(mp))
af30dfa1 65 return XFS_RMAP_BLOCK(mp) + 1;
38c26bfd 66 if (xfs_has_finobt(mp))
af30dfa1
DW
67 return XFS_FIBT_BLOCK(mp) + 1;
68 return XFS_IBT_BLOCK(mp) + 1;
69}
70
8018026e
DW
71xfs_extlen_t
72xfs_prealloc_blocks(
73 struct xfs_mount *mp)
74{
38c26bfd 75 if (xfs_has_reflink(mp))
af30dfa1 76 return xfs_refc_block(mp) + 1;
38c26bfd 77 if (xfs_has_rmapbt(mp))
8018026e 78 return XFS_RMAP_BLOCK(mp) + 1;
38c26bfd 79 if (xfs_has_finobt(mp))
8018026e
DW
80 return XFS_FIBT_BLOCK(mp) + 1;
81 return XFS_IBT_BLOCK(mp) + 1;
82}
83
52548852 84/*
93defd5a
DW
85 * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
86 * guarantee that we can refill the AGFL prior to allocating space in a nearly
4869b6e8 87 * full AG. Although the space described by the free space btrees, the
93defd5a
DW
88 * blocks used by the freesp btrees themselves, and the blocks owned by the
89 * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk
90 * free space in the AG drop so low that the free space btrees cannot refill an
91 * empty AGFL up to the minimum level. Rather than grind through empty AGs
92 * until the fs goes down, we subtract this many AG blocks from the incore
93 * fdblocks to ensure user allocation does not overcommit the space the
94 * filesystem needs for the AGFLs. The rmap btree uses a per-AG reservation to
95 * withhold space from xfs_mod_fdblocks, so we do not account for that here.
96 */
97#define XFS_ALLOCBT_AGFL_RESERVE 4
98
99/*
100 * Compute the number of blocks that we set aside to guarantee the ability to
101 * refill the AGFL and handle a full bmap btree split.
102 *
52548852
DW
103 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
104 * AGF buffer (PV 947395), we place constraints on the relationship among
105 * actual allocations for data blocks, freelist blocks, and potential file data
106 * bmap btree blocks. However, these restrictions may result in no actual space
107 * allocated for a delayed extent, for example, a data block in a certain AG is
108 * allocated but there is no additional block for the additional bmap btree
109 * block due to a split of the bmap btree of the file. The result of this may
110 * lead to an infinite loop when the file gets flushed to disk and all delayed
111 * extents need to be actually allocated. To get around this, we explicitly set
112 * aside a few blocks which will not be reserved in delayed allocation.
113 *
93defd5a
DW
114 * For each AG, we need to reserve enough blocks to replenish a totally empty
115 * AGFL and 4 more to handle a potential split of the file's bmap btree.
52548852
DW
116 */
117unsigned int
118xfs_alloc_set_aside(
119 struct xfs_mount *mp)
120{
93defd5a 121 return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4);
52548852
DW
122}
123
124/*
125 * When deciding how much space to allocate out of an AG, we limit the
126 * allocation maximum size to the size the AG. However, we cannot use all the
127 * blocks in the AG - some are permanently used by metadata. These
128 * blocks are generally:
129 * - the AG superblock, AGF, AGI and AGFL
130 * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
131 * the AGI free inode and rmap btree root blocks.
132 * - blocks on the AGFL according to xfs_alloc_set_aside() limits
133 * - the rmapbt root block
134 *
135 * The AG headers are sector sized, so the amount of space they take up is
136 * dependent on filesystem geometry. The others are all single blocks.
137 */
138unsigned int
139xfs_alloc_ag_max_usable(
140 struct xfs_mount *mp)
141{
142 unsigned int blocks;
143
144 blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
93defd5a 145 blocks += XFS_ALLOCBT_AGFL_RESERVE;
52548852 146 blocks += 3; /* AGF, AGI btree root blocks */
38c26bfd 147 if (xfs_has_finobt(mp))
52548852 148 blocks++; /* finobt root block */
38c26bfd 149 if (xfs_has_rmapbt(mp))
93defd5a 150 blocks++; /* rmap root block */
38c26bfd 151 if (xfs_has_reflink(mp))
d0e853f3 152 blocks++; /* refcount root block */
52548852
DW
153
154 return mp->m_sb.sb_agblocks - blocks;
155}
156
fe033cc8
CH
157/*
158 * Lookup the record equal to [bno, len] in the btree given by cur.
159 */
160STATIC int /* error */
161xfs_alloc_lookup_eq(
162 struct xfs_btree_cur *cur, /* btree cursor */
163 xfs_agblock_t bno, /* starting block of extent */
164 xfs_extlen_t len, /* length of extent */
165 int *stat) /* success/failure */
166{
f6b428a4
BF
167 int error;
168
fe033cc8
CH
169 cur->bc_rec.a.ar_startblock = bno;
170 cur->bc_rec.a.ar_blockcount = len;
f6b428a4 171 error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
c4aa10d0 172 cur->bc_ag.abt.active = (*stat == 1);
f6b428a4 173 return error;
fe033cc8
CH
174}
175
176/*
177 * Lookup the first record greater than or equal to [bno, len]
178 * in the btree given by cur.
179 */
a66d6363 180int /* error */
fe033cc8
CH
181xfs_alloc_lookup_ge(
182 struct xfs_btree_cur *cur, /* btree cursor */
183 xfs_agblock_t bno, /* starting block of extent */
184 xfs_extlen_t len, /* length of extent */
185 int *stat) /* success/failure */
186{
f6b428a4
BF
187 int error;
188
fe033cc8
CH
189 cur->bc_rec.a.ar_startblock = bno;
190 cur->bc_rec.a.ar_blockcount = len;
f6b428a4 191 error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
c4aa10d0 192 cur->bc_ag.abt.active = (*stat == 1);
f6b428a4 193 return error;
fe033cc8
CH
194}
195
196/*
197 * Lookup the first record less than or equal to [bno, len]
198 * in the btree given by cur.
199 */
ce1d802e 200int /* error */
fe033cc8
CH
201xfs_alloc_lookup_le(
202 struct xfs_btree_cur *cur, /* btree cursor */
203 xfs_agblock_t bno, /* starting block of extent */
204 xfs_extlen_t len, /* length of extent */
205 int *stat) /* success/failure */
206{
f6b428a4 207 int error;
fe033cc8
CH
208 cur->bc_rec.a.ar_startblock = bno;
209 cur->bc_rec.a.ar_blockcount = len;
f6b428a4 210 error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
c4aa10d0 211 cur->bc_ag.abt.active = (*stat == 1);
f6b428a4
BF
212 return error;
213}
214
215static inline bool
216xfs_alloc_cur_active(
217 struct xfs_btree_cur *cur)
218{
c4aa10d0 219 return cur && cur->bc_ag.abt.active;
fe033cc8
CH
220}
221
278d0ca1
CH
222/*
223 * Update the record referred to by cur to the value given
224 * by [bno, len].
225 * This either works (return 0) or gets an EFSCORRUPTED error.
226 */
227STATIC int /* error */
228xfs_alloc_update(
229 struct xfs_btree_cur *cur, /* btree cursor */
230 xfs_agblock_t bno, /* starting block of extent */
231 xfs_extlen_t len) /* length of extent */
232{
233 union xfs_btree_rec rec;
234
235 rec.alloc.ar_startblock = cpu_to_be32(bno);
236 rec.alloc.ar_blockcount = cpu_to_be32(len);
237 return xfs_btree_update(cur, &rec);
238}
fe033cc8 239
8cc938fe
CH
240/*
241 * Get the data from the pointed-to record.
242 */
a46db608 243int /* error */
8cc938fe
CH
244xfs_alloc_get_rec(
245 struct xfs_btree_cur *cur, /* btree cursor */
246 xfs_agblock_t *bno, /* output: starting block of extent */
247 xfs_extlen_t *len, /* output: length of extent */
248 int *stat) /* output: success/failure */
249{
9e6c08d4 250 struct xfs_mount *mp = cur->bc_mp;
0800169e 251 struct xfs_perag *pag = cur->bc_ag.pag;
8cc938fe
CH
252 union xfs_btree_rec *rec;
253 int error;
254
255 error = xfs_btree_get_rec(cur, &rec, stat);
a37f7b12
DW
256 if (error || !(*stat))
257 return error;
a37f7b12
DW
258
259 *bno = be32_to_cpu(rec->alloc.ar_startblock);
260 *len = be32_to_cpu(rec->alloc.ar_blockcount);
261
efe80327
CM
262 if (*len == 0)
263 goto out_bad_rec;
264
9e6c08d4 265 /* check for valid extent range, including overflow */
0800169e 266 if (!xfs_verify_agbno(pag, *bno))
9e6c08d4
DC
267 goto out_bad_rec;
268 if (*bno > *bno + *len)
269 goto out_bad_rec;
0800169e 270 if (!xfs_verify_agbno(pag, *bno + *len - 1))
9e6c08d4
DC
271 goto out_bad_rec;
272
273 return 0;
274
275out_bad_rec:
276 xfs_warn(mp,
277 "%s Freespace BTree record corruption in AG %d detected!",
0800169e
DC
278 cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
279 pag->pag_agno);
9e6c08d4
DC
280 xfs_warn(mp,
281 "start block 0x%x block count 0x%x", *bno, *len);
282 return -EFSCORRUPTED;
8cc938fe
CH
283}
284
1da177e4
LT
285/*
286 * Compute aligned version of the found extent.
287 * Takes alignment and min length into account.
288 */
ebf55872 289STATIC bool
1da177e4 290xfs_alloc_compute_aligned(
86fa8af6 291 xfs_alloc_arg_t *args, /* allocation argument structure */
1da177e4
LT
292 xfs_agblock_t foundbno, /* starting block in found extent */
293 xfs_extlen_t foundlen, /* length in found extent */
1da177e4 294 xfs_agblock_t *resbno, /* result block number */
ebf55872
CH
295 xfs_extlen_t *reslen, /* result length */
296 unsigned *busy_gen)
1da177e4 297{
ebf55872
CH
298 xfs_agblock_t bno = foundbno;
299 xfs_extlen_t len = foundlen;
bfe46d4e 300 xfs_extlen_t diff;
ebf55872 301 bool busy;
1da177e4 302
e26f0501 303 /* Trim busy sections out of found extent */
ebf55872 304 busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
e26f0501 305
bfe46d4e
BF
306 /*
307 * If we have a largish extent that happens to start before min_agbno,
308 * see if we can shift it into range...
309 */
310 if (bno < args->min_agbno && bno + len > args->min_agbno) {
311 diff = args->min_agbno - bno;
312 if (len > diff) {
313 bno += diff;
314 len -= diff;
315 }
316 }
317
e26f0501
CH
318 if (args->alignment > 1 && len >= args->minlen) {
319 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
bfe46d4e
BF
320
321 diff = aligned_bno - bno;
e26f0501
CH
322
323 *resbno = aligned_bno;
324 *reslen = diff >= len ? 0 : len - diff;
1da177e4 325 } else {
e26f0501
CH
326 *resbno = bno;
327 *reslen = len;
1da177e4 328 }
ebf55872
CH
329
330 return busy;
1da177e4
LT
331}
332
333/*
334 * Compute best start block and diff for "near" allocations.
335 * freelen >= wantlen already checked by caller.
336 */
337STATIC xfs_extlen_t /* difference value (absolute) */
338xfs_alloc_compute_diff(
339 xfs_agblock_t wantbno, /* target starting block */
340 xfs_extlen_t wantlen, /* target length */
341 xfs_extlen_t alignment, /* target alignment */
292378ed 342 int datatype, /* are we allocating data? */
1da177e4
LT
343 xfs_agblock_t freebno, /* freespace's starting block */
344 xfs_extlen_t freelen, /* freespace's length */
345 xfs_agblock_t *newbnop) /* result: best start block from free */
346{
347 xfs_agblock_t freeend; /* end of freespace extent */
348 xfs_agblock_t newbno1; /* return block number */
349 xfs_agblock_t newbno2; /* other new block number */
350 xfs_extlen_t newlen1=0; /* length with newbno1 */
351 xfs_extlen_t newlen2=0; /* length with newbno2 */
352 xfs_agblock_t wantend; /* end of target extent */
c34d570d 353 bool userdata = datatype & XFS_ALLOC_USERDATA;
1da177e4
LT
354
355 ASSERT(freelen >= wantlen);
356 freeend = freebno + freelen;
357 wantend = wantbno + wantlen;
211d022c
JK
358 /*
359 * We want to allocate from the start of a free extent if it is past
360 * the desired block or if we are allocating user data and the free
361 * extent is before desired block. The second case is there to allow
362 * for contiguous allocation from the remaining free space if the file
363 * grows in the short term.
364 */
365 if (freebno >= wantbno || (userdata && freeend < wantend)) {
1da177e4
LT
366 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
367 newbno1 = NULLAGBLOCK;
368 } else if (freeend >= wantend && alignment > 1) {
369 newbno1 = roundup(wantbno, alignment);
370 newbno2 = newbno1 - alignment;
371 if (newbno1 >= freeend)
372 newbno1 = NULLAGBLOCK;
373 else
374 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
375 if (newbno2 < freebno)
376 newbno2 = NULLAGBLOCK;
377 else
378 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
379 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
380 if (newlen1 < newlen2 ||
381 (newlen1 == newlen2 &&
382 XFS_ABSDIFF(newbno1, wantbno) >
383 XFS_ABSDIFF(newbno2, wantbno)))
384 newbno1 = newbno2;
385 } else if (newbno2 != NULLAGBLOCK)
386 newbno1 = newbno2;
387 } else if (freeend >= wantend) {
388 newbno1 = wantbno;
389 } else if (alignment > 1) {
390 newbno1 = roundup(freeend - wantlen, alignment);
391 if (newbno1 > freeend - wantlen &&
392 newbno1 - alignment >= freebno)
393 newbno1 -= alignment;
394 else if (newbno1 >= freeend)
395 newbno1 = NULLAGBLOCK;
396 } else
397 newbno1 = freeend - wantlen;
398 *newbnop = newbno1;
399 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
400}
401
402/*
403 * Fix up the length, based on mod and prod.
404 * len should be k * prod + mod for some k.
405 * If len is too small it is returned unchanged.
406 * If len hits maxlen it is left alone.
407 */
408STATIC void
409xfs_alloc_fix_len(
410 xfs_alloc_arg_t *args) /* allocation argument structure */
411{
412 xfs_extlen_t k;
413 xfs_extlen_t rlen;
414
415 ASSERT(args->mod < args->prod);
416 rlen = args->len;
417 ASSERT(rlen >= args->minlen);
418 ASSERT(rlen <= args->maxlen);
419 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
420 (args->mod == 0 && rlen < args->prod))
421 return;
422 k = rlen % args->prod;
423 if (k == args->mod)
424 return;
30265117
JK
425 if (k > args->mod)
426 rlen = rlen - (k - args->mod);
427 else
428 rlen = rlen - args->prod + (args->mod - k);
3790a8cd 429 /* casts to (int) catch length underflows */
30265117
JK
430 if ((int)rlen < (int)args->minlen)
431 return;
432 ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
433 ASSERT(rlen % args->prod == args->mod);
54fee133
CH
434 ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
435 rlen + args->minleft);
1da177e4
LT
436 args->len = rlen;
437}
438
1da177e4
LT
439/*
440 * Update the two btrees, logically removing from freespace the extent
441 * starting at rbno, rlen blocks. The extent is contained within the
442 * actual (current) free extent fbno for flen blocks.
443 * Flags are passed in indicating whether the cursors are set to the
444 * relevant records.
445 */
446STATIC int /* error code */
447xfs_alloc_fixup_trees(
ae127f08
DW
448 struct xfs_btree_cur *cnt_cur, /* cursor for by-size btree */
449 struct xfs_btree_cur *bno_cur, /* cursor for by-block btree */
1da177e4
LT
450 xfs_agblock_t fbno, /* starting block of free extent */
451 xfs_extlen_t flen, /* length of free extent */
452 xfs_agblock_t rbno, /* starting block of returned extent */
453 xfs_extlen_t rlen, /* length of returned extent */
454 int flags) /* flags, XFSA_FIXUP_... */
455{
456 int error; /* error code */
457 int i; /* operation results */
458 xfs_agblock_t nfbno1; /* first new free startblock */
459 xfs_agblock_t nfbno2; /* second new free startblock */
460 xfs_extlen_t nflen1=0; /* first new free length */
461 xfs_extlen_t nflen2=0; /* second new free length */
5fb5aeee
ES
462 struct xfs_mount *mp;
463
464 mp = cnt_cur->bc_mp;
1da177e4
LT
465
466 /*
467 * Look up the record in the by-size tree if necessary.
468 */
469 if (flags & XFSA_FIXUP_CNT_OK) {
470#ifdef DEBUG
471 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
472 return error;
f9e03706
DW
473 if (XFS_IS_CORRUPT(mp,
474 i != 1 ||
475 nfbno1 != fbno ||
476 nflen1 != flen))
477 return -EFSCORRUPTED;
1da177e4
LT
478#endif
479 } else {
480 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
481 return error;
f9e03706
DW
482 if (XFS_IS_CORRUPT(mp, i != 1))
483 return -EFSCORRUPTED;
1da177e4
LT
484 }
485 /*
486 * Look up the record in the by-block tree if necessary.
487 */
488 if (flags & XFSA_FIXUP_BNO_OK) {
489#ifdef DEBUG
490 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
491 return error;
f9e03706
DW
492 if (XFS_IS_CORRUPT(mp,
493 i != 1 ||
494 nfbno1 != fbno ||
495 nflen1 != flen))
496 return -EFSCORRUPTED;
1da177e4
LT
497#endif
498 } else {
499 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
500 return error;
f9e03706
DW
501 if (XFS_IS_CORRUPT(mp, i != 1))
502 return -EFSCORRUPTED;
1da177e4 503 }
7cc95a82 504
1da177e4 505#ifdef DEBUG
7cc95a82
CH
506 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
507 struct xfs_btree_block *bnoblock;
508 struct xfs_btree_block *cntblock;
509
6ca444cf
DW
510 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
511 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);
1da177e4 512
f9e03706
DW
513 if (XFS_IS_CORRUPT(mp,
514 bnoblock->bb_numrecs !=
515 cntblock->bb_numrecs))
516 return -EFSCORRUPTED;
1da177e4
LT
517 }
518#endif
7cc95a82 519
1da177e4
LT
520 /*
521 * Deal with all four cases: the allocated record is contained
522 * within the freespace record, so we can have new freespace
523 * at either (or both) end, or no freespace remaining.
524 */
525 if (rbno == fbno && rlen == flen)
526 nfbno1 = nfbno2 = NULLAGBLOCK;
527 else if (rbno == fbno) {
528 nfbno1 = rbno + rlen;
529 nflen1 = flen - rlen;
530 nfbno2 = NULLAGBLOCK;
531 } else if (rbno + rlen == fbno + flen) {
532 nfbno1 = fbno;
533 nflen1 = flen - rlen;
534 nfbno2 = NULLAGBLOCK;
535 } else {
536 nfbno1 = fbno;
537 nflen1 = rbno - fbno;
538 nfbno2 = rbno + rlen;
539 nflen2 = (fbno + flen) - nfbno2;
540 }
541 /*
542 * Delete the entry from the by-size btree.
543 */
91cca5df 544 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 545 return error;
f9e03706
DW
546 if (XFS_IS_CORRUPT(mp, i != 1))
547 return -EFSCORRUPTED;
1da177e4
LT
548 /*
549 * Add new by-size btree entry(s).
550 */
551 if (nfbno1 != NULLAGBLOCK) {
552 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
553 return error;
f9e03706
DW
554 if (XFS_IS_CORRUPT(mp, i != 0))
555 return -EFSCORRUPTED;
4b22a571 556 if ((error = xfs_btree_insert(cnt_cur, &i)))
1da177e4 557 return error;
f9e03706
DW
558 if (XFS_IS_CORRUPT(mp, i != 1))
559 return -EFSCORRUPTED;
1da177e4
LT
560 }
561 if (nfbno2 != NULLAGBLOCK) {
562 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
563 return error;
f9e03706
DW
564 if (XFS_IS_CORRUPT(mp, i != 0))
565 return -EFSCORRUPTED;
4b22a571 566 if ((error = xfs_btree_insert(cnt_cur, &i)))
1da177e4 567 return error;
f9e03706
DW
568 if (XFS_IS_CORRUPT(mp, i != 1))
569 return -EFSCORRUPTED;
1da177e4
LT
570 }
571 /*
572 * Fix up the by-block btree entry(s).
573 */
574 if (nfbno1 == NULLAGBLOCK) {
575 /*
576 * No remaining freespace, just delete the by-block tree entry.
577 */
91cca5df 578 if ((error = xfs_btree_delete(bno_cur, &i)))
1da177e4 579 return error;
f9e03706
DW
580 if (XFS_IS_CORRUPT(mp, i != 1))
581 return -EFSCORRUPTED;
1da177e4
LT
582 } else {
583 /*
584 * Update the by-block entry to start later|be shorter.
585 */
586 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
587 return error;
588 }
589 if (nfbno2 != NULLAGBLOCK) {
590 /*
591 * 2 resulting free entries, need to add one.
592 */
593 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
594 return error;
f9e03706
DW
595 if (XFS_IS_CORRUPT(mp, i != 0))
596 return -EFSCORRUPTED;
4b22a571 597 if ((error = xfs_btree_insert(bno_cur, &i)))
1da177e4 598 return error;
f9e03706
DW
599 if (XFS_IS_CORRUPT(mp, i != 1))
600 return -EFSCORRUPTED;
1da177e4
LT
601 }
602 return 0;
603}
604
a6a781a5 605static xfs_failaddr_t
612cfbfe 606xfs_agfl_verify(
bb80c6d7
DC
607 struct xfs_buf *bp)
608{
dbd329f1 609 struct xfs_mount *mp = bp->b_mount;
bb80c6d7 610 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
183606d8 611 __be32 *agfl_bno = xfs_buf_to_agfl_bno(bp);
bb80c6d7
DC
612 int i;
613
b5572597
DW
614 /*
615 * There is no verification of non-crc AGFLs because mkfs does not
616 * initialise the AGFL to zero or NULL. Hence the only valid part of the
617 * AGFL is what the AGF says is active. We can't get to the AGF, so we
618 * can't verify just those entries are valid.
619 */
38c26bfd 620 if (!xfs_has_crc(mp))
b5572597
DW
621 return NULL;
622
39708c20 623 if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
a6a781a5 624 return __this_address;
39708c20 625 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
a6a781a5 626 return __this_address;
77c95bba
CH
627 /*
628 * during growfs operations, the perag is not fully initialised,
629 * so we can't use it for any useful checking. growfs ensures we can't
630 * use it by using uncached buffers that don't have the perag attached
631 * so we can detect and avoid this problem.
632 */
633 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
a6a781a5 634 return __this_address;
77c95bba 635
a78ee256 636 for (i = 0; i < xfs_agfl_size(mp); i++) {
183606d8
CH
637 if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
638 be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
a6a781a5 639 return __this_address;
bb80c6d7 640 }
a45086e2 641
a6a781a5
DW
642 if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
643 return __this_address;
644 return NULL;
77c95bba
CH
645}
646
647static void
648xfs_agfl_read_verify(
649 struct xfs_buf *bp)
650{
dbd329f1 651 struct xfs_mount *mp = bp->b_mount;
bc1a09b8 652 xfs_failaddr_t fa;
77c95bba
CH
653
654 /*
655 * There is no verification of non-crc AGFLs because mkfs does not
656 * initialise the AGFL to zero or NULL. Hence the only valid part of the
657 * AGFL is what the AGF says is active. We can't get to the AGF, so we
658 * can't verify just those entries are valid.
659 */
38c26bfd 660 if (!xfs_has_crc(mp))
77c95bba
CH
661 return;
662
ce5028cf 663 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
bc1a09b8
DW
664 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
665 else {
666 fa = xfs_agfl_verify(bp);
667 if (fa)
668 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
669 }
612cfbfe
DC
670}
671
1813dd64 672static void
612cfbfe
DC
673xfs_agfl_write_verify(
674 struct xfs_buf *bp)
675{
dbd329f1 676 struct xfs_mount *mp = bp->b_mount;
fb1755a6 677 struct xfs_buf_log_item *bip = bp->b_log_item;
bc1a09b8 678 xfs_failaddr_t fa;
612cfbfe 679
77c95bba 680 /* no verification of non-crc AGFLs */
38c26bfd 681 if (!xfs_has_crc(mp))
77c95bba
CH
682 return;
683
bc1a09b8
DW
684 fa = xfs_agfl_verify(bp);
685 if (fa) {
686 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
77c95bba
CH
687 return;
688 }
689
690 if (bip)
691 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
692
f1dbcd7e 693 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
bb80c6d7
DC
694}
695
1813dd64 696const struct xfs_buf_ops xfs_agfl_buf_ops = {
233135b7 697 .name = "xfs_agfl",
39708c20 698 .magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
1813dd64
DC
699 .verify_read = xfs_agfl_read_verify,
700 .verify_write = xfs_agfl_write_verify,
b5572597 701 .verify_struct = xfs_agfl_verify,
1813dd64
DC
702};
703
1da177e4
LT
704/*
705 * Read in the allocation group free block array.
706 */
cec7bb7d 707int
1da177e4 708xfs_alloc_read_agfl(
cec7bb7d
DC
709 struct xfs_perag *pag,
710 struct xfs_trans *tp,
711 struct xfs_buf **bpp)
1da177e4 712{
cec7bb7d
DC
713 struct xfs_mount *mp = pag->pag_mount;
714 struct xfs_buf *bp;
715 int error;
1da177e4 716
1da177e4
LT
717 error = xfs_trans_read_buf(
718 mp, tp, mp->m_ddev_targp,
cec7bb7d 719 XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
1813dd64 720 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
1da177e4
LT
721 if (error)
722 return error;
38f23232 723 xfs_buf_set_ref(bp, XFS_AGFL_REF);
1da177e4
LT
724 *bpp = bp;
725 return 0;
726}
727
ecb6928f
CH
728STATIC int
729xfs_alloc_update_counters(
730 struct xfs_trans *tp,
ecb6928f
CH
731 struct xfs_buf *agbp,
732 long len)
733{
9798f615 734 struct xfs_agf *agf = agbp->b_addr;
ecb6928f 735
92a00544 736 agbp->b_pag->pagf_freeblks += len;
ecb6928f
CH
737 be32_add_cpu(&agf->agf_freeblks, len);
738
ecb6928f 739 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
a5155b87 740 be32_to_cpu(agf->agf_length))) {
8d57c216 741 xfs_buf_mark_corrupt(agbp);
2451337d 742 return -EFSCORRUPTED;
a5155b87 743 }
ecb6928f
CH
744
745 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
746 return 0;
747}
748
1da177e4 749/*
f5e7dbea 750 * Block allocation algorithm and data structures.
1da177e4 751 */
f5e7dbea
BF
752struct xfs_alloc_cur {
753 struct xfs_btree_cur *cnt; /* btree cursors */
754 struct xfs_btree_cur *bnolt;
755 struct xfs_btree_cur *bnogt;
dc8e69bd 756 xfs_extlen_t cur_len;/* current search length */
c62321a2
BF
757 xfs_agblock_t rec_bno;/* extent startblock */
758 xfs_extlen_t rec_len;/* extent length */
759 xfs_agblock_t bno; /* alloc bno */
760 xfs_extlen_t len; /* alloc len */
761 xfs_extlen_t diff; /* diff from search bno */
d6d3aff2
BF
762 unsigned int busy_gen;/* busy state */
763 bool busy;
f5e7dbea
BF
764};
765
766/*
767 * Set up cursors, etc. in the extent allocation cursor. This function can be
768 * called multiple times to reset an initialized structure without having to
769 * reallocate cursors.
770 */
771static int
772xfs_alloc_cur_setup(
773 struct xfs_alloc_arg *args,
774 struct xfs_alloc_cur *acur)
775{
776 int error;
777 int i;
778
779 ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO);
780
dc8e69bd 781 acur->cur_len = args->maxlen;
c62321a2
BF
782 acur->rec_bno = 0;
783 acur->rec_len = 0;
784 acur->bno = 0;
785 acur->len = 0;
396bbf3c 786 acur->diff = -1;
d6d3aff2
BF
787 acur->busy = false;
788 acur->busy_gen = 0;
789
f5e7dbea
BF
790 /*
791 * Perform an initial cntbt lookup to check for availability of maxlen
792 * extents. If this fails, we'll return -ENOSPC to signal the caller to
793 * attempt a small allocation.
794 */
795 if (!acur->cnt)
796 acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
289d38d2 797 args->agbp, args->pag, XFS_BTNUM_CNT);
f5e7dbea
BF
798 error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
799 if (error)
800 return error;
801
802 /*
803 * Allocate the bnobt left and right search cursors.
804 */
805 if (!acur->bnolt)
806 acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
289d38d2 807 args->agbp, args->pag, XFS_BTNUM_BNO);
f5e7dbea
BF
808 if (!acur->bnogt)
809 acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
289d38d2 810 args->agbp, args->pag, XFS_BTNUM_BNO);
f5e7dbea
BF
811 return i == 1 ? 0 : -ENOSPC;
812}
813
814static void
815xfs_alloc_cur_close(
816 struct xfs_alloc_cur *acur,
817 bool error)
818{
819 int cur_error = XFS_BTREE_NOERROR;
820
821 if (error)
822 cur_error = XFS_BTREE_ERROR;
823
824 if (acur->cnt)
825 xfs_btree_del_cursor(acur->cnt, cur_error);
826 if (acur->bnolt)
827 xfs_btree_del_cursor(acur->bnolt, cur_error);
828 if (acur->bnogt)
829 xfs_btree_del_cursor(acur->bnogt, cur_error);
830 acur->cnt = acur->bnolt = acur->bnogt = NULL;
831}
1da177e4 832
396bbf3c
BF
833/*
834 * Check an extent for allocation and track the best available candidate in the
835 * allocation structure. The cursor is deactivated if it has entered an out of
836 * range state based on allocation arguments. Optionally return the extent
837 * extent geometry and allocation status if requested by the caller.
838 */
839static int
840xfs_alloc_cur_check(
841 struct xfs_alloc_arg *args,
842 struct xfs_alloc_cur *acur,
843 struct xfs_btree_cur *cur,
844 int *new)
845{
846 int error, i;
847 xfs_agblock_t bno, bnoa, bnew;
848 xfs_extlen_t len, lena, diff = -1;
849 bool busy;
850 unsigned busy_gen = 0;
851 bool deactivate = false;
fec0afda 852 bool isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;
396bbf3c
BF
853
854 *new = 0;
855
856 error = xfs_alloc_get_rec(cur, &bno, &len, &i);
857 if (error)
858 return error;
f9e03706
DW
859 if (XFS_IS_CORRUPT(args->mp, i != 1))
860 return -EFSCORRUPTED;
396bbf3c
BF
861
862 /*
863 * Check minlen and deactivate a cntbt cursor if out of acceptable size
864 * range (i.e., walking backwards looking for a minlen extent).
865 */
866 if (len < args->minlen) {
fec0afda 867 deactivate = !isbnobt;
396bbf3c
BF
868 goto out;
869 }
870
871 busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
872 &busy_gen);
873 acur->busy |= busy;
874 if (busy)
875 acur->busy_gen = busy_gen;
876 /* deactivate a bnobt cursor outside of locality range */
fec0afda
BF
877 if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
878 deactivate = isbnobt;
396bbf3c 879 goto out;
fec0afda 880 }
396bbf3c
BF
881 if (lena < args->minlen)
882 goto out;
883
884 args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
885 xfs_alloc_fix_len(args);
886 ASSERT(args->len >= args->minlen);
887 if (args->len < acur->len)
888 goto out;
889
890 /*
891 * We have an aligned record that satisfies minlen and beats or matches
892 * the candidate extent size. Compare locality for near allocation mode.
893 */
894 ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
895 diff = xfs_alloc_compute_diff(args->agbno, args->len,
896 args->alignment, args->datatype,
897 bnoa, lena, &bnew);
898 if (bnew == NULLAGBLOCK)
899 goto out;
fec0afda
BF
900
901 /*
902 * Deactivate a bnobt cursor with worse locality than the current best.
903 */
904 if (diff > acur->diff) {
905 deactivate = isbnobt;
396bbf3c 906 goto out;
fec0afda 907 }
396bbf3c
BF
908
909 ASSERT(args->len > acur->len ||
910 (args->len == acur->len && diff <= acur->diff));
911 acur->rec_bno = bno;
912 acur->rec_len = len;
913 acur->bno = bnew;
914 acur->len = args->len;
915 acur->diff = diff;
916 *new = 1;
917
78d7aabd
BF
918 /*
919 * We're done if we found a perfect allocation. This only deactivates
920 * the current cursor, but this is just an optimization to terminate a
921 * cntbt search that otherwise runs to the edge of the tree.
922 */
923 if (acur->diff == 0 && acur->len == args->maxlen)
924 deactivate = true;
396bbf3c
BF
925out:
926 if (deactivate)
c4aa10d0 927 cur->bc_ag.abt.active = false;
396bbf3c
BF
928 trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
929 *new);
930 return 0;
931}
932
d2968825
BF
933/*
934 * Complete an allocation of a candidate extent. Remove the extent from both
935 * trees and update the args structure.
936 */
937STATIC int
938xfs_alloc_cur_finish(
939 struct xfs_alloc_arg *args,
940 struct xfs_alloc_cur *acur)
941{
9798f615 942 struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
d2968825
BF
943 int error;
944
945 ASSERT(acur->cnt && acur->bnolt);
946 ASSERT(acur->bno >= acur->rec_bno);
947 ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
9798f615 948 ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));
d2968825
BF
949
950 error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
951 acur->rec_len, acur->bno, acur->len, 0);
952 if (error)
953 return error;
954
955 args->agbno = acur->bno;
956 args->len = acur->len;
957 args->wasfromfl = 0;
958
959 trace_xfs_alloc_cur(args);
960 return 0;
961}
962
dc8e69bd
BF
963/*
964 * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
965 * bno optimized lookup to search for extents with ideal size and locality.
966 */
967STATIC int
968xfs_alloc_cntbt_iter(
969 struct xfs_alloc_arg *args,
970 struct xfs_alloc_cur *acur)
971{
972 struct xfs_btree_cur *cur = acur->cnt;
973 xfs_agblock_t bno;
974 xfs_extlen_t len, cur_len;
975 int error;
976 int i;
977
978 if (!xfs_alloc_cur_active(cur))
979 return 0;
980
981 /* locality optimized lookup */
982 cur_len = acur->cur_len;
983 error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
984 if (error)
985 return error;
986 if (i == 0)
987 return 0;
988 error = xfs_alloc_get_rec(cur, &bno, &len, &i);
989 if (error)
990 return error;
991
992 /* check the current record and update search length from it */
993 error = xfs_alloc_cur_check(args, acur, cur, &i);
994 if (error)
995 return error;
996 ASSERT(len >= acur->cur_len);
997 acur->cur_len = len;
998
999 /*
1000 * We looked up the first record >= [agbno, len] above. The agbno is a
1001 * secondary key and so the current record may lie just before or after
1002 * agbno. If it is past agbno, check the previous record too so long as
1003 * the length matches as it may be closer. Don't check a smaller record
1004 * because that could deactivate our cursor.
1005 */
1006 if (bno > args->agbno) {
1007 error = xfs_btree_decrement(cur, 0, &i);
1008 if (!error && i) {
1009 error = xfs_alloc_get_rec(cur, &bno, &len, &i);
1010 if (!error && i && len == acur->cur_len)
1011 error = xfs_alloc_cur_check(args, acur, cur,
1012 &i);
1013 }
1014 if (error)
1015 return error;
1016 }
1017
1018 /*
1019 * Increment the search key until we find at least one allocation
1020 * candidate or if the extent we found was larger. Otherwise, double the
1021 * search key to optimize the search. Efficiency is more important here
1022 * than absolute best locality.
1023 */
1024 cur_len <<= 1;
1025 if (!acur->len || acur->cur_len >= cur_len)
1026 acur->cur_len++;
1027 else
1028 acur->cur_len = cur_len;
1029
1030 return error;
1031}
1032
c63cdd4f
BF
1033/*
1034 * Deal with the case where only small freespaces remain. Either return the
1035 * contents of the last freespace record, or allocate space from the freelist if
1036 * there is nothing in the tree.
1037 */
1038STATIC int /* error */
1039xfs_alloc_ag_vextent_small(
1040 struct xfs_alloc_arg *args, /* allocation argument structure */
1041 struct xfs_btree_cur *ccur, /* optional by-size cursor */
1042 xfs_agblock_t *fbnop, /* result block number */
1043 xfs_extlen_t *flenp, /* result length */
1044 int *stat) /* status: 0-freelist, 1-normal/none */
1045{
9798f615 1046 struct xfs_agf *agf = args->agbp->b_addr;
c63cdd4f
BF
1047 int error = 0;
1048 xfs_agblock_t fbno = NULLAGBLOCK;
1049 xfs_extlen_t flen = 0;
6691cd92 1050 int i = 0;
c63cdd4f 1051
6691cd92
BF
1052 /*
1053 * If a cntbt cursor is provided, try to allocate the largest record in
1054 * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
1055 * allocation. Make sure to respect minleft even when pulling from the
1056 * freelist.
1057 */
1058 if (ccur)
1059 error = xfs_btree_decrement(ccur, 0, &i);
c63cdd4f
BF
1060 if (error)
1061 goto error;
1062 if (i) {
1063 error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
1064 if (error)
1065 goto error;
f9e03706
DW
1066 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1067 error = -EFSCORRUPTED;
1068 goto error;
1069 }
c63cdd4f
BF
1070 goto out;
1071 }
1072
1073 if (args->minlen != 1 || args->alignment != 1 ||
1074 args->resv == XFS_AG_RESV_AGFL ||
9798f615 1075 be32_to_cpu(agf->agf_flcount) <= args->minleft)
c63cdd4f
BF
1076 goto out;
1077
49f0d84e
DC
1078 error = xfs_alloc_get_freelist(args->pag, args->tp, args->agbp,
1079 &fbno, 0);
c63cdd4f
BF
1080 if (error)
1081 goto error;
1082 if (fbno == NULLAGBLOCK)
1083 goto out;
1084
45d06621 1085 xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
c34d570d 1086 (args->datatype & XFS_ALLOC_NOBUSY));
c63cdd4f 1087
c34d570d 1088 if (args->datatype & XFS_ALLOC_USERDATA) {
c63cdd4f
BF
1089 struct xfs_buf *bp;
1090
ee647f85
DW
1091 error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
1092 XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
1093 args->mp->m_bsize, 0, &bp);
1094 if (error)
c63cdd4f 1095 goto error;
c63cdd4f
BF
1096 xfs_trans_binval(args->tp, bp);
1097 }
7e36a3a6
BF
1098 *fbnop = args->agbno = fbno;
1099 *flenp = args->len = 1;
9798f615 1100 if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
f9e03706
DW
1101 error = -EFSCORRUPTED;
1102 goto error;
1103 }
c63cdd4f
BF
1104 args->wasfromfl = 1;
1105 trace_xfs_alloc_small_freelist(args);
1106
1107 /*
1108 * If we're feeding an AGFL block to something that doesn't live in the
1109 * free space, we need to clear out the OWN_AG rmap.
1110 */
fa9c3c19 1111 error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1,
c63cdd4f
BF
1112 &XFS_RMAP_OINFO_AG);
1113 if (error)
1114 goto error;
1115
1116 *stat = 0;
1117 return 0;
1118
1119out:
1120 /*
1121 * Can't do the allocation, give up.
1122 */
1123 if (flen < args->minlen) {
1124 args->agbno = NULLAGBLOCK;
1125 trace_xfs_alloc_small_notenough(args);
1126 flen = 0;
1127 }
1128 *fbnop = fbno;
1129 *flenp = flen;
1130 *stat = 1;
1131 trace_xfs_alloc_small_done(args);
1132 return 0;
1133
1134error:
1135 trace_xfs_alloc_small_error(args);
1136 return error;
1137}
1138
1da177e4
LT
1139/*
1140 * Allocate a variable extent in the allocation group agno.
1141 * Type and bno are used to determine where in the allocation group the
1142 * extent will start.
1143 * Extent's length (returned in *len) will be between minlen and maxlen,
1144 * and of the form k * prod + mod unless there's nothing that large.
1145 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1146 */
1147STATIC int /* error */
1148xfs_alloc_ag_vextent(
1149 xfs_alloc_arg_t *args) /* argument structure for allocation */
1150{
1151 int error=0;
1da177e4
LT
1152
1153 ASSERT(args->minlen > 0);
1154 ASSERT(args->maxlen > 0);
1155 ASSERT(args->minlen <= args->maxlen);
1156 ASSERT(args->mod < args->prod);
1157 ASSERT(args->alignment > 0);
3fd129b6 1158
1da177e4
LT
1159 /*
1160 * Branch to correct routine based on the type.
1161 */
1162 args->wasfromfl = 0;
1163 switch (args->type) {
1164 case XFS_ALLOCTYPE_THIS_AG:
1165 error = xfs_alloc_ag_vextent_size(args);
1166 break;
1167 case XFS_ALLOCTYPE_NEAR_BNO:
1168 error = xfs_alloc_ag_vextent_near(args);
1169 break;
1170 case XFS_ALLOCTYPE_THIS_BNO:
1171 error = xfs_alloc_ag_vextent_exact(args);
1172 break;
1173 default:
1174 ASSERT(0);
1175 /* NOTREACHED */
1176 }
ecb6928f
CH
1177
1178 if (error || args->agbno == NULLAGBLOCK)
1da177e4 1179 return error;
ecb6928f
CH
1180
1181 ASSERT(args->len >= args->minlen);
1182 ASSERT(args->len <= args->maxlen);
0ab32086 1183 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
ecb6928f
CH
1184 ASSERT(args->agbno % args->alignment == 0);
1185
673930c3 1186 /* if not file data, insert new block into the reverse map btree */
33df3a9c 1187 if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
fa9c3c19 1188 error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
673930c3
DW
1189 args->agbno, args->len, &args->oinfo);
1190 if (error)
1191 return error;
1192 }
1193
ecb6928f 1194 if (!args->wasfromfl) {
92a00544 1195 error = xfs_alloc_update_counters(args->tp, args->agbp,
ecb6928f
CH
1196 -((long)(args->len)));
1197 if (error)
1198 return error;
1199
45d06621 1200 ASSERT(!xfs_extent_busy_search(args->mp, args->pag,
e26f0501 1201 args->agbno, args->len));
1da177e4 1202 }
ecb6928f 1203
3fd129b6 1204 xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
ecb6928f 1205
ff6d6af2
BD
1206 XFS_STATS_INC(args->mp, xs_allocx);
1207 XFS_STATS_ADD(args->mp, xs_allocb, args->len);
ecb6928f 1208 return error;
1da177e4
LT
1209}
1210
1211/*
1212 * Allocate a variable extent at exactly agno/bno.
1213 * Extent's length (returned in *len) will be between minlen and maxlen,
1214 * and of the form k * prod + mod unless there's nothing that large.
1215 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
1216 */
1217STATIC int /* error */
1218xfs_alloc_ag_vextent_exact(
1219 xfs_alloc_arg_t *args) /* allocation argument structure */
1220{
9798f615 1221 struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
ae127f08
DW
1222 struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
1223 struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
1da177e4
LT
1224 int error;
1225 xfs_agblock_t fbno; /* start block of found extent */
1da177e4 1226 xfs_extlen_t flen; /* length of found extent */
ebf55872
CH
1227 xfs_agblock_t tbno; /* start block of busy extent */
1228 xfs_extlen_t tlen; /* length of busy extent */
1229 xfs_agblock_t tend; /* end block of busy extent */
1da177e4 1230 int i; /* success/failure of operation */
ebf55872 1231 unsigned busy_gen;
1da177e4
LT
1232
1233 ASSERT(args->alignment == 1);
9f9baab3 1234
1da177e4
LT
1235 /*
1236 * Allocate/initialize a cursor for the by-number freespace btree.
1237 */
561f7d17 1238 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
289d38d2 1239 args->pag, XFS_BTNUM_BNO);
9f9baab3 1240
1da177e4
LT
1241 /*
1242 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
1243 * Look for the closest free block <= bno, it must contain bno
1244 * if any free block does.
1245 */
9f9baab3
CH
1246 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
1247 if (error)
1da177e4 1248 goto error0;
9f9baab3
CH
1249 if (!i)
1250 goto not_found;
1251
1da177e4
LT
1252 /*
1253 * Grab the freespace record.
1254 */
9f9baab3
CH
1255 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
1256 if (error)
1da177e4 1257 goto error0;
f9e03706
DW
1258 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1259 error = -EFSCORRUPTED;
1260 goto error0;
1261 }
1da177e4 1262 ASSERT(fbno <= args->agbno);
9f9baab3 1263
1da177e4 1264 /*
e26f0501 1265 * Check for overlapping busy extents.
1da177e4 1266 */
ebf55872
CH
1267 tbno = fbno;
1268 tlen = flen;
1269 xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
e26f0501
CH
1270
1271 /*
1272 * Give up if the start of the extent is busy, or the freespace isn't
1273 * long enough for the minimum request.
1274 */
1275 if (tbno > args->agbno)
1276 goto not_found;
1277 if (tlen < args->minlen)
1278 goto not_found;
1279 tend = tbno + tlen;
1280 if (tend < args->agbno + args->minlen)
9f9baab3
CH
1281 goto not_found;
1282
1da177e4
LT
1283 /*
1284 * End of extent will be smaller of the freespace end and the
1285 * maximal requested end.
9f9baab3 1286 *
1da177e4
LT
1287 * Fix the length according to mod and prod if given.
1288 */
81463b1c
CS
1289 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
1290 - args->agbno;
1da177e4 1291 xfs_alloc_fix_len(args);
81463b1c 1292 ASSERT(args->agbno + args->len <= tend);
9f9baab3 1293
1da177e4 1294 /*
81463b1c 1295 * We are allocating agbno for args->len
1da177e4
LT
1296 * Allocate/initialize a cursor for the by-size btree.
1297 */
561f7d17 1298 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
289d38d2 1299 args->pag, XFS_BTNUM_CNT);
9798f615 1300 ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
9f9baab3
CH
1301 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
1302 args->len, XFSA_FIXUP_BNO_OK);
1303 if (error) {
1da177e4
LT
1304 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1305 goto error0;
1306 }
9f9baab3 1307
1da177e4
LT
1308 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1309 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
0b1b213f 1310
1da177e4 1311 args->wasfromfl = 0;
9f9baab3
CH
1312 trace_xfs_alloc_exact_done(args);
1313 return 0;
1314
1315not_found:
1316 /* Didn't find it, return null. */
1317 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1318 args->agbno = NULLAGBLOCK;
1319 trace_xfs_alloc_exact_notfound(args);
1da177e4
LT
1320 return 0;
1321
1322error0:
1323 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
0b1b213f 1324 trace_xfs_alloc_exact_error(args);
1da177e4
LT
1325 return error;
1326}
1327
489a150f 1328/*
78d7aabd
BF
1329 * Search a given number of btree records in a given direction. Check each
1330 * record against the good extent we've already found.
489a150f
CH
1331 */
1332STATIC int
78d7aabd 1333xfs_alloc_walk_iter(
fec0afda
BF
1334 struct xfs_alloc_arg *args,
1335 struct xfs_alloc_cur *acur,
1336 struct xfs_btree_cur *cur,
78d7aabd
BF
1337 bool increment,
1338 bool find_one, /* quit on first candidate */
1339 int count, /* rec count (-1 for infinite) */
1340 int *stat)
489a150f 1341{
489a150f
CH
1342 int error;
1343 int i;
489a150f 1344
78d7aabd
BF
1345 *stat = 0;
1346
489a150f 1347 /*
fec0afda
BF
1348 * Search so long as the cursor is active or we find a better extent.
1349 * The cursor is deactivated if it extends beyond the range of the
1350 * current allocation candidate.
489a150f 1351 */
78d7aabd 1352 while (xfs_alloc_cur_active(cur) && count) {
fec0afda 1353 error = xfs_alloc_cur_check(args, acur, cur, &i);
489a150f 1354 if (error)
fec0afda 1355 return error;
78d7aabd
BF
1356 if (i == 1) {
1357 *stat = 1;
1358 if (find_one)
1359 break;
1360 }
fec0afda
BF
1361 if (!xfs_alloc_cur_active(cur))
1362 break;
489a150f 1363
fec0afda
BF
1364 if (increment)
1365 error = xfs_btree_increment(cur, 0, &i);
489a150f 1366 else
fec0afda 1367 error = xfs_btree_decrement(cur, 0, &i);
489a150f 1368 if (error)
fec0afda
BF
1369 return error;
1370 if (i == 0)
c4aa10d0 1371 cur->bc_ag.abt.active = false;
78d7aabd
BF
1372
1373 if (count > 0)
1374 count--;
fec0afda 1375 }
489a150f 1376
489a150f 1377 return 0;
489a150f
CH
1378}
1379
0e26d5ca 1380/*
dc8e69bd
BF
1381 * Search the by-bno and by-size btrees in parallel in search of an extent with
1382 * ideal locality based on the NEAR mode ->agbno locality hint.
0e26d5ca
BF
1383 */
1384STATIC int
dc8e69bd 1385xfs_alloc_ag_vextent_locality(
0e26d5ca
BF
1386 struct xfs_alloc_arg *args,
1387 struct xfs_alloc_cur *acur,
1388 int *stat)
1389{
1390 struct xfs_btree_cur *fbcur = NULL;
1391 int error;
1392 int i;
1393 bool fbinc;
1394
1395 ASSERT(acur->len == 0);
1396 ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
1397
1398 *stat = 0;
1399
dc8e69bd
BF
1400 error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
1401 if (error)
1402 return error;
0e26d5ca
BF
1403 error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
1404 if (error)
1405 return error;
1406 error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
1407 if (error)
1408 return error;
1409
1410 /*
dc8e69bd
BF
1411 * Search the bnobt and cntbt in parallel. Search the bnobt left and
1412 * right and lookup the closest extent to the locality hint for each
1413 * extent size key in the cntbt. The entire search terminates
1414 * immediately on a bnobt hit because that means we've found best case
1415 * locality. Otherwise the search continues until the cntbt cursor runs
1416 * off the end of the tree. If no allocation candidate is found at this
1417 * point, give up on locality, walk backwards from the end of the cntbt
1418 * and take the first available extent.
1419 *
1420 * The parallel tree searches balance each other out to provide fairly
1421 * consistent performance for various situations. The bnobt search can
1422 * have pathological behavior in the worst case scenario of larger
1423 * allocation requests and fragmented free space. On the other hand, the
1424 * bnobt is able to satisfy most smaller allocation requests much more
1425 * quickly than the cntbt. The cntbt search can sift through fragmented
1426 * free space and sets of free extents for larger allocation requests
1427 * more quickly than the bnobt. Since the locality hint is just a hint
1428 * and we don't want to scan the entire bnobt for perfect locality, the
1429 * cntbt search essentially bounds the bnobt search such that we can
1430 * find good enough locality at reasonable performance in most cases.
0e26d5ca
BF
1431 */
1432 while (xfs_alloc_cur_active(acur->bnolt) ||
dc8e69bd
BF
1433 xfs_alloc_cur_active(acur->bnogt) ||
1434 xfs_alloc_cur_active(acur->cnt)) {
1435
1436 trace_xfs_alloc_cur_lookup(args);
1437
1438 /*
1439 * Search the bnobt left and right. In the case of a hit, finish
1440 * the search in the opposite direction and we're done.
1441 */
0e26d5ca
BF
1442 error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
1443 true, 1, &i);
1444 if (error)
1445 return error;
1446 if (i == 1) {
1447 trace_xfs_alloc_cur_left(args);
1448 fbcur = acur->bnogt;
1449 fbinc = true;
1450 break;
1451 }
0e26d5ca
BF
1452 error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
1453 1, &i);
1454 if (error)
1455 return error;
1456 if (i == 1) {
1457 trace_xfs_alloc_cur_right(args);
1458 fbcur = acur->bnolt;
1459 fbinc = false;
1460 break;
1461 }
dc8e69bd
BF
1462
1463 /*
1464 * Check the extent with best locality based on the current
1465 * extent size search key and keep track of the best candidate.
1466 */
1467 error = xfs_alloc_cntbt_iter(args, acur);
1468 if (error)
1469 return error;
1470 if (!xfs_alloc_cur_active(acur->cnt)) {
1471 trace_xfs_alloc_cur_lookup_done(args);
1472 break;
1473 }
1474 }
1475
1476 /*
1477 * If we failed to find anything due to busy extents, return empty
1478 * handed so the caller can flush and retry. If no busy extents were
1479 * found, walk backwards from the end of the cntbt as a last resort.
1480 */
1481 if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
1482 error = xfs_btree_decrement(acur->cnt, 0, &i);
1483 if (error)
1484 return error;
1485 if (i) {
c4aa10d0 1486 acur->cnt->bc_ag.abt.active = true;
dc8e69bd
BF
1487 fbcur = acur->cnt;
1488 fbinc = false;
1489 }
0e26d5ca
BF
1490 }
1491
dc8e69bd
BF
1492 /*
1493 * Search in the opposite direction for a better entry in the case of
1494 * a bnobt hit or walk backwards from the end of the cntbt.
1495 */
0e26d5ca
BF
1496 if (fbcur) {
1497 error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
1498 &i);
1499 if (error)
1500 return error;
1501 }
1502
1503 if (acur->len)
1504 *stat = 1;
1505
1506 return 0;
1507}
1508
5113f8ec
DW
1509/* Check the last block of the cnt btree for allocations. */
1510static int
1511xfs_alloc_ag_vextent_lastblock(
1512 struct xfs_alloc_arg *args,
1513 struct xfs_alloc_cur *acur,
1514 xfs_agblock_t *bno,
1515 xfs_extlen_t *len,
1516 bool *allocated)
1517{
1518 int error;
1519 int i;
1520
1521#ifdef DEBUG
1522 /* Randomly don't execute the first algorithm. */
1523 if (prandom_u32() & 1)
1524 return 0;
1525#endif
1526
1527 /*
1528 * Start from the entry that lookup found, sequence through all larger
1529 * free blocks. If we're actually pointing at a record smaller than
1530 * maxlen, go to the start of this block, and skip all those smaller
1531 * than minlen.
1532 */
77ca1eed 1533 if (*len || args->alignment > 1) {
6ca444cf 1534 acur->cnt->bc_levels[0].ptr = 1;
5113f8ec
DW
1535 do {
1536 error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
1537 if (error)
1538 return error;
f9e03706
DW
1539 if (XFS_IS_CORRUPT(args->mp, i != 1))
1540 return -EFSCORRUPTED;
5113f8ec
DW
1541 if (*len >= args->minlen)
1542 break;
1543 error = xfs_btree_increment(acur->cnt, 0, &i);
1544 if (error)
1545 return error;
1546 } while (i);
1547 ASSERT(*len >= args->minlen);
1548 if (!i)
1549 return 0;
1550 }
1551
1552 error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
1553 if (error)
1554 return error;
1555
1556 /*
1557 * It didn't work. We COULD be in a case where there's a good record
1558 * somewhere, so try again.
1559 */
1560 if (acur->len == 0)
1561 return 0;
1562
1563 trace_xfs_alloc_near_first(args);
1564 *allocated = true;
1565 return 0;
1566}
1567
1da177e4
LT
1568/*
1569 * Allocate a variable extent near bno in the allocation group agno.
1570 * Extent's length (returned in len) will be between minlen and maxlen,
1571 * and of the form k * prod + mod unless there's nothing that large.
1572 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1573 */
f5e7dbea 1574STATIC int
1da177e4 1575xfs_alloc_ag_vextent_near(
f5e7dbea 1576 struct xfs_alloc_arg *args)
1da177e4 1577{
f5e7dbea 1578 struct xfs_alloc_cur acur = {};
fec0afda
BF
1579 int error; /* error code */
1580 int i; /* result code, temporary */
fec0afda
BF
1581 xfs_agblock_t bno;
1582 xfs_extlen_t len;
e26f0501 1583
cf085a1b 1584 /* handle uninitialized agbno range so caller doesn't have to */
bfe46d4e
BF
1585 if (!args->min_agbno && !args->max_agbno)
1586 args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
1587 ASSERT(args->min_agbno <= args->max_agbno);
1588
1589 /* clamp agbno to the range if it's outside */
1590 if (args->agbno < args->min_agbno)
1591 args->agbno = args->min_agbno;
1592 if (args->agbno > args->max_agbno)
1593 args->agbno = args->max_agbno;
1594
e26f0501 1595restart:
fec0afda 1596 len = 0;
e26f0501 1597
1da177e4 1598 /*
f5e7dbea
BF
1599 * Set up cursors and see if there are any free extents as big as
1600 * maxlen. If not, pick the last entry in the tree unless the tree is
1601 * empty.
1da177e4 1602 */
f5e7dbea
BF
1603 error = xfs_alloc_cur_setup(args, &acur);
1604 if (error == -ENOSPC) {
fec0afda
BF
1605 error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
1606 &len, &i);
f5e7dbea
BF
1607 if (error)
1608 goto out;
fec0afda 1609 if (i == 0 || len == 0) {
e26f0501 1610 trace_xfs_alloc_near_noentry(args);
f5e7dbea 1611 goto out;
1da177e4
LT
1612 }
1613 ASSERT(i == 1);
f5e7dbea
BF
1614 } else if (error) {
1615 goto out;
1da177e4 1616 }
e26f0501 1617
1da177e4
LT
1618 /*
1619 * First algorithm.
1620 * If the requested extent is large wrt the freespaces available
1621 * in this a.g., then the cursor will be pointing to a btree entry
1622 * near the right edge of the tree. If it's in the last btree leaf
1623 * block, then we just examine all the entries in that block
1624 * that are big enough, and pick the best one.
1da177e4 1625 */
5113f8ec
DW
1626 if (xfs_btree_islastblock(acur.cnt, 0)) {
1627 bool allocated = false;
78d7aabd 1628
5113f8ec
DW
1629 error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
1630 &allocated);
78d7aabd
BF
1631 if (error)
1632 goto out;
5113f8ec
DW
1633 if (allocated)
1634 goto alloc_finish;
1da177e4 1635 }
f5e7dbea 1636
1da177e4 1637 /*
dc8e69bd
BF
1638 * Second algorithm. Combined cntbt and bnobt search to find ideal
1639 * locality.
1da177e4 1640 */
dc8e69bd 1641 error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
f5e7dbea
BF
1642 if (error)
1643 goto out;
1644
1da177e4
LT
1645 /*
1646 * If we couldn't get anything, give up.
1647 */
fec0afda 1648 if (!acur.len) {
d6d3aff2 1649 if (acur.busy) {
e26f0501 1650 trace_xfs_alloc_near_busy(args);
d6d3aff2
BF
1651 xfs_extent_busy_flush(args->mp, args->pag,
1652 acur.busy_gen);
e26f0501
CH
1653 goto restart;
1654 }
0b1b213f 1655 trace_xfs_alloc_size_neither(args);
1da177e4 1656 args->agbno = NULLAGBLOCK;
f5e7dbea 1657 goto out;
1da177e4 1658 }
489a150f 1659
5113f8ec 1660alloc_finish:
d2968825
BF
1661 /* fix up btrees on a successful allocation */
1662 error = xfs_alloc_cur_finish(args, &acur);
0b1b213f 1663
f5e7dbea
BF
1664out:
1665 xfs_alloc_cur_close(&acur, error);
1da177e4
LT
1666 return error;
1667}
1668
1669/*
1670 * Allocate a variable extent anywhere in the allocation group agno.
1671 * Extent's length (returned in len) will be between minlen and maxlen,
1672 * and of the form k * prod + mod unless there's nothing that large.
1673 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1674 */
1675STATIC int /* error */
1676xfs_alloc_ag_vextent_size(
1677 xfs_alloc_arg_t *args) /* allocation argument structure */
1678{
9798f615 1679 struct xfs_agf *agf = args->agbp->b_addr;
ae127f08
DW
1680 struct xfs_btree_cur *bno_cur; /* cursor for bno btree */
1681 struct xfs_btree_cur *cnt_cur; /* cursor for cnt btree */
1da177e4
LT
1682 int error; /* error result */
1683 xfs_agblock_t fbno; /* start of found freespace */
1684 xfs_extlen_t flen; /* length of found freespace */
1da177e4
LT
1685 int i; /* temp status variable */
1686 xfs_agblock_t rbno; /* returned block number */
1687 xfs_extlen_t rlen; /* length of returned extent */
ebf55872
CH
1688 bool busy;
1689 unsigned busy_gen;
1da177e4 1690
e26f0501 1691restart:
1da177e4
LT
1692 /*
1693 * Allocate and initialize a cursor for the by-size btree.
1694 */
561f7d17 1695 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
289d38d2 1696 args->pag, XFS_BTNUM_CNT);
1da177e4 1697 bno_cur = NULL;
e26f0501 1698
1da177e4
LT
1699 /*
1700 * Look for an entry >= maxlen+alignment-1 blocks.
1701 */
1702 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1703 args->maxlen + args->alignment - 1, &i)))
1704 goto error0;
e26f0501 1705
1da177e4 1706 /*
ebf55872
CH
1707 * If none then we have to settle for a smaller extent. In the case that
1708 * there are no large extents, this will return the last entry in the
1709 * tree unless the tree is empty. In the case that there are only busy
1710 * large extents, this will return the largest small extent unless there
e26f0501 1711 * are no smaller extents available.
1da177e4 1712 */
ebf55872 1713 if (!i) {
e26f0501
CH
1714 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1715 &fbno, &flen, &i);
1716 if (error)
1da177e4
LT
1717 goto error0;
1718 if (i == 0 || flen == 0) {
1719 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
0b1b213f 1720 trace_xfs_alloc_size_noentry(args);
1da177e4
LT
1721 return 0;
1722 }
1723 ASSERT(i == 1);
ebf55872
CH
1724 busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1725 &rlen, &busy_gen);
e26f0501
CH
1726 } else {
1727 /*
1728 * Search for a non-busy extent that is large enough.
e26f0501
CH
1729 */
1730 for (;;) {
1731 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1732 if (error)
1733 goto error0;
f9e03706
DW
1734 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1735 error = -EFSCORRUPTED;
1736 goto error0;
1737 }
e26f0501 1738
ebf55872
CH
1739 busy = xfs_alloc_compute_aligned(args, fbno, flen,
1740 &rbno, &rlen, &busy_gen);
e26f0501
CH
1741
1742 if (rlen >= args->maxlen)
1743 break;
1744
1745 error = xfs_btree_increment(cnt_cur, 0, &i);
1746 if (error)
1747 goto error0;
1748 if (i == 0) {
1749 /*
1750 * Our only valid extents must have been busy.
1751 * Make it unbusy by forcing the log out and
ebf55872 1752 * retrying.
e26f0501
CH
1753 */
1754 xfs_btree_del_cursor(cnt_cur,
1755 XFS_BTREE_NOERROR);
1756 trace_xfs_alloc_size_busy(args);
ebf55872
CH
1757 xfs_extent_busy_flush(args->mp,
1758 args->pag, busy_gen);
e26f0501
CH
1759 goto restart;
1760 }
1761 }
1da177e4 1762 }
e26f0501 1763
1da177e4
LT
1764 /*
1765 * In the first case above, we got the last entry in the
1766 * by-size btree. Now we check to see if the space hits maxlen
1767 * once aligned; if not, we search left for something better.
1768 * This can't happen in the second case above.
1769 */
1da177e4 1770 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
f9e03706
DW
1771 if (XFS_IS_CORRUPT(args->mp,
1772 rlen != 0 &&
1773 (rlen > flen ||
1774 rbno + rlen > fbno + flen))) {
1775 error = -EFSCORRUPTED;
1776 goto error0;
1777 }
1da177e4
LT
1778 if (rlen < args->maxlen) {
1779 xfs_agblock_t bestfbno;
1780 xfs_extlen_t bestflen;
1781 xfs_agblock_t bestrbno;
1782 xfs_extlen_t bestrlen;
1783
1784 bestrlen = rlen;
1785 bestrbno = rbno;
1786 bestflen = flen;
1787 bestfbno = fbno;
1788 for (;;) {
8df4da4a 1789 if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1da177e4
LT
1790 goto error0;
1791 if (i == 0)
1792 break;
1793 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1794 &i)))
1795 goto error0;
f9e03706
DW
1796 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1797 error = -EFSCORRUPTED;
1798 goto error0;
1799 }
1da177e4
LT
1800 if (flen < bestrlen)
1801 break;
ebf55872
CH
1802 busy = xfs_alloc_compute_aligned(args, fbno, flen,
1803 &rbno, &rlen, &busy_gen);
1da177e4 1804 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
f9e03706
DW
1805 if (XFS_IS_CORRUPT(args->mp,
1806 rlen != 0 &&
1807 (rlen > flen ||
1808 rbno + rlen > fbno + flen))) {
1809 error = -EFSCORRUPTED;
1810 goto error0;
1811 }
1da177e4
LT
1812 if (rlen > bestrlen) {
1813 bestrlen = rlen;
1814 bestrbno = rbno;
1815 bestflen = flen;
1816 bestfbno = fbno;
1817 if (rlen == args->maxlen)
1818 break;
1819 }
1820 }
1821 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1822 &i)))
1823 goto error0;
f9e03706
DW
1824 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1825 error = -EFSCORRUPTED;
1826 goto error0;
1827 }
1da177e4
LT
1828 rlen = bestrlen;
1829 rbno = bestrbno;
1830 flen = bestflen;
1831 fbno = bestfbno;
1832 }
1833 args->wasfromfl = 0;
1834 /*
1835 * Fix up the length.
1836 */
1837 args->len = rlen;
e26f0501 1838 if (rlen < args->minlen) {
ebf55872 1839 if (busy) {
e26f0501
CH
1840 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1841 trace_xfs_alloc_size_busy(args);
ebf55872 1842 xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
e26f0501
CH
1843 goto restart;
1844 }
1845 goto out_nominleft;
1da177e4 1846 }
e26f0501
CH
1847 xfs_alloc_fix_len(args);
1848
1da177e4 1849 rlen = args->len;
f9e03706
DW
1850 if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
1851 error = -EFSCORRUPTED;
1852 goto error0;
1853 }
1da177e4
LT
1854 /*
1855 * Allocate and initialize a cursor for the by-block tree.
1856 */
561f7d17 1857 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
289d38d2 1858 args->pag, XFS_BTNUM_BNO);
1da177e4
LT
1859 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1860 rbno, rlen, XFSA_FIXUP_CNT_OK)))
1861 goto error0;
1862 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1863 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1864 cnt_cur = bno_cur = NULL;
1865 args->len = rlen;
1866 args->agbno = rbno;
f9e03706
DW
1867 if (XFS_IS_CORRUPT(args->mp,
1868 args->agbno + args->len >
9798f615 1869 be32_to_cpu(agf->agf_length))) {
f9e03706
DW
1870 error = -EFSCORRUPTED;
1871 goto error0;
1872 }
0b1b213f 1873 trace_xfs_alloc_size_done(args);
1da177e4
LT
1874 return 0;
1875
1876error0:
0b1b213f 1877 trace_xfs_alloc_size_error(args);
1da177e4
LT
1878 if (cnt_cur)
1879 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1880 if (bno_cur)
1881 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1882 return error;
e26f0501
CH
1883
1884out_nominleft:
1885 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1886 trace_xfs_alloc_size_nominleft(args);
1887 args->agbno = NULLAGBLOCK;
1888 return 0;
1da177e4
LT
1889}
1890
1da177e4
LT
1891/*
1892 * Free the extent starting at agno/bno for length.
1893 */
340785cc 1894STATIC int
1da177e4 1895xfs_free_ag_extent(
66e3237e
DW
1896 struct xfs_trans *tp,
1897 struct xfs_buf *agbp,
1898 xfs_agnumber_t agno,
1899 xfs_agblock_t bno,
1900 xfs_extlen_t len,
1901 const struct xfs_owner_info *oinfo,
1902 enum xfs_ag_resv_type type)
1da177e4 1903{
66e3237e 1904 struct xfs_mount *mp;
66e3237e
DW
1905 struct xfs_btree_cur *bno_cur;
1906 struct xfs_btree_cur *cnt_cur;
1907 xfs_agblock_t gtbno; /* start of right neighbor */
1908 xfs_extlen_t gtlen; /* length of right neighbor */
1909 xfs_agblock_t ltbno; /* start of left neighbor */
1910 xfs_extlen_t ltlen; /* length of left neighbor */
1911 xfs_agblock_t nbno; /* new starting block of freesp */
1912 xfs_extlen_t nlen; /* new length of freespace */
1913 int haveleft; /* have a left neighbor */
1914 int haveright; /* have a right neighbor */
1915 int i;
1916 int error;
fa9c3c19 1917 struct xfs_perag *pag = agbp->b_pag;
1da177e4 1918
673930c3 1919 bno_cur = cnt_cur = NULL;
1da177e4 1920 mp = tp->t_mountp;
673930c3 1921
33df3a9c 1922 if (!xfs_rmap_should_skip_owner_update(oinfo)) {
fa9c3c19 1923 error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo);
673930c3
DW
1924 if (error)
1925 goto error0;
1926 }
1927
1da177e4
LT
1928 /*
1929 * Allocate and initialize a cursor for the by-block btree.
1930 */
289d38d2 1931 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
1da177e4
LT
1932 /*
1933 * Look for a neighboring block on the left (lower block numbers)
1934 * that is contiguous with this space.
1935 */
1936 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1937 goto error0;
1938 if (haveleft) {
1939 /*
1940 * There is a block to our left.
1941 */
1942 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1943 goto error0;
f9e03706
DW
1944 if (XFS_IS_CORRUPT(mp, i != 1)) {
1945 error = -EFSCORRUPTED;
1946 goto error0;
1947 }
1da177e4
LT
1948 /*
1949 * It's not contiguous, though.
1950 */
1951 if (ltbno + ltlen < bno)
1952 haveleft = 0;
1953 else {
1954 /*
1955 * If this failure happens the request to free this
1956 * space was invalid, it's (partly) already free.
1957 * Very bad.
1958 */
f9e03706
DW
1959 if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
1960 error = -EFSCORRUPTED;
1961 goto error0;
1962 }
1da177e4
LT
1963 }
1964 }
1965 /*
1966 * Look for a neighboring block on the right (higher block numbers)
1967 * that is contiguous with this space.
1968 */
637aa50f 1969 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1da177e4
LT
1970 goto error0;
1971 if (haveright) {
1972 /*
1973 * There is a block to our right.
1974 */
1975 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1976 goto error0;
f9e03706
DW
1977 if (XFS_IS_CORRUPT(mp, i != 1)) {
1978 error = -EFSCORRUPTED;
1979 goto error0;
1980 }
1da177e4
LT
1981 /*
1982 * It's not contiguous, though.
1983 */
1984 if (bno + len < gtbno)
1985 haveright = 0;
1986 else {
1987 /*
1988 * If this failure happens the request to free this
1989 * space was invalid, it's (partly) already free.
1990 * Very bad.
1991 */
f9e03706
DW
1992 if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
1993 error = -EFSCORRUPTED;
1994 goto error0;
1995 }
1da177e4
LT
1996 }
1997 }
1998 /*
1999 * Now allocate and initialize a cursor for the by-size tree.
2000 */
289d38d2 2001 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
1da177e4
LT
2002 /*
2003 * Have both left and right contiguous neighbors.
2004 * Merge all three into a single free block.
2005 */
2006 if (haveleft && haveright) {
2007 /*
2008 * Delete the old by-size entry on the left.
2009 */
2010 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2011 goto error0;
f9e03706
DW
2012 if (XFS_IS_CORRUPT(mp, i != 1)) {
2013 error = -EFSCORRUPTED;
2014 goto error0;
2015 }
91cca5df 2016 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 2017 goto error0;
f9e03706
DW
2018 if (XFS_IS_CORRUPT(mp, i != 1)) {
2019 error = -EFSCORRUPTED;
2020 goto error0;
2021 }
1da177e4
LT
2022 /*
2023 * Delete the old by-size entry on the right.
2024 */
2025 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2026 goto error0;
f9e03706
DW
2027 if (XFS_IS_CORRUPT(mp, i != 1)) {
2028 error = -EFSCORRUPTED;
2029 goto error0;
2030 }
91cca5df 2031 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 2032 goto error0;
f9e03706
DW
2033 if (XFS_IS_CORRUPT(mp, i != 1)) {
2034 error = -EFSCORRUPTED;
2035 goto error0;
2036 }
1da177e4
LT
2037 /*
2038 * Delete the old by-block entry for the right block.
2039 */
91cca5df 2040 if ((error = xfs_btree_delete(bno_cur, &i)))
1da177e4 2041 goto error0;
f9e03706
DW
2042 if (XFS_IS_CORRUPT(mp, i != 1)) {
2043 error = -EFSCORRUPTED;
2044 goto error0;
2045 }
1da177e4
LT
2046 /*
2047 * Move the by-block cursor back to the left neighbor.
2048 */
8df4da4a 2049 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1da177e4 2050 goto error0;
f9e03706
DW
2051 if (XFS_IS_CORRUPT(mp, i != 1)) {
2052 error = -EFSCORRUPTED;
2053 goto error0;
2054 }
1da177e4
LT
2055#ifdef DEBUG
2056 /*
2057 * Check that this is the right record: delete didn't
2058 * mangle the cursor.
2059 */
2060 {
2061 xfs_agblock_t xxbno;
2062 xfs_extlen_t xxlen;
2063
2064 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
2065 &i)))
2066 goto error0;
f9e03706
DW
2067 if (XFS_IS_CORRUPT(mp,
2068 i != 1 ||
2069 xxbno != ltbno ||
2070 xxlen != ltlen)) {
2071 error = -EFSCORRUPTED;
2072 goto error0;
2073 }
1da177e4
LT
2074 }
2075#endif
2076 /*
2077 * Update remaining by-block entry to the new, joined block.
2078 */
2079 nbno = ltbno;
2080 nlen = len + ltlen + gtlen;
2081 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2082 goto error0;
2083 }
2084 /*
2085 * Have only a left contiguous neighbor.
2086 * Merge it together with the new freespace.
2087 */
2088 else if (haveleft) {
2089 /*
2090 * Delete the old by-size entry on the left.
2091 */
2092 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2093 goto error0;
f9e03706
DW
2094 if (XFS_IS_CORRUPT(mp, i != 1)) {
2095 error = -EFSCORRUPTED;
2096 goto error0;
2097 }
91cca5df 2098 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 2099 goto error0;
f9e03706
DW
2100 if (XFS_IS_CORRUPT(mp, i != 1)) {
2101 error = -EFSCORRUPTED;
2102 goto error0;
2103 }
1da177e4
LT
2104 /*
2105 * Back up the by-block cursor to the left neighbor, and
2106 * update its length.
2107 */
8df4da4a 2108 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1da177e4 2109 goto error0;
f9e03706
DW
2110 if (XFS_IS_CORRUPT(mp, i != 1)) {
2111 error = -EFSCORRUPTED;
2112 goto error0;
2113 }
1da177e4
LT
2114 nbno = ltbno;
2115 nlen = len + ltlen;
2116 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2117 goto error0;
2118 }
2119 /*
2120 * Have only a right contiguous neighbor.
2121 * Merge it together with the new freespace.
2122 */
2123 else if (haveright) {
2124 /*
2125 * Delete the old by-size entry on the right.
2126 */
2127 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2128 goto error0;
f9e03706
DW
2129 if (XFS_IS_CORRUPT(mp, i != 1)) {
2130 error = -EFSCORRUPTED;
2131 goto error0;
2132 }
91cca5df 2133 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 2134 goto error0;
f9e03706
DW
2135 if (XFS_IS_CORRUPT(mp, i != 1)) {
2136 error = -EFSCORRUPTED;
2137 goto error0;
2138 }
1da177e4
LT
2139 /*
2140 * Update the starting block and length of the right
2141 * neighbor in the by-block tree.
2142 */
2143 nbno = bno;
2144 nlen = len + gtlen;
2145 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2146 goto error0;
2147 }
2148 /*
2149 * No contiguous neighbors.
2150 * Insert the new freespace into the by-block tree.
2151 */
2152 else {
2153 nbno = bno;
2154 nlen = len;
4b22a571 2155 if ((error = xfs_btree_insert(bno_cur, &i)))
1da177e4 2156 goto error0;
f9e03706
DW
2157 if (XFS_IS_CORRUPT(mp, i != 1)) {
2158 error = -EFSCORRUPTED;
2159 goto error0;
2160 }
1da177e4
LT
2161 }
2162 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
2163 bno_cur = NULL;
2164 /*
2165 * In all cases we need to insert the new freespace in the by-size tree.
2166 */
2167 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2168 goto error0;
f9e03706
DW
2169 if (XFS_IS_CORRUPT(mp, i != 0)) {
2170 error = -EFSCORRUPTED;
2171 goto error0;
2172 }
4b22a571 2173 if ((error = xfs_btree_insert(cnt_cur, &i)))
1da177e4 2174 goto error0;
f9e03706
DW
2175 if (XFS_IS_CORRUPT(mp, i != 1)) {
2176 error = -EFSCORRUPTED;
2177 goto error0;
2178 }
1da177e4
LT
2179 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
2180 cnt_cur = NULL;
ecb6928f 2181
1da177e4
LT
2182 /*
2183 * Update the freespace totals in the ag and superblock.
2184 */
92a00544
GX
2185 error = xfs_alloc_update_counters(tp, agbp, len);
2186 xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
ecb6928f
CH
2187 if (error)
2188 goto error0;
2189
ff6d6af2
BD
2190 XFS_STATS_INC(mp, xs_freex);
2191 XFS_STATS_ADD(mp, xs_freeb, len);
0b1b213f 2192
21592863 2193 trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
1da177e4 2194
1da177e4
LT
2195 return 0;
2196
2197 error0:
21592863 2198 trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
1da177e4
LT
2199 if (bno_cur)
2200 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
2201 if (cnt_cur)
2202 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
2203 return error;
2204}
2205
2206/*
2207 * Visible (exported) allocation/free functions.
2208 * Some of these are used just by xfs_alloc_btree.c and this file.
2209 */
2210
2211/*
7cb3efb4 2212 * Compute and fill in value of m_alloc_maxlevels.
1da177e4
LT
2213 */
2214void
2215xfs_alloc_compute_maxlevels(
2216 xfs_mount_t *mp) /* file system mount structure */
2217{
7cb3efb4 2218 mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
19b54ee6 2219 (mp->m_sb.sb_agblocks + 1) / 2);
0ed5f735 2220 ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
1da177e4
LT
2221}
2222
6cc87645 2223/*
3fd129b6
DW
2224 * Find the length of the longest extent in an AG. The 'need' parameter
2225 * specifies how much space we're going to need for the AGFL and the
2226 * 'reserved' parameter tells us how many blocks in this AG are reserved for
2227 * other callers.
6cc87645
DC
2228 */
2229xfs_extlen_t
2230xfs_alloc_longest_free_extent(
50adbcb4 2231 struct xfs_perag *pag,
3fd129b6
DW
2232 xfs_extlen_t need,
2233 xfs_extlen_t reserved)
6cc87645 2234{
50adbcb4 2235 xfs_extlen_t delta = 0;
6cc87645 2236
3fd129b6
DW
2237 /*
2238 * If the AGFL needs a recharge, we'll have to subtract that from the
2239 * longest extent.
2240 */
6cc87645
DC
2241 if (need > pag->pagf_flcount)
2242 delta = need - pag->pagf_flcount;
2243
3fd129b6
DW
2244 /*
2245 * If we cannot maintain others' reservations with space from the
2246 * not-longest freesp extents, we'll have to subtract /that/ from
2247 * the longest extent too.
2248 */
2249 if (pag->pagf_freeblks - pag->pagf_longest < reserved)
2250 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
2251
2252 /*
2253 * If the longest extent is long enough to satisfy all the
2254 * reservations and AGFL rules in place, we can return this extent.
2255 */
6cc87645 2256 if (pag->pagf_longest > delta)
1c743574
DC
2257 return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
2258 pag->pagf_longest - delta);
3fd129b6
DW
2259
2260 /* Otherwise, let the caller try for 1 block if there's space. */
6cc87645
DC
2261 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
2262}
2263
1cac233c
DW
2264/*
2265 * Compute the minimum length of the AGFL in the given AG. If @pag is NULL,
2266 * return the largest possible minimum length.
2267 */
496817b4
DC
2268unsigned int
2269xfs_alloc_min_freelist(
2270 struct xfs_mount *mp,
2271 struct xfs_perag *pag)
2272{
1cac233c
DW
2273 /* AG btrees have at least 1 level. */
2274 static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
2275 const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
496817b4
DC
2276 unsigned int min_free;
2277
7cb3efb4 2278 ASSERT(mp->m_alloc_maxlevels > 0);
1cac233c 2279
496817b4 2280 /* space needed by-bno freespace btree */
1cac233c 2281 min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
7cb3efb4 2282 mp->m_alloc_maxlevels);
496817b4 2283 /* space needed by-size freespace btree */
1cac233c 2284 min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
7cb3efb4 2285 mp->m_alloc_maxlevels);
52548852 2286 /* space needed reverse mapping used space btree */
ebd9027d 2287 if (xfs_has_rmapbt(mp))
1cac233c
DW
2288 min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
2289 mp->m_rmap_maxlevels);
496817b4
DC
2290
2291 return min_free;
2292}
2293
72d55285
DC
2294/*
2295 * Check if the operation we are fixing up the freelist for should go ahead or
2296 * not. If we are freeing blocks, we always allow it, otherwise the allocation
2297 * is dependent on whether the size and shape of free space available will
2298 * permit the requested allocation to take place.
2299 */
2300static bool
2301xfs_alloc_space_available(
2302 struct xfs_alloc_arg *args,
2303 xfs_extlen_t min_free,
2304 int flags)
2305{
2306 struct xfs_perag *pag = args->pag;
12ef8301 2307 xfs_extlen_t alloc_len, longest;
3fd129b6 2308 xfs_extlen_t reservation; /* blocks that are still reserved */
72d55285 2309 int available;
1ca89fbc 2310 xfs_extlen_t agflcount;
72d55285
DC
2311
2312 if (flags & XFS_ALLOC_FLAG_FREEING)
2313 return true;
2314
3fd129b6
DW
2315 reservation = xfs_ag_resv_needed(pag, args->resv);
2316
72d55285 2317 /* do we have enough contiguous free space for the allocation? */
12ef8301 2318 alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
a1f69417 2319 longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
12ef8301 2320 if (longest < alloc_len)
72d55285
DC
2321 return false;
2322
1ca89fbc
BF
2323 /*
2324 * Do we have enough free space remaining for the allocation? Don't
2325 * account extra agfl blocks because we are about to defer free them,
2326 * making them unavailable until the current transaction commits.
2327 */
2328 agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
2329 available = (int)(pag->pagf_freeblks + agflcount -
54fee133 2330 reservation - min_free - args->minleft);
12ef8301 2331 if (available < (int)max(args->total, alloc_len))
72d55285
DC
2332 return false;
2333
54fee133
CH
2334 /*
2335 * Clamp maxlen to the amount of free space available for the actual
2336 * extent allocation.
2337 */
2338 if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
2339 args->maxlen = available;
2340 ASSERT(args->maxlen > 0);
2341 ASSERT(args->maxlen >= args->minlen);
2342 }
2343
72d55285
DC
2344 return true;
2345}
2346
4223f659
BF
2347int
2348xfs_free_agfl_block(
2349 struct xfs_trans *tp,
2350 xfs_agnumber_t agno,
2351 xfs_agblock_t agbno,
2352 struct xfs_buf *agbp,
2353 struct xfs_owner_info *oinfo)
2354{
2355 int error;
2356 struct xfs_buf *bp;
2357
2358 error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
2359 XFS_AG_RESV_AGFL);
2360 if (error)
2361 return error;
2362
ee647f85
DW
2363 error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
2364 XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
2365 tp->t_mountp->m_bsize, 0, &bp);
2366 if (error)
2367 return error;
4223f659
BF
2368 xfs_trans_binval(tp, bp);
2369
2370 return 0;
2371}
2372
a27ba260
BF
2373/*
2374 * Check the agfl fields of the agf for inconsistency or corruption. The purpose
2375 * is to detect an agfl header padding mismatch between current and early v5
2376 * kernels. This problem manifests as a 1-slot size difference between the
2377 * on-disk flcount and the active [first, last] range of a wrapped agfl. This
2378 * may also catch variants of agfl count corruption unrelated to padding. Either
2379 * way, we'll reset the agfl and warn the user.
2380 *
2381 * Return true if a reset is required before the agfl can be used, false
2382 * otherwise.
2383 */
2384static bool
2385xfs_agfl_needs_reset(
2386 struct xfs_mount *mp,
2387 struct xfs_agf *agf)
2388{
2389 uint32_t f = be32_to_cpu(agf->agf_flfirst);
2390 uint32_t l = be32_to_cpu(agf->agf_fllast);
2391 uint32_t c = be32_to_cpu(agf->agf_flcount);
2392 int agfl_size = xfs_agfl_size(mp);
2393 int active;
2394
2395 /* no agfl header on v4 supers */
38c26bfd 2396 if (!xfs_has_crc(mp))
a27ba260
BF
2397 return false;
2398
2399 /*
2400 * The agf read verifier catches severe corruption of these fields.
2401 * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2402 * the verifier allows it.
2403 */
2404 if (f >= agfl_size || l >= agfl_size)
2405 return true;
2406 if (c > agfl_size)
2407 return true;
2408
2409 /*
2410 * Check consistency between the on-disk count and the active range. An
2411 * agfl padding mismatch manifests as an inconsistent flcount.
2412 */
2413 if (c && l >= f)
2414 active = l - f + 1;
2415 else if (c)
2416 active = agfl_size - f + l + 1;
2417 else
2418 active = 0;
2419
2420 return active != c;
2421}
2422
2423/*
2424 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
2425 * agfl content cannot be trusted. Warn the user that a repair is required to
2426 * recover leaked blocks.
2427 *
2428 * The purpose of this mechanism is to handle filesystems affected by the agfl
2429 * header padding mismatch problem. A reset keeps the filesystem online with a
2430 * relatively minor free space accounting inconsistency rather than suffer the
2431 * inevitable crash from use of an invalid agfl block.
2432 */
2433static void
2434xfs_agfl_reset(
2435 struct xfs_trans *tp,
2436 struct xfs_buf *agbp,
2437 struct xfs_perag *pag)
2438{
2439 struct xfs_mount *mp = tp->t_mountp;
9798f615 2440 struct xfs_agf *agf = agbp->b_addr;
a27ba260
BF
2441
2442 ASSERT(pag->pagf_agflreset);
2443 trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
2444
2445 xfs_warn(mp,
2446 "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
2447 "Please unmount and run xfs_repair.",
2448 pag->pag_agno, pag->pagf_flcount);
2449
2450 agf->agf_flfirst = 0;
2451 agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
2452 agf->agf_flcount = 0;
2453 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
2454 XFS_AGF_FLCOUNT);
2455
2456 pag->pagf_flcount = 0;
2457 pag->pagf_agflreset = false;
2458}
2459
f8f2835a
BF
2460/*
2461 * Defer an AGFL block free. This is effectively equivalent to
c201d9ca 2462 * xfs_free_extent_later() with some special handling particular to AGFL blocks.
f8f2835a
BF
2463 *
2464 * Deferring AGFL frees helps prevent log reservation overruns due to too many
2465 * allocation operations in a transaction. AGFL frees are prone to this problem
2466 * because for one they are always freed one at a time. Further, an immediate
2467 * AGFL block free can cause a btree join and require another block free before
2468 * the real allocation can proceed. Deferring the free disconnects freeing up
2469 * the AGFL slot from freeing the block.
2470 */
2471STATIC void
2472xfs_defer_agfl_block(
0f37d178 2473 struct xfs_trans *tp,
f8f2835a
BF
2474 xfs_agnumber_t agno,
2475 xfs_fsblock_t agbno,
2476 struct xfs_owner_info *oinfo)
2477{
0f37d178 2478 struct xfs_mount *mp = tp->t_mountp;
f8f2835a
BF
2479 struct xfs_extent_free_item *new; /* new element */
2480
c201d9ca 2481 ASSERT(xfs_extfree_item_cache != NULL);
f8f2835a
BF
2482 ASSERT(oinfo != NULL);
2483
b3b5ff41 2484 new = kmem_cache_zalloc(xfs_extfree_item_cache,
3050bd0b 2485 GFP_KERNEL | __GFP_NOFAIL);
f8f2835a
BF
2486 new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
2487 new->xefi_blockcount = 1;
b3b5ff41 2488 new->xefi_owner = oinfo->oi_owner;
f8f2835a
BF
2489
2490 trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
2491
0f37d178 2492 xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
f8f2835a
BF
2493}
2494
c201d9ca
DW
2495/*
2496 * Add the extent to the list of extents to be free at transaction end.
2497 * The list is maintained sorted (by block number).
2498 */
2499void
2500__xfs_free_extent_later(
2501 struct xfs_trans *tp,
2502 xfs_fsblock_t bno,
2503 xfs_filblks_t len,
2504 const struct xfs_owner_info *oinfo,
2505 bool skip_discard)
2506{
2507 struct xfs_extent_free_item *new; /* new element */
2508#ifdef DEBUG
2509 struct xfs_mount *mp = tp->t_mountp;
2510 xfs_agnumber_t agno;
2511 xfs_agblock_t agbno;
2512
2513 ASSERT(bno != NULLFSBLOCK);
2514 ASSERT(len > 0);
95f0b95e 2515 ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
c201d9ca
DW
2516 ASSERT(!isnullstartblock(bno));
2517 agno = XFS_FSB_TO_AGNO(mp, bno);
2518 agbno = XFS_FSB_TO_AGBNO(mp, bno);
2519 ASSERT(agno < mp->m_sb.sb_agcount);
2520 ASSERT(agbno < mp->m_sb.sb_agblocks);
2521 ASSERT(len < mp->m_sb.sb_agblocks);
2522 ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
2523#endif
2524 ASSERT(xfs_extfree_item_cache != NULL);
2525
b3b5ff41 2526 new = kmem_cache_zalloc(xfs_extfree_item_cache,
c201d9ca
DW
2527 GFP_KERNEL | __GFP_NOFAIL);
2528 new->xefi_startblock = bno;
2529 new->xefi_blockcount = (xfs_extlen_t)len;
b3b5ff41
DW
2530 if (skip_discard)
2531 new->xefi_flags |= XFS_EFI_SKIP_DISCARD;
2532 if (oinfo) {
2533 ASSERT(oinfo->oi_offset == 0);
2534
2535 if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
2536 new->xefi_flags |= XFS_EFI_ATTR_FORK;
2537 if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
2538 new->xefi_flags |= XFS_EFI_BMBT_BLOCK;
2539 new->xefi_owner = oinfo->oi_owner;
2540 } else {
2541 new->xefi_owner = XFS_RMAP_OWN_NULL;
2542 }
c201d9ca
DW
2543 trace_xfs_bmap_free_defer(tp->t_mountp,
2544 XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
2545 XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
2546 xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
2547}
2548
30151967
CB
2549#ifdef DEBUG
2550/*
2551 * Check if an AGF has a free extent record whose length is equal to
2552 * args->minlen.
2553 */
2554STATIC int
2555xfs_exact_minlen_extent_available(
2556 struct xfs_alloc_arg *args,
2557 struct xfs_buf *agbp,
2558 int *stat)
2559{
2560 struct xfs_btree_cur *cnt_cur;
2561 xfs_agblock_t fbno;
2562 xfs_extlen_t flen;
2563 int error = 0;
2564
2565 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
289d38d2 2566 args->pag, XFS_BTNUM_CNT);
30151967
CB
2567 error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
2568 if (error)
2569 goto out;
2570
2571 if (*stat == 0) {
2572 error = -EFSCORRUPTED;
2573 goto out;
2574 }
2575
2576 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat);
2577 if (error)
2578 goto out;
2579
2580 if (*stat == 1 && flen != args->minlen)
2581 *stat = 0;
2582
2583out:
2584 xfs_btree_del_cursor(cnt_cur, error);
2585
2586 return error;
2587}
2588#endif
2589
1da177e4
LT
2590/*
2591 * Decide whether to use this allocation group for this allocation.
2592 * If so, fix up the btree freelist's size.
2593 */
2e9101da 2594int /* error */
1da177e4 2595xfs_alloc_fix_freelist(
396503fc
DC
2596 struct xfs_alloc_arg *args, /* allocation argument structure */
2597 int flags) /* XFS_ALLOC_FLAG_... */
1da177e4 2598{
396503fc
DC
2599 struct xfs_mount *mp = args->mp;
2600 struct xfs_perag *pag = args->pag;
2601 struct xfs_trans *tp = args->tp;
2602 struct xfs_buf *agbp = NULL;
2603 struct xfs_buf *agflbp = NULL;
2604 struct xfs_alloc_arg targs; /* local allocation arguments */
2605 xfs_agblock_t bno; /* freelist block */
2606 xfs_extlen_t need; /* total blocks needed in freelist */
c184f855 2607 int error = 0;
396503fc 2608
362f5e74
BF
2609 /* deferred ops (AGFL block frees) require permanent transactions */
2610 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
2611
1da177e4 2612 if (!pag->pagf_init) {
08d3e84f 2613 error = xfs_alloc_read_agf(pag, tp, flags, &agbp);
f48e2df8
DW
2614 if (error) {
2615 /* Couldn't lock the AGF so skip this AG. */
2616 if (error == -EAGAIN)
2617 error = 0;
396503fc 2618 goto out_no_agbp;
1da177e4 2619 }
396503fc 2620 }
1da177e4 2621
0e1edbd9 2622 /*
396503fc
DC
2623 * If this is a metadata preferred pag and we are user data then try
2624 * somewhere else if we are not being asked to try harder at this
2625 * point
1da177e4 2626 */
c34d570d 2627 if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) &&
0e1edbd9
NS
2628 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
2629 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
396503fc 2630 goto out_agbp_relse;
1da177e4
LT
2631 }
2632
496817b4 2633 need = xfs_alloc_min_freelist(mp, pag);
54fee133
CH
2634 if (!xfs_alloc_space_available(args, need, flags |
2635 XFS_ALLOC_FLAG_CHECK))
396503fc 2636 goto out_agbp_relse;
0e1edbd9 2637
1da177e4
LT
2638 /*
2639 * Get the a.g. freespace buffer.
2640 * Can fail if we're not blocking on locks, and it's held.
2641 */
396503fc 2642 if (!agbp) {
08d3e84f 2643 error = xfs_alloc_read_agf(pag, tp, flags, &agbp);
f48e2df8
DW
2644 if (error) {
2645 /* Couldn't lock the AGF so skip this AG. */
2646 if (error == -EAGAIN)
2647 error = 0;
396503fc 2648 goto out_no_agbp;
0e1edbd9 2649 }
1da177e4 2650 }
50adbcb4 2651
a27ba260
BF
2652 /* reset a padding mismatched agfl before final free space check */
2653 if (pag->pagf_agflreset)
2654 xfs_agfl_reset(tp, agbp, pag);
2655
50adbcb4 2656 /* If there isn't enough total space or single-extent, reject it. */
496817b4 2657 need = xfs_alloc_min_freelist(mp, pag);
396503fc
DC
2658 if (!xfs_alloc_space_available(args, need, flags))
2659 goto out_agbp_relse;
72d55285 2660
30151967
CB
2661#ifdef DEBUG
2662 if (args->alloc_minlen_only) {
2663 int stat;
2664
2665 error = xfs_exact_minlen_extent_available(args, agbp, &stat);
2666 if (error || !stat)
2667 goto out_agbp_relse;
2668 }
2669#endif
1da177e4
LT
2670 /*
2671 * Make the freelist shorter if it's too long.
50adbcb4 2672 *
396503fc
DC
2673 * Note that from this point onwards, we will always release the agf and
2674 * agfl buffers on error. This handles the case where we error out and
2675 * the buffers are clean or may not have been joined to the transaction
2676 * and hence need to be released manually. If they have been joined to
2677 * the transaction, then xfs_trans_brelse() will handle them
2678 * appropriately based on the recursion count and dirty state of the
2679 * buffer.
2680 *
50adbcb4
DC
2681 * XXX (dgc): When we have lots of free space, does this buy us
2682 * anything other than extra overhead when we need to put more blocks
2683 * back on the free list? Maybe we should only do this when space is
2684 * getting low or the AGFL is more than half full?
04f13060
DW
2685 *
2686 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
2687 * big; the NORMAP flag prevents AGFL expand/shrink operations from
2688 * updating the rmapbt. Both flags are used in xfs_repair while we're
2689 * rebuilding the rmapbt, and neither are used by the kernel. They're
2690 * both required to ensure that rmaps are correctly recorded for the
2691 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
2692 * repair/rmap.c in xfsprogs for details.
1da177e4 2693 */
04f13060 2694 memset(&targs, 0, sizeof(targs));
7280feda 2695 /* struct copy below */
04f13060 2696 if (flags & XFS_ALLOC_FLAG_NORMAP)
7280feda 2697 targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
04f13060 2698 else
7280feda 2699 targs.oinfo = XFS_RMAP_OINFO_AG;
04f13060 2700 while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
49f0d84e 2701 error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
92821e2b 2702 if (error)
396503fc 2703 goto out_agbp_relse;
4223f659 2704
c03edc9e
BF
2705 /* defer agfl frees */
2706 xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
1da177e4 2707 }
50adbcb4 2708
1da177e4
LT
2709 targs.tp = tp;
2710 targs.mp = mp;
2711 targs.agbp = agbp;
2712 targs.agno = args->agno;
3fd129b6 2713 targs.alignment = targs.minlen = targs.prod = 1;
1da177e4
LT
2714 targs.type = XFS_ALLOCTYPE_THIS_AG;
2715 targs.pag = pag;
cec7bb7d 2716 error = xfs_alloc_read_agfl(pag, tp, &agflbp);
50adbcb4 2717 if (error)
396503fc 2718 goto out_agbp_relse;
50adbcb4
DC
2719
2720 /* Make the freelist longer if it's too short. */
2721 while (pag->pagf_flcount < need) {
1da177e4 2722 targs.agbno = 0;
50adbcb4 2723 targs.maxlen = need - pag->pagf_flcount;
0ab32086 2724 targs.resv = XFS_AG_RESV_AGFL;
50adbcb4
DC
2725
2726 /* Allocate as many blocks as possible at once. */
2727 error = xfs_alloc_ag_vextent(&targs);
396503fc
DC
2728 if (error)
2729 goto out_agflbp_relse;
2730
1da177e4
LT
2731 /*
2732 * Stop if we run out. Won't happen if callers are obeying
2733 * the restrictions correctly. Can happen for free calls
2734 * on a completely full ag.
2735 */
d210a28c 2736 if (targs.agbno == NULLAGBLOCK) {
0e1edbd9
NS
2737 if (flags & XFS_ALLOC_FLAG_FREEING)
2738 break;
396503fc 2739 goto out_agflbp_relse;
d210a28c 2740 }
1da177e4
LT
2741 /*
2742 * Put each allocated block on the list.
2743 */
2744 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
8c392eb2 2745 error = xfs_alloc_put_freelist(pag, tp, agbp,
92821e2b
DC
2746 agflbp, bno, 0);
2747 if (error)
396503fc 2748 goto out_agflbp_relse;
1da177e4
LT
2749 }
2750 }
e63a3690 2751 xfs_trans_brelse(tp, agflbp);
1da177e4
LT
2752 args->agbp = agbp;
2753 return 0;
396503fc
DC
2754
2755out_agflbp_relse:
2756 xfs_trans_brelse(tp, agflbp);
2757out_agbp_relse:
2758 if (agbp)
2759 xfs_trans_brelse(tp, agbp);
2760out_no_agbp:
2761 args->agbp = NULL;
2762 return error;
1da177e4
LT
2763}
2764
2765/*
2766 * Get a block from the freelist.
2767 * Returns with the buffer for the block gotten.
2768 */
50920116 2769int
1da177e4 2770xfs_alloc_get_freelist(
49f0d84e 2771 struct xfs_perag *pag,
50920116
DC
2772 struct xfs_trans *tp,
2773 struct xfs_buf *agbp,
2774 xfs_agblock_t *bnop,
2775 int btreeblk)
1da177e4 2776{
50920116
DC
2777 struct xfs_agf *agf = agbp->b_addr;
2778 struct xfs_buf *agflbp;
2779 xfs_agblock_t bno;
2780 __be32 *agfl_bno;
2781 int error;
f53dde11 2782 uint32_t logflags;
50920116 2783 struct xfs_mount *mp = tp->t_mountp;
1da177e4 2784
1da177e4
LT
2785 /*
2786 * Freelist is empty, give up.
2787 */
2788 if (!agf->agf_flcount) {
2789 *bnop = NULLAGBLOCK;
2790 return 0;
2791 }
2792 /*
2793 * Read the array of free blocks.
2794 */
cec7bb7d 2795 error = xfs_alloc_read_agfl(pag, tp, &agflbp);
77c95bba 2796 if (error)
1da177e4 2797 return error;
77c95bba
CH
2798
2799
1da177e4
LT
2800 /*
2801 * Get the block number and update the data structures.
2802 */
183606d8 2803 agfl_bno = xfs_buf_to_agfl_bno(agflbp);
77c95bba 2804 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
413d57c9 2805 be32_add_cpu(&agf->agf_flfirst, 1);
1da177e4 2806 xfs_trans_brelse(tp, agflbp);
a78ee256 2807 if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
1da177e4 2808 agf->agf_flfirst = 0;
a862e0fd 2809
a27ba260 2810 ASSERT(!pag->pagf_agflreset);
413d57c9 2811 be32_add_cpu(&agf->agf_flcount, -1);
1da177e4 2812 pag->pagf_flcount--;
92821e2b
DC
2813
2814 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2815 if (btreeblk) {
413d57c9 2816 be32_add_cpu(&agf->agf_btreeblks, 1);
92821e2b
DC
2817 pag->pagf_btreeblks++;
2818 logflags |= XFS_AGF_BTREEBLKS;
2819 }
2820
92821e2b 2821 xfs_alloc_log_agf(tp, agbp, logflags);
1da177e4
LT
2822 *bnop = bno;
2823
1da177e4
LT
2824 return 0;
2825}
2826
2827/*
2828 * Log the given fields from the agf structure.
2829 */
2830void
2831xfs_alloc_log_agf(
f53dde11
DC
2832 struct xfs_trans *tp,
2833 struct xfs_buf *bp,
2834 uint32_t fields)
1da177e4
LT
2835{
2836 int first; /* first byte offset */
2837 int last; /* last byte offset */
2838 static const short offsets[] = {
2839 offsetof(xfs_agf_t, agf_magicnum),
2840 offsetof(xfs_agf_t, agf_versionnum),
2841 offsetof(xfs_agf_t, agf_seqno),
2842 offsetof(xfs_agf_t, agf_length),
2843 offsetof(xfs_agf_t, agf_roots[0]),
2844 offsetof(xfs_agf_t, agf_levels[0]),
2845 offsetof(xfs_agf_t, agf_flfirst),
2846 offsetof(xfs_agf_t, agf_fllast),
2847 offsetof(xfs_agf_t, agf_flcount),
2848 offsetof(xfs_agf_t, agf_freeblks),
2849 offsetof(xfs_agf_t, agf_longest),
92821e2b 2850 offsetof(xfs_agf_t, agf_btreeblks),
4e0e6040 2851 offsetof(xfs_agf_t, agf_uuid),
f32866fd 2852 offsetof(xfs_agf_t, agf_rmap_blocks),
bdf28630
DW
2853 offsetof(xfs_agf_t, agf_refcount_blocks),
2854 offsetof(xfs_agf_t, agf_refcount_root),
2855 offsetof(xfs_agf_t, agf_refcount_level),
da1f039d
DW
2856 /* needed so that we don't log the whole rest of the structure: */
2857 offsetof(xfs_agf_t, agf_spare64),
1da177e4
LT
2858 sizeof(xfs_agf_t)
2859 };
2860
9798f615 2861 trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);
0b1b213f 2862
61fe135c 2863 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
4e0e6040 2864
1da177e4
LT
2865 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2866 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2867}
2868
1da177e4
LT
2869/*
2870 * Put the block on the freelist for the allocation group.
2871 */
50920116 2872int
1da177e4 2873xfs_alloc_put_freelist(
8c392eb2 2874 struct xfs_perag *pag,
50920116
DC
2875 struct xfs_trans *tp,
2876 struct xfs_buf *agbp,
2877 struct xfs_buf *agflbp,
2878 xfs_agblock_t bno,
2879 int btreeblk)
1da177e4 2880{
9798f615
CH
2881 struct xfs_mount *mp = tp->t_mountp;
2882 struct xfs_agf *agf = agbp->b_addr;
50920116 2883 __be32 *blockp;
1da177e4 2884 int error;
f53dde11 2885 uint32_t logflags;
77c95bba
CH
2886 __be32 *agfl_bno;
2887 int startoff;
1da177e4 2888
cec7bb7d
DC
2889 if (!agflbp) {
2890 error = xfs_alloc_read_agfl(pag, tp, &agflbp);
2891 if (error)
2892 return error;
2893 }
2894
413d57c9 2895 be32_add_cpu(&agf->agf_fllast, 1);
a78ee256 2896 if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
1da177e4 2897 agf->agf_fllast = 0;
a862e0fd 2898
a27ba260 2899 ASSERT(!pag->pagf_agflreset);
413d57c9 2900 be32_add_cpu(&agf->agf_flcount, 1);
1da177e4 2901 pag->pagf_flcount++;
92821e2b
DC
2902
2903 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2904 if (btreeblk) {
413d57c9 2905 be32_add_cpu(&agf->agf_btreeblks, -1);
92821e2b
DC
2906 pag->pagf_btreeblks--;
2907 logflags |= XFS_AGF_BTREEBLKS;
2908 }
2909
92821e2b
DC
2910 xfs_alloc_log_agf(tp, agbp, logflags);
2911
a78ee256 2912 ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
77c95bba 2913
183606d8 2914 agfl_bno = xfs_buf_to_agfl_bno(agflbp);
77c95bba 2915 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
e2101005 2916 *blockp = cpu_to_be32(bno);
77c95bba
CH
2917 startoff = (char *)blockp - (char *)agflbp->b_addr;
2918
92821e2b 2919 xfs_alloc_log_agf(tp, agbp, logflags);
77c95bba 2920
61fe135c 2921 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
77c95bba
CH
2922 xfs_trans_log_buf(tp, agflbp, startoff,
2923 startoff + sizeof(xfs_agblock_t) - 1);
1da177e4
LT
2924 return 0;
2925}
2926
a6a781a5 2927static xfs_failaddr_t
612cfbfe 2928xfs_agf_verify(
b5572597
DW
2929 struct xfs_buf *bp)
2930{
dbd329f1 2931 struct xfs_mount *mp = bp->b_mount;
9798f615 2932 struct xfs_agf *agf = bp->b_addr;
5d5f527d 2933
38c26bfd 2934 if (xfs_has_crc(mp)) {
a45086e2 2935 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
a6a781a5 2936 return __this_address;
9798f615 2937 if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
a6a781a5 2938 return __this_address;
a45086e2 2939 }
5d5f527d 2940
39708c20
BF
2941 if (!xfs_verify_magic(bp, agf->agf_magicnum))
2942 return __this_address;
2943
2944 if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
4e0e6040 2945 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
a78ee256
DC
2946 be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
2947 be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
2948 be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
a6a781a5 2949 return __this_address;
5d5f527d 2950
d0c7feaf
ZB
2951 if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks)
2952 return __this_address;
2953
2954 if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
2955 be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))
2956 return __this_address;
2957
d2a047f3
DW
2958 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
2959 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
7cb3efb4
DW
2960 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
2961 mp->m_alloc_maxlevels ||
2962 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
2963 mp->m_alloc_maxlevels)
a6a781a5 2964 return __this_address;
e1b05723 2965
38c26bfd 2966 if (xfs_has_rmapbt(mp) &&
d2a047f3 2967 (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
7cb3efb4
DW
2968 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
2969 mp->m_rmap_maxlevels))
a6a781a5 2970 return __this_address;
b8704944 2971
ebd9027d 2972 if (xfs_has_rmapbt(mp) &&
d0c7feaf
ZB
2973 be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length))
2974 return __this_address;
2975
5d5f527d
DC
2976 /*
2977 * during growfs operations, the perag is not fully initialised,
2978 * so we can't use it for any useful checking. growfs ensures we can't
2979 * use it by using uncached buffers that don't have the perag attached
2980 * so we can detect and avoid this problem.
2981 */
4e0e6040 2982 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
a6a781a5 2983 return __this_address;
5d5f527d 2984
ebd9027d 2985 if (xfs_has_lazysbcount(mp) &&
4e0e6040 2986 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
a6a781a5 2987 return __this_address;
4e0e6040 2988
ebd9027d 2989 if (xfs_has_reflink(mp) &&
d0c7feaf
ZB
2990 be32_to_cpu(agf->agf_refcount_blocks) >
2991 be32_to_cpu(agf->agf_length))
2992 return __this_address;
2993
ebd9027d 2994 if (xfs_has_reflink(mp) &&
d2a047f3 2995 (be32_to_cpu(agf->agf_refcount_level) < 1 ||
973975b7 2996 be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels))
a6a781a5 2997 return __this_address;
46eeb521 2998
a6a781a5 2999 return NULL;
5d5f527d 3000
612cfbfe
DC
3001}
3002
1813dd64
DC
3003static void
3004xfs_agf_read_verify(
612cfbfe
DC
3005 struct xfs_buf *bp)
3006{
dbd329f1 3007 struct xfs_mount *mp = bp->b_mount;
bc1a09b8 3008 xfs_failaddr_t fa;
4e0e6040 3009
38c26bfd 3010 if (xfs_has_crc(mp) &&
ce5028cf 3011 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
bc1a09b8
DW
3012 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
3013 else {
b5572597 3014 fa = xfs_agf_verify(bp);
bc1a09b8
DW
3015 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
3016 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
3017 }
612cfbfe 3018}
5d5f527d 3019
b0f539de 3020static void
1813dd64 3021xfs_agf_write_verify(
612cfbfe
DC
3022 struct xfs_buf *bp)
3023{
dbd329f1 3024 struct xfs_mount *mp = bp->b_mount;
fb1755a6 3025 struct xfs_buf_log_item *bip = bp->b_log_item;
9798f615 3026 struct xfs_agf *agf = bp->b_addr;
bc1a09b8 3027 xfs_failaddr_t fa;
4e0e6040 3028
b5572597 3029 fa = xfs_agf_verify(bp);
bc1a09b8
DW
3030 if (fa) {
3031 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
4e0e6040
DC
3032 return;
3033 }
3034
38c26bfd 3035 if (!xfs_has_crc(mp))
4e0e6040
DC
3036 return;
3037
3038 if (bip)
9798f615 3039 agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
4e0e6040 3040
f1dbcd7e 3041 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
5d5f527d
DC
3042}
3043
1813dd64 3044const struct xfs_buf_ops xfs_agf_buf_ops = {
233135b7 3045 .name = "xfs_agf",
39708c20 3046 .magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
1813dd64
DC
3047 .verify_read = xfs_agf_read_verify,
3048 .verify_write = xfs_agf_write_verify,
b5572597 3049 .verify_struct = xfs_agf_verify,
1813dd64
DC
3050};
3051
1da177e4
LT
3052/*
3053 * Read in the allocation group header (free/alloc section).
3054 */
fa044ae7 3055int
4805621a 3056xfs_read_agf(
fa044ae7
DC
3057 struct xfs_perag *pag,
3058 struct xfs_trans *tp,
3059 int flags,
3060 struct xfs_buf **agfbpp)
1da177e4 3061{
fa044ae7
DC
3062 struct xfs_mount *mp = pag->pag_mount;
3063 int error;
1da177e4 3064
fa044ae7 3065 trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);
d123031a 3066
4ed8e27b 3067 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
fa044ae7
DC
3068 XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
3069 XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
1da177e4
LT
3070 if (error)
3071 return error;
4805621a 3072
fa044ae7 3073 xfs_buf_set_ref(*agfbpp, XFS_AGF_REF);
4805621a
CH
3074 return 0;
3075}
3076
3077/*
76b47e52
DC
3078 * Read in the allocation group header (free/alloc section) and initialise the
3079 * perag structure if necessary. If the caller provides @agfbpp, then return the
3080 * locked buffer to the caller, otherwise free it.
4805621a 3081 */
08d3e84f 3082int
4805621a 3083xfs_alloc_read_agf(
08d3e84f
DC
3084 struct xfs_perag *pag,
3085 struct xfs_trans *tp,
3086 int flags,
76b47e52 3087 struct xfs_buf **agfbpp)
4805621a 3088{
76b47e52 3089 struct xfs_buf *agfbp;
08d3e84f 3090 struct xfs_agf *agf;
4805621a 3091 int error;
16eaab83 3092 int allocbt_blks;
4805621a 3093
08d3e84f 3094 trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);
4805621a 3095
f48e2df8
DW
3096 /* We don't support trylock when freeing. */
3097 ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
3098 (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
fa044ae7 3099 error = xfs_read_agf(pag, tp,
0cadda1c 3100 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
76b47e52 3101 &agfbp);
4805621a
CH
3102 if (error)
3103 return error;
4805621a 3104
76b47e52 3105 agf = agfbp->b_addr;
1da177e4 3106 if (!pag->pagf_init) {
16259e7d 3107 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
92821e2b 3108 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
16259e7d
CH
3109 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
3110 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
1da177e4 3111 pag->pagf_levels[XFS_BTNUM_BNOi] =
16259e7d 3112 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
1da177e4 3113 pag->pagf_levels[XFS_BTNUM_CNTi] =
16259e7d 3114 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
b8704944
DW
3115 pag->pagf_levels[XFS_BTNUM_RMAPi] =
3116 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
46eeb521 3117 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
1da177e4 3118 pag->pagf_init = 1;
08d3e84f 3119 pag->pagf_agflreset = xfs_agfl_needs_reset(pag->pag_mount, agf);
16eaab83
BF
3120
3121 /*
3122 * Update the in-core allocbt counter. Filter out the rmapbt
3123 * subset of the btreeblks counter because the rmapbt is managed
3124 * by perag reservation. Subtract one for the rmapbt root block
3125 * because the rmap counter includes it while the btreeblks
3126 * counter only tracks non-root blocks.
3127 */
3128 allocbt_blks = pag->pagf_btreeblks;
08d3e84f 3129 if (xfs_has_rmapbt(pag->pag_mount))
16eaab83
BF
3130 allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
3131 if (allocbt_blks > 0)
08d3e84f
DC
3132 atomic64_add(allocbt_blks,
3133 &pag->pag_mount->m_allocbt_blks);
1da177e4
LT
3134 }
3135#ifdef DEBUG
08d3e84f 3136 else if (!xfs_is_shutdown(pag->pag_mount)) {
16259e7d 3137 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
89b28393 3138 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
16259e7d
CH
3139 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
3140 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
1da177e4 3141 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
16259e7d 3142 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
1da177e4 3143 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
16259e7d 3144 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
1da177e4
LT
3145 }
3146#endif
76b47e52
DC
3147 if (agfbpp)
3148 *agfbpp = agfbp;
3149 else
3150 xfs_trans_brelse(tp, agfbp);
1da177e4
LT
3151 return 0;
3152}
3153
3154/*
3155 * Allocate an extent (variable-size).
3156 * Depending on the allocation type, we either look in a single allocation
3157 * group or loop over the allocation groups to find the result.
3158 */
3159int /* error */
e04426b9 3160xfs_alloc_vextent(
64396ff2 3161 struct xfs_alloc_arg *args) /* allocation argument structure */
1da177e4 3162{
64396ff2
BF
3163 xfs_agblock_t agsize; /* allocation group size */
3164 int error;
3165 int flags; /* XFS_ALLOC_FLAG_... locking flags */
3166 struct xfs_mount *mp; /* mount structure pointer */
3167 xfs_agnumber_t sagno; /* starting allocation group number */
3168 xfs_alloctype_t type; /* input allocation type */
3169 int bump_rotor = 0;
3170 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
1da177e4
LT
3171
3172 mp = args->mp;
3173 type = args->otype = args->type;
3174 args->agbno = NULLAGBLOCK;
3175 /*
3176 * Just fix this up, for the case where the last a.g. is shorter
3177 * (or there's only one a.g.) and the caller couldn't easily figure
3178 * that out (xfs_bmap_alloc).
3179 */
3180 agsize = mp->m_sb.sb_agblocks;
3181 if (args->maxlen > agsize)
3182 args->maxlen = agsize;
3183 if (args->alignment == 0)
3184 args->alignment = 1;
3185 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
3186 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
3187 ASSERT(args->minlen <= args->maxlen);
3188 ASSERT(args->minlen <= agsize);
3189 ASSERT(args->mod < args->prod);
3190 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
3191 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
3192 args->minlen > args->maxlen || args->minlen > agsize ||
3193 args->mod >= args->prod) {
3194 args->fsbno = NULLFSBLOCK;
0b1b213f 3195 trace_xfs_alloc_vextent_badargs(args);
1da177e4
LT
3196 return 0;
3197 }
1da177e4
LT
3198
3199 switch (type) {
3200 case XFS_ALLOCTYPE_THIS_AG:
3201 case XFS_ALLOCTYPE_NEAR_BNO:
3202 case XFS_ALLOCTYPE_THIS_BNO:
3203 /*
3204 * These three force us into a single a.g.
3205 */
3206 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
a862e0fd 3207 args->pag = xfs_perag_get(mp, args->agno);
1da177e4 3208 error = xfs_alloc_fix_freelist(args, 0);
1da177e4 3209 if (error) {
0b1b213f 3210 trace_xfs_alloc_vextent_nofix(args);
1da177e4
LT
3211 goto error0;
3212 }
3213 if (!args->agbp) {
0b1b213f 3214 trace_xfs_alloc_vextent_noagbp(args);
1da177e4
LT
3215 break;
3216 }
3217 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
3218 if ((error = xfs_alloc_ag_vextent(args)))
3219 goto error0;
1da177e4
LT
3220 break;
3221 case XFS_ALLOCTYPE_START_BNO:
3222 /*
3223 * Try near allocation first, then anywhere-in-ag after
3224 * the first a.g. fails.
3225 */
292378ed 3226 if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
2e973b2c 3227 xfs_is_inode32(mp)) {
1da177e4
LT
3228 args->fsbno = XFS_AGB_TO_FSB(mp,
3229 ((mp->m_agfrotor / rotorstep) %
3230 mp->m_sb.sb_agcount), 0);
3231 bump_rotor = 1;
3232 }
3233 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
3234 args->type = XFS_ALLOCTYPE_NEAR_BNO;
53004ee7 3235 fallthrough;
1da177e4
LT
3236 case XFS_ALLOCTYPE_FIRST_AG:
3237 /*
3238 * Rotate through the allocation groups looking for a winner.
3239 */
8d242e93 3240 if (type == XFS_ALLOCTYPE_FIRST_AG) {
1da177e4
LT
3241 /*
3242 * Start with allocation group given by bno.
3243 */
3244 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
3245 args->type = XFS_ALLOCTYPE_THIS_AG;
3246 sagno = 0;
3247 flags = 0;
3248 } else {
1da177e4
LT
3249 /*
3250 * Start with the given allocation group.
3251 */
3252 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
3253 flags = XFS_ALLOC_FLAG_TRYLOCK;
3254 }
3255 /*
3256 * Loop over allocation groups twice; first time with
3257 * trylock set, second time without.
3258 */
1da177e4 3259 for (;;) {
a862e0fd 3260 args->pag = xfs_perag_get(mp, args->agno);
1da177e4 3261 error = xfs_alloc_fix_freelist(args, flags);
1da177e4 3262 if (error) {
0b1b213f 3263 trace_xfs_alloc_vextent_nofix(args);
1da177e4
LT
3264 goto error0;
3265 }
3266 /*
3267 * If we get a buffer back then the allocation will fly.
3268 */
3269 if (args->agbp) {
3270 if ((error = xfs_alloc_ag_vextent(args)))
3271 goto error0;
3272 break;
3273 }
0b1b213f
CH
3274
3275 trace_xfs_alloc_vextent_loopfailed(args);
3276
1da177e4
LT
3277 /*
3278 * Didn't work, figure out the next iteration.
3279 */
3280 if (args->agno == sagno &&
3281 type == XFS_ALLOCTYPE_START_BNO)
3282 args->type = XFS_ALLOCTYPE_THIS_AG;
d210a28c
YL
3283 /*
3284 * For the first allocation, we can try any AG to get
3285 * space. However, if we already have allocated a
3286 * block, we don't want to try AGs whose number is below
3287 * sagno. Otherwise, we may end up with out-of-order
3288 * locking of AGF, which might cause deadlock.
3289 */
3290 if (++(args->agno) == mp->m_sb.sb_agcount) {
64396ff2 3291 if (args->tp->t_firstblock != NULLFSBLOCK)
d210a28c
YL
3292 args->agno = sagno;
3293 else
3294 args->agno = 0;
3295 }
1da177e4
LT
3296 /*
3297 * Reached the starting a.g., must either be done
3298 * or switch to non-trylock mode.
3299 */
3300 if (args->agno == sagno) {
255c5162 3301 if (flags == 0) {
1da177e4 3302 args->agbno = NULLAGBLOCK;
0b1b213f 3303 trace_xfs_alloc_vextent_allfailed(args);
1da177e4
LT
3304 break;
3305 }
255c5162
CH
3306
3307 flags = 0;
3308 if (type == XFS_ALLOCTYPE_START_BNO) {
3309 args->agbno = XFS_FSB_TO_AGBNO(mp,
3310 args->fsbno);
3311 args->type = XFS_ALLOCTYPE_NEAR_BNO;
1da177e4
LT
3312 }
3313 }
a862e0fd 3314 xfs_perag_put(args->pag);
1da177e4 3315 }
8d242e93 3316 if (bump_rotor) {
1da177e4
LT
3317 if (args->agno == sagno)
3318 mp->m_agfrotor = (mp->m_agfrotor + 1) %
3319 (mp->m_sb.sb_agcount * rotorstep);
3320 else
3321 mp->m_agfrotor = (args->agno * rotorstep + 1) %
3322 (mp->m_sb.sb_agcount * rotorstep);
3323 }
3324 break;
3325 default:
3326 ASSERT(0);
3327 /* NOTREACHED */
3328 }
3329 if (args->agbno == NULLAGBLOCK)
3330 args->fsbno = NULLFSBLOCK;
3331 else {
3332 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
3333#ifdef DEBUG
3334 ASSERT(args->len >= args->minlen);
3335 ASSERT(args->len <= args->maxlen);
3336 ASSERT(args->agbno % args->alignment == 0);
3337 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
3338 args->len);
3339#endif
3fbbbea3 3340
1da177e4 3341 }
a862e0fd 3342 xfs_perag_put(args->pag);
1da177e4
LT
3343 return 0;
3344error0:
a862e0fd 3345 xfs_perag_put(args->pag);
1da177e4
LT
3346 return error;
3347}
3348
4d89e20b
DC
3349/* Ensure that the freelist is at full capacity. */
3350int
3351xfs_free_extent_fix_freelist(
3352 struct xfs_trans *tp,
45d06621 3353 struct xfs_perag *pag,
4d89e20b 3354 struct xfs_buf **agbp)
1da177e4 3355{
4d89e20b
DC
3356 struct xfs_alloc_arg args;
3357 int error;
1da177e4 3358
4d89e20b 3359 memset(&args, 0, sizeof(struct xfs_alloc_arg));
1da177e4
LT
3360 args.tp = tp;
3361 args.mp = tp->t_mountp;
45d06621
DC
3362 args.agno = pag->pag_agno;
3363 args.pag = pag;
be65b18a
DC
3364
3365 /*
3366 * validate that the block number is legal - the enables us to detect
3367 * and handle a silent filesystem corruption rather than crashing.
3368 */
be65b18a 3369 if (args.agno >= args.mp->m_sb.sb_agcount)
2451337d 3370 return -EFSCORRUPTED;
be65b18a 3371
be65b18a
DC
3372 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
3373 if (error)
45d06621 3374 return error;
4d89e20b
DC
3375
3376 *agbp = args.agbp;
45d06621 3377 return 0;
4d89e20b
DC
3378}
3379
3380/*
3381 * Free an extent.
3382 * Just break up the extent address and hand off to xfs_free_ag_extent
3383 * after fixing up the freelist.
3384 */
66e3237e 3385int
fcb762f5 3386__xfs_free_extent(
66e3237e
DW
3387 struct xfs_trans *tp,
3388 xfs_fsblock_t bno,
3389 xfs_extlen_t len,
3390 const struct xfs_owner_info *oinfo,
3391 enum xfs_ag_resv_type type,
3392 bool skip_discard)
4d89e20b 3393{
66e3237e
DW
3394 struct xfs_mount *mp = tp->t_mountp;
3395 struct xfs_buf *agbp;
3396 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
3397 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
9798f615 3398 struct xfs_agf *agf;
66e3237e
DW
3399 int error;
3400 unsigned int busy_flags = 0;
45d06621 3401 struct xfs_perag *pag;
4d89e20b
DC
3402
3403 ASSERT(len != 0);
0ab32086 3404 ASSERT(type != XFS_AG_RESV_AGFL);
4d89e20b 3405
ba9e7802 3406 if (XFS_TEST_ERROR(false, mp,
9e24cfd0 3407 XFS_ERRTAG_FREE_EXTENT))
ba9e7802
DW
3408 return -EIO;
3409
45d06621
DC
3410 pag = xfs_perag_get(mp, agno);
3411 error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
4d89e20b 3412 if (error)
45d06621 3413 goto err;
9798f615 3414 agf = agbp->b_addr;
4d89e20b 3415
f9e03706
DW
3416 if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
3417 error = -EFSCORRUPTED;
45d06621 3418 goto err_release;
f9e03706 3419 }
be65b18a
DC
3420
3421 /* validate the extent size is legal now we have the agf locked */
9798f615 3422 if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
f9e03706 3423 error = -EFSCORRUPTED;
45d06621 3424 goto err_release;
f9e03706 3425 }
be65b18a 3426
3fd129b6 3427 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
4d89e20b 3428 if (error)
45d06621 3429 goto err_release;
4d89e20b 3430
fcb762f5
BF
3431 if (skip_discard)
3432 busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
45d06621
DC
3433 xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
3434 xfs_perag_put(pag);
4d89e20b
DC
3435 return 0;
3436
45d06621 3437err_release:
4d89e20b 3438 xfs_trans_brelse(tp, agbp);
45d06621
DC
3439err:
3440 xfs_perag_put(pag);
1da177e4
LT
3441 return error;
3442}
2d520bfa
DW
3443
3444struct xfs_alloc_query_range_info {
3445 xfs_alloc_query_range_fn fn;
3446 void *priv;
3447};
3448
3449/* Format btree record and pass to our callback. */
3450STATIC int
3451xfs_alloc_query_range_helper(
3452 struct xfs_btree_cur *cur,
159eb69d 3453 const union xfs_btree_rec *rec,
2d520bfa
DW
3454 void *priv)
3455{
3456 struct xfs_alloc_query_range_info *query = priv;
3457 struct xfs_alloc_rec_incore irec;
3458
3459 irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
3460 irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
3461 return query->fn(cur, &irec, query->priv);
3462}
3463
3464/* Find all free space within a given range of blocks. */
3465int
3466xfs_alloc_query_range(
3467 struct xfs_btree_cur *cur,
04dcb474
DW
3468 const struct xfs_alloc_rec_incore *low_rec,
3469 const struct xfs_alloc_rec_incore *high_rec,
2d520bfa
DW
3470 xfs_alloc_query_range_fn fn,
3471 void *priv)
3472{
3473 union xfs_btree_irec low_brec;
3474 union xfs_btree_irec high_brec;
3475 struct xfs_alloc_query_range_info query;
3476
3477 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3478 low_brec.a = *low_rec;
3479 high_brec.a = *high_rec;
3480 query.priv = priv;
3481 query.fn = fn;
3482 return xfs_btree_query_range(cur, &low_brec, &high_brec,
3483 xfs_alloc_query_range_helper, &query);
3484}
e9a2599a
DW
3485
3486/* Find all free space records. */
3487int
3488xfs_alloc_query_all(
3489 struct xfs_btree_cur *cur,
3490 xfs_alloc_query_range_fn fn,
3491 void *priv)
3492{
3493 struct xfs_alloc_query_range_info query;
3494
3495 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3496 query.priv = priv;
3497 query.fn = fn;
3498 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3499}
21ec5416 3500
ce1d802e
DW
3501/* Is there a record covering a given extent? */
3502int
3503xfs_alloc_has_record(
3504 struct xfs_btree_cur *cur,
3505 xfs_agblock_t bno,
3506 xfs_extlen_t len,
3507 bool *exists)
3508{
3509 union xfs_btree_irec low;
3510 union xfs_btree_irec high;
3511
3512 memset(&low, 0, sizeof(low));
3513 low.a.ar_startblock = bno;
3514 memset(&high, 0xFF, sizeof(high));
3515 high.a.ar_startblock = bno + len - 1;
3516
3517 return xfs_btree_has_record(cur, &low, &high, exists);
3518}
9f3a080e
DW
3519
3520/*
3521 * Walk all the blocks in the AGFL. The @walk_fn can return any negative
5bb46e3e 3522 * error code or XFS_ITER_*.
9f3a080e
DW
3523 */
3524int
3525xfs_agfl_walk(
3526 struct xfs_mount *mp,
3527 struct xfs_agf *agf,
3528 struct xfs_buf *agflbp,
3529 xfs_agfl_walk_fn walk_fn,
3530 void *priv)
3531{
3532 __be32 *agfl_bno;
3533 unsigned int i;
3534 int error;
3535
183606d8 3536 agfl_bno = xfs_buf_to_agfl_bno(agflbp);
9f3a080e
DW
3537 i = be32_to_cpu(agf->agf_flfirst);
3538
3539 /* Nothing to walk in an empty AGFL. */
3540 if (agf->agf_flcount == cpu_to_be32(0))
3541 return 0;
3542
3543 /* Otherwise, walk from first to last, wrapping as needed. */
3544 for (;;) {
3545 error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
3546 if (error)
3547 return error;
3548 if (i == be32_to_cpu(agf->agf_fllast))
3549 break;
3550 if (++i == xfs_agfl_size(mp))
3551 i = 0;
3552 }
3553
3554 return 0;
3555}
c201d9ca
DW
3556
3557int __init
3558xfs_extfree_intent_init_cache(void)
3559{
3560 xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
3561 sizeof(struct xfs_extent_free_item),
3562 0, 0, NULL);
3563
3564 return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
3565}
3566
3567void
3568xfs_extfree_intent_destroy_cache(void)
3569{
3570 kmem_cache_destroy(xfs_extfree_item_cache);
3571 xfs_extfree_item_cache = NULL;
3572}