]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxfs/xfs_alloc.c
xfs: hoist xfs_scrub_agfl_walk to libxfs as xfs_agfl_walk
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_alloc.c
CommitLineData
2bd0ea18 1/*
5e656dbb 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
da23017d 3 * All Rights Reserved.
5000d01d 4 *
da23017d
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
2bd0ea18 7 * published by the Free Software Foundation.
5000d01d 8 *
da23017d
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
5000d01d 13 *
da23017d
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2bd0ea18 17 */
9c799827 18#include "libxfs_priv.h"
b626fb59
DC
19#include "xfs_fs.h"
20#include "xfs_format.h"
21#include "xfs_log_format.h"
22#include "xfs_shared.h"
23#include "xfs_trans_resv.h"
24#include "xfs_bit.h"
25#include "xfs_sb.h"
26#include "xfs_mount.h"
f944d3d0 27#include "xfs_defer.h"
b626fb59
DC
28#include "xfs_inode.h"
29#include "xfs_btree.h"
631ac87a 30#include "xfs_rmap.h"
b626fb59
DC
31#include "xfs_alloc_btree.h"
32#include "xfs_alloc.h"
56d3fc2b 33#include "xfs_errortag.h"
b626fb59
DC
34#include "xfs_cksum.h"
35#include "xfs_trace.h"
36#include "xfs_trans.h"
cf8ce220 37#include "xfs_ag_resv.h"
d5c1b462
BF
38#include "xfs_bmap.h"
39
40extern kmem_zone_t *xfs_bmap_free_item_zone;
2bd0ea18 41
ff105f75
DC
42struct workqueue_struct *xfs_alloc_wq;
43
2bd0ea18 44#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
5e656dbb
BN
45
46#define XFSA_FIXUP_BNO_OK 1
47#define XFSA_FIXUP_CNT_OK 2
48
5e656dbb
BN
49STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
50STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
51STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
52STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
a2ceac1f 53 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
2bd0ea18 54
b8165508
DC
55/*
56 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
57 * the beginning of the block for a proper header with the location information
58 * and CRC.
59 */
60unsigned int
61xfs_agfl_size(
62 struct xfs_mount *mp)
63{
64 unsigned int size = mp->m_sb.sb_sectsize;
65
66 if (xfs_sb_version_hascrc(&mp->m_sb))
67 size -= sizeof(struct xfs_agfl);
68
69 return size / sizeof(xfs_agblock_t);
70}
71
2a96beb9
DW
72unsigned int
73xfs_refc_block(
74 struct xfs_mount *mp)
75{
76 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
77 return XFS_RMAP_BLOCK(mp) + 1;
78 if (xfs_sb_version_hasfinobt(&mp->m_sb))
79 return XFS_FIBT_BLOCK(mp) + 1;
80 return XFS_IBT_BLOCK(mp) + 1;
81}
82
ef5340cd
DW
83xfs_extlen_t
84xfs_prealloc_blocks(
85 struct xfs_mount *mp)
86{
2a96beb9
DW
87 if (xfs_sb_version_hasreflink(&mp->m_sb))
88 return xfs_refc_block(mp) + 1;
ef5340cd
DW
89 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
90 return XFS_RMAP_BLOCK(mp) + 1;
91 if (xfs_sb_version_hasfinobt(&mp->m_sb))
92 return XFS_FIBT_BLOCK(mp) + 1;
93 return XFS_IBT_BLOCK(mp) + 1;
94}
95
b8a8d6e5
DW
96/*
97 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
98 * AGF buffer (PV 947395), we place constraints on the relationship among
99 * actual allocations for data blocks, freelist blocks, and potential file data
100 * bmap btree blocks. However, these restrictions may result in no actual space
101 * allocated for a delayed extent, for example, a data block in a certain AG is
102 * allocated but there is no additional block for the additional bmap btree
103 * block due to a split of the bmap btree of the file. The result of this may
104 * lead to an infinite loop when the file gets flushed to disk and all delayed
105 * extents need to be actually allocated. To get around this, we explicitly set
106 * aside a few blocks which will not be reserved in delayed allocation.
107 *
cf8ce220
DW
108 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
109 * potential split of the file's bmap btree.
b8a8d6e5
DW
110 */
111unsigned int
112xfs_alloc_set_aside(
113 struct xfs_mount *mp)
114{
8eeb15ea 115 return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
b8a8d6e5
DW
116}
117
118/*
119 * When deciding how much space to allocate out of an AG, we limit the
120 * allocation maximum size to the size the AG. However, we cannot use all the
121 * blocks in the AG - some are permanently used by metadata. These
122 * blocks are generally:
123 * - the AG superblock, AGF, AGI and AGFL
124 * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
125 * the AGI free inode and rmap btree root blocks.
126 * - blocks on the AGFL according to xfs_alloc_set_aside() limits
127 * - the rmapbt root block
128 *
129 * The AG headers are sector sized, so the amount of space they take up is
130 * dependent on filesystem geometry. The others are all single blocks.
131 */
132unsigned int
133xfs_alloc_ag_max_usable(
134 struct xfs_mount *mp)
135{
136 unsigned int blocks;
137
138 blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
139 blocks += XFS_ALLOC_AGFL_RESERVE;
140 blocks += 3; /* AGF, AGI btree root blocks */
141 if (xfs_sb_version_hasfinobt(&mp->m_sb))
142 blocks++; /* finobt root block */
143 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
144 blocks++; /* rmap root block */
868c70e3
DW
145 if (xfs_sb_version_hasreflink(&mp->m_sb))
146 blocks++; /* refcount root block */
b8a8d6e5
DW
147
148 return mp->m_sb.sb_agblocks - blocks;
149}
150
b194c7d8
BN
151/*
152 * Lookup the record equal to [bno, len] in the btree given by cur.
153 */
154STATIC int /* error */
155xfs_alloc_lookup_eq(
156 struct xfs_btree_cur *cur, /* btree cursor */
157 xfs_agblock_t bno, /* starting block of extent */
158 xfs_extlen_t len, /* length of extent */
159 int *stat) /* success/failure */
160{
161 cur->bc_rec.a.ar_startblock = bno;
162 cur->bc_rec.a.ar_blockcount = len;
163 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
164}
165
166/*
167 * Lookup the first record greater than or equal to [bno, len]
168 * in the btree given by cur.
169 */
a2ceac1f 170int /* error */
b194c7d8
BN
171xfs_alloc_lookup_ge(
172 struct xfs_btree_cur *cur, /* btree cursor */
173 xfs_agblock_t bno, /* starting block of extent */
174 xfs_extlen_t len, /* length of extent */
175 int *stat) /* success/failure */
176{
177 cur->bc_rec.a.ar_startblock = bno;
178 cur->bc_rec.a.ar_blockcount = len;
179 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
180}
181
182/*
183 * Lookup the first record less than or equal to [bno, len]
184 * in the btree given by cur.
185 */
1fe41a73 186int /* error */
b194c7d8
BN
187xfs_alloc_lookup_le(
188 struct xfs_btree_cur *cur, /* btree cursor */
189 xfs_agblock_t bno, /* starting block of extent */
190 xfs_extlen_t len, /* length of extent */
191 int *stat) /* success/failure */
192{
193 cur->bc_rec.a.ar_startblock = bno;
194 cur->bc_rec.a.ar_blockcount = len;
195 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
196}
197
198/*
199 * Update the record referred to by cur to the value given
200 * by [bno, len].
201 * This either works (return 0) or gets an EFSCORRUPTED error.
202 */
203STATIC int /* error */
204xfs_alloc_update(
205 struct xfs_btree_cur *cur, /* btree cursor */
206 xfs_agblock_t bno, /* starting block of extent */
207 xfs_extlen_t len) /* length of extent */
208{
209 union xfs_btree_rec rec;
210
211 rec.alloc.ar_startblock = cpu_to_be32(bno);
212 rec.alloc.ar_blockcount = cpu_to_be32(len);
213 return xfs_btree_update(cur, &rec);
214}
215
216/*
217 * Get the data from the pointed-to record.
218 */
a2ceac1f 219int /* error */
b194c7d8
BN
220xfs_alloc_get_rec(
221 struct xfs_btree_cur *cur, /* btree cursor */
222 xfs_agblock_t *bno, /* output: starting block of extent */
223 xfs_extlen_t *len, /* output: length of extent */
224 int *stat) /* output: success/failure */
225{
226 union xfs_btree_rec *rec;
227 int error;
228
229 error = xfs_btree_get_rec(cur, &rec, stat);
230 if (!error && *stat == 1) {
231 *bno = be32_to_cpu(rec->alloc.ar_startblock);
232 *len = be32_to_cpu(rec->alloc.ar_blockcount);
233 }
234 return error;
235}
236
2bd0ea18
NS
237/*
238 * Compute aligned version of the found extent.
239 * Takes alignment and min length into account.
240 */
cd80de04 241STATIC bool
2bd0ea18 242xfs_alloc_compute_aligned(
a2ceac1f 243 xfs_alloc_arg_t *args, /* allocation argument structure */
2bd0ea18
NS
244 xfs_agblock_t foundbno, /* starting block in found extent */
245 xfs_extlen_t foundlen, /* length in found extent */
2bd0ea18 246 xfs_agblock_t *resbno, /* result block number */
cd80de04
CH
247 xfs_extlen_t *reslen, /* result length */
248 unsigned *busy_gen)
2bd0ea18 249{
cd80de04
CH
250 xfs_agblock_t bno = foundbno;
251 xfs_extlen_t len = foundlen;
ff3263dd 252 xfs_extlen_t diff;
cd80de04 253 bool busy;
2bd0ea18 254
a2ceac1f 255 /* Trim busy sections out of found extent */
cd80de04 256 busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
a2ceac1f 257
ff3263dd
BF
258 /*
259 * If we have a largish extent that happens to start before min_agbno,
260 * see if we can shift it into range...
261 */
262 if (bno < args->min_agbno && bno + len > args->min_agbno) {
263 diff = args->min_agbno - bno;
264 if (len > diff) {
265 bno += diff;
266 len -= diff;
267 }
268 }
269
a2ceac1f
DC
270 if (args->alignment > 1 && len >= args->minlen) {
271 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
ff3263dd
BF
272
273 diff = aligned_bno - bno;
a2ceac1f
DC
274
275 *resbno = aligned_bno;
276 *reslen = diff >= len ? 0 : len - diff;
2bd0ea18 277 } else {
a2ceac1f
DC
278 *resbno = bno;
279 *reslen = len;
2bd0ea18 280 }
cd80de04
CH
281
282 return busy;
2bd0ea18
NS
283}
284
285/*
286 * Compute best start block and diff for "near" allocations.
287 * freelen >= wantlen already checked by caller.
288 */
289STATIC xfs_extlen_t /* difference value (absolute) */
290xfs_alloc_compute_diff(
291 xfs_agblock_t wantbno, /* target starting block */
292 xfs_extlen_t wantlen, /* target length */
293 xfs_extlen_t alignment, /* target alignment */
1fccd5c8 294 int datatype, /* are we allocating data? */
2bd0ea18
NS
295 xfs_agblock_t freebno, /* freespace's starting block */
296 xfs_extlen_t freelen, /* freespace's length */
297 xfs_agblock_t *newbnop) /* result: best start block from free */
298{
299 xfs_agblock_t freeend; /* end of freespace extent */
300 xfs_agblock_t newbno1; /* return block number */
301 xfs_agblock_t newbno2; /* other new block number */
0e266570
NS
302 xfs_extlen_t newlen1=0; /* length with newbno1 */
303 xfs_extlen_t newlen2=0; /* length with newbno2 */
2bd0ea18 304 xfs_agblock_t wantend; /* end of target extent */
1fccd5c8 305 bool userdata = xfs_alloc_is_userdata(datatype);
2bd0ea18
NS
306
307 ASSERT(freelen >= wantlen);
308 freeend = freebno + freelen;
309 wantend = wantbno + wantlen;
84a62eea
DC
310 /*
311 * We want to allocate from the start of a free extent if it is past
312 * the desired block or if we are allocating user data and the free
313 * extent is before desired block. The second case is there to allow
314 * for contiguous allocation from the remaining free space if the file
315 * grows in the short term.
316 */
317 if (freebno >= wantbno || (userdata && freeend < wantend)) {
2bd0ea18
NS
318 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
319 newbno1 = NULLAGBLOCK;
320 } else if (freeend >= wantend && alignment > 1) {
321 newbno1 = roundup(wantbno, alignment);
322 newbno2 = newbno1 - alignment;
323 if (newbno1 >= freeend)
324 newbno1 = NULLAGBLOCK;
325 else
326 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
327 if (newbno2 < freebno)
328 newbno2 = NULLAGBLOCK;
329 else
330 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
331 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
332 if (newlen1 < newlen2 ||
333 (newlen1 == newlen2 &&
334 XFS_ABSDIFF(newbno1, wantbno) >
335 XFS_ABSDIFF(newbno2, wantbno)))
336 newbno1 = newbno2;
337 } else if (newbno2 != NULLAGBLOCK)
338 newbno1 = newbno2;
339 } else if (freeend >= wantend) {
340 newbno1 = wantbno;
341 } else if (alignment > 1) {
342 newbno1 = roundup(freeend - wantlen, alignment);
343 if (newbno1 > freeend - wantlen &&
344 newbno1 - alignment >= freebno)
345 newbno1 -= alignment;
346 else if (newbno1 >= freeend)
347 newbno1 = NULLAGBLOCK;
348 } else
349 newbno1 = freeend - wantlen;
350 *newbnop = newbno1;
351 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
352}
353
354/*
355 * Fix up the length, based on mod and prod.
356 * len should be k * prod + mod for some k.
357 * If len is too small it is returned unchanged.
358 * If len hits maxlen it is left alone.
359 */
360STATIC void
361xfs_alloc_fix_len(
dfc130f3 362 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18
NS
363{
364 xfs_extlen_t k;
365 xfs_extlen_t rlen;
366
367 ASSERT(args->mod < args->prod);
368 rlen = args->len;
369 ASSERT(rlen >= args->minlen);
370 ASSERT(rlen <= args->maxlen);
371 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
372 (args->mod == 0 && rlen < args->prod))
373 return;
374 k = rlen % args->prod;
375 if (k == args->mod)
376 return;
ff105f75
DC
377 if (k > args->mod)
378 rlen = rlen - (k - args->mod);
379 else
380 rlen = rlen - args->prod + (args->mod - k);
19ebedcf 381 /* casts to (int) catch length underflows */
ff105f75
DC
382 if ((int)rlen < (int)args->minlen)
383 return;
384 ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
385 ASSERT(rlen % args->prod == args->mod);
2c003dc2
CH
386 ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
387 rlen + args->minleft);
2bd0ea18
NS
388 args->len = rlen;
389}
390
2bd0ea18
NS
391/*
392 * Update the two btrees, logically removing from freespace the extent
393 * starting at rbno, rlen blocks. The extent is contained within the
394 * actual (current) free extent fbno for flen blocks.
395 * Flags are passed in indicating whether the cursors are set to the
396 * relevant records.
397 */
398STATIC int /* error code */
399xfs_alloc_fixup_trees(
dfc130f3
RC
400 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
401 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
2bd0ea18
NS
402 xfs_agblock_t fbno, /* starting block of free extent */
403 xfs_extlen_t flen, /* length of free extent */
404 xfs_agblock_t rbno, /* starting block of returned extent */
405 xfs_extlen_t rlen, /* length of returned extent */
406 int flags) /* flags, XFSA_FIXUP_... */
407{
408 int error; /* error code */
409 int i; /* operation results */
410 xfs_agblock_t nfbno1; /* first new free startblock */
411 xfs_agblock_t nfbno2; /* second new free startblock */
0e266570
NS
412 xfs_extlen_t nflen1=0; /* first new free length */
413 xfs_extlen_t nflen2=0; /* second new free length */
19ebedcf
DC
414 struct xfs_mount *mp;
415
416 mp = cnt_cur->bc_mp;
2bd0ea18
NS
417
418 /*
419 * Look up the record in the by-size tree if necessary.
420 */
421 if (flags & XFSA_FIXUP_CNT_OK) {
422#ifdef DEBUG
0e266570 423 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
2bd0ea18 424 return error;
19ebedcf 425 XFS_WANT_CORRUPTED_RETURN(mp,
2bd0ea18
NS
426 i == 1 && nfbno1 == fbno && nflen1 == flen);
427#endif
428 } else {
0e266570 429 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
2bd0ea18 430 return error;
19ebedcf 431 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
432 }
433 /*
434 * Look up the record in the by-block tree if necessary.
435 */
436 if (flags & XFSA_FIXUP_BNO_OK) {
437#ifdef DEBUG
0e266570 438 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
2bd0ea18 439 return error;
19ebedcf 440 XFS_WANT_CORRUPTED_RETURN(mp,
2bd0ea18
NS
441 i == 1 && nfbno1 == fbno && nflen1 == flen);
442#endif
443 } else {
0e266570 444 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
2bd0ea18 445 return error;
19ebedcf 446 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18 447 }
b3563c19 448
2bd0ea18 449#ifdef DEBUG
b3563c19
BN
450 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
451 struct xfs_btree_block *bnoblock;
452 struct xfs_btree_block *cntblock;
453
454 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
455 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
2bd0ea18 456
19ebedcf 457 XFS_WANT_CORRUPTED_RETURN(mp,
b3563c19 458 bnoblock->bb_numrecs == cntblock->bb_numrecs);
2bd0ea18
NS
459 }
460#endif
b3563c19 461
2bd0ea18
NS
462 /*
463 * Deal with all four cases: the allocated record is contained
464 * within the freespace record, so we can have new freespace
465 * at either (or both) end, or no freespace remaining.
466 */
467 if (rbno == fbno && rlen == flen)
468 nfbno1 = nfbno2 = NULLAGBLOCK;
469 else if (rbno == fbno) {
470 nfbno1 = rbno + rlen;
471 nflen1 = flen - rlen;
472 nfbno2 = NULLAGBLOCK;
473 } else if (rbno + rlen == fbno + flen) {
474 nfbno1 = fbno;
475 nflen1 = flen - rlen;
476 nfbno2 = NULLAGBLOCK;
477 } else {
478 nfbno1 = fbno;
479 nflen1 = rbno - fbno;
480 nfbno2 = rbno + rlen;
481 nflen2 = (fbno + flen) - nfbno2;
482 }
483 /*
484 * Delete the entry from the by-size btree.
485 */
b194c7d8 486 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 487 return error;
19ebedcf 488 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
489 /*
490 * Add new by-size btree entry(s).
491 */
492 if (nfbno1 != NULLAGBLOCK) {
0e266570 493 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
2bd0ea18 494 return error;
19ebedcf 495 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
b194c7d8 496 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 497 return error;
19ebedcf 498 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
499 }
500 if (nfbno2 != NULLAGBLOCK) {
0e266570 501 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
2bd0ea18 502 return error;
19ebedcf 503 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
b194c7d8 504 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 505 return error;
19ebedcf 506 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
507 }
508 /*
509 * Fix up the by-block btree entry(s).
510 */
511 if (nfbno1 == NULLAGBLOCK) {
512 /*
513 * No remaining freespace, just delete the by-block tree entry.
514 */
b194c7d8 515 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18 516 return error;
19ebedcf 517 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
518 } else {
519 /*
520 * Update the by-block entry to start later|be shorter.
521 */
0e266570 522 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
2bd0ea18
NS
523 return error;
524 }
525 if (nfbno2 != NULLAGBLOCK) {
526 /*
527 * 2 resulting free entries, need to add one.
528 */
0e266570 529 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
2bd0ea18 530 return error;
19ebedcf 531 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
b194c7d8 532 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18 533 return error;
19ebedcf 534 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
535 }
536 return 0;
537}
538
bc01119d 539static xfs_failaddr_t
a2ceac1f
DC
540xfs_agfl_verify(
541 struct xfs_buf *bp)
542{
a2ceac1f
DC
543 struct xfs_mount *mp = bp->b_target->bt_mount;
544 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
a2ceac1f
DC
545 int i;
546
95d9582b
DW
547 /*
548 * There is no verification of non-crc AGFLs because mkfs does not
549 * initialise the AGFL to zero or NULL. Hence the only valid part of the
550 * AGFL is what the AGF says is active. We can't get to the AGF, so we
551 * can't verify just those entries are valid.
552 */
553 if (!xfs_sb_version_hascrc(&mp->m_sb))
554 return NULL;
555
9c4e12fb 556 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
bc01119d 557 return __this_address;
dd5b876e 558 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
bc01119d 559 return __this_address;
dd5b876e
DC
560 /*
561 * during growfs operations, the perag is not fully initialised,
562 * so we can't use it for any useful checking. growfs ensures we can't
563 * use it by using uncached buffers that don't have the perag attached
564 * so we can detect and avoid this problem.
565 */
566 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
bc01119d 567 return __this_address;
dd5b876e 568
b8165508 569 for (i = 0; i < xfs_agfl_size(mp); i++) {
dd5b876e 570 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
a2ceac1f 571 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
bc01119d 572 return __this_address;
a2ceac1f 573 }
a65d8d29 574
bc01119d
DW
575 if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
576 return __this_address;
577 return NULL;
dd5b876e
DC
578}
579
580static void
581xfs_agfl_read_verify(
582 struct xfs_buf *bp)
583{
584 struct xfs_mount *mp = bp->b_target->bt_mount;
1e697959 585 xfs_failaddr_t fa;
dd5b876e
DC
586
587 /*
588 * There is no verification of non-crc AGFLs because mkfs does not
589 * initialise the AGFL to zero or NULL. Hence the only valid part of the
590 * AGFL is what the AGF says is active. We can't get to the AGF, so we
591 * can't verify just those entries are valid.
592 */
593 if (!xfs_sb_version_hascrc(&mp->m_sb))
594 return;
595
45922933 596 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
1e697959
DW
597 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
598 else {
599 fa = xfs_agfl_verify(bp);
600 if (fa)
601 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
602 }
a2ceac1f
DC
603}
604
605static void
606xfs_agfl_write_verify(
607 struct xfs_buf *bp)
608{
37d086ca
CM
609 struct xfs_mount *mp = bp->b_target->bt_mount;
610 struct xfs_buf_log_item *bip = bp->b_log_item;
1e697959 611 xfs_failaddr_t fa;
a2ceac1f 612
dd5b876e
DC
613 /* no verification of non-crc AGFLs */
614 if (!xfs_sb_version_hascrc(&mp->m_sb))
615 return;
616
1e697959
DW
617 fa = xfs_agfl_verify(bp);
618 if (fa) {
619 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
dd5b876e
DC
620 return;
621 }
622
623 if (bip)
624 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
625
43b5aeed 626 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
a2ceac1f
DC
627}
628
629const struct xfs_buf_ops xfs_agfl_buf_ops = {
a3fac935 630 .name = "xfs_agfl",
a2ceac1f
DC
631 .verify_read = xfs_agfl_read_verify,
632 .verify_write = xfs_agfl_write_verify,
95d9582b 633 .verify_struct = xfs_agfl_verify,
a2ceac1f
DC
634};
635
2bd0ea18
NS
636/*
637 * Read in the allocation group free block array.
638 */
50bb67d6 639int /* error */
2bd0ea18
NS
640xfs_alloc_read_agfl(
641 xfs_mount_t *mp, /* mount point structure */
642 xfs_trans_t *tp, /* transaction pointer */
643 xfs_agnumber_t agno, /* allocation group number */
644 xfs_buf_t **bpp) /* buffer for the ag free block array */
645{
646 xfs_buf_t *bp; /* return value */
2bd0ea18
NS
647 int error;
648
649 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
650 error = xfs_trans_read_buf(
651 mp, tp, mp->m_ddev_targp,
652 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
a2ceac1f 653 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
9440d84d 654 if (error)
2bd0ea18 655 return error;
a2ceac1f 656 xfs_buf_set_ref(bp, XFS_AGFL_REF);
2bd0ea18
NS
657 *bpp = bp;
658 return 0;
659}
660
a2ceac1f
DC
661STATIC int
662xfs_alloc_update_counters(
663 struct xfs_trans *tp,
664 struct xfs_perag *pag,
665 struct xfs_buf *agbp,
666 long len)
667{
668 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
669
670 pag->pagf_freeblks += len;
671 be32_add_cpu(&agf->agf_freeblks, len);
672
673 xfs_trans_agblocks_delta(tp, len);
674 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
675 be32_to_cpu(agf->agf_length)))
12b53197 676 return -EFSCORRUPTED;
a2ceac1f
DC
677
678 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
679 return 0;
680}
681
2bd0ea18
NS
682/*
683 * Allocation group level functions.
684 */
685
686/*
687 * Allocate a variable extent in the allocation group agno.
688 * Type and bno are used to determine where in the allocation group the
689 * extent will start.
690 * Extent's length (returned in *len) will be between minlen and maxlen,
691 * and of the form k * prod + mod unless there's nothing that large.
692 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
693 */
694STATIC int /* error */
695xfs_alloc_ag_vextent(
dfc130f3 696 xfs_alloc_arg_t *args) /* argument structure for allocation */
2bd0ea18 697{
0e266570 698 int error=0;
2bd0ea18
NS
699
700 ASSERT(args->minlen > 0);
701 ASSERT(args->maxlen > 0);
702 ASSERT(args->minlen <= args->maxlen);
703 ASSERT(args->mod < args->prod);
704 ASSERT(args->alignment > 0);
cf8ce220 705
2bd0ea18
NS
706 /*
707 * Branch to correct routine based on the type.
708 */
709 args->wasfromfl = 0;
710 switch (args->type) {
711 case XFS_ALLOCTYPE_THIS_AG:
712 error = xfs_alloc_ag_vextent_size(args);
713 break;
714 case XFS_ALLOCTYPE_NEAR_BNO:
715 error = xfs_alloc_ag_vextent_near(args);
716 break;
717 case XFS_ALLOCTYPE_THIS_BNO:
718 error = xfs_alloc_ag_vextent_exact(args);
719 break;
720 default:
721 ASSERT(0);
722 /* NOTREACHED */
723 }
a2ceac1f
DC
724
725 if (error || args->agbno == NULLAGBLOCK)
2bd0ea18 726 return error;
2bd0ea18 727
a2ceac1f
DC
728 ASSERT(args->len >= args->minlen);
729 ASSERT(args->len <= args->maxlen);
9760cac2 730 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
a2ceac1f
DC
731 ASSERT(args->agbno % args->alignment == 0);
732
631ac87a 733 /* if not file data, insert new block into the reverse map btree */
3ee858aa 734 if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
631ac87a
DW
735 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
736 args->agbno, args->len, &args->oinfo);
737 if (error)
738 return error;
739 }
740
a2ceac1f
DC
741 if (!args->wasfromfl) {
742 error = xfs_alloc_update_counters(args->tp, args->pag,
743 args->agbp,
744 -((long)(args->len)));
745 if (error)
746 return error;
747
748 ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
749 args->agbno, args->len));
2bd0ea18 750 }
a2ceac1f 751
cf8ce220 752 xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
a2ceac1f 753
79896434
BD
754 XFS_STATS_INC(args->mp, xs_allocx);
755 XFS_STATS_ADD(args->mp, xs_allocb, args->len);
a2ceac1f 756 return error;
2bd0ea18
NS
757}
758
759/*
760 * Allocate a variable extent at exactly agno/bno.
761 * Extent's length (returned in *len) will be between minlen and maxlen,
762 * and of the form k * prod + mod unless there's nothing that large.
763 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
764 */
765STATIC int /* error */
766xfs_alloc_ag_vextent_exact(
dfc130f3 767 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 768{
dfc130f3
RC
769 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
770 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
2bd0ea18
NS
771 int error;
772 xfs_agblock_t fbno; /* start block of found extent */
2bd0ea18 773 xfs_extlen_t flen; /* length of found extent */
cd80de04
CH
774 xfs_agblock_t tbno; /* start block of busy extent */
775 xfs_extlen_t tlen; /* length of busy extent */
776 xfs_agblock_t tend; /* end block of busy extent */
2bd0ea18 777 int i; /* success/failure of operation */
cd80de04 778 unsigned busy_gen;
2bd0ea18
NS
779
780 ASSERT(args->alignment == 1);
a2ceac1f 781
2bd0ea18
NS
782 /*
783 * Allocate/initialize a cursor for the by-number freespace btree.
784 */
b194c7d8 785 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
56b2de80
DC
786 args->agno, XFS_BTNUM_BNO);
787
2bd0ea18
NS
788 /*
789 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
790 * Look for the closest free block <= bno, it must contain bno
791 * if any free block does.
792 */
56b2de80
DC
793 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
794 if (error)
2bd0ea18 795 goto error0;
56b2de80
DC
796 if (!i)
797 goto not_found;
798
2bd0ea18
NS
799 /*
800 * Grab the freespace record.
801 */
56b2de80
DC
802 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
803 if (error)
2bd0ea18 804 goto error0;
19ebedcf 805 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18 806 ASSERT(fbno <= args->agbno);
56b2de80 807
5000d01d 808 /*
a2ceac1f
DC
809 * Check for overlapping busy extents.
810 */
cd80de04
CH
811 tbno = fbno;
812 tlen = flen;
813 xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
a2ceac1f
DC
814
815 /*
816 * Give up if the start of the extent is busy, or the freespace isn't
817 * long enough for the minimum request.
2bd0ea18 818 */
a2ceac1f
DC
819 if (tbno > args->agbno)
820 goto not_found;
821 if (tlen < args->minlen)
822 goto not_found;
823 tend = tbno + tlen;
824 if (tend < args->agbno + args->minlen)
56b2de80
DC
825 goto not_found;
826
2bd0ea18
NS
827 /*
828 * End of extent will be smaller of the freespace end and the
829 * maximal requested end.
56b2de80 830 *
2bd0ea18
NS
831 * Fix the length according to mod and prod if given.
832 */
a2ceac1f
DC
833 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
834 - args->agbno;
2bd0ea18 835 xfs_alloc_fix_len(args);
a2ceac1f 836 ASSERT(args->agbno + args->len <= tend);
56b2de80 837
2bd0ea18 838 /*
a2ceac1f 839 * We are allocating agbno for args->len
2bd0ea18
NS
840 * Allocate/initialize a cursor for the by-size btree.
841 */
b194c7d8
BN
842 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
843 args->agno, XFS_BTNUM_CNT);
2bd0ea18 844 ASSERT(args->agbno + args->len <=
6e3140c7 845 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
56b2de80
DC
846 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
847 args->len, XFSA_FIXUP_BNO_OK);
848 if (error) {
2bd0ea18
NS
849 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
850 goto error0;
851 }
a2ceac1f 852
2bd0ea18
NS
853 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
854 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
a2ceac1f 855
2bd0ea18 856 args->wasfromfl = 0;
56b2de80
DC
857 trace_xfs_alloc_exact_done(args);
858 return 0;
859
860not_found:
861 /* Didn't find it, return null. */
862 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
863 args->agbno = NULLAGBLOCK;
864 trace_xfs_alloc_exact_notfound(args);
2bd0ea18
NS
865 return 0;
866
867error0:
868 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
56b2de80
DC
869 trace_xfs_alloc_exact_error(args);
870 return error;
871}
872
873/*
874 * Search the btree in a given direction via the search cursor and compare
875 * the records found against the good extent we've already found.
876 */
877STATIC int
878xfs_alloc_find_best_extent(
879 struct xfs_alloc_arg *args, /* allocation argument structure */
880 struct xfs_btree_cur **gcur, /* good cursor */
881 struct xfs_btree_cur **scur, /* searching cursor */
882 xfs_agblock_t gdiff, /* difference for search comparison */
883 xfs_agblock_t *sbno, /* extent found by search */
a2ceac1f
DC
884 xfs_extlen_t *slen, /* extent length */
885 xfs_agblock_t *sbnoa, /* aligned extent found by search */
886 xfs_extlen_t *slena, /* aligned extent length */
56b2de80
DC
887 int dir) /* 0 = search right, 1 = search left */
888{
56b2de80
DC
889 xfs_agblock_t new;
890 xfs_agblock_t sdiff;
891 int error;
892 int i;
cd80de04 893 unsigned busy_gen;
56b2de80
DC
894
895 /* The good extent is perfect, no need to search. */
896 if (!gdiff)
897 goto out_use_good;
898
899 /*
900 * Look until we find a better one, run out of space or run off the end.
901 */
902 do {
903 error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
904 if (error)
905 goto error0;
19ebedcf 906 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
cd80de04
CH
907 xfs_alloc_compute_aligned(args, *sbno, *slen,
908 sbnoa, slena, &busy_gen);
56b2de80
DC
909
910 /*
911 * The good extent is closer than this one.
912 */
913 if (!dir) {
ff3263dd
BF
914 if (*sbnoa > args->max_agbno)
915 goto out_use_good;
a2ceac1f 916 if (*sbnoa >= args->agbno + gdiff)
56b2de80
DC
917 goto out_use_good;
918 } else {
ff3263dd
BF
919 if (*sbnoa < args->min_agbno)
920 goto out_use_good;
a2ceac1f 921 if (*sbnoa <= args->agbno - gdiff)
56b2de80
DC
922 goto out_use_good;
923 }
924
925 /*
926 * Same distance, compare length and pick the best.
927 */
928 if (*slena >= args->minlen) {
929 args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
930 xfs_alloc_fix_len(args);
931
932 sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
84a62eea 933 args->alignment,
1fccd5c8 934 args->datatype, *sbnoa,
a2ceac1f 935 *slena, &new);
56b2de80
DC
936
937 /*
938 * Choose closer size and invalidate other cursor.
939 */
940 if (sdiff < gdiff)
941 goto out_use_search;
942 goto out_use_good;
943 }
944
945 if (!dir)
946 error = xfs_btree_increment(*scur, 0, &i);
947 else
948 error = xfs_btree_decrement(*scur, 0, &i);
949 if (error)
950 goto error0;
951 } while (i);
952
953out_use_good:
954 xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
955 *scur = NULL;
956 return 0;
957
958out_use_search:
959 xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
960 *gcur = NULL;
961 return 0;
962
963error0:
964 /* caller invalidates cursors */
2bd0ea18
NS
965 return error;
966}
967
968/*
969 * Allocate a variable extent near bno in the allocation group agno.
970 * Extent's length (returned in len) will be between minlen and maxlen,
971 * and of the form k * prod + mod unless there's nothing that large.
972 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
973 */
974STATIC int /* error */
975xfs_alloc_ag_vextent_near(
dfc130f3 976 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 977{
dfc130f3
RC
978 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */
979 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */
980 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */
2bd0ea18
NS
981 xfs_agblock_t gtbno; /* start bno of right side entry */
982 xfs_agblock_t gtbnoa; /* aligned ... */
983 xfs_extlen_t gtdiff; /* difference to right side entry */
984 xfs_extlen_t gtlen; /* length of right side entry */
a2ceac1f 985 xfs_extlen_t gtlena; /* aligned ... */
2bd0ea18
NS
986 xfs_agblock_t gtnew; /* useful start bno of right side */
987 int error; /* error code */
988 int i; /* result code, temporary */
989 int j; /* result code, temporary */
990 xfs_agblock_t ltbno; /* start bno of left side entry */
991 xfs_agblock_t ltbnoa; /* aligned ... */
992 xfs_extlen_t ltdiff; /* difference to left side entry */
2bd0ea18 993 xfs_extlen_t ltlen; /* length of left side entry */
a2ceac1f 994 xfs_extlen_t ltlena; /* aligned ... */
2bd0ea18
NS
995 xfs_agblock_t ltnew; /* useful start bno of left side */
996 xfs_extlen_t rlen; /* length of returned extent */
cd80de04
CH
997 bool busy;
998 unsigned busy_gen;
6beba453 999#ifdef DEBUG
2bd0ea18
NS
1000 /*
1001 * Randomly don't execute the first algorithm.
1002 */
2bd0ea18 1003 int dofirst; /* set to do first algorithm */
2bd0ea18 1004
49f693fa 1005 dofirst = prandom_u32() & 1;
2bd0ea18 1006#endif
a2ceac1f 1007
ff3263dd
BF
1008 /* handle unitialized agbno range so caller doesn't have to */
1009 if (!args->min_agbno && !args->max_agbno)
1010 args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
1011 ASSERT(args->min_agbno <= args->max_agbno);
1012
1013 /* clamp agbno to the range if it's outside */
1014 if (args->agbno < args->min_agbno)
1015 args->agbno = args->min_agbno;
1016 if (args->agbno > args->max_agbno)
1017 args->agbno = args->max_agbno;
1018
a2ceac1f
DC
1019restart:
1020 bno_cur_lt = NULL;
1021 bno_cur_gt = NULL;
1022 ltlen = 0;
1023 gtlena = 0;
1024 ltlena = 0;
cd80de04 1025 busy = false;
a2ceac1f 1026
2bd0ea18
NS
1027 /*
1028 * Get a cursor for the by-size btree.
1029 */
b194c7d8
BN
1030 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1031 args->agno, XFS_BTNUM_CNT);
a2ceac1f 1032
2bd0ea18
NS
1033 /*
1034 * See if there are any free extents as big as maxlen.
1035 */
0e266570 1036 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
2bd0ea18
NS
1037 goto error0;
1038 /*
1039 * If none, then pick up the last entry in the tree unless the
1040 * tree is empty.
5000d01d 1041 */
2bd0ea18 1042 if (!i) {
0e266570
NS
1043 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
1044 &ltlen, &i)))
2bd0ea18
NS
1045 goto error0;
1046 if (i == 0 || ltlen == 0) {
1047 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
a2ceac1f 1048 trace_xfs_alloc_near_noentry(args);
2bd0ea18
NS
1049 return 0;
1050 }
1051 ASSERT(i == 1);
1052 }
1053 args->wasfromfl = 0;
a2ceac1f 1054
5000d01d 1055 /*
2bd0ea18
NS
1056 * First algorithm.
1057 * If the requested extent is large wrt the freespaces available
1058 * in this a.g., then the cursor will be pointing to a btree entry
1059 * near the right edge of the tree. If it's in the last btree leaf
1060 * block, then we just examine all the entries in that block
1061 * that are big enough, and pick the best one.
1062 * This is written as a while loop so we can break out of it,
1063 * but we never loop back to the top.
1064 */
1065 while (xfs_btree_islastblock(cnt_cur, 0)) {
1066 xfs_extlen_t bdiff;
0e266570
NS
1067 int besti=0;
1068 xfs_extlen_t blen=0;
1069 xfs_agblock_t bnew=0;
2bd0ea18 1070
6beba453
DC
1071#ifdef DEBUG
1072 if (dofirst)
2bd0ea18
NS
1073 break;
1074#endif
1075 /*
1076 * Start from the entry that lookup found, sequence through
1077 * all larger free blocks. If we're actually pointing at a
1078 * record smaller than maxlen, go to the start of this block,
1079 * and skip all those smaller than minlen.
1080 */
1081 if (ltlen || args->alignment > 1) {
1082 cnt_cur->bc_ptrs[0] = 1;
1083 do {
0e266570
NS
1084 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
1085 &ltlen, &i)))
2bd0ea18 1086 goto error0;
19ebedcf 1087 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18
NS
1088 if (ltlen >= args->minlen)
1089 break;
b194c7d8 1090 if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
2bd0ea18
NS
1091 goto error0;
1092 } while (i);
1093 ASSERT(ltlen >= args->minlen);
1094 if (!i)
1095 break;
1096 }
1097 i = cnt_cur->bc_ptrs[0];
1098 for (j = 1, blen = 0, bdiff = 0;
1099 !error && j && (blen < args->maxlen || bdiff > 0);
b194c7d8 1100 error = xfs_btree_increment(cnt_cur, 0, &j)) {
2bd0ea18
NS
1101 /*
1102 * For each entry, decide if it's better than
1103 * the previous best entry.
1104 */
0e266570 1105 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
2bd0ea18 1106 goto error0;
19ebedcf 1107 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
cd80de04
CH
1108 busy = xfs_alloc_compute_aligned(args, ltbno, ltlen,
1109 &ltbnoa, &ltlena, &busy_gen);
5e656dbb 1110 if (ltlena < args->minlen)
2bd0ea18 1111 continue;
ff3263dd
BF
1112 if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
1113 continue;
2bd0ea18
NS
1114 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1115 xfs_alloc_fix_len(args);
1116 ASSERT(args->len >= args->minlen);
1117 if (args->len < blen)
1118 continue;
1119 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1fccd5c8 1120 args->alignment, args->datatype, ltbnoa,
84a62eea 1121 ltlena, &ltnew);
2bd0ea18
NS
1122 if (ltnew != NULLAGBLOCK &&
1123 (args->len > blen || ltdiff < bdiff)) {
1124 bdiff = ltdiff;
1125 bnew = ltnew;
1126 blen = args->len;
1127 besti = cnt_cur->bc_ptrs[0];
1128 }
1129 }
1130 /*
1131 * It didn't work. We COULD be in a case where
1132 * there's a good record somewhere, so try again.
1133 */
1134 if (blen == 0)
1135 break;
1136 /*
1137 * Point at the best entry, and retrieve it again.
1138 */
1139 cnt_cur->bc_ptrs[0] = besti;
0e266570 1140 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
2bd0ea18 1141 goto error0;
19ebedcf 1142 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
56b2de80 1143 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
2bd0ea18 1144 args->len = blen;
2c003dc2 1145
2bd0ea18
NS
1146 /*
1147 * We are allocating starting at bnew for blen blocks.
1148 */
1149 args->agbno = bnew;
1150 ASSERT(bnew >= ltbno);
56b2de80 1151 ASSERT(bnew + blen <= ltbno + ltlen);
2bd0ea18
NS
1152 /*
1153 * Set up a cursor for the by-bno tree.
1154 */
b194c7d8
BN
1155 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1156 args->agbp, args->agno, XFS_BTNUM_BNO);
2bd0ea18
NS
1157 /*
1158 * Fix up the btree entries.
1159 */
0e266570
NS
1160 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1161 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
2bd0ea18
NS
1162 goto error0;
1163 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1164 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
56b2de80
DC
1165
1166 trace_xfs_alloc_near_first(args);
2bd0ea18
NS
1167 return 0;
1168 }
1169 /*
1170 * Second algorithm.
1171 * Search in the by-bno tree to the left and to the right
1172 * simultaneously, until in each case we find a space big enough,
1173 * or run into the edge of the tree. When we run into the edge,
1174 * we deallocate that cursor.
1175 * If both searches succeed, we compare the two spaces and pick
1176 * the better one.
1177 * With alignment, it's possible for both to fail; the upper
1178 * level algorithm that picks allocation groups for allocations
1179 * is not supposed to do this.
1180 */
1181 /*
1182 * Allocate and initialize the cursor for the leftward search.
1183 */
b194c7d8
BN
1184 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1185 args->agno, XFS_BTNUM_BNO);
2bd0ea18
NS
1186 /*
1187 * Lookup <= bno to find the leftward search's starting point.
1188 */
0e266570 1189 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
2bd0ea18
NS
1190 goto error0;
1191 if (!i) {
1192 /*
1193 * Didn't find anything; use this cursor for the rightward
1194 * search.
1195 */
1196 bno_cur_gt = bno_cur_lt;
062998e3 1197 bno_cur_lt = NULL;
2bd0ea18
NS
1198 }
1199 /*
1200 * Found something. Duplicate the cursor for the rightward search.
1201 */
0e266570 1202 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
2bd0ea18
NS
1203 goto error0;
1204 /*
1205 * Increment the cursor, so we will point at the entry just right
1206 * of the leftward entry if any, or to the leftmost entry.
1207 */
b194c7d8 1208 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
2bd0ea18
NS
1209 goto error0;
1210 if (!i) {
1211 /*
1212 * It failed, there are no rightward entries.
1213 */
1214 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1215 bno_cur_gt = NULL;
1216 }
1217 /*
1218 * Loop going left with the leftward cursor, right with the
1219 * rightward cursor, until either both directions give up or
1220 * we find an entry at least as big as minlen.
1221 */
1222 do {
1223 if (bno_cur_lt) {
0e266570 1224 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
2bd0ea18 1225 goto error0;
19ebedcf 1226 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
cd80de04
CH
1227 busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen,
1228 &ltbnoa, &ltlena, &busy_gen);
ff3263dd 1229 if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
2bd0ea18 1230 break;
b194c7d8 1231 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
2bd0ea18 1232 goto error0;
ff3263dd 1233 if (!i || ltbnoa < args->min_agbno) {
2bd0ea18
NS
1234 xfs_btree_del_cursor(bno_cur_lt,
1235 XFS_BTREE_NOERROR);
1236 bno_cur_lt = NULL;
1237 }
1238 }
1239 if (bno_cur_gt) {
0e266570 1240 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
2bd0ea18 1241 goto error0;
19ebedcf 1242 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
cd80de04
CH
1243 busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen,
1244 &gtbnoa, &gtlena, &busy_gen);
ff3263dd 1245 if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
2bd0ea18 1246 break;
b194c7d8 1247 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
2bd0ea18 1248 goto error0;
ff3263dd 1249 if (!i || gtbnoa > args->max_agbno) {
2bd0ea18
NS
1250 xfs_btree_del_cursor(bno_cur_gt,
1251 XFS_BTREE_NOERROR);
1252 bno_cur_gt = NULL;
1253 }
1254 }
1255 } while (bno_cur_lt || bno_cur_gt);
56b2de80 1256
2bd0ea18
NS
1257 /*
1258 * Got both cursors still active, need to find better entry.
1259 */
1260 if (bno_cur_lt && bno_cur_gt) {
2bd0ea18
NS
1261 if (ltlena >= args->minlen) {
1262 /*
56b2de80 1263 * Left side is good, look for a right side entry.
2bd0ea18
NS
1264 */
1265 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1266 xfs_alloc_fix_len(args);
56b2de80 1267 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1fccd5c8 1268 args->alignment, args->datatype, ltbnoa,
84a62eea 1269 ltlena, &ltnew);
56b2de80
DC
1270
1271 error = xfs_alloc_find_best_extent(args,
1272 &bno_cur_lt, &bno_cur_gt,
a2ceac1f
DC
1273 ltdiff, &gtbno, &gtlen,
1274 &gtbnoa, &gtlena,
56b2de80
DC
1275 0 /* search right */);
1276 } else {
1277 ASSERT(gtlena >= args->minlen);
1278
2bd0ea18 1279 /*
56b2de80 1280 * Right side is good, look for a left side entry.
2bd0ea18
NS
1281 */
1282 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1283 xfs_alloc_fix_len(args);
56b2de80 1284 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1fccd5c8 1285 args->alignment, args->datatype, gtbnoa,
84a62eea 1286 gtlena, &gtnew);
56b2de80
DC
1287
1288 error = xfs_alloc_find_best_extent(args,
1289 &bno_cur_gt, &bno_cur_lt,
a2ceac1f
DC
1290 gtdiff, &ltbno, &ltlen,
1291 &ltbnoa, &ltlena,
56b2de80 1292 1 /* search left */);
2bd0ea18 1293 }
56b2de80
DC
1294
1295 if (error)
1296 goto error0;
2bd0ea18 1297 }
56b2de80 1298
2bd0ea18
NS
1299 /*
1300 * If we couldn't get anything, give up.
1301 */
1302 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
a2ceac1f
DC
1303 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1304
cd80de04 1305 if (busy) {
a2ceac1f 1306 trace_xfs_alloc_near_busy(args);
cd80de04 1307 xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
a2ceac1f
DC
1308 goto restart;
1309 }
56b2de80 1310 trace_xfs_alloc_size_neither(args);
2bd0ea18
NS
1311 args->agbno = NULLAGBLOCK;
1312 return 0;
1313 }
56b2de80 1314
2bd0ea18
NS
1315 /*
1316 * At this point we have selected a freespace entry, either to the
1317 * left or to the right. If it's on the right, copy all the
1318 * useful variables to the "left" set so we only have one
1319 * copy of this code.
1320 */
1321 if (bno_cur_gt) {
1322 bno_cur_lt = bno_cur_gt;
1323 bno_cur_gt = NULL;
1324 ltbno = gtbno;
1325 ltbnoa = gtbnoa;
1326 ltlen = gtlen;
1327 ltlena = gtlena;
1328 j = 1;
1329 } else
1330 j = 0;
56b2de80 1331
2bd0ea18
NS
1332 /*
1333 * Fix up the length and compute the useful address.
1334 */
2bd0ea18
NS
1335 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1336 xfs_alloc_fix_len(args);
2bd0ea18 1337 rlen = args->len;
a2ceac1f 1338 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1fccd5c8 1339 args->datatype, ltbnoa, ltlena, &ltnew);
2bd0ea18 1340 ASSERT(ltnew >= ltbno);
a2ceac1f 1341 ASSERT(ltnew + rlen <= ltbnoa + ltlena);
6e3140c7 1342 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
ff3263dd 1343 ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
2bd0ea18 1344 args->agbno = ltnew;
a2ceac1f 1345
0e266570
NS
1346 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1347 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
2bd0ea18 1348 goto error0;
56b2de80
DC
1349
1350 if (j)
1351 trace_xfs_alloc_near_greater(args);
1352 else
1353 trace_xfs_alloc_near_lesser(args);
1354
2bd0ea18
NS
1355 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1356 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1357 return 0;
1358
1359 error0:
56b2de80 1360 trace_xfs_alloc_near_error(args);
2bd0ea18
NS
1361 if (cnt_cur != NULL)
1362 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1363 if (bno_cur_lt != NULL)
1364 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1365 if (bno_cur_gt != NULL)
1366 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1367 return error;
1368}
1369
1370/*
1371 * Allocate a variable extent anywhere in the allocation group agno.
1372 * Extent's length (returned in len) will be between minlen and maxlen,
1373 * and of the form k * prod + mod unless there's nothing that large.
1374 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1375 */
1376STATIC int /* error */
1377xfs_alloc_ag_vextent_size(
dfc130f3 1378 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 1379{
dfc130f3
RC
1380 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */
1381 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */
2bd0ea18
NS
1382 int error; /* error result */
1383 xfs_agblock_t fbno; /* start of found freespace */
1384 xfs_extlen_t flen; /* length of found freespace */
2bd0ea18
NS
1385 int i; /* temp status variable */
1386 xfs_agblock_t rbno; /* returned block number */
1387 xfs_extlen_t rlen; /* length of returned extent */
cd80de04
CH
1388 bool busy;
1389 unsigned busy_gen;
2bd0ea18 1390
a2ceac1f 1391restart:
2bd0ea18
NS
1392 /*
1393 * Allocate and initialize a cursor for the by-size btree.
1394 */
b194c7d8
BN
1395 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1396 args->agno, XFS_BTNUM_CNT);
2bd0ea18 1397 bno_cur = NULL;
cd80de04 1398 busy = false;
a2ceac1f 1399
2bd0ea18
NS
1400 /*
1401 * Look for an entry >= maxlen+alignment-1 blocks.
1402 */
0e266570
NS
1403 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1404 args->maxlen + args->alignment - 1, &i)))
2bd0ea18 1405 goto error0;
a2ceac1f 1406
2bd0ea18 1407 /*
cd80de04
CH
1408 * If none then we have to settle for a smaller extent. In the case that
1409 * there are no large extents, this will return the last entry in the
1410 * tree unless the tree is empty. In the case that there are only busy
1411 * large extents, this will return the largest small extent unless there
a2ceac1f 1412 * are no smaller extents available.
5000d01d 1413 */
cd80de04 1414 if (!i) {
a2ceac1f
DC
1415 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1416 &fbno, &flen, &i);
1417 if (error)
2bd0ea18
NS
1418 goto error0;
1419 if (i == 0 || flen == 0) {
1420 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
56b2de80 1421 trace_xfs_alloc_size_noentry(args);
2bd0ea18
NS
1422 return 0;
1423 }
1424 ASSERT(i == 1);
cd80de04
CH
1425 busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1426 &rlen, &busy_gen);
a2ceac1f
DC
1427 } else {
1428 /*
1429 * Search for a non-busy extent that is large enough.
a2ceac1f
DC
1430 */
1431 for (;;) {
1432 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1433 if (error)
1434 goto error0;
19ebedcf 1435 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
a2ceac1f 1436
cd80de04
CH
1437 busy = xfs_alloc_compute_aligned(args, fbno, flen,
1438 &rbno, &rlen, &busy_gen);
a2ceac1f
DC
1439
1440 if (rlen >= args->maxlen)
1441 break;
1442
1443 error = xfs_btree_increment(cnt_cur, 0, &i);
1444 if (error)
1445 goto error0;
1446 if (i == 0) {
1447 /*
1448 * Our only valid extents must have been busy.
1449 * Make it unbusy by forcing the log out and
cd80de04 1450 * retrying.
a2ceac1f
DC
1451 */
1452 xfs_btree_del_cursor(cnt_cur,
1453 XFS_BTREE_NOERROR);
1454 trace_xfs_alloc_size_busy(args);
cd80de04
CH
1455 xfs_extent_busy_flush(args->mp,
1456 args->pag, busy_gen);
a2ceac1f
DC
1457 goto restart;
1458 }
1459 }
2bd0ea18 1460 }
a2ceac1f 1461
2bd0ea18
NS
1462 /*
1463 * In the first case above, we got the last entry in the
1464 * by-size btree. Now we check to see if the space hits maxlen
1465 * once aligned; if not, we search left for something better.
1466 * This can't happen in the second case above.
1467 */
2bd0ea18 1468 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
19ebedcf 1469 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
2bd0ea18
NS
1470 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1471 if (rlen < args->maxlen) {
1472 xfs_agblock_t bestfbno;
1473 xfs_extlen_t bestflen;
1474 xfs_agblock_t bestrbno;
1475 xfs_extlen_t bestrlen;
1476
1477 bestrlen = rlen;
1478 bestrbno = rbno;
1479 bestflen = flen;
1480 bestfbno = fbno;
1481 for (;;) {
b194c7d8 1482 if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
2bd0ea18
NS
1483 goto error0;
1484 if (i == 0)
1485 break;
0e266570
NS
1486 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1487 &i)))
2bd0ea18 1488 goto error0;
19ebedcf 1489 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18
NS
1490 if (flen < bestrlen)
1491 break;
cd80de04
CH
1492 busy = xfs_alloc_compute_aligned(args, fbno, flen,
1493 &rbno, &rlen, &busy_gen);
2bd0ea18 1494 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
19ebedcf 1495 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
2bd0ea18
NS
1496 (rlen <= flen && rbno + rlen <= fbno + flen),
1497 error0);
1498 if (rlen > bestrlen) {
1499 bestrlen = rlen;
1500 bestrbno = rbno;
1501 bestflen = flen;
1502 bestfbno = fbno;
1503 if (rlen == args->maxlen)
1504 break;
1505 }
5000d01d 1506 }
0e266570
NS
1507 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1508 &i)))
2bd0ea18 1509 goto error0;
19ebedcf 1510 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18
NS
1511 rlen = bestrlen;
1512 rbno = bestrbno;
1513 flen = bestflen;
1514 fbno = bestfbno;
1515 }
1516 args->wasfromfl = 0;
1517 /*
1518 * Fix up the length.
1519 */
1520 args->len = rlen;
a2ceac1f 1521 if (rlen < args->minlen) {
cd80de04 1522 if (busy) {
a2ceac1f
DC
1523 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1524 trace_xfs_alloc_size_busy(args);
cd80de04 1525 xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
a2ceac1f
DC
1526 goto restart;
1527 }
1528 goto out_nominleft;
2bd0ea18 1529 }
a2ceac1f
DC
1530 xfs_alloc_fix_len(args);
1531
2bd0ea18 1532 rlen = args->len;
19ebedcf 1533 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
2bd0ea18
NS
1534 /*
1535 * Allocate and initialize a cursor for the by-block tree.
1536 */
b194c7d8
BN
1537 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1538 args->agno, XFS_BTNUM_BNO);
0e266570
NS
1539 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1540 rbno, rlen, XFSA_FIXUP_CNT_OK)))
2bd0ea18
NS
1541 goto error0;
1542 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1543 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1544 cnt_cur = bno_cur = NULL;
1545 args->len = rlen;
1546 args->agbno = rbno;
19ebedcf 1547 XFS_WANT_CORRUPTED_GOTO(args->mp,
2bd0ea18 1548 args->agbno + args->len <=
6e3140c7 1549 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
2bd0ea18 1550 error0);
56b2de80 1551 trace_xfs_alloc_size_done(args);
2bd0ea18
NS
1552 return 0;
1553
1554error0:
56b2de80 1555 trace_xfs_alloc_size_error(args);
2bd0ea18
NS
1556 if (cnt_cur)
1557 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1558 if (bno_cur)
1559 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1560 return error;
a2ceac1f
DC
1561
1562out_nominleft:
1563 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1564 trace_xfs_alloc_size_nominleft(args);
1565 args->agbno = NULLAGBLOCK;
1566 return 0;
2bd0ea18
NS
1567}
1568
1569/*
1570 * Deal with the case where only small freespaces remain.
1571 * Either return the contents of the last freespace record,
1572 * or allocate space from the freelist if there is nothing in the tree.
1573 */
1574STATIC int /* error */
1575xfs_alloc_ag_vextent_small(
dfc130f3
RC
1576 xfs_alloc_arg_t *args, /* allocation argument structure */
1577 xfs_btree_cur_t *ccur, /* by-size cursor */
1578 xfs_agblock_t *fbnop, /* result block number */
1579 xfs_extlen_t *flenp, /* result length */
2bd0ea18
NS
1580 int *stat) /* status: 0-freelist, 1-normal/none */
1581{
59b86360 1582 struct xfs_owner_info oinfo;
2bd0ea18
NS
1583 int error;
1584 xfs_agblock_t fbno;
1585 xfs_extlen_t flen;
2bd0ea18
NS
1586 int i;
1587
b194c7d8 1588 if ((error = xfs_btree_decrement(ccur, 0, &i)))
2bd0ea18
NS
1589 goto error0;
1590 if (i) {
0e266570 1591 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
2bd0ea18 1592 goto error0;
19ebedcf 1593 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18
NS
1594 }
1595 /*
1596 * Nothing in the btree, try the freelist. Make sure
1597 * to respect minleft even when pulling from the
1598 * freelist.
1599 */
cf8ce220 1600 else if (args->minlen == 1 && args->alignment == 1 &&
9760cac2 1601 args->resv != XFS_AG_RESV_AGFL &&
6e3140c7
NS
1602 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1603 > args->minleft)) {
cdded3d8
DC
1604 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1605 if (error)
2bd0ea18
NS
1606 goto error0;
1607 if (fbno != NULLAGBLOCK) {
a2ceac1f 1608 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1fccd5c8 1609 xfs_alloc_allow_busy_reuse(args->datatype));
a2ceac1f 1610
1fccd5c8 1611 if (xfs_alloc_is_userdata(args->datatype)) {
2bd0ea18
NS
1612 xfs_buf_t *bp;
1613
1614 bp = xfs_btree_get_bufs(args->mp, args->tp,
1615 args->agno, fbno, 0);
b2284d05
ES
1616 if (!bp) {
1617 error = -EFSCORRUPTED;
1618 goto error0;
1619 }
2bd0ea18 1620 xfs_trans_binval(args->tp, bp);
2bd0ea18
NS
1621 }
1622 args->len = 1;
1623 args->agbno = fbno;
19ebedcf 1624 XFS_WANT_CORRUPTED_GOTO(args->mp,
2bd0ea18 1625 args->agbno + args->len <=
6e3140c7 1626 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
2bd0ea18
NS
1627 error0);
1628 args->wasfromfl = 1;
56b2de80 1629 trace_xfs_alloc_small_freelist(args);
59b86360
DW
1630
1631 /*
1632 * If we're feeding an AGFL block to something that
1633 * doesn't live in the free space, we need to clear
9760cac2 1634 * out the OWN_AG rmap.
59b86360
DW
1635 */
1636 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
1637 error = xfs_rmap_free(args->tp, args->agbp, args->agno,
1638 fbno, 1, &oinfo);
1639 if (error)
1640 goto error0;
59b86360 1641
2bd0ea18
NS
1642 *stat = 0;
1643 return 0;
1644 }
1645 /*
1646 * Nothing in the freelist.
1647 */
1648 else
1649 flen = 0;
1650 }
1651 /*
1652 * Can't allocate from the freelist for some reason.
1653 */
5e656dbb
BN
1654 else {
1655 fbno = NULLAGBLOCK;
2bd0ea18 1656 flen = 0;
5e656dbb 1657 }
2bd0ea18
NS
1658 /*
1659 * Can't do the allocation, give up.
1660 */
1661 if (flen < args->minlen) {
1662 args->agbno = NULLAGBLOCK;
56b2de80 1663 trace_xfs_alloc_small_notenough(args);
2bd0ea18
NS
1664 flen = 0;
1665 }
1666 *fbnop = fbno;
1667 *flenp = flen;
1668 *stat = 1;
56b2de80 1669 trace_xfs_alloc_small_done(args);
2bd0ea18
NS
1670 return 0;
1671
1672error0:
56b2de80 1673 trace_xfs_alloc_small_error(args);
2bd0ea18
NS
1674 return error;
1675}
1676
1677/*
1678 * Free the extent starting at agno/bno for length.
1679 */
85aec44f 1680STATIC int
2bd0ea18 1681xfs_free_ag_extent(
85aec44f
DW
1682 xfs_trans_t *tp,
1683 xfs_buf_t *agbp,
1684 xfs_agnumber_t agno,
1685 xfs_agblock_t bno,
1686 xfs_extlen_t len,
1687 struct xfs_owner_info *oinfo,
cf8ce220 1688 enum xfs_ag_resv_type type)
2bd0ea18 1689{
dfc130f3
RC
1690 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
1691 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
2bd0ea18 1692 int error; /* error return value */
2bd0ea18
NS
1693 xfs_agblock_t gtbno; /* start of right neighbor block */
1694 xfs_extlen_t gtlen; /* length of right neighbor block */
1695 int haveleft; /* have a left neighbor block */
1696 int haveright; /* have a right neighbor block */
1697 int i; /* temp, result code */
1698 xfs_agblock_t ltbno; /* start of left neighbor block */
1699 xfs_extlen_t ltlen; /* length of left neighbor block */
1700 xfs_mount_t *mp; /* mount point struct for filesystem */
1701 xfs_agblock_t nbno; /* new starting block of freespace */
1702 xfs_extlen_t nlen; /* new length of freespace */
a2ceac1f 1703 xfs_perag_t *pag; /* per allocation group data */
2bd0ea18 1704
631ac87a 1705 bno_cur = cnt_cur = NULL;
2bd0ea18 1706 mp = tp->t_mountp;
631ac87a 1707
3ee858aa 1708 if (!xfs_rmap_should_skip_owner_update(oinfo)) {
631ac87a
DW
1709 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1710 if (error)
1711 goto error0;
1712 }
1713
5000d01d 1714 /*
2bd0ea18
NS
1715 * Allocate and initialize a cursor for the by-block btree.
1716 */
b194c7d8 1717 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
5000d01d 1718 /*
2bd0ea18
NS
1719 * Look for a neighboring block on the left (lower block numbers)
1720 * that is contiguous with this space.
1721 */
0e266570 1722 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
2bd0ea18
NS
1723 goto error0;
1724 if (haveleft) {
1725 /*
1726 * There is a block to our left.
1727 */
0e266570 1728 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
2bd0ea18 1729 goto error0;
19ebedcf 1730 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1731 /*
1732 * It's not contiguous, though.
1733 */
1734 if (ltbno + ltlen < bno)
1735 haveleft = 0;
1736 else {
1737 /*
1738 * If this failure happens the request to free this
1739 * space was invalid, it's (partly) already free.
1740 * Very bad.
1741 */
19ebedcf
DC
1742 XFS_WANT_CORRUPTED_GOTO(mp,
1743 ltbno + ltlen <= bno, error0);
2bd0ea18
NS
1744 }
1745 }
5000d01d 1746 /*
2bd0ea18
NS
1747 * Look for a neighboring block on the right (higher block numbers)
1748 * that is contiguous with this space.
1749 */
b194c7d8 1750 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
2bd0ea18
NS
1751 goto error0;
1752 if (haveright) {
1753 /*
1754 * There is a block to our right.
1755 */
0e266570 1756 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
2bd0ea18 1757 goto error0;
19ebedcf 1758 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1759 /*
1760 * It's not contiguous, though.
1761 */
1762 if (bno + len < gtbno)
1763 haveright = 0;
1764 else {
1765 /*
1766 * If this failure happens the request to free this
1767 * space was invalid, it's (partly) already free.
1768 * Very bad.
1769 */
19ebedcf 1770 XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
2bd0ea18
NS
1771 }
1772 }
1773 /*
1774 * Now allocate and initialize a cursor for the by-size tree.
1775 */
b194c7d8 1776 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
2bd0ea18
NS
1777 /*
1778 * Have both left and right contiguous neighbors.
1779 * Merge all three into a single free block.
1780 */
1781 if (haveleft && haveright) {
1782 /*
1783 * Delete the old by-size entry on the left.
1784 */
0e266570 1785 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18 1786 goto error0;
19ebedcf 1787 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 1788 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1789 goto error0;
19ebedcf 1790 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1791 /*
1792 * Delete the old by-size entry on the right.
1793 */
0e266570 1794 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18 1795 goto error0;
19ebedcf 1796 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 1797 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1798 goto error0;
19ebedcf 1799 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1800 /*
1801 * Delete the old by-block entry for the right block.
1802 */
b194c7d8 1803 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18 1804 goto error0;
19ebedcf 1805 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1806 /*
1807 * Move the by-block cursor back to the left neighbor.
1808 */
b194c7d8 1809 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18 1810 goto error0;
19ebedcf 1811 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1812#ifdef DEBUG
1813 /*
1814 * Check that this is the right record: delete didn't
1815 * mangle the cursor.
1816 */
1817 {
1818 xfs_agblock_t xxbno;
1819 xfs_extlen_t xxlen;
1820
0e266570
NS
1821 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1822 &i)))
2bd0ea18 1823 goto error0;
19ebedcf 1824 XFS_WANT_CORRUPTED_GOTO(mp,
2bd0ea18
NS
1825 i == 1 && xxbno == ltbno && xxlen == ltlen,
1826 error0);
1827 }
1828#endif
1829 /*
1830 * Update remaining by-block entry to the new, joined block.
1831 */
1832 nbno = ltbno;
1833 nlen = len + ltlen + gtlen;
0e266570 1834 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1835 goto error0;
1836 }
1837 /*
1838 * Have only a left contiguous neighbor.
1839 * Merge it together with the new freespace.
1840 */
1841 else if (haveleft) {
1842 /*
1843 * Delete the old by-size entry on the left.
1844 */
0e266570 1845 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18 1846 goto error0;
19ebedcf 1847 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 1848 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1849 goto error0;
19ebedcf 1850 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1851 /*
1852 * Back up the by-block cursor to the left neighbor, and
1853 * update its length.
1854 */
b194c7d8 1855 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18 1856 goto error0;
19ebedcf 1857 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1858 nbno = ltbno;
1859 nlen = len + ltlen;
0e266570 1860 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1861 goto error0;
1862 }
1863 /*
1864 * Have only a right contiguous neighbor.
1865 * Merge it together with the new freespace.
1866 */
1867 else if (haveright) {
1868 /*
1869 * Delete the old by-size entry on the right.
1870 */
0e266570 1871 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18 1872 goto error0;
19ebedcf 1873 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 1874 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1875 goto error0;
19ebedcf 1876 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18 1877 /*
5000d01d 1878 * Update the starting block and length of the right
2bd0ea18
NS
1879 * neighbor in the by-block tree.
1880 */
1881 nbno = bno;
1882 nlen = len + gtlen;
0e266570 1883 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1884 goto error0;
1885 }
1886 /*
1887 * No contiguous neighbors.
1888 * Insert the new freespace into the by-block tree.
1889 */
1890 else {
1891 nbno = bno;
1892 nlen = len;
b194c7d8 1893 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18 1894 goto error0;
19ebedcf 1895 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1896 }
1897 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1898 bno_cur = NULL;
1899 /*
1900 * In all cases we need to insert the new freespace in the by-size tree.
1901 */
0e266570 1902 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2bd0ea18 1903 goto error0;
19ebedcf 1904 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
b194c7d8 1905 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 1906 goto error0;
19ebedcf 1907 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1908 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1909 cnt_cur = NULL;
a2ceac1f 1910
2bd0ea18
NS
1911 /*
1912 * Update the freespace totals in the ag and superblock.
1913 */
a2ceac1f
DC
1914 pag = xfs_perag_get(mp, agno);
1915 error = xfs_alloc_update_counters(tp, pag, agbp, len);
cf8ce220 1916 xfs_ag_resv_free_extent(pag, type, tp, len);
a2ceac1f
DC
1917 xfs_perag_put(pag);
1918 if (error)
1919 goto error0;
1920
79896434
BD
1921 XFS_STATS_INC(mp, xs_freex);
1922 XFS_STATS_ADD(mp, xs_freeb, len);
56b2de80 1923
65a15e06 1924 trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
3e535bba 1925
2bd0ea18
NS
1926 return 0;
1927
1928 error0:
65a15e06 1929 trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
2bd0ea18
NS
1930 if (bno_cur)
1931 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1932 if (cnt_cur)
1933 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1934 return error;
1935}
1936
5000d01d 1937/*
2bd0ea18
NS
1938 * Visible (exported) allocation/free functions.
1939 * Some of these are used just by xfs_alloc_btree.c and this file.
1940 */
1941
1942/*
1943 * Compute and fill in value of m_ag_maxlevels.
1944 */
1945void
1946xfs_alloc_compute_maxlevels(
1947 xfs_mount_t *mp) /* file system mount structure */
1948{
1421de38 1949 mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
730e2a19 1950 (mp->m_sb.sb_agblocks + 1) / 2);
2bd0ea18
NS
1951}
1952
56b2de80 1953/*
cf8ce220
DW
1954 * Find the length of the longest extent in an AG. The 'need' parameter
1955 * specifies how much space we're going to need for the AGFL and the
1956 * 'reserved' parameter tells us how many blocks in this AG are reserved for
1957 * other callers.
56b2de80
DC
1958 */
1959xfs_extlen_t
1960xfs_alloc_longest_free_extent(
72bda06d 1961 struct xfs_perag *pag,
cf8ce220
DW
1962 xfs_extlen_t need,
1963 xfs_extlen_t reserved)
56b2de80 1964{
72bda06d 1965 xfs_extlen_t delta = 0;
56b2de80 1966
cf8ce220
DW
1967 /*
1968 * If the AGFL needs a recharge, we'll have to subtract that from the
1969 * longest extent.
1970 */
56b2de80
DC
1971 if (need > pag->pagf_flcount)
1972 delta = need - pag->pagf_flcount;
1973
cf8ce220
DW
1974 /*
1975 * If we cannot maintain others' reservations with space from the
1976 * not-longest freesp extents, we'll have to subtract /that/ from
1977 * the longest extent too.
1978 */
1979 if (pag->pagf_freeblks - pag->pagf_longest < reserved)
1980 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
1981
1982 /*
1983 * If the longest extent is long enough to satisfy all the
1984 * reservations and AGFL rules in place, we can return this extent.
1985 */
56b2de80
DC
1986 if (pag->pagf_longest > delta)
1987 return pag->pagf_longest - delta;
cf8ce220
DW
1988
1989 /* Otherwise, let the caller try for 1 block if there's space. */
56b2de80
DC
1990 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1991}
1992
de046644
DC
1993unsigned int
1994xfs_alloc_min_freelist(
1995 struct xfs_mount *mp,
1996 struct xfs_perag *pag)
1997{
1998 unsigned int min_free;
1999
2000 /* space needed by-bno freespace btree */
2001 min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
2002 mp->m_ag_maxlevels);
2003 /* space needed by-size freespace btree */
2004 min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
2005 mp->m_ag_maxlevels);
b8a8d6e5
DW
2006 /* space needed reverse mapping used space btree */
2007 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
2008 min_free += min_t(unsigned int,
2009 pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
2010 mp->m_rmap_maxlevels);
de046644
DC
2011
2012 return min_free;
2013}
2014
5515b7c1
DC
2015/*
2016 * Check if the operation we are fixing up the freelist for should go ahead or
2017 * not. If we are freeing blocks, we always allow it, otherwise the allocation
2018 * is dependent on whether the size and shape of free space available will
2019 * permit the requested allocation to take place.
2020 */
2021static bool
2022xfs_alloc_space_available(
2023 struct xfs_alloc_arg *args,
2024 xfs_extlen_t min_free,
2025 int flags)
2026{
2027 struct xfs_perag *pag = args->pag;
3fe4a6dd 2028 xfs_extlen_t alloc_len, longest;
cf8ce220 2029 xfs_extlen_t reservation; /* blocks that are still reserved */
5515b7c1
DC
2030 int available;
2031
2032 if (flags & XFS_ALLOC_FLAG_FREEING)
2033 return true;
2034
cf8ce220
DW
2035 reservation = xfs_ag_resv_needed(pag, args->resv);
2036
5515b7c1 2037 /* do we have enough contiguous free space for the allocation? */
3fe4a6dd 2038 alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
1421de38 2039 longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
3fe4a6dd 2040 if (longest < alloc_len)
5515b7c1
DC
2041 return false;
2042
cf8ce220 2043 /* do we have enough free space remaining for the allocation? */
5515b7c1 2044 available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
2c003dc2 2045 reservation - min_free - args->minleft);
3fe4a6dd 2046 if (available < (int)max(args->total, alloc_len))
5515b7c1
DC
2047 return false;
2048
2c003dc2
CH
2049 /*
2050 * Clamp maxlen to the amount of free space available for the actual
2051 * extent allocation.
2052 */
2053 if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
2054 args->maxlen = available;
2055 ASSERT(args->maxlen > 0);
2056 ASSERT(args->maxlen >= args->minlen);
2057 }
2058
5515b7c1
DC
2059 return true;
2060}
2061
30c8be8a
BF
2062int
2063xfs_free_agfl_block(
2064 struct xfs_trans *tp,
2065 xfs_agnumber_t agno,
2066 xfs_agblock_t agbno,
2067 struct xfs_buf *agbp,
2068 struct xfs_owner_info *oinfo)
2069{
2070 int error;
2071 struct xfs_buf *bp;
2072
2073 error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
2074 XFS_AG_RESV_AGFL);
2075 if (error)
2076 return error;
2077
2078 bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno, 0);
2079 if (!bp)
2080 return -EFSCORRUPTED;
2081 xfs_trans_binval(tp, bp);
2082
2083 return 0;
2084}
2085
8dbee8f5
BF
2086/*
2087 * Check the agfl fields of the agf for inconsistency or corruption. The purpose
2088 * is to detect an agfl header padding mismatch between current and early v5
2089 * kernels. This problem manifests as a 1-slot size difference between the
2090 * on-disk flcount and the active [first, last] range of a wrapped agfl. This
2091 * may also catch variants of agfl count corruption unrelated to padding. Either
2092 * way, we'll reset the agfl and warn the user.
2093 *
2094 * Return true if a reset is required before the agfl can be used, false
2095 * otherwise.
2096 */
2097static bool
2098xfs_agfl_needs_reset(
2099 struct xfs_mount *mp,
2100 struct xfs_agf *agf)
2101{
2102 uint32_t f = be32_to_cpu(agf->agf_flfirst);
2103 uint32_t l = be32_to_cpu(agf->agf_fllast);
2104 uint32_t c = be32_to_cpu(agf->agf_flcount);
2105 int agfl_size = xfs_agfl_size(mp);
2106 int active;
2107
2108 /* no agfl header on v4 supers */
2109 if (!xfs_sb_version_hascrc(&mp->m_sb))
2110 return false;
2111
2112 /*
2113 * The agf read verifier catches severe corruption of these fields.
2114 * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2115 * the verifier allows it.
2116 */
2117 if (f >= agfl_size || l >= agfl_size)
2118 return true;
2119 if (c > agfl_size)
2120 return true;
2121
2122 /*
2123 * Check consistency between the on-disk count and the active range. An
2124 * agfl padding mismatch manifests as an inconsistent flcount.
2125 */
2126 if (c && l >= f)
2127 active = l - f + 1;
2128 else if (c)
2129 active = agfl_size - f + l + 1;
2130 else
2131 active = 0;
2132
2133 return active != c;
2134}
2135
2136/*
2137 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
2138 * agfl content cannot be trusted. Warn the user that a repair is required to
2139 * recover leaked blocks.
2140 *
2141 * The purpose of this mechanism is to handle filesystems affected by the agfl
2142 * header padding mismatch problem. A reset keeps the filesystem online with a
2143 * relatively minor free space accounting inconsistency rather than suffer the
2144 * inevitable crash from use of an invalid agfl block.
2145 */
2146static void
2147xfs_agfl_reset(
2148 struct xfs_trans *tp,
2149 struct xfs_buf *agbp,
2150 struct xfs_perag *pag)
2151{
2152 struct xfs_mount *mp = tp->t_mountp;
2153 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
2154
2155 ASSERT(pag->pagf_agflreset);
2156 trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
2157
2158 xfs_warn(mp,
2159 "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
2160 "Please unmount and run xfs_repair.",
2161 pag->pag_agno, pag->pagf_flcount);
2162
2163 agf->agf_flfirst = 0;
2164 agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
2165 agf->agf_flcount = 0;
2166 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
2167 XFS_AGF_FLCOUNT);
2168
2169 pag->pagf_flcount = 0;
2170 pag->pagf_agflreset = false;
2171}
2172
d5c1b462
BF
2173/*
2174 * Defer an AGFL block free. This is effectively equivalent to
2175 * xfs_bmap_add_free() with some special handling particular to AGFL blocks.
2176 *
2177 * Deferring AGFL frees helps prevent log reservation overruns due to too many
2178 * allocation operations in a transaction. AGFL frees are prone to this problem
2179 * because for one they are always freed one at a time. Further, an immediate
2180 * AGFL block free can cause a btree join and require another block free before
2181 * the real allocation can proceed. Deferring the free disconnects freeing up
2182 * the AGFL slot from freeing the block.
2183 */
2184STATIC void
2185xfs_defer_agfl_block(
2186 struct xfs_mount *mp,
2187 struct xfs_defer_ops *dfops,
2188 xfs_agnumber_t agno,
2189 xfs_fsblock_t agbno,
2190 struct xfs_owner_info *oinfo)
2191{
2192 struct xfs_extent_free_item *new; /* new element */
2193
2194 ASSERT(xfs_bmap_free_item_zone != NULL);
2195 ASSERT(oinfo != NULL);
2196
2197 new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
2198 new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
2199 new->xefi_blockcount = 1;
2200 new->xefi_oinfo = *oinfo;
2201
2202 trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
2203
2204 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
2205}
2206
2bd0ea18
NS
2207/*
2208 * Decide whether to use this allocation group for this allocation.
2209 * If so, fix up the btree freelist's size.
2bd0ea18 2210 */
ff105f75 2211int /* error */
2bd0ea18 2212xfs_alloc_fix_freelist(
c98e644e
DC
2213 struct xfs_alloc_arg *args, /* allocation argument structure */
2214 int flags) /* XFS_ALLOC_FLAG_... */
2bd0ea18 2215{
c98e644e
DC
2216 struct xfs_mount *mp = args->mp;
2217 struct xfs_perag *pag = args->pag;
2218 struct xfs_trans *tp = args->tp;
2219 struct xfs_buf *agbp = NULL;
2220 struct xfs_buf *agflbp = NULL;
2221 struct xfs_alloc_arg targs; /* local allocation arguments */
2222 xfs_agblock_t bno; /* freelist block */
2223 xfs_extlen_t need; /* total blocks needed in freelist */
fcdd428c 2224 int error = 0;
c98e644e 2225
2bd0ea18 2226 if (!pag->pagf_init) {
c98e644e
DC
2227 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2228 if (error)
2229 goto out_no_agbp;
2bd0ea18 2230 if (!pag->pagf_init) {
5e656dbb
BN
2231 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2232 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
c98e644e 2233 goto out_agbp_relse;
2bd0ea18 2234 }
c98e644e 2235 }
34317449 2236
5e656dbb 2237 /*
c98e644e
DC
2238 * If this is a metadata preferred pag and we are user data then try
2239 * somewhere else if we are not being asked to try harder at this
2240 * point
34317449 2241 */
1fccd5c8 2242 if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) &&
5e656dbb
BN
2243 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
2244 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
c98e644e 2245 goto out_agbp_relse;
34317449
NS
2246 }
2247
de046644 2248 need = xfs_alloc_min_freelist(mp, pag);
2c003dc2
CH
2249 if (!xfs_alloc_space_available(args, need, flags |
2250 XFS_ALLOC_FLAG_CHECK))
c98e644e 2251 goto out_agbp_relse;
5e656dbb 2252
2bd0ea18
NS
2253 /*
2254 * Get the a.g. freespace buffer.
2255 * Can fail if we're not blocking on locks, and it's held.
2256 */
c98e644e
DC
2257 if (!agbp) {
2258 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2259 if (error)
2260 goto out_no_agbp;
2261 if (!agbp) {
5e656dbb
BN
2262 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2263 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
c98e644e 2264 goto out_no_agbp;
2bd0ea18
NS
2265 }
2266 }
72bda06d 2267
8dbee8f5
BF
2268 /* reset a padding mismatched agfl before final free space check */
2269 if (pag->pagf_agflreset)
2270 xfs_agfl_reset(tp, agbp, pag);
2271
72bda06d 2272 /* If there isn't enough total space or single-extent, reject it. */
de046644 2273 need = xfs_alloc_min_freelist(mp, pag);
c98e644e
DC
2274 if (!xfs_alloc_space_available(args, need, flags))
2275 goto out_agbp_relse;
5515b7c1 2276
2bd0ea18
NS
2277 /*
2278 * Make the freelist shorter if it's too long.
72bda06d 2279 *
c98e644e
DC
2280 * Note that from this point onwards, we will always release the agf and
2281 * agfl buffers on error. This handles the case where we error out and
2282 * the buffers are clean or may not have been joined to the transaction
2283 * and hence need to be released manually. If they have been joined to
2284 * the transaction, then xfs_trans_brelse() will handle them
2285 * appropriately based on the recursion count and dirty state of the
2286 * buffer.
2287 *
72bda06d
DC
2288 * XXX (dgc): When we have lots of free space, does this buy us
2289 * anything other than extra overhead when we need to put more blocks
2290 * back on the free list? Maybe we should only do this when space is
2291 * getting low or the AGFL is more than half full?
e365af6f
DW
2292 *
2293 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
2294 * big; the NORMAP flag prevents AGFL expand/shrink operations from
2295 * updating the rmapbt. Both flags are used in xfs_repair while we're
2296 * rebuilding the rmapbt, and neither are used by the kernel. They're
2297 * both required to ensure that rmaps are correctly recorded for the
2298 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
2299 * repair/rmap.c in xfsprogs for details.
2bd0ea18 2300 */
e365af6f
DW
2301 memset(&targs, 0, sizeof(targs));
2302 if (flags & XFS_ALLOC_FLAG_NORMAP)
2303 xfs_rmap_skip_owner_update(&targs.oinfo);
2304 else
2305 xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
2306 while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
5e656dbb
BN
2307 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2308 if (error)
c98e644e 2309 goto out_agbp_relse;
30c8be8a 2310
d5c1b462
BF
2311 /* defer agfl frees if dfops is provided */
2312 if (tp->t_agfl_dfops) {
2313 xfs_defer_agfl_block(mp, tp->t_agfl_dfops, args->agno,
2314 bno, &targs.oinfo);
2315 } else {
2316 error = xfs_free_agfl_block(tp, args->agno, bno, agbp,
2317 &targs.oinfo);
2318 if (error)
2319 goto out_agbp_relse;
2320 }
2bd0ea18 2321 }
72bda06d 2322
2bd0ea18
NS
2323 targs.tp = tp;
2324 targs.mp = mp;
2325 targs.agbp = agbp;
2326 targs.agno = args->agno;
cf8ce220 2327 targs.alignment = targs.minlen = targs.prod = 1;
2bd0ea18
NS
2328 targs.type = XFS_ALLOCTYPE_THIS_AG;
2329 targs.pag = pag;
72bda06d
DC
2330 error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2331 if (error)
c98e644e 2332 goto out_agbp_relse;
72bda06d
DC
2333
2334 /* Make the freelist longer if it's too short. */
2335 while (pag->pagf_flcount < need) {
2bd0ea18 2336 targs.agbno = 0;
72bda06d 2337 targs.maxlen = need - pag->pagf_flcount;
9760cac2 2338 targs.resv = XFS_AG_RESV_AGFL;
72bda06d
DC
2339
2340 /* Allocate as many blocks as possible at once. */
2341 error = xfs_alloc_ag_vextent(&targs);
c98e644e
DC
2342 if (error)
2343 goto out_agflbp_relse;
2344
2bd0ea18 2345 /*
dfc130f3
RC
2346 * Stop if we run out. Won't happen if callers are obeying
2347 * the restrictions correctly. Can happen for free calls
2bd0ea18
NS
2348 * on a completely full ag.
2349 */
5e656dbb
BN
2350 if (targs.agbno == NULLAGBLOCK) {
2351 if (flags & XFS_ALLOC_FLAG_FREEING)
2352 break;
c98e644e 2353 goto out_agflbp_relse;
5e656dbb 2354 }
2bd0ea18
NS
2355 /*
2356 * Put each allocated block on the list.
2357 */
2358 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
5e656dbb
BN
2359 error = xfs_alloc_put_freelist(tp, agbp,
2360 agflbp, bno, 0);
2361 if (error)
c98e644e 2362 goto out_agflbp_relse;
2bd0ea18
NS
2363 }
2364 }
cb4deb22 2365 xfs_trans_brelse(tp, agflbp);
2bd0ea18
NS
2366 args->agbp = agbp;
2367 return 0;
c98e644e
DC
2368
2369out_agflbp_relse:
2370 xfs_trans_brelse(tp, agflbp);
2371out_agbp_relse:
2372 if (agbp)
2373 xfs_trans_brelse(tp, agbp);
2374out_no_agbp:
2375 args->agbp = NULL;
2376 return error;
2bd0ea18
NS
2377}
2378
2379/*
2380 * Get a block from the freelist.
2381 * Returns with the buffer for the block gotten.
2382 */
2383int /* error */
2384xfs_alloc_get_freelist(
2385 xfs_trans_t *tp, /* transaction pointer */
2386 xfs_buf_t *agbp, /* buffer containing the agf structure */
cdded3d8
DC
2387 xfs_agblock_t *bnop, /* block address retrieved from freelist */
2388 int btreeblk) /* destination is a AGF btree */
2bd0ea18
NS
2389{
2390 xfs_agf_t *agf; /* a.g. freespace structure */
2bd0ea18
NS
2391 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
2392 xfs_agblock_t bno; /* block number returned */
dd5b876e 2393 __be32 *agfl_bno;
2bd0ea18 2394 int error;
cdded3d8 2395 int logflags;
dd5b876e 2396 xfs_mount_t *mp = tp->t_mountp;
2bd0ea18
NS
2397 xfs_perag_t *pag; /* per allocation group data */
2398
2bd0ea18
NS
2399 /*
2400 * Freelist is empty, give up.
2401 */
dd5b876e 2402 agf = XFS_BUF_TO_AGF(agbp);
46eca962 2403 if (!agf->agf_flcount) {
2bd0ea18
NS
2404 *bnop = NULLAGBLOCK;
2405 return 0;
2406 }
2407 /*
2408 * Read the array of free blocks.
2409 */
dd5b876e
DC
2410 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2411 &agflbp);
2412 if (error)
2bd0ea18 2413 return error;
dd5b876e
DC
2414
2415
2bd0ea18
NS
2416 /*
2417 * Get the block number and update the data structures.
2418 */
dd5b876e
DC
2419 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2420 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
5e656dbb 2421 be32_add_cpu(&agf->agf_flfirst, 1);
2bd0ea18 2422 xfs_trans_brelse(tp, agflbp);
b8165508 2423 if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
46eca962 2424 agf->agf_flfirst = 0;
56b2de80
DC
2425
2426 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
8dbee8f5 2427 ASSERT(!pag->pagf_agflreset);
5e656dbb 2428 be32_add_cpu(&agf->agf_flcount, -1);
2bd0ea18
NS
2429 xfs_trans_agflist_delta(tp, -1);
2430 pag->pagf_flcount--;
56b2de80 2431 xfs_perag_put(pag);
cdded3d8
DC
2432
2433 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2434 if (btreeblk) {
5e656dbb 2435 be32_add_cpu(&agf->agf_btreeblks, 1);
cdded3d8
DC
2436 pag->pagf_btreeblks++;
2437 logflags |= XFS_AGF_BTREEBLKS;
2438 }
2439
cdded3d8 2440 xfs_alloc_log_agf(tp, agbp, logflags);
2bd0ea18 2441 *bnop = bno;
3e535bba 2442
2bd0ea18
NS
2443 return 0;
2444}
2445
2446/*
2447 * Log the given fields from the agf structure.
2448 */
2449void
2450xfs_alloc_log_agf(
2451 xfs_trans_t *tp, /* transaction pointer */
2452 xfs_buf_t *bp, /* buffer for a.g. freelist header */
dfc130f3 2453 int fields) /* mask of fields to be logged (XFS_AGF_...) */
2bd0ea18
NS
2454{
2455 int first; /* first byte offset */
2456 int last; /* last byte offset */
2457 static const short offsets[] = {
2458 offsetof(xfs_agf_t, agf_magicnum),
2459 offsetof(xfs_agf_t, agf_versionnum),
2460 offsetof(xfs_agf_t, agf_seqno),
2461 offsetof(xfs_agf_t, agf_length),
2462 offsetof(xfs_agf_t, agf_roots[0]),
2463 offsetof(xfs_agf_t, agf_levels[0]),
2464 offsetof(xfs_agf_t, agf_flfirst),
2465 offsetof(xfs_agf_t, agf_fllast),
2466 offsetof(xfs_agf_t, agf_flcount),
2467 offsetof(xfs_agf_t, agf_freeblks),
2468 offsetof(xfs_agf_t, agf_longest),
cdded3d8 2469 offsetof(xfs_agf_t, agf_btreeblks),
dd5b876e 2470 offsetof(xfs_agf_t, agf_uuid),
8511b71a 2471 offsetof(xfs_agf_t, agf_rmap_blocks),
bc859611
DW
2472 offsetof(xfs_agf_t, agf_refcount_blocks),
2473 offsetof(xfs_agf_t, agf_refcount_root),
2474 offsetof(xfs_agf_t, agf_refcount_level),
8511b71a
DW
2475 /* needed so that we don't log the whole rest of the structure: */
2476 offsetof(xfs_agf_t, agf_spare64),
2bd0ea18
NS
2477 sizeof(xfs_agf_t)
2478 };
2479
56b2de80
DC
2480 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2481
bdc16ee5 2482 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
dd5b876e 2483
2bd0ea18
NS
2484 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2485 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2486}
2487
2488/*
2489 * Interface for inode allocation to force the pag data to be initialized.
2490 */
2491int /* error */
2492xfs_alloc_pagf_init(
2493 xfs_mount_t *mp, /* file system mount structure */
2494 xfs_trans_t *tp, /* transaction pointer */
2495 xfs_agnumber_t agno, /* allocation group number */
2496 int flags) /* XFS_ALLOC_FLAGS_... */
2497{
7a3bffe4 2498 xfs_buf_t *bp;
2bd0ea18
NS
2499 int error;
2500
0e266570 2501 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2bd0ea18
NS
2502 return error;
2503 if (bp)
2504 xfs_trans_brelse(tp, bp);
2505 return 0;
2506}
2507
2508/*
2509 * Put the block on the freelist for the allocation group.
2510 */
2511int /* error */
2512xfs_alloc_put_freelist(
2513 xfs_trans_t *tp, /* transaction pointer */
2514 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2515 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
cdded3d8
DC
2516 xfs_agblock_t bno, /* block being freed */
2517 int btreeblk) /* block came from a AGF btree */
2bd0ea18
NS
2518{
2519 xfs_agf_t *agf; /* a.g. freespace structure */
5e656dbb 2520 __be32 *blockp;/* pointer to array entry */
2bd0ea18 2521 int error;
cdded3d8 2522 int logflags;
2bd0ea18
NS
2523 xfs_mount_t *mp; /* mount structure */
2524 xfs_perag_t *pag; /* per allocation group data */
dd5b876e
DC
2525 __be32 *agfl_bno;
2526 int startoff;
2bd0ea18
NS
2527
2528 agf = XFS_BUF_TO_AGF(agbp);
2529 mp = tp->t_mountp;
2530
2531 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
6e3140c7 2532 be32_to_cpu(agf->agf_seqno), &agflbp)))
2bd0ea18 2533 return error;
5e656dbb 2534 be32_add_cpu(&agf->agf_fllast, 1);
b8165508 2535 if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
46eca962 2536 agf->agf_fllast = 0;
56b2de80
DC
2537
2538 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
8dbee8f5 2539 ASSERT(!pag->pagf_agflreset);
5e656dbb 2540 be32_add_cpu(&agf->agf_flcount, 1);
2bd0ea18
NS
2541 xfs_trans_agflist_delta(tp, 1);
2542 pag->pagf_flcount++;
cdded3d8
DC
2543
2544 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2545 if (btreeblk) {
5e656dbb 2546 be32_add_cpu(&agf->agf_btreeblks, -1);
cdded3d8
DC
2547 pag->pagf_btreeblks--;
2548 logflags |= XFS_AGF_BTREEBLKS;
2549 }
56b2de80 2550 xfs_perag_put(pag);
cdded3d8 2551
5e656dbb
BN
2552 xfs_alloc_log_agf(tp, agbp, logflags);
2553
b8165508 2554 ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
dd5b876e
DC
2555
2556 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2557 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
5e656dbb 2558 *blockp = cpu_to_be32(bno);
dd5b876e
DC
2559 startoff = (char *)blockp - (char *)agflbp->b_addr;
2560
cdded3d8 2561 xfs_alloc_log_agf(tp, agbp, logflags);
dd5b876e 2562
bdc16ee5 2563 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
dd5b876e
DC
2564 xfs_trans_log_buf(tp, agflbp, startoff,
2565 startoff + sizeof(xfs_agblock_t) - 1);
2bd0ea18
NS
2566 return 0;
2567}
2568
bc01119d 2569static xfs_failaddr_t
a2ceac1f 2570xfs_agf_verify(
95d9582b
DW
2571 struct xfs_buf *bp)
2572{
2573 struct xfs_mount *mp = bp->b_target->bt_mount;
2574 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
a2ceac1f 2575
a65d8d29
BF
2576 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2577 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
bc01119d 2578 return __this_address;
a65d8d29
BF
2579 if (!xfs_log_check_lsn(mp,
2580 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
bc01119d 2581 return __this_address;
a65d8d29 2582 }
a2ceac1f 2583
dd5b876e
DC
2584 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2585 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2586 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
b8165508
DC
2587 be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
2588 be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
2589 be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
bc01119d 2590 return __this_address;
a2ceac1f 2591
00795aae
DW
2592 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
2593 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
2594 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
5a35bf2c 2595 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
bc01119d 2596 return __this_address;
5a35bf2c 2597
e37838e5 2598 if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
00795aae
DW
2599 (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
2600 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
bc01119d 2601 return __this_address;
e37838e5 2602
a2ceac1f
DC
2603 /*
2604 * during growfs operations, the perag is not fully initialised,
2605 * so we can't use it for any useful checking. growfs ensures we can't
2606 * use it by using uncached buffers that don't have the perag attached
2607 * so we can detect and avoid this problem.
2608 */
dd5b876e 2609 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
bc01119d 2610 return __this_address;
a2ceac1f 2611
dd5b876e
DC
2612 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2613 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
bc01119d 2614 return __this_address;
dd5b876e 2615
88ce0792 2616 if (xfs_sb_version_hasreflink(&mp->m_sb) &&
00795aae
DW
2617 (be32_to_cpu(agf->agf_refcount_level) < 1 ||
2618 be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
bc01119d 2619 return __this_address;
88ce0792 2620
bc01119d 2621 return NULL;
a2ceac1f 2622
a2ceac1f
DC
2623}
2624
2625static void
2626xfs_agf_read_verify(
2627 struct xfs_buf *bp)
2628{
dd5b876e 2629 struct xfs_mount *mp = bp->b_target->bt_mount;
1e697959 2630 xfs_failaddr_t fa;
dd5b876e 2631
45922933
DC
2632 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2633 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
1e697959
DW
2634 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
2635 else {
95d9582b 2636 fa = xfs_agf_verify(bp);
1e697959
DW
2637 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
2638 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2639 }
a2ceac1f
DC
2640}
2641
2642static void
2643xfs_agf_write_verify(
2644 struct xfs_buf *bp)
2645{
37d086ca
CM
2646 struct xfs_mount *mp = bp->b_target->bt_mount;
2647 struct xfs_buf_log_item *bip = bp->b_log_item;
1e697959 2648 xfs_failaddr_t fa;
dd5b876e 2649
95d9582b 2650 fa = xfs_agf_verify(bp);
1e697959
DW
2651 if (fa) {
2652 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
dd5b876e
DC
2653 return;
2654 }
2655
2656 if (!xfs_sb_version_hascrc(&mp->m_sb))
2657 return;
2658
2659 if (bip)
2660 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2661
43b5aeed 2662 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
a2ceac1f
DC
2663}
2664
2665const struct xfs_buf_ops xfs_agf_buf_ops = {
a3fac935 2666 .name = "xfs_agf",
a2ceac1f
DC
2667 .verify_read = xfs_agf_read_verify,
2668 .verify_write = xfs_agf_write_verify,
95d9582b 2669 .verify_struct = xfs_agf_verify,
a2ceac1f
DC
2670};
2671
2bd0ea18
NS
2672/*
2673 * Read in the allocation group header (free/alloc section).
2674 */
2675int /* error */
56b2de80
DC
2676xfs_read_agf(
2677 struct xfs_mount *mp, /* mount point structure */
2678 struct xfs_trans *tp, /* transaction pointer */
2679 xfs_agnumber_t agno, /* allocation group number */
2680 int flags, /* XFS_BUF_ */
2681 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2bd0ea18 2682{
9440d84d 2683 int error;
2bd0ea18 2684
ff105f75
DC
2685 trace_xfs_read_agf(mp, agno);
2686
2bd0ea18 2687 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
2688 error = xfs_trans_read_buf(
2689 mp, tp, mp->m_ddev_targp,
2690 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
a2ceac1f 2691 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
9440d84d 2692 if (error)
2bd0ea18 2693 return error;
56b2de80 2694 if (!*bpp)
2bd0ea18 2695 return 0;
56b2de80 2696
a2ceac1f
DC
2697 ASSERT(!(*bpp)->b_error);
2698 xfs_buf_set_ref(*bpp, XFS_AGF_REF);
56b2de80
DC
2699 return 0;
2700}
2701
2702/*
2703 * Read in the allocation group header (free/alloc section).
2704 */
2705int /* error */
2706xfs_alloc_read_agf(
2707 struct xfs_mount *mp, /* mount point structure */
2708 struct xfs_trans *tp, /* transaction pointer */
2709 xfs_agnumber_t agno, /* allocation group number */
2710 int flags, /* XFS_ALLOC_FLAG_... */
2711 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2712{
2713 struct xfs_agf *agf; /* ag freelist header */
2714 struct xfs_perag *pag; /* per allocation group data */
2715 int error;
2716
ff105f75 2717 trace_xfs_alloc_read_agf(mp, agno);
56b2de80 2718
ff105f75 2719 ASSERT(agno != NULLAGNUMBER);
56b2de80
DC
2720 error = xfs_read_agf(mp, tp, agno,
2721 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2722 bpp);
2723 if (error)
2724 return error;
2725 if (!*bpp)
2726 return 0;
a2ceac1f 2727 ASSERT(!(*bpp)->b_error);
56b2de80
DC
2728
2729 agf = XFS_BUF_TO_AGF(*bpp);
2730 pag = xfs_perag_get(mp, agno);
2bd0ea18 2731 if (!pag->pagf_init) {
6e3140c7 2732 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
cdded3d8 2733 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
6e3140c7
NS
2734 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2735 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2bd0ea18 2736 pag->pagf_levels[XFS_BTNUM_BNOi] =
6e3140c7 2737 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2bd0ea18 2738 pag->pagf_levels[XFS_BTNUM_CNTi] =
6e3140c7 2739 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
e37838e5
DW
2740 pag->pagf_levels[XFS_BTNUM_RMAPi] =
2741 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
88ce0792 2742 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
5e656dbb 2743 spin_lock_init(&pag->pagb_lock);
56b2de80 2744 pag->pagb_count = 0;
ff105f75
DC
2745 /* XXX: pagb_tree doesn't exist in userspace */
2746 //pag->pagb_tree = RB_ROOT;
2bd0ea18 2747 pag->pagf_init = 1;
8dbee8f5 2748 pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
2bd0ea18
NS
2749 }
2750#ifdef DEBUG
2751 else if (!XFS_FORCED_SHUTDOWN(mp)) {
6e3140c7 2752 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
cdded3d8 2753 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
6e3140c7
NS
2754 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2755 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2bd0ea18 2756 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
6e3140c7 2757 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2bd0ea18 2758 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
6e3140c7 2759 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2bd0ea18
NS
2760 }
2761#endif
56b2de80 2762 xfs_perag_put(pag);
2bd0ea18
NS
2763 return 0;
2764}
2765
2766/*
2767 * Allocate an extent (variable-size).
2768 * Depending on the allocation type, we either look in a single allocation
2769 * group or loop over the allocation groups to find the result.
2770 */
2771int /* error */
2772xfs_alloc_vextent(
dfc130f3 2773 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 2774{
dfc130f3 2775 xfs_agblock_t agsize; /* allocation group size */
2bd0ea18
NS
2776 int error;
2777 int flags; /* XFS_ALLOC_FLAG_... locking flags */
2bd0ea18
NS
2778 xfs_mount_t *mp; /* mount structure pointer */
2779 xfs_agnumber_t sagno; /* starting allocation group number */
dfc130f3 2780 xfs_alloctype_t type; /* input allocation type */
34317449 2781 int bump_rotor = 0;
46eca962 2782 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2bd0ea18
NS
2783
2784 mp = args->mp;
2785 type = args->otype = args->type;
2786 args->agbno = NULLAGBLOCK;
2787 /*
2788 * Just fix this up, for the case where the last a.g. is shorter
2789 * (or there's only one a.g.) and the caller couldn't easily figure
2790 * that out (xfs_bmap_alloc).
2791 */
2792 agsize = mp->m_sb.sb_agblocks;
2793 if (args->maxlen > agsize)
2794 args->maxlen = agsize;
2795 if (args->alignment == 0)
2796 args->alignment = 1;
2797 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2798 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2799 ASSERT(args->minlen <= args->maxlen);
2800 ASSERT(args->minlen <= agsize);
2801 ASSERT(args->mod < args->prod);
2802 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2803 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2804 args->minlen > args->maxlen || args->minlen > agsize ||
2805 args->mod >= args->prod) {
2806 args->fsbno = NULLFSBLOCK;
56b2de80 2807 trace_xfs_alloc_vextent_badargs(args);
2bd0ea18
NS
2808 return 0;
2809 }
9baa549b 2810
2bd0ea18
NS
2811 switch (type) {
2812 case XFS_ALLOCTYPE_THIS_AG:
2813 case XFS_ALLOCTYPE_NEAR_BNO:
2814 case XFS_ALLOCTYPE_THIS_BNO:
2815 /*
2816 * These three force us into a single a.g.
2817 */
2818 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
56b2de80 2819 args->pag = xfs_perag_get(mp, args->agno);
2bd0ea18 2820 error = xfs_alloc_fix_freelist(args, 0);
2bd0ea18 2821 if (error) {
56b2de80 2822 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
2823 goto error0;
2824 }
2825 if (!args->agbp) {
56b2de80 2826 trace_xfs_alloc_vextent_noagbp(args);
2bd0ea18
NS
2827 break;
2828 }
2829 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
0e266570 2830 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 2831 goto error0;
2bd0ea18
NS
2832 break;
2833 case XFS_ALLOCTYPE_START_BNO:
2834 /*
2835 * Try near allocation first, then anywhere-in-ag after
2836 * the first a.g. fails.
2837 */
1fccd5c8 2838 if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
34317449 2839 (mp->m_flags & XFS_MOUNT_32BITINODES)) {
46eca962
NS
2840 args->fsbno = XFS_AGB_TO_FSB(mp,
2841 ((mp->m_agfrotor / rotorstep) %
2842 mp->m_sb.sb_agcount), 0);
34317449
NS
2843 bump_rotor = 1;
2844 }
2bd0ea18
NS
2845 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2846 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2847 /* FALLTHROUGH */
2bd0ea18
NS
2848 case XFS_ALLOCTYPE_FIRST_AG:
2849 /*
2850 * Rotate through the allocation groups looking for a winner.
2851 */
f3eda3a5 2852 if (type == XFS_ALLOCTYPE_FIRST_AG) {
2bd0ea18
NS
2853 /*
2854 * Start with allocation group given by bno.
2855 */
2856 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2857 args->type = XFS_ALLOCTYPE_THIS_AG;
2858 sagno = 0;
2859 flags = 0;
2860 } else {
2bd0ea18
NS
2861 /*
2862 * Start with the given allocation group.
2863 */
2864 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2865 flags = XFS_ALLOC_FLAG_TRYLOCK;
2866 }
2867 /*
2868 * Loop over allocation groups twice; first time with
2869 * trylock set, second time without.
2870 */
2871 for (;;) {
56b2de80 2872 args->pag = xfs_perag_get(mp, args->agno);
9baa549b 2873 error = xfs_alloc_fix_freelist(args, flags);
9baa549b 2874 if (error) {
56b2de80 2875 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
2876 goto error0;
2877 }
2878 /*
2879 * If we get a buffer back then the allocation will fly.
2880 */
2881 if (args->agbp) {
0e266570 2882 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 2883 goto error0;
2bd0ea18
NS
2884 break;
2885 }
56b2de80
DC
2886
2887 trace_xfs_alloc_vextent_loopfailed(args);
2888
2bd0ea18
NS
2889 /*
2890 * Didn't work, figure out the next iteration.
2891 */
2892 if (args->agno == sagno &&
2893 type == XFS_ALLOCTYPE_START_BNO)
2894 args->type = XFS_ALLOCTYPE_THIS_AG;
5e656dbb
BN
2895 /*
2896 * For the first allocation, we can try any AG to get
2897 * space. However, if we already have allocated a
2898 * block, we don't want to try AGs whose number is below
2899 * sagno. Otherwise, we may end up with out-of-order
2900 * locking of AGF, which might cause deadlock.
2901 */
2902 if (++(args->agno) == mp->m_sb.sb_agcount) {
2903 if (args->firstblock != NULLFSBLOCK)
2904 args->agno = sagno;
2905 else
2906 args->agno = 0;
2907 }
5000d01d 2908 /*
2bd0ea18
NS
2909 * Reached the starting a.g., must either be done
2910 * or switch to non-trylock mode.
2911 */
2912 if (args->agno == sagno) {
a3b4a951 2913 if (flags == 0) {
2bd0ea18 2914 args->agbno = NULLAGBLOCK;
56b2de80 2915 trace_xfs_alloc_vextent_allfailed(args);
2bd0ea18
NS
2916 break;
2917 }
a3b4a951
CH
2918
2919 flags = 0;
2920 if (type == XFS_ALLOCTYPE_START_BNO) {
2921 args->agbno = XFS_FSB_TO_AGBNO(mp,
2922 args->fsbno);
2923 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2bd0ea18
NS
2924 }
2925 }
56b2de80 2926 xfs_perag_put(args->pag);
2bd0ea18 2927 }
f3eda3a5 2928 if (bump_rotor) {
46eca962
NS
2929 if (args->agno == sagno)
2930 mp->m_agfrotor = (mp->m_agfrotor + 1) %
2931 (mp->m_sb.sb_agcount * rotorstep);
2932 else
2933 mp->m_agfrotor = (args->agno * rotorstep + 1) %
2934 (mp->m_sb.sb_agcount * rotorstep);
2935 }
2bd0ea18
NS
2936 break;
2937 default:
2938 ASSERT(0);
2939 /* NOTREACHED */
2940 }
2941 if (args->agbno == NULLAGBLOCK)
2942 args->fsbno = NULLFSBLOCK;
2943 else {
2944 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2945#ifdef DEBUG
2946 ASSERT(args->len >= args->minlen);
2947 ASSERT(args->len <= args->maxlen);
2948 ASSERT(args->agbno % args->alignment == 0);
2949 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2950 args->len);
2951#endif
9542ae13
DC
2952
2953 /* Zero the extent if we were asked to do so */
1fccd5c8 2954 if (args->datatype & XFS_ALLOC_USERDATA_ZERO) {
9542ae13
DC
2955 error = xfs_zero_extent(args->ip, args->fsbno, args->len);
2956 if (error)
2957 goto error0;
2958 }
2959
2bd0ea18 2960 }
56b2de80 2961 xfs_perag_put(args->pag);
2bd0ea18
NS
2962 return 0;
2963error0:
56b2de80 2964 xfs_perag_put(args->pag);
2bd0ea18
NS
2965 return error;
2966}
2967
2a6da3b8
DC
2968/* Ensure that the freelist is at full capacity. */
2969int
2970xfs_free_extent_fix_freelist(
2971 struct xfs_trans *tp,
2972 xfs_agnumber_t agno,
2973 struct xfs_buf **agbp)
2bd0ea18 2974{
2a6da3b8
DC
2975 struct xfs_alloc_arg args;
2976 int error;
2bd0ea18 2977
2a6da3b8 2978 memset(&args, 0, sizeof(struct xfs_alloc_arg));
2bd0ea18
NS
2979 args.tp = tp;
2980 args.mp = tp->t_mountp;
2a6da3b8 2981 args.agno = agno;
a2ceac1f
DC
2982
2983 /*
2984 * validate that the block number is legal - the enables us to detect
2985 * and handle a silent filesystem corruption rather than crashing.
2986 */
a2ceac1f 2987 if (args.agno >= args.mp->m_sb.sb_agcount)
12b53197 2988 return -EFSCORRUPTED;
a2ceac1f 2989
56b2de80 2990 args.pag = xfs_perag_get(args.mp, args.agno);
a2ceac1f
DC
2991 ASSERT(args.pag);
2992
2993 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2994 if (error)
2a6da3b8
DC
2995 goto out;
2996
2997 *agbp = args.agbp;
2998out:
2999 xfs_perag_put(args.pag);
3000 return error;
3001}
3002
3003/*
3004 * Free an extent.
3005 * Just break up the extent address and hand off to xfs_free_ag_extent
3006 * after fixing up the freelist.
3007 */
3008int /* error */
3a13f959 3009__xfs_free_extent(
2a6da3b8
DC
3010 struct xfs_trans *tp, /* transaction pointer */
3011 xfs_fsblock_t bno, /* starting block number of extent */
85aec44f 3012 xfs_extlen_t len, /* length of extent */
cf8ce220 3013 struct xfs_owner_info *oinfo, /* extent owner */
3a13f959
BF
3014 enum xfs_ag_resv_type type, /* block reservation type */
3015 bool skip_discard)
2a6da3b8
DC
3016{
3017 struct xfs_mount *mp = tp->t_mountp;
3018 struct xfs_buf *agbp;
3019 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
3020 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
3021 int error;
3a13f959 3022 unsigned int busy_flags = 0;
2a6da3b8
DC
3023
3024 ASSERT(len != 0);
9760cac2 3025 ASSERT(type != XFS_AG_RESV_AGFL);
2a6da3b8 3026
a9da40de 3027 if (XFS_TEST_ERROR(false, mp,
e2a190dd 3028 XFS_ERRTAG_FREE_EXTENT))
a9da40de
DW
3029 return -EIO;
3030
2a6da3b8
DC
3031 error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
3032 if (error)
3033 return error;
3034
3035 XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err);
a2ceac1f
DC
3036
3037 /* validate the extent size is legal now we have the agf locked */
2a6da3b8
DC
3038 XFS_WANT_CORRUPTED_GOTO(mp,
3039 agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
3040 err);
a2ceac1f 3041
cf8ce220 3042 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
2a6da3b8
DC
3043 if (error)
3044 goto err;
3045
3a13f959
BF
3046 if (skip_discard)
3047 busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
3048 xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags);
2a6da3b8
DC
3049 return 0;
3050
3051err:
3052 xfs_trans_brelse(tp, agbp);
2bd0ea18
NS
3053 return error;
3054}
b3d83fa6
DW
3055
3056struct xfs_alloc_query_range_info {
3057 xfs_alloc_query_range_fn fn;
3058 void *priv;
3059};
3060
3061/* Format btree record and pass to our callback. */
3062STATIC int
3063xfs_alloc_query_range_helper(
3064 struct xfs_btree_cur *cur,
3065 union xfs_btree_rec *rec,
3066 void *priv)
3067{
3068 struct xfs_alloc_query_range_info *query = priv;
3069 struct xfs_alloc_rec_incore irec;
3070
3071 irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
3072 irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
3073 return query->fn(cur, &irec, query->priv);
3074}
3075
3076/* Find all free space within a given range of blocks. */
3077int
3078xfs_alloc_query_range(
3079 struct xfs_btree_cur *cur,
3080 struct xfs_alloc_rec_incore *low_rec,
3081 struct xfs_alloc_rec_incore *high_rec,
3082 xfs_alloc_query_range_fn fn,
3083 void *priv)
3084{
3085 union xfs_btree_irec low_brec;
3086 union xfs_btree_irec high_brec;
3087 struct xfs_alloc_query_range_info query;
3088
3089 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3090 low_brec.a = *low_rec;
3091 high_brec.a = *high_rec;
3092 query.priv = priv;
3093 query.fn = fn;
3094 return xfs_btree_query_range(cur, &low_brec, &high_brec,
3095 xfs_alloc_query_range_helper, &query);
3096}
7e05e856
DW
3097
3098/* Find all free space records. */
3099int
3100xfs_alloc_query_all(
3101 struct xfs_btree_cur *cur,
3102 xfs_alloc_query_range_fn fn,
3103 void *priv)
3104{
3105 struct xfs_alloc_query_range_info query;
3106
3107 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3108 query.priv = priv;
3109 query.fn = fn;
3110 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3111}
9bef6258
DW
3112
3113/* Find the size of the AG, in blocks. */
3114xfs_agblock_t
3115xfs_ag_block_count(
3116 struct xfs_mount *mp,
3117 xfs_agnumber_t agno)
3118{
3119 ASSERT(agno < mp->m_sb.sb_agcount);
3120
3121 if (agno < mp->m_sb.sb_agcount - 1)
3122 return mp->m_sb.sb_agblocks;
3123 return mp->m_sb.sb_dblocks - (agno * mp->m_sb.sb_agblocks);
3124}
3125
3126/*
3127 * Verify that an AG block number pointer neither points outside the AG
3128 * nor points at static metadata.
3129 */
3130bool
3131xfs_verify_agbno(
3132 struct xfs_mount *mp,
3133 xfs_agnumber_t agno,
3134 xfs_agblock_t agbno)
3135{
3136 xfs_agblock_t eoag;
3137
3138 eoag = xfs_ag_block_count(mp, agno);
3139 if (agbno >= eoag)
3140 return false;
3141 if (agbno <= XFS_AGFL_BLOCK(mp))
3142 return false;
3143 return true;
3144}
3145
3146/*
3147 * Verify that an FS block number pointer neither points outside the
3148 * filesystem nor points at static AG metadata.
3149 */
3150bool
3151xfs_verify_fsbno(
3152 struct xfs_mount *mp,
3153 xfs_fsblock_t fsbno)
3154{
3155 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno);
3156
3157 if (agno >= mp->m_sb.sb_agcount)
3158 return false;
3159 return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno));
3160}
1fe41a73
DW
3161
3162/* Is there a record covering a given extent? */
3163int
3164xfs_alloc_has_record(
3165 struct xfs_btree_cur *cur,
3166 xfs_agblock_t bno,
3167 xfs_extlen_t len,
3168 bool *exists)
3169{
3170 union xfs_btree_irec low;
3171 union xfs_btree_irec high;
3172
3173 memset(&low, 0, sizeof(low));
3174 low.a.ar_startblock = bno;
3175 memset(&high, 0xFF, sizeof(high));
3176 high.a.ar_startblock = bno + len - 1;
3177
3178 return xfs_btree_has_record(cur, &low, &high, exists);
3179}
71a98c66
DW
3180
3181/*
3182 * Walk all the blocks in the AGFL. The @walk_fn can return any negative
3183 * error code or XFS_BTREE_QUERY_RANGE_ABORT.
3184 */
3185int
3186xfs_agfl_walk(
3187 struct xfs_mount *mp,
3188 struct xfs_agf *agf,
3189 struct xfs_buf *agflbp,
3190 xfs_agfl_walk_fn walk_fn,
3191 void *priv)
3192{
3193 __be32 *agfl_bno;
3194 unsigned int i;
3195 int error;
3196
3197 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
3198 i = be32_to_cpu(agf->agf_flfirst);
3199
3200 /* Nothing to walk in an empty AGFL. */
3201 if (agf->agf_flcount == cpu_to_be32(0))
3202 return 0;
3203
3204 /* Otherwise, walk from first to last, wrapping as needed. */
3205 for (;;) {
3206 error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
3207 if (error)
3208 return error;
3209 if (i == be32_to_cpu(agf->agf_fllast))
3210 break;
3211 if (++i == xfs_agfl_size(mp))
3212 i = 0;
3213 }
3214
3215 return 0;
3216}