]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxfs/xfs_alloc.c
xfs: introduce refcount btree definitions
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_alloc.c
CommitLineData
2bd0ea18 1/*
5e656dbb 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
da23017d 3 * All Rights Reserved.
5000d01d 4 *
da23017d
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
2bd0ea18 7 * published by the Free Software Foundation.
5000d01d 8 *
da23017d
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
5000d01d 13 *
da23017d
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2bd0ea18 17 */
9c799827 18#include "libxfs_priv.h"
b626fb59
DC
19#include "xfs_fs.h"
20#include "xfs_format.h"
21#include "xfs_log_format.h"
22#include "xfs_shared.h"
23#include "xfs_trans_resv.h"
24#include "xfs_bit.h"
25#include "xfs_sb.h"
26#include "xfs_mount.h"
f944d3d0 27#include "xfs_defer.h"
b626fb59
DC
28#include "xfs_inode.h"
29#include "xfs_btree.h"
631ac87a 30#include "xfs_rmap.h"
b626fb59
DC
31#include "xfs_alloc_btree.h"
32#include "xfs_alloc.h"
33#include "xfs_cksum.h"
34#include "xfs_trace.h"
35#include "xfs_trans.h"
cf8ce220 36#include "xfs_ag_resv.h"
2bd0ea18 37
ff105f75
DC
38struct workqueue_struct *xfs_alloc_wq;
39
2bd0ea18 40#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
5e656dbb
BN
41
42#define XFSA_FIXUP_BNO_OK 1
43#define XFSA_FIXUP_CNT_OK 2
44
5e656dbb
BN
45STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
46STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
47STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
48STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
a2ceac1f 49 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
2bd0ea18 50
ef5340cd
DW
51xfs_extlen_t
52xfs_prealloc_blocks(
53 struct xfs_mount *mp)
54{
55 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
56 return XFS_RMAP_BLOCK(mp) + 1;
57 if (xfs_sb_version_hasfinobt(&mp->m_sb))
58 return XFS_FIBT_BLOCK(mp) + 1;
59 return XFS_IBT_BLOCK(mp) + 1;
60}
61
b8a8d6e5
DW
62/*
63 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
64 * AGF buffer (PV 947395), we place constraints on the relationship among
65 * actual allocations for data blocks, freelist blocks, and potential file data
66 * bmap btree blocks. However, these restrictions may result in no actual space
67 * allocated for a delayed extent, for example, a data block in a certain AG is
68 * allocated but there is no additional block for the additional bmap btree
69 * block due to a split of the bmap btree of the file. The result of this may
70 * lead to an infinite loop when the file gets flushed to disk and all delayed
71 * extents need to be actually allocated. To get around this, we explicitly set
72 * aside a few blocks which will not be reserved in delayed allocation.
73 *
cf8ce220
DW
74 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
75 * potential split of the file's bmap btree.
b8a8d6e5
DW
76 */
77unsigned int
78xfs_alloc_set_aside(
79 struct xfs_mount *mp)
80{
81 unsigned int blocks;
82
83 blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
b8a8d6e5
DW
84 return blocks;
85}
86
87/*
88 * When deciding how much space to allocate out of an AG, we limit the
89 * allocation maximum size to the size the AG. However, we cannot use all the
90 * blocks in the AG - some are permanently used by metadata. These
91 * blocks are generally:
92 * - the AG superblock, AGF, AGI and AGFL
93 * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
94 * the AGI free inode and rmap btree root blocks.
95 * - blocks on the AGFL according to xfs_alloc_set_aside() limits
96 * - the rmapbt root block
97 *
98 * The AG headers are sector sized, so the amount of space they take up is
99 * dependent on filesystem geometry. The others are all single blocks.
100 */
101unsigned int
102xfs_alloc_ag_max_usable(
103 struct xfs_mount *mp)
104{
105 unsigned int blocks;
106
107 blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
108 blocks += XFS_ALLOC_AGFL_RESERVE;
109 blocks += 3; /* AGF, AGI btree root blocks */
110 if (xfs_sb_version_hasfinobt(&mp->m_sb))
111 blocks++; /* finobt root block */
112 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
113 blocks++; /* rmap root block */
114
115 return mp->m_sb.sb_agblocks - blocks;
116}
117
b194c7d8
BN
118/*
119 * Lookup the record equal to [bno, len] in the btree given by cur.
120 */
121STATIC int /* error */
122xfs_alloc_lookup_eq(
123 struct xfs_btree_cur *cur, /* btree cursor */
124 xfs_agblock_t bno, /* starting block of extent */
125 xfs_extlen_t len, /* length of extent */
126 int *stat) /* success/failure */
127{
128 cur->bc_rec.a.ar_startblock = bno;
129 cur->bc_rec.a.ar_blockcount = len;
130 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
131}
132
133/*
134 * Lookup the first record greater than or equal to [bno, len]
135 * in the btree given by cur.
136 */
a2ceac1f 137int /* error */
b194c7d8
BN
138xfs_alloc_lookup_ge(
139 struct xfs_btree_cur *cur, /* btree cursor */
140 xfs_agblock_t bno, /* starting block of extent */
141 xfs_extlen_t len, /* length of extent */
142 int *stat) /* success/failure */
143{
144 cur->bc_rec.a.ar_startblock = bno;
145 cur->bc_rec.a.ar_blockcount = len;
146 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
147}
148
149/*
150 * Lookup the first record less than or equal to [bno, len]
151 * in the btree given by cur.
152 */
2d066e16 153static int /* error */
b194c7d8
BN
154xfs_alloc_lookup_le(
155 struct xfs_btree_cur *cur, /* btree cursor */
156 xfs_agblock_t bno, /* starting block of extent */
157 xfs_extlen_t len, /* length of extent */
158 int *stat) /* success/failure */
159{
160 cur->bc_rec.a.ar_startblock = bno;
161 cur->bc_rec.a.ar_blockcount = len;
162 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
163}
164
165/*
166 * Update the record referred to by cur to the value given
167 * by [bno, len].
168 * This either works (return 0) or gets an EFSCORRUPTED error.
169 */
170STATIC int /* error */
171xfs_alloc_update(
172 struct xfs_btree_cur *cur, /* btree cursor */
173 xfs_agblock_t bno, /* starting block of extent */
174 xfs_extlen_t len) /* length of extent */
175{
176 union xfs_btree_rec rec;
177
178 rec.alloc.ar_startblock = cpu_to_be32(bno);
179 rec.alloc.ar_blockcount = cpu_to_be32(len);
180 return xfs_btree_update(cur, &rec);
181}
182
183/*
184 * Get the data from the pointed-to record.
185 */
a2ceac1f 186int /* error */
b194c7d8
BN
187xfs_alloc_get_rec(
188 struct xfs_btree_cur *cur, /* btree cursor */
189 xfs_agblock_t *bno, /* output: starting block of extent */
190 xfs_extlen_t *len, /* output: length of extent */
191 int *stat) /* output: success/failure */
192{
193 union xfs_btree_rec *rec;
194 int error;
195
196 error = xfs_btree_get_rec(cur, &rec, stat);
197 if (!error && *stat == 1) {
198 *bno = be32_to_cpu(rec->alloc.ar_startblock);
199 *len = be32_to_cpu(rec->alloc.ar_blockcount);
200 }
201 return error;
202}
203
2bd0ea18
NS
204/*
205 * Compute aligned version of the found extent.
206 * Takes alignment and min length into account.
207 */
5e656dbb 208STATIC void
2bd0ea18 209xfs_alloc_compute_aligned(
a2ceac1f 210 xfs_alloc_arg_t *args, /* allocation argument structure */
2bd0ea18
NS
211 xfs_agblock_t foundbno, /* starting block in found extent */
212 xfs_extlen_t foundlen, /* length in found extent */
2bd0ea18
NS
213 xfs_agblock_t *resbno, /* result block number */
214 xfs_extlen_t *reslen) /* result length */
215{
216 xfs_agblock_t bno;
2bd0ea18 217 xfs_extlen_t len;
ff3263dd 218 xfs_extlen_t diff;
2bd0ea18 219
a2ceac1f
DC
220 /* Trim busy sections out of found extent */
221 xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
222
ff3263dd
BF
223 /*
224 * If we have a largish extent that happens to start before min_agbno,
225 * see if we can shift it into range...
226 */
227 if (bno < args->min_agbno && bno + len > args->min_agbno) {
228 diff = args->min_agbno - bno;
229 if (len > diff) {
230 bno += diff;
231 len -= diff;
232 }
233 }
234
a2ceac1f
DC
235 if (args->alignment > 1 && len >= args->minlen) {
236 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
ff3263dd
BF
237
238 diff = aligned_bno - bno;
a2ceac1f
DC
239
240 *resbno = aligned_bno;
241 *reslen = diff >= len ? 0 : len - diff;
2bd0ea18 242 } else {
a2ceac1f
DC
243 *resbno = bno;
244 *reslen = len;
2bd0ea18 245 }
2bd0ea18
NS
246}
247
248/*
249 * Compute best start block and diff for "near" allocations.
250 * freelen >= wantlen already checked by caller.
251 */
252STATIC xfs_extlen_t /* difference value (absolute) */
253xfs_alloc_compute_diff(
254 xfs_agblock_t wantbno, /* target starting block */
255 xfs_extlen_t wantlen, /* target length */
256 xfs_extlen_t alignment, /* target alignment */
1fccd5c8 257 int datatype, /* are we allocating data? */
2bd0ea18
NS
258 xfs_agblock_t freebno, /* freespace's starting block */
259 xfs_extlen_t freelen, /* freespace's length */
260 xfs_agblock_t *newbnop) /* result: best start block from free */
261{
262 xfs_agblock_t freeend; /* end of freespace extent */
263 xfs_agblock_t newbno1; /* return block number */
264 xfs_agblock_t newbno2; /* other new block number */
0e266570
NS
265 xfs_extlen_t newlen1=0; /* length with newbno1 */
266 xfs_extlen_t newlen2=0; /* length with newbno2 */
2bd0ea18 267 xfs_agblock_t wantend; /* end of target extent */
1fccd5c8 268 bool userdata = xfs_alloc_is_userdata(datatype);
2bd0ea18
NS
269
270 ASSERT(freelen >= wantlen);
271 freeend = freebno + freelen;
272 wantend = wantbno + wantlen;
84a62eea
DC
273 /*
274 * We want to allocate from the start of a free extent if it is past
275 * the desired block or if we are allocating user data and the free
276 * extent is before desired block. The second case is there to allow
277 * for contiguous allocation from the remaining free space if the file
278 * grows in the short term.
279 */
280 if (freebno >= wantbno || (userdata && freeend < wantend)) {
2bd0ea18
NS
281 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
282 newbno1 = NULLAGBLOCK;
283 } else if (freeend >= wantend && alignment > 1) {
284 newbno1 = roundup(wantbno, alignment);
285 newbno2 = newbno1 - alignment;
286 if (newbno1 >= freeend)
287 newbno1 = NULLAGBLOCK;
288 else
289 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
290 if (newbno2 < freebno)
291 newbno2 = NULLAGBLOCK;
292 else
293 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
294 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
295 if (newlen1 < newlen2 ||
296 (newlen1 == newlen2 &&
297 XFS_ABSDIFF(newbno1, wantbno) >
298 XFS_ABSDIFF(newbno2, wantbno)))
299 newbno1 = newbno2;
300 } else if (newbno2 != NULLAGBLOCK)
301 newbno1 = newbno2;
302 } else if (freeend >= wantend) {
303 newbno1 = wantbno;
304 } else if (alignment > 1) {
305 newbno1 = roundup(freeend - wantlen, alignment);
306 if (newbno1 > freeend - wantlen &&
307 newbno1 - alignment >= freebno)
308 newbno1 -= alignment;
309 else if (newbno1 >= freeend)
310 newbno1 = NULLAGBLOCK;
311 } else
312 newbno1 = freeend - wantlen;
313 *newbnop = newbno1;
314 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
315}
316
317/*
318 * Fix up the length, based on mod and prod.
319 * len should be k * prod + mod for some k.
320 * If len is too small it is returned unchanged.
321 * If len hits maxlen it is left alone.
322 */
323STATIC void
324xfs_alloc_fix_len(
dfc130f3 325 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18
NS
326{
327 xfs_extlen_t k;
328 xfs_extlen_t rlen;
329
330 ASSERT(args->mod < args->prod);
331 rlen = args->len;
332 ASSERT(rlen >= args->minlen);
333 ASSERT(rlen <= args->maxlen);
334 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
335 (args->mod == 0 && rlen < args->prod))
336 return;
337 k = rlen % args->prod;
338 if (k == args->mod)
339 return;
ff105f75
DC
340 if (k > args->mod)
341 rlen = rlen - (k - args->mod);
342 else
343 rlen = rlen - args->prod + (args->mod - k);
19ebedcf 344 /* casts to (int) catch length underflows */
ff105f75
DC
345 if ((int)rlen < (int)args->minlen)
346 return;
347 ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
348 ASSERT(rlen % args->prod == args->mod);
2bd0ea18
NS
349 args->len = rlen;
350}
351
352/*
353 * Fix up length if there is too little space left in the a.g.
354 * Return 1 if ok, 0 if too little, should give up.
355 */
356STATIC int
357xfs_alloc_fix_minleft(
dfc130f3 358 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18
NS
359{
360 xfs_agf_t *agf; /* a.g. freelist header */
361 int diff; /* free space difference */
362
363 if (args->minleft == 0)
364 return 1;
365 agf = XFS_BUF_TO_AGF(args->agbp);
6e3140c7 366 diff = be32_to_cpu(agf->agf_freeblks)
2bd0ea18
NS
367 - args->len - args->minleft;
368 if (diff >= 0)
369 return 1;
370 args->len += diff; /* shrink the allocated space */
19ebedcf
DC
371 /* casts to (int) catch length underflows */
372 if ((int)args->len >= (int)args->minlen)
2bd0ea18
NS
373 return 1;
374 args->agbno = NULLAGBLOCK;
375 return 0;
376}
377
378/*
379 * Update the two btrees, logically removing from freespace the extent
380 * starting at rbno, rlen blocks. The extent is contained within the
381 * actual (current) free extent fbno for flen blocks.
382 * Flags are passed in indicating whether the cursors are set to the
383 * relevant records.
384 */
385STATIC int /* error code */
386xfs_alloc_fixup_trees(
dfc130f3
RC
387 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
388 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
2bd0ea18
NS
389 xfs_agblock_t fbno, /* starting block of free extent */
390 xfs_extlen_t flen, /* length of free extent */
391 xfs_agblock_t rbno, /* starting block of returned extent */
392 xfs_extlen_t rlen, /* length of returned extent */
393 int flags) /* flags, XFSA_FIXUP_... */
394{
395 int error; /* error code */
396 int i; /* operation results */
397 xfs_agblock_t nfbno1; /* first new free startblock */
398 xfs_agblock_t nfbno2; /* second new free startblock */
0e266570
NS
399 xfs_extlen_t nflen1=0; /* first new free length */
400 xfs_extlen_t nflen2=0; /* second new free length */
19ebedcf
DC
401 struct xfs_mount *mp;
402
403 mp = cnt_cur->bc_mp;
2bd0ea18
NS
404
405 /*
406 * Look up the record in the by-size tree if necessary.
407 */
408 if (flags & XFSA_FIXUP_CNT_OK) {
409#ifdef DEBUG
0e266570 410 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
2bd0ea18 411 return error;
19ebedcf 412 XFS_WANT_CORRUPTED_RETURN(mp,
2bd0ea18
NS
413 i == 1 && nfbno1 == fbno && nflen1 == flen);
414#endif
415 } else {
0e266570 416 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
2bd0ea18 417 return error;
19ebedcf 418 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
419 }
420 /*
421 * Look up the record in the by-block tree if necessary.
422 */
423 if (flags & XFSA_FIXUP_BNO_OK) {
424#ifdef DEBUG
0e266570 425 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
2bd0ea18 426 return error;
19ebedcf 427 XFS_WANT_CORRUPTED_RETURN(mp,
2bd0ea18
NS
428 i == 1 && nfbno1 == fbno && nflen1 == flen);
429#endif
430 } else {
0e266570 431 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
2bd0ea18 432 return error;
19ebedcf 433 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18 434 }
b3563c19 435
2bd0ea18 436#ifdef DEBUG
b3563c19
BN
437 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
438 struct xfs_btree_block *bnoblock;
439 struct xfs_btree_block *cntblock;
440
441 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
442 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
2bd0ea18 443
19ebedcf 444 XFS_WANT_CORRUPTED_RETURN(mp,
b3563c19 445 bnoblock->bb_numrecs == cntblock->bb_numrecs);
2bd0ea18
NS
446 }
447#endif
b3563c19 448
2bd0ea18
NS
449 /*
450 * Deal with all four cases: the allocated record is contained
451 * within the freespace record, so we can have new freespace
452 * at either (or both) end, or no freespace remaining.
453 */
454 if (rbno == fbno && rlen == flen)
455 nfbno1 = nfbno2 = NULLAGBLOCK;
456 else if (rbno == fbno) {
457 nfbno1 = rbno + rlen;
458 nflen1 = flen - rlen;
459 nfbno2 = NULLAGBLOCK;
460 } else if (rbno + rlen == fbno + flen) {
461 nfbno1 = fbno;
462 nflen1 = flen - rlen;
463 nfbno2 = NULLAGBLOCK;
464 } else {
465 nfbno1 = fbno;
466 nflen1 = rbno - fbno;
467 nfbno2 = rbno + rlen;
468 nflen2 = (fbno + flen) - nfbno2;
469 }
470 /*
471 * Delete the entry from the by-size btree.
472 */
b194c7d8 473 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 474 return error;
19ebedcf 475 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
476 /*
477 * Add new by-size btree entry(s).
478 */
479 if (nfbno1 != NULLAGBLOCK) {
0e266570 480 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
2bd0ea18 481 return error;
19ebedcf 482 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
b194c7d8 483 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 484 return error;
19ebedcf 485 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
486 }
487 if (nfbno2 != NULLAGBLOCK) {
0e266570 488 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
2bd0ea18 489 return error;
19ebedcf 490 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
b194c7d8 491 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 492 return error;
19ebedcf 493 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
494 }
495 /*
496 * Fix up the by-block btree entry(s).
497 */
498 if (nfbno1 == NULLAGBLOCK) {
499 /*
500 * No remaining freespace, just delete the by-block tree entry.
501 */
b194c7d8 502 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18 503 return error;
19ebedcf 504 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
505 } else {
506 /*
507 * Update the by-block entry to start later|be shorter.
508 */
0e266570 509 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
2bd0ea18
NS
510 return error;
511 }
512 if (nfbno2 != NULLAGBLOCK) {
513 /*
514 * 2 resulting free entries, need to add one.
515 */
0e266570 516 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
2bd0ea18 517 return error;
19ebedcf 518 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
b194c7d8 519 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18 520 return error;
19ebedcf 521 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
522 }
523 return 0;
524}
525
dd5b876e 526static bool
a2ceac1f
DC
527xfs_agfl_verify(
528 struct xfs_buf *bp)
529{
a2ceac1f
DC
530 struct xfs_mount *mp = bp->b_target->bt_mount;
531 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
a2ceac1f
DC
532 int i;
533
9c4e12fb 534 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
dd5b876e
DC
535 return false;
536 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
537 return false;
538 /*
539 * during growfs operations, the perag is not fully initialised,
540 * so we can't use it for any useful checking. growfs ensures we can't
541 * use it by using uncached buffers that don't have the perag attached
542 * so we can detect and avoid this problem.
543 */
544 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
545 return false;
546
a2ceac1f 547 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
dd5b876e 548 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
a2ceac1f 549 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
dd5b876e 550 return false;
a2ceac1f 551 }
a65d8d29
BF
552
553 return xfs_log_check_lsn(mp,
554 be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
dd5b876e
DC
555}
556
557static void
558xfs_agfl_read_verify(
559 struct xfs_buf *bp)
560{
561 struct xfs_mount *mp = bp->b_target->bt_mount;
dd5b876e
DC
562
563 /*
564 * There is no verification of non-crc AGFLs because mkfs does not
565 * initialise the AGFL to zero or NULL. Hence the only valid part of the
566 * AGFL is what the AGF says is active. We can't get to the AGF, so we
567 * can't verify just those entries are valid.
568 */
569 if (!xfs_sb_version_hascrc(&mp->m_sb))
570 return;
571
45922933 572 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
12b53197 573 xfs_buf_ioerror(bp, -EFSBADCRC);
45922933 574 else if (!xfs_agfl_verify(bp))
12b53197 575 xfs_buf_ioerror(bp, -EFSCORRUPTED);
45922933
DC
576
577 if (bp->b_error)
578 xfs_verifier_error(bp);
a2ceac1f
DC
579}
580
581static void
582xfs_agfl_write_verify(
583 struct xfs_buf *bp)
584{
dd5b876e
DC
585 struct xfs_mount *mp = bp->b_target->bt_mount;
586 struct xfs_buf_log_item *bip = bp->b_fspriv;
a2ceac1f 587
dd5b876e
DC
588 /* no verification of non-crc AGFLs */
589 if (!xfs_sb_version_hascrc(&mp->m_sb))
590 return;
591
592 if (!xfs_agfl_verify(bp)) {
12b53197 593 xfs_buf_ioerror(bp, -EFSCORRUPTED);
45922933 594 xfs_verifier_error(bp);
dd5b876e
DC
595 return;
596 }
597
598 if (bip)
599 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
600
43b5aeed 601 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
a2ceac1f
DC
602}
603
604const struct xfs_buf_ops xfs_agfl_buf_ops = {
a3fac935 605 .name = "xfs_agfl",
a2ceac1f
DC
606 .verify_read = xfs_agfl_read_verify,
607 .verify_write = xfs_agfl_write_verify,
608};
609
2bd0ea18
NS
610/*
611 * Read in the allocation group free block array.
612 */
613STATIC int /* error */
614xfs_alloc_read_agfl(
615 xfs_mount_t *mp, /* mount point structure */
616 xfs_trans_t *tp, /* transaction pointer */
617 xfs_agnumber_t agno, /* allocation group number */
618 xfs_buf_t **bpp) /* buffer for the ag free block array */
619{
620 xfs_buf_t *bp; /* return value */
2bd0ea18
NS
621 int error;
622
623 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
624 error = xfs_trans_read_buf(
625 mp, tp, mp->m_ddev_targp,
626 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
a2ceac1f 627 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
9440d84d 628 if (error)
2bd0ea18 629 return error;
a2ceac1f 630 xfs_buf_set_ref(bp, XFS_AGFL_REF);
2bd0ea18
NS
631 *bpp = bp;
632 return 0;
633}
634
a2ceac1f
DC
635STATIC int
636xfs_alloc_update_counters(
637 struct xfs_trans *tp,
638 struct xfs_perag *pag,
639 struct xfs_buf *agbp,
640 long len)
641{
642 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
643
644 pag->pagf_freeblks += len;
645 be32_add_cpu(&agf->agf_freeblks, len);
646
647 xfs_trans_agblocks_delta(tp, len);
648 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
649 be32_to_cpu(agf->agf_length)))
12b53197 650 return -EFSCORRUPTED;
a2ceac1f
DC
651
652 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
653 return 0;
654}
655
2bd0ea18
NS
656/*
657 * Allocation group level functions.
658 */
659
660/*
661 * Allocate a variable extent in the allocation group agno.
662 * Type and bno are used to determine where in the allocation group the
663 * extent will start.
664 * Extent's length (returned in *len) will be between minlen and maxlen,
665 * and of the form k * prod + mod unless there's nothing that large.
666 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
667 */
668STATIC int /* error */
669xfs_alloc_ag_vextent(
dfc130f3 670 xfs_alloc_arg_t *args) /* argument structure for allocation */
2bd0ea18 671{
0e266570 672 int error=0;
cf8ce220
DW
673 xfs_extlen_t reservation;
674 xfs_extlen_t oldmax;
2bd0ea18
NS
675
676 ASSERT(args->minlen > 0);
677 ASSERT(args->maxlen > 0);
678 ASSERT(args->minlen <= args->maxlen);
679 ASSERT(args->mod < args->prod);
680 ASSERT(args->alignment > 0);
cf8ce220
DW
681
682 /*
683 * Clamp maxlen to the amount of free space minus any reservations
684 * that have been made.
685 */
686 oldmax = args->maxlen;
687 reservation = xfs_ag_resv_needed(args->pag, args->resv);
688 if (args->maxlen > args->pag->pagf_freeblks - reservation)
689 args->maxlen = args->pag->pagf_freeblks - reservation;
690 if (args->maxlen == 0) {
691 args->agbno = NULLAGBLOCK;
692 args->maxlen = oldmax;
693 return 0;
694 }
695
2bd0ea18
NS
696 /*
697 * Branch to correct routine based on the type.
698 */
699 args->wasfromfl = 0;
700 switch (args->type) {
701 case XFS_ALLOCTYPE_THIS_AG:
702 error = xfs_alloc_ag_vextent_size(args);
703 break;
704 case XFS_ALLOCTYPE_NEAR_BNO:
705 error = xfs_alloc_ag_vextent_near(args);
706 break;
707 case XFS_ALLOCTYPE_THIS_BNO:
708 error = xfs_alloc_ag_vextent_exact(args);
709 break;
710 default:
711 ASSERT(0);
712 /* NOTREACHED */
713 }
a2ceac1f 714
cf8ce220
DW
715 args->maxlen = oldmax;
716
a2ceac1f 717 if (error || args->agbno == NULLAGBLOCK)
2bd0ea18 718 return error;
2bd0ea18 719
a2ceac1f
DC
720 ASSERT(args->len >= args->minlen);
721 ASSERT(args->len <= args->maxlen);
cf8ce220 722 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
a2ceac1f
DC
723 ASSERT(args->agbno % args->alignment == 0);
724
631ac87a
DW
725 /* if not file data, insert new block into the reverse map btree */
726 if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
727 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
728 args->agbno, args->len, &args->oinfo);
729 if (error)
730 return error;
731 }
732
a2ceac1f
DC
733 if (!args->wasfromfl) {
734 error = xfs_alloc_update_counters(args->tp, args->pag,
735 args->agbp,
736 -((long)(args->len)));
737 if (error)
738 return error;
739
740 ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
741 args->agbno, args->len));
2bd0ea18 742 }
a2ceac1f 743
cf8ce220 744 xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
a2ceac1f 745
79896434
BD
746 XFS_STATS_INC(args->mp, xs_allocx);
747 XFS_STATS_ADD(args->mp, xs_allocb, args->len);
a2ceac1f 748 return error;
2bd0ea18
NS
749}
750
751/*
752 * Allocate a variable extent at exactly agno/bno.
753 * Extent's length (returned in *len) will be between minlen and maxlen,
754 * and of the form k * prod + mod unless there's nothing that large.
755 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
756 */
757STATIC int /* error */
758xfs_alloc_ag_vextent_exact(
dfc130f3 759 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 760{
dfc130f3
RC
761 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
762 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
2bd0ea18
NS
763 int error;
764 xfs_agblock_t fbno; /* start block of found extent */
2bd0ea18 765 xfs_extlen_t flen; /* length of found extent */
a2ceac1f
DC
766 xfs_agblock_t tbno; /* start block of trimmed extent */
767 xfs_extlen_t tlen; /* length of trimmed extent */
768 xfs_agblock_t tend; /* end block of trimmed extent */
2bd0ea18 769 int i; /* success/failure of operation */
2bd0ea18
NS
770
771 ASSERT(args->alignment == 1);
a2ceac1f 772
2bd0ea18
NS
773 /*
774 * Allocate/initialize a cursor for the by-number freespace btree.
775 */
b194c7d8 776 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
56b2de80
DC
777 args->agno, XFS_BTNUM_BNO);
778
2bd0ea18
NS
779 /*
780 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
781 * Look for the closest free block <= bno, it must contain bno
782 * if any free block does.
783 */
56b2de80
DC
784 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
785 if (error)
2bd0ea18 786 goto error0;
56b2de80
DC
787 if (!i)
788 goto not_found;
789
2bd0ea18
NS
790 /*
791 * Grab the freespace record.
792 */
56b2de80
DC
793 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
794 if (error)
2bd0ea18 795 goto error0;
19ebedcf 796 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18 797 ASSERT(fbno <= args->agbno);
56b2de80 798
5000d01d 799 /*
a2ceac1f
DC
800 * Check for overlapping busy extents.
801 */
802 xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
803
804 /*
805 * Give up if the start of the extent is busy, or the freespace isn't
806 * long enough for the minimum request.
2bd0ea18 807 */
a2ceac1f
DC
808 if (tbno > args->agbno)
809 goto not_found;
810 if (tlen < args->minlen)
811 goto not_found;
812 tend = tbno + tlen;
813 if (tend < args->agbno + args->minlen)
56b2de80
DC
814 goto not_found;
815
2bd0ea18
NS
816 /*
817 * End of extent will be smaller of the freespace end and the
818 * maximal requested end.
56b2de80 819 *
2bd0ea18
NS
820 * Fix the length according to mod and prod if given.
821 */
a2ceac1f
DC
822 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
823 - args->agbno;
2bd0ea18 824 xfs_alloc_fix_len(args);
56b2de80
DC
825 if (!xfs_alloc_fix_minleft(args))
826 goto not_found;
827
a2ceac1f 828 ASSERT(args->agbno + args->len <= tend);
56b2de80 829
2bd0ea18 830 /*
a2ceac1f 831 * We are allocating agbno for args->len
2bd0ea18
NS
832 * Allocate/initialize a cursor for the by-size btree.
833 */
b194c7d8
BN
834 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
835 args->agno, XFS_BTNUM_CNT);
2bd0ea18 836 ASSERT(args->agbno + args->len <=
6e3140c7 837 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
56b2de80
DC
838 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
839 args->len, XFSA_FIXUP_BNO_OK);
840 if (error) {
2bd0ea18
NS
841 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
842 goto error0;
843 }
a2ceac1f 844
2bd0ea18
NS
845 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
846 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
a2ceac1f 847
2bd0ea18 848 args->wasfromfl = 0;
56b2de80
DC
849 trace_xfs_alloc_exact_done(args);
850 return 0;
851
852not_found:
853 /* Didn't find it, return null. */
854 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
855 args->agbno = NULLAGBLOCK;
856 trace_xfs_alloc_exact_notfound(args);
2bd0ea18
NS
857 return 0;
858
859error0:
860 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
56b2de80
DC
861 trace_xfs_alloc_exact_error(args);
862 return error;
863}
864
865/*
866 * Search the btree in a given direction via the search cursor and compare
867 * the records found against the good extent we've already found.
868 */
869STATIC int
870xfs_alloc_find_best_extent(
871 struct xfs_alloc_arg *args, /* allocation argument structure */
872 struct xfs_btree_cur **gcur, /* good cursor */
873 struct xfs_btree_cur **scur, /* searching cursor */
874 xfs_agblock_t gdiff, /* difference for search comparison */
875 xfs_agblock_t *sbno, /* extent found by search */
a2ceac1f
DC
876 xfs_extlen_t *slen, /* extent length */
877 xfs_agblock_t *sbnoa, /* aligned extent found by search */
878 xfs_extlen_t *slena, /* aligned extent length */
56b2de80
DC
879 int dir) /* 0 = search right, 1 = search left */
880{
56b2de80
DC
881 xfs_agblock_t new;
882 xfs_agblock_t sdiff;
883 int error;
884 int i;
885
886 /* The good extent is perfect, no need to search. */
887 if (!gdiff)
888 goto out_use_good;
889
890 /*
891 * Look until we find a better one, run out of space or run off the end.
892 */
893 do {
894 error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
895 if (error)
896 goto error0;
19ebedcf 897 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
a2ceac1f 898 xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
56b2de80
DC
899
900 /*
901 * The good extent is closer than this one.
902 */
903 if (!dir) {
ff3263dd
BF
904 if (*sbnoa > args->max_agbno)
905 goto out_use_good;
a2ceac1f 906 if (*sbnoa >= args->agbno + gdiff)
56b2de80
DC
907 goto out_use_good;
908 } else {
ff3263dd
BF
909 if (*sbnoa < args->min_agbno)
910 goto out_use_good;
a2ceac1f 911 if (*sbnoa <= args->agbno - gdiff)
56b2de80
DC
912 goto out_use_good;
913 }
914
915 /*
916 * Same distance, compare length and pick the best.
917 */
918 if (*slena >= args->minlen) {
919 args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
920 xfs_alloc_fix_len(args);
921
922 sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
84a62eea 923 args->alignment,
1fccd5c8 924 args->datatype, *sbnoa,
a2ceac1f 925 *slena, &new);
56b2de80
DC
926
927 /*
928 * Choose closer size and invalidate other cursor.
929 */
930 if (sdiff < gdiff)
931 goto out_use_search;
932 goto out_use_good;
933 }
934
935 if (!dir)
936 error = xfs_btree_increment(*scur, 0, &i);
937 else
938 error = xfs_btree_decrement(*scur, 0, &i);
939 if (error)
940 goto error0;
941 } while (i);
942
943out_use_good:
944 xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
945 *scur = NULL;
946 return 0;
947
948out_use_search:
949 xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
950 *gcur = NULL;
951 return 0;
952
953error0:
954 /* caller invalidates cursors */
2bd0ea18
NS
955 return error;
956}
957
958/*
959 * Allocate a variable extent near bno in the allocation group agno.
960 * Extent's length (returned in len) will be between minlen and maxlen,
961 * and of the form k * prod + mod unless there's nothing that large.
962 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
963 */
964STATIC int /* error */
965xfs_alloc_ag_vextent_near(
dfc130f3 966 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 967{
dfc130f3
RC
968 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */
969 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */
970 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */
2bd0ea18
NS
971 xfs_agblock_t gtbno; /* start bno of right side entry */
972 xfs_agblock_t gtbnoa; /* aligned ... */
973 xfs_extlen_t gtdiff; /* difference to right side entry */
974 xfs_extlen_t gtlen; /* length of right side entry */
a2ceac1f 975 xfs_extlen_t gtlena; /* aligned ... */
2bd0ea18
NS
976 xfs_agblock_t gtnew; /* useful start bno of right side */
977 int error; /* error code */
978 int i; /* result code, temporary */
979 int j; /* result code, temporary */
980 xfs_agblock_t ltbno; /* start bno of left side entry */
981 xfs_agblock_t ltbnoa; /* aligned ... */
982 xfs_extlen_t ltdiff; /* difference to left side entry */
2bd0ea18 983 xfs_extlen_t ltlen; /* length of left side entry */
a2ceac1f 984 xfs_extlen_t ltlena; /* aligned ... */
2bd0ea18
NS
985 xfs_agblock_t ltnew; /* useful start bno of left side */
986 xfs_extlen_t rlen; /* length of returned extent */
a2ceac1f 987 int forced = 0;
6beba453 988#ifdef DEBUG
2bd0ea18
NS
989 /*
990 * Randomly don't execute the first algorithm.
991 */
2bd0ea18 992 int dofirst; /* set to do first algorithm */
2bd0ea18 993
49f693fa 994 dofirst = prandom_u32() & 1;
2bd0ea18 995#endif
a2ceac1f 996
ff3263dd
BF
997 /* handle unitialized agbno range so caller doesn't have to */
998 if (!args->min_agbno && !args->max_agbno)
999 args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
1000 ASSERT(args->min_agbno <= args->max_agbno);
1001
1002 /* clamp agbno to the range if it's outside */
1003 if (args->agbno < args->min_agbno)
1004 args->agbno = args->min_agbno;
1005 if (args->agbno > args->max_agbno)
1006 args->agbno = args->max_agbno;
1007
a2ceac1f
DC
1008restart:
1009 bno_cur_lt = NULL;
1010 bno_cur_gt = NULL;
1011 ltlen = 0;
1012 gtlena = 0;
1013 ltlena = 0;
1014
2bd0ea18
NS
1015 /*
1016 * Get a cursor for the by-size btree.
1017 */
b194c7d8
BN
1018 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1019 args->agno, XFS_BTNUM_CNT);
a2ceac1f 1020
2bd0ea18
NS
1021 /*
1022 * See if there are any free extents as big as maxlen.
1023 */
0e266570 1024 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
2bd0ea18
NS
1025 goto error0;
1026 /*
1027 * If none, then pick up the last entry in the tree unless the
1028 * tree is empty.
5000d01d 1029 */
2bd0ea18 1030 if (!i) {
0e266570
NS
1031 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
1032 &ltlen, &i)))
2bd0ea18
NS
1033 goto error0;
1034 if (i == 0 || ltlen == 0) {
1035 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
a2ceac1f 1036 trace_xfs_alloc_near_noentry(args);
2bd0ea18
NS
1037 return 0;
1038 }
1039 ASSERT(i == 1);
1040 }
1041 args->wasfromfl = 0;
a2ceac1f 1042
5000d01d 1043 /*
2bd0ea18
NS
1044 * First algorithm.
1045 * If the requested extent is large wrt the freespaces available
1046 * in this a.g., then the cursor will be pointing to a btree entry
1047 * near the right edge of the tree. If it's in the last btree leaf
1048 * block, then we just examine all the entries in that block
1049 * that are big enough, and pick the best one.
1050 * This is written as a while loop so we can break out of it,
1051 * but we never loop back to the top.
1052 */
1053 while (xfs_btree_islastblock(cnt_cur, 0)) {
1054 xfs_extlen_t bdiff;
0e266570
NS
1055 int besti=0;
1056 xfs_extlen_t blen=0;
1057 xfs_agblock_t bnew=0;
2bd0ea18 1058
6beba453
DC
1059#ifdef DEBUG
1060 if (dofirst)
2bd0ea18
NS
1061 break;
1062#endif
1063 /*
1064 * Start from the entry that lookup found, sequence through
1065 * all larger free blocks. If we're actually pointing at a
1066 * record smaller than maxlen, go to the start of this block,
1067 * and skip all those smaller than minlen.
1068 */
1069 if (ltlen || args->alignment > 1) {
1070 cnt_cur->bc_ptrs[0] = 1;
1071 do {
0e266570
NS
1072 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
1073 &ltlen, &i)))
2bd0ea18 1074 goto error0;
19ebedcf 1075 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18
NS
1076 if (ltlen >= args->minlen)
1077 break;
b194c7d8 1078 if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
2bd0ea18
NS
1079 goto error0;
1080 } while (i);
1081 ASSERT(ltlen >= args->minlen);
1082 if (!i)
1083 break;
1084 }
1085 i = cnt_cur->bc_ptrs[0];
1086 for (j = 1, blen = 0, bdiff = 0;
1087 !error && j && (blen < args->maxlen || bdiff > 0);
b194c7d8 1088 error = xfs_btree_increment(cnt_cur, 0, &j)) {
2bd0ea18
NS
1089 /*
1090 * For each entry, decide if it's better than
1091 * the previous best entry.
1092 */
0e266570 1093 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
2bd0ea18 1094 goto error0;
19ebedcf 1095 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
a2ceac1f
DC
1096 xfs_alloc_compute_aligned(args, ltbno, ltlen,
1097 &ltbnoa, &ltlena);
5e656dbb 1098 if (ltlena < args->minlen)
2bd0ea18 1099 continue;
ff3263dd
BF
1100 if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
1101 continue;
2bd0ea18
NS
1102 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1103 xfs_alloc_fix_len(args);
1104 ASSERT(args->len >= args->minlen);
1105 if (args->len < blen)
1106 continue;
1107 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1fccd5c8 1108 args->alignment, args->datatype, ltbnoa,
84a62eea 1109 ltlena, &ltnew);
2bd0ea18
NS
1110 if (ltnew != NULLAGBLOCK &&
1111 (args->len > blen || ltdiff < bdiff)) {
1112 bdiff = ltdiff;
1113 bnew = ltnew;
1114 blen = args->len;
1115 besti = cnt_cur->bc_ptrs[0];
1116 }
1117 }
1118 /*
1119 * It didn't work. We COULD be in a case where
1120 * there's a good record somewhere, so try again.
1121 */
1122 if (blen == 0)
1123 break;
1124 /*
1125 * Point at the best entry, and retrieve it again.
1126 */
1127 cnt_cur->bc_ptrs[0] = besti;
0e266570 1128 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
2bd0ea18 1129 goto error0;
19ebedcf 1130 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
56b2de80 1131 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
2bd0ea18
NS
1132 args->len = blen;
1133 if (!xfs_alloc_fix_minleft(args)) {
1134 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
56b2de80 1135 trace_xfs_alloc_near_nominleft(args);
2bd0ea18
NS
1136 return 0;
1137 }
1138 blen = args->len;
1139 /*
1140 * We are allocating starting at bnew for blen blocks.
1141 */
1142 args->agbno = bnew;
1143 ASSERT(bnew >= ltbno);
56b2de80 1144 ASSERT(bnew + blen <= ltbno + ltlen);
2bd0ea18
NS
1145 /*
1146 * Set up a cursor for the by-bno tree.
1147 */
b194c7d8
BN
1148 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1149 args->agbp, args->agno, XFS_BTNUM_BNO);
2bd0ea18
NS
1150 /*
1151 * Fix up the btree entries.
1152 */
0e266570
NS
1153 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1154 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
2bd0ea18
NS
1155 goto error0;
1156 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1157 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
56b2de80
DC
1158
1159 trace_xfs_alloc_near_first(args);
2bd0ea18
NS
1160 return 0;
1161 }
1162 /*
1163 * Second algorithm.
1164 * Search in the by-bno tree to the left and to the right
1165 * simultaneously, until in each case we find a space big enough,
1166 * or run into the edge of the tree. When we run into the edge,
1167 * we deallocate that cursor.
1168 * If both searches succeed, we compare the two spaces and pick
1169 * the better one.
1170 * With alignment, it's possible for both to fail; the upper
1171 * level algorithm that picks allocation groups for allocations
1172 * is not supposed to do this.
1173 */
1174 /*
1175 * Allocate and initialize the cursor for the leftward search.
1176 */
b194c7d8
BN
1177 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1178 args->agno, XFS_BTNUM_BNO);
2bd0ea18
NS
1179 /*
1180 * Lookup <= bno to find the leftward search's starting point.
1181 */
0e266570 1182 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
2bd0ea18
NS
1183 goto error0;
1184 if (!i) {
1185 /*
1186 * Didn't find anything; use this cursor for the rightward
1187 * search.
1188 */
1189 bno_cur_gt = bno_cur_lt;
062998e3 1190 bno_cur_lt = NULL;
2bd0ea18
NS
1191 }
1192 /*
1193 * Found something. Duplicate the cursor for the rightward search.
1194 */
0e266570 1195 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
2bd0ea18
NS
1196 goto error0;
1197 /*
1198 * Increment the cursor, so we will point at the entry just right
1199 * of the leftward entry if any, or to the leftmost entry.
1200 */
b194c7d8 1201 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
2bd0ea18
NS
1202 goto error0;
1203 if (!i) {
1204 /*
1205 * It failed, there are no rightward entries.
1206 */
1207 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1208 bno_cur_gt = NULL;
1209 }
1210 /*
1211 * Loop going left with the leftward cursor, right with the
1212 * rightward cursor, until either both directions give up or
1213 * we find an entry at least as big as minlen.
1214 */
1215 do {
1216 if (bno_cur_lt) {
0e266570 1217 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
2bd0ea18 1218 goto error0;
19ebedcf 1219 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
a2ceac1f
DC
1220 xfs_alloc_compute_aligned(args, ltbno, ltlen,
1221 &ltbnoa, &ltlena);
ff3263dd 1222 if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
2bd0ea18 1223 break;
b194c7d8 1224 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
2bd0ea18 1225 goto error0;
ff3263dd 1226 if (!i || ltbnoa < args->min_agbno) {
2bd0ea18
NS
1227 xfs_btree_del_cursor(bno_cur_lt,
1228 XFS_BTREE_NOERROR);
1229 bno_cur_lt = NULL;
1230 }
1231 }
1232 if (bno_cur_gt) {
0e266570 1233 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
2bd0ea18 1234 goto error0;
19ebedcf 1235 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
a2ceac1f
DC
1236 xfs_alloc_compute_aligned(args, gtbno, gtlen,
1237 &gtbnoa, &gtlena);
ff3263dd 1238 if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
2bd0ea18 1239 break;
b194c7d8 1240 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
2bd0ea18 1241 goto error0;
ff3263dd 1242 if (!i || gtbnoa > args->max_agbno) {
2bd0ea18
NS
1243 xfs_btree_del_cursor(bno_cur_gt,
1244 XFS_BTREE_NOERROR);
1245 bno_cur_gt = NULL;
1246 }
1247 }
1248 } while (bno_cur_lt || bno_cur_gt);
56b2de80 1249
2bd0ea18
NS
1250 /*
1251 * Got both cursors still active, need to find better entry.
1252 */
1253 if (bno_cur_lt && bno_cur_gt) {
2bd0ea18
NS
1254 if (ltlena >= args->minlen) {
1255 /*
56b2de80 1256 * Left side is good, look for a right side entry.
2bd0ea18
NS
1257 */
1258 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1259 xfs_alloc_fix_len(args);
56b2de80 1260 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1fccd5c8 1261 args->alignment, args->datatype, ltbnoa,
84a62eea 1262 ltlena, &ltnew);
56b2de80
DC
1263
1264 error = xfs_alloc_find_best_extent(args,
1265 &bno_cur_lt, &bno_cur_gt,
a2ceac1f
DC
1266 ltdiff, &gtbno, &gtlen,
1267 &gtbnoa, &gtlena,
56b2de80
DC
1268 0 /* search right */);
1269 } else {
1270 ASSERT(gtlena >= args->minlen);
1271
2bd0ea18 1272 /*
56b2de80 1273 * Right side is good, look for a left side entry.
2bd0ea18
NS
1274 */
1275 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1276 xfs_alloc_fix_len(args);
56b2de80 1277 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1fccd5c8 1278 args->alignment, args->datatype, gtbnoa,
84a62eea 1279 gtlena, &gtnew);
56b2de80
DC
1280
1281 error = xfs_alloc_find_best_extent(args,
1282 &bno_cur_gt, &bno_cur_lt,
a2ceac1f
DC
1283 gtdiff, &ltbno, &ltlen,
1284 &ltbnoa, &ltlena,
56b2de80 1285 1 /* search left */);
2bd0ea18 1286 }
56b2de80
DC
1287
1288 if (error)
1289 goto error0;
2bd0ea18 1290 }
56b2de80 1291
2bd0ea18
NS
1292 /*
1293 * If we couldn't get anything, give up.
1294 */
1295 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
a2ceac1f
DC
1296 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1297
1298 if (!forced++) {
1299 trace_xfs_alloc_near_busy(args);
1300 xfs_log_force(args->mp, XFS_LOG_SYNC);
1301 goto restart;
1302 }
56b2de80 1303 trace_xfs_alloc_size_neither(args);
2bd0ea18
NS
1304 args->agbno = NULLAGBLOCK;
1305 return 0;
1306 }
56b2de80 1307
2bd0ea18
NS
1308 /*
1309 * At this point we have selected a freespace entry, either to the
1310 * left or to the right. If it's on the right, copy all the
1311 * useful variables to the "left" set so we only have one
1312 * copy of this code.
1313 */
1314 if (bno_cur_gt) {
1315 bno_cur_lt = bno_cur_gt;
1316 bno_cur_gt = NULL;
1317 ltbno = gtbno;
1318 ltbnoa = gtbnoa;
1319 ltlen = gtlen;
1320 ltlena = gtlena;
1321 j = 1;
1322 } else
1323 j = 0;
56b2de80 1324
2bd0ea18
NS
1325 /*
1326 * Fix up the length and compute the useful address.
1327 */
2bd0ea18
NS
1328 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1329 xfs_alloc_fix_len(args);
1330 if (!xfs_alloc_fix_minleft(args)) {
56b2de80 1331 trace_xfs_alloc_near_nominleft(args);
2bd0ea18
NS
1332 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1333 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1334 return 0;
1335 }
1336 rlen = args->len;
a2ceac1f 1337 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1fccd5c8 1338 args->datatype, ltbnoa, ltlena, &ltnew);
2bd0ea18 1339 ASSERT(ltnew >= ltbno);
a2ceac1f 1340 ASSERT(ltnew + rlen <= ltbnoa + ltlena);
6e3140c7 1341 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
ff3263dd 1342 ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
2bd0ea18 1343 args->agbno = ltnew;
a2ceac1f 1344
0e266570
NS
1345 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1346 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
2bd0ea18 1347 goto error0;
56b2de80
DC
1348
1349 if (j)
1350 trace_xfs_alloc_near_greater(args);
1351 else
1352 trace_xfs_alloc_near_lesser(args);
1353
2bd0ea18
NS
1354 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1355 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1356 return 0;
1357
1358 error0:
56b2de80 1359 trace_xfs_alloc_near_error(args);
2bd0ea18
NS
1360 if (cnt_cur != NULL)
1361 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1362 if (bno_cur_lt != NULL)
1363 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1364 if (bno_cur_gt != NULL)
1365 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1366 return error;
1367}
1368
1369/*
1370 * Allocate a variable extent anywhere in the allocation group agno.
1371 * Extent's length (returned in len) will be between minlen and maxlen,
1372 * and of the form k * prod + mod unless there's nothing that large.
1373 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1374 */
1375STATIC int /* error */
1376xfs_alloc_ag_vextent_size(
dfc130f3 1377 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 1378{
dfc130f3
RC
1379 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */
1380 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */
2bd0ea18
NS
1381 int error; /* error result */
1382 xfs_agblock_t fbno; /* start of found freespace */
1383 xfs_extlen_t flen; /* length of found freespace */
2bd0ea18
NS
1384 int i; /* temp status variable */
1385 xfs_agblock_t rbno; /* returned block number */
1386 xfs_extlen_t rlen; /* length of returned extent */
a2ceac1f 1387 int forced = 0;
2bd0ea18 1388
a2ceac1f 1389restart:
2bd0ea18
NS
1390 /*
1391 * Allocate and initialize a cursor for the by-size btree.
1392 */
b194c7d8
BN
1393 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1394 args->agno, XFS_BTNUM_CNT);
2bd0ea18 1395 bno_cur = NULL;
a2ceac1f 1396
2bd0ea18
NS
1397 /*
1398 * Look for an entry >= maxlen+alignment-1 blocks.
1399 */
0e266570
NS
1400 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1401 args->maxlen + args->alignment - 1, &i)))
2bd0ea18 1402 goto error0;
a2ceac1f 1403
2bd0ea18 1404 /*
a2ceac1f
DC
1405 * If none or we have busy extents that we cannot allocate from, then
1406 * we have to settle for a smaller extent. In the case that there are
1407 * no large extents, this will return the last entry in the tree unless
1408 * the tree is empty. In the case that there are only busy large
1409 * extents, this will return the largest small extent unless there
1410 * are no smaller extents available.
5000d01d 1411 */
a2ceac1f
DC
1412 if (!i || forced > 1) {
1413 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1414 &fbno, &flen, &i);
1415 if (error)
2bd0ea18
NS
1416 goto error0;
1417 if (i == 0 || flen == 0) {
1418 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
56b2de80 1419 trace_xfs_alloc_size_noentry(args);
2bd0ea18
NS
1420 return 0;
1421 }
1422 ASSERT(i == 1);
a2ceac1f
DC
1423 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1424 } else {
1425 /*
1426 * Search for a non-busy extent that is large enough.
1427 * If we are at low space, don't check, or if we fall of
1428 * the end of the btree, turn off the busy check and
1429 * restart.
1430 */
1431 for (;;) {
1432 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1433 if (error)
1434 goto error0;
19ebedcf 1435 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
a2ceac1f
DC
1436
1437 xfs_alloc_compute_aligned(args, fbno, flen,
1438 &rbno, &rlen);
1439
1440 if (rlen >= args->maxlen)
1441 break;
1442
1443 error = xfs_btree_increment(cnt_cur, 0, &i);
1444 if (error)
1445 goto error0;
1446 if (i == 0) {
1447 /*
1448 * Our only valid extents must have been busy.
1449 * Make it unbusy by forcing the log out and
1450 * retrying. If we've been here before, forcing
1451 * the log isn't making the extents available,
1452 * which means they have probably been freed in
1453 * this transaction. In that case, we have to
1454 * give up on them and we'll attempt a minlen
1455 * allocation the next time around.
1456 */
1457 xfs_btree_del_cursor(cnt_cur,
1458 XFS_BTREE_NOERROR);
1459 trace_xfs_alloc_size_busy(args);
1460 if (!forced++)
1461 xfs_log_force(args->mp, XFS_LOG_SYNC);
1462 goto restart;
1463 }
1464 }
2bd0ea18 1465 }
a2ceac1f 1466
2bd0ea18
NS
1467 /*
1468 * In the first case above, we got the last entry in the
1469 * by-size btree. Now we check to see if the space hits maxlen
1470 * once aligned; if not, we search left for something better.
1471 * This can't happen in the second case above.
1472 */
2bd0ea18 1473 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
19ebedcf 1474 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
2bd0ea18
NS
1475 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1476 if (rlen < args->maxlen) {
1477 xfs_agblock_t bestfbno;
1478 xfs_extlen_t bestflen;
1479 xfs_agblock_t bestrbno;
1480 xfs_extlen_t bestrlen;
1481
1482 bestrlen = rlen;
1483 bestrbno = rbno;
1484 bestflen = flen;
1485 bestfbno = fbno;
1486 for (;;) {
b194c7d8 1487 if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
2bd0ea18
NS
1488 goto error0;
1489 if (i == 0)
1490 break;
0e266570
NS
1491 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1492 &i)))
2bd0ea18 1493 goto error0;
19ebedcf 1494 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18
NS
1495 if (flen < bestrlen)
1496 break;
a2ceac1f
DC
1497 xfs_alloc_compute_aligned(args, fbno, flen,
1498 &rbno, &rlen);
2bd0ea18 1499 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
19ebedcf 1500 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
2bd0ea18
NS
1501 (rlen <= flen && rbno + rlen <= fbno + flen),
1502 error0);
1503 if (rlen > bestrlen) {
1504 bestrlen = rlen;
1505 bestrbno = rbno;
1506 bestflen = flen;
1507 bestfbno = fbno;
1508 if (rlen == args->maxlen)
1509 break;
1510 }
5000d01d 1511 }
0e266570
NS
1512 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1513 &i)))
2bd0ea18 1514 goto error0;
19ebedcf 1515 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18
NS
1516 rlen = bestrlen;
1517 rbno = bestrbno;
1518 flen = bestflen;
1519 fbno = bestfbno;
1520 }
1521 args->wasfromfl = 0;
1522 /*
1523 * Fix up the length.
1524 */
1525 args->len = rlen;
a2ceac1f
DC
1526 if (rlen < args->minlen) {
1527 if (!forced++) {
1528 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1529 trace_xfs_alloc_size_busy(args);
1530 xfs_log_force(args->mp, XFS_LOG_SYNC);
1531 goto restart;
1532 }
1533 goto out_nominleft;
2bd0ea18 1534 }
a2ceac1f
DC
1535 xfs_alloc_fix_len(args);
1536
1537 if (!xfs_alloc_fix_minleft(args))
1538 goto out_nominleft;
2bd0ea18 1539 rlen = args->len;
19ebedcf 1540 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
2bd0ea18
NS
1541 /*
1542 * Allocate and initialize a cursor for the by-block tree.
1543 */
b194c7d8
BN
1544 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1545 args->agno, XFS_BTNUM_BNO);
0e266570
NS
1546 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1547 rbno, rlen, XFSA_FIXUP_CNT_OK)))
2bd0ea18
NS
1548 goto error0;
1549 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1550 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1551 cnt_cur = bno_cur = NULL;
1552 args->len = rlen;
1553 args->agbno = rbno;
19ebedcf 1554 XFS_WANT_CORRUPTED_GOTO(args->mp,
2bd0ea18 1555 args->agbno + args->len <=
6e3140c7 1556 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
2bd0ea18 1557 error0);
56b2de80 1558 trace_xfs_alloc_size_done(args);
2bd0ea18
NS
1559 return 0;
1560
1561error0:
56b2de80 1562 trace_xfs_alloc_size_error(args);
2bd0ea18
NS
1563 if (cnt_cur)
1564 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1565 if (bno_cur)
1566 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1567 return error;
a2ceac1f
DC
1568
1569out_nominleft:
1570 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1571 trace_xfs_alloc_size_nominleft(args);
1572 args->agbno = NULLAGBLOCK;
1573 return 0;
2bd0ea18
NS
1574}
1575
1576/*
1577 * Deal with the case where only small freespaces remain.
1578 * Either return the contents of the last freespace record,
1579 * or allocate space from the freelist if there is nothing in the tree.
1580 */
1581STATIC int /* error */
1582xfs_alloc_ag_vextent_small(
dfc130f3
RC
1583 xfs_alloc_arg_t *args, /* allocation argument structure */
1584 xfs_btree_cur_t *ccur, /* by-size cursor */
1585 xfs_agblock_t *fbnop, /* result block number */
1586 xfs_extlen_t *flenp, /* result length */
2bd0ea18
NS
1587 int *stat) /* status: 0-freelist, 1-normal/none */
1588{
59b86360
DW
1589 struct xfs_owner_info oinfo;
1590 struct xfs_perag *pag;
2bd0ea18
NS
1591 int error;
1592 xfs_agblock_t fbno;
1593 xfs_extlen_t flen;
2bd0ea18
NS
1594 int i;
1595
b194c7d8 1596 if ((error = xfs_btree_decrement(ccur, 0, &i)))
2bd0ea18
NS
1597 goto error0;
1598 if (i) {
0e266570 1599 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
2bd0ea18 1600 goto error0;
19ebedcf 1601 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18
NS
1602 }
1603 /*
1604 * Nothing in the btree, try the freelist. Make sure
1605 * to respect minleft even when pulling from the
1606 * freelist.
1607 */
cf8ce220
DW
1608 else if (args->minlen == 1 && args->alignment == 1 &&
1609 args->resv != XFS_AG_RESV_AGFL &&
6e3140c7
NS
1610 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1611 > args->minleft)) {
cdded3d8
DC
1612 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1613 if (error)
2bd0ea18
NS
1614 goto error0;
1615 if (fbno != NULLAGBLOCK) {
a2ceac1f 1616 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1fccd5c8 1617 xfs_alloc_allow_busy_reuse(args->datatype));
a2ceac1f 1618
1fccd5c8 1619 if (xfs_alloc_is_userdata(args->datatype)) {
2bd0ea18
NS
1620 xfs_buf_t *bp;
1621
1622 bp = xfs_btree_get_bufs(args->mp, args->tp,
1623 args->agno, fbno, 0);
1624 xfs_trans_binval(args->tp, bp);
2bd0ea18
NS
1625 }
1626 args->len = 1;
1627 args->agbno = fbno;
19ebedcf 1628 XFS_WANT_CORRUPTED_GOTO(args->mp,
2bd0ea18 1629 args->agbno + args->len <=
6e3140c7 1630 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
2bd0ea18
NS
1631 error0);
1632 args->wasfromfl = 1;
56b2de80 1633 trace_xfs_alloc_small_freelist(args);
59b86360
DW
1634
1635 /*
1636 * If we're feeding an AGFL block to something that
1637 * doesn't live in the free space, we need to clear
cf8ce220
DW
1638 * out the OWN_AG rmap and add the block back to
1639 * the AGFL per-AG reservation.
59b86360
DW
1640 */
1641 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
1642 error = xfs_rmap_free(args->tp, args->agbp, args->agno,
1643 fbno, 1, &oinfo);
1644 if (error)
1645 goto error0;
1646 pag = xfs_perag_get(args->mp, args->agno);
cf8ce220
DW
1647 xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL,
1648 args->tp, 1);
59b86360
DW
1649 xfs_perag_put(pag);
1650
2bd0ea18
NS
1651 *stat = 0;
1652 return 0;
1653 }
1654 /*
1655 * Nothing in the freelist.
1656 */
1657 else
1658 flen = 0;
1659 }
1660 /*
1661 * Can't allocate from the freelist for some reason.
1662 */
5e656dbb
BN
1663 else {
1664 fbno = NULLAGBLOCK;
2bd0ea18 1665 flen = 0;
5e656dbb 1666 }
2bd0ea18
NS
1667 /*
1668 * Can't do the allocation, give up.
1669 */
1670 if (flen < args->minlen) {
1671 args->agbno = NULLAGBLOCK;
56b2de80 1672 trace_xfs_alloc_small_notenough(args);
2bd0ea18
NS
1673 flen = 0;
1674 }
1675 *fbnop = fbno;
1676 *flenp = flen;
1677 *stat = 1;
56b2de80 1678 trace_xfs_alloc_small_done(args);
2bd0ea18
NS
1679 return 0;
1680
1681error0:
56b2de80 1682 trace_xfs_alloc_small_error(args);
2bd0ea18
NS
1683 return error;
1684}
1685
1686/*
1687 * Free the extent starting at agno/bno for length.
1688 */
85aec44f 1689STATIC int
2bd0ea18 1690xfs_free_ag_extent(
85aec44f
DW
1691 xfs_trans_t *tp,
1692 xfs_buf_t *agbp,
1693 xfs_agnumber_t agno,
1694 xfs_agblock_t bno,
1695 xfs_extlen_t len,
1696 struct xfs_owner_info *oinfo,
cf8ce220 1697 enum xfs_ag_resv_type type)
2bd0ea18 1698{
dfc130f3
RC
1699 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
1700 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
2bd0ea18 1701 int error; /* error return value */
2bd0ea18
NS
1702 xfs_agblock_t gtbno; /* start of right neighbor block */
1703 xfs_extlen_t gtlen; /* length of right neighbor block */
1704 int haveleft; /* have a left neighbor block */
1705 int haveright; /* have a right neighbor block */
1706 int i; /* temp, result code */
1707 xfs_agblock_t ltbno; /* start of left neighbor block */
1708 xfs_extlen_t ltlen; /* length of left neighbor block */
1709 xfs_mount_t *mp; /* mount point struct for filesystem */
1710 xfs_agblock_t nbno; /* new starting block of freespace */
1711 xfs_extlen_t nlen; /* new length of freespace */
a2ceac1f 1712 xfs_perag_t *pag; /* per allocation group data */
2bd0ea18 1713
631ac87a 1714 bno_cur = cnt_cur = NULL;
2bd0ea18 1715 mp = tp->t_mountp;
631ac87a
DW
1716
1717 if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
1718 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1719 if (error)
1720 goto error0;
1721 }
1722
5000d01d 1723 /*
2bd0ea18
NS
1724 * Allocate and initialize a cursor for the by-block btree.
1725 */
b194c7d8 1726 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
5000d01d 1727 /*
2bd0ea18
NS
1728 * Look for a neighboring block on the left (lower block numbers)
1729 * that is contiguous with this space.
1730 */
0e266570 1731 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
2bd0ea18
NS
1732 goto error0;
1733 if (haveleft) {
1734 /*
1735 * There is a block to our left.
1736 */
0e266570 1737 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
2bd0ea18 1738 goto error0;
19ebedcf 1739 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1740 /*
1741 * It's not contiguous, though.
1742 */
1743 if (ltbno + ltlen < bno)
1744 haveleft = 0;
1745 else {
1746 /*
1747 * If this failure happens the request to free this
1748 * space was invalid, it's (partly) already free.
1749 * Very bad.
1750 */
19ebedcf
DC
1751 XFS_WANT_CORRUPTED_GOTO(mp,
1752 ltbno + ltlen <= bno, error0);
2bd0ea18
NS
1753 }
1754 }
5000d01d 1755 /*
2bd0ea18
NS
1756 * Look for a neighboring block on the right (higher block numbers)
1757 * that is contiguous with this space.
1758 */
b194c7d8 1759 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
2bd0ea18
NS
1760 goto error0;
1761 if (haveright) {
1762 /*
1763 * There is a block to our right.
1764 */
0e266570 1765 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
2bd0ea18 1766 goto error0;
19ebedcf 1767 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1768 /*
1769 * It's not contiguous, though.
1770 */
1771 if (bno + len < gtbno)
1772 haveright = 0;
1773 else {
1774 /*
1775 * If this failure happens the request to free this
1776 * space was invalid, it's (partly) already free.
1777 * Very bad.
1778 */
19ebedcf 1779 XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
2bd0ea18
NS
1780 }
1781 }
1782 /*
1783 * Now allocate and initialize a cursor for the by-size tree.
1784 */
b194c7d8 1785 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
2bd0ea18
NS
1786 /*
1787 * Have both left and right contiguous neighbors.
1788 * Merge all three into a single free block.
1789 */
1790 if (haveleft && haveright) {
1791 /*
1792 * Delete the old by-size entry on the left.
1793 */
0e266570 1794 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18 1795 goto error0;
19ebedcf 1796 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 1797 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1798 goto error0;
19ebedcf 1799 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1800 /*
1801 * Delete the old by-size entry on the right.
1802 */
0e266570 1803 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18 1804 goto error0;
19ebedcf 1805 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 1806 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1807 goto error0;
19ebedcf 1808 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1809 /*
1810 * Delete the old by-block entry for the right block.
1811 */
b194c7d8 1812 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18 1813 goto error0;
19ebedcf 1814 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1815 /*
1816 * Move the by-block cursor back to the left neighbor.
1817 */
b194c7d8 1818 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18 1819 goto error0;
19ebedcf 1820 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1821#ifdef DEBUG
1822 /*
1823 * Check that this is the right record: delete didn't
1824 * mangle the cursor.
1825 */
1826 {
1827 xfs_agblock_t xxbno;
1828 xfs_extlen_t xxlen;
1829
0e266570
NS
1830 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1831 &i)))
2bd0ea18 1832 goto error0;
19ebedcf 1833 XFS_WANT_CORRUPTED_GOTO(mp,
2bd0ea18
NS
1834 i == 1 && xxbno == ltbno && xxlen == ltlen,
1835 error0);
1836 }
1837#endif
1838 /*
1839 * Update remaining by-block entry to the new, joined block.
1840 */
1841 nbno = ltbno;
1842 nlen = len + ltlen + gtlen;
0e266570 1843 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1844 goto error0;
1845 }
1846 /*
1847 * Have only a left contiguous neighbor.
1848 * Merge it together with the new freespace.
1849 */
1850 else if (haveleft) {
1851 /*
1852 * Delete the old by-size entry on the left.
1853 */
0e266570 1854 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18 1855 goto error0;
19ebedcf 1856 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 1857 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1858 goto error0;
19ebedcf 1859 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1860 /*
1861 * Back up the by-block cursor to the left neighbor, and
1862 * update its length.
1863 */
b194c7d8 1864 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18 1865 goto error0;
19ebedcf 1866 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1867 nbno = ltbno;
1868 nlen = len + ltlen;
0e266570 1869 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1870 goto error0;
1871 }
1872 /*
1873 * Have only a right contiguous neighbor.
1874 * Merge it together with the new freespace.
1875 */
1876 else if (haveright) {
1877 /*
1878 * Delete the old by-size entry on the right.
1879 */
0e266570 1880 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18 1881 goto error0;
19ebedcf 1882 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 1883 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1884 goto error0;
19ebedcf 1885 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18 1886 /*
5000d01d 1887 * Update the starting block and length of the right
2bd0ea18
NS
1888 * neighbor in the by-block tree.
1889 */
1890 nbno = bno;
1891 nlen = len + gtlen;
0e266570 1892 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1893 goto error0;
1894 }
1895 /*
1896 * No contiguous neighbors.
1897 * Insert the new freespace into the by-block tree.
1898 */
1899 else {
1900 nbno = bno;
1901 nlen = len;
b194c7d8 1902 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18 1903 goto error0;
19ebedcf 1904 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1905 }
1906 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1907 bno_cur = NULL;
1908 /*
1909 * In all cases we need to insert the new freespace in the by-size tree.
1910 */
0e266570 1911 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2bd0ea18 1912 goto error0;
19ebedcf 1913 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
b194c7d8 1914 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 1915 goto error0;
19ebedcf 1916 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1917 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1918 cnt_cur = NULL;
a2ceac1f 1919
2bd0ea18
NS
1920 /*
1921 * Update the freespace totals in the ag and superblock.
1922 */
a2ceac1f
DC
1923 pag = xfs_perag_get(mp, agno);
1924 error = xfs_alloc_update_counters(tp, pag, agbp, len);
cf8ce220 1925 xfs_ag_resv_free_extent(pag, type, tp, len);
a2ceac1f
DC
1926 xfs_perag_put(pag);
1927 if (error)
1928 goto error0;
1929
79896434
BD
1930 XFS_STATS_INC(mp, xs_freex);
1931 XFS_STATS_ADD(mp, xs_freeb, len);
56b2de80 1932
cf8ce220
DW
1933 trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
1934 haveleft, haveright);
3e535bba 1935
2bd0ea18
NS
1936 return 0;
1937
1938 error0:
cf8ce220
DW
1939 trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
1940 -1, -1);
2bd0ea18
NS
1941 if (bno_cur)
1942 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1943 if (cnt_cur)
1944 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1945 return error;
1946}
1947
5000d01d 1948/*
2bd0ea18
NS
1949 * Visible (exported) allocation/free functions.
1950 * Some of these are used just by xfs_alloc_btree.c and this file.
1951 */
1952
1953/*
1954 * Compute and fill in value of m_ag_maxlevels.
1955 */
1956void
1957xfs_alloc_compute_maxlevels(
1958 xfs_mount_t *mp) /* file system mount structure */
1959{
730e2a19
DW
1960 mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp, mp->m_alloc_mnr,
1961 (mp->m_sb.sb_agblocks + 1) / 2);
2bd0ea18
NS
1962}
1963
56b2de80 1964/*
cf8ce220
DW
1965 * Find the length of the longest extent in an AG. The 'need' parameter
1966 * specifies how much space we're going to need for the AGFL and the
1967 * 'reserved' parameter tells us how many blocks in this AG are reserved for
1968 * other callers.
56b2de80
DC
1969 */
1970xfs_extlen_t
1971xfs_alloc_longest_free_extent(
1972 struct xfs_mount *mp,
72bda06d 1973 struct xfs_perag *pag,
cf8ce220
DW
1974 xfs_extlen_t need,
1975 xfs_extlen_t reserved)
56b2de80 1976{
72bda06d 1977 xfs_extlen_t delta = 0;
56b2de80 1978
cf8ce220
DW
1979 /*
1980 * If the AGFL needs a recharge, we'll have to subtract that from the
1981 * longest extent.
1982 */
56b2de80
DC
1983 if (need > pag->pagf_flcount)
1984 delta = need - pag->pagf_flcount;
1985
cf8ce220
DW
1986 /*
1987 * If we cannot maintain others' reservations with space from the
1988 * not-longest freesp extents, we'll have to subtract /that/ from
1989 * the longest extent too.
1990 */
1991 if (pag->pagf_freeblks - pag->pagf_longest < reserved)
1992 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
1993
1994 /*
1995 * If the longest extent is long enough to satisfy all the
1996 * reservations and AGFL rules in place, we can return this extent.
1997 */
56b2de80
DC
1998 if (pag->pagf_longest > delta)
1999 return pag->pagf_longest - delta;
cf8ce220
DW
2000
2001 /* Otherwise, let the caller try for 1 block if there's space. */
56b2de80
DC
2002 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
2003}
2004
de046644
DC
2005unsigned int
2006xfs_alloc_min_freelist(
2007 struct xfs_mount *mp,
2008 struct xfs_perag *pag)
2009{
2010 unsigned int min_free;
2011
2012 /* space needed by-bno freespace btree */
2013 min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
2014 mp->m_ag_maxlevels);
2015 /* space needed by-size freespace btree */
2016 min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
2017 mp->m_ag_maxlevels);
b8a8d6e5
DW
2018 /* space needed reverse mapping used space btree */
2019 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
2020 min_free += min_t(unsigned int,
2021 pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
2022 mp->m_rmap_maxlevels);
de046644
DC
2023
2024 return min_free;
2025}
2026
5515b7c1
DC
2027/*
2028 * Check if the operation we are fixing up the freelist for should go ahead or
2029 * not. If we are freeing blocks, we always allow it, otherwise the allocation
2030 * is dependent on whether the size and shape of free space available will
2031 * permit the requested allocation to take place.
2032 */
2033static bool
2034xfs_alloc_space_available(
2035 struct xfs_alloc_arg *args,
2036 xfs_extlen_t min_free,
2037 int flags)
2038{
2039 struct xfs_perag *pag = args->pag;
2040 xfs_extlen_t longest;
cf8ce220 2041 xfs_extlen_t reservation; /* blocks that are still reserved */
5515b7c1
DC
2042 int available;
2043
2044 if (flags & XFS_ALLOC_FLAG_FREEING)
2045 return true;
2046
cf8ce220
DW
2047 reservation = xfs_ag_resv_needed(pag, args->resv);
2048
5515b7c1 2049 /* do we have enough contiguous free space for the allocation? */
cf8ce220
DW
2050 longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free,
2051 reservation);
5515b7c1
DC
2052 if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
2053 return false;
2054
cf8ce220 2055 /* do we have enough free space remaining for the allocation? */
5515b7c1 2056 available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
cf8ce220
DW
2057 reservation - min_free - args->total);
2058 if (available < (int)args->minleft || available <= 0)
5515b7c1
DC
2059 return false;
2060
2061 return true;
2062}
2063
2bd0ea18
NS
2064/*
2065 * Decide whether to use this allocation group for this allocation.
2066 * If so, fix up the btree freelist's size.
2bd0ea18 2067 */
ff105f75 2068int /* error */
2bd0ea18 2069xfs_alloc_fix_freelist(
c98e644e
DC
2070 struct xfs_alloc_arg *args, /* allocation argument structure */
2071 int flags) /* XFS_ALLOC_FLAG_... */
2bd0ea18 2072{
c98e644e
DC
2073 struct xfs_mount *mp = args->mp;
2074 struct xfs_perag *pag = args->pag;
2075 struct xfs_trans *tp = args->tp;
2076 struct xfs_buf *agbp = NULL;
2077 struct xfs_buf *agflbp = NULL;
2078 struct xfs_alloc_arg targs; /* local allocation arguments */
2079 xfs_agblock_t bno; /* freelist block */
2080 xfs_extlen_t need; /* total blocks needed in freelist */
fcdd428c 2081 int error = 0;
c98e644e 2082
2bd0ea18 2083 if (!pag->pagf_init) {
c98e644e
DC
2084 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2085 if (error)
2086 goto out_no_agbp;
2bd0ea18 2087 if (!pag->pagf_init) {
5e656dbb
BN
2088 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2089 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
c98e644e 2090 goto out_agbp_relse;
2bd0ea18 2091 }
c98e644e 2092 }
34317449 2093
5e656dbb 2094 /*
c98e644e
DC
2095 * If this is a metadata preferred pag and we are user data then try
2096 * somewhere else if we are not being asked to try harder at this
2097 * point
34317449 2098 */
1fccd5c8 2099 if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) &&
5e656dbb
BN
2100 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
2101 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
c98e644e 2102 goto out_agbp_relse;
34317449
NS
2103 }
2104
de046644 2105 need = xfs_alloc_min_freelist(mp, pag);
c98e644e
DC
2106 if (!xfs_alloc_space_available(args, need, flags))
2107 goto out_agbp_relse;
5e656dbb 2108
2bd0ea18
NS
2109 /*
2110 * Get the a.g. freespace buffer.
2111 * Can fail if we're not blocking on locks, and it's held.
2112 */
c98e644e
DC
2113 if (!agbp) {
2114 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2115 if (error)
2116 goto out_no_agbp;
2117 if (!agbp) {
5e656dbb
BN
2118 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2119 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
c98e644e 2120 goto out_no_agbp;
2bd0ea18
NS
2121 }
2122 }
72bda06d 2123
72bda06d 2124 /* If there isn't enough total space or single-extent, reject it. */
de046644 2125 need = xfs_alloc_min_freelist(mp, pag);
c98e644e
DC
2126 if (!xfs_alloc_space_available(args, need, flags))
2127 goto out_agbp_relse;
5515b7c1 2128
2bd0ea18
NS
2129 /*
2130 * Make the freelist shorter if it's too long.
72bda06d 2131 *
c98e644e
DC
2132 * Note that from this point onwards, we will always release the agf and
2133 * agfl buffers on error. This handles the case where we error out and
2134 * the buffers are clean or may not have been joined to the transaction
2135 * and hence need to be released manually. If they have been joined to
2136 * the transaction, then xfs_trans_brelse() will handle them
2137 * appropriately based on the recursion count and dirty state of the
2138 * buffer.
2139 *
72bda06d
DC
2140 * XXX (dgc): When we have lots of free space, does this buy us
2141 * anything other than extra overhead when we need to put more blocks
2142 * back on the free list? Maybe we should only do this when space is
2143 * getting low or the AGFL is more than half full?
e365af6f
DW
2144 *
2145 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
2146 * big; the NORMAP flag prevents AGFL expand/shrink operations from
2147 * updating the rmapbt. Both flags are used in xfs_repair while we're
2148 * rebuilding the rmapbt, and neither are used by the kernel. They're
2149 * both required to ensure that rmaps are correctly recorded for the
2150 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
2151 * repair/rmap.c in xfsprogs for details.
2bd0ea18 2152 */
e365af6f
DW
2153 memset(&targs, 0, sizeof(targs));
2154 if (flags & XFS_ALLOC_FLAG_NORMAP)
2155 xfs_rmap_skip_owner_update(&targs.oinfo);
2156 else
2157 xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
2158 while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
72bda06d 2159 struct xfs_buf *bp;
2bd0ea18 2160
5e656dbb
BN
2161 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2162 if (error)
c98e644e 2163 goto out_agbp_relse;
85aec44f 2164 error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
cf8ce220 2165 &targs.oinfo, XFS_AG_RESV_AGFL);
72bda06d 2166 if (error)
c98e644e 2167 goto out_agbp_relse;
2bd0ea18
NS
2168 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
2169 xfs_trans_binval(tp, bp);
2bd0ea18 2170 }
72bda06d 2171
2bd0ea18
NS
2172 targs.tp = tp;
2173 targs.mp = mp;
2174 targs.agbp = agbp;
2175 targs.agno = args->agno;
cf8ce220 2176 targs.alignment = targs.minlen = targs.prod = 1;
2bd0ea18
NS
2177 targs.type = XFS_ALLOCTYPE_THIS_AG;
2178 targs.pag = pag;
72bda06d
DC
2179 error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2180 if (error)
c98e644e 2181 goto out_agbp_relse;
72bda06d
DC
2182
2183 /* Make the freelist longer if it's too short. */
2184 while (pag->pagf_flcount < need) {
2bd0ea18 2185 targs.agbno = 0;
72bda06d 2186 targs.maxlen = need - pag->pagf_flcount;
cf8ce220 2187 targs.resv = XFS_AG_RESV_AGFL;
72bda06d
DC
2188
2189 /* Allocate as many blocks as possible at once. */
2190 error = xfs_alloc_ag_vextent(&targs);
c98e644e
DC
2191 if (error)
2192 goto out_agflbp_relse;
2193
2bd0ea18 2194 /*
dfc130f3
RC
2195 * Stop if we run out. Won't happen if callers are obeying
2196 * the restrictions correctly. Can happen for free calls
2bd0ea18
NS
2197 * on a completely full ag.
2198 */
5e656dbb
BN
2199 if (targs.agbno == NULLAGBLOCK) {
2200 if (flags & XFS_ALLOC_FLAG_FREEING)
2201 break;
c98e644e 2202 goto out_agflbp_relse;
5e656dbb 2203 }
2bd0ea18
NS
2204 /*
2205 * Put each allocated block on the list.
2206 */
2207 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
5e656dbb
BN
2208 error = xfs_alloc_put_freelist(tp, agbp,
2209 agflbp, bno, 0);
2210 if (error)
c98e644e 2211 goto out_agflbp_relse;
2bd0ea18
NS
2212 }
2213 }
cb4deb22 2214 xfs_trans_brelse(tp, agflbp);
2bd0ea18
NS
2215 args->agbp = agbp;
2216 return 0;
c98e644e
DC
2217
2218out_agflbp_relse:
2219 xfs_trans_brelse(tp, agflbp);
2220out_agbp_relse:
2221 if (agbp)
2222 xfs_trans_brelse(tp, agbp);
2223out_no_agbp:
2224 args->agbp = NULL;
2225 return error;
2bd0ea18
NS
2226}
2227
2228/*
2229 * Get a block from the freelist.
2230 * Returns with the buffer for the block gotten.
2231 */
2232int /* error */
2233xfs_alloc_get_freelist(
2234 xfs_trans_t *tp, /* transaction pointer */
2235 xfs_buf_t *agbp, /* buffer containing the agf structure */
cdded3d8
DC
2236 xfs_agblock_t *bnop, /* block address retrieved from freelist */
2237 int btreeblk) /* destination is a AGF btree */
2bd0ea18
NS
2238{
2239 xfs_agf_t *agf; /* a.g. freespace structure */
2bd0ea18
NS
2240 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
2241 xfs_agblock_t bno; /* block number returned */
dd5b876e 2242 __be32 *agfl_bno;
2bd0ea18 2243 int error;
cdded3d8 2244 int logflags;
dd5b876e 2245 xfs_mount_t *mp = tp->t_mountp;
2bd0ea18
NS
2246 xfs_perag_t *pag; /* per allocation group data */
2247
2bd0ea18
NS
2248 /*
2249 * Freelist is empty, give up.
2250 */
dd5b876e 2251 agf = XFS_BUF_TO_AGF(agbp);
46eca962 2252 if (!agf->agf_flcount) {
2bd0ea18
NS
2253 *bnop = NULLAGBLOCK;
2254 return 0;
2255 }
2256 /*
2257 * Read the array of free blocks.
2258 */
dd5b876e
DC
2259 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2260 &agflbp);
2261 if (error)
2bd0ea18 2262 return error;
dd5b876e
DC
2263
2264
2bd0ea18
NS
2265 /*
2266 * Get the block number and update the data structures.
2267 */
dd5b876e
DC
2268 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2269 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
5e656dbb 2270 be32_add_cpu(&agf->agf_flfirst, 1);
2bd0ea18 2271 xfs_trans_brelse(tp, agflbp);
6e3140c7 2272 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
46eca962 2273 agf->agf_flfirst = 0;
56b2de80
DC
2274
2275 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
5e656dbb 2276 be32_add_cpu(&agf->agf_flcount, -1);
2bd0ea18
NS
2277 xfs_trans_agflist_delta(tp, -1);
2278 pag->pagf_flcount--;
56b2de80 2279 xfs_perag_put(pag);
cdded3d8
DC
2280
2281 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2282 if (btreeblk) {
5e656dbb 2283 be32_add_cpu(&agf->agf_btreeblks, 1);
cdded3d8
DC
2284 pag->pagf_btreeblks++;
2285 logflags |= XFS_AGF_BTREEBLKS;
2286 }
2287
cdded3d8 2288 xfs_alloc_log_agf(tp, agbp, logflags);
2bd0ea18 2289 *bnop = bno;
3e535bba 2290
2bd0ea18
NS
2291 return 0;
2292}
2293
2294/*
2295 * Log the given fields from the agf structure.
2296 */
2297void
2298xfs_alloc_log_agf(
2299 xfs_trans_t *tp, /* transaction pointer */
2300 xfs_buf_t *bp, /* buffer for a.g. freelist header */
dfc130f3 2301 int fields) /* mask of fields to be logged (XFS_AGF_...) */
2bd0ea18
NS
2302{
2303 int first; /* first byte offset */
2304 int last; /* last byte offset */
2305 static const short offsets[] = {
2306 offsetof(xfs_agf_t, agf_magicnum),
2307 offsetof(xfs_agf_t, agf_versionnum),
2308 offsetof(xfs_agf_t, agf_seqno),
2309 offsetof(xfs_agf_t, agf_length),
2310 offsetof(xfs_agf_t, agf_roots[0]),
2311 offsetof(xfs_agf_t, agf_levels[0]),
2312 offsetof(xfs_agf_t, agf_flfirst),
2313 offsetof(xfs_agf_t, agf_fllast),
2314 offsetof(xfs_agf_t, agf_flcount),
2315 offsetof(xfs_agf_t, agf_freeblks),
2316 offsetof(xfs_agf_t, agf_longest),
cdded3d8 2317 offsetof(xfs_agf_t, agf_btreeblks),
dd5b876e 2318 offsetof(xfs_agf_t, agf_uuid),
8511b71a
DW
2319 offsetof(xfs_agf_t, agf_rmap_blocks),
2320 /* needed so that we don't log the whole rest of the structure: */
2321 offsetof(xfs_agf_t, agf_spare64),
2bd0ea18
NS
2322 sizeof(xfs_agf_t)
2323 };
2324
56b2de80
DC
2325 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2326
bdc16ee5 2327 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
dd5b876e 2328
2bd0ea18
NS
2329 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2330 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2331}
2332
2333/*
2334 * Interface for inode allocation to force the pag data to be initialized.
2335 */
2336int /* error */
2337xfs_alloc_pagf_init(
2338 xfs_mount_t *mp, /* file system mount structure */
2339 xfs_trans_t *tp, /* transaction pointer */
2340 xfs_agnumber_t agno, /* allocation group number */
2341 int flags) /* XFS_ALLOC_FLAGS_... */
2342{
7a3bffe4 2343 xfs_buf_t *bp;
2bd0ea18
NS
2344 int error;
2345
0e266570 2346 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2bd0ea18
NS
2347 return error;
2348 if (bp)
2349 xfs_trans_brelse(tp, bp);
2350 return 0;
2351}
2352
2353/*
2354 * Put the block on the freelist for the allocation group.
2355 */
2356int /* error */
2357xfs_alloc_put_freelist(
2358 xfs_trans_t *tp, /* transaction pointer */
2359 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2360 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
cdded3d8
DC
2361 xfs_agblock_t bno, /* block being freed */
2362 int btreeblk) /* block came from a AGF btree */
2bd0ea18
NS
2363{
2364 xfs_agf_t *agf; /* a.g. freespace structure */
5e656dbb 2365 __be32 *blockp;/* pointer to array entry */
2bd0ea18 2366 int error;
cdded3d8 2367 int logflags;
2bd0ea18
NS
2368 xfs_mount_t *mp; /* mount structure */
2369 xfs_perag_t *pag; /* per allocation group data */
dd5b876e
DC
2370 __be32 *agfl_bno;
2371 int startoff;
2bd0ea18
NS
2372
2373 agf = XFS_BUF_TO_AGF(agbp);
2374 mp = tp->t_mountp;
2375
2376 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
6e3140c7 2377 be32_to_cpu(agf->agf_seqno), &agflbp)))
2bd0ea18 2378 return error;
5e656dbb 2379 be32_add_cpu(&agf->agf_fllast, 1);
6e3140c7 2380 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
46eca962 2381 agf->agf_fllast = 0;
56b2de80
DC
2382
2383 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
5e656dbb 2384 be32_add_cpu(&agf->agf_flcount, 1);
2bd0ea18
NS
2385 xfs_trans_agflist_delta(tp, 1);
2386 pag->pagf_flcount++;
cdded3d8
DC
2387
2388 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2389 if (btreeblk) {
5e656dbb 2390 be32_add_cpu(&agf->agf_btreeblks, -1);
cdded3d8
DC
2391 pag->pagf_btreeblks--;
2392 logflags |= XFS_AGF_BTREEBLKS;
2393 }
56b2de80 2394 xfs_perag_put(pag);
cdded3d8 2395
5e656dbb
BN
2396 xfs_alloc_log_agf(tp, agbp, logflags);
2397
6e3140c7 2398 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
dd5b876e
DC
2399
2400 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2401 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
5e656dbb 2402 *blockp = cpu_to_be32(bno);
dd5b876e
DC
2403 startoff = (char *)blockp - (char *)agflbp->b_addr;
2404
cdded3d8 2405 xfs_alloc_log_agf(tp, agbp, logflags);
dd5b876e 2406
bdc16ee5 2407 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
dd5b876e
DC
2408 xfs_trans_log_buf(tp, agflbp, startoff,
2409 startoff + sizeof(xfs_agblock_t) - 1);
2bd0ea18
NS
2410 return 0;
2411}
2412
dd5b876e 2413static bool
a2ceac1f 2414xfs_agf_verify(
dd5b876e 2415 struct xfs_mount *mp,
a2ceac1f
DC
2416 struct xfs_buf *bp)
2417 {
dd5b876e 2418 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
a2ceac1f 2419
a65d8d29
BF
2420 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2421 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
dd5b876e 2422 return false;
a65d8d29
BF
2423 if (!xfs_log_check_lsn(mp,
2424 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
2425 return false;
2426 }
a2ceac1f 2427
dd5b876e
DC
2428 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2429 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2430 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2431 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2432 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2433 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2434 return false;
a2ceac1f 2435
5a35bf2c
DC
2436 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
2437 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
2438 return false;
2439
e37838e5
DW
2440 if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
2441 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)
2442 return false;
2443
a2ceac1f
DC
2444 /*
2445 * during growfs operations, the perag is not fully initialised,
2446 * so we can't use it for any useful checking. growfs ensures we can't
2447 * use it by using uncached buffers that don't have the perag attached
2448 * so we can detect and avoid this problem.
2449 */
dd5b876e
DC
2450 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2451 return false;
a2ceac1f 2452
dd5b876e
DC
2453 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2454 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2455 return false;
2456
88ce0792
DW
2457 if (xfs_sb_version_hasreflink(&mp->m_sb) &&
2458 be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)
2459 return false;
2460
dd5b876e 2461 return true;;
a2ceac1f 2462
a2ceac1f
DC
2463}
2464
2465static void
2466xfs_agf_read_verify(
2467 struct xfs_buf *bp)
2468{
dd5b876e 2469 struct xfs_mount *mp = bp->b_target->bt_mount;
dd5b876e 2470
45922933
DC
2471 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2472 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
12b53197 2473 xfs_buf_ioerror(bp, -EFSBADCRC);
45922933
DC
2474 else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2475 XFS_ERRTAG_ALLOC_READ_AGF,
2476 XFS_RANDOM_ALLOC_READ_AGF))
12b53197 2477 xfs_buf_ioerror(bp, -EFSCORRUPTED);
45922933
DC
2478
2479 if (bp->b_error)
2480 xfs_verifier_error(bp);
a2ceac1f
DC
2481}
2482
2483static void
2484xfs_agf_write_verify(
2485 struct xfs_buf *bp)
2486{
dd5b876e
DC
2487 struct xfs_mount *mp = bp->b_target->bt_mount;
2488 struct xfs_buf_log_item *bip = bp->b_fspriv;
2489
2490 if (!xfs_agf_verify(mp, bp)) {
12b53197 2491 xfs_buf_ioerror(bp, -EFSCORRUPTED);
45922933 2492 xfs_verifier_error(bp);
dd5b876e
DC
2493 return;
2494 }
2495
2496 if (!xfs_sb_version_hascrc(&mp->m_sb))
2497 return;
2498
2499 if (bip)
2500 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2501
43b5aeed 2502 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
a2ceac1f
DC
2503}
2504
2505const struct xfs_buf_ops xfs_agf_buf_ops = {
a3fac935 2506 .name = "xfs_agf",
a2ceac1f
DC
2507 .verify_read = xfs_agf_read_verify,
2508 .verify_write = xfs_agf_write_verify,
2509};
2510
2bd0ea18
NS
2511/*
2512 * Read in the allocation group header (free/alloc section).
2513 */
2514int /* error */
56b2de80
DC
2515xfs_read_agf(
2516 struct xfs_mount *mp, /* mount point structure */
2517 struct xfs_trans *tp, /* transaction pointer */
2518 xfs_agnumber_t agno, /* allocation group number */
2519 int flags, /* XFS_BUF_ */
2520 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2bd0ea18 2521{
9440d84d 2522 int error;
2bd0ea18 2523
ff105f75
DC
2524 trace_xfs_read_agf(mp, agno);
2525
2bd0ea18 2526 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
2527 error = xfs_trans_read_buf(
2528 mp, tp, mp->m_ddev_targp,
2529 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
a2ceac1f 2530 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
9440d84d 2531 if (error)
2bd0ea18 2532 return error;
56b2de80 2533 if (!*bpp)
2bd0ea18 2534 return 0;
56b2de80 2535
a2ceac1f
DC
2536 ASSERT(!(*bpp)->b_error);
2537 xfs_buf_set_ref(*bpp, XFS_AGF_REF);
56b2de80
DC
2538 return 0;
2539}
2540
2541/*
2542 * Read in the allocation group header (free/alloc section).
2543 */
2544int /* error */
2545xfs_alloc_read_agf(
2546 struct xfs_mount *mp, /* mount point structure */
2547 struct xfs_trans *tp, /* transaction pointer */
2548 xfs_agnumber_t agno, /* allocation group number */
2549 int flags, /* XFS_ALLOC_FLAG_... */
2550 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2551{
2552 struct xfs_agf *agf; /* ag freelist header */
2553 struct xfs_perag *pag; /* per allocation group data */
2554 int error;
2555
ff105f75 2556 trace_xfs_alloc_read_agf(mp, agno);
56b2de80 2557
ff105f75 2558 ASSERT(agno != NULLAGNUMBER);
56b2de80
DC
2559 error = xfs_read_agf(mp, tp, agno,
2560 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2561 bpp);
2562 if (error)
2563 return error;
2564 if (!*bpp)
2565 return 0;
a2ceac1f 2566 ASSERT(!(*bpp)->b_error);
56b2de80
DC
2567
2568 agf = XFS_BUF_TO_AGF(*bpp);
2569 pag = xfs_perag_get(mp, agno);
2bd0ea18 2570 if (!pag->pagf_init) {
6e3140c7 2571 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
cdded3d8 2572 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
6e3140c7
NS
2573 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2574 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2bd0ea18 2575 pag->pagf_levels[XFS_BTNUM_BNOi] =
6e3140c7 2576 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2bd0ea18 2577 pag->pagf_levels[XFS_BTNUM_CNTi] =
6e3140c7 2578 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
e37838e5
DW
2579 pag->pagf_levels[XFS_BTNUM_RMAPi] =
2580 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
88ce0792 2581 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
5e656dbb 2582 spin_lock_init(&pag->pagb_lock);
56b2de80 2583 pag->pagb_count = 0;
ff105f75
DC
2584 /* XXX: pagb_tree doesn't exist in userspace */
2585 //pag->pagb_tree = RB_ROOT;
2bd0ea18
NS
2586 pag->pagf_init = 1;
2587 }
2588#ifdef DEBUG
2589 else if (!XFS_FORCED_SHUTDOWN(mp)) {
6e3140c7 2590 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
cdded3d8 2591 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
6e3140c7
NS
2592 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2593 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2bd0ea18 2594 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
6e3140c7 2595 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2bd0ea18 2596 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
6e3140c7 2597 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2bd0ea18
NS
2598 }
2599#endif
56b2de80 2600 xfs_perag_put(pag);
2bd0ea18
NS
2601 return 0;
2602}
2603
2604/*
2605 * Allocate an extent (variable-size).
2606 * Depending on the allocation type, we either look in a single allocation
2607 * group or loop over the allocation groups to find the result.
2608 */
2609int /* error */
2610xfs_alloc_vextent(
dfc130f3 2611 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 2612{
dfc130f3 2613 xfs_agblock_t agsize; /* allocation group size */
2bd0ea18
NS
2614 int error;
2615 int flags; /* XFS_ALLOC_FLAG_... locking flags */
2bd0ea18
NS
2616 xfs_extlen_t minleft;/* minimum left value, temp copy */
2617 xfs_mount_t *mp; /* mount structure pointer */
2618 xfs_agnumber_t sagno; /* starting allocation group number */
dfc130f3 2619 xfs_alloctype_t type; /* input allocation type */
34317449 2620 int bump_rotor = 0;
9baa549b 2621 int no_min = 0;
46eca962 2622 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2bd0ea18
NS
2623
2624 mp = args->mp;
2625 type = args->otype = args->type;
2626 args->agbno = NULLAGBLOCK;
2627 /*
2628 * Just fix this up, for the case where the last a.g. is shorter
2629 * (or there's only one a.g.) and the caller couldn't easily figure
2630 * that out (xfs_bmap_alloc).
2631 */
2632 agsize = mp->m_sb.sb_agblocks;
2633 if (args->maxlen > agsize)
2634 args->maxlen = agsize;
2635 if (args->alignment == 0)
2636 args->alignment = 1;
2637 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2638 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2639 ASSERT(args->minlen <= args->maxlen);
2640 ASSERT(args->minlen <= agsize);
2641 ASSERT(args->mod < args->prod);
2642 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2643 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2644 args->minlen > args->maxlen || args->minlen > agsize ||
2645 args->mod >= args->prod) {
2646 args->fsbno = NULLFSBLOCK;
56b2de80 2647 trace_xfs_alloc_vextent_badargs(args);
2bd0ea18
NS
2648 return 0;
2649 }
9baa549b
SL
2650 minleft = args->minleft;
2651
2bd0ea18
NS
2652 switch (type) {
2653 case XFS_ALLOCTYPE_THIS_AG:
2654 case XFS_ALLOCTYPE_NEAR_BNO:
2655 case XFS_ALLOCTYPE_THIS_BNO:
2656 /*
2657 * These three force us into a single a.g.
2658 */
2659 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
56b2de80 2660 args->pag = xfs_perag_get(mp, args->agno);
2bd0ea18
NS
2661 args->minleft = 0;
2662 error = xfs_alloc_fix_freelist(args, 0);
2663 args->minleft = minleft;
2664 if (error) {
56b2de80 2665 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
2666 goto error0;
2667 }
2668 if (!args->agbp) {
56b2de80 2669 trace_xfs_alloc_vextent_noagbp(args);
2bd0ea18
NS
2670 break;
2671 }
2672 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
0e266570 2673 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 2674 goto error0;
2bd0ea18
NS
2675 break;
2676 case XFS_ALLOCTYPE_START_BNO:
2677 /*
2678 * Try near allocation first, then anywhere-in-ag after
2679 * the first a.g. fails.
2680 */
1fccd5c8 2681 if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
34317449 2682 (mp->m_flags & XFS_MOUNT_32BITINODES)) {
46eca962
NS
2683 args->fsbno = XFS_AGB_TO_FSB(mp,
2684 ((mp->m_agfrotor / rotorstep) %
2685 mp->m_sb.sb_agcount), 0);
34317449
NS
2686 bump_rotor = 1;
2687 }
2bd0ea18
NS
2688 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2689 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2690 /* FALLTHROUGH */
2691 case XFS_ALLOCTYPE_ANY_AG:
2692 case XFS_ALLOCTYPE_START_AG:
2693 case XFS_ALLOCTYPE_FIRST_AG:
2694 /*
2695 * Rotate through the allocation groups looking for a winner.
2696 */
2697 if (type == XFS_ALLOCTYPE_ANY_AG) {
2698 /*
2699 * Start with the last place we left off.
2700 */
46eca962
NS
2701 args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2702 mp->m_sb.sb_agcount;
2bd0ea18
NS
2703 args->type = XFS_ALLOCTYPE_THIS_AG;
2704 flags = XFS_ALLOC_FLAG_TRYLOCK;
2705 } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2706 /*
2707 * Start with allocation group given by bno.
2708 */
2709 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2710 args->type = XFS_ALLOCTYPE_THIS_AG;
2711 sagno = 0;
2712 flags = 0;
2713 } else {
2714 if (type == XFS_ALLOCTYPE_START_AG)
2715 args->type = XFS_ALLOCTYPE_THIS_AG;
2716 /*
2717 * Start with the given allocation group.
2718 */
2719 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2720 flags = XFS_ALLOC_FLAG_TRYLOCK;
2721 }
2722 /*
2723 * Loop over allocation groups twice; first time with
2724 * trylock set, second time without.
2725 */
2726 for (;;) {
56b2de80 2727 args->pag = xfs_perag_get(mp, args->agno);
9baa549b
SL
2728 if (no_min) args->minleft = 0;
2729 error = xfs_alloc_fix_freelist(args, flags);
2730 args->minleft = minleft;
2731 if (error) {
56b2de80 2732 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
2733 goto error0;
2734 }
2735 /*
2736 * If we get a buffer back then the allocation will fly.
2737 */
2738 if (args->agbp) {
0e266570 2739 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 2740 goto error0;
2bd0ea18
NS
2741 break;
2742 }
56b2de80
DC
2743
2744 trace_xfs_alloc_vextent_loopfailed(args);
2745
2bd0ea18
NS
2746 /*
2747 * Didn't work, figure out the next iteration.
2748 */
2749 if (args->agno == sagno &&
2750 type == XFS_ALLOCTYPE_START_BNO)
2751 args->type = XFS_ALLOCTYPE_THIS_AG;
5e656dbb
BN
2752 /*
2753 * For the first allocation, we can try any AG to get
2754 * space. However, if we already have allocated a
2755 * block, we don't want to try AGs whose number is below
2756 * sagno. Otherwise, we may end up with out-of-order
2757 * locking of AGF, which might cause deadlock.
2758 */
2759 if (++(args->agno) == mp->m_sb.sb_agcount) {
2760 if (args->firstblock != NULLFSBLOCK)
2761 args->agno = sagno;
2762 else
2763 args->agno = 0;
2764 }
5000d01d 2765 /*
2bd0ea18
NS
2766 * Reached the starting a.g., must either be done
2767 * or switch to non-trylock mode.
2768 */
2769 if (args->agno == sagno) {
9baa549b 2770 if (no_min == 1) {
2bd0ea18 2771 args->agbno = NULLAGBLOCK;
56b2de80 2772 trace_xfs_alloc_vextent_allfailed(args);
2bd0ea18
NS
2773 break;
2774 }
9baa549b
SL
2775 if (flags == 0) {
2776 no_min = 1;
2777 } else {
2778 flags = 0;
2779 if (type == XFS_ALLOCTYPE_START_BNO) {
2780 args->agbno = XFS_FSB_TO_AGBNO(mp,
2781 args->fsbno);
2782 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2783 }
2bd0ea18
NS
2784 }
2785 }
56b2de80 2786 xfs_perag_put(args->pag);
2bd0ea18 2787 }
46eca962
NS
2788 if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2789 if (args->agno == sagno)
2790 mp->m_agfrotor = (mp->m_agfrotor + 1) %
2791 (mp->m_sb.sb_agcount * rotorstep);
2792 else
2793 mp->m_agfrotor = (args->agno * rotorstep + 1) %
2794 (mp->m_sb.sb_agcount * rotorstep);
2795 }
2bd0ea18
NS
2796 break;
2797 default:
2798 ASSERT(0);
2799 /* NOTREACHED */
2800 }
2801 if (args->agbno == NULLAGBLOCK)
2802 args->fsbno = NULLFSBLOCK;
2803 else {
2804 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2805#ifdef DEBUG
2806 ASSERT(args->len >= args->minlen);
2807 ASSERT(args->len <= args->maxlen);
2808 ASSERT(args->agbno % args->alignment == 0);
2809 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2810 args->len);
2811#endif
9542ae13
DC
2812
2813 /* Zero the extent if we were asked to do so */
1fccd5c8 2814 if (args->datatype & XFS_ALLOC_USERDATA_ZERO) {
9542ae13
DC
2815 error = xfs_zero_extent(args->ip, args->fsbno, args->len);
2816 if (error)
2817 goto error0;
2818 }
2819
2bd0ea18 2820 }
56b2de80 2821 xfs_perag_put(args->pag);
2bd0ea18
NS
2822 return 0;
2823error0:
56b2de80 2824 xfs_perag_put(args->pag);
2bd0ea18
NS
2825 return error;
2826}
2827
2a6da3b8
DC
2828/* Ensure that the freelist is at full capacity. */
2829int
2830xfs_free_extent_fix_freelist(
2831 struct xfs_trans *tp,
2832 xfs_agnumber_t agno,
2833 struct xfs_buf **agbp)
2bd0ea18 2834{
2a6da3b8
DC
2835 struct xfs_alloc_arg args;
2836 int error;
2bd0ea18 2837
2a6da3b8 2838 memset(&args, 0, sizeof(struct xfs_alloc_arg));
2bd0ea18
NS
2839 args.tp = tp;
2840 args.mp = tp->t_mountp;
2a6da3b8 2841 args.agno = agno;
a2ceac1f
DC
2842
2843 /*
2844 * validate that the block number is legal - the enables us to detect
2845 * and handle a silent filesystem corruption rather than crashing.
2846 */
a2ceac1f 2847 if (args.agno >= args.mp->m_sb.sb_agcount)
12b53197 2848 return -EFSCORRUPTED;
a2ceac1f 2849
56b2de80 2850 args.pag = xfs_perag_get(args.mp, args.agno);
a2ceac1f
DC
2851 ASSERT(args.pag);
2852
2853 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2854 if (error)
2a6da3b8
DC
2855 goto out;
2856
2857 *agbp = args.agbp;
2858out:
2859 xfs_perag_put(args.pag);
2860 return error;
2861}
2862
2863/*
2864 * Free an extent.
2865 * Just break up the extent address and hand off to xfs_free_ag_extent
2866 * after fixing up the freelist.
2867 */
2868int /* error */
2869xfs_free_extent(
2870 struct xfs_trans *tp, /* transaction pointer */
2871 xfs_fsblock_t bno, /* starting block number of extent */
85aec44f 2872 xfs_extlen_t len, /* length of extent */
cf8ce220
DW
2873 struct xfs_owner_info *oinfo, /* extent owner */
2874 enum xfs_ag_resv_type type) /* block reservation type */
2a6da3b8
DC
2875{
2876 struct xfs_mount *mp = tp->t_mountp;
2877 struct xfs_buf *agbp;
2878 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
2879 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
2880 int error;
2881
2882 ASSERT(len != 0);
cf8ce220 2883 ASSERT(type != XFS_AG_RESV_AGFL);
2a6da3b8 2884
a9da40de
DW
2885 if (XFS_TEST_ERROR(false, mp,
2886 XFS_ERRTAG_FREE_EXTENT,
2887 XFS_RANDOM_FREE_EXTENT))
2888 return -EIO;
2889
2a6da3b8
DC
2890 error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
2891 if (error)
2892 return error;
2893
2894 XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err);
a2ceac1f
DC
2895
2896 /* validate the extent size is legal now we have the agf locked */
2a6da3b8
DC
2897 XFS_WANT_CORRUPTED_GOTO(mp,
2898 agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
2899 err);
a2ceac1f 2900
cf8ce220 2901 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
2a6da3b8
DC
2902 if (error)
2903 goto err;
2904
2905 xfs_extent_busy_insert(tp, agno, agbno, len, 0);
2906 return 0;
2907
2908err:
2909 xfs_trans_brelse(tp, agbp);
2bd0ea18
NS
2910 return error;
2911}