]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxfs/xfs_alloc.c
xfs: add rmap btree geometry feature flag
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_alloc.c
CommitLineData
2bd0ea18 1/*
5e656dbb 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
da23017d 3 * All Rights Reserved.
5000d01d 4 *
da23017d
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
2bd0ea18 7 * published by the Free Software Foundation.
5000d01d 8 *
da23017d
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
5000d01d 13 *
da23017d
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2bd0ea18 17 */
9c799827 18#include "libxfs_priv.h"
b626fb59
DC
19#include "xfs_fs.h"
20#include "xfs_format.h"
21#include "xfs_log_format.h"
22#include "xfs_shared.h"
23#include "xfs_trans_resv.h"
24#include "xfs_bit.h"
25#include "xfs_sb.h"
26#include "xfs_mount.h"
f944d3d0 27#include "xfs_defer.h"
b626fb59
DC
28#include "xfs_inode.h"
29#include "xfs_btree.h"
631ac87a 30#include "xfs_rmap.h"
b626fb59
DC
31#include "xfs_alloc_btree.h"
32#include "xfs_alloc.h"
33#include "xfs_cksum.h"
34#include "xfs_trace.h"
35#include "xfs_trans.h"
2bd0ea18 36
ff105f75
DC
37struct workqueue_struct *xfs_alloc_wq;
38
2bd0ea18 39#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
5e656dbb
BN
40
41#define XFSA_FIXUP_BNO_OK 1
42#define XFSA_FIXUP_CNT_OK 2
43
5e656dbb
BN
44STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
45STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
46STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
47STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
a2ceac1f 48 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
2bd0ea18 49
ef5340cd
DW
50xfs_extlen_t
51xfs_prealloc_blocks(
52 struct xfs_mount *mp)
53{
54 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
55 return XFS_RMAP_BLOCK(mp) + 1;
56 if (xfs_sb_version_hasfinobt(&mp->m_sb))
57 return XFS_FIBT_BLOCK(mp) + 1;
58 return XFS_IBT_BLOCK(mp) + 1;
59}
60
b8a8d6e5
DW
61/*
62 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
63 * AGF buffer (PV 947395), we place constraints on the relationship among
64 * actual allocations for data blocks, freelist blocks, and potential file data
65 * bmap btree blocks. However, these restrictions may result in no actual space
66 * allocated for a delayed extent, for example, a data block in a certain AG is
67 * allocated but there is no additional block for the additional bmap btree
68 * block due to a split of the bmap btree of the file. The result of this may
69 * lead to an infinite loop when the file gets flushed to disk and all delayed
70 * extents need to be actually allocated. To get around this, we explicitly set
71 * aside a few blocks which will not be reserved in delayed allocation.
72 *
73 * When rmap is disabled, we need to reserve 4 fsbs _per AG_ for the freelist
74 * and 4 more to handle a potential split of the file's bmap btree.
75 *
76 * When rmap is enabled, we must also be able to handle two rmap btree inserts
77 * to record both the file data extent and a new bmbt block. The bmbt block
78 * might not be in the same AG as the file data extent. In the worst case
79 * the bmap btree splits multiple levels and all the new blocks come from
80 * different AGs, so set aside enough to handle rmap btree splits in all AGs.
81 */
82unsigned int
83xfs_alloc_set_aside(
84 struct xfs_mount *mp)
85{
86 unsigned int blocks;
87
88 blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
89 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
90 blocks += mp->m_sb.sb_agcount * mp->m_rmap_maxlevels;
91 return blocks;
92}
93
94/*
95 * When deciding how much space to allocate out of an AG, we limit the
96 * allocation maximum size to the size the AG. However, we cannot use all the
97 * blocks in the AG - some are permanently used by metadata. These
98 * blocks are generally:
99 * - the AG superblock, AGF, AGI and AGFL
100 * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
101 * the AGI free inode and rmap btree root blocks.
102 * - blocks on the AGFL according to xfs_alloc_set_aside() limits
103 * - the rmapbt root block
104 *
105 * The AG headers are sector sized, so the amount of space they take up is
106 * dependent on filesystem geometry. The others are all single blocks.
107 */
108unsigned int
109xfs_alloc_ag_max_usable(
110 struct xfs_mount *mp)
111{
112 unsigned int blocks;
113
114 blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
115 blocks += XFS_ALLOC_AGFL_RESERVE;
116 blocks += 3; /* AGF, AGI btree root blocks */
117 if (xfs_sb_version_hasfinobt(&mp->m_sb))
118 blocks++; /* finobt root block */
119 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
120 blocks++; /* rmap root block */
121
122 return mp->m_sb.sb_agblocks - blocks;
123}
124
b194c7d8
BN
125/*
126 * Lookup the record equal to [bno, len] in the btree given by cur.
127 */
128STATIC int /* error */
129xfs_alloc_lookup_eq(
130 struct xfs_btree_cur *cur, /* btree cursor */
131 xfs_agblock_t bno, /* starting block of extent */
132 xfs_extlen_t len, /* length of extent */
133 int *stat) /* success/failure */
134{
135 cur->bc_rec.a.ar_startblock = bno;
136 cur->bc_rec.a.ar_blockcount = len;
137 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
138}
139
140/*
141 * Lookup the first record greater than or equal to [bno, len]
142 * in the btree given by cur.
143 */
a2ceac1f 144int /* error */
b194c7d8
BN
145xfs_alloc_lookup_ge(
146 struct xfs_btree_cur *cur, /* btree cursor */
147 xfs_agblock_t bno, /* starting block of extent */
148 xfs_extlen_t len, /* length of extent */
149 int *stat) /* success/failure */
150{
151 cur->bc_rec.a.ar_startblock = bno;
152 cur->bc_rec.a.ar_blockcount = len;
153 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
154}
155
156/*
157 * Lookup the first record less than or equal to [bno, len]
158 * in the btree given by cur.
159 */
2d066e16 160static int /* error */
b194c7d8
BN
161xfs_alloc_lookup_le(
162 struct xfs_btree_cur *cur, /* btree cursor */
163 xfs_agblock_t bno, /* starting block of extent */
164 xfs_extlen_t len, /* length of extent */
165 int *stat) /* success/failure */
166{
167 cur->bc_rec.a.ar_startblock = bno;
168 cur->bc_rec.a.ar_blockcount = len;
169 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
170}
171
172/*
173 * Update the record referred to by cur to the value given
174 * by [bno, len].
175 * This either works (return 0) or gets an EFSCORRUPTED error.
176 */
177STATIC int /* error */
178xfs_alloc_update(
179 struct xfs_btree_cur *cur, /* btree cursor */
180 xfs_agblock_t bno, /* starting block of extent */
181 xfs_extlen_t len) /* length of extent */
182{
183 union xfs_btree_rec rec;
184
185 rec.alloc.ar_startblock = cpu_to_be32(bno);
186 rec.alloc.ar_blockcount = cpu_to_be32(len);
187 return xfs_btree_update(cur, &rec);
188}
189
190/*
191 * Get the data from the pointed-to record.
192 */
a2ceac1f 193int /* error */
b194c7d8
BN
194xfs_alloc_get_rec(
195 struct xfs_btree_cur *cur, /* btree cursor */
196 xfs_agblock_t *bno, /* output: starting block of extent */
197 xfs_extlen_t *len, /* output: length of extent */
198 int *stat) /* output: success/failure */
199{
200 union xfs_btree_rec *rec;
201 int error;
202
203 error = xfs_btree_get_rec(cur, &rec, stat);
204 if (!error && *stat == 1) {
205 *bno = be32_to_cpu(rec->alloc.ar_startblock);
206 *len = be32_to_cpu(rec->alloc.ar_blockcount);
207 }
208 return error;
209}
210
2bd0ea18
NS
211/*
212 * Compute aligned version of the found extent.
213 * Takes alignment and min length into account.
214 */
5e656dbb 215STATIC void
2bd0ea18 216xfs_alloc_compute_aligned(
a2ceac1f 217 xfs_alloc_arg_t *args, /* allocation argument structure */
2bd0ea18
NS
218 xfs_agblock_t foundbno, /* starting block in found extent */
219 xfs_extlen_t foundlen, /* length in found extent */
2bd0ea18
NS
220 xfs_agblock_t *resbno, /* result block number */
221 xfs_extlen_t *reslen) /* result length */
222{
223 xfs_agblock_t bno;
2bd0ea18 224 xfs_extlen_t len;
ff3263dd 225 xfs_extlen_t diff;
2bd0ea18 226
a2ceac1f
DC
227 /* Trim busy sections out of found extent */
228 xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
229
ff3263dd
BF
230 /*
231 * If we have a largish extent that happens to start before min_agbno,
232 * see if we can shift it into range...
233 */
234 if (bno < args->min_agbno && bno + len > args->min_agbno) {
235 diff = args->min_agbno - bno;
236 if (len > diff) {
237 bno += diff;
238 len -= diff;
239 }
240 }
241
a2ceac1f
DC
242 if (args->alignment > 1 && len >= args->minlen) {
243 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
ff3263dd
BF
244
245 diff = aligned_bno - bno;
a2ceac1f
DC
246
247 *resbno = aligned_bno;
248 *reslen = diff >= len ? 0 : len - diff;
2bd0ea18 249 } else {
a2ceac1f
DC
250 *resbno = bno;
251 *reslen = len;
2bd0ea18 252 }
2bd0ea18
NS
253}
254
255/*
256 * Compute best start block and diff for "near" allocations.
257 * freelen >= wantlen already checked by caller.
258 */
259STATIC xfs_extlen_t /* difference value (absolute) */
260xfs_alloc_compute_diff(
261 xfs_agblock_t wantbno, /* target starting block */
262 xfs_extlen_t wantlen, /* target length */
263 xfs_extlen_t alignment, /* target alignment */
84a62eea 264 char userdata, /* are we allocating data? */
2bd0ea18
NS
265 xfs_agblock_t freebno, /* freespace's starting block */
266 xfs_extlen_t freelen, /* freespace's length */
267 xfs_agblock_t *newbnop) /* result: best start block from free */
268{
269 xfs_agblock_t freeend; /* end of freespace extent */
270 xfs_agblock_t newbno1; /* return block number */
271 xfs_agblock_t newbno2; /* other new block number */
0e266570
NS
272 xfs_extlen_t newlen1=0; /* length with newbno1 */
273 xfs_extlen_t newlen2=0; /* length with newbno2 */
2bd0ea18
NS
274 xfs_agblock_t wantend; /* end of target extent */
275
276 ASSERT(freelen >= wantlen);
277 freeend = freebno + freelen;
278 wantend = wantbno + wantlen;
84a62eea
DC
279 /*
280 * We want to allocate from the start of a free extent if it is past
281 * the desired block or if we are allocating user data and the free
282 * extent is before desired block. The second case is there to allow
283 * for contiguous allocation from the remaining free space if the file
284 * grows in the short term.
285 */
286 if (freebno >= wantbno || (userdata && freeend < wantend)) {
2bd0ea18
NS
287 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
288 newbno1 = NULLAGBLOCK;
289 } else if (freeend >= wantend && alignment > 1) {
290 newbno1 = roundup(wantbno, alignment);
291 newbno2 = newbno1 - alignment;
292 if (newbno1 >= freeend)
293 newbno1 = NULLAGBLOCK;
294 else
295 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
296 if (newbno2 < freebno)
297 newbno2 = NULLAGBLOCK;
298 else
299 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
300 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
301 if (newlen1 < newlen2 ||
302 (newlen1 == newlen2 &&
303 XFS_ABSDIFF(newbno1, wantbno) >
304 XFS_ABSDIFF(newbno2, wantbno)))
305 newbno1 = newbno2;
306 } else if (newbno2 != NULLAGBLOCK)
307 newbno1 = newbno2;
308 } else if (freeend >= wantend) {
309 newbno1 = wantbno;
310 } else if (alignment > 1) {
311 newbno1 = roundup(freeend - wantlen, alignment);
312 if (newbno1 > freeend - wantlen &&
313 newbno1 - alignment >= freebno)
314 newbno1 -= alignment;
315 else if (newbno1 >= freeend)
316 newbno1 = NULLAGBLOCK;
317 } else
318 newbno1 = freeend - wantlen;
319 *newbnop = newbno1;
320 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
321}
322
323/*
324 * Fix up the length, based on mod and prod.
325 * len should be k * prod + mod for some k.
326 * If len is too small it is returned unchanged.
327 * If len hits maxlen it is left alone.
328 */
329STATIC void
330xfs_alloc_fix_len(
dfc130f3 331 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18
NS
332{
333 xfs_extlen_t k;
334 xfs_extlen_t rlen;
335
336 ASSERT(args->mod < args->prod);
337 rlen = args->len;
338 ASSERT(rlen >= args->minlen);
339 ASSERT(rlen <= args->maxlen);
340 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
341 (args->mod == 0 && rlen < args->prod))
342 return;
343 k = rlen % args->prod;
344 if (k == args->mod)
345 return;
ff105f75
DC
346 if (k > args->mod)
347 rlen = rlen - (k - args->mod);
348 else
349 rlen = rlen - args->prod + (args->mod - k);
19ebedcf 350 /* casts to (int) catch length underflows */
ff105f75
DC
351 if ((int)rlen < (int)args->minlen)
352 return;
353 ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
354 ASSERT(rlen % args->prod == args->mod);
2bd0ea18
NS
355 args->len = rlen;
356}
357
358/*
359 * Fix up length if there is too little space left in the a.g.
360 * Return 1 if ok, 0 if too little, should give up.
361 */
362STATIC int
363xfs_alloc_fix_minleft(
dfc130f3 364 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18
NS
365{
366 xfs_agf_t *agf; /* a.g. freelist header */
367 int diff; /* free space difference */
368
369 if (args->minleft == 0)
370 return 1;
371 agf = XFS_BUF_TO_AGF(args->agbp);
6e3140c7 372 diff = be32_to_cpu(agf->agf_freeblks)
2bd0ea18
NS
373 - args->len - args->minleft;
374 if (diff >= 0)
375 return 1;
376 args->len += diff; /* shrink the allocated space */
19ebedcf
DC
377 /* casts to (int) catch length underflows */
378 if ((int)args->len >= (int)args->minlen)
2bd0ea18
NS
379 return 1;
380 args->agbno = NULLAGBLOCK;
381 return 0;
382}
383
384/*
385 * Update the two btrees, logically removing from freespace the extent
386 * starting at rbno, rlen blocks. The extent is contained within the
387 * actual (current) free extent fbno for flen blocks.
388 * Flags are passed in indicating whether the cursors are set to the
389 * relevant records.
390 */
391STATIC int /* error code */
392xfs_alloc_fixup_trees(
dfc130f3
RC
393 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
394 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
2bd0ea18
NS
395 xfs_agblock_t fbno, /* starting block of free extent */
396 xfs_extlen_t flen, /* length of free extent */
397 xfs_agblock_t rbno, /* starting block of returned extent */
398 xfs_extlen_t rlen, /* length of returned extent */
399 int flags) /* flags, XFSA_FIXUP_... */
400{
401 int error; /* error code */
402 int i; /* operation results */
403 xfs_agblock_t nfbno1; /* first new free startblock */
404 xfs_agblock_t nfbno2; /* second new free startblock */
0e266570
NS
405 xfs_extlen_t nflen1=0; /* first new free length */
406 xfs_extlen_t nflen2=0; /* second new free length */
19ebedcf
DC
407 struct xfs_mount *mp;
408
409 mp = cnt_cur->bc_mp;
2bd0ea18
NS
410
411 /*
412 * Look up the record in the by-size tree if necessary.
413 */
414 if (flags & XFSA_FIXUP_CNT_OK) {
415#ifdef DEBUG
0e266570 416 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
2bd0ea18 417 return error;
19ebedcf 418 XFS_WANT_CORRUPTED_RETURN(mp,
2bd0ea18
NS
419 i == 1 && nfbno1 == fbno && nflen1 == flen);
420#endif
421 } else {
0e266570 422 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
2bd0ea18 423 return error;
19ebedcf 424 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
425 }
426 /*
427 * Look up the record in the by-block tree if necessary.
428 */
429 if (flags & XFSA_FIXUP_BNO_OK) {
430#ifdef DEBUG
0e266570 431 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
2bd0ea18 432 return error;
19ebedcf 433 XFS_WANT_CORRUPTED_RETURN(mp,
2bd0ea18
NS
434 i == 1 && nfbno1 == fbno && nflen1 == flen);
435#endif
436 } else {
0e266570 437 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
2bd0ea18 438 return error;
19ebedcf 439 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18 440 }
b3563c19 441
2bd0ea18 442#ifdef DEBUG
b3563c19
BN
443 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
444 struct xfs_btree_block *bnoblock;
445 struct xfs_btree_block *cntblock;
446
447 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
448 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
2bd0ea18 449
19ebedcf 450 XFS_WANT_CORRUPTED_RETURN(mp,
b3563c19 451 bnoblock->bb_numrecs == cntblock->bb_numrecs);
2bd0ea18
NS
452 }
453#endif
b3563c19 454
2bd0ea18
NS
455 /*
456 * Deal with all four cases: the allocated record is contained
457 * within the freespace record, so we can have new freespace
458 * at either (or both) end, or no freespace remaining.
459 */
460 if (rbno == fbno && rlen == flen)
461 nfbno1 = nfbno2 = NULLAGBLOCK;
462 else if (rbno == fbno) {
463 nfbno1 = rbno + rlen;
464 nflen1 = flen - rlen;
465 nfbno2 = NULLAGBLOCK;
466 } else if (rbno + rlen == fbno + flen) {
467 nfbno1 = fbno;
468 nflen1 = flen - rlen;
469 nfbno2 = NULLAGBLOCK;
470 } else {
471 nfbno1 = fbno;
472 nflen1 = rbno - fbno;
473 nfbno2 = rbno + rlen;
474 nflen2 = (fbno + flen) - nfbno2;
475 }
476 /*
477 * Delete the entry from the by-size btree.
478 */
b194c7d8 479 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 480 return error;
19ebedcf 481 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
482 /*
483 * Add new by-size btree entry(s).
484 */
485 if (nfbno1 != NULLAGBLOCK) {
0e266570 486 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
2bd0ea18 487 return error;
19ebedcf 488 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
b194c7d8 489 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 490 return error;
19ebedcf 491 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
492 }
493 if (nfbno2 != NULLAGBLOCK) {
0e266570 494 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
2bd0ea18 495 return error;
19ebedcf 496 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
b194c7d8 497 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 498 return error;
19ebedcf 499 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
500 }
501 /*
502 * Fix up the by-block btree entry(s).
503 */
504 if (nfbno1 == NULLAGBLOCK) {
505 /*
506 * No remaining freespace, just delete the by-block tree entry.
507 */
b194c7d8 508 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18 509 return error;
19ebedcf 510 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
511 } else {
512 /*
513 * Update the by-block entry to start later|be shorter.
514 */
0e266570 515 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
2bd0ea18
NS
516 return error;
517 }
518 if (nfbno2 != NULLAGBLOCK) {
519 /*
520 * 2 resulting free entries, need to add one.
521 */
0e266570 522 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
2bd0ea18 523 return error;
19ebedcf 524 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
b194c7d8 525 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18 526 return error;
19ebedcf 527 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
2bd0ea18
NS
528 }
529 return 0;
530}
531
dd5b876e 532static bool
a2ceac1f
DC
533xfs_agfl_verify(
534 struct xfs_buf *bp)
535{
a2ceac1f
DC
536 struct xfs_mount *mp = bp->b_target->bt_mount;
537 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
a2ceac1f
DC
538 int i;
539
9c4e12fb 540 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
dd5b876e
DC
541 return false;
542 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
543 return false;
544 /*
545 * during growfs operations, the perag is not fully initialised,
546 * so we can't use it for any useful checking. growfs ensures we can't
547 * use it by using uncached buffers that don't have the perag attached
548 * so we can detect and avoid this problem.
549 */
550 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
551 return false;
552
a2ceac1f 553 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
dd5b876e 554 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
a2ceac1f 555 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
dd5b876e 556 return false;
a2ceac1f 557 }
a65d8d29
BF
558
559 return xfs_log_check_lsn(mp,
560 be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
dd5b876e
DC
561}
562
563static void
564xfs_agfl_read_verify(
565 struct xfs_buf *bp)
566{
567 struct xfs_mount *mp = bp->b_target->bt_mount;
dd5b876e
DC
568
569 /*
570 * There is no verification of non-crc AGFLs because mkfs does not
571 * initialise the AGFL to zero or NULL. Hence the only valid part of the
572 * AGFL is what the AGF says is active. We can't get to the AGF, so we
573 * can't verify just those entries are valid.
574 */
575 if (!xfs_sb_version_hascrc(&mp->m_sb))
576 return;
577
45922933 578 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
12b53197 579 xfs_buf_ioerror(bp, -EFSBADCRC);
45922933 580 else if (!xfs_agfl_verify(bp))
12b53197 581 xfs_buf_ioerror(bp, -EFSCORRUPTED);
45922933
DC
582
583 if (bp->b_error)
584 xfs_verifier_error(bp);
a2ceac1f
DC
585}
586
587static void
588xfs_agfl_write_verify(
589 struct xfs_buf *bp)
590{
dd5b876e
DC
591 struct xfs_mount *mp = bp->b_target->bt_mount;
592 struct xfs_buf_log_item *bip = bp->b_fspriv;
a2ceac1f 593
dd5b876e
DC
594 /* no verification of non-crc AGFLs */
595 if (!xfs_sb_version_hascrc(&mp->m_sb))
596 return;
597
598 if (!xfs_agfl_verify(bp)) {
12b53197 599 xfs_buf_ioerror(bp, -EFSCORRUPTED);
45922933 600 xfs_verifier_error(bp);
dd5b876e
DC
601 return;
602 }
603
604 if (bip)
605 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
606
43b5aeed 607 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
a2ceac1f
DC
608}
609
610const struct xfs_buf_ops xfs_agfl_buf_ops = {
a3fac935 611 .name = "xfs_agfl",
a2ceac1f
DC
612 .verify_read = xfs_agfl_read_verify,
613 .verify_write = xfs_agfl_write_verify,
614};
615
2bd0ea18
NS
616/*
617 * Read in the allocation group free block array.
618 */
619STATIC int /* error */
620xfs_alloc_read_agfl(
621 xfs_mount_t *mp, /* mount point structure */
622 xfs_trans_t *tp, /* transaction pointer */
623 xfs_agnumber_t agno, /* allocation group number */
624 xfs_buf_t **bpp) /* buffer for the ag free block array */
625{
626 xfs_buf_t *bp; /* return value */
2bd0ea18
NS
627 int error;
628
629 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
630 error = xfs_trans_read_buf(
631 mp, tp, mp->m_ddev_targp,
632 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
a2ceac1f 633 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
9440d84d 634 if (error)
2bd0ea18 635 return error;
a2ceac1f 636 xfs_buf_set_ref(bp, XFS_AGFL_REF);
2bd0ea18
NS
637 *bpp = bp;
638 return 0;
639}
640
a2ceac1f
DC
641STATIC int
642xfs_alloc_update_counters(
643 struct xfs_trans *tp,
644 struct xfs_perag *pag,
645 struct xfs_buf *agbp,
646 long len)
647{
648 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
649
650 pag->pagf_freeblks += len;
651 be32_add_cpu(&agf->agf_freeblks, len);
652
653 xfs_trans_agblocks_delta(tp, len);
654 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
655 be32_to_cpu(agf->agf_length)))
12b53197 656 return -EFSCORRUPTED;
a2ceac1f
DC
657
658 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
659 return 0;
660}
661
2bd0ea18
NS
662/*
663 * Allocation group level functions.
664 */
665
666/*
667 * Allocate a variable extent in the allocation group agno.
668 * Type and bno are used to determine where in the allocation group the
669 * extent will start.
670 * Extent's length (returned in *len) will be between minlen and maxlen,
671 * and of the form k * prod + mod unless there's nothing that large.
672 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
673 */
674STATIC int /* error */
675xfs_alloc_ag_vextent(
dfc130f3 676 xfs_alloc_arg_t *args) /* argument structure for allocation */
2bd0ea18 677{
0e266570 678 int error=0;
2bd0ea18
NS
679
680 ASSERT(args->minlen > 0);
681 ASSERT(args->maxlen > 0);
682 ASSERT(args->minlen <= args->maxlen);
683 ASSERT(args->mod < args->prod);
684 ASSERT(args->alignment > 0);
685 /*
686 * Branch to correct routine based on the type.
687 */
688 args->wasfromfl = 0;
689 switch (args->type) {
690 case XFS_ALLOCTYPE_THIS_AG:
691 error = xfs_alloc_ag_vextent_size(args);
692 break;
693 case XFS_ALLOCTYPE_NEAR_BNO:
694 error = xfs_alloc_ag_vextent_near(args);
695 break;
696 case XFS_ALLOCTYPE_THIS_BNO:
697 error = xfs_alloc_ag_vextent_exact(args);
698 break;
699 default:
700 ASSERT(0);
701 /* NOTREACHED */
702 }
a2ceac1f
DC
703
704 if (error || args->agbno == NULLAGBLOCK)
2bd0ea18 705 return error;
2bd0ea18 706
a2ceac1f
DC
707 ASSERT(args->len >= args->minlen);
708 ASSERT(args->len <= args->maxlen);
709 ASSERT(!args->wasfromfl || !args->isfl);
710 ASSERT(args->agbno % args->alignment == 0);
711
631ac87a
DW
712 /* if not file data, insert new block into the reverse map btree */
713 if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
714 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
715 args->agbno, args->len, &args->oinfo);
716 if (error)
717 return error;
718 }
719
a2ceac1f
DC
720 if (!args->wasfromfl) {
721 error = xfs_alloc_update_counters(args->tp, args->pag,
722 args->agbp,
723 -((long)(args->len)));
724 if (error)
725 return error;
726
727 ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
728 args->agbno, args->len));
2bd0ea18 729 }
a2ceac1f
DC
730
731 if (!args->isfl) {
732 xfs_trans_mod_sb(args->tp, args->wasdel ?
733 XFS_TRANS_SB_RES_FDBLOCKS :
734 XFS_TRANS_SB_FDBLOCKS,
735 -((long)(args->len)));
736 }
737
79896434
BD
738 XFS_STATS_INC(args->mp, xs_allocx);
739 XFS_STATS_ADD(args->mp, xs_allocb, args->len);
a2ceac1f 740 return error;
2bd0ea18
NS
741}
742
743/*
744 * Allocate a variable extent at exactly agno/bno.
745 * Extent's length (returned in *len) will be between minlen and maxlen,
746 * and of the form k * prod + mod unless there's nothing that large.
747 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
748 */
749STATIC int /* error */
750xfs_alloc_ag_vextent_exact(
dfc130f3 751 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 752{
dfc130f3
RC
753 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
754 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
2bd0ea18
NS
755 int error;
756 xfs_agblock_t fbno; /* start block of found extent */
2bd0ea18 757 xfs_extlen_t flen; /* length of found extent */
a2ceac1f
DC
758 xfs_agblock_t tbno; /* start block of trimmed extent */
759 xfs_extlen_t tlen; /* length of trimmed extent */
760 xfs_agblock_t tend; /* end block of trimmed extent */
2bd0ea18 761 int i; /* success/failure of operation */
2bd0ea18
NS
762
763 ASSERT(args->alignment == 1);
a2ceac1f 764
2bd0ea18
NS
765 /*
766 * Allocate/initialize a cursor for the by-number freespace btree.
767 */
b194c7d8 768 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
56b2de80
DC
769 args->agno, XFS_BTNUM_BNO);
770
2bd0ea18
NS
771 /*
772 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
773 * Look for the closest free block <= bno, it must contain bno
774 * if any free block does.
775 */
56b2de80
DC
776 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
777 if (error)
2bd0ea18 778 goto error0;
56b2de80
DC
779 if (!i)
780 goto not_found;
781
2bd0ea18
NS
782 /*
783 * Grab the freespace record.
784 */
56b2de80
DC
785 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
786 if (error)
2bd0ea18 787 goto error0;
19ebedcf 788 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18 789 ASSERT(fbno <= args->agbno);
56b2de80 790
5000d01d 791 /*
a2ceac1f
DC
792 * Check for overlapping busy extents.
793 */
794 xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
795
796 /*
797 * Give up if the start of the extent is busy, or the freespace isn't
798 * long enough for the minimum request.
2bd0ea18 799 */
a2ceac1f
DC
800 if (tbno > args->agbno)
801 goto not_found;
802 if (tlen < args->minlen)
803 goto not_found;
804 tend = tbno + tlen;
805 if (tend < args->agbno + args->minlen)
56b2de80
DC
806 goto not_found;
807
2bd0ea18
NS
808 /*
809 * End of extent will be smaller of the freespace end and the
810 * maximal requested end.
56b2de80 811 *
2bd0ea18
NS
812 * Fix the length according to mod and prod if given.
813 */
a2ceac1f
DC
814 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
815 - args->agbno;
2bd0ea18 816 xfs_alloc_fix_len(args);
56b2de80
DC
817 if (!xfs_alloc_fix_minleft(args))
818 goto not_found;
819
a2ceac1f 820 ASSERT(args->agbno + args->len <= tend);
56b2de80 821
2bd0ea18 822 /*
a2ceac1f 823 * We are allocating agbno for args->len
2bd0ea18
NS
824 * Allocate/initialize a cursor for the by-size btree.
825 */
b194c7d8
BN
826 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
827 args->agno, XFS_BTNUM_CNT);
2bd0ea18 828 ASSERT(args->agbno + args->len <=
6e3140c7 829 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
56b2de80
DC
830 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
831 args->len, XFSA_FIXUP_BNO_OK);
832 if (error) {
2bd0ea18
NS
833 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
834 goto error0;
835 }
a2ceac1f 836
2bd0ea18
NS
837 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
838 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
a2ceac1f 839
2bd0ea18 840 args->wasfromfl = 0;
56b2de80
DC
841 trace_xfs_alloc_exact_done(args);
842 return 0;
843
844not_found:
845 /* Didn't find it, return null. */
846 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
847 args->agbno = NULLAGBLOCK;
848 trace_xfs_alloc_exact_notfound(args);
2bd0ea18
NS
849 return 0;
850
851error0:
852 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
56b2de80
DC
853 trace_xfs_alloc_exact_error(args);
854 return error;
855}
856
857/*
858 * Search the btree in a given direction via the search cursor and compare
859 * the records found against the good extent we've already found.
860 */
861STATIC int
862xfs_alloc_find_best_extent(
863 struct xfs_alloc_arg *args, /* allocation argument structure */
864 struct xfs_btree_cur **gcur, /* good cursor */
865 struct xfs_btree_cur **scur, /* searching cursor */
866 xfs_agblock_t gdiff, /* difference for search comparison */
867 xfs_agblock_t *sbno, /* extent found by search */
a2ceac1f
DC
868 xfs_extlen_t *slen, /* extent length */
869 xfs_agblock_t *sbnoa, /* aligned extent found by search */
870 xfs_extlen_t *slena, /* aligned extent length */
56b2de80
DC
871 int dir) /* 0 = search right, 1 = search left */
872{
56b2de80
DC
873 xfs_agblock_t new;
874 xfs_agblock_t sdiff;
875 int error;
876 int i;
877
878 /* The good extent is perfect, no need to search. */
879 if (!gdiff)
880 goto out_use_good;
881
882 /*
883 * Look until we find a better one, run out of space or run off the end.
884 */
885 do {
886 error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
887 if (error)
888 goto error0;
19ebedcf 889 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
a2ceac1f 890 xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
56b2de80
DC
891
892 /*
893 * The good extent is closer than this one.
894 */
895 if (!dir) {
ff3263dd
BF
896 if (*sbnoa > args->max_agbno)
897 goto out_use_good;
a2ceac1f 898 if (*sbnoa >= args->agbno + gdiff)
56b2de80
DC
899 goto out_use_good;
900 } else {
ff3263dd
BF
901 if (*sbnoa < args->min_agbno)
902 goto out_use_good;
a2ceac1f 903 if (*sbnoa <= args->agbno - gdiff)
56b2de80
DC
904 goto out_use_good;
905 }
906
907 /*
908 * Same distance, compare length and pick the best.
909 */
910 if (*slena >= args->minlen) {
911 args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
912 xfs_alloc_fix_len(args);
913
914 sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
84a62eea
DC
915 args->alignment,
916 args->userdata, *sbnoa,
a2ceac1f 917 *slena, &new);
56b2de80
DC
918
919 /*
920 * Choose closer size and invalidate other cursor.
921 */
922 if (sdiff < gdiff)
923 goto out_use_search;
924 goto out_use_good;
925 }
926
927 if (!dir)
928 error = xfs_btree_increment(*scur, 0, &i);
929 else
930 error = xfs_btree_decrement(*scur, 0, &i);
931 if (error)
932 goto error0;
933 } while (i);
934
935out_use_good:
936 xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
937 *scur = NULL;
938 return 0;
939
940out_use_search:
941 xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
942 *gcur = NULL;
943 return 0;
944
945error0:
946 /* caller invalidates cursors */
2bd0ea18
NS
947 return error;
948}
949
950/*
951 * Allocate a variable extent near bno in the allocation group agno.
952 * Extent's length (returned in len) will be between minlen and maxlen,
953 * and of the form k * prod + mod unless there's nothing that large.
954 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
955 */
956STATIC int /* error */
957xfs_alloc_ag_vextent_near(
dfc130f3 958 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 959{
dfc130f3
RC
960 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */
961 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */
962 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */
2bd0ea18
NS
963 xfs_agblock_t gtbno; /* start bno of right side entry */
964 xfs_agblock_t gtbnoa; /* aligned ... */
965 xfs_extlen_t gtdiff; /* difference to right side entry */
966 xfs_extlen_t gtlen; /* length of right side entry */
a2ceac1f 967 xfs_extlen_t gtlena; /* aligned ... */
2bd0ea18
NS
968 xfs_agblock_t gtnew; /* useful start bno of right side */
969 int error; /* error code */
970 int i; /* result code, temporary */
971 int j; /* result code, temporary */
972 xfs_agblock_t ltbno; /* start bno of left side entry */
973 xfs_agblock_t ltbnoa; /* aligned ... */
974 xfs_extlen_t ltdiff; /* difference to left side entry */
2bd0ea18 975 xfs_extlen_t ltlen; /* length of left side entry */
a2ceac1f 976 xfs_extlen_t ltlena; /* aligned ... */
2bd0ea18
NS
977 xfs_agblock_t ltnew; /* useful start bno of left side */
978 xfs_extlen_t rlen; /* length of returned extent */
a2ceac1f 979 int forced = 0;
6beba453 980#ifdef DEBUG
2bd0ea18
NS
981 /*
982 * Randomly don't execute the first algorithm.
983 */
2bd0ea18 984 int dofirst; /* set to do first algorithm */
2bd0ea18 985
49f693fa 986 dofirst = prandom_u32() & 1;
2bd0ea18 987#endif
a2ceac1f 988
ff3263dd
BF
989 /* handle unitialized agbno range so caller doesn't have to */
990 if (!args->min_agbno && !args->max_agbno)
991 args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
992 ASSERT(args->min_agbno <= args->max_agbno);
993
994 /* clamp agbno to the range if it's outside */
995 if (args->agbno < args->min_agbno)
996 args->agbno = args->min_agbno;
997 if (args->agbno > args->max_agbno)
998 args->agbno = args->max_agbno;
999
a2ceac1f
DC
1000restart:
1001 bno_cur_lt = NULL;
1002 bno_cur_gt = NULL;
1003 ltlen = 0;
1004 gtlena = 0;
1005 ltlena = 0;
1006
2bd0ea18
NS
1007 /*
1008 * Get a cursor for the by-size btree.
1009 */
b194c7d8
BN
1010 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1011 args->agno, XFS_BTNUM_CNT);
a2ceac1f 1012
2bd0ea18
NS
1013 /*
1014 * See if there are any free extents as big as maxlen.
1015 */
0e266570 1016 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
2bd0ea18
NS
1017 goto error0;
1018 /*
1019 * If none, then pick up the last entry in the tree unless the
1020 * tree is empty.
5000d01d 1021 */
2bd0ea18 1022 if (!i) {
0e266570
NS
1023 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
1024 &ltlen, &i)))
2bd0ea18
NS
1025 goto error0;
1026 if (i == 0 || ltlen == 0) {
1027 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
a2ceac1f 1028 trace_xfs_alloc_near_noentry(args);
2bd0ea18
NS
1029 return 0;
1030 }
1031 ASSERT(i == 1);
1032 }
1033 args->wasfromfl = 0;
a2ceac1f 1034
5000d01d 1035 /*
2bd0ea18
NS
1036 * First algorithm.
1037 * If the requested extent is large wrt the freespaces available
1038 * in this a.g., then the cursor will be pointing to a btree entry
1039 * near the right edge of the tree. If it's in the last btree leaf
1040 * block, then we just examine all the entries in that block
1041 * that are big enough, and pick the best one.
1042 * This is written as a while loop so we can break out of it,
1043 * but we never loop back to the top.
1044 */
1045 while (xfs_btree_islastblock(cnt_cur, 0)) {
1046 xfs_extlen_t bdiff;
0e266570
NS
1047 int besti=0;
1048 xfs_extlen_t blen=0;
1049 xfs_agblock_t bnew=0;
2bd0ea18 1050
6beba453
DC
1051#ifdef DEBUG
1052 if (dofirst)
2bd0ea18
NS
1053 break;
1054#endif
1055 /*
1056 * Start from the entry that lookup found, sequence through
1057 * all larger free blocks. If we're actually pointing at a
1058 * record smaller than maxlen, go to the start of this block,
1059 * and skip all those smaller than minlen.
1060 */
1061 if (ltlen || args->alignment > 1) {
1062 cnt_cur->bc_ptrs[0] = 1;
1063 do {
0e266570
NS
1064 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
1065 &ltlen, &i)))
2bd0ea18 1066 goto error0;
19ebedcf 1067 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18
NS
1068 if (ltlen >= args->minlen)
1069 break;
b194c7d8 1070 if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
2bd0ea18
NS
1071 goto error0;
1072 } while (i);
1073 ASSERT(ltlen >= args->minlen);
1074 if (!i)
1075 break;
1076 }
1077 i = cnt_cur->bc_ptrs[0];
1078 for (j = 1, blen = 0, bdiff = 0;
1079 !error && j && (blen < args->maxlen || bdiff > 0);
b194c7d8 1080 error = xfs_btree_increment(cnt_cur, 0, &j)) {
2bd0ea18
NS
1081 /*
1082 * For each entry, decide if it's better than
1083 * the previous best entry.
1084 */
0e266570 1085 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
2bd0ea18 1086 goto error0;
19ebedcf 1087 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
a2ceac1f
DC
1088 xfs_alloc_compute_aligned(args, ltbno, ltlen,
1089 &ltbnoa, &ltlena);
5e656dbb 1090 if (ltlena < args->minlen)
2bd0ea18 1091 continue;
ff3263dd
BF
1092 if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
1093 continue;
2bd0ea18
NS
1094 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1095 xfs_alloc_fix_len(args);
1096 ASSERT(args->len >= args->minlen);
1097 if (args->len < blen)
1098 continue;
1099 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
84a62eea
DC
1100 args->alignment, args->userdata, ltbnoa,
1101 ltlena, &ltnew);
2bd0ea18
NS
1102 if (ltnew != NULLAGBLOCK &&
1103 (args->len > blen || ltdiff < bdiff)) {
1104 bdiff = ltdiff;
1105 bnew = ltnew;
1106 blen = args->len;
1107 besti = cnt_cur->bc_ptrs[0];
1108 }
1109 }
1110 /*
1111 * It didn't work. We COULD be in a case where
1112 * there's a good record somewhere, so try again.
1113 */
1114 if (blen == 0)
1115 break;
1116 /*
1117 * Point at the best entry, and retrieve it again.
1118 */
1119 cnt_cur->bc_ptrs[0] = besti;
0e266570 1120 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
2bd0ea18 1121 goto error0;
19ebedcf 1122 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
56b2de80 1123 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
2bd0ea18
NS
1124 args->len = blen;
1125 if (!xfs_alloc_fix_minleft(args)) {
1126 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
56b2de80 1127 trace_xfs_alloc_near_nominleft(args);
2bd0ea18
NS
1128 return 0;
1129 }
1130 blen = args->len;
1131 /*
1132 * We are allocating starting at bnew for blen blocks.
1133 */
1134 args->agbno = bnew;
1135 ASSERT(bnew >= ltbno);
56b2de80 1136 ASSERT(bnew + blen <= ltbno + ltlen);
2bd0ea18
NS
1137 /*
1138 * Set up a cursor for the by-bno tree.
1139 */
b194c7d8
BN
1140 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1141 args->agbp, args->agno, XFS_BTNUM_BNO);
2bd0ea18
NS
1142 /*
1143 * Fix up the btree entries.
1144 */
0e266570
NS
1145 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1146 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
2bd0ea18
NS
1147 goto error0;
1148 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1149 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
56b2de80
DC
1150
1151 trace_xfs_alloc_near_first(args);
2bd0ea18
NS
1152 return 0;
1153 }
1154 /*
1155 * Second algorithm.
1156 * Search in the by-bno tree to the left and to the right
1157 * simultaneously, until in each case we find a space big enough,
1158 * or run into the edge of the tree. When we run into the edge,
1159 * we deallocate that cursor.
1160 * If both searches succeed, we compare the two spaces and pick
1161 * the better one.
1162 * With alignment, it's possible for both to fail; the upper
1163 * level algorithm that picks allocation groups for allocations
1164 * is not supposed to do this.
1165 */
1166 /*
1167 * Allocate and initialize the cursor for the leftward search.
1168 */
b194c7d8
BN
1169 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1170 args->agno, XFS_BTNUM_BNO);
2bd0ea18
NS
1171 /*
1172 * Lookup <= bno to find the leftward search's starting point.
1173 */
0e266570 1174 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
2bd0ea18
NS
1175 goto error0;
1176 if (!i) {
1177 /*
1178 * Didn't find anything; use this cursor for the rightward
1179 * search.
1180 */
1181 bno_cur_gt = bno_cur_lt;
062998e3 1182 bno_cur_lt = NULL;
2bd0ea18
NS
1183 }
1184 /*
1185 * Found something. Duplicate the cursor for the rightward search.
1186 */
0e266570 1187 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
2bd0ea18
NS
1188 goto error0;
1189 /*
1190 * Increment the cursor, so we will point at the entry just right
1191 * of the leftward entry if any, or to the leftmost entry.
1192 */
b194c7d8 1193 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
2bd0ea18
NS
1194 goto error0;
1195 if (!i) {
1196 /*
1197 * It failed, there are no rightward entries.
1198 */
1199 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1200 bno_cur_gt = NULL;
1201 }
1202 /*
1203 * Loop going left with the leftward cursor, right with the
1204 * rightward cursor, until either both directions give up or
1205 * we find an entry at least as big as minlen.
1206 */
1207 do {
1208 if (bno_cur_lt) {
0e266570 1209 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
2bd0ea18 1210 goto error0;
19ebedcf 1211 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
a2ceac1f
DC
1212 xfs_alloc_compute_aligned(args, ltbno, ltlen,
1213 &ltbnoa, &ltlena);
ff3263dd 1214 if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
2bd0ea18 1215 break;
b194c7d8 1216 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
2bd0ea18 1217 goto error0;
ff3263dd 1218 if (!i || ltbnoa < args->min_agbno) {
2bd0ea18
NS
1219 xfs_btree_del_cursor(bno_cur_lt,
1220 XFS_BTREE_NOERROR);
1221 bno_cur_lt = NULL;
1222 }
1223 }
1224 if (bno_cur_gt) {
0e266570 1225 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
2bd0ea18 1226 goto error0;
19ebedcf 1227 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
a2ceac1f
DC
1228 xfs_alloc_compute_aligned(args, gtbno, gtlen,
1229 &gtbnoa, &gtlena);
ff3263dd 1230 if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
2bd0ea18 1231 break;
b194c7d8 1232 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
2bd0ea18 1233 goto error0;
ff3263dd 1234 if (!i || gtbnoa > args->max_agbno) {
2bd0ea18
NS
1235 xfs_btree_del_cursor(bno_cur_gt,
1236 XFS_BTREE_NOERROR);
1237 bno_cur_gt = NULL;
1238 }
1239 }
1240 } while (bno_cur_lt || bno_cur_gt);
56b2de80 1241
2bd0ea18
NS
1242 /*
1243 * Got both cursors still active, need to find better entry.
1244 */
1245 if (bno_cur_lt && bno_cur_gt) {
2bd0ea18
NS
1246 if (ltlena >= args->minlen) {
1247 /*
56b2de80 1248 * Left side is good, look for a right side entry.
2bd0ea18
NS
1249 */
1250 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1251 xfs_alloc_fix_len(args);
56b2de80 1252 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
84a62eea
DC
1253 args->alignment, args->userdata, ltbnoa,
1254 ltlena, &ltnew);
56b2de80
DC
1255
1256 error = xfs_alloc_find_best_extent(args,
1257 &bno_cur_lt, &bno_cur_gt,
a2ceac1f
DC
1258 ltdiff, &gtbno, &gtlen,
1259 &gtbnoa, &gtlena,
56b2de80
DC
1260 0 /* search right */);
1261 } else {
1262 ASSERT(gtlena >= args->minlen);
1263
2bd0ea18 1264 /*
56b2de80 1265 * Right side is good, look for a left side entry.
2bd0ea18
NS
1266 */
1267 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1268 xfs_alloc_fix_len(args);
56b2de80 1269 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
84a62eea
DC
1270 args->alignment, args->userdata, gtbnoa,
1271 gtlena, &gtnew);
56b2de80
DC
1272
1273 error = xfs_alloc_find_best_extent(args,
1274 &bno_cur_gt, &bno_cur_lt,
a2ceac1f
DC
1275 gtdiff, &ltbno, &ltlen,
1276 &ltbnoa, &ltlena,
56b2de80 1277 1 /* search left */);
2bd0ea18 1278 }
56b2de80
DC
1279
1280 if (error)
1281 goto error0;
2bd0ea18 1282 }
56b2de80 1283
2bd0ea18
NS
1284 /*
1285 * If we couldn't get anything, give up.
1286 */
1287 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
a2ceac1f
DC
1288 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1289
1290 if (!forced++) {
1291 trace_xfs_alloc_near_busy(args);
1292 xfs_log_force(args->mp, XFS_LOG_SYNC);
1293 goto restart;
1294 }
56b2de80 1295 trace_xfs_alloc_size_neither(args);
2bd0ea18
NS
1296 args->agbno = NULLAGBLOCK;
1297 return 0;
1298 }
56b2de80 1299
2bd0ea18
NS
1300 /*
1301 * At this point we have selected a freespace entry, either to the
1302 * left or to the right. If it's on the right, copy all the
1303 * useful variables to the "left" set so we only have one
1304 * copy of this code.
1305 */
1306 if (bno_cur_gt) {
1307 bno_cur_lt = bno_cur_gt;
1308 bno_cur_gt = NULL;
1309 ltbno = gtbno;
1310 ltbnoa = gtbnoa;
1311 ltlen = gtlen;
1312 ltlena = gtlena;
1313 j = 1;
1314 } else
1315 j = 0;
56b2de80 1316
2bd0ea18
NS
1317 /*
1318 * Fix up the length and compute the useful address.
1319 */
2bd0ea18
NS
1320 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1321 xfs_alloc_fix_len(args);
1322 if (!xfs_alloc_fix_minleft(args)) {
56b2de80 1323 trace_xfs_alloc_near_nominleft(args);
2bd0ea18
NS
1324 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1325 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1326 return 0;
1327 }
1328 rlen = args->len;
a2ceac1f 1329 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
84a62eea 1330 args->userdata, ltbnoa, ltlena, &ltnew);
2bd0ea18 1331 ASSERT(ltnew >= ltbno);
a2ceac1f 1332 ASSERT(ltnew + rlen <= ltbnoa + ltlena);
6e3140c7 1333 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
ff3263dd 1334 ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
2bd0ea18 1335 args->agbno = ltnew;
a2ceac1f 1336
0e266570
NS
1337 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1338 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
2bd0ea18 1339 goto error0;
56b2de80
DC
1340
1341 if (j)
1342 trace_xfs_alloc_near_greater(args);
1343 else
1344 trace_xfs_alloc_near_lesser(args);
1345
2bd0ea18
NS
1346 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1347 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1348 return 0;
1349
1350 error0:
56b2de80 1351 trace_xfs_alloc_near_error(args);
2bd0ea18
NS
1352 if (cnt_cur != NULL)
1353 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1354 if (bno_cur_lt != NULL)
1355 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1356 if (bno_cur_gt != NULL)
1357 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1358 return error;
1359}
1360
1361/*
1362 * Allocate a variable extent anywhere in the allocation group agno.
1363 * Extent's length (returned in len) will be between minlen and maxlen,
1364 * and of the form k * prod + mod unless there's nothing that large.
1365 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1366 */
1367STATIC int /* error */
1368xfs_alloc_ag_vextent_size(
dfc130f3 1369 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 1370{
dfc130f3
RC
1371 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */
1372 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */
2bd0ea18
NS
1373 int error; /* error result */
1374 xfs_agblock_t fbno; /* start of found freespace */
1375 xfs_extlen_t flen; /* length of found freespace */
2bd0ea18
NS
1376 int i; /* temp status variable */
1377 xfs_agblock_t rbno; /* returned block number */
1378 xfs_extlen_t rlen; /* length of returned extent */
a2ceac1f 1379 int forced = 0;
2bd0ea18 1380
a2ceac1f 1381restart:
2bd0ea18
NS
1382 /*
1383 * Allocate and initialize a cursor for the by-size btree.
1384 */
b194c7d8
BN
1385 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1386 args->agno, XFS_BTNUM_CNT);
2bd0ea18 1387 bno_cur = NULL;
a2ceac1f 1388
2bd0ea18
NS
1389 /*
1390 * Look for an entry >= maxlen+alignment-1 blocks.
1391 */
0e266570
NS
1392 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1393 args->maxlen + args->alignment - 1, &i)))
2bd0ea18 1394 goto error0;
a2ceac1f 1395
2bd0ea18 1396 /*
a2ceac1f
DC
1397 * If none or we have busy extents that we cannot allocate from, then
1398 * we have to settle for a smaller extent. In the case that there are
1399 * no large extents, this will return the last entry in the tree unless
1400 * the tree is empty. In the case that there are only busy large
1401 * extents, this will return the largest small extent unless there
1402 * are no smaller extents available.
5000d01d 1403 */
a2ceac1f
DC
1404 if (!i || forced > 1) {
1405 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1406 &fbno, &flen, &i);
1407 if (error)
2bd0ea18
NS
1408 goto error0;
1409 if (i == 0 || flen == 0) {
1410 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
56b2de80 1411 trace_xfs_alloc_size_noentry(args);
2bd0ea18
NS
1412 return 0;
1413 }
1414 ASSERT(i == 1);
a2ceac1f
DC
1415 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1416 } else {
1417 /*
1418 * Search for a non-busy extent that is large enough.
1419 * If we are at low space, don't check, or if we fall of
1420 * the end of the btree, turn off the busy check and
1421 * restart.
1422 */
1423 for (;;) {
1424 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1425 if (error)
1426 goto error0;
19ebedcf 1427 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
a2ceac1f
DC
1428
1429 xfs_alloc_compute_aligned(args, fbno, flen,
1430 &rbno, &rlen);
1431
1432 if (rlen >= args->maxlen)
1433 break;
1434
1435 error = xfs_btree_increment(cnt_cur, 0, &i);
1436 if (error)
1437 goto error0;
1438 if (i == 0) {
1439 /*
1440 * Our only valid extents must have been busy.
1441 * Make it unbusy by forcing the log out and
1442 * retrying. If we've been here before, forcing
1443 * the log isn't making the extents available,
1444 * which means they have probably been freed in
1445 * this transaction. In that case, we have to
1446 * give up on them and we'll attempt a minlen
1447 * allocation the next time around.
1448 */
1449 xfs_btree_del_cursor(cnt_cur,
1450 XFS_BTREE_NOERROR);
1451 trace_xfs_alloc_size_busy(args);
1452 if (!forced++)
1453 xfs_log_force(args->mp, XFS_LOG_SYNC);
1454 goto restart;
1455 }
1456 }
2bd0ea18 1457 }
a2ceac1f 1458
2bd0ea18
NS
1459 /*
1460 * In the first case above, we got the last entry in the
1461 * by-size btree. Now we check to see if the space hits maxlen
1462 * once aligned; if not, we search left for something better.
1463 * This can't happen in the second case above.
1464 */
2bd0ea18 1465 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
19ebedcf 1466 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
2bd0ea18
NS
1467 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1468 if (rlen < args->maxlen) {
1469 xfs_agblock_t bestfbno;
1470 xfs_extlen_t bestflen;
1471 xfs_agblock_t bestrbno;
1472 xfs_extlen_t bestrlen;
1473
1474 bestrlen = rlen;
1475 bestrbno = rbno;
1476 bestflen = flen;
1477 bestfbno = fbno;
1478 for (;;) {
b194c7d8 1479 if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
2bd0ea18
NS
1480 goto error0;
1481 if (i == 0)
1482 break;
0e266570
NS
1483 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1484 &i)))
2bd0ea18 1485 goto error0;
19ebedcf 1486 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18
NS
1487 if (flen < bestrlen)
1488 break;
a2ceac1f
DC
1489 xfs_alloc_compute_aligned(args, fbno, flen,
1490 &rbno, &rlen);
2bd0ea18 1491 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
19ebedcf 1492 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
2bd0ea18
NS
1493 (rlen <= flen && rbno + rlen <= fbno + flen),
1494 error0);
1495 if (rlen > bestrlen) {
1496 bestrlen = rlen;
1497 bestrbno = rbno;
1498 bestflen = flen;
1499 bestfbno = fbno;
1500 if (rlen == args->maxlen)
1501 break;
1502 }
5000d01d 1503 }
0e266570
NS
1504 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1505 &i)))
2bd0ea18 1506 goto error0;
19ebedcf 1507 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18
NS
1508 rlen = bestrlen;
1509 rbno = bestrbno;
1510 flen = bestflen;
1511 fbno = bestfbno;
1512 }
1513 args->wasfromfl = 0;
1514 /*
1515 * Fix up the length.
1516 */
1517 args->len = rlen;
a2ceac1f
DC
1518 if (rlen < args->minlen) {
1519 if (!forced++) {
1520 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1521 trace_xfs_alloc_size_busy(args);
1522 xfs_log_force(args->mp, XFS_LOG_SYNC);
1523 goto restart;
1524 }
1525 goto out_nominleft;
2bd0ea18 1526 }
a2ceac1f
DC
1527 xfs_alloc_fix_len(args);
1528
1529 if (!xfs_alloc_fix_minleft(args))
1530 goto out_nominleft;
2bd0ea18 1531 rlen = args->len;
19ebedcf 1532 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
2bd0ea18
NS
1533 /*
1534 * Allocate and initialize a cursor for the by-block tree.
1535 */
b194c7d8
BN
1536 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1537 args->agno, XFS_BTNUM_BNO);
0e266570
NS
1538 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1539 rbno, rlen, XFSA_FIXUP_CNT_OK)))
2bd0ea18
NS
1540 goto error0;
1541 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1542 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1543 cnt_cur = bno_cur = NULL;
1544 args->len = rlen;
1545 args->agbno = rbno;
19ebedcf 1546 XFS_WANT_CORRUPTED_GOTO(args->mp,
2bd0ea18 1547 args->agbno + args->len <=
6e3140c7 1548 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
2bd0ea18 1549 error0);
56b2de80 1550 trace_xfs_alloc_size_done(args);
2bd0ea18
NS
1551 return 0;
1552
1553error0:
56b2de80 1554 trace_xfs_alloc_size_error(args);
2bd0ea18
NS
1555 if (cnt_cur)
1556 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1557 if (bno_cur)
1558 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1559 return error;
a2ceac1f
DC
1560
1561out_nominleft:
1562 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1563 trace_xfs_alloc_size_nominleft(args);
1564 args->agbno = NULLAGBLOCK;
1565 return 0;
2bd0ea18
NS
1566}
1567
1568/*
1569 * Deal with the case where only small freespaces remain.
1570 * Either return the contents of the last freespace record,
1571 * or allocate space from the freelist if there is nothing in the tree.
1572 */
1573STATIC int /* error */
1574xfs_alloc_ag_vextent_small(
dfc130f3
RC
1575 xfs_alloc_arg_t *args, /* allocation argument structure */
1576 xfs_btree_cur_t *ccur, /* by-size cursor */
1577 xfs_agblock_t *fbnop, /* result block number */
1578 xfs_extlen_t *flenp, /* result length */
2bd0ea18
NS
1579 int *stat) /* status: 0-freelist, 1-normal/none */
1580{
1581 int error;
1582 xfs_agblock_t fbno;
1583 xfs_extlen_t flen;
2bd0ea18
NS
1584 int i;
1585
b194c7d8 1586 if ((error = xfs_btree_decrement(ccur, 0, &i)))
2bd0ea18
NS
1587 goto error0;
1588 if (i) {
0e266570 1589 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
2bd0ea18 1590 goto error0;
19ebedcf 1591 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
2bd0ea18
NS
1592 }
1593 /*
1594 * Nothing in the btree, try the freelist. Make sure
1595 * to respect minleft even when pulling from the
1596 * freelist.
1597 */
1598 else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
6e3140c7
NS
1599 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1600 > args->minleft)) {
cdded3d8
DC
1601 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1602 if (error)
2bd0ea18
NS
1603 goto error0;
1604 if (fbno != NULLAGBLOCK) {
a2ceac1f
DC
1605 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1606 args->userdata);
1607
2bd0ea18
NS
1608 if (args->userdata) {
1609 xfs_buf_t *bp;
1610
1611 bp = xfs_btree_get_bufs(args->mp, args->tp,
1612 args->agno, fbno, 0);
1613 xfs_trans_binval(args->tp, bp);
2bd0ea18
NS
1614 }
1615 args->len = 1;
1616 args->agbno = fbno;
19ebedcf 1617 XFS_WANT_CORRUPTED_GOTO(args->mp,
2bd0ea18 1618 args->agbno + args->len <=
6e3140c7 1619 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
2bd0ea18
NS
1620 error0);
1621 args->wasfromfl = 1;
56b2de80 1622 trace_xfs_alloc_small_freelist(args);
2bd0ea18
NS
1623 *stat = 0;
1624 return 0;
1625 }
1626 /*
1627 * Nothing in the freelist.
1628 */
1629 else
1630 flen = 0;
1631 }
1632 /*
1633 * Can't allocate from the freelist for some reason.
1634 */
5e656dbb
BN
1635 else {
1636 fbno = NULLAGBLOCK;
2bd0ea18 1637 flen = 0;
5e656dbb 1638 }
2bd0ea18
NS
1639 /*
1640 * Can't do the allocation, give up.
1641 */
1642 if (flen < args->minlen) {
1643 args->agbno = NULLAGBLOCK;
56b2de80 1644 trace_xfs_alloc_small_notenough(args);
2bd0ea18
NS
1645 flen = 0;
1646 }
1647 *fbnop = fbno;
1648 *flenp = flen;
1649 *stat = 1;
56b2de80 1650 trace_xfs_alloc_small_done(args);
2bd0ea18
NS
1651 return 0;
1652
1653error0:
56b2de80 1654 trace_xfs_alloc_small_error(args);
2bd0ea18
NS
1655 return error;
1656}
1657
1658/*
1659 * Free the extent starting at agno/bno for length.
1660 */
85aec44f 1661STATIC int
2bd0ea18 1662xfs_free_ag_extent(
85aec44f
DW
1663 xfs_trans_t *tp,
1664 xfs_buf_t *agbp,
1665 xfs_agnumber_t agno,
1666 xfs_agblock_t bno,
1667 xfs_extlen_t len,
1668 struct xfs_owner_info *oinfo,
1669 int isfl)
2bd0ea18 1670{
dfc130f3
RC
1671 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
1672 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
2bd0ea18 1673 int error; /* error return value */
2bd0ea18
NS
1674 xfs_agblock_t gtbno; /* start of right neighbor block */
1675 xfs_extlen_t gtlen; /* length of right neighbor block */
1676 int haveleft; /* have a left neighbor block */
1677 int haveright; /* have a right neighbor block */
1678 int i; /* temp, result code */
1679 xfs_agblock_t ltbno; /* start of left neighbor block */
1680 xfs_extlen_t ltlen; /* length of left neighbor block */
1681 xfs_mount_t *mp; /* mount point struct for filesystem */
1682 xfs_agblock_t nbno; /* new starting block of freespace */
1683 xfs_extlen_t nlen; /* new length of freespace */
a2ceac1f 1684 xfs_perag_t *pag; /* per allocation group data */
2bd0ea18 1685
631ac87a 1686 bno_cur = cnt_cur = NULL;
2bd0ea18 1687 mp = tp->t_mountp;
631ac87a
DW
1688
1689 if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
1690 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1691 if (error)
1692 goto error0;
1693 }
1694
5000d01d 1695 /*
2bd0ea18
NS
1696 * Allocate and initialize a cursor for the by-block btree.
1697 */
b194c7d8 1698 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
5000d01d 1699 /*
2bd0ea18
NS
1700 * Look for a neighboring block on the left (lower block numbers)
1701 * that is contiguous with this space.
1702 */
0e266570 1703 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
2bd0ea18
NS
1704 goto error0;
1705 if (haveleft) {
1706 /*
1707 * There is a block to our left.
1708 */
0e266570 1709 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
2bd0ea18 1710 goto error0;
19ebedcf 1711 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1712 /*
1713 * It's not contiguous, though.
1714 */
1715 if (ltbno + ltlen < bno)
1716 haveleft = 0;
1717 else {
1718 /*
1719 * If this failure happens the request to free this
1720 * space was invalid, it's (partly) already free.
1721 * Very bad.
1722 */
19ebedcf
DC
1723 XFS_WANT_CORRUPTED_GOTO(mp,
1724 ltbno + ltlen <= bno, error0);
2bd0ea18
NS
1725 }
1726 }
5000d01d 1727 /*
2bd0ea18
NS
1728 * Look for a neighboring block on the right (higher block numbers)
1729 * that is contiguous with this space.
1730 */
b194c7d8 1731 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
2bd0ea18
NS
1732 goto error0;
1733 if (haveright) {
1734 /*
1735 * There is a block to our right.
1736 */
0e266570 1737 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
2bd0ea18 1738 goto error0;
19ebedcf 1739 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1740 /*
1741 * It's not contiguous, though.
1742 */
1743 if (bno + len < gtbno)
1744 haveright = 0;
1745 else {
1746 /*
1747 * If this failure happens the request to free this
1748 * space was invalid, it's (partly) already free.
1749 * Very bad.
1750 */
19ebedcf 1751 XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
2bd0ea18
NS
1752 }
1753 }
1754 /*
1755 * Now allocate and initialize a cursor for the by-size tree.
1756 */
b194c7d8 1757 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
2bd0ea18
NS
1758 /*
1759 * Have both left and right contiguous neighbors.
1760 * Merge all three into a single free block.
1761 */
1762 if (haveleft && haveright) {
1763 /*
1764 * Delete the old by-size entry on the left.
1765 */
0e266570 1766 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18 1767 goto error0;
19ebedcf 1768 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 1769 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1770 goto error0;
19ebedcf 1771 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1772 /*
1773 * Delete the old by-size entry on the right.
1774 */
0e266570 1775 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18 1776 goto error0;
19ebedcf 1777 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 1778 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1779 goto error0;
19ebedcf 1780 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1781 /*
1782 * Delete the old by-block entry for the right block.
1783 */
b194c7d8 1784 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18 1785 goto error0;
19ebedcf 1786 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1787 /*
1788 * Move the by-block cursor back to the left neighbor.
1789 */
b194c7d8 1790 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18 1791 goto error0;
19ebedcf 1792 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1793#ifdef DEBUG
1794 /*
1795 * Check that this is the right record: delete didn't
1796 * mangle the cursor.
1797 */
1798 {
1799 xfs_agblock_t xxbno;
1800 xfs_extlen_t xxlen;
1801
0e266570
NS
1802 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1803 &i)))
2bd0ea18 1804 goto error0;
19ebedcf 1805 XFS_WANT_CORRUPTED_GOTO(mp,
2bd0ea18
NS
1806 i == 1 && xxbno == ltbno && xxlen == ltlen,
1807 error0);
1808 }
1809#endif
1810 /*
1811 * Update remaining by-block entry to the new, joined block.
1812 */
1813 nbno = ltbno;
1814 nlen = len + ltlen + gtlen;
0e266570 1815 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1816 goto error0;
1817 }
1818 /*
1819 * Have only a left contiguous neighbor.
1820 * Merge it together with the new freespace.
1821 */
1822 else if (haveleft) {
1823 /*
1824 * Delete the old by-size entry on the left.
1825 */
0e266570 1826 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18 1827 goto error0;
19ebedcf 1828 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 1829 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1830 goto error0;
19ebedcf 1831 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1832 /*
1833 * Back up the by-block cursor to the left neighbor, and
1834 * update its length.
1835 */
b194c7d8 1836 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18 1837 goto error0;
19ebedcf 1838 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1839 nbno = ltbno;
1840 nlen = len + ltlen;
0e266570 1841 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1842 goto error0;
1843 }
1844 /*
1845 * Have only a right contiguous neighbor.
1846 * Merge it together with the new freespace.
1847 */
1848 else if (haveright) {
1849 /*
1850 * Delete the old by-size entry on the right.
1851 */
0e266570 1852 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18 1853 goto error0;
19ebedcf 1854 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
b194c7d8 1855 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18 1856 goto error0;
19ebedcf 1857 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18 1858 /*
5000d01d 1859 * Update the starting block and length of the right
2bd0ea18
NS
1860 * neighbor in the by-block tree.
1861 */
1862 nbno = bno;
1863 nlen = len + gtlen;
0e266570 1864 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1865 goto error0;
1866 }
1867 /*
1868 * No contiguous neighbors.
1869 * Insert the new freespace into the by-block tree.
1870 */
1871 else {
1872 nbno = bno;
1873 nlen = len;
b194c7d8 1874 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18 1875 goto error0;
19ebedcf 1876 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1877 }
1878 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1879 bno_cur = NULL;
1880 /*
1881 * In all cases we need to insert the new freespace in the by-size tree.
1882 */
0e266570 1883 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2bd0ea18 1884 goto error0;
19ebedcf 1885 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
b194c7d8 1886 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18 1887 goto error0;
19ebedcf 1888 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
2bd0ea18
NS
1889 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1890 cnt_cur = NULL;
a2ceac1f 1891
2bd0ea18
NS
1892 /*
1893 * Update the freespace totals in the ag and superblock.
1894 */
a2ceac1f
DC
1895 pag = xfs_perag_get(mp, agno);
1896 error = xfs_alloc_update_counters(tp, pag, agbp, len);
1897 xfs_perag_put(pag);
1898 if (error)
1899 goto error0;
1900
1901 if (!isfl)
1902 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
79896434
BD
1903 XFS_STATS_INC(mp, xs_freex);
1904 XFS_STATS_ADD(mp, xs_freeb, len);
56b2de80
DC
1905
1906 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
3e535bba 1907
2bd0ea18
NS
1908 return 0;
1909
1910 error0:
56b2de80 1911 trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
2bd0ea18
NS
1912 if (bno_cur)
1913 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1914 if (cnt_cur)
1915 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1916 return error;
1917}
1918
5000d01d 1919/*
2bd0ea18
NS
1920 * Visible (exported) allocation/free functions.
1921 * Some of these are used just by xfs_alloc_btree.c and this file.
1922 */
1923
1924/*
1925 * Compute and fill in value of m_ag_maxlevels.
1926 */
1927void
1928xfs_alloc_compute_maxlevels(
1929 xfs_mount_t *mp) /* file system mount structure */
1930{
730e2a19
DW
1931 mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp, mp->m_alloc_mnr,
1932 (mp->m_sb.sb_agblocks + 1) / 2);
2bd0ea18
NS
1933}
1934
56b2de80
DC
1935/*
1936 * Find the length of the longest extent in an AG.
1937 */
1938xfs_extlen_t
1939xfs_alloc_longest_free_extent(
1940 struct xfs_mount *mp,
72bda06d
DC
1941 struct xfs_perag *pag,
1942 xfs_extlen_t need)
56b2de80 1943{
72bda06d 1944 xfs_extlen_t delta = 0;
56b2de80 1945
56b2de80
DC
1946 if (need > pag->pagf_flcount)
1947 delta = need - pag->pagf_flcount;
1948
1949 if (pag->pagf_longest > delta)
1950 return pag->pagf_longest - delta;
1951 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1952}
1953
de046644
DC
1954unsigned int
1955xfs_alloc_min_freelist(
1956 struct xfs_mount *mp,
1957 struct xfs_perag *pag)
1958{
1959 unsigned int min_free;
1960
1961 /* space needed by-bno freespace btree */
1962 min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
1963 mp->m_ag_maxlevels);
1964 /* space needed by-size freespace btree */
1965 min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
1966 mp->m_ag_maxlevels);
b8a8d6e5
DW
1967 /* space needed reverse mapping used space btree */
1968 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
1969 min_free += min_t(unsigned int,
1970 pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
1971 mp->m_rmap_maxlevels);
de046644
DC
1972
1973 return min_free;
1974}
1975
5515b7c1
DC
1976/*
1977 * Check if the operation we are fixing up the freelist for should go ahead or
1978 * not. If we are freeing blocks, we always allow it, otherwise the allocation
1979 * is dependent on whether the size and shape of free space available will
1980 * permit the requested allocation to take place.
1981 */
1982static bool
1983xfs_alloc_space_available(
1984 struct xfs_alloc_arg *args,
1985 xfs_extlen_t min_free,
1986 int flags)
1987{
1988 struct xfs_perag *pag = args->pag;
1989 xfs_extlen_t longest;
1990 int available;
1991
1992 if (flags & XFS_ALLOC_FLAG_FREEING)
1993 return true;
1994
1995 /* do we have enough contiguous free space for the allocation? */
1996 longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free);
1997 if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
1998 return false;
1999
2000 /* do have enough free space remaining for the allocation? */
2001 available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
2002 min_free - args->total);
2003 if (available < (int)args->minleft)
2004 return false;
2005
2006 return true;
2007}
2008
2bd0ea18
NS
2009/*
2010 * Decide whether to use this allocation group for this allocation.
2011 * If so, fix up the btree freelist's size.
2bd0ea18 2012 */
ff105f75 2013int /* error */
2bd0ea18 2014xfs_alloc_fix_freelist(
c98e644e
DC
2015 struct xfs_alloc_arg *args, /* allocation argument structure */
2016 int flags) /* XFS_ALLOC_FLAG_... */
2bd0ea18 2017{
c98e644e
DC
2018 struct xfs_mount *mp = args->mp;
2019 struct xfs_perag *pag = args->pag;
2020 struct xfs_trans *tp = args->tp;
2021 struct xfs_buf *agbp = NULL;
2022 struct xfs_buf *agflbp = NULL;
2023 struct xfs_alloc_arg targs; /* local allocation arguments */
2024 xfs_agblock_t bno; /* freelist block */
2025 xfs_extlen_t need; /* total blocks needed in freelist */
fcdd428c 2026 int error = 0;
c98e644e 2027
2bd0ea18 2028 if (!pag->pagf_init) {
c98e644e
DC
2029 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2030 if (error)
2031 goto out_no_agbp;
2bd0ea18 2032 if (!pag->pagf_init) {
5e656dbb
BN
2033 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2034 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
c98e644e 2035 goto out_agbp_relse;
2bd0ea18 2036 }
c98e644e 2037 }
34317449 2038
5e656dbb 2039 /*
c98e644e
DC
2040 * If this is a metadata preferred pag and we are user data then try
2041 * somewhere else if we are not being asked to try harder at this
2042 * point
34317449 2043 */
5e656dbb
BN
2044 if (pag->pagf_metadata && args->userdata &&
2045 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
2046 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
c98e644e 2047 goto out_agbp_relse;
34317449
NS
2048 }
2049
de046644 2050 need = xfs_alloc_min_freelist(mp, pag);
c98e644e
DC
2051 if (!xfs_alloc_space_available(args, need, flags))
2052 goto out_agbp_relse;
5e656dbb 2053
2bd0ea18
NS
2054 /*
2055 * Get the a.g. freespace buffer.
2056 * Can fail if we're not blocking on locks, and it's held.
2057 */
c98e644e
DC
2058 if (!agbp) {
2059 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
2060 if (error)
2061 goto out_no_agbp;
2062 if (!agbp) {
5e656dbb
BN
2063 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
2064 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
c98e644e 2065 goto out_no_agbp;
2bd0ea18
NS
2066 }
2067 }
72bda06d 2068
72bda06d 2069 /* If there isn't enough total space or single-extent, reject it. */
de046644 2070 need = xfs_alloc_min_freelist(mp, pag);
c98e644e
DC
2071 if (!xfs_alloc_space_available(args, need, flags))
2072 goto out_agbp_relse;
5515b7c1 2073
2bd0ea18
NS
2074 /*
2075 * Make the freelist shorter if it's too long.
72bda06d 2076 *
c98e644e
DC
2077 * Note that from this point onwards, we will always release the agf and
2078 * agfl buffers on error. This handles the case where we error out and
2079 * the buffers are clean or may not have been joined to the transaction
2080 * and hence need to be released manually. If they have been joined to
2081 * the transaction, then xfs_trans_brelse() will handle them
2082 * appropriately based on the recursion count and dirty state of the
2083 * buffer.
2084 *
72bda06d
DC
2085 * XXX (dgc): When we have lots of free space, does this buy us
2086 * anything other than extra overhead when we need to put more blocks
2087 * back on the free list? Maybe we should only do this when space is
2088 * getting low or the AGFL is more than half full?
2bd0ea18 2089 */
85aec44f 2090 xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
72bda06d
DC
2091 while (pag->pagf_flcount > need) {
2092 struct xfs_buf *bp;
2bd0ea18 2093
5e656dbb
BN
2094 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
2095 if (error)
c98e644e 2096 goto out_agbp_relse;
85aec44f
DW
2097 error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
2098 &targs.oinfo, 1);
72bda06d 2099 if (error)
c98e644e 2100 goto out_agbp_relse;
2bd0ea18
NS
2101 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
2102 xfs_trans_binval(tp, bp);
2bd0ea18 2103 }
72bda06d 2104
a2ceac1f 2105 memset(&targs, 0, sizeof(targs));
2bd0ea18
NS
2106 targs.tp = tp;
2107 targs.mp = mp;
85aec44f 2108 xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
2bd0ea18
NS
2109 targs.agbp = agbp;
2110 targs.agno = args->agno;
2bd0ea18
NS
2111 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
2112 targs.type = XFS_ALLOCTYPE_THIS_AG;
2113 targs.pag = pag;
72bda06d
DC
2114 error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
2115 if (error)
c98e644e 2116 goto out_agbp_relse;
72bda06d
DC
2117
2118 /* Make the freelist longer if it's too short. */
2119 while (pag->pagf_flcount < need) {
2bd0ea18 2120 targs.agbno = 0;
72bda06d
DC
2121 targs.maxlen = need - pag->pagf_flcount;
2122
2123 /* Allocate as many blocks as possible at once. */
2124 error = xfs_alloc_ag_vextent(&targs);
c98e644e
DC
2125 if (error)
2126 goto out_agflbp_relse;
2127
2bd0ea18 2128 /*
dfc130f3
RC
2129 * Stop if we run out. Won't happen if callers are obeying
2130 * the restrictions correctly. Can happen for free calls
2bd0ea18
NS
2131 * on a completely full ag.
2132 */
5e656dbb
BN
2133 if (targs.agbno == NULLAGBLOCK) {
2134 if (flags & XFS_ALLOC_FLAG_FREEING)
2135 break;
c98e644e 2136 goto out_agflbp_relse;
5e656dbb 2137 }
2bd0ea18
NS
2138 /*
2139 * Put each allocated block on the list.
2140 */
2141 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
5e656dbb
BN
2142 error = xfs_alloc_put_freelist(tp, agbp,
2143 agflbp, bno, 0);
2144 if (error)
c98e644e 2145 goto out_agflbp_relse;
2bd0ea18
NS
2146 }
2147 }
cb4deb22 2148 xfs_trans_brelse(tp, agflbp);
2bd0ea18
NS
2149 args->agbp = agbp;
2150 return 0;
c98e644e
DC
2151
2152out_agflbp_relse:
2153 xfs_trans_brelse(tp, agflbp);
2154out_agbp_relse:
2155 if (agbp)
2156 xfs_trans_brelse(tp, agbp);
2157out_no_agbp:
2158 args->agbp = NULL;
2159 return error;
2bd0ea18
NS
2160}
2161
2162/*
2163 * Get a block from the freelist.
2164 * Returns with the buffer for the block gotten.
2165 */
2166int /* error */
2167xfs_alloc_get_freelist(
2168 xfs_trans_t *tp, /* transaction pointer */
2169 xfs_buf_t *agbp, /* buffer containing the agf structure */
cdded3d8
DC
2170 xfs_agblock_t *bnop, /* block address retrieved from freelist */
2171 int btreeblk) /* destination is a AGF btree */
2bd0ea18
NS
2172{
2173 xfs_agf_t *agf; /* a.g. freespace structure */
2bd0ea18
NS
2174 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
2175 xfs_agblock_t bno; /* block number returned */
dd5b876e 2176 __be32 *agfl_bno;
2bd0ea18 2177 int error;
cdded3d8 2178 int logflags;
dd5b876e 2179 xfs_mount_t *mp = tp->t_mountp;
2bd0ea18
NS
2180 xfs_perag_t *pag; /* per allocation group data */
2181
2bd0ea18
NS
2182 /*
2183 * Freelist is empty, give up.
2184 */
dd5b876e 2185 agf = XFS_BUF_TO_AGF(agbp);
46eca962 2186 if (!agf->agf_flcount) {
2bd0ea18
NS
2187 *bnop = NULLAGBLOCK;
2188 return 0;
2189 }
2190 /*
2191 * Read the array of free blocks.
2192 */
dd5b876e
DC
2193 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2194 &agflbp);
2195 if (error)
2bd0ea18 2196 return error;
dd5b876e
DC
2197
2198
2bd0ea18
NS
2199 /*
2200 * Get the block number and update the data structures.
2201 */
dd5b876e
DC
2202 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2203 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
5e656dbb 2204 be32_add_cpu(&agf->agf_flfirst, 1);
2bd0ea18 2205 xfs_trans_brelse(tp, agflbp);
6e3140c7 2206 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
46eca962 2207 agf->agf_flfirst = 0;
56b2de80
DC
2208
2209 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
5e656dbb 2210 be32_add_cpu(&agf->agf_flcount, -1);
2bd0ea18
NS
2211 xfs_trans_agflist_delta(tp, -1);
2212 pag->pagf_flcount--;
56b2de80 2213 xfs_perag_put(pag);
cdded3d8
DC
2214
2215 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2216 if (btreeblk) {
5e656dbb 2217 be32_add_cpu(&agf->agf_btreeblks, 1);
cdded3d8
DC
2218 pag->pagf_btreeblks++;
2219 logflags |= XFS_AGF_BTREEBLKS;
2220 }
2221
cdded3d8 2222 xfs_alloc_log_agf(tp, agbp, logflags);
2bd0ea18 2223 *bnop = bno;
3e535bba 2224
2bd0ea18
NS
2225 return 0;
2226}
2227
2228/*
2229 * Log the given fields from the agf structure.
2230 */
2231void
2232xfs_alloc_log_agf(
2233 xfs_trans_t *tp, /* transaction pointer */
2234 xfs_buf_t *bp, /* buffer for a.g. freelist header */
dfc130f3 2235 int fields) /* mask of fields to be logged (XFS_AGF_...) */
2bd0ea18
NS
2236{
2237 int first; /* first byte offset */
2238 int last; /* last byte offset */
2239 static const short offsets[] = {
2240 offsetof(xfs_agf_t, agf_magicnum),
2241 offsetof(xfs_agf_t, agf_versionnum),
2242 offsetof(xfs_agf_t, agf_seqno),
2243 offsetof(xfs_agf_t, agf_length),
2244 offsetof(xfs_agf_t, agf_roots[0]),
2245 offsetof(xfs_agf_t, agf_levels[0]),
2246 offsetof(xfs_agf_t, agf_flfirst),
2247 offsetof(xfs_agf_t, agf_fllast),
2248 offsetof(xfs_agf_t, agf_flcount),
2249 offsetof(xfs_agf_t, agf_freeblks),
2250 offsetof(xfs_agf_t, agf_longest),
cdded3d8 2251 offsetof(xfs_agf_t, agf_btreeblks),
dd5b876e 2252 offsetof(xfs_agf_t, agf_uuid),
2bd0ea18
NS
2253 sizeof(xfs_agf_t)
2254 };
2255
56b2de80
DC
2256 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2257
bdc16ee5 2258 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
dd5b876e 2259
2bd0ea18
NS
2260 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2261 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2262}
2263
2264/*
2265 * Interface for inode allocation to force the pag data to be initialized.
2266 */
2267int /* error */
2268xfs_alloc_pagf_init(
2269 xfs_mount_t *mp, /* file system mount structure */
2270 xfs_trans_t *tp, /* transaction pointer */
2271 xfs_agnumber_t agno, /* allocation group number */
2272 int flags) /* XFS_ALLOC_FLAGS_... */
2273{
7a3bffe4 2274 xfs_buf_t *bp;
2bd0ea18
NS
2275 int error;
2276
0e266570 2277 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2bd0ea18
NS
2278 return error;
2279 if (bp)
2280 xfs_trans_brelse(tp, bp);
2281 return 0;
2282}
2283
2284/*
2285 * Put the block on the freelist for the allocation group.
2286 */
2287int /* error */
2288xfs_alloc_put_freelist(
2289 xfs_trans_t *tp, /* transaction pointer */
2290 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2291 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
cdded3d8
DC
2292 xfs_agblock_t bno, /* block being freed */
2293 int btreeblk) /* block came from a AGF btree */
2bd0ea18
NS
2294{
2295 xfs_agf_t *agf; /* a.g. freespace structure */
5e656dbb 2296 __be32 *blockp;/* pointer to array entry */
2bd0ea18 2297 int error;
cdded3d8 2298 int logflags;
2bd0ea18
NS
2299 xfs_mount_t *mp; /* mount structure */
2300 xfs_perag_t *pag; /* per allocation group data */
dd5b876e
DC
2301 __be32 *agfl_bno;
2302 int startoff;
2bd0ea18
NS
2303
2304 agf = XFS_BUF_TO_AGF(agbp);
2305 mp = tp->t_mountp;
2306
2307 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
6e3140c7 2308 be32_to_cpu(agf->agf_seqno), &agflbp)))
2bd0ea18 2309 return error;
5e656dbb 2310 be32_add_cpu(&agf->agf_fllast, 1);
6e3140c7 2311 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
46eca962 2312 agf->agf_fllast = 0;
56b2de80
DC
2313
2314 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
5e656dbb 2315 be32_add_cpu(&agf->agf_flcount, 1);
2bd0ea18
NS
2316 xfs_trans_agflist_delta(tp, 1);
2317 pag->pagf_flcount++;
cdded3d8
DC
2318
2319 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2320 if (btreeblk) {
5e656dbb 2321 be32_add_cpu(&agf->agf_btreeblks, -1);
cdded3d8
DC
2322 pag->pagf_btreeblks--;
2323 logflags |= XFS_AGF_BTREEBLKS;
2324 }
56b2de80 2325 xfs_perag_put(pag);
cdded3d8 2326
5e656dbb
BN
2327 xfs_alloc_log_agf(tp, agbp, logflags);
2328
6e3140c7 2329 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
dd5b876e
DC
2330
2331 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2332 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
5e656dbb 2333 *blockp = cpu_to_be32(bno);
dd5b876e
DC
2334 startoff = (char *)blockp - (char *)agflbp->b_addr;
2335
cdded3d8 2336 xfs_alloc_log_agf(tp, agbp, logflags);
dd5b876e 2337
bdc16ee5 2338 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
dd5b876e
DC
2339 xfs_trans_log_buf(tp, agflbp, startoff,
2340 startoff + sizeof(xfs_agblock_t) - 1);
2bd0ea18
NS
2341 return 0;
2342}
2343
dd5b876e 2344static bool
a2ceac1f 2345xfs_agf_verify(
dd5b876e 2346 struct xfs_mount *mp,
a2ceac1f
DC
2347 struct xfs_buf *bp)
2348 {
dd5b876e 2349 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
a2ceac1f 2350
a65d8d29
BF
2351 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2352 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
dd5b876e 2353 return false;
a65d8d29
BF
2354 if (!xfs_log_check_lsn(mp,
2355 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
2356 return false;
2357 }
a2ceac1f 2358
dd5b876e
DC
2359 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2360 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2361 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2362 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2363 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2364 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2365 return false;
a2ceac1f 2366
5a35bf2c
DC
2367 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
2368 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
2369 return false;
2370
e37838e5
DW
2371 if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
2372 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)
2373 return false;
2374
a2ceac1f
DC
2375 /*
2376 * during growfs operations, the perag is not fully initialised,
2377 * so we can't use it for any useful checking. growfs ensures we can't
2378 * use it by using uncached buffers that don't have the perag attached
2379 * so we can detect and avoid this problem.
2380 */
dd5b876e
DC
2381 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2382 return false;
a2ceac1f 2383
dd5b876e
DC
2384 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2385 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2386 return false;
2387
2388 return true;;
a2ceac1f 2389
a2ceac1f
DC
2390}
2391
2392static void
2393xfs_agf_read_verify(
2394 struct xfs_buf *bp)
2395{
dd5b876e 2396 struct xfs_mount *mp = bp->b_target->bt_mount;
dd5b876e 2397
45922933
DC
2398 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2399 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
12b53197 2400 xfs_buf_ioerror(bp, -EFSBADCRC);
45922933
DC
2401 else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2402 XFS_ERRTAG_ALLOC_READ_AGF,
2403 XFS_RANDOM_ALLOC_READ_AGF))
12b53197 2404 xfs_buf_ioerror(bp, -EFSCORRUPTED);
45922933
DC
2405
2406 if (bp->b_error)
2407 xfs_verifier_error(bp);
a2ceac1f
DC
2408}
2409
2410static void
2411xfs_agf_write_verify(
2412 struct xfs_buf *bp)
2413{
dd5b876e
DC
2414 struct xfs_mount *mp = bp->b_target->bt_mount;
2415 struct xfs_buf_log_item *bip = bp->b_fspriv;
2416
2417 if (!xfs_agf_verify(mp, bp)) {
12b53197 2418 xfs_buf_ioerror(bp, -EFSCORRUPTED);
45922933 2419 xfs_verifier_error(bp);
dd5b876e
DC
2420 return;
2421 }
2422
2423 if (!xfs_sb_version_hascrc(&mp->m_sb))
2424 return;
2425
2426 if (bip)
2427 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2428
43b5aeed 2429 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
a2ceac1f
DC
2430}
2431
2432const struct xfs_buf_ops xfs_agf_buf_ops = {
a3fac935 2433 .name = "xfs_agf",
a2ceac1f
DC
2434 .verify_read = xfs_agf_read_verify,
2435 .verify_write = xfs_agf_write_verify,
2436};
2437
2bd0ea18
NS
2438/*
2439 * Read in the allocation group header (free/alloc section).
2440 */
2441int /* error */
56b2de80
DC
2442xfs_read_agf(
2443 struct xfs_mount *mp, /* mount point structure */
2444 struct xfs_trans *tp, /* transaction pointer */
2445 xfs_agnumber_t agno, /* allocation group number */
2446 int flags, /* XFS_BUF_ */
2447 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2bd0ea18 2448{
9440d84d 2449 int error;
2bd0ea18 2450
ff105f75
DC
2451 trace_xfs_read_agf(mp, agno);
2452
2bd0ea18 2453 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
2454 error = xfs_trans_read_buf(
2455 mp, tp, mp->m_ddev_targp,
2456 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
a2ceac1f 2457 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
9440d84d 2458 if (error)
2bd0ea18 2459 return error;
56b2de80 2460 if (!*bpp)
2bd0ea18 2461 return 0;
56b2de80 2462
a2ceac1f
DC
2463 ASSERT(!(*bpp)->b_error);
2464 xfs_buf_set_ref(*bpp, XFS_AGF_REF);
56b2de80
DC
2465 return 0;
2466}
2467
2468/*
2469 * Read in the allocation group header (free/alloc section).
2470 */
2471int /* error */
2472xfs_alloc_read_agf(
2473 struct xfs_mount *mp, /* mount point structure */
2474 struct xfs_trans *tp, /* transaction pointer */
2475 xfs_agnumber_t agno, /* allocation group number */
2476 int flags, /* XFS_ALLOC_FLAG_... */
2477 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2478{
2479 struct xfs_agf *agf; /* ag freelist header */
2480 struct xfs_perag *pag; /* per allocation group data */
2481 int error;
2482
ff105f75 2483 trace_xfs_alloc_read_agf(mp, agno);
56b2de80 2484
ff105f75 2485 ASSERT(agno != NULLAGNUMBER);
56b2de80
DC
2486 error = xfs_read_agf(mp, tp, agno,
2487 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2488 bpp);
2489 if (error)
2490 return error;
2491 if (!*bpp)
2492 return 0;
a2ceac1f 2493 ASSERT(!(*bpp)->b_error);
56b2de80
DC
2494
2495 agf = XFS_BUF_TO_AGF(*bpp);
2496 pag = xfs_perag_get(mp, agno);
2bd0ea18 2497 if (!pag->pagf_init) {
6e3140c7 2498 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
cdded3d8 2499 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
6e3140c7
NS
2500 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2501 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2bd0ea18 2502 pag->pagf_levels[XFS_BTNUM_BNOi] =
6e3140c7 2503 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2bd0ea18 2504 pag->pagf_levels[XFS_BTNUM_CNTi] =
6e3140c7 2505 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
e37838e5
DW
2506 pag->pagf_levels[XFS_BTNUM_RMAPi] =
2507 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
5e656dbb 2508 spin_lock_init(&pag->pagb_lock);
56b2de80 2509 pag->pagb_count = 0;
ff105f75
DC
2510 /* XXX: pagb_tree doesn't exist in userspace */
2511 //pag->pagb_tree = RB_ROOT;
2bd0ea18
NS
2512 pag->pagf_init = 1;
2513 }
2514#ifdef DEBUG
2515 else if (!XFS_FORCED_SHUTDOWN(mp)) {
6e3140c7 2516 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
cdded3d8 2517 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
6e3140c7
NS
2518 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2519 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2bd0ea18 2520 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
6e3140c7 2521 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2bd0ea18 2522 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
6e3140c7 2523 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2bd0ea18
NS
2524 }
2525#endif
56b2de80 2526 xfs_perag_put(pag);
2bd0ea18
NS
2527 return 0;
2528}
2529
2530/*
2531 * Allocate an extent (variable-size).
2532 * Depending on the allocation type, we either look in a single allocation
2533 * group or loop over the allocation groups to find the result.
2534 */
2535int /* error */
2536xfs_alloc_vextent(
dfc130f3 2537 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 2538{
dfc130f3 2539 xfs_agblock_t agsize; /* allocation group size */
2bd0ea18
NS
2540 int error;
2541 int flags; /* XFS_ALLOC_FLAG_... locking flags */
2bd0ea18
NS
2542 xfs_extlen_t minleft;/* minimum left value, temp copy */
2543 xfs_mount_t *mp; /* mount structure pointer */
2544 xfs_agnumber_t sagno; /* starting allocation group number */
dfc130f3 2545 xfs_alloctype_t type; /* input allocation type */
34317449 2546 int bump_rotor = 0;
9baa549b 2547 int no_min = 0;
46eca962 2548 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2bd0ea18
NS
2549
2550 mp = args->mp;
2551 type = args->otype = args->type;
2552 args->agbno = NULLAGBLOCK;
2553 /*
2554 * Just fix this up, for the case where the last a.g. is shorter
2555 * (or there's only one a.g.) and the caller couldn't easily figure
2556 * that out (xfs_bmap_alloc).
2557 */
2558 agsize = mp->m_sb.sb_agblocks;
2559 if (args->maxlen > agsize)
2560 args->maxlen = agsize;
2561 if (args->alignment == 0)
2562 args->alignment = 1;
2563 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2564 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2565 ASSERT(args->minlen <= args->maxlen);
2566 ASSERT(args->minlen <= agsize);
2567 ASSERT(args->mod < args->prod);
2568 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2569 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2570 args->minlen > args->maxlen || args->minlen > agsize ||
2571 args->mod >= args->prod) {
2572 args->fsbno = NULLFSBLOCK;
56b2de80 2573 trace_xfs_alloc_vextent_badargs(args);
2bd0ea18
NS
2574 return 0;
2575 }
9baa549b
SL
2576 minleft = args->minleft;
2577
2bd0ea18
NS
2578 switch (type) {
2579 case XFS_ALLOCTYPE_THIS_AG:
2580 case XFS_ALLOCTYPE_NEAR_BNO:
2581 case XFS_ALLOCTYPE_THIS_BNO:
2582 /*
2583 * These three force us into a single a.g.
2584 */
2585 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
56b2de80 2586 args->pag = xfs_perag_get(mp, args->agno);
2bd0ea18
NS
2587 args->minleft = 0;
2588 error = xfs_alloc_fix_freelist(args, 0);
2589 args->minleft = minleft;
2590 if (error) {
56b2de80 2591 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
2592 goto error0;
2593 }
2594 if (!args->agbp) {
56b2de80 2595 trace_xfs_alloc_vextent_noagbp(args);
2bd0ea18
NS
2596 break;
2597 }
2598 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
0e266570 2599 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 2600 goto error0;
2bd0ea18
NS
2601 break;
2602 case XFS_ALLOCTYPE_START_BNO:
2603 /*
2604 * Try near allocation first, then anywhere-in-ag after
2605 * the first a.g. fails.
2606 */
9542ae13 2607 if ((args->userdata & XFS_ALLOC_INITIAL_USER_DATA) &&
34317449 2608 (mp->m_flags & XFS_MOUNT_32BITINODES)) {
46eca962
NS
2609 args->fsbno = XFS_AGB_TO_FSB(mp,
2610 ((mp->m_agfrotor / rotorstep) %
2611 mp->m_sb.sb_agcount), 0);
34317449
NS
2612 bump_rotor = 1;
2613 }
2bd0ea18
NS
2614 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2615 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2616 /* FALLTHROUGH */
2617 case XFS_ALLOCTYPE_ANY_AG:
2618 case XFS_ALLOCTYPE_START_AG:
2619 case XFS_ALLOCTYPE_FIRST_AG:
2620 /*
2621 * Rotate through the allocation groups looking for a winner.
2622 */
2623 if (type == XFS_ALLOCTYPE_ANY_AG) {
2624 /*
2625 * Start with the last place we left off.
2626 */
46eca962
NS
2627 args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2628 mp->m_sb.sb_agcount;
2bd0ea18
NS
2629 args->type = XFS_ALLOCTYPE_THIS_AG;
2630 flags = XFS_ALLOC_FLAG_TRYLOCK;
2631 } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2632 /*
2633 * Start with allocation group given by bno.
2634 */
2635 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2636 args->type = XFS_ALLOCTYPE_THIS_AG;
2637 sagno = 0;
2638 flags = 0;
2639 } else {
2640 if (type == XFS_ALLOCTYPE_START_AG)
2641 args->type = XFS_ALLOCTYPE_THIS_AG;
2642 /*
2643 * Start with the given allocation group.
2644 */
2645 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2646 flags = XFS_ALLOC_FLAG_TRYLOCK;
2647 }
2648 /*
2649 * Loop over allocation groups twice; first time with
2650 * trylock set, second time without.
2651 */
2652 for (;;) {
56b2de80 2653 args->pag = xfs_perag_get(mp, args->agno);
9baa549b
SL
2654 if (no_min) args->minleft = 0;
2655 error = xfs_alloc_fix_freelist(args, flags);
2656 args->minleft = minleft;
2657 if (error) {
56b2de80 2658 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
2659 goto error0;
2660 }
2661 /*
2662 * If we get a buffer back then the allocation will fly.
2663 */
2664 if (args->agbp) {
0e266570 2665 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 2666 goto error0;
2bd0ea18
NS
2667 break;
2668 }
56b2de80
DC
2669
2670 trace_xfs_alloc_vextent_loopfailed(args);
2671
2bd0ea18
NS
2672 /*
2673 * Didn't work, figure out the next iteration.
2674 */
2675 if (args->agno == sagno &&
2676 type == XFS_ALLOCTYPE_START_BNO)
2677 args->type = XFS_ALLOCTYPE_THIS_AG;
5e656dbb
BN
2678 /*
2679 * For the first allocation, we can try any AG to get
2680 * space. However, if we already have allocated a
2681 * block, we don't want to try AGs whose number is below
2682 * sagno. Otherwise, we may end up with out-of-order
2683 * locking of AGF, which might cause deadlock.
2684 */
2685 if (++(args->agno) == mp->m_sb.sb_agcount) {
2686 if (args->firstblock != NULLFSBLOCK)
2687 args->agno = sagno;
2688 else
2689 args->agno = 0;
2690 }
5000d01d 2691 /*
2bd0ea18
NS
2692 * Reached the starting a.g., must either be done
2693 * or switch to non-trylock mode.
2694 */
2695 if (args->agno == sagno) {
9baa549b 2696 if (no_min == 1) {
2bd0ea18 2697 args->agbno = NULLAGBLOCK;
56b2de80 2698 trace_xfs_alloc_vextent_allfailed(args);
2bd0ea18
NS
2699 break;
2700 }
9baa549b
SL
2701 if (flags == 0) {
2702 no_min = 1;
2703 } else {
2704 flags = 0;
2705 if (type == XFS_ALLOCTYPE_START_BNO) {
2706 args->agbno = XFS_FSB_TO_AGBNO(mp,
2707 args->fsbno);
2708 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2709 }
2bd0ea18
NS
2710 }
2711 }
56b2de80 2712 xfs_perag_put(args->pag);
2bd0ea18 2713 }
46eca962
NS
2714 if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2715 if (args->agno == sagno)
2716 mp->m_agfrotor = (mp->m_agfrotor + 1) %
2717 (mp->m_sb.sb_agcount * rotorstep);
2718 else
2719 mp->m_agfrotor = (args->agno * rotorstep + 1) %
2720 (mp->m_sb.sb_agcount * rotorstep);
2721 }
2bd0ea18
NS
2722 break;
2723 default:
2724 ASSERT(0);
2725 /* NOTREACHED */
2726 }
2727 if (args->agbno == NULLAGBLOCK)
2728 args->fsbno = NULLFSBLOCK;
2729 else {
2730 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2731#ifdef DEBUG
2732 ASSERT(args->len >= args->minlen);
2733 ASSERT(args->len <= args->maxlen);
2734 ASSERT(args->agbno % args->alignment == 0);
2735 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2736 args->len);
2737#endif
9542ae13
DC
2738
2739 /* Zero the extent if we were asked to do so */
2740 if (args->userdata & XFS_ALLOC_USERDATA_ZERO) {
2741 error = xfs_zero_extent(args->ip, args->fsbno, args->len);
2742 if (error)
2743 goto error0;
2744 }
2745
2bd0ea18 2746 }
56b2de80 2747 xfs_perag_put(args->pag);
2bd0ea18
NS
2748 return 0;
2749error0:
56b2de80 2750 xfs_perag_put(args->pag);
2bd0ea18
NS
2751 return error;
2752}
2753
2a6da3b8
DC
2754/* Ensure that the freelist is at full capacity. */
2755int
2756xfs_free_extent_fix_freelist(
2757 struct xfs_trans *tp,
2758 xfs_agnumber_t agno,
2759 struct xfs_buf **agbp)
2bd0ea18 2760{
2a6da3b8
DC
2761 struct xfs_alloc_arg args;
2762 int error;
2bd0ea18 2763
2a6da3b8 2764 memset(&args, 0, sizeof(struct xfs_alloc_arg));
2bd0ea18
NS
2765 args.tp = tp;
2766 args.mp = tp->t_mountp;
2a6da3b8 2767 args.agno = agno;
a2ceac1f
DC
2768
2769 /*
2770 * validate that the block number is legal - the enables us to detect
2771 * and handle a silent filesystem corruption rather than crashing.
2772 */
a2ceac1f 2773 if (args.agno >= args.mp->m_sb.sb_agcount)
12b53197 2774 return -EFSCORRUPTED;
a2ceac1f 2775
56b2de80 2776 args.pag = xfs_perag_get(args.mp, args.agno);
a2ceac1f
DC
2777 ASSERT(args.pag);
2778
2779 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2780 if (error)
2a6da3b8
DC
2781 goto out;
2782
2783 *agbp = args.agbp;
2784out:
2785 xfs_perag_put(args.pag);
2786 return error;
2787}
2788
2789/*
2790 * Free an extent.
2791 * Just break up the extent address and hand off to xfs_free_ag_extent
2792 * after fixing up the freelist.
2793 */
2794int /* error */
2795xfs_free_extent(
2796 struct xfs_trans *tp, /* transaction pointer */
2797 xfs_fsblock_t bno, /* starting block number of extent */
85aec44f
DW
2798 xfs_extlen_t len, /* length of extent */
2799 struct xfs_owner_info *oinfo) /* extent owner */
2a6da3b8
DC
2800{
2801 struct xfs_mount *mp = tp->t_mountp;
2802 struct xfs_buf *agbp;
2803 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
2804 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
2805 int error;
2806
2807 ASSERT(len != 0);
2808
a9da40de
DW
2809 trace_xfs_bmap_free_deferred(mp, agno, 0, agbno, len);
2810
2811 if (XFS_TEST_ERROR(false, mp,
2812 XFS_ERRTAG_FREE_EXTENT,
2813 XFS_RANDOM_FREE_EXTENT))
2814 return -EIO;
2815
2a6da3b8
DC
2816 error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
2817 if (error)
2818 return error;
2819
2820 XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err);
a2ceac1f
DC
2821
2822 /* validate the extent size is legal now we have the agf locked */
2a6da3b8
DC
2823 XFS_WANT_CORRUPTED_GOTO(mp,
2824 agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
2825 err);
a2ceac1f 2826
85aec44f 2827 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, 0);
2a6da3b8
DC
2828 if (error)
2829 goto err;
2830
2831 xfs_extent_busy_insert(tp, agno, agbno, len, 0);
2832 return 0;
2833
2834err:
2835 xfs_trans_brelse(tp, agbp);
2bd0ea18
NS
2836 return error;
2837}