]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxfs/xfs_alloc.c
Sync up libxfs to latest kernel code
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_alloc.c
CommitLineData
2bd0ea18 1/*
5e656dbb 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
da23017d 3 * All Rights Reserved.
5000d01d 4 *
da23017d
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
2bd0ea18 7 * published by the Free Software Foundation.
5000d01d 8 *
da23017d
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
5000d01d 13 *
da23017d
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2bd0ea18 17 */
2bd0ea18
NS
18#include <xfs.h>
19
20#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
5e656dbb
BN
21
22#define XFSA_FIXUP_BNO_OK 1
23#define XFSA_FIXUP_CNT_OK 2
24
25/*
26 * Prototypes for per-ag allocation routines
27 */
28
29STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
30STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
31STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
32STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
33 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
34
35/*
36 * Internal functions.
37 */
2bd0ea18 38
b194c7d8
BN
39/*
40 * Lookup the record equal to [bno, len] in the btree given by cur.
41 */
42STATIC int /* error */
43xfs_alloc_lookup_eq(
44 struct xfs_btree_cur *cur, /* btree cursor */
45 xfs_agblock_t bno, /* starting block of extent */
46 xfs_extlen_t len, /* length of extent */
47 int *stat) /* success/failure */
48{
49 cur->bc_rec.a.ar_startblock = bno;
50 cur->bc_rec.a.ar_blockcount = len;
51 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
52}
53
54/*
55 * Lookup the first record greater than or equal to [bno, len]
56 * in the btree given by cur.
57 */
58STATIC int /* error */
59xfs_alloc_lookup_ge(
60 struct xfs_btree_cur *cur, /* btree cursor */
61 xfs_agblock_t bno, /* starting block of extent */
62 xfs_extlen_t len, /* length of extent */
63 int *stat) /* success/failure */
64{
65 cur->bc_rec.a.ar_startblock = bno;
66 cur->bc_rec.a.ar_blockcount = len;
67 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
68}
69
70/*
71 * Lookup the first record less than or equal to [bno, len]
72 * in the btree given by cur.
73 */
74STATIC int /* error */
75xfs_alloc_lookup_le(
76 struct xfs_btree_cur *cur, /* btree cursor */
77 xfs_agblock_t bno, /* starting block of extent */
78 xfs_extlen_t len, /* length of extent */
79 int *stat) /* success/failure */
80{
81 cur->bc_rec.a.ar_startblock = bno;
82 cur->bc_rec.a.ar_blockcount = len;
83 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
84}
85
86/*
87 * Update the record referred to by cur to the value given
88 * by [bno, len].
89 * This either works (return 0) or gets an EFSCORRUPTED error.
90 */
91STATIC int /* error */
92xfs_alloc_update(
93 struct xfs_btree_cur *cur, /* btree cursor */
94 xfs_agblock_t bno, /* starting block of extent */
95 xfs_extlen_t len) /* length of extent */
96{
97 union xfs_btree_rec rec;
98
99 rec.alloc.ar_startblock = cpu_to_be32(bno);
100 rec.alloc.ar_blockcount = cpu_to_be32(len);
101 return xfs_btree_update(cur, &rec);
102}
103
104/*
105 * Get the data from the pointed-to record.
106 */
107STATIC int /* error */
108xfs_alloc_get_rec(
109 struct xfs_btree_cur *cur, /* btree cursor */
110 xfs_agblock_t *bno, /* output: starting block of extent */
111 xfs_extlen_t *len, /* output: length of extent */
112 int *stat) /* output: success/failure */
113{
114 union xfs_btree_rec *rec;
115 int error;
116
117 error = xfs_btree_get_rec(cur, &rec, stat);
118 if (!error && *stat == 1) {
119 *bno = be32_to_cpu(rec->alloc.ar_startblock);
120 *len = be32_to_cpu(rec->alloc.ar_blockcount);
121 }
122 return error;
123}
124
2bd0ea18
NS
125/*
126 * Compute aligned version of the found extent.
127 * Takes alignment and min length into account.
128 */
5e656dbb 129STATIC void
2bd0ea18
NS
130xfs_alloc_compute_aligned(
131 xfs_agblock_t foundbno, /* starting block in found extent */
132 xfs_extlen_t foundlen, /* length in found extent */
133 xfs_extlen_t alignment, /* alignment for allocation */
134 xfs_extlen_t minlen, /* minimum length for allocation */
135 xfs_agblock_t *resbno, /* result block number */
136 xfs_extlen_t *reslen) /* result length */
137{
138 xfs_agblock_t bno;
139 xfs_extlen_t diff;
140 xfs_extlen_t len;
141
142 if (alignment > 1 && foundlen >= minlen) {
143 bno = roundup(foundbno, alignment);
144 diff = bno - foundbno;
145 len = diff >= foundlen ? 0 : foundlen - diff;
146 } else {
147 bno = foundbno;
148 len = foundlen;
149 }
150 *resbno = bno;
151 *reslen = len;
2bd0ea18
NS
152}
153
154/*
155 * Compute best start block and diff for "near" allocations.
156 * freelen >= wantlen already checked by caller.
157 */
158STATIC xfs_extlen_t /* difference value (absolute) */
159xfs_alloc_compute_diff(
160 xfs_agblock_t wantbno, /* target starting block */
161 xfs_extlen_t wantlen, /* target length */
162 xfs_extlen_t alignment, /* target alignment */
163 xfs_agblock_t freebno, /* freespace's starting block */
164 xfs_extlen_t freelen, /* freespace's length */
165 xfs_agblock_t *newbnop) /* result: best start block from free */
166{
167 xfs_agblock_t freeend; /* end of freespace extent */
168 xfs_agblock_t newbno1; /* return block number */
169 xfs_agblock_t newbno2; /* other new block number */
0e266570
NS
170 xfs_extlen_t newlen1=0; /* length with newbno1 */
171 xfs_extlen_t newlen2=0; /* length with newbno2 */
2bd0ea18
NS
172 xfs_agblock_t wantend; /* end of target extent */
173
174 ASSERT(freelen >= wantlen);
175 freeend = freebno + freelen;
176 wantend = wantbno + wantlen;
177 if (freebno >= wantbno) {
178 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
179 newbno1 = NULLAGBLOCK;
180 } else if (freeend >= wantend && alignment > 1) {
181 newbno1 = roundup(wantbno, alignment);
182 newbno2 = newbno1 - alignment;
183 if (newbno1 >= freeend)
184 newbno1 = NULLAGBLOCK;
185 else
186 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
187 if (newbno2 < freebno)
188 newbno2 = NULLAGBLOCK;
189 else
190 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
191 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
192 if (newlen1 < newlen2 ||
193 (newlen1 == newlen2 &&
194 XFS_ABSDIFF(newbno1, wantbno) >
195 XFS_ABSDIFF(newbno2, wantbno)))
196 newbno1 = newbno2;
197 } else if (newbno2 != NULLAGBLOCK)
198 newbno1 = newbno2;
199 } else if (freeend >= wantend) {
200 newbno1 = wantbno;
201 } else if (alignment > 1) {
202 newbno1 = roundup(freeend - wantlen, alignment);
203 if (newbno1 > freeend - wantlen &&
204 newbno1 - alignment >= freebno)
205 newbno1 -= alignment;
206 else if (newbno1 >= freeend)
207 newbno1 = NULLAGBLOCK;
208 } else
209 newbno1 = freeend - wantlen;
210 *newbnop = newbno1;
211 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
212}
213
214/*
215 * Fix up the length, based on mod and prod.
216 * len should be k * prod + mod for some k.
217 * If len is too small it is returned unchanged.
218 * If len hits maxlen it is left alone.
219 */
220STATIC void
221xfs_alloc_fix_len(
dfc130f3 222 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18
NS
223{
224 xfs_extlen_t k;
225 xfs_extlen_t rlen;
226
227 ASSERT(args->mod < args->prod);
228 rlen = args->len;
229 ASSERT(rlen >= args->minlen);
230 ASSERT(rlen <= args->maxlen);
231 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
232 (args->mod == 0 && rlen < args->prod))
233 return;
234 k = rlen % args->prod;
235 if (k == args->mod)
236 return;
237 if (k > args->mod) {
238 if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen)
239 return;
240 } else {
241 if ((int)(rlen = rlen - args->prod - (args->mod - k)) <
242 (int)args->minlen)
243 return;
244 }
245 ASSERT(rlen >= args->minlen);
246 ASSERT(rlen <= args->maxlen);
247 args->len = rlen;
248}
249
250/*
251 * Fix up length if there is too little space left in the a.g.
252 * Return 1 if ok, 0 if too little, should give up.
253 */
254STATIC int
255xfs_alloc_fix_minleft(
dfc130f3 256 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18
NS
257{
258 xfs_agf_t *agf; /* a.g. freelist header */
259 int diff; /* free space difference */
260
261 if (args->minleft == 0)
262 return 1;
263 agf = XFS_BUF_TO_AGF(args->agbp);
6e3140c7
NS
264 diff = be32_to_cpu(agf->agf_freeblks)
265 + be32_to_cpu(agf->agf_flcount)
2bd0ea18
NS
266 - args->len - args->minleft;
267 if (diff >= 0)
268 return 1;
269 args->len += diff; /* shrink the allocated space */
270 if (args->len >= args->minlen)
271 return 1;
272 args->agbno = NULLAGBLOCK;
273 return 0;
274}
275
276/*
277 * Update the two btrees, logically removing from freespace the extent
278 * starting at rbno, rlen blocks. The extent is contained within the
279 * actual (current) free extent fbno for flen blocks.
280 * Flags are passed in indicating whether the cursors are set to the
281 * relevant records.
282 */
283STATIC int /* error code */
284xfs_alloc_fixup_trees(
dfc130f3
RC
285 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
286 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
2bd0ea18
NS
287 xfs_agblock_t fbno, /* starting block of free extent */
288 xfs_extlen_t flen, /* length of free extent */
289 xfs_agblock_t rbno, /* starting block of returned extent */
290 xfs_extlen_t rlen, /* length of returned extent */
291 int flags) /* flags, XFSA_FIXUP_... */
292{
293 int error; /* error code */
294 int i; /* operation results */
295 xfs_agblock_t nfbno1; /* first new free startblock */
296 xfs_agblock_t nfbno2; /* second new free startblock */
0e266570
NS
297 xfs_extlen_t nflen1=0; /* first new free length */
298 xfs_extlen_t nflen2=0; /* second new free length */
2bd0ea18
NS
299
300 /*
301 * Look up the record in the by-size tree if necessary.
302 */
303 if (flags & XFSA_FIXUP_CNT_OK) {
304#ifdef DEBUG
0e266570 305 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
2bd0ea18
NS
306 return error;
307 XFS_WANT_CORRUPTED_RETURN(
308 i == 1 && nfbno1 == fbno && nflen1 == flen);
309#endif
310 } else {
0e266570 311 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
2bd0ea18
NS
312 return error;
313 XFS_WANT_CORRUPTED_RETURN(i == 1);
314 }
315 /*
316 * Look up the record in the by-block tree if necessary.
317 */
318 if (flags & XFSA_FIXUP_BNO_OK) {
319#ifdef DEBUG
0e266570 320 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
2bd0ea18
NS
321 return error;
322 XFS_WANT_CORRUPTED_RETURN(
323 i == 1 && nfbno1 == fbno && nflen1 == flen);
324#endif
325 } else {
0e266570 326 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
2bd0ea18
NS
327 return error;
328 XFS_WANT_CORRUPTED_RETURN(i == 1);
329 }
b3563c19 330
2bd0ea18 331#ifdef DEBUG
b3563c19
BN
332 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
333 struct xfs_btree_block *bnoblock;
334 struct xfs_btree_block *cntblock;
335
336 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
337 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
2bd0ea18 338
b3563c19
BN
339 XFS_WANT_CORRUPTED_RETURN(
340 bnoblock->bb_numrecs == cntblock->bb_numrecs);
2bd0ea18
NS
341 }
342#endif
b3563c19 343
2bd0ea18
NS
344 /*
345 * Deal with all four cases: the allocated record is contained
346 * within the freespace record, so we can have new freespace
347 * at either (or both) end, or no freespace remaining.
348 */
349 if (rbno == fbno && rlen == flen)
350 nfbno1 = nfbno2 = NULLAGBLOCK;
351 else if (rbno == fbno) {
352 nfbno1 = rbno + rlen;
353 nflen1 = flen - rlen;
354 nfbno2 = NULLAGBLOCK;
355 } else if (rbno + rlen == fbno + flen) {
356 nfbno1 = fbno;
357 nflen1 = flen - rlen;
358 nfbno2 = NULLAGBLOCK;
359 } else {
360 nfbno1 = fbno;
361 nflen1 = rbno - fbno;
362 nfbno2 = rbno + rlen;
363 nflen2 = (fbno + flen) - nfbno2;
364 }
365 /*
366 * Delete the entry from the by-size btree.
367 */
b194c7d8 368 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18
NS
369 return error;
370 XFS_WANT_CORRUPTED_RETURN(i == 1);
371 /*
372 * Add new by-size btree entry(s).
373 */
374 if (nfbno1 != NULLAGBLOCK) {
0e266570 375 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
2bd0ea18
NS
376 return error;
377 XFS_WANT_CORRUPTED_RETURN(i == 0);
b194c7d8 378 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18
NS
379 return error;
380 XFS_WANT_CORRUPTED_RETURN(i == 1);
381 }
382 if (nfbno2 != NULLAGBLOCK) {
0e266570 383 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
2bd0ea18
NS
384 return error;
385 XFS_WANT_CORRUPTED_RETURN(i == 0);
b194c7d8 386 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18
NS
387 return error;
388 XFS_WANT_CORRUPTED_RETURN(i == 1);
389 }
390 /*
391 * Fix up the by-block btree entry(s).
392 */
393 if (nfbno1 == NULLAGBLOCK) {
394 /*
395 * No remaining freespace, just delete the by-block tree entry.
396 */
b194c7d8 397 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18
NS
398 return error;
399 XFS_WANT_CORRUPTED_RETURN(i == 1);
400 } else {
401 /*
402 * Update the by-block entry to start later|be shorter.
403 */
0e266570 404 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
2bd0ea18
NS
405 return error;
406 }
407 if (nfbno2 != NULLAGBLOCK) {
408 /*
409 * 2 resulting free entries, need to add one.
410 */
0e266570 411 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
2bd0ea18
NS
412 return error;
413 XFS_WANT_CORRUPTED_RETURN(i == 0);
b194c7d8 414 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18
NS
415 return error;
416 XFS_WANT_CORRUPTED_RETURN(i == 1);
417 }
418 return 0;
419}
420
421/*
422 * Read in the allocation group free block array.
423 */
424STATIC int /* error */
425xfs_alloc_read_agfl(
426 xfs_mount_t *mp, /* mount point structure */
427 xfs_trans_t *tp, /* transaction pointer */
428 xfs_agnumber_t agno, /* allocation group number */
429 xfs_buf_t **bpp) /* buffer for the ag free block array */
430{
431 xfs_buf_t *bp; /* return value */
2bd0ea18
NS
432 int error;
433
434 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
435 error = xfs_trans_read_buf(
436 mp, tp, mp->m_ddev_targp,
437 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
438 XFS_FSS_TO_BB(mp, 1), 0, &bp);
439 if (error)
2bd0ea18
NS
440 return error;
441 ASSERT(bp);
442 ASSERT(!XFS_BUF_GETERROR(bp));
443 XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF);
444 *bpp = bp;
445 return 0;
446}
447
2bd0ea18
NS
448/*
449 * Allocation group level functions.
450 */
451
452/*
453 * Allocate a variable extent in the allocation group agno.
454 * Type and bno are used to determine where in the allocation group the
455 * extent will start.
456 * Extent's length (returned in *len) will be between minlen and maxlen,
457 * and of the form k * prod + mod unless there's nothing that large.
458 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
459 */
460STATIC int /* error */
461xfs_alloc_ag_vextent(
dfc130f3 462 xfs_alloc_arg_t *args) /* argument structure for allocation */
2bd0ea18 463{
0e266570 464 int error=0;
2bd0ea18
NS
465
466 ASSERT(args->minlen > 0);
467 ASSERT(args->maxlen > 0);
468 ASSERT(args->minlen <= args->maxlen);
469 ASSERT(args->mod < args->prod);
470 ASSERT(args->alignment > 0);
471 /*
472 * Branch to correct routine based on the type.
473 */
474 args->wasfromfl = 0;
475 switch (args->type) {
476 case XFS_ALLOCTYPE_THIS_AG:
477 error = xfs_alloc_ag_vextent_size(args);
478 break;
479 case XFS_ALLOCTYPE_NEAR_BNO:
480 error = xfs_alloc_ag_vextent_near(args);
481 break;
482 case XFS_ALLOCTYPE_THIS_BNO:
483 error = xfs_alloc_ag_vextent_exact(args);
484 break;
485 default:
486 ASSERT(0);
487 /* NOTREACHED */
488 }
489 if (error)
490 return error;
491 /*
492 * If the allocation worked, need to change the agf structure
493 * (and log it), and the superblock.
494 */
495 if (args->agbno != NULLAGBLOCK) {
496 xfs_agf_t *agf; /* allocation group freelist header */
497#ifdef XFS_ALLOC_TRACE
498 xfs_mount_t *mp = args->mp;
499#endif
5000d01d 500 long slen = (long)args->len;
2bd0ea18
NS
501
502 ASSERT(args->len >= args->minlen && args->len <= args->maxlen);
503 ASSERT(!(args->wasfromfl) || !args->isfl);
504 ASSERT(args->agbno % args->alignment == 0);
505 if (!(args->wasfromfl)) {
506
507 agf = XFS_BUF_TO_AGF(args->agbp);
5e656dbb 508 be32_add_cpu(&agf->agf_freeblks, -(args->len));
2bd0ea18
NS
509 xfs_trans_agblocks_delta(args->tp,
510 -((long)(args->len)));
511 args->pag->pagf_freeblks -= args->len;
6e3140c7
NS
512 ASSERT(be32_to_cpu(agf->agf_freeblks) <=
513 be32_to_cpu(agf->agf_length));
2bd0ea18
NS
514 TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
515 xfs_alloc_log_agf(args->tp, args->agbp,
516 XFS_AGF_FREEBLKS);
3e535bba
NS
517 /* search the busylist for these blocks */
518 xfs_alloc_search_busy(args->tp, args->agno,
519 args->agbno, args->len);
2bd0ea18
NS
520 }
521 if (!args->isfl)
522 xfs_trans_mod_sb(args->tp,
523 args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
524 XFS_TRANS_SB_FDBLOCKS, -slen);
32a82561
NS
525 XFS_STATS_INC(xs_allocx);
526 XFS_STATS_ADD(xs_allocb, args->len);
2bd0ea18
NS
527 }
528 return 0;
529}
530
531/*
532 * Allocate a variable extent at exactly agno/bno.
533 * Extent's length (returned in *len) will be between minlen and maxlen,
534 * and of the form k * prod + mod unless there's nothing that large.
535 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
536 */
537STATIC int /* error */
538xfs_alloc_ag_vextent_exact(
dfc130f3 539 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 540{
dfc130f3
RC
541 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
542 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
2bd0ea18
NS
543 xfs_agblock_t end; /* end of allocated extent */
544 int error;
545 xfs_agblock_t fbno; /* start block of found extent */
546 xfs_agblock_t fend; /* end block of found extent */
547 xfs_extlen_t flen; /* length of found extent */
2bd0ea18 548 int i; /* success/failure of operation */
dfc130f3
RC
549 xfs_agblock_t maxend; /* end of maximal extent */
550 xfs_agblock_t minend; /* end of minimal extent */
2bd0ea18
NS
551 xfs_extlen_t rlen; /* length of returned extent */
552
553 ASSERT(args->alignment == 1);
554 /*
555 * Allocate/initialize a cursor for the by-number freespace btree.
556 */
b194c7d8
BN
557 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
558 args->agno, XFS_BTNUM_BNO);
2bd0ea18
NS
559 /*
560 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
561 * Look for the closest free block <= bno, it must contain bno
562 * if any free block does.
563 */
0e266570 564 if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i)))
2bd0ea18
NS
565 goto error0;
566 if (!i) {
567 /*
568 * Didn't find it, return null.
569 */
570 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
571 args->agbno = NULLAGBLOCK;
572 return 0;
573 }
574 /*
575 * Grab the freespace record.
576 */
0e266570 577 if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i)))
2bd0ea18
NS
578 goto error0;
579 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
580 ASSERT(fbno <= args->agbno);
581 minend = args->agbno + args->minlen;
582 maxend = args->agbno + args->maxlen;
583 fend = fbno + flen;
5000d01d 584 /*
2bd0ea18
NS
585 * Give up if the freespace isn't long enough for the minimum request.
586 */
587 if (fend < minend) {
588 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
589 args->agbno = NULLAGBLOCK;
590 return 0;
591 }
592 /*
593 * End of extent will be smaller of the freespace end and the
594 * maximal requested end.
595 */
596 end = XFS_AGBLOCK_MIN(fend, maxend);
597 /*
598 * Fix the length according to mod and prod if given.
599 */
600 args->len = end - args->agbno;
601 xfs_alloc_fix_len(args);
602 if (!xfs_alloc_fix_minleft(args)) {
603 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
604 return 0;
605 }
606 rlen = args->len;
607 ASSERT(args->agbno + rlen <= fend);
608 end = args->agbno + rlen;
609 /*
610 * We are allocating agbno for rlen [agbno .. end]
611 * Allocate/initialize a cursor for the by-size btree.
612 */
b194c7d8
BN
613 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
614 args->agno, XFS_BTNUM_CNT);
2bd0ea18 615 ASSERT(args->agbno + args->len <=
6e3140c7 616 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
5000d01d 617 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
0e266570 618 args->agbno, args->len, XFSA_FIXUP_BNO_OK))) {
2bd0ea18
NS
619 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
620 goto error0;
621 }
622 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
623 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
624 TRACE_ALLOC("normal", args);
625 args->wasfromfl = 0;
626 return 0;
627
628error0:
629 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
630 TRACE_ALLOC("error", args);
631 return error;
632}
633
634/*
635 * Allocate a variable extent near bno in the allocation group agno.
636 * Extent's length (returned in len) will be between minlen and maxlen,
637 * and of the form k * prod + mod unless there's nothing that large.
638 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
639 */
640STATIC int /* error */
641xfs_alloc_ag_vextent_near(
dfc130f3 642 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 643{
dfc130f3
RC
644 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */
645 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */
646 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */
2bd0ea18
NS
647 xfs_agblock_t gtbno; /* start bno of right side entry */
648 xfs_agblock_t gtbnoa; /* aligned ... */
649 xfs_extlen_t gtdiff; /* difference to right side entry */
650 xfs_extlen_t gtlen; /* length of right side entry */
651 xfs_extlen_t gtlena; /* aligned ... */
652 xfs_agblock_t gtnew; /* useful start bno of right side */
653 int error; /* error code */
654 int i; /* result code, temporary */
655 int j; /* result code, temporary */
656 xfs_agblock_t ltbno; /* start bno of left side entry */
657 xfs_agblock_t ltbnoa; /* aligned ... */
658 xfs_extlen_t ltdiff; /* difference to left side entry */
659 /*REFERENCED*/
660 xfs_agblock_t ltend; /* end bno of left side entry */
661 xfs_extlen_t ltlen; /* length of left side entry */
662 xfs_extlen_t ltlena; /* aligned ... */
663 xfs_agblock_t ltnew; /* useful start bno of left side */
664 xfs_extlen_t rlen; /* length of returned extent */
665#if defined(DEBUG) && defined(__KERNEL__)
666 /*
667 * Randomly don't execute the first algorithm.
668 */
2bd0ea18 669 int dofirst; /* set to do first algorithm */
2bd0ea18 670
5e656dbb 671 dofirst = random32() & 1;
2bd0ea18
NS
672#endif
673 /*
674 * Get a cursor for the by-size btree.
675 */
b194c7d8
BN
676 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
677 args->agno, XFS_BTNUM_CNT);
2bd0ea18
NS
678 ltlen = 0;
679 bno_cur_lt = bno_cur_gt = NULL;
680 /*
681 * See if there are any free extents as big as maxlen.
682 */
0e266570 683 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
2bd0ea18
NS
684 goto error0;
685 /*
686 * If none, then pick up the last entry in the tree unless the
687 * tree is empty.
5000d01d 688 */
2bd0ea18 689 if (!i) {
0e266570
NS
690 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
691 &ltlen, &i)))
2bd0ea18
NS
692 goto error0;
693 if (i == 0 || ltlen == 0) {
694 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
695 return 0;
696 }
697 ASSERT(i == 1);
698 }
699 args->wasfromfl = 0;
5000d01d 700 /*
2bd0ea18
NS
701 * First algorithm.
702 * If the requested extent is large wrt the freespaces available
703 * in this a.g., then the cursor will be pointing to a btree entry
704 * near the right edge of the tree. If it's in the last btree leaf
705 * block, then we just examine all the entries in that block
706 * that are big enough, and pick the best one.
707 * This is written as a while loop so we can break out of it,
708 * but we never loop back to the top.
709 */
710 while (xfs_btree_islastblock(cnt_cur, 0)) {
711 xfs_extlen_t bdiff;
0e266570
NS
712 int besti=0;
713 xfs_extlen_t blen=0;
714 xfs_agblock_t bnew=0;
2bd0ea18
NS
715
716#if defined(DEBUG) && defined(__KERNEL__)
717 if (!dofirst)
718 break;
719#endif
720 /*
721 * Start from the entry that lookup found, sequence through
722 * all larger free blocks. If we're actually pointing at a
723 * record smaller than maxlen, go to the start of this block,
724 * and skip all those smaller than minlen.
725 */
726 if (ltlen || args->alignment > 1) {
727 cnt_cur->bc_ptrs[0] = 1;
728 do {
0e266570
NS
729 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
730 &ltlen, &i)))
2bd0ea18
NS
731 goto error0;
732 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
733 if (ltlen >= args->minlen)
734 break;
b194c7d8 735 if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
2bd0ea18
NS
736 goto error0;
737 } while (i);
738 ASSERT(ltlen >= args->minlen);
739 if (!i)
740 break;
741 }
742 i = cnt_cur->bc_ptrs[0];
743 for (j = 1, blen = 0, bdiff = 0;
744 !error && j && (blen < args->maxlen || bdiff > 0);
b194c7d8 745 error = xfs_btree_increment(cnt_cur, 0, &j)) {
2bd0ea18
NS
746 /*
747 * For each entry, decide if it's better than
748 * the previous best entry.
749 */
0e266570 750 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
2bd0ea18
NS
751 goto error0;
752 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
5e656dbb
BN
753 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment,
754 args->minlen, &ltbnoa, &ltlena);
755 if (ltlena < args->minlen)
2bd0ea18
NS
756 continue;
757 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
758 xfs_alloc_fix_len(args);
759 ASSERT(args->len >= args->minlen);
760 if (args->len < blen)
761 continue;
762 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
763 args->alignment, ltbno, ltlen, &ltnew);
764 if (ltnew != NULLAGBLOCK &&
765 (args->len > blen || ltdiff < bdiff)) {
766 bdiff = ltdiff;
767 bnew = ltnew;
768 blen = args->len;
769 besti = cnt_cur->bc_ptrs[0];
770 }
771 }
772 /*
773 * It didn't work. We COULD be in a case where
774 * there's a good record somewhere, so try again.
775 */
776 if (blen == 0)
777 break;
778 /*
779 * Point at the best entry, and retrieve it again.
780 */
781 cnt_cur->bc_ptrs[0] = besti;
0e266570 782 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
2bd0ea18
NS
783 goto error0;
784 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
785 ltend = ltbno + ltlen;
6e3140c7 786 ASSERT(ltend <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
2bd0ea18
NS
787 args->len = blen;
788 if (!xfs_alloc_fix_minleft(args)) {
789 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
790 TRACE_ALLOC("nominleft", args);
791 return 0;
792 }
793 blen = args->len;
794 /*
795 * We are allocating starting at bnew for blen blocks.
796 */
797 args->agbno = bnew;
798 ASSERT(bnew >= ltbno);
799 ASSERT(bnew + blen <= ltend);
800 /*
801 * Set up a cursor for the by-bno tree.
802 */
b194c7d8
BN
803 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
804 args->agbp, args->agno, XFS_BTNUM_BNO);
2bd0ea18
NS
805 /*
806 * Fix up the btree entries.
807 */
0e266570
NS
808 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
809 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
2bd0ea18
NS
810 goto error0;
811 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
812 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
813 TRACE_ALLOC("first", args);
814 return 0;
815 }
816 /*
817 * Second algorithm.
818 * Search in the by-bno tree to the left and to the right
819 * simultaneously, until in each case we find a space big enough,
820 * or run into the edge of the tree. When we run into the edge,
821 * we deallocate that cursor.
822 * If both searches succeed, we compare the two spaces and pick
823 * the better one.
824 * With alignment, it's possible for both to fail; the upper
825 * level algorithm that picks allocation groups for allocations
826 * is not supposed to do this.
827 */
828 /*
829 * Allocate and initialize the cursor for the leftward search.
830 */
b194c7d8
BN
831 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
832 args->agno, XFS_BTNUM_BNO);
2bd0ea18
NS
833 /*
834 * Lookup <= bno to find the leftward search's starting point.
835 */
0e266570 836 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
2bd0ea18
NS
837 goto error0;
838 if (!i) {
839 /*
840 * Didn't find anything; use this cursor for the rightward
841 * search.
842 */
843 bno_cur_gt = bno_cur_lt;
062998e3 844 bno_cur_lt = NULL;
2bd0ea18
NS
845 }
846 /*
847 * Found something. Duplicate the cursor for the rightward search.
848 */
0e266570 849 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
2bd0ea18
NS
850 goto error0;
851 /*
852 * Increment the cursor, so we will point at the entry just right
853 * of the leftward entry if any, or to the leftmost entry.
854 */
b194c7d8 855 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
2bd0ea18
NS
856 goto error0;
857 if (!i) {
858 /*
859 * It failed, there are no rightward entries.
860 */
861 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
862 bno_cur_gt = NULL;
863 }
864 /*
865 * Loop going left with the leftward cursor, right with the
866 * rightward cursor, until either both directions give up or
867 * we find an entry at least as big as minlen.
868 */
869 do {
870 if (bno_cur_lt) {
0e266570 871 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
2bd0ea18
NS
872 goto error0;
873 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
5e656dbb
BN
874 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment,
875 args->minlen, &ltbnoa, &ltlena);
876 if (ltlena >= args->minlen)
2bd0ea18 877 break;
b194c7d8 878 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
2bd0ea18
NS
879 goto error0;
880 if (!i) {
881 xfs_btree_del_cursor(bno_cur_lt,
882 XFS_BTREE_NOERROR);
883 bno_cur_lt = NULL;
884 }
885 }
886 if (bno_cur_gt) {
0e266570 887 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
2bd0ea18
NS
888 goto error0;
889 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
5e656dbb
BN
890 xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment,
891 args->minlen, &gtbnoa, &gtlena);
892 if (gtlena >= args->minlen)
2bd0ea18 893 break;
b194c7d8 894 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
2bd0ea18
NS
895 goto error0;
896 if (!i) {
897 xfs_btree_del_cursor(bno_cur_gt,
898 XFS_BTREE_NOERROR);
899 bno_cur_gt = NULL;
900 }
901 }
902 } while (bno_cur_lt || bno_cur_gt);
903 /*
904 * Got both cursors still active, need to find better entry.
905 */
906 if (bno_cur_lt && bno_cur_gt) {
907 /*
908 * Left side is long enough, look for a right side entry.
909 */
910 if (ltlena >= args->minlen) {
911 /*
912 * Fix up the length.
913 */
914 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
915 xfs_alloc_fix_len(args);
916 rlen = args->len;
917 ltdiff = xfs_alloc_compute_diff(args->agbno, rlen,
918 args->alignment, ltbno, ltlen, &ltnew);
919 /*
920 * Not perfect.
921 */
922 if (ltdiff) {
923 /*
924 * Look until we find a better one, run out of
925 * space, or run off the end.
926 */
927 while (bno_cur_lt && bno_cur_gt) {
0e266570 928 if ((error = xfs_alloc_get_rec(
2bd0ea18 929 bno_cur_gt, &gtbno,
0e266570 930 &gtlen, &i)))
2bd0ea18
NS
931 goto error0;
932 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
933 xfs_alloc_compute_aligned(gtbno, gtlen,
934 args->alignment, args->minlen,
935 &gtbnoa, &gtlena);
936 /*
937 * The left one is clearly better.
938 */
939 if (gtbnoa >= args->agbno + ltdiff) {
940 xfs_btree_del_cursor(
941 bno_cur_gt,
942 XFS_BTREE_NOERROR);
943 bno_cur_gt = NULL;
944 break;
945 }
946 /*
947 * If we reach a big enough entry,
948 * compare the two and pick the best.
949 */
950 if (gtlena >= args->minlen) {
951 args->len =
952 XFS_EXTLEN_MIN(gtlena,
953 args->maxlen);
954 xfs_alloc_fix_len(args);
955 rlen = args->len;
956 gtdiff = xfs_alloc_compute_diff(
957 args->agbno, rlen,
958 args->alignment,
959 gtbno, gtlen, &gtnew);
960 /*
961 * Right side is better.
962 */
963 if (gtdiff < ltdiff) {
964 xfs_btree_del_cursor(
965 bno_cur_lt,
966 XFS_BTREE_NOERROR);
967 bno_cur_lt = NULL;
968 }
969 /*
970 * Left side is better.
971 */
972 else {
973 xfs_btree_del_cursor(
974 bno_cur_gt,
975 XFS_BTREE_NOERROR);
976 bno_cur_gt = NULL;
977 }
978 break;
979 }
980 /*
981 * Fell off the right end.
982 */
b194c7d8 983 if ((error = xfs_btree_increment(
0e266570 984 bno_cur_gt, 0, &i)))
2bd0ea18
NS
985 goto error0;
986 if (!i) {
987 xfs_btree_del_cursor(
988 bno_cur_gt,
989 XFS_BTREE_NOERROR);
990 bno_cur_gt = NULL;
991 break;
992 }
993 }
994 }
995 /*
996 * The left side is perfect, trash the right side.
997 */
998 else {
999 xfs_btree_del_cursor(bno_cur_gt,
1000 XFS_BTREE_NOERROR);
1001 bno_cur_gt = NULL;
1002 }
1003 }
1004 /*
1005 * It's the right side that was found first, look left.
1006 */
1007 else {
1008 /*
1009 * Fix up the length.
1010 */
1011 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1012 xfs_alloc_fix_len(args);
1013 rlen = args->len;
1014 gtdiff = xfs_alloc_compute_diff(args->agbno, rlen,
1015 args->alignment, gtbno, gtlen, &gtnew);
1016 /*
1017 * Right side entry isn't perfect.
1018 */
1019 if (gtdiff) {
1020 /*
1021 * Look until we find a better one, run out of
1022 * space, or run off the end.
1023 */
1024 while (bno_cur_lt && bno_cur_gt) {
0e266570 1025 if ((error = xfs_alloc_get_rec(
2bd0ea18 1026 bno_cur_lt, &ltbno,
0e266570 1027 &ltlen, &i)))
2bd0ea18
NS
1028 goto error0;
1029 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1030 xfs_alloc_compute_aligned(ltbno, ltlen,
1031 args->alignment, args->minlen,
1032 &ltbnoa, &ltlena);
1033 /*
1034 * The right one is clearly better.
1035 */
1036 if (ltbnoa <= args->agbno - gtdiff) {
1037 xfs_btree_del_cursor(
1038 bno_cur_lt,
1039 XFS_BTREE_NOERROR);
1040 bno_cur_lt = NULL;
1041 break;
1042 }
1043 /*
1044 * If we reach a big enough entry,
1045 * compare the two and pick the best.
1046 */
1047 if (ltlena >= args->minlen) {
1048 args->len = XFS_EXTLEN_MIN(
1049 ltlena, args->maxlen);
1050 xfs_alloc_fix_len(args);
1051 rlen = args->len;
1052 ltdiff = xfs_alloc_compute_diff(
1053 args->agbno, rlen,
1054 args->alignment,
1055 ltbno, ltlen, &ltnew);
1056 /*
1057 * Left side is better.
1058 */
1059 if (ltdiff < gtdiff) {
1060 xfs_btree_del_cursor(
1061 bno_cur_gt,
1062 XFS_BTREE_NOERROR);
1063 bno_cur_gt = NULL;
1064 }
1065 /*
1066 * Right side is better.
1067 */
1068 else {
1069 xfs_btree_del_cursor(
1070 bno_cur_lt,
1071 XFS_BTREE_NOERROR);
1072 bno_cur_lt = NULL;
1073 }
1074 break;
1075 }
1076 /*
1077 * Fell off the left end.
1078 */
b194c7d8 1079 if ((error = xfs_btree_decrement(
0e266570 1080 bno_cur_lt, 0, &i)))
2bd0ea18
NS
1081 goto error0;
1082 if (!i) {
1083 xfs_btree_del_cursor(bno_cur_lt,
1084 XFS_BTREE_NOERROR);
1085 bno_cur_lt = NULL;
1086 break;
1087 }
1088 }
1089 }
1090 /*
1091 * The right side is perfect, trash the left side.
1092 */
1093 else {
1094 xfs_btree_del_cursor(bno_cur_lt,
1095 XFS_BTREE_NOERROR);
1096 bno_cur_lt = NULL;
1097 }
1098 }
1099 }
1100 /*
1101 * If we couldn't get anything, give up.
1102 */
1103 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1104 TRACE_ALLOC("neither", args);
1105 args->agbno = NULLAGBLOCK;
1106 return 0;
1107 }
1108 /*
1109 * At this point we have selected a freespace entry, either to the
1110 * left or to the right. If it's on the right, copy all the
1111 * useful variables to the "left" set so we only have one
1112 * copy of this code.
1113 */
1114 if (bno_cur_gt) {
1115 bno_cur_lt = bno_cur_gt;
1116 bno_cur_gt = NULL;
1117 ltbno = gtbno;
1118 ltbnoa = gtbnoa;
1119 ltlen = gtlen;
1120 ltlena = gtlena;
1121 j = 1;
1122 } else
1123 j = 0;
1124 /*
1125 * Fix up the length and compute the useful address.
1126 */
1127 ltend = ltbno + ltlen;
1128 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1129 xfs_alloc_fix_len(args);
1130 if (!xfs_alloc_fix_minleft(args)) {
1131 TRACE_ALLOC("nominleft", args);
1132 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1133 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1134 return 0;
1135 }
1136 rlen = args->len;
1137 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
1138 ltlen, &ltnew);
1139 ASSERT(ltnew >= ltbno);
1140 ASSERT(ltnew + rlen <= ltend);
6e3140c7 1141 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
2bd0ea18 1142 args->agbno = ltnew;
0e266570
NS
1143 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1144 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
2bd0ea18
NS
1145 goto error0;
1146 TRACE_ALLOC(j ? "gt" : "lt", args);
1147 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1148 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1149 return 0;
1150
1151 error0:
1152 TRACE_ALLOC("error", args);
1153 if (cnt_cur != NULL)
1154 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1155 if (bno_cur_lt != NULL)
1156 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1157 if (bno_cur_gt != NULL)
1158 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1159 return error;
1160}
1161
1162/*
1163 * Allocate a variable extent anywhere in the allocation group agno.
1164 * Extent's length (returned in len) will be between minlen and maxlen,
1165 * and of the form k * prod + mod unless there's nothing that large.
1166 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1167 */
1168STATIC int /* error */
1169xfs_alloc_ag_vextent_size(
dfc130f3 1170 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 1171{
dfc130f3
RC
1172 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */
1173 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */
2bd0ea18
NS
1174 int error; /* error result */
1175 xfs_agblock_t fbno; /* start of found freespace */
1176 xfs_extlen_t flen; /* length of found freespace */
2bd0ea18
NS
1177 int i; /* temp status variable */
1178 xfs_agblock_t rbno; /* returned block number */
1179 xfs_extlen_t rlen; /* length of returned extent */
1180
1181 /*
1182 * Allocate and initialize a cursor for the by-size btree.
1183 */
b194c7d8
BN
1184 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1185 args->agno, XFS_BTNUM_CNT);
2bd0ea18
NS
1186 bno_cur = NULL;
1187 /*
1188 * Look for an entry >= maxlen+alignment-1 blocks.
1189 */
0e266570
NS
1190 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1191 args->maxlen + args->alignment - 1, &i)))
2bd0ea18
NS
1192 goto error0;
1193 /*
1194 * If none, then pick up the last entry in the tree unless the
1195 * tree is empty.
5000d01d 1196 */
2bd0ea18 1197 if (!i) {
0e266570
NS
1198 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
1199 &flen, &i)))
2bd0ea18
NS
1200 goto error0;
1201 if (i == 0 || flen == 0) {
1202 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1203 TRACE_ALLOC("noentry", args);
1204 return 0;
1205 }
1206 ASSERT(i == 1);
1207 }
1208 /*
1209 * There's a freespace as big as maxlen+alignment-1, get it.
1210 */
1211 else {
0e266570 1212 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
2bd0ea18
NS
1213 goto error0;
1214 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1215 }
1216 /*
1217 * In the first case above, we got the last entry in the
1218 * by-size btree. Now we check to see if the space hits maxlen
1219 * once aligned; if not, we search left for something better.
1220 * This can't happen in the second case above.
1221 */
1222 xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen,
1223 &rbno, &rlen);
1224 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
5000d01d 1225 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
2bd0ea18
NS
1226 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1227 if (rlen < args->maxlen) {
1228 xfs_agblock_t bestfbno;
1229 xfs_extlen_t bestflen;
1230 xfs_agblock_t bestrbno;
1231 xfs_extlen_t bestrlen;
1232
1233 bestrlen = rlen;
1234 bestrbno = rbno;
1235 bestflen = flen;
1236 bestfbno = fbno;
1237 for (;;) {
b194c7d8 1238 if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
2bd0ea18
NS
1239 goto error0;
1240 if (i == 0)
1241 break;
0e266570
NS
1242 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1243 &i)))
2bd0ea18
NS
1244 goto error0;
1245 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1246 if (flen < bestrlen)
1247 break;
1248 xfs_alloc_compute_aligned(fbno, flen, args->alignment,
1249 args->minlen, &rbno, &rlen);
1250 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1251 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1252 (rlen <= flen && rbno + rlen <= fbno + flen),
1253 error0);
1254 if (rlen > bestrlen) {
1255 bestrlen = rlen;
1256 bestrbno = rbno;
1257 bestflen = flen;
1258 bestfbno = fbno;
1259 if (rlen == args->maxlen)
1260 break;
1261 }
5000d01d 1262 }
0e266570
NS
1263 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1264 &i)))
2bd0ea18
NS
1265 goto error0;
1266 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1267 rlen = bestrlen;
1268 rbno = bestrbno;
1269 flen = bestflen;
1270 fbno = bestfbno;
1271 }
1272 args->wasfromfl = 0;
1273 /*
1274 * Fix up the length.
1275 */
1276 args->len = rlen;
1277 xfs_alloc_fix_len(args);
1278 if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
1279 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1280 TRACE_ALLOC("nominleft", args);
1281 args->agbno = NULLAGBLOCK;
1282 return 0;
1283 }
1284 rlen = args->len;
1285 XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
1286 /*
1287 * Allocate and initialize a cursor for the by-block tree.
1288 */
b194c7d8
BN
1289 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1290 args->agno, XFS_BTNUM_BNO);
0e266570
NS
1291 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1292 rbno, rlen, XFSA_FIXUP_CNT_OK)))
2bd0ea18
NS
1293 goto error0;
1294 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1295 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1296 cnt_cur = bno_cur = NULL;
1297 args->len = rlen;
1298 args->agbno = rbno;
1299 XFS_WANT_CORRUPTED_GOTO(
1300 args->agbno + args->len <=
6e3140c7 1301 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
2bd0ea18
NS
1302 error0);
1303 TRACE_ALLOC("normal", args);
1304 return 0;
1305
1306error0:
1307 TRACE_ALLOC("error", args);
1308 if (cnt_cur)
1309 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1310 if (bno_cur)
1311 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1312 return error;
1313}
1314
1315/*
1316 * Deal with the case where only small freespaces remain.
1317 * Either return the contents of the last freespace record,
1318 * or allocate space from the freelist if there is nothing in the tree.
1319 */
1320STATIC int /* error */
1321xfs_alloc_ag_vextent_small(
dfc130f3
RC
1322 xfs_alloc_arg_t *args, /* allocation argument structure */
1323 xfs_btree_cur_t *ccur, /* by-size cursor */
1324 xfs_agblock_t *fbnop, /* result block number */
1325 xfs_extlen_t *flenp, /* result length */
2bd0ea18
NS
1326 int *stat) /* status: 0-freelist, 1-normal/none */
1327{
1328 int error;
1329 xfs_agblock_t fbno;
1330 xfs_extlen_t flen;
2bd0ea18
NS
1331 int i;
1332
b194c7d8 1333 if ((error = xfs_btree_decrement(ccur, 0, &i)))
2bd0ea18
NS
1334 goto error0;
1335 if (i) {
0e266570 1336 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
2bd0ea18
NS
1337 goto error0;
1338 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1339 }
1340 /*
1341 * Nothing in the btree, try the freelist. Make sure
1342 * to respect minleft even when pulling from the
1343 * freelist.
1344 */
1345 else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
6e3140c7
NS
1346 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1347 > args->minleft)) {
cdded3d8
DC
1348 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1349 if (error)
2bd0ea18
NS
1350 goto error0;
1351 if (fbno != NULLAGBLOCK) {
1352 if (args->userdata) {
1353 xfs_buf_t *bp;
1354
1355 bp = xfs_btree_get_bufs(args->mp, args->tp,
1356 args->agno, fbno, 0);
1357 xfs_trans_binval(args->tp, bp);
2bd0ea18
NS
1358 }
1359 args->len = 1;
1360 args->agbno = fbno;
1361 XFS_WANT_CORRUPTED_GOTO(
1362 args->agbno + args->len <=
6e3140c7 1363 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
2bd0ea18
NS
1364 error0);
1365 args->wasfromfl = 1;
1366 TRACE_ALLOC("freelist", args);
1367 *stat = 0;
1368 return 0;
1369 }
1370 /*
1371 * Nothing in the freelist.
1372 */
1373 else
1374 flen = 0;
1375 }
1376 /*
1377 * Can't allocate from the freelist for some reason.
1378 */
5e656dbb
BN
1379 else {
1380 fbno = NULLAGBLOCK;
2bd0ea18 1381 flen = 0;
5e656dbb 1382 }
2bd0ea18
NS
1383 /*
1384 * Can't do the allocation, give up.
1385 */
1386 if (flen < args->minlen) {
1387 args->agbno = NULLAGBLOCK;
1388 TRACE_ALLOC("notenough", args);
1389 flen = 0;
1390 }
1391 *fbnop = fbno;
1392 *flenp = flen;
1393 *stat = 1;
1394 TRACE_ALLOC("normal", args);
1395 return 0;
1396
1397error0:
1398 TRACE_ALLOC("error", args);
1399 return error;
1400}
1401
1402/*
1403 * Free the extent starting at agno/bno for length.
1404 */
1405STATIC int /* error */
1406xfs_free_ag_extent(
1407 xfs_trans_t *tp, /* transaction pointer */
dfc130f3 1408 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2bd0ea18
NS
1409 xfs_agnumber_t agno, /* allocation group number */
1410 xfs_agblock_t bno, /* starting block number */
1411 xfs_extlen_t len, /* length of extent */
1412 int isfl) /* set if is freelist blocks - no sb acctg */
1413{
dfc130f3
RC
1414 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
1415 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
2bd0ea18 1416 int error; /* error return value */
2bd0ea18
NS
1417 xfs_agblock_t gtbno; /* start of right neighbor block */
1418 xfs_extlen_t gtlen; /* length of right neighbor block */
1419 int haveleft; /* have a left neighbor block */
1420 int haveright; /* have a right neighbor block */
1421 int i; /* temp, result code */
1422 xfs_agblock_t ltbno; /* start of left neighbor block */
1423 xfs_extlen_t ltlen; /* length of left neighbor block */
1424 xfs_mount_t *mp; /* mount point struct for filesystem */
1425 xfs_agblock_t nbno; /* new starting block of freespace */
1426 xfs_extlen_t nlen; /* new length of freespace */
1427
1428 mp = tp->t_mountp;
5000d01d 1429 /*
2bd0ea18
NS
1430 * Allocate and initialize a cursor for the by-block btree.
1431 */
b194c7d8 1432 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
2bd0ea18 1433 cnt_cur = NULL;
5000d01d 1434 /*
2bd0ea18
NS
1435 * Look for a neighboring block on the left (lower block numbers)
1436 * that is contiguous with this space.
1437 */
0e266570 1438 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
2bd0ea18
NS
1439 goto error0;
1440 if (haveleft) {
1441 /*
1442 * There is a block to our left.
1443 */
0e266570 1444 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
2bd0ea18
NS
1445 goto error0;
1446 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1447 /*
1448 * It's not contiguous, though.
1449 */
1450 if (ltbno + ltlen < bno)
1451 haveleft = 0;
1452 else {
1453 /*
1454 * If this failure happens the request to free this
1455 * space was invalid, it's (partly) already free.
1456 * Very bad.
1457 */
1458 XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
1459 }
1460 }
5000d01d 1461 /*
2bd0ea18
NS
1462 * Look for a neighboring block on the right (higher block numbers)
1463 * that is contiguous with this space.
1464 */
b194c7d8 1465 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
2bd0ea18
NS
1466 goto error0;
1467 if (haveright) {
1468 /*
1469 * There is a block to our right.
1470 */
0e266570 1471 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
2bd0ea18
NS
1472 goto error0;
1473 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1474 /*
1475 * It's not contiguous, though.
1476 */
1477 if (bno + len < gtbno)
1478 haveright = 0;
1479 else {
1480 /*
1481 * If this failure happens the request to free this
1482 * space was invalid, it's (partly) already free.
1483 * Very bad.
1484 */
1485 XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
1486 }
1487 }
1488 /*
1489 * Now allocate and initialize a cursor for the by-size tree.
1490 */
b194c7d8 1491 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
2bd0ea18
NS
1492 /*
1493 * Have both left and right contiguous neighbors.
1494 * Merge all three into a single free block.
1495 */
1496 if (haveleft && haveright) {
1497 /*
1498 * Delete the old by-size entry on the left.
1499 */
0e266570 1500 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18
NS
1501 goto error0;
1502 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
b194c7d8 1503 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18
NS
1504 goto error0;
1505 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1506 /*
1507 * Delete the old by-size entry on the right.
1508 */
0e266570 1509 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18
NS
1510 goto error0;
1511 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
b194c7d8 1512 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18
NS
1513 goto error0;
1514 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1515 /*
1516 * Delete the old by-block entry for the right block.
1517 */
b194c7d8 1518 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18
NS
1519 goto error0;
1520 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1521 /*
1522 * Move the by-block cursor back to the left neighbor.
1523 */
b194c7d8 1524 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18
NS
1525 goto error0;
1526 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1527#ifdef DEBUG
1528 /*
1529 * Check that this is the right record: delete didn't
1530 * mangle the cursor.
1531 */
1532 {
1533 xfs_agblock_t xxbno;
1534 xfs_extlen_t xxlen;
1535
0e266570
NS
1536 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1537 &i)))
2bd0ea18
NS
1538 goto error0;
1539 XFS_WANT_CORRUPTED_GOTO(
1540 i == 1 && xxbno == ltbno && xxlen == ltlen,
1541 error0);
1542 }
1543#endif
1544 /*
1545 * Update remaining by-block entry to the new, joined block.
1546 */
1547 nbno = ltbno;
1548 nlen = len + ltlen + gtlen;
0e266570 1549 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1550 goto error0;
1551 }
1552 /*
1553 * Have only a left contiguous neighbor.
1554 * Merge it together with the new freespace.
1555 */
1556 else if (haveleft) {
1557 /*
1558 * Delete the old by-size entry on the left.
1559 */
0e266570 1560 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18
NS
1561 goto error0;
1562 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
b194c7d8 1563 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18
NS
1564 goto error0;
1565 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1566 /*
1567 * Back up the by-block cursor to the left neighbor, and
1568 * update its length.
1569 */
b194c7d8 1570 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18
NS
1571 goto error0;
1572 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1573 nbno = ltbno;
1574 nlen = len + ltlen;
0e266570 1575 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1576 goto error0;
1577 }
1578 /*
1579 * Have only a right contiguous neighbor.
1580 * Merge it together with the new freespace.
1581 */
1582 else if (haveright) {
1583 /*
1584 * Delete the old by-size entry on the right.
1585 */
0e266570 1586 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18
NS
1587 goto error0;
1588 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
b194c7d8 1589 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18
NS
1590 goto error0;
1591 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1592 /*
5000d01d 1593 * Update the starting block and length of the right
2bd0ea18
NS
1594 * neighbor in the by-block tree.
1595 */
1596 nbno = bno;
1597 nlen = len + gtlen;
0e266570 1598 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1599 goto error0;
1600 }
1601 /*
1602 * No contiguous neighbors.
1603 * Insert the new freespace into the by-block tree.
1604 */
1605 else {
1606 nbno = bno;
1607 nlen = len;
b194c7d8 1608 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18
NS
1609 goto error0;
1610 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1611 }
1612 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1613 bno_cur = NULL;
1614 /*
1615 * In all cases we need to insert the new freespace in the by-size tree.
1616 */
0e266570 1617 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2bd0ea18
NS
1618 goto error0;
1619 XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
b194c7d8 1620 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18
NS
1621 goto error0;
1622 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1623 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1624 cnt_cur = NULL;
1625 /*
1626 * Update the freespace totals in the ag and superblock.
1627 */
1628 {
1629 xfs_agf_t *agf;
1630 xfs_perag_t *pag; /* per allocation group data */
1631
1632 agf = XFS_BUF_TO_AGF(agbp);
1633 pag = &mp->m_perag[agno];
5e656dbb 1634 be32_add_cpu(&agf->agf_freeblks, len);
2bd0ea18
NS
1635 xfs_trans_agblocks_delta(tp, len);
1636 pag->pagf_freeblks += len;
1637 XFS_WANT_CORRUPTED_GOTO(
6e3140c7
NS
1638 be32_to_cpu(agf->agf_freeblks) <=
1639 be32_to_cpu(agf->agf_length),
2bd0ea18
NS
1640 error0);
1641 TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
1642 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
1643 if (!isfl)
1644 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
32a82561
NS
1645 XFS_STATS_INC(xs_freex);
1646 XFS_STATS_ADD(xs_freeb, len);
2bd0ea18
NS
1647 }
1648 TRACE_FREE(haveleft ?
1649 (haveright ? "both" : "left") :
1650 (haveright ? "right" : "none"),
1651 agno, bno, len, isfl);
3e535bba
NS
1652
1653 /*
1654 * Since blocks move to the free list without the coordination
1655 * used in xfs_bmap_finish, we can't allow block to be available
1656 * for reallocation and non-transaction writing (user data)
1657 * until we know that the transaction that moved it to the free
dfc130f3 1658 * list is permanently on disk. We track the blocks by declaring
5000d01d
SL
1659 * these blocks as "busy"; the busy list is maintained on a per-ag
1660 * basis and each transaction records which entries should be removed
3e535bba
NS
1661 * when the iclog commits to disk. If a busy block is allocated,
1662 * the iclog is pushed up to the LSN that freed the block.
1663 */
1664 xfs_alloc_mark_busy(tp, agno, bno, len);
2bd0ea18
NS
1665 return 0;
1666
1667 error0:
1668 TRACE_FREE("error", agno, bno, len, isfl);
1669 if (bno_cur)
1670 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1671 if (cnt_cur)
1672 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1673 return error;
1674}
1675
5000d01d 1676/*
2bd0ea18
NS
1677 * Visible (exported) allocation/free functions.
1678 * Some of these are used just by xfs_alloc_btree.c and this file.
1679 */
1680
1681/*
1682 * Compute and fill in value of m_ag_maxlevels.
1683 */
1684void
1685xfs_alloc_compute_maxlevels(
1686 xfs_mount_t *mp) /* file system mount structure */
1687{
1688 int level;
1689 uint maxblocks;
1690 uint maxleafents;
1691 int minleafrecs;
1692 int minnoderecs;
1693
1694 maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1695 minleafrecs = mp->m_alloc_mnr[0];
1696 minnoderecs = mp->m_alloc_mnr[1];
1697 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1698 for (level = 1; maxblocks > 1; level++)
1699 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1700 mp->m_ag_maxlevels = level;
1701}
1702
1703/*
1704 * Decide whether to use this allocation group for this allocation.
1705 * If so, fix up the btree freelist's size.
5e656dbb
BN
1706 *
1707 * Note: This is public so mkfs can call it
2bd0ea18
NS
1708 */
1709int /* error */
1710xfs_alloc_fix_freelist(
dfc130f3 1711 xfs_alloc_arg_t *args, /* allocation argument structure */
2bd0ea18
NS
1712 int flags) /* XFS_ALLOC_FLAG_... */
1713{
1714 xfs_buf_t *agbp; /* agf buffer pointer */
1715 xfs_agf_t *agf; /* a.g. freespace structure pointer */
1716 xfs_buf_t *agflbp;/* agfl buffer pointer */
1717 xfs_agblock_t bno; /* freelist block */
1718 xfs_extlen_t delta; /* new blocks needed in freelist */
1719 int error; /* error result code */
1720 xfs_extlen_t longest;/* longest extent in allocation group */
1721 xfs_mount_t *mp; /* file system mount point structure */
1722 xfs_extlen_t need; /* total blocks needed in freelist */
1723 xfs_perag_t *pag; /* per-ag information structure */
dfc130f3 1724 xfs_alloc_arg_t targs; /* local allocation arguments */
2bd0ea18
NS
1725 xfs_trans_t *tp; /* transaction pointer */
1726
1727 mp = args->mp;
1728
1729 pag = args->pag;
1730 tp = args->tp;
1731 if (!pag->pagf_init) {
0e266570
NS
1732 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1733 &agbp)))
2bd0ea18
NS
1734 return error;
1735 if (!pag->pagf_init) {
5e656dbb
BN
1736 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1737 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2bd0ea18
NS
1738 args->agbp = NULL;
1739 return 0;
1740 }
1741 } else
1742 agbp = NULL;
34317449 1743
5e656dbb
BN
1744 /*
1745 * If this is a metadata preferred pag and we are user data
34317449
NS
1746 * then try somewhere else if we are not being asked to
1747 * try harder at this point
1748 */
5e656dbb
BN
1749 if (pag->pagf_metadata && args->userdata &&
1750 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1751 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
34317449
NS
1752 args->agbp = NULL;
1753 return 0;
1754 }
1755
5e656dbb
BN
1756 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1757 need = XFS_MIN_FREELIST_PAG(pag, mp);
1758 delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
1759 /*
1760 * If it looks like there isn't a long enough extent, or enough
1761 * total blocks, reject it.
1762 */
1763 longest = (pag->pagf_longest > delta) ?
1764 (pag->pagf_longest - delta) :
1765 (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
1766 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1767 longest ||
1768 ((int)(pag->pagf_freeblks + pag->pagf_flcount -
1769 need - args->total) < (int)args->minleft)) {
1770 if (agbp)
1771 xfs_trans_brelse(tp, agbp);
1772 args->agbp = NULL;
1773 return 0;
1774 }
2bd0ea18 1775 }
5e656dbb 1776
2bd0ea18
NS
1777 /*
1778 * Get the a.g. freespace buffer.
1779 * Can fail if we're not blocking on locks, and it's held.
1780 */
1781 if (agbp == NULL) {
0e266570
NS
1782 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1783 &agbp)))
2bd0ea18
NS
1784 return error;
1785 if (agbp == NULL) {
5e656dbb
BN
1786 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1787 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2bd0ea18
NS
1788 args->agbp = NULL;
1789 return 0;
1790 }
1791 }
1792 /*
1793 * Figure out how many blocks we should have in the freelist.
1794 */
1795 agf = XFS_BUF_TO_AGF(agbp);
1796 need = XFS_MIN_FREELIST(agf, mp);
2bd0ea18
NS
1797 /*
1798 * If there isn't enough total or single-extent, reject it.
1799 */
5e656dbb
BN
1800 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1801 delta = need > be32_to_cpu(agf->agf_flcount) ?
1802 (need - be32_to_cpu(agf->agf_flcount)) : 0;
1803 longest = be32_to_cpu(agf->agf_longest);
1804 longest = (longest > delta) ? (longest - delta) :
1805 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1806 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1807 longest ||
1808 ((int)(be32_to_cpu(agf->agf_freeblks) +
1809 be32_to_cpu(agf->agf_flcount) - need - args->total) <
1810 (int)args->minleft)) {
1811 xfs_trans_brelse(tp, agbp);
1812 args->agbp = NULL;
1813 return 0;
1814 }
2bd0ea18
NS
1815 }
1816 /*
1817 * Make the freelist shorter if it's too long.
1818 */
6e3140c7 1819 while (be32_to_cpu(agf->agf_flcount) > need) {
2bd0ea18
NS
1820 xfs_buf_t *bp;
1821
5e656dbb
BN
1822 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
1823 if (error)
2bd0ea18 1824 return error;
0e266570 1825 if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
2bd0ea18
NS
1826 return error;
1827 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
1828 xfs_trans_binval(tp, bp);
2bd0ea18
NS
1829 }
1830 /*
1831 * Initialize the args structure.
1832 */
1833 targs.tp = tp;
1834 targs.mp = mp;
1835 targs.agbp = agbp;
1836 targs.agno = args->agno;
1837 targs.mod = targs.minleft = targs.wasdel = targs.userdata =
1838 targs.minalignslop = 0;
1839 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1840 targs.type = XFS_ALLOCTYPE_THIS_AG;
1841 targs.pag = pag;
0e266570 1842 if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
2bd0ea18
NS
1843 return error;
1844 /*
1845 * Make the freelist longer if it's too short.
1846 */
6e3140c7 1847 while (be32_to_cpu(agf->agf_flcount) < need) {
2bd0ea18 1848 targs.agbno = 0;
6e3140c7 1849 targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
2bd0ea18
NS
1850 /*
1851 * Allocate as many blocks as possible at once.
1852 */
cb4deb22
NS
1853 if ((error = xfs_alloc_ag_vextent(&targs))) {
1854 xfs_trans_brelse(tp, agflbp);
2bd0ea18 1855 return error;
cb4deb22 1856 }
2bd0ea18 1857 /*
dfc130f3
RC
1858 * Stop if we run out. Won't happen if callers are obeying
1859 * the restrictions correctly. Can happen for free calls
2bd0ea18
NS
1860 * on a completely full ag.
1861 */
5e656dbb
BN
1862 if (targs.agbno == NULLAGBLOCK) {
1863 if (flags & XFS_ALLOC_FLAG_FREEING)
1864 break;
1865 xfs_trans_brelse(tp, agflbp);
1866 args->agbp = NULL;
1867 return 0;
1868 }
2bd0ea18
NS
1869 /*
1870 * Put each allocated block on the list.
1871 */
1872 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
5e656dbb
BN
1873 error = xfs_alloc_put_freelist(tp, agbp,
1874 agflbp, bno, 0);
1875 if (error)
2bd0ea18
NS
1876 return error;
1877 }
1878 }
cb4deb22 1879 xfs_trans_brelse(tp, agflbp);
2bd0ea18
NS
1880 args->agbp = agbp;
1881 return 0;
1882}
1883
1884/*
1885 * Get a block from the freelist.
1886 * Returns with the buffer for the block gotten.
1887 */
1888int /* error */
1889xfs_alloc_get_freelist(
1890 xfs_trans_t *tp, /* transaction pointer */
1891 xfs_buf_t *agbp, /* buffer containing the agf structure */
cdded3d8
DC
1892 xfs_agblock_t *bnop, /* block address retrieved from freelist */
1893 int btreeblk) /* destination is a AGF btree */
2bd0ea18
NS
1894{
1895 xfs_agf_t *agf; /* a.g. freespace structure */
1896 xfs_agfl_t *agfl; /* a.g. freelist structure */
1897 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
1898 xfs_agblock_t bno; /* block number returned */
1899 int error;
cdded3d8 1900 int logflags;
2bd0ea18
NS
1901 xfs_mount_t *mp; /* mount structure */
1902 xfs_perag_t *pag; /* per allocation group data */
1903
1904 agf = XFS_BUF_TO_AGF(agbp);
1905 /*
1906 * Freelist is empty, give up.
1907 */
46eca962 1908 if (!agf->agf_flcount) {
2bd0ea18
NS
1909 *bnop = NULLAGBLOCK;
1910 return 0;
1911 }
1912 /*
1913 * Read the array of free blocks.
1914 */
1915 mp = tp->t_mountp;
0e266570 1916 if ((error = xfs_alloc_read_agfl(mp, tp,
6e3140c7 1917 be32_to_cpu(agf->agf_seqno), &agflbp)))
2bd0ea18
NS
1918 return error;
1919 agfl = XFS_BUF_TO_AGFL(agflbp);
1920 /*
1921 * Get the block number and update the data structures.
1922 */
5e656dbb
BN
1923 bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
1924 be32_add_cpu(&agf->agf_flfirst, 1);
2bd0ea18 1925 xfs_trans_brelse(tp, agflbp);
6e3140c7 1926 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
46eca962 1927 agf->agf_flfirst = 0;
6e3140c7 1928 pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
5e656dbb 1929 be32_add_cpu(&agf->agf_flcount, -1);
2bd0ea18
NS
1930 xfs_trans_agflist_delta(tp, -1);
1931 pag->pagf_flcount--;
cdded3d8
DC
1932
1933 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
1934 if (btreeblk) {
5e656dbb 1935 be32_add_cpu(&agf->agf_btreeblks, 1);
cdded3d8
DC
1936 pag->pagf_btreeblks++;
1937 logflags |= XFS_AGF_BTREEBLKS;
1938 }
1939
1940 TRACE_MODAGF(NULL, agf, logflags);
1941 xfs_alloc_log_agf(tp, agbp, logflags);
2bd0ea18 1942 *bnop = bno;
3e535bba
NS
1943
1944 /*
1945 * As blocks are freed, they are added to the per-ag busy list
1946 * and remain there until the freeing transaction is committed to
1947 * disk. Now that we have allocated blocks, this list must be
dfc130f3 1948 * searched to see if a block is being reused. If one is, then
3e535bba
NS
1949 * the freeing transaction must be pushed to disk NOW by forcing
1950 * to disk all iclogs up that transaction's LSN.
1951 */
6e3140c7 1952 xfs_alloc_search_busy(tp, be32_to_cpu(agf->agf_seqno), bno, 1);
2bd0ea18
NS
1953 return 0;
1954}
1955
1956/*
1957 * Log the given fields from the agf structure.
1958 */
1959void
1960xfs_alloc_log_agf(
1961 xfs_trans_t *tp, /* transaction pointer */
1962 xfs_buf_t *bp, /* buffer for a.g. freelist header */
dfc130f3 1963 int fields) /* mask of fields to be logged (XFS_AGF_...) */
2bd0ea18
NS
1964{
1965 int first; /* first byte offset */
1966 int last; /* last byte offset */
1967 static const short offsets[] = {
1968 offsetof(xfs_agf_t, agf_magicnum),
1969 offsetof(xfs_agf_t, agf_versionnum),
1970 offsetof(xfs_agf_t, agf_seqno),
1971 offsetof(xfs_agf_t, agf_length),
1972 offsetof(xfs_agf_t, agf_roots[0]),
1973 offsetof(xfs_agf_t, agf_levels[0]),
1974 offsetof(xfs_agf_t, agf_flfirst),
1975 offsetof(xfs_agf_t, agf_fllast),
1976 offsetof(xfs_agf_t, agf_flcount),
1977 offsetof(xfs_agf_t, agf_freeblks),
1978 offsetof(xfs_agf_t, agf_longest),
cdded3d8 1979 offsetof(xfs_agf_t, agf_btreeblks),
2bd0ea18
NS
1980 sizeof(xfs_agf_t)
1981 };
1982
1983 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
1984 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
1985}
1986
1987/*
1988 * Interface for inode allocation to force the pag data to be initialized.
1989 */
1990int /* error */
1991xfs_alloc_pagf_init(
1992 xfs_mount_t *mp, /* file system mount structure */
1993 xfs_trans_t *tp, /* transaction pointer */
1994 xfs_agnumber_t agno, /* allocation group number */
1995 int flags) /* XFS_ALLOC_FLAGS_... */
1996{
7a3bffe4 1997 xfs_buf_t *bp;
2bd0ea18
NS
1998 int error;
1999
0e266570 2000 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2bd0ea18
NS
2001 return error;
2002 if (bp)
2003 xfs_trans_brelse(tp, bp);
2004 return 0;
2005}
2006
2007/*
2008 * Put the block on the freelist for the allocation group.
2009 */
2010int /* error */
2011xfs_alloc_put_freelist(
2012 xfs_trans_t *tp, /* transaction pointer */
2013 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2014 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
cdded3d8
DC
2015 xfs_agblock_t bno, /* block being freed */
2016 int btreeblk) /* block came from a AGF btree */
2bd0ea18
NS
2017{
2018 xfs_agf_t *agf; /* a.g. freespace structure */
2019 xfs_agfl_t *agfl; /* a.g. free block array */
5e656dbb 2020 __be32 *blockp;/* pointer to array entry */
2bd0ea18 2021 int error;
cdded3d8 2022 int logflags;
2bd0ea18
NS
2023 xfs_mount_t *mp; /* mount structure */
2024 xfs_perag_t *pag; /* per allocation group data */
2025
2026 agf = XFS_BUF_TO_AGF(agbp);
2027 mp = tp->t_mountp;
2028
2029 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
6e3140c7 2030 be32_to_cpu(agf->agf_seqno), &agflbp)))
2bd0ea18
NS
2031 return error;
2032 agfl = XFS_BUF_TO_AGFL(agflbp);
5e656dbb 2033 be32_add_cpu(&agf->agf_fllast, 1);
6e3140c7 2034 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
46eca962 2035 agf->agf_fllast = 0;
6e3140c7 2036 pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)];
5e656dbb 2037 be32_add_cpu(&agf->agf_flcount, 1);
2bd0ea18
NS
2038 xfs_trans_agflist_delta(tp, 1);
2039 pag->pagf_flcount++;
cdded3d8
DC
2040
2041 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2042 if (btreeblk) {
5e656dbb 2043 be32_add_cpu(&agf->agf_btreeblks, -1);
cdded3d8
DC
2044 pag->pagf_btreeblks--;
2045 logflags |= XFS_AGF_BTREEBLKS;
2046 }
2047
5e656dbb
BN
2048 TRACE_MODAGF(NULL, agf, logflags);
2049 xfs_alloc_log_agf(tp, agbp, logflags);
2050
6e3140c7
NS
2051 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2052 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
5e656dbb 2053 *blockp = cpu_to_be32(bno);
cdded3d8
DC
2054 TRACE_MODAGF(NULL, agf, logflags);
2055 xfs_alloc_log_agf(tp, agbp, logflags);
2bd0ea18
NS
2056 xfs_trans_log_buf(tp, agflbp,
2057 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
2058 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
2059 sizeof(xfs_agblock_t) - 1));
2060 return 0;
2061}
2062
2063/*
2064 * Read in the allocation group header (free/alloc section).
2065 */
2066int /* error */
2067xfs_alloc_read_agf(
2068 xfs_mount_t *mp, /* mount point structure */
2069 xfs_trans_t *tp, /* transaction pointer */
2070 xfs_agnumber_t agno, /* allocation group number */
2071 int flags, /* XFS_ALLOC_FLAG_... */
2072 xfs_buf_t **bpp) /* buffer for the ag freelist header */
2073{
2074 xfs_agf_t *agf; /* ag freelist header */
2075 int agf_ok; /* set if agf is consistent */
2076 xfs_buf_t *bp; /* return value */
2bd0ea18 2077 xfs_perag_t *pag; /* per allocation group data */
9440d84d 2078 int error;
2bd0ea18
NS
2079
2080 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
2081 error = xfs_trans_read_buf(
2082 mp, tp, mp->m_ddev_targp,
2083 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2084 XFS_FSS_TO_BB(mp, 1),
2bd0ea18 2085 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U,
9440d84d
NS
2086 &bp);
2087 if (error)
2bd0ea18
NS
2088 return error;
2089 ASSERT(!bp || !XFS_BUF_GETERROR(bp));
2090 if (!bp) {
2091 *bpp = NULL;
2092 return 0;
2093 }
2094 /*
2095 * Validate the magic number of the agf block.
2096 */
2097 agf = XFS_BUF_TO_AGF(bp);
2098 agf_ok =
6e3140c7
NS
2099 be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
2100 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
5e656dbb 2101 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
6e3140c7
NS
2102 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2103 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2104 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp);
870d4cbc
BN
2105 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
2106 agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
2107 be32_to_cpu(agf->agf_length);
4ca431fc
NS
2108 if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
2109 XFS_RANDOM_ALLOC_READ_AGF))) {
2110 XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
2111 XFS_ERRLEVEL_LOW, mp, agf);
2bd0ea18
NS
2112 xfs_trans_brelse(tp, bp);
2113 return XFS_ERROR(EFSCORRUPTED);
2114 }
2115 pag = &mp->m_perag[agno];
2116 if (!pag->pagf_init) {
6e3140c7 2117 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
cdded3d8 2118 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
6e3140c7
NS
2119 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2120 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2bd0ea18 2121 pag->pagf_levels[XFS_BTNUM_BNOi] =
6e3140c7 2122 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2bd0ea18 2123 pag->pagf_levels[XFS_BTNUM_CNTi] =
6e3140c7 2124 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
5e656dbb 2125 spin_lock_init(&pag->pagb_lock);
044fe078
NS
2126 pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS *
2127 sizeof(xfs_perag_busy_t), KM_SLEEP);
2bd0ea18
NS
2128 pag->pagf_init = 1;
2129 }
2130#ifdef DEBUG
2131 else if (!XFS_FORCED_SHUTDOWN(mp)) {
6e3140c7 2132 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
cdded3d8 2133 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
6e3140c7
NS
2134 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2135 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2bd0ea18 2136 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
6e3140c7 2137 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2bd0ea18 2138 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
6e3140c7 2139 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2bd0ea18
NS
2140 }
2141#endif
2142 XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF);
2143 *bpp = bp;
2144 return 0;
2145}
2146
2147/*
2148 * Allocate an extent (variable-size).
2149 * Depending on the allocation type, we either look in a single allocation
2150 * group or loop over the allocation groups to find the result.
2151 */
2152int /* error */
2153xfs_alloc_vextent(
dfc130f3 2154 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 2155{
dfc130f3 2156 xfs_agblock_t agsize; /* allocation group size */
2bd0ea18
NS
2157 int error;
2158 int flags; /* XFS_ALLOC_FLAG_... locking flags */
2bd0ea18
NS
2159 xfs_extlen_t minleft;/* minimum left value, temp copy */
2160 xfs_mount_t *mp; /* mount structure pointer */
2161 xfs_agnumber_t sagno; /* starting allocation group number */
dfc130f3 2162 xfs_alloctype_t type; /* input allocation type */
34317449 2163 int bump_rotor = 0;
9baa549b 2164 int no_min = 0;
46eca962 2165 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2bd0ea18
NS
2166
2167 mp = args->mp;
2168 type = args->otype = args->type;
2169 args->agbno = NULLAGBLOCK;
2170 /*
2171 * Just fix this up, for the case where the last a.g. is shorter
2172 * (or there's only one a.g.) and the caller couldn't easily figure
2173 * that out (xfs_bmap_alloc).
2174 */
2175 agsize = mp->m_sb.sb_agblocks;
2176 if (args->maxlen > agsize)
2177 args->maxlen = agsize;
2178 if (args->alignment == 0)
2179 args->alignment = 1;
2180 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2181 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2182 ASSERT(args->minlen <= args->maxlen);
2183 ASSERT(args->minlen <= agsize);
2184 ASSERT(args->mod < args->prod);
2185 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2186 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2187 args->minlen > args->maxlen || args->minlen > agsize ||
2188 args->mod >= args->prod) {
2189 args->fsbno = NULLFSBLOCK;
2190 TRACE_ALLOC("badargs", args);
2191 return 0;
2192 }
9baa549b
SL
2193 minleft = args->minleft;
2194
2bd0ea18
NS
2195 switch (type) {
2196 case XFS_ALLOCTYPE_THIS_AG:
2197 case XFS_ALLOCTYPE_NEAR_BNO:
2198 case XFS_ALLOCTYPE_THIS_BNO:
2199 /*
2200 * These three force us into a single a.g.
2201 */
2202 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
e56fcdce 2203 down_read(&mp->m_peraglock);
2bd0ea18 2204 args->pag = &mp->m_perag[args->agno];
2bd0ea18
NS
2205 args->minleft = 0;
2206 error = xfs_alloc_fix_freelist(args, 0);
2207 args->minleft = minleft;
2208 if (error) {
2209 TRACE_ALLOC("nofix", args);
2210 goto error0;
2211 }
2212 if (!args->agbp) {
e56fcdce 2213 up_read(&mp->m_peraglock);
2bd0ea18
NS
2214 TRACE_ALLOC("noagbp", args);
2215 break;
2216 }
2217 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
0e266570 2218 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 2219 goto error0;
e56fcdce 2220 up_read(&mp->m_peraglock);
2bd0ea18
NS
2221 break;
2222 case XFS_ALLOCTYPE_START_BNO:
2223 /*
2224 * Try near allocation first, then anywhere-in-ag after
2225 * the first a.g. fails.
2226 */
34317449
NS
2227 if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) &&
2228 (mp->m_flags & XFS_MOUNT_32BITINODES)) {
46eca962
NS
2229 args->fsbno = XFS_AGB_TO_FSB(mp,
2230 ((mp->m_agfrotor / rotorstep) %
2231 mp->m_sb.sb_agcount), 0);
34317449
NS
2232 bump_rotor = 1;
2233 }
2bd0ea18
NS
2234 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2235 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2236 /* FALLTHROUGH */
2237 case XFS_ALLOCTYPE_ANY_AG:
2238 case XFS_ALLOCTYPE_START_AG:
2239 case XFS_ALLOCTYPE_FIRST_AG:
2240 /*
2241 * Rotate through the allocation groups looking for a winner.
2242 */
2243 if (type == XFS_ALLOCTYPE_ANY_AG) {
2244 /*
2245 * Start with the last place we left off.
2246 */
46eca962
NS
2247 args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2248 mp->m_sb.sb_agcount;
2bd0ea18
NS
2249 args->type = XFS_ALLOCTYPE_THIS_AG;
2250 flags = XFS_ALLOC_FLAG_TRYLOCK;
2251 } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2252 /*
2253 * Start with allocation group given by bno.
2254 */
2255 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2256 args->type = XFS_ALLOCTYPE_THIS_AG;
2257 sagno = 0;
2258 flags = 0;
2259 } else {
2260 if (type == XFS_ALLOCTYPE_START_AG)
2261 args->type = XFS_ALLOCTYPE_THIS_AG;
2262 /*
2263 * Start with the given allocation group.
2264 */
2265 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2266 flags = XFS_ALLOC_FLAG_TRYLOCK;
2267 }
2268 /*
2269 * Loop over allocation groups twice; first time with
2270 * trylock set, second time without.
2271 */
e56fcdce 2272 down_read(&mp->m_peraglock);
2bd0ea18 2273 for (;;) {
2bd0ea18 2274 args->pag = &mp->m_perag[args->agno];
9baa549b
SL
2275 if (no_min) args->minleft = 0;
2276 error = xfs_alloc_fix_freelist(args, flags);
2277 args->minleft = minleft;
2278 if (error) {
2bd0ea18
NS
2279 TRACE_ALLOC("nofix", args);
2280 goto error0;
2281 }
2282 /*
2283 * If we get a buffer back then the allocation will fly.
2284 */
2285 if (args->agbp) {
0e266570 2286 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 2287 goto error0;
2bd0ea18
NS
2288 break;
2289 }
2bd0ea18
NS
2290 TRACE_ALLOC("loopfailed", args);
2291 /*
2292 * Didn't work, figure out the next iteration.
2293 */
2294 if (args->agno == sagno &&
2295 type == XFS_ALLOCTYPE_START_BNO)
2296 args->type = XFS_ALLOCTYPE_THIS_AG;
5e656dbb
BN
2297 /*
2298 * For the first allocation, we can try any AG to get
2299 * space. However, if we already have allocated a
2300 * block, we don't want to try AGs whose number is below
2301 * sagno. Otherwise, we may end up with out-of-order
2302 * locking of AGF, which might cause deadlock.
2303 */
2304 if (++(args->agno) == mp->m_sb.sb_agcount) {
2305 if (args->firstblock != NULLFSBLOCK)
2306 args->agno = sagno;
2307 else
2308 args->agno = 0;
2309 }
5000d01d 2310 /*
2bd0ea18
NS
2311 * Reached the starting a.g., must either be done
2312 * or switch to non-trylock mode.
2313 */
2314 if (args->agno == sagno) {
9baa549b 2315 if (no_min == 1) {
2bd0ea18
NS
2316 args->agbno = NULLAGBLOCK;
2317 TRACE_ALLOC("allfailed", args);
2318 break;
2319 }
9baa549b
SL
2320 if (flags == 0) {
2321 no_min = 1;
2322 } else {
2323 flags = 0;
2324 if (type == XFS_ALLOCTYPE_START_BNO) {
2325 args->agbno = XFS_FSB_TO_AGBNO(mp,
2326 args->fsbno);
2327 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2328 }
2bd0ea18
NS
2329 }
2330 }
2331 }
e56fcdce 2332 up_read(&mp->m_peraglock);
46eca962
NS
2333 if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2334 if (args->agno == sagno)
2335 mp->m_agfrotor = (mp->m_agfrotor + 1) %
2336 (mp->m_sb.sb_agcount * rotorstep);
2337 else
2338 mp->m_agfrotor = (args->agno * rotorstep + 1) %
2339 (mp->m_sb.sb_agcount * rotorstep);
2340 }
2bd0ea18
NS
2341 break;
2342 default:
2343 ASSERT(0);
2344 /* NOTREACHED */
2345 }
2346 if (args->agbno == NULLAGBLOCK)
2347 args->fsbno = NULLFSBLOCK;
2348 else {
2349 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2350#ifdef DEBUG
2351 ASSERT(args->len >= args->minlen);
2352 ASSERT(args->len <= args->maxlen);
2353 ASSERT(args->agbno % args->alignment == 0);
2354 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2355 args->len);
2356#endif
2357 }
2358 return 0;
2359error0:
e56fcdce 2360 up_read(&mp->m_peraglock);
2bd0ea18
NS
2361 return error;
2362}
2363
2364/*
2365 * Free an extent.
2366 * Just break up the extent address and hand off to xfs_free_ag_extent
2367 * after fixing up the freelist.
2368 */
2369int /* error */
2370xfs_free_extent(
2371 xfs_trans_t *tp, /* transaction pointer */
2372 xfs_fsblock_t bno, /* starting block number of extent */
2373 xfs_extlen_t len) /* length of extent */
2374{
5e656dbb 2375 xfs_alloc_arg_t args;
2bd0ea18
NS
2376 int error;
2377
2378 ASSERT(len != 0);
5e656dbb 2379 memset(&args, 0, sizeof(xfs_alloc_arg_t));
2bd0ea18
NS
2380 args.tp = tp;
2381 args.mp = tp->t_mountp;
2382 args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2383 ASSERT(args.agno < args.mp->m_sb.sb_agcount);
2384 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
e56fcdce 2385 down_read(&args.mp->m_peraglock);
2bd0ea18 2386 args.pag = &args.mp->m_perag[args.agno];
5e656dbb 2387 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
2bd0ea18
NS
2388 goto error0;
2389#ifdef DEBUG
2390 ASSERT(args.agbp != NULL);
5e656dbb
BN
2391 ASSERT((args.agbno + len) <=
2392 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length));
2bd0ea18 2393#endif
5e656dbb 2394 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2bd0ea18 2395error0:
e56fcdce 2396 up_read(&args.mp->m_peraglock);
2bd0ea18
NS
2397 return error;
2398}