]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - libxfs/xfs_alloc.c
xfs: support min/max agbno args in block allocator
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_alloc.c
1 /*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18 #include "libxfs_priv.h"
19 #include "xfs_fs.h"
20 #include "xfs_format.h"
21 #include "xfs_log_format.h"
22 #include "xfs_shared.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_bit.h"
25 #include "xfs_sb.h"
26 #include "xfs_mount.h"
27 #include "xfs_inode.h"
28 #include "xfs_btree.h"
29 #include "xfs_alloc_btree.h"
30 #include "xfs_alloc.h"
31 #include "xfs_cksum.h"
32 #include "xfs_trace.h"
33 #include "xfs_trans.h"
34
35 struct workqueue_struct *xfs_alloc_wq;
36
37 #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
38
39 #define XFSA_FIXUP_BNO_OK 1
40 #define XFSA_FIXUP_CNT_OK 2
41
42 STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
43 STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
44 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
45 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
46 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
47
48 /*
49 * Lookup the record equal to [bno, len] in the btree given by cur.
50 */
51 STATIC int /* error */
52 xfs_alloc_lookup_eq(
53 struct xfs_btree_cur *cur, /* btree cursor */
54 xfs_agblock_t bno, /* starting block of extent */
55 xfs_extlen_t len, /* length of extent */
56 int *stat) /* success/failure */
57 {
58 cur->bc_rec.a.ar_startblock = bno;
59 cur->bc_rec.a.ar_blockcount = len;
60 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
61 }
62
63 /*
64 * Lookup the first record greater than or equal to [bno, len]
65 * in the btree given by cur.
66 */
67 int /* error */
68 xfs_alloc_lookup_ge(
69 struct xfs_btree_cur *cur, /* btree cursor */
70 xfs_agblock_t bno, /* starting block of extent */
71 xfs_extlen_t len, /* length of extent */
72 int *stat) /* success/failure */
73 {
74 cur->bc_rec.a.ar_startblock = bno;
75 cur->bc_rec.a.ar_blockcount = len;
76 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
77 }
78
79 /*
80 * Lookup the first record less than or equal to [bno, len]
81 * in the btree given by cur.
82 */
83 int /* error */
84 xfs_alloc_lookup_le(
85 struct xfs_btree_cur *cur, /* btree cursor */
86 xfs_agblock_t bno, /* starting block of extent */
87 xfs_extlen_t len, /* length of extent */
88 int *stat) /* success/failure */
89 {
90 cur->bc_rec.a.ar_startblock = bno;
91 cur->bc_rec.a.ar_blockcount = len;
92 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
93 }
94
95 /*
96 * Update the record referred to by cur to the value given
97 * by [bno, len].
98 * This either works (return 0) or gets an EFSCORRUPTED error.
99 */
100 STATIC int /* error */
101 xfs_alloc_update(
102 struct xfs_btree_cur *cur, /* btree cursor */
103 xfs_agblock_t bno, /* starting block of extent */
104 xfs_extlen_t len) /* length of extent */
105 {
106 union xfs_btree_rec rec;
107
108 rec.alloc.ar_startblock = cpu_to_be32(bno);
109 rec.alloc.ar_blockcount = cpu_to_be32(len);
110 return xfs_btree_update(cur, &rec);
111 }
112
113 /*
114 * Get the data from the pointed-to record.
115 */
116 int /* error */
117 xfs_alloc_get_rec(
118 struct xfs_btree_cur *cur, /* btree cursor */
119 xfs_agblock_t *bno, /* output: starting block of extent */
120 xfs_extlen_t *len, /* output: length of extent */
121 int *stat) /* output: success/failure */
122 {
123 union xfs_btree_rec *rec;
124 int error;
125
126 error = xfs_btree_get_rec(cur, &rec, stat);
127 if (!error && *stat == 1) {
128 *bno = be32_to_cpu(rec->alloc.ar_startblock);
129 *len = be32_to_cpu(rec->alloc.ar_blockcount);
130 }
131 return error;
132 }
133
/*
 * Compute aligned version of the found extent.
 * Takes alignment and min length into account.
 *
 * The found extent is first trimmed of busy ranges, possibly shifted
 * forward to honor args->min_agbno, then rounded up to args->alignment.
 * A *reslen of 0 signals the caller that the extent is unusable.
 */
STATIC void
xfs_alloc_compute_aligned(
	xfs_alloc_arg_t	*args,		/* allocation argument structure */
	xfs_agblock_t	foundbno,	/* starting block in found extent */
	xfs_extlen_t	foundlen,	/* length in found extent */
	xfs_agblock_t	*resbno,	/* result block number */
	xfs_extlen_t	*reslen)	/* result length */
{
	xfs_agblock_t	bno;
	xfs_extlen_t	len;
	xfs_extlen_t	diff;

	/* Trim busy sections out of found extent */
	xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);

	/*
	 * If we have a largish extent that happens to start before min_agbno,
	 * see if we can shift it into range...
	 */
	if (bno < args->min_agbno && bno + len > args->min_agbno) {
		diff = args->min_agbno - bno;
		if (len > diff) {
			bno += diff;
			len -= diff;
		}
	}

	if (args->alignment > 1 && len >= args->minlen) {
		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);

		/* blocks lost at the front by rounding up to alignment */
		diff = aligned_bno - bno;

		*resbno = aligned_bno;
		/* if alignment consumed the whole extent, report it unusable */
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		*resbno = bno;
		*reslen = len;
	}
}
177
/*
 * Compute best start block and diff for "near" allocations.
 * freelen >= wantlen already checked by caller.
 *
 * Returns the absolute distance between the chosen start block and the
 * wanted block (0 on a perfect hit), and stores the chosen start block
 * in *newbnop (NULLAGBLOCK when no aligned start fits in the extent).
 */
STATIC xfs_extlen_t		/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	char		userdata,	/* are we allocating data? */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	/*
	 * We want to allocate from the start of a free extent if it is past
	 * the desired block or if we are allocating user data and the free
	 * extent is before desired block. The second case is there to allow
	 * for contiguous allocation from the remaining free space if the file
	 * grows in the short term.
	 */
	if (freebno >= wantbno || (userdata && freeend < wantend)) {
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		/*
		 * The wanted block lies inside the free extent: consider the
		 * aligned block at/after it (newbno1) and the aligned block
		 * before it (newbno2), preferring the longer usable run, then
		 * the one closer to wantbno.
		 */
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		/* unaligned and the wanted range fits: take it exactly */
		newbno1 = wantbno;
	} else if (alignment > 1) {
		/* free extent ends before wantend: allocate from its tail */
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}
245
/*
 * Fix up the length, based on mod and prod.
 * len should be k * prod + mod for some k.
 * If len is too small it is returned unchanged.
 * If len hits maxlen it is left alone.
 */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_extlen_t	k;		/* len % prod, compared against mod */
	xfs_extlen_t	rlen;		/* candidate rounded-down length */

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	/* nothing to do if rounding is disabled or len can't be adjusted */
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	/* round rlen down to the next value congruent to mod (mod prod) */
	if (k > args->mod)
		rlen = rlen - (k - args->mod);
	else
		rlen = rlen - args->prod + (args->mod - k);
	/* casts to (int) catch length underflows */
	if ((int)rlen < (int)args->minlen)
		return;
	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
	ASSERT(rlen % args->prod == args->mod);
	args->len = rlen;
}
280
281 /*
282 * Fix up length if there is too little space left in the a.g.
283 * Return 1 if ok, 0 if too little, should give up.
284 */
285 STATIC int
286 xfs_alloc_fix_minleft(
287 xfs_alloc_arg_t *args) /* allocation argument structure */
288 {
289 xfs_agf_t *agf; /* a.g. freelist header */
290 int diff; /* free space difference */
291
292 if (args->minleft == 0)
293 return 1;
294 agf = XFS_BUF_TO_AGF(args->agbp);
295 diff = be32_to_cpu(agf->agf_freeblks)
296 - args->len - args->minleft;
297 if (diff >= 0)
298 return 1;
299 args->len += diff; /* shrink the allocated space */
300 /* casts to (int) catch length underflows */
301 if ((int)args->len >= (int)args->minlen)
302 return 1;
303 args->agbno = NULLAGBLOCK;
304 return 0;
305 }
306
/*
 * Update the two btrees, logically removing from freespace the extent
 * starting at rbno, rlen blocks.  The extent is contained within the
 * actual (current) free extent fbno for flen blocks.
 * Flags are passed in indicating whether the cursors are set to the
 * relevant records.
 *
 * The by-size (cnt) tree entry for the old extent is always deleted and
 * replaced by entries for the 0, 1 or 2 remaining free fragments; the
 * by-block (bno) tree entry is updated in place where possible.
 */
STATIC int				/* error code */
xfs_alloc_fixup_trees(
	xfs_btree_cur_t	*cnt_cur,	/* cursor for by-size btree */
	xfs_btree_cur_t	*bno_cur,	/* cursor for by-block btree */
	xfs_agblock_t	fbno,		/* starting block of free extent */
	xfs_extlen_t	flen,		/* length of free extent */
	xfs_agblock_t	rbno,		/* starting block of returned extent */
	xfs_extlen_t	rlen,		/* length of returned extent */
	int		flags)		/* flags, XFSA_FIXUP_... */
{
	int		error;		/* error code */
	int		i;		/* operation results */
	xfs_agblock_t	nfbno1;		/* first new free startblock */
	xfs_agblock_t	nfbno2;		/* second new free startblock */
	xfs_extlen_t	nflen1=0;	/* first new free length */
	xfs_extlen_t	nflen2=0;	/* second new free length */
	struct xfs_mount *mp;

	mp = cnt_cur->bc_mp;

	/*
	 * Look up the record in the by-size tree if necessary.
	 * XFSA_FIXUP_CNT_OK means the caller already positioned cnt_cur;
	 * in DEBUG builds cross-check it really points at [fbno, flen].
	 */
	if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp,
			i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	/*
	 * Look up the record in the by-block tree if necessary.
	 */
	if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp,
			i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}

#ifdef DEBUG
	/*
	 * Both trees record the same extents, so single-level trees must
	 * carry identical record counts.
	 */
	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
		struct xfs_btree_block	*bnoblock;
		struct xfs_btree_block	*cntblock;

		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);

		XFS_WANT_CORRUPTED_RETURN(mp,
			bnoblock->bb_numrecs == cntblock->bb_numrecs);
	}
#endif

	/*
	 * Deal with all four cases: the allocated record is contained
	 * within the freespace record, so we can have new freespace
	 * at either (or both) end, or no freespace remaining.
	 */
	if (rbno == fbno && rlen == flen)
		nfbno1 = nfbno2 = NULLAGBLOCK;
	else if (rbno == fbno) {
		nfbno1 = rbno + rlen;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else if (rbno + rlen == fbno + flen) {
		nfbno1 = fbno;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else {
		nfbno1 = fbno;
		nflen1 = rbno - fbno;
		nfbno2 = rbno + rlen;
		nflen2 = (fbno + flen) - nfbno2;
	}
	/*
	 * Delete the entry from the by-size btree.
	 */
	if ((error = xfs_btree_delete(cnt_cur, &i)))
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	/*
	 * Add new by-size btree entry(s).
	 * Each lookup must miss (i == 0) before we insert the new record.
	 */
	if (nfbno1 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	if (nfbno2 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	/*
	 * Fix up the by-block btree entry(s).
	 */
	if (nfbno1 == NULLAGBLOCK) {
		/*
		 * No remaining freespace, just delete the by-block tree entry.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	} else {
		/*
		 * Update the by-block entry to start later|be shorter.
		 */
		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
			return error;
	}
	if (nfbno2 != NULLAGBLOCK) {
		/*
		 * 2 resulting free entries, need to add one.
		 */
		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(bno_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	return 0;
}
454
455 static bool
456 xfs_agfl_verify(
457 struct xfs_buf *bp)
458 {
459 struct xfs_mount *mp = bp->b_target->bt_mount;
460 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
461 int i;
462
463 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid))
464 return false;
465 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
466 return false;
467 /*
468 * during growfs operations, the perag is not fully initialised,
469 * so we can't use it for any useful checking. growfs ensures we can't
470 * use it by using uncached buffers that don't have the perag attached
471 * so we can detect and avoid this problem.
472 */
473 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
474 return false;
475
476 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
477 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
478 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
479 return false;
480 }
481 return true;
482 }
483
/*
 * Read verifier for AGFL buffers.  Flags -EFSBADCRC on checksum failure
 * and -EFSCORRUPTED on structural failure, CRC taking precedence.
 */
static void
xfs_agfl_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;

	/*
	 * There is no verification of non-crc AGFLs because mkfs does not
	 * initialise the AGFL to zero or NULL. Hence the only valid part of the
	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
	 * can't verify just those entries are valid.
	 */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	/* verify the CRC first; only a good CRC makes contents worth checking */
	if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
		xfs_buf_ioerror(bp, -EFSBADCRC);
	else if (!xfs_agfl_verify(bp))
		xfs_buf_ioerror(bp, -EFSCORRUPTED);

	if (bp->b_error)
		xfs_verifier_error(bp);
}
507
/*
 * Write verifier for AGFL buffers.  Validates the structure, stamps the
 * last-modification LSN from the buffer log item, then recomputes the
 * CRC.  The LSN must be written before the CRC so the checksum covers it.
 */
static void
xfs_agfl_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item *bip = bp->b_fspriv;

	/* no verification of non-crc AGFLs */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (!xfs_agfl_verify(bp)) {
		xfs_buf_ioerror(bp, -EFSCORRUPTED);
		xfs_verifier_error(bp);
		return;
	}

	/* bip is NULL for uncached/unlogged buffers; skip the LSN then */
	if (bip)
		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
}
530
/* Verifier operations attached to every AGFL buffer read/write. */
const struct xfs_buf_ops xfs_agfl_buf_ops = {
	.verify_read = xfs_agfl_read_verify,
	.verify_write = xfs_agfl_write_verify,
};
535
536 /*
537 * Read in the allocation group free block array.
538 */
539 STATIC int /* error */
540 xfs_alloc_read_agfl(
541 xfs_mount_t *mp, /* mount point structure */
542 xfs_trans_t *tp, /* transaction pointer */
543 xfs_agnumber_t agno, /* allocation group number */
544 xfs_buf_t **bpp) /* buffer for the ag free block array */
545 {
546 xfs_buf_t *bp; /* return value */
547 int error;
548
549 ASSERT(agno != NULLAGNUMBER);
550 error = xfs_trans_read_buf(
551 mp, tp, mp->m_ddev_targp,
552 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
553 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
554 if (error)
555 return error;
556 xfs_buf_set_ref(bp, XFS_AGFL_REF);
557 *bpp = bp;
558 return 0;
559 }
560
/*
 * Apply a free-block delta to both the in-core perag counter and the
 * on-disk AGF, record it against the transaction, and log the AGF.
 * len is negative for allocations and positive for frees.  Returns
 * -EFSCORRUPTED if the new free count would exceed the AG length.
 */
STATIC int
xfs_alloc_update_counters(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	struct xfs_buf		*agbp,
	long			len)
{
	struct xfs_agf	*agf = XFS_BUF_TO_AGF(agbp);

	pag->pagf_freeblks += len;
	be32_add_cpu(&agf->agf_freeblks, len);

	xfs_trans_agblocks_delta(tp, len);
	/* freeblks > length means the counters wrapped or are corrupt */
	if (unlikely(be32_to_cpu(agf->agf_freeblks) >
		     be32_to_cpu(agf->agf_length)))
		return -EFSCORRUPTED;

	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
	return 0;
}
581
582 /*
583 * Allocation group level functions.
584 */
585
/*
 * Allocate a variable extent in the allocation group agno.
 * Type and bno are used to determine where in the allocation group the
 * extent will start.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent(
	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
{
	int		error=0;

	ASSERT(args->minlen > 0);
	ASSERT(args->maxlen > 0);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->mod < args->prod);
	ASSERT(args->alignment > 0);
	/*
	 * Branch to correct routine based on the type.
	 */
	args->wasfromfl = 0;
	switch (args->type) {
	case XFS_ALLOCTYPE_THIS_AG:
		error = xfs_alloc_ag_vextent_size(args);
		break;
	case XFS_ALLOCTYPE_NEAR_BNO:
		error = xfs_alloc_ag_vextent_near(args);
		break;
	case XFS_ALLOCTYPE_THIS_BNO:
		error = xfs_alloc_ag_vextent_exact(args);
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}

	/* agbno == NULLAGBLOCK means the sub-allocator found nothing */
	if (error || args->agbno == NULLAGBLOCK)
		return error;

	ASSERT(args->len >= args->minlen);
	ASSERT(args->len <= args->maxlen);
	ASSERT(!args->wasfromfl || !args->isfl);
	ASSERT(args->agbno % args->alignment == 0);

	/*
	 * Extents taken from the freelist don't change the AGF free-block
	 * count; everything else must be accounted against the AG.
	 */
	if (!args->wasfromfl) {
		error = xfs_alloc_update_counters(args->tp, args->pag,
						  args->agbp,
						  -((long)(args->len)));
		if (error)
			return error;

		ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
					      args->agbno, args->len));
	}

	/*
	 * Freelist fills (isfl) don't touch the superblock free-block
	 * counters; delayed allocations use the reserved-blocks field.
	 */
	if (!args->isfl) {
		xfs_trans_mod_sb(args->tp, args->wasdel ?
				 XFS_TRANS_SB_RES_FDBLOCKS :
				 XFS_TRANS_SB_FDBLOCKS,
				 -((long)(args->len)));
	}

	XFS_STATS_INC(xs_allocx);
	XFS_STATS_ADD(xs_allocb, args->len);
	return error;
}
654
/*
 * Allocate a variable extent at exactly agno/bno.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_exact(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur;/* by block-number btree cursor */
	xfs_btree_cur_t	*cnt_cur;/* by count btree cursor */
	int		error;
	xfs_agblock_t	fbno;	/* start block of found extent */
	xfs_extlen_t	flen;	/* length of found extent */
	xfs_agblock_t	tbno;	/* start block of trimmed extent */
	xfs_extlen_t	tlen;	/* length of trimmed extent */
	xfs_agblock_t	tend;	/* end block of trimmed extent */
	int		i;	/* success/failure of operation */

	/* exact-bno allocations make no sense with alignment constraints */
	ASSERT(args->alignment == 1);

	/*
	 * Allocate/initialize a cursor for the by-number freespace btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->agno, XFS_BTNUM_BNO);

	/*
	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
	 * Look for the closest free block <= bno, it must contain bno
	 * if any free block does.
	 */
	error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
	if (error)
		goto error0;
	if (!i)
		goto not_found;

	/*
	 * Grab the freespace record.
	 */
	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
	if (error)
		goto error0;
	XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
	ASSERT(fbno <= args->agbno);

	/*
	 * Check for overlapping busy extents.
	 */
	xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);

	/*
	 * Give up if the start of the extent is busy, or the freespace isn't
	 * long enough for the minimum request.
	 */
	if (tbno > args->agbno)
		goto not_found;
	if (tlen < args->minlen)
		goto not_found;
	tend = tbno + tlen;
	if (tend < args->agbno + args->minlen)
		goto not_found;

	/*
	 * End of extent will be smaller of the freespace end and the
	 * maximal requested end.
	 *
	 * Fix the length according to mod and prod if given.
	 */
	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
						- args->agbno;
	xfs_alloc_fix_len(args);
	if (!xfs_alloc_fix_minleft(args))
		goto not_found;

	ASSERT(args->agbno + args->len <= tend);

	/*
	 * We are allocating agbno for args->len
	 * Allocate/initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);
	ASSERT(args->agbno + args->len <=
		be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
	/* bno_cur already points at the record; only cnt_cur needs a lookup */
	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
				      args->len, XFSA_FIXUP_BNO_OK);
	if (error) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
		goto error0;
	}

	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

	args->wasfromfl = 0;
	trace_xfs_alloc_exact_done(args);
	return 0;

not_found:
	/* Didn't find it, return null. */
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	args->agbno = NULLAGBLOCK;
	trace_xfs_alloc_exact_notfound(args);
	return 0;

error0:
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	trace_xfs_alloc_exact_error(args);
	return error;
}
768
/*
 * Search the btree in a given direction via the search cursor and compare
 * the records found against the good extent we've already found.
 *
 * On return exactly one of the two cursors has been deleted and its
 * pointer NULLed: *scur when the good extent wins, *gcur when the
 * searched extent wins.  On error both cursors are left for the caller
 * to tear down.
 */
STATIC int
xfs_alloc_find_best_extent(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	struct xfs_btree_cur	**gcur,	/* good cursor */
	struct xfs_btree_cur	**scur,	/* searching cursor */
	xfs_agblock_t		gdiff,	/* difference for search comparison */
	xfs_agblock_t		*sbno,	/* extent found by search */
	xfs_extlen_t		*slen,	/* extent length */
	xfs_agblock_t		*sbnoa,	/* aligned extent found by search */
	xfs_extlen_t		*slena,	/* aligned extent length */
	int			dir)	/* 0 = search right, 1 = search left */
{
	xfs_agblock_t		new;
	xfs_agblock_t		sdiff;
	int			error;
	int			i;

	/* The good extent is perfect, no need to search. */
	if (!gdiff)
		goto out_use_good;

	/*
	 * Look until we find a better one, run out of space or run off the end.
	 */
	do {
		error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
		if (error)
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
		xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);

		/*
		 * The good extent is closer than this one, or this one has
		 * left the allowed [min_agbno, max_agbno] window.
		 */
		if (!dir) {
			if (*sbnoa > args->max_agbno)
				goto out_use_good;
			if (*sbnoa >= args->agbno + gdiff)
				goto out_use_good;
		} else {
			if (*sbnoa < args->min_agbno)
				goto out_use_good;
			if (*sbnoa <= args->agbno - gdiff)
				goto out_use_good;
		}

		/*
		 * This extent is within range: if it is big enough,
		 * compute its distance and pick the closer candidate.
		 */
		if (*slena >= args->minlen) {
			args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
			xfs_alloc_fix_len(args);

			sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
						       args->alignment,
						       args->userdata, *sbnoa,
						       *slena, &new);

			/*
			 * Choose closer size and invalidate other cursor.
			 */
			if (sdiff < gdiff)
				goto out_use_search;
			goto out_use_good;
		}

		/* too small: keep walking away from agbno in this direction */
		if (!dir)
			error = xfs_btree_increment(*scur, 0, &i);
		else
			error = xfs_btree_decrement(*scur, 0, &i);
		if (error)
			goto error0;
	} while (i);

out_use_good:
	xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
	*scur = NULL;
	return 0;

out_use_search:
	xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
	*gcur = NULL;
	return 0;

error0:
	/* caller invalidates cursors */
	return error;
}
861
862 /*
863 * Allocate a variable extent near bno in the allocation group agno.
864 * Extent's length (returned in len) will be between minlen and maxlen,
865 * and of the form k * prod + mod unless there's nothing that large.
866 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
867 */
868 STATIC int /* error */
869 xfs_alloc_ag_vextent_near(
870 xfs_alloc_arg_t *args) /* allocation argument structure */
871 {
872 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */
873 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */
874 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */
875 xfs_agblock_t gtbno; /* start bno of right side entry */
876 xfs_agblock_t gtbnoa; /* aligned ... */
877 xfs_extlen_t gtdiff; /* difference to right side entry */
878 xfs_extlen_t gtlen; /* length of right side entry */
879 xfs_extlen_t gtlena; /* aligned ... */
880 xfs_agblock_t gtnew; /* useful start bno of right side */
881 int error; /* error code */
882 int i; /* result code, temporary */
883 int j; /* result code, temporary */
884 xfs_agblock_t ltbno; /* start bno of left side entry */
885 xfs_agblock_t ltbnoa; /* aligned ... */
886 xfs_extlen_t ltdiff; /* difference to left side entry */
887 xfs_extlen_t ltlen; /* length of left side entry */
888 xfs_extlen_t ltlena; /* aligned ... */
889 xfs_agblock_t ltnew; /* useful start bno of left side */
890 xfs_extlen_t rlen; /* length of returned extent */
891 int forced = 0;
892 #ifdef DEBUG
893 /*
894 * Randomly don't execute the first algorithm.
895 */
896 int dofirst; /* set to do first algorithm */
897
898 dofirst = prandom_u32() & 1;
899 #endif
900
901 /* handle unitialized agbno range so caller doesn't have to */
902 if (!args->min_agbno && !args->max_agbno)
903 args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
904 ASSERT(args->min_agbno <= args->max_agbno);
905
906 /* clamp agbno to the range if it's outside */
907 if (args->agbno < args->min_agbno)
908 args->agbno = args->min_agbno;
909 if (args->agbno > args->max_agbno)
910 args->agbno = args->max_agbno;
911
912 restart:
913 bno_cur_lt = NULL;
914 bno_cur_gt = NULL;
915 ltlen = 0;
916 gtlena = 0;
917 ltlena = 0;
918
919 /*
920 * Get a cursor for the by-size btree.
921 */
922 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
923 args->agno, XFS_BTNUM_CNT);
924
925 /*
926 * See if there are any free extents as big as maxlen.
927 */
928 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
929 goto error0;
930 /*
931 * If none, then pick up the last entry in the tree unless the
932 * tree is empty.
933 */
934 if (!i) {
935 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
936 &ltlen, &i)))
937 goto error0;
938 if (i == 0 || ltlen == 0) {
939 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
940 trace_xfs_alloc_near_noentry(args);
941 return 0;
942 }
943 ASSERT(i == 1);
944 }
945 args->wasfromfl = 0;
946
947 /*
948 * First algorithm.
949 * If the requested extent is large wrt the freespaces available
950 * in this a.g., then the cursor will be pointing to a btree entry
951 * near the right edge of the tree. If it's in the last btree leaf
952 * block, then we just examine all the entries in that block
953 * that are big enough, and pick the best one.
954 * This is written as a while loop so we can break out of it,
955 * but we never loop back to the top.
956 */
957 while (xfs_btree_islastblock(cnt_cur, 0)) {
958 xfs_extlen_t bdiff;
959 int besti=0;
960 xfs_extlen_t blen=0;
961 xfs_agblock_t bnew=0;
962
963 #ifdef DEBUG
964 if (dofirst)
965 break;
966 #endif
967 /*
968 * Start from the entry that lookup found, sequence through
969 * all larger free blocks. If we're actually pointing at a
970 * record smaller than maxlen, go to the start of this block,
971 * and skip all those smaller than minlen.
972 */
973 if (ltlen || args->alignment > 1) {
974 cnt_cur->bc_ptrs[0] = 1;
975 do {
976 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
977 &ltlen, &i)))
978 goto error0;
979 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
980 if (ltlen >= args->minlen)
981 break;
982 if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
983 goto error0;
984 } while (i);
985 ASSERT(ltlen >= args->minlen);
986 if (!i)
987 break;
988 }
989 i = cnt_cur->bc_ptrs[0];
990 for (j = 1, blen = 0, bdiff = 0;
991 !error && j && (blen < args->maxlen || bdiff > 0);
992 error = xfs_btree_increment(cnt_cur, 0, &j)) {
993 /*
994 * For each entry, decide if it's better than
995 * the previous best entry.
996 */
997 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
998 goto error0;
999 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1000 xfs_alloc_compute_aligned(args, ltbno, ltlen,
1001 &ltbnoa, &ltlena);
1002 if (ltlena < args->minlen)
1003 continue;
1004 if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
1005 continue;
1006 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1007 xfs_alloc_fix_len(args);
1008 ASSERT(args->len >= args->minlen);
1009 if (args->len < blen)
1010 continue;
1011 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1012 args->alignment, args->userdata, ltbnoa,
1013 ltlena, &ltnew);
1014 if (ltnew != NULLAGBLOCK &&
1015 (args->len > blen || ltdiff < bdiff)) {
1016 bdiff = ltdiff;
1017 bnew = ltnew;
1018 blen = args->len;
1019 besti = cnt_cur->bc_ptrs[0];
1020 }
1021 }
1022 /*
1023 * It didn't work. We COULD be in a case where
1024 * there's a good record somewhere, so try again.
1025 */
1026 if (blen == 0)
1027 break;
1028 /*
1029 * Point at the best entry, and retrieve it again.
1030 */
1031 cnt_cur->bc_ptrs[0] = besti;
1032 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1033 goto error0;
1034 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1035 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1036 args->len = blen;
1037 if (!xfs_alloc_fix_minleft(args)) {
1038 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1039 trace_xfs_alloc_near_nominleft(args);
1040 return 0;
1041 }
1042 blen = args->len;
1043 /*
1044 * We are allocating starting at bnew for blen blocks.
1045 */
1046 args->agbno = bnew;
1047 ASSERT(bnew >= ltbno);
1048 ASSERT(bnew + blen <= ltbno + ltlen);
1049 /*
1050 * Set up a cursor for the by-bno tree.
1051 */
1052 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1053 args->agbp, args->agno, XFS_BTNUM_BNO);
1054 /*
1055 * Fix up the btree entries.
1056 */
1057 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1058 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
1059 goto error0;
1060 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1061 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1062
1063 trace_xfs_alloc_near_first(args);
1064 return 0;
1065 }
1066 /*
1067 * Second algorithm.
1068 * Search in the by-bno tree to the left and to the right
1069 * simultaneously, until in each case we find a space big enough,
1070 * or run into the edge of the tree. When we run into the edge,
1071 * we deallocate that cursor.
1072 * If both searches succeed, we compare the two spaces and pick
1073 * the better one.
1074 * With alignment, it's possible for both to fail; the upper
1075 * level algorithm that picks allocation groups for allocations
1076 * is not supposed to do this.
1077 */
1078 /*
1079 * Allocate and initialize the cursor for the leftward search.
1080 */
1081 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1082 args->agno, XFS_BTNUM_BNO);
1083 /*
1084 * Lookup <= bno to find the leftward search's starting point.
1085 */
1086 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
1087 goto error0;
1088 if (!i) {
1089 /*
1090 * Didn't find anything; use this cursor for the rightward
1091 * search.
1092 */
1093 bno_cur_gt = bno_cur_lt;
1094 bno_cur_lt = NULL;
1095 }
1096 /*
1097 * Found something. Duplicate the cursor for the rightward search.
1098 */
1099 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
1100 goto error0;
1101 /*
1102 * Increment the cursor, so we will point at the entry just right
1103 * of the leftward entry if any, or to the leftmost entry.
1104 */
1105 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1106 goto error0;
1107 if (!i) {
1108 /*
1109 * It failed, there are no rightward entries.
1110 */
1111 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1112 bno_cur_gt = NULL;
1113 }
1114 /*
1115 * Loop going left with the leftward cursor, right with the
1116 * rightward cursor, until either both directions give up or
1117 * we find an entry at least as big as minlen.
1118 */
1119 do {
1120 if (bno_cur_lt) {
1121 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1122 goto error0;
1123 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1124 xfs_alloc_compute_aligned(args, ltbno, ltlen,
1125 &ltbnoa, &ltlena);
1126 if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
1127 break;
1128 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1129 goto error0;
1130 if (!i || ltbnoa < args->min_agbno) {
1131 xfs_btree_del_cursor(bno_cur_lt,
1132 XFS_BTREE_NOERROR);
1133 bno_cur_lt = NULL;
1134 }
1135 }
1136 if (bno_cur_gt) {
1137 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1138 goto error0;
1139 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1140 xfs_alloc_compute_aligned(args, gtbno, gtlen,
1141 &gtbnoa, &gtlena);
1142 if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
1143 break;
1144 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1145 goto error0;
1146 if (!i || gtbnoa > args->max_agbno) {
1147 xfs_btree_del_cursor(bno_cur_gt,
1148 XFS_BTREE_NOERROR);
1149 bno_cur_gt = NULL;
1150 }
1151 }
1152 } while (bno_cur_lt || bno_cur_gt);
1153
1154 /*
1155 * Got both cursors still active, need to find better entry.
1156 */
1157 if (bno_cur_lt && bno_cur_gt) {
1158 if (ltlena >= args->minlen) {
1159 /*
1160 * Left side is good, look for a right side entry.
1161 */
1162 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1163 xfs_alloc_fix_len(args);
1164 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1165 args->alignment, args->userdata, ltbnoa,
1166 ltlena, &ltnew);
1167
1168 error = xfs_alloc_find_best_extent(args,
1169 &bno_cur_lt, &bno_cur_gt,
1170 ltdiff, &gtbno, &gtlen,
1171 &gtbnoa, &gtlena,
1172 0 /* search right */);
1173 } else {
1174 ASSERT(gtlena >= args->minlen);
1175
1176 /*
1177 * Right side is good, look for a left side entry.
1178 */
1179 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1180 xfs_alloc_fix_len(args);
1181 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1182 args->alignment, args->userdata, gtbnoa,
1183 gtlena, &gtnew);
1184
1185 error = xfs_alloc_find_best_extent(args,
1186 &bno_cur_gt, &bno_cur_lt,
1187 gtdiff, &ltbno, &ltlen,
1188 &ltbnoa, &ltlena,
1189 1 /* search left */);
1190 }
1191
1192 if (error)
1193 goto error0;
1194 }
1195
1196 /*
1197 * If we couldn't get anything, give up.
1198 */
1199 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1200 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1201
1202 if (!forced++) {
1203 trace_xfs_alloc_near_busy(args);
1204 xfs_log_force(args->mp, XFS_LOG_SYNC);
1205 goto restart;
1206 }
1207 trace_xfs_alloc_size_neither(args);
1208 args->agbno = NULLAGBLOCK;
1209 return 0;
1210 }
1211
1212 /*
1213 * At this point we have selected a freespace entry, either to the
1214 * left or to the right. If it's on the right, copy all the
1215 * useful variables to the "left" set so we only have one
1216 * copy of this code.
1217 */
1218 if (bno_cur_gt) {
1219 bno_cur_lt = bno_cur_gt;
1220 bno_cur_gt = NULL;
1221 ltbno = gtbno;
1222 ltbnoa = gtbnoa;
1223 ltlen = gtlen;
1224 ltlena = gtlena;
1225 j = 1;
1226 } else
1227 j = 0;
1228
1229 /*
1230 * Fix up the length and compute the useful address.
1231 */
1232 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1233 xfs_alloc_fix_len(args);
1234 if (!xfs_alloc_fix_minleft(args)) {
1235 trace_xfs_alloc_near_nominleft(args);
1236 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1237 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1238 return 0;
1239 }
1240 rlen = args->len;
1241 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1242 args->userdata, ltbnoa, ltlena, &ltnew);
1243 ASSERT(ltnew >= ltbno);
1244 ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1245 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1246 ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
1247 args->agbno = ltnew;
1248
1249 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1250 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1251 goto error0;
1252
1253 if (j)
1254 trace_xfs_alloc_near_greater(args);
1255 else
1256 trace_xfs_alloc_near_lesser(args);
1257
1258 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1259 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1260 return 0;
1261
1262 error0:
1263 trace_xfs_alloc_near_error(args);
1264 if (cnt_cur != NULL)
1265 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1266 if (bno_cur_lt != NULL)
1267 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1268 if (bno_cur_gt != NULL)
1269 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1270 return error;
1271 }
1272
/*
 * Allocate a variable extent anywhere in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 *
 * Strategy: search the by-size (cnt) btree for the first extent that can
 * still satisfy maxlen after alignment; if nothing that large is usable
 * (or busy extents keep getting in the way), settle for smaller extents,
 * forcing the log at most once to clear busy extents before giving up.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_size(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur;	/* cursor for bno btree */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for cnt btree */
	int		error;		/* error result */
	xfs_agblock_t	fbno;		/* start of found freespace */
	xfs_extlen_t	flen;		/* length of found freespace */
	int		i;		/* temp status variable */
	xfs_agblock_t	rbno;		/* returned block number */
	xfs_extlen_t	rlen;		/* length of returned extent */
	int		forced = 0;	/* count of log-force retries so far */

restart:
	/*
	 * Allocate and initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);
	bno_cur = NULL;

	/*
	 * Look for an entry >= maxlen+alignment-1 blocks.
	 * The extra alignment-1 blocks guarantee the extent can still yield
	 * maxlen usable blocks after its start is rounded up for alignment.
	 */
	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
			args->maxlen + args->alignment - 1, &i)))
		goto error0;

	/*
	 * If none or we have busy extents that we cannot allocate from, then
	 * we have to settle for a smaller extent. In the case that there are
	 * no large extents, this will return the last entry in the tree unless
	 * the tree is empty. In the case that there are only busy large
	 * extents, this will return the largest small extent unless there
	 * are no smaller extents available.
	 */
	if (!i || forced > 1) {
		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
						   &fbno, &flen, &i);
		if (error)
			goto error0;
		if (i == 0 || flen == 0) {
			/* Nothing usable at all in this AG. */
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_noentry(args);
			return 0;
		}
		ASSERT(i == 1);
		xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
	} else {
		/*
		 * Search for a non-busy extent that is large enough.
		 * If we are at low space, don't check, or if we fall of
		 * the end of the btree, turn off the busy check and
		 * restart.
		 */
		for (;;) {
			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
			if (error)
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);

			xfs_alloc_compute_aligned(args, fbno, flen,
						  &rbno, &rlen);

			if (rlen >= args->maxlen)
				break;

			error = xfs_btree_increment(cnt_cur, 0, &i);
			if (error)
				goto error0;
			if (i == 0) {
				/*
				 * Our only valid extents must have been busy.
				 * Make it unbusy by forcing the log out and
				 * retrying. If we've been here before, forcing
				 * the log isn't making the extents available,
				 * which means they have probably been freed in
				 * this transaction. In that case, we have to
				 * give up on them and we'll attempt a minlen
				 * allocation the next time around.
				 */
				xfs_btree_del_cursor(cnt_cur,
						     XFS_BTREE_NOERROR);
				trace_xfs_alloc_size_busy(args);
				if (!forced++)
					xfs_log_force(args->mp, XFS_LOG_SYNC);
				goto restart;
			}
		}
	}

	/*
	 * In the first case above, we got the last entry in the
	 * by-size btree.  Now we check to see if the space hits maxlen
	 * once aligned; if not, we search left for something better.
	 * This can't happen in the second case above.
	 */
	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
			(rlen <= flen && rbno + rlen <= fbno + flen), error0);
	if (rlen < args->maxlen) {
		xfs_agblock_t	bestfbno;	/* best raw extent start */
		xfs_extlen_t	bestflen;	/* best raw extent length */
		xfs_agblock_t	bestrbno;	/* best aligned extent start */
		xfs_extlen_t	bestrlen;	/* best aligned extent length */

		bestrlen = rlen;
		bestrbno = rbno;
		bestflen = flen;
		bestfbno = fbno;
		/*
		 * Walk left (toward smaller raw extents); a shorter raw
		 * extent can still give a longer usable length once aligned.
		 */
		for (;;) {
			if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
				goto error0;
			if (i == 0)
				break;
			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
					&i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
			if (flen < bestrlen)
				break;
			xfs_alloc_compute_aligned(args, fbno, flen,
						  &rbno, &rlen);
			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
			XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
				(rlen <= flen && rbno + rlen <= fbno + flen),
				error0);
			if (rlen > bestrlen) {
				bestrlen = rlen;
				bestrbno = rbno;
				bestflen = flen;
				bestfbno = fbno;
				if (rlen == args->maxlen)
					break;
			}
		}
		/* Reposition the cursor on the winning record. */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
				&i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
		rlen = bestrlen;
		rbno = bestrbno;
		flen = bestflen;
		fbno = bestfbno;
	}
	args->wasfromfl = 0;
	/*
	 * Fix up the length.
	 */
	args->len = rlen;
	if (rlen < args->minlen) {
		if (!forced++) {
			/*
			 * The aligned length may have been shortened by busy
			 * extents; force the log once to clear them and retry.
			 */
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_busy(args);
			xfs_log_force(args->mp, XFS_LOG_SYNC);
			goto restart;
		}
		goto out_nominleft;
	}
	xfs_alloc_fix_len(args);

	if (!xfs_alloc_fix_minleft(args))
		goto out_nominleft;
	rlen = args->len;
	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
	/*
	 * Allocate and initialize a cursor for the by-block tree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_BNO);
	/* Remove/trim the chosen extent from both free space btrees. */
	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
			rbno, rlen, XFSA_FIXUP_CNT_OK)))
		goto error0;
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	cnt_cur = bno_cur = NULL;
	args->len = rlen;
	args->agbno = rbno;
	XFS_WANT_CORRUPTED_GOTO(args->mp,
		args->agbno + args->len <=
			be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
		error0);
	trace_xfs_alloc_size_done(args);
	return 0;

error0:
	trace_xfs_alloc_size_error(args);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	return error;

out_nominleft:
	/* Cannot allocate without violating minleft; report no extent. */
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	trace_xfs_alloc_size_nominleft(args);
	args->agbno = NULLAGBLOCK;
	return 0;
}
1479
/*
 * Deal with the case where only small freespaces remain.
 * Either return the contents of the last freespace record,
 * or allocate space from the freelist if there is nothing in the tree.
 *
 * On success, *stat is 1 when a btree record (possibly of length 0) is
 * returned through *fbnop/*flenp, and 0 when a single block was taken
 * straight from the AGF freelist, in which case args->agbno, args->len
 * and args->wasfromfl have already been set here.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_small(
	xfs_alloc_arg_t	*args,	/* allocation argument structure */
	xfs_btree_cur_t	*ccur,	/* by-size cursor */
	xfs_agblock_t	*fbnop,	/* result block number */
	xfs_extlen_t	*flenp,	/* result length */
	int		*stat)	/* status: 0-freelist, 1-normal/none */
{
	int		error;	/* error return value */
	xfs_agblock_t	fbno;	/* start of found freespace */
	xfs_extlen_t	flen;	/* length of found freespace */
	int		i;	/* btree operation status */

	/*
	 * The caller's cursor is positioned past the end; step back to
	 * the last (largest) record in the by-size tree, if one exists.
	 */
	if ((error = xfs_btree_decrement(ccur, 0, &i)))
		goto error0;
	if (i) {
		if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
	}
	/*
	 * Nothing in the btree, try the freelist.  Make sure
	 * to respect minleft even when pulling from the
	 * freelist.  Only a plain single-block, unaligned, non-freelist
	 * allocation may be satisfied this way.
	 */
	else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
		 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
		  > args->minleft)) {
		error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
		if (error)
			goto error0;
		if (fbno != NULLAGBLOCK) {
			/* Let the busy-extent code handle reuse of the block. */
			xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
					      args->userdata);

			if (args->userdata) {
				xfs_buf_t	*bp;

				/*
				 * The block held filesystem metadata while on
				 * the freelist; invalidate any stale buffer
				 * before handing it out as user data.
				 */
				bp = xfs_btree_get_bufs(args->mp, args->tp,
					args->agno, fbno, 0);
				xfs_trans_binval(args->tp, bp);
			}
			args->len = 1;
			args->agbno = fbno;
			XFS_WANT_CORRUPTED_GOTO(args->mp,
				args->agbno + args->len <=
				be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
				error0);
			args->wasfromfl = 1;
			trace_xfs_alloc_small_freelist(args);
			*stat = 0;
			return 0;
		}
		/*
		 * Nothing in the freelist.
		 */
		else
			flen = 0;
	}
	/*
	 * Can't allocate from the freelist for some reason.
	 */
	else {
		fbno = NULLAGBLOCK;
		flen = 0;
	}
	/*
	 * Can't do the allocation, give up.
	 */
	if (flen < args->minlen) {
		args->agbno = NULLAGBLOCK;
		trace_xfs_alloc_small_notenough(args);
		flen = 0;
	}
	*fbnop = fbno;
	*flenp = flen;
	*stat = 1;
	trace_xfs_alloc_small_done(args);
	return 0;

error0:
	trace_xfs_alloc_small_error(args);
	return error;
}
1569
/*
 * Free the extent starting at agno/bno for length.
 *
 * The freed extent is coalesced with any physically contiguous left and/or
 * right neighbors found in the by-block (bno) btree; the by-size (cnt)
 * btree is kept in sync by deleting the stale size records and inserting
 * one for the merged extent.  Finally the per-AG and (unless isfl is set)
 * superblock free-block counters are bumped by len.
 */
STATIC int			/* error */
xfs_free_ag_extent(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*agbp,	/* buffer for a.g. freelist header */
	xfs_agnumber_t	agno,	/* allocation group number */
	xfs_agblock_t	bno,	/* starting block number */
	xfs_extlen_t	len,	/* length of extent */
	int		isfl)	/* set if is freelist blocks - no sb acctg */
{
	xfs_btree_cur_t	*bno_cur;	/* cursor for by-block btree */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for by-size btree */
	int		error;		/* error return value */
	xfs_agblock_t	gtbno;		/* start of right neighbor block */
	xfs_extlen_t	gtlen;		/* length of right neighbor block */
	int		haveleft;	/* have a left neighbor block */
	int		haveright;	/* have a right neighbor block */
	int		i;		/* temp, result code */
	xfs_agblock_t	ltbno;		/* start of left neighbor block */
	xfs_extlen_t	ltlen;		/* length of left neighbor block */
	xfs_mount_t	*mp;		/* mount point struct for filesystem */
	xfs_agblock_t	nbno;		/* new starting block of freespace */
	xfs_extlen_t	nlen;		/* new length of freespace */
	xfs_perag_t	*pag;		/* per allocation group data */

	mp = tp->t_mountp;
	/*
	 * Allocate and initialize a cursor for the by-block btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
	cnt_cur = NULL;
	/*
	 * Look for a neighboring block on the left (lower block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
		goto error0;
	if (haveleft) {
		/*
		 * There is a block to our left.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * It's not contiguous, though.
		 */
		if (ltbno + ltlen < bno)
			haveleft = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			XFS_WANT_CORRUPTED_GOTO(mp,
						ltbno + ltlen <= bno, error0);
		}
	}
	/*
	 * Look for a neighboring block on the right (higher block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
		goto error0;
	if (haveright) {
		/*
		 * There is a block to our right.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * It's not contiguous, though.
		 */
		if (bno + len < gtbno)
			haveright = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
		}
	}
	/*
	 * Now allocate and initialize a cursor for the by-size tree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
	/*
	 * Have both left and right contiguous neighbors.
	 * Merge all three into a single free block.
	 */
	if (haveleft && haveright) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Delete the old by-block entry for the right block.
		 * (bno_cur still points at the right neighbor here.)
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Move the by-block cursor back to the left neighbor.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
#ifdef DEBUG
		/*
		 * Check that this is the right record: delete didn't
		 * mangle the cursor.
		 */
		{
			xfs_agblock_t	xxbno;
			xfs_extlen_t	xxlen;

			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
					&i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(mp,
				i == 1 && xxbno == ltbno && xxlen == ltlen,
				error0);
		}
#endif
		/*
		 * Update remaining by-block entry to the new, joined block.
		 */
		nbno = ltbno;
		nlen = len + ltlen + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * Have only a left contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveleft) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Back up the by-block cursor to the left neighbor, and
		 * update its length.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		nbno = ltbno;
		nlen = len + ltlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * Have only a right contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveright) {
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Update the starting block and length of the right
		 * neighbor in the by-block tree.
		 */
		nbno = bno;
		nlen = len + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * No contiguous neighbors.
	 * Insert the new freespace into the by-block tree.
	 */
	else {
		nbno = bno;
		nlen = len;
		if ((error = xfs_btree_insert(bno_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
	}
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	bno_cur = NULL;
	/*
	 * In all cases we need to insert the new freespace in the by-size tree.
	 * The lookup must fail (i == 0): a duplicate record would mean the
	 * free space btrees are corrupt.
	 */
	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
		goto error0;
	XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
	if ((error = xfs_btree_insert(cnt_cur, &i)))
		goto error0;
	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	cnt_cur = NULL;

	/*
	 * Update the freespace totals in the ag and superblock.
	 */
	pag = xfs_perag_get(mp, agno);
	error = xfs_alloc_update_counters(tp, pag, agbp, len);
	xfs_perag_put(pag);
	if (error)
		goto error0;

	/* Freelist blocks are already accounted; skip superblock update. */
	if (!isfl)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
	XFS_STATS_INC(xs_freex);
	XFS_STATS_ADD(xs_freeb, len);

	trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);

	return 0;

error0:
	trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	return error;
}
1822
1823 /*
1824 * Visible (exported) allocation/free functions.
1825 * Some of these are used just by xfs_alloc_btree.c and this file.
1826 */
1827
1828 /*
1829 * Compute and fill in value of m_ag_maxlevels.
1830 */
1831 void
1832 xfs_alloc_compute_maxlevels(
1833 xfs_mount_t *mp) /* file system mount structure */
1834 {
1835 int level;
1836 uint maxblocks;
1837 uint maxleafents;
1838 int minleafrecs;
1839 int minnoderecs;
1840
1841 maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1842 minleafrecs = mp->m_alloc_mnr[0];
1843 minnoderecs = mp->m_alloc_mnr[1];
1844 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1845 for (level = 1; maxblocks > 1; level++)
1846 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1847 mp->m_ag_maxlevels = level;
1848 }
1849
1850 /*
1851 * Find the length of the longest extent in an AG.
1852 */
1853 xfs_extlen_t
1854 xfs_alloc_longest_free_extent(
1855 struct xfs_mount *mp,
1856 struct xfs_perag *pag)
1857 {
1858 xfs_extlen_t need, delta = 0;
1859
1860 need = XFS_MIN_FREELIST_PAG(pag, mp);
1861 if (need > pag->pagf_flcount)
1862 delta = need - pag->pagf_flcount;
1863
1864 if (pag->pagf_longest > delta)
1865 return pag->pagf_longest - delta;
1866 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1867 }
1868
/*
 * Decide whether to use this allocation group for this allocation.
 * If so, fix up the btree freelist's size.
 *
 * Performs a quick rejection test against the cached per-AG counters,
 * then (after reading the AGF if necessary) re-checks against the
 * authoritative on-disk values, shrinks the freelist if it is too long
 * and grows it by allocating blocks if it is too short.  On success
 * args->agbp holds the locked AGF buffer; args->agbp is set to NULL
 * when the AG was rejected.
 */
int			/* error */
xfs_alloc_fix_freelist(
	xfs_alloc_arg_t	*args,	/* allocation argument structure */
	int		flags)	/* XFS_ALLOC_FLAG_... */
{
	xfs_buf_t	*agbp;	/* agf buffer pointer */
	xfs_agf_t	*agf;	/* a.g. freespace structure pointer */
	xfs_buf_t	*agflbp;/* agfl buffer pointer */
	xfs_agblock_t	bno;	/* freelist block */
	xfs_extlen_t	delta;	/* new blocks needed in freelist */
	int		error;	/* error result code */
	xfs_extlen_t	longest;/* longest extent in allocation group */
	xfs_mount_t	*mp;	/* file system mount point structure */
	xfs_extlen_t	need;	/* total blocks needed in freelist */
	xfs_perag_t	*pag;	/* per-ag information structure */
	xfs_alloc_arg_t	targs;	/* local allocation arguments */
	xfs_trans_t	*tp;	/* transaction pointer */

	mp = args->mp;

	pag = args->pag;
	tp = args->tp;
	if (!pag->pagf_init) {
		/* Per-AG counters not cached yet; read the AGF to fill them. */
		if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
				&agbp)))
			return error;
		if (!pag->pagf_init) {
			/* Still uninitialized: trylock read failed to get it. */
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			args->agbp = NULL;
			return 0;
		}
	} else
		agbp = NULL;

	/*
	 * If this is a metadata preferred pag and we are user data
	 * then try somewhere else if we are not being asked to
	 * try harder at this point
	 */
	if (pag->pagf_metadata && args->userdata &&
	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
		args->agbp = NULL;
		return 0;
	}

	if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
		/*
		 * If it looks like there isn't a long enough extent, or enough
		 * total blocks, reject it.  This check uses the cached per-AG
		 * values and may be done without the AGF buffer.
		 */
		need = XFS_MIN_FREELIST_PAG(pag, mp);
		longest = xfs_alloc_longest_free_extent(mp, pag);
		if ((args->minlen + args->alignment + args->minalignslop - 1) >
				longest ||
		    ((int)(pag->pagf_freeblks + pag->pagf_flcount -
			   need - args->total) < (int)args->minleft)) {
			if (agbp)
				xfs_trans_brelse(tp, agbp);
			args->agbp = NULL;
			return 0;
		}
	}

	/*
	 * Get the a.g. freespace buffer.
	 * Can fail if we're not blocking on locks, and it's held.
	 */
	if (agbp == NULL) {
		if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
				&agbp)))
			return error;
		if (agbp == NULL) {
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			args->agbp = NULL;
			return 0;
		}
	}
	/*
	 * Figure out how many blocks we should have in the freelist.
	 */
	agf = XFS_BUF_TO_AGF(agbp);
	need = XFS_MIN_FREELIST(agf, mp);
	/*
	 * If there isn't enough total or single-extent, reject it.
	 * This repeats the earlier check against the authoritative
	 * on-disk AGF values now that the buffer is locked.
	 */
	if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
		delta = need > be32_to_cpu(agf->agf_flcount) ?
			(need - be32_to_cpu(agf->agf_flcount)) : 0;
		longest = be32_to_cpu(agf->agf_longest);
		longest = (longest > delta) ? (longest - delta) :
			(be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
		if ((args->minlen + args->alignment + args->minalignslop - 1) >
				longest ||
		    ((int)(be32_to_cpu(agf->agf_freeblks) +
		     be32_to_cpu(agf->agf_flcount) - need - args->total) <
			     (int)args->minleft)) {
			xfs_trans_brelse(tp, agbp);
			args->agbp = NULL;
			return 0;
		}
	}
	/*
	 * Make the freelist shorter if it's too long.
	 * Surplus blocks go back into the free space btrees.
	 */
	while (be32_to_cpu(agf->agf_flcount) > need) {
		xfs_buf_t	*bp;

		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
		if (error)
			return error;
		if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
			return error;
		bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
		xfs_trans_binval(tp, bp);
	}
	/*
	 * Initialize the args structure for the internal allocations
	 * that refill the freelist.
	 */
	memset(&targs, 0, sizeof(targs));
	targs.tp = tp;
	targs.mp = mp;
	targs.agbp = agbp;
	targs.agno = args->agno;
	targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
	targs.type = XFS_ALLOCTYPE_THIS_AG;
	targs.pag = pag;
	if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
		return error;
	/*
	 * Make the freelist longer if it's too short.
	 */
	while (be32_to_cpu(agf->agf_flcount) < need) {
		targs.agbno = 0;
		targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
		/*
		 * Allocate as many blocks as possible at once.
		 */
		if ((error = xfs_alloc_ag_vextent(&targs))) {
			xfs_trans_brelse(tp, agflbp);
			return error;
		}
		/*
		 * Stop if we run out.  Won't happen if callers are obeying
		 * the restrictions correctly.  Can happen for free calls
		 * on a completely full ag.
		 */
		if (targs.agbno == NULLAGBLOCK) {
			if (flags & XFS_ALLOC_FLAG_FREEING)
				break;
			xfs_trans_brelse(tp, agflbp);
			args->agbp = NULL;
			return 0;
		}
		/*
		 * Put each allocated block on the list.
		 */
		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
			error = xfs_alloc_put_freelist(tp, agbp,
							agflbp, bno, 0);
			if (error)
				return error;
		}
	}
	xfs_trans_brelse(tp, agflbp);
	args->agbp = agbp;
	return 0;
}
2043
2044 /*
2045 * Get a block from the freelist.
2046 * Returns with the buffer for the block gotten.
2047 */
2048 int /* error */
2049 xfs_alloc_get_freelist(
2050 xfs_trans_t *tp, /* transaction pointer */
2051 xfs_buf_t *agbp, /* buffer containing the agf structure */
2052 xfs_agblock_t *bnop, /* block address retrieved from freelist */
2053 int btreeblk) /* destination is a AGF btree */
2054 {
2055 xfs_agf_t *agf; /* a.g. freespace structure */
2056 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
2057 xfs_agblock_t bno; /* block number returned */
2058 __be32 *agfl_bno;
2059 int error;
2060 int logflags;
2061 xfs_mount_t *mp = tp->t_mountp;
2062 xfs_perag_t *pag; /* per allocation group data */
2063
2064 /*
2065 * Freelist is empty, give up.
2066 */
2067 agf = XFS_BUF_TO_AGF(agbp);
2068 if (!agf->agf_flcount) {
2069 *bnop = NULLAGBLOCK;
2070 return 0;
2071 }
2072 /*
2073 * Read the array of free blocks.
2074 */
2075 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2076 &agflbp);
2077 if (error)
2078 return error;
2079
2080
2081 /*
2082 * Get the block number and update the data structures.
2083 */
2084 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2085 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2086 be32_add_cpu(&agf->agf_flfirst, 1);
2087 xfs_trans_brelse(tp, agflbp);
2088 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
2089 agf->agf_flfirst = 0;
2090
2091 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2092 be32_add_cpu(&agf->agf_flcount, -1);
2093 xfs_trans_agflist_delta(tp, -1);
2094 pag->pagf_flcount--;
2095 xfs_perag_put(pag);
2096
2097 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2098 if (btreeblk) {
2099 be32_add_cpu(&agf->agf_btreeblks, 1);
2100 pag->pagf_btreeblks++;
2101 logflags |= XFS_AGF_BTREEBLKS;
2102 }
2103
2104 xfs_alloc_log_agf(tp, agbp, logflags);
2105 *bnop = bno;
2106
2107 return 0;
2108 }
2109
/*
 * Log the given fields from the agf structure.
 * "fields" is a mask of XFS_AGF_* bits; the corresponding byte ranges of
 * the AGF buffer are added to the transaction's log item.
 */
void
xfs_alloc_log_agf(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
{
	int	first;		/* first byte offset */
	int	last;		/* last byte offset */
	/*
	 * Byte offsets of each loggable AGF field.  Entry order must match
	 * the XFS_AGF_* flag bit order (XFS_AGF_NUM_BITS entries); the
	 * trailing sizeof() sentinel lets xfs_btree_offsets() compute the
	 * length of the last field, so it must stay last.
	 */
	static const short	offsets[] = {
		offsetof(xfs_agf_t, agf_magicnum),
		offsetof(xfs_agf_t, agf_versionnum),
		offsetof(xfs_agf_t, agf_seqno),
		offsetof(xfs_agf_t, agf_length),
		offsetof(xfs_agf_t, agf_roots[0]),
		offsetof(xfs_agf_t, agf_levels[0]),
		offsetof(xfs_agf_t, agf_flfirst),
		offsetof(xfs_agf_t, agf_fllast),
		offsetof(xfs_agf_t, agf_flcount),
		offsetof(xfs_agf_t, agf_freeblks),
		offsetof(xfs_agf_t, agf_longest),
		offsetof(xfs_agf_t, agf_btreeblks),
		offsetof(xfs_agf_t, agf_uuid),
		sizeof(xfs_agf_t)
	};

	trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);

	/* mark the buffer type so log recovery can verify it on replay */
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);

	/* convert the field mask into a covering [first, last] byte range */
	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}
2145
2146 /*
2147 * Interface for inode allocation to force the pag data to be initialized.
2148 */
2149 int /* error */
2150 xfs_alloc_pagf_init(
2151 xfs_mount_t *mp, /* file system mount structure */
2152 xfs_trans_t *tp, /* transaction pointer */
2153 xfs_agnumber_t agno, /* allocation group number */
2154 int flags) /* XFS_ALLOC_FLAGS_... */
2155 {
2156 xfs_buf_t *bp;
2157 int error;
2158
2159 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2160 return error;
2161 if (bp)
2162 xfs_trans_brelse(tp, bp);
2163 return 0;
2164 }
2165
/*
 * Put the block on the freelist for the allocation group.
 * Updates the AGF header (fllast/flcount and, for btree blocks, the
 * btreeblks count), writes the block number into the AGFL array, and
 * logs both buffers in the transaction.
 */
int					/* error */
xfs_alloc_put_freelist(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
	xfs_agblock_t		bno,	/* block being freed */
	int			btreeblk) /* block came from a AGF btree */
{
	xfs_agf_t		*agf;	/* a.g. freespace structure */
	__be32			*blockp;/* pointer to array entry */
	int			error;
	int			logflags;
	xfs_mount_t		*mp;	/* mount structure */
	xfs_perag_t		*pag;	/* per allocation group data */
	__be32			*agfl_bno;
	int			startoff;

	agf = XFS_BUF_TO_AGF(agbp);
	mp = tp->t_mountp;

	/* the caller may pass agflbp == NULL; read the AGFL ourselves then */
	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
			be32_to_cpu(agf->agf_seqno), &agflbp)))
		return error;
	/* agf_fllast wraps around the circular AGFL array */
	be32_add_cpu(&agf->agf_fllast, 1);
	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
		agf->agf_fllast = 0;

	/* keep the in-core perag counters in sync with the on-disk AGF */
	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
	be32_add_cpu(&agf->agf_flcount, 1);
	xfs_trans_agflist_delta(tp, 1);
	pag->pagf_flcount++;

	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		/* block is returning from btree use; it no longer counts there */
		be32_add_cpu(&agf->agf_btreeblks, -1);
		pag->pagf_btreeblks--;
		logflags |= XFS_AGF_BTREEBLKS;
	}
	xfs_perag_put(pag);

	xfs_alloc_log_agf(tp, agbp, logflags);

	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));

	/* store the freed block number in the AGFL slot agf_fllast */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
	*blockp = cpu_to_be32(bno);
	startoff = (char *)blockp - (char *)agflbp->b_addr;

	/*
	 * NOTE(review): this second xfs_alloc_log_agf() with the same
	 * logflags looks redundant (the AGF was already logged above) but
	 * is harmless; it matches the upstream kernel code, so it is kept.
	 */
	xfs_alloc_log_agf(tp, agbp, logflags);

	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
	/* log only the one modified AGFL entry, not the whole array */
	xfs_trans_log_buf(tp, agflbp, startoff,
			  startoff + sizeof(xfs_agblock_t) - 1);
	return 0;
}
2225
2226 static bool
2227 xfs_agf_verify(
2228 struct xfs_mount *mp,
2229 struct xfs_buf *bp)
2230 {
2231 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
2232
2233 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2234 !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid))
2235 return false;
2236
2237 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2238 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2239 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2240 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2241 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2242 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2243 return false;
2244
2245 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
2246 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
2247 return false;
2248
2249 /*
2250 * during growfs operations, the perag is not fully initialised,
2251 * so we can't use it for any useful checking. growfs ensures we can't
2252 * use it by using uncached buffers that don't have the perag attached
2253 * so we can detect and avoid this problem.
2254 */
2255 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2256 return false;
2257
2258 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2259 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2260 return false;
2261
2262 return true;;
2263
2264 }
2265
2266 static void
2267 xfs_agf_read_verify(
2268 struct xfs_buf *bp)
2269 {
2270 struct xfs_mount *mp = bp->b_target->bt_mount;
2271
2272 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2273 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2274 xfs_buf_ioerror(bp, -EFSBADCRC);
2275 else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2276 XFS_ERRTAG_ALLOC_READ_AGF,
2277 XFS_RANDOM_ALLOC_READ_AGF))
2278 xfs_buf_ioerror(bp, -EFSCORRUPTED);
2279
2280 if (bp->b_error)
2281 xfs_verifier_error(bp);
2282 }
2283
2284 static void
2285 xfs_agf_write_verify(
2286 struct xfs_buf *bp)
2287 {
2288 struct xfs_mount *mp = bp->b_target->bt_mount;
2289 struct xfs_buf_log_item *bip = bp->b_fspriv;
2290
2291 if (!xfs_agf_verify(mp, bp)) {
2292 xfs_buf_ioerror(bp, -EFSCORRUPTED);
2293 xfs_verifier_error(bp);
2294 return;
2295 }
2296
2297 if (!xfs_sb_version_hascrc(&mp->m_sb))
2298 return;
2299
2300 if (bip)
2301 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2302
2303 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
2304 }
2305
/*
 * Buffer ops vector attached to AGF buffers so the buffer cache runs the
 * verifiers above on every read and write.
 */
const struct xfs_buf_ops xfs_agf_buf_ops = {
	.verify_read = xfs_agf_read_verify,
	.verify_write = xfs_agf_write_verify,
};
2310
2311 /*
2312 * Read in the allocation group header (free/alloc section).
2313 */
2314 int /* error */
2315 xfs_read_agf(
2316 struct xfs_mount *mp, /* mount point structure */
2317 struct xfs_trans *tp, /* transaction pointer */
2318 xfs_agnumber_t agno, /* allocation group number */
2319 int flags, /* XFS_BUF_ */
2320 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2321 {
2322 int error;
2323
2324 trace_xfs_read_agf(mp, agno);
2325
2326 ASSERT(agno != NULLAGNUMBER);
2327 error = xfs_trans_read_buf(
2328 mp, tp, mp->m_ddev_targp,
2329 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2330 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
2331 if (error)
2332 return error;
2333 if (!*bpp)
2334 return 0;
2335
2336 ASSERT(!(*bpp)->b_error);
2337 xfs_buf_set_ref(*bpp, XFS_AGF_REF);
2338 return 0;
2339 }
2340
/*
 * Read in the allocation group header (free/alloc section).
 * On the first read of an AG's AGF this also seeds the in-core perag
 * counters from the on-disk values.  With XFS_ALLOC_FLAG_TRYLOCK a lock
 * miss returns 0 with *bpp == NULL.
 */
int					/* error */
xfs_alloc_read_agf(
	struct xfs_mount	*mp,	/* mount point structure */
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags,	/* XFS_ALLOC_FLAG_... */
	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
{
	struct xfs_agf		*agf;		/* ag freelist header */
	struct xfs_perag	*pag;		/* per allocation group data */
	int			error;

	trace_xfs_alloc_read_agf(mp, agno);

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_read_agf(mp, tp, agno,
			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
			bpp);
	if (error)
		return error;
	/* trylock miss: no buffer and no error; caller must check *bpp */
	if (!*bpp)
		return 0;
	ASSERT(!(*bpp)->b_error);

	agf = XFS_BUF_TO_AGF(*bpp);
	pag = xfs_perag_get(mp, agno);
	/*
	 * First read of this AGF: copy the on-disk counters into the
	 * in-core perag so later allocation decisions need not reread
	 * the buffer.
	 */
	if (!pag->pagf_init) {
		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
		pag->pagf_levels[XFS_BTNUM_BNOi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
		spin_lock_init(&pag->pagb_lock);
		pag->pagb_count = 0;
		/* XXX: pagb_tree doesn't exist in userspace */
		//pag->pagb_tree = RB_ROOT;
		pag->pagf_init = 1;
	}
#ifdef DEBUG
	/* on later reads, the in-core state must still match disk */
	else if (!XFS_FORCED_SHUTDOWN(mp)) {
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
	}
#endif
	xfs_perag_put(pag);
	return 0;
}
2400
/*
 * Allocate an extent (variable-size).
 * Depending on the allocation type, we either look in a single allocation
 * group or loop over the allocation groups to find the result.
 * On return args->fsbno is NULLFSBLOCK on failure, otherwise the start of
 * the allocated extent (args->len blocks long).
 */
int				/* error */
xfs_alloc_vextent(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	xfs_agblock_t	agsize;	/* allocation group size */
	int		error;
	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
	xfs_extlen_t	minleft;/* minimum left value, temp copy */
	xfs_mount_t	*mp;	/* mount structure pointer */
	xfs_agnumber_t	sagno;	/* starting allocation group number */
	xfs_alloctype_t	type;	/* input allocation type */
	int		bump_rotor = 0;	/* nonzero => advance mp->m_agfrotor */
	int		no_min = 0;	/* final pass: retry with minleft 0 */
	xfs_agnumber_t	rotorstep = xfs_rotorstep; /* inode32 agf stepper */

	mp = args->mp;
	type = args->otype = args->type;
	args->agbno = NULLAGBLOCK;
	/*
	 * Just fix this up, for the case where the last a.g. is shorter
	 * (or there's only one a.g.) and the caller couldn't easily figure
	 * that out (xfs_bmap_alloc).
	 */
	agsize = mp->m_sb.sb_agblocks;
	if (args->maxlen > agsize)
		args->maxlen = agsize;
	if (args->alignment == 0)
		args->alignment = 1;
	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->minlen <= agsize);
	ASSERT(args->mod < args->prod);
	/*
	 * The same conditions fail soft in production builds: report
	 * "no space" (fsbno = NULLFSBLOCK) instead of proceeding with
	 * bad arguments.
	 */
	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
	    args->minlen > args->maxlen || args->minlen > agsize ||
	    args->mod >= args->prod) {
		args->fsbno = NULLFSBLOCK;
		trace_xfs_alloc_vextent_badargs(args);
		return 0;
	}
	minleft = args->minleft;

	switch (type) {
	case XFS_ALLOCTYPE_THIS_AG:
	case XFS_ALLOCTYPE_NEAR_BNO:
	case XFS_ALLOCTYPE_THIS_BNO:
		/*
		 * These three force us into a single a.g.
		 */
		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
		args->pag = xfs_perag_get(mp, args->agno);
		/* waive minleft while fixing the freelist, then restore it */
		args->minleft = 0;
		error = xfs_alloc_fix_freelist(args, 0);
		args->minleft = minleft;
		if (error) {
			trace_xfs_alloc_vextent_nofix(args);
			goto error0;
		}
		/* no agbp means the freelist could not be fixed: no space */
		if (!args->agbp) {
			trace_xfs_alloc_vextent_noagbp(args);
			break;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		if ((error = xfs_alloc_ag_vextent(args)))
			goto error0;
		break;
	case XFS_ALLOCTYPE_START_BNO:
		/*
		 * Try near allocation first, then anywhere-in-ag after
		 * the first a.g. fails.
		 */
		if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) &&
		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
			/* inode32: start from the rotor AG instead of fsbno */
			args->fsbno = XFS_AGB_TO_FSB(mp,
					((mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount), 0);
			bump_rotor = 1;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		args->type = XFS_ALLOCTYPE_NEAR_BNO;
		/* FALLTHROUGH */
	case XFS_ALLOCTYPE_ANY_AG:
	case XFS_ALLOCTYPE_START_AG:
	case XFS_ALLOCTYPE_FIRST_AG:
		/*
		 * Rotate through the allocation groups looking for a winner.
		 */
		if (type == XFS_ALLOCTYPE_ANY_AG) {
			/*
			 * Start with the last place we left off.
			 */
			args->agno = sagno = (mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount;
			args->type = XFS_ALLOCTYPE_THIS_AG;
			flags = XFS_ALLOC_FLAG_TRYLOCK;
		} else if (type == XFS_ALLOCTYPE_FIRST_AG) {
			/*
			 * Start with allocation group given by bno.
			 */
			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			args->type = XFS_ALLOCTYPE_THIS_AG;
			sagno = 0;
			flags = 0;
		} else {
			if (type == XFS_ALLOCTYPE_START_AG)
				args->type = XFS_ALLOCTYPE_THIS_AG;
			/*
			 * Start with the given allocation group.
			 */
			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			flags = XFS_ALLOC_FLAG_TRYLOCK;
		}
		/*
		 * Loop over allocation groups twice; first time with
		 * trylock set, second time without.
		 */
		for (;;) {
			args->pag = xfs_perag_get(mp, args->agno);
			if (no_min) args->minleft = 0;
			error = xfs_alloc_fix_freelist(args, flags);
			args->minleft = minleft;
			if (error) {
				trace_xfs_alloc_vextent_nofix(args);
				goto error0;
			}
			/*
			 * If we get a buffer back then the allocation will fly.
			 */
			if (args->agbp) {
				if ((error = xfs_alloc_ag_vextent(args)))
					goto error0;
				break;
			}

			trace_xfs_alloc_vextent_loopfailed(args);

			/*
			 * Didn't work, figure out the next iteration.
			 */
			if (args->agno == sagno &&
			    type == XFS_ALLOCTYPE_START_BNO)
				args->type = XFS_ALLOCTYPE_THIS_AG;
			/*
			 * For the first allocation, we can try any AG to get
			 * space.  However, if we already have allocated a
			 * block, we don't want to try AGs whose number is below
			 * sagno. Otherwise, we may end up with out-of-order
			 * locking of AGF, which might cause deadlock.
			 */
			if (++(args->agno) == mp->m_sb.sb_agcount) {
				if (args->firstblock != NULLFSBLOCK)
					args->agno = sagno;
				else
					args->agno = 0;
			}
			/*
			 * Reached the starting a.g., must either be done
			 * or switch to non-trylock mode.
			 */
			if (args->agno == sagno) {
				/* pass 3 (minleft waived) also failed: give up */
				if (no_min == 1) {
					args->agbno = NULLAGBLOCK;
					trace_xfs_alloc_vextent_allfailed(args);
					break;
				}
				if (flags == 0) {
					/* pass 2 failed; last try with minleft 0 */
					no_min = 1;
				} else {
					/* pass 1 failed; retry without trylock */
					flags = 0;
					if (type == XFS_ALLOCTYPE_START_BNO) {
						args->agbno = XFS_FSB_TO_AGBNO(mp,
							args->fsbno);
						args->type = XFS_ALLOCTYPE_NEAR_BNO;
					}
				}
			}
			xfs_perag_put(args->pag);
		}
		/* remember where we stopped for the next ANY_AG allocation */
		if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
			if (args->agno == sagno)
				mp->m_agfrotor = (mp->m_agfrotor + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
			else
				mp->m_agfrotor = (args->agno * rotorstep + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
		}
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}
	if (args->agbno == NULLAGBLOCK)
		args->fsbno = NULLFSBLOCK;
	else {
		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
#ifdef DEBUG
		ASSERT(args->len >= args->minlen);
		ASSERT(args->len <= args->maxlen);
		ASSERT(args->agbno % args->alignment == 0);
		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
			args->len);
#endif
	}
	xfs_perag_put(args->pag);
	return 0;
error0:
	xfs_perag_put(args->pag);
	return error;
}
2616
2617 /*
2618 * Free an extent.
2619 * Just break up the extent address and hand off to xfs_free_ag_extent
2620 * after fixing up the freelist.
2621 */
2622 int /* error */
2623 xfs_free_extent(
2624 xfs_trans_t *tp, /* transaction pointer */
2625 xfs_fsblock_t bno, /* starting block number of extent */
2626 xfs_extlen_t len) /* length of extent */
2627 {
2628 xfs_alloc_arg_t args;
2629 int error;
2630
2631 ASSERT(len != 0);
2632 memset(&args, 0, sizeof(xfs_alloc_arg_t));
2633 args.tp = tp;
2634 args.mp = tp->t_mountp;
2635
2636 /*
2637 * validate that the block number is legal - the enables us to detect
2638 * and handle a silent filesystem corruption rather than crashing.
2639 */
2640 args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2641 if (args.agno >= args.mp->m_sb.sb_agcount)
2642 return -EFSCORRUPTED;
2643
2644 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2645 if (args.agbno >= args.mp->m_sb.sb_agblocks)
2646 return -EFSCORRUPTED;
2647
2648 args.pag = xfs_perag_get(args.mp, args.agno);
2649 ASSERT(args.pag);
2650
2651 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2652 if (error)
2653 goto error0;
2654
2655 /* validate the extent size is legal now we have the agf locked */
2656 if (args.agbno + len >
2657 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
2658 error = -EFSCORRUPTED;
2659 goto error0;
2660 }
2661
2662 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2663 if (!error)
2664 xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
2665 error0:
2666 xfs_perag_put(args.pag);
2667 return error;
2668 }