]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxfs/xfs_alloc.c
libxfs: add helper for verifying checksums on xfs_bufs
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_alloc.c
CommitLineData
2bd0ea18 1/*
5e656dbb 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
da23017d 3 * All Rights Reserved.
5000d01d 4 *
da23017d
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
2bd0ea18 7 * published by the Free Software Foundation.
5000d01d 8 *
da23017d
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
5000d01d 13 *
da23017d
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2bd0ea18 17 */
2bd0ea18
NS
18#include <xfs.h>
19
20#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
5e656dbb
BN
21
22#define XFSA_FIXUP_BNO_OK 1
23#define XFSA_FIXUP_CNT_OK 2
24
5e656dbb
BN
25STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
26STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
27STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
28STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
a2ceac1f 29 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
2bd0ea18 30
b194c7d8
BN
31/*
32 * Lookup the record equal to [bno, len] in the btree given by cur.
33 */
34STATIC int /* error */
35xfs_alloc_lookup_eq(
36 struct xfs_btree_cur *cur, /* btree cursor */
37 xfs_agblock_t bno, /* starting block of extent */
38 xfs_extlen_t len, /* length of extent */
39 int *stat) /* success/failure */
40{
41 cur->bc_rec.a.ar_startblock = bno;
42 cur->bc_rec.a.ar_blockcount = len;
43 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
44}
45
46/*
47 * Lookup the first record greater than or equal to [bno, len]
48 * in the btree given by cur.
49 */
a2ceac1f 50int /* error */
b194c7d8
BN
51xfs_alloc_lookup_ge(
52 struct xfs_btree_cur *cur, /* btree cursor */
53 xfs_agblock_t bno, /* starting block of extent */
54 xfs_extlen_t len, /* length of extent */
55 int *stat) /* success/failure */
56{
57 cur->bc_rec.a.ar_startblock = bno;
58 cur->bc_rec.a.ar_blockcount = len;
59 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
60}
61
62/*
63 * Lookup the first record less than or equal to [bno, len]
64 * in the btree given by cur.
65 */
a2ceac1f 66int /* error */
b194c7d8
BN
67xfs_alloc_lookup_le(
68 struct xfs_btree_cur *cur, /* btree cursor */
69 xfs_agblock_t bno, /* starting block of extent */
70 xfs_extlen_t len, /* length of extent */
71 int *stat) /* success/failure */
72{
73 cur->bc_rec.a.ar_startblock = bno;
74 cur->bc_rec.a.ar_blockcount = len;
75 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
76}
77
78/*
79 * Update the record referred to by cur to the value given
80 * by [bno, len].
81 * This either works (return 0) or gets an EFSCORRUPTED error.
82 */
83STATIC int /* error */
84xfs_alloc_update(
85 struct xfs_btree_cur *cur, /* btree cursor */
86 xfs_agblock_t bno, /* starting block of extent */
87 xfs_extlen_t len) /* length of extent */
88{
89 union xfs_btree_rec rec;
90
91 rec.alloc.ar_startblock = cpu_to_be32(bno);
92 rec.alloc.ar_blockcount = cpu_to_be32(len);
93 return xfs_btree_update(cur, &rec);
94}
95
96/*
97 * Get the data from the pointed-to record.
98 */
a2ceac1f 99int /* error */
b194c7d8
BN
100xfs_alloc_get_rec(
101 struct xfs_btree_cur *cur, /* btree cursor */
102 xfs_agblock_t *bno, /* output: starting block of extent */
103 xfs_extlen_t *len, /* output: length of extent */
104 int *stat) /* output: success/failure */
105{
106 union xfs_btree_rec *rec;
107 int error;
108
109 error = xfs_btree_get_rec(cur, &rec, stat);
110 if (!error && *stat == 1) {
111 *bno = be32_to_cpu(rec->alloc.ar_startblock);
112 *len = be32_to_cpu(rec->alloc.ar_blockcount);
113 }
114 return error;
115}
116
2bd0ea18
NS
117/*
118 * Compute aligned version of the found extent.
119 * Takes alignment and min length into account.
120 */
5e656dbb 121STATIC void
2bd0ea18 122xfs_alloc_compute_aligned(
a2ceac1f 123 xfs_alloc_arg_t *args, /* allocation argument structure */
2bd0ea18
NS
124 xfs_agblock_t foundbno, /* starting block in found extent */
125 xfs_extlen_t foundlen, /* length in found extent */
2bd0ea18
NS
126 xfs_agblock_t *resbno, /* result block number */
127 xfs_extlen_t *reslen) /* result length */
128{
129 xfs_agblock_t bno;
2bd0ea18
NS
130 xfs_extlen_t len;
131
a2ceac1f
DC
132 /* Trim busy sections out of found extent */
133 xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
134
135 if (args->alignment > 1 && len >= args->minlen) {
136 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
137 xfs_extlen_t diff = aligned_bno - bno;
138
139 *resbno = aligned_bno;
140 *reslen = diff >= len ? 0 : len - diff;
2bd0ea18 141 } else {
a2ceac1f
DC
142 *resbno = bno;
143 *reslen = len;
2bd0ea18 144 }
2bd0ea18
NS
145}
146
147/*
148 * Compute best start block and diff for "near" allocations.
149 * freelen >= wantlen already checked by caller.
150 */
151STATIC xfs_extlen_t /* difference value (absolute) */
152xfs_alloc_compute_diff(
153 xfs_agblock_t wantbno, /* target starting block */
154 xfs_extlen_t wantlen, /* target length */
155 xfs_extlen_t alignment, /* target alignment */
84a62eea 156 char userdata, /* are we allocating data? */
2bd0ea18
NS
157 xfs_agblock_t freebno, /* freespace's starting block */
158 xfs_extlen_t freelen, /* freespace's length */
159 xfs_agblock_t *newbnop) /* result: best start block from free */
160{
161 xfs_agblock_t freeend; /* end of freespace extent */
162 xfs_agblock_t newbno1; /* return block number */
163 xfs_agblock_t newbno2; /* other new block number */
0e266570
NS
164 xfs_extlen_t newlen1=0; /* length with newbno1 */
165 xfs_extlen_t newlen2=0; /* length with newbno2 */
2bd0ea18
NS
166 xfs_agblock_t wantend; /* end of target extent */
167
168 ASSERT(freelen >= wantlen);
169 freeend = freebno + freelen;
170 wantend = wantbno + wantlen;
84a62eea
DC
171 /*
172 * We want to allocate from the start of a free extent if it is past
173 * the desired block or if we are allocating user data and the free
174 * extent is before desired block. The second case is there to allow
175 * for contiguous allocation from the remaining free space if the file
176 * grows in the short term.
177 */
178 if (freebno >= wantbno || (userdata && freeend < wantend)) {
2bd0ea18
NS
179 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
180 newbno1 = NULLAGBLOCK;
181 } else if (freeend >= wantend && alignment > 1) {
182 newbno1 = roundup(wantbno, alignment);
183 newbno2 = newbno1 - alignment;
184 if (newbno1 >= freeend)
185 newbno1 = NULLAGBLOCK;
186 else
187 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
188 if (newbno2 < freebno)
189 newbno2 = NULLAGBLOCK;
190 else
191 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
192 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
193 if (newlen1 < newlen2 ||
194 (newlen1 == newlen2 &&
195 XFS_ABSDIFF(newbno1, wantbno) >
196 XFS_ABSDIFF(newbno2, wantbno)))
197 newbno1 = newbno2;
198 } else if (newbno2 != NULLAGBLOCK)
199 newbno1 = newbno2;
200 } else if (freeend >= wantend) {
201 newbno1 = wantbno;
202 } else if (alignment > 1) {
203 newbno1 = roundup(freeend - wantlen, alignment);
204 if (newbno1 > freeend - wantlen &&
205 newbno1 - alignment >= freebno)
206 newbno1 -= alignment;
207 else if (newbno1 >= freeend)
208 newbno1 = NULLAGBLOCK;
209 } else
210 newbno1 = freeend - wantlen;
211 *newbnop = newbno1;
212 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
213}
214
215/*
216 * Fix up the length, based on mod and prod.
217 * len should be k * prod + mod for some k.
218 * If len is too small it is returned unchanged.
219 * If len hits maxlen it is left alone.
220 */
221STATIC void
222xfs_alloc_fix_len(
dfc130f3 223 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18
NS
224{
225 xfs_extlen_t k;
226 xfs_extlen_t rlen;
227
228 ASSERT(args->mod < args->prod);
229 rlen = args->len;
230 ASSERT(rlen >= args->minlen);
231 ASSERT(rlen <= args->maxlen);
232 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
233 (args->mod == 0 && rlen < args->prod))
234 return;
235 k = rlen % args->prod;
236 if (k == args->mod)
237 return;
238 if (k > args->mod) {
239 if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen)
240 return;
241 } else {
242 if ((int)(rlen = rlen - args->prod - (args->mod - k)) <
243 (int)args->minlen)
244 return;
245 }
246 ASSERT(rlen >= args->minlen);
247 ASSERT(rlen <= args->maxlen);
248 args->len = rlen;
249}
250
251/*
252 * Fix up length if there is too little space left in the a.g.
253 * Return 1 if ok, 0 if too little, should give up.
254 */
255STATIC int
256xfs_alloc_fix_minleft(
dfc130f3 257 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18
NS
258{
259 xfs_agf_t *agf; /* a.g. freelist header */
260 int diff; /* free space difference */
261
262 if (args->minleft == 0)
263 return 1;
264 agf = XFS_BUF_TO_AGF(args->agbp);
6e3140c7 265 diff = be32_to_cpu(agf->agf_freeblks)
2bd0ea18
NS
266 - args->len - args->minleft;
267 if (diff >= 0)
268 return 1;
269 args->len += diff; /* shrink the allocated space */
270 if (args->len >= args->minlen)
271 return 1;
272 args->agbno = NULLAGBLOCK;
273 return 0;
274}
275
276/*
277 * Update the two btrees, logically removing from freespace the extent
278 * starting at rbno, rlen blocks. The extent is contained within the
279 * actual (current) free extent fbno for flen blocks.
280 * Flags are passed in indicating whether the cursors are set to the
281 * relevant records.
282 */
283STATIC int /* error code */
284xfs_alloc_fixup_trees(
dfc130f3
RC
285 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
286 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
2bd0ea18
NS
287 xfs_agblock_t fbno, /* starting block of free extent */
288 xfs_extlen_t flen, /* length of free extent */
289 xfs_agblock_t rbno, /* starting block of returned extent */
290 xfs_extlen_t rlen, /* length of returned extent */
291 int flags) /* flags, XFSA_FIXUP_... */
292{
293 int error; /* error code */
294 int i; /* operation results */
295 xfs_agblock_t nfbno1; /* first new free startblock */
296 xfs_agblock_t nfbno2; /* second new free startblock */
0e266570
NS
297 xfs_extlen_t nflen1=0; /* first new free length */
298 xfs_extlen_t nflen2=0; /* second new free length */
2bd0ea18
NS
299
300 /*
301 * Look up the record in the by-size tree if necessary.
302 */
303 if (flags & XFSA_FIXUP_CNT_OK) {
304#ifdef DEBUG
0e266570 305 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
2bd0ea18
NS
306 return error;
307 XFS_WANT_CORRUPTED_RETURN(
308 i == 1 && nfbno1 == fbno && nflen1 == flen);
309#endif
310 } else {
0e266570 311 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
2bd0ea18
NS
312 return error;
313 XFS_WANT_CORRUPTED_RETURN(i == 1);
314 }
315 /*
316 * Look up the record in the by-block tree if necessary.
317 */
318 if (flags & XFSA_FIXUP_BNO_OK) {
319#ifdef DEBUG
0e266570 320 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
2bd0ea18
NS
321 return error;
322 XFS_WANT_CORRUPTED_RETURN(
323 i == 1 && nfbno1 == fbno && nflen1 == flen);
324#endif
325 } else {
0e266570 326 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
2bd0ea18
NS
327 return error;
328 XFS_WANT_CORRUPTED_RETURN(i == 1);
329 }
b3563c19 330
2bd0ea18 331#ifdef DEBUG
b3563c19
BN
332 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
333 struct xfs_btree_block *bnoblock;
334 struct xfs_btree_block *cntblock;
335
336 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
337 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
2bd0ea18 338
b3563c19
BN
339 XFS_WANT_CORRUPTED_RETURN(
340 bnoblock->bb_numrecs == cntblock->bb_numrecs);
2bd0ea18
NS
341 }
342#endif
b3563c19 343
2bd0ea18
NS
344 /*
345 * Deal with all four cases: the allocated record is contained
346 * within the freespace record, so we can have new freespace
347 * at either (or both) end, or no freespace remaining.
348 */
349 if (rbno == fbno && rlen == flen)
350 nfbno1 = nfbno2 = NULLAGBLOCK;
351 else if (rbno == fbno) {
352 nfbno1 = rbno + rlen;
353 nflen1 = flen - rlen;
354 nfbno2 = NULLAGBLOCK;
355 } else if (rbno + rlen == fbno + flen) {
356 nfbno1 = fbno;
357 nflen1 = flen - rlen;
358 nfbno2 = NULLAGBLOCK;
359 } else {
360 nfbno1 = fbno;
361 nflen1 = rbno - fbno;
362 nfbno2 = rbno + rlen;
363 nflen2 = (fbno + flen) - nfbno2;
364 }
365 /*
366 * Delete the entry from the by-size btree.
367 */
b194c7d8 368 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18
NS
369 return error;
370 XFS_WANT_CORRUPTED_RETURN(i == 1);
371 /*
372 * Add new by-size btree entry(s).
373 */
374 if (nfbno1 != NULLAGBLOCK) {
0e266570 375 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
2bd0ea18
NS
376 return error;
377 XFS_WANT_CORRUPTED_RETURN(i == 0);
b194c7d8 378 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18
NS
379 return error;
380 XFS_WANT_CORRUPTED_RETURN(i == 1);
381 }
382 if (nfbno2 != NULLAGBLOCK) {
0e266570 383 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
2bd0ea18
NS
384 return error;
385 XFS_WANT_CORRUPTED_RETURN(i == 0);
b194c7d8 386 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18
NS
387 return error;
388 XFS_WANT_CORRUPTED_RETURN(i == 1);
389 }
390 /*
391 * Fix up the by-block btree entry(s).
392 */
393 if (nfbno1 == NULLAGBLOCK) {
394 /*
395 * No remaining freespace, just delete the by-block tree entry.
396 */
b194c7d8 397 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18
NS
398 return error;
399 XFS_WANT_CORRUPTED_RETURN(i == 1);
400 } else {
401 /*
402 * Update the by-block entry to start later|be shorter.
403 */
0e266570 404 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
2bd0ea18
NS
405 return error;
406 }
407 if (nfbno2 != NULLAGBLOCK) {
408 /*
409 * 2 resulting free entries, need to add one.
410 */
0e266570 411 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
2bd0ea18
NS
412 return error;
413 XFS_WANT_CORRUPTED_RETURN(i == 0);
b194c7d8 414 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18
NS
415 return error;
416 XFS_WANT_CORRUPTED_RETURN(i == 1);
417 }
418 return 0;
419}
420
dd5b876e 421static bool
a2ceac1f
DC
422xfs_agfl_verify(
423 struct xfs_buf *bp)
424{
a2ceac1f
DC
425 struct xfs_mount *mp = bp->b_target->bt_mount;
426 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
a2ceac1f
DC
427 int i;
428
dd5b876e
DC
429 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid))
430 return false;
431 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
432 return false;
433 /*
434 * during growfs operations, the perag is not fully initialised,
435 * so we can't use it for any useful checking. growfs ensures we can't
436 * use it by using uncached buffers that don't have the perag attached
437 * so we can detect and avoid this problem.
438 */
439 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
440 return false;
441
a2ceac1f 442 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
dd5b876e 443 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
a2ceac1f 444 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
dd5b876e 445 return false;
a2ceac1f 446 }
dd5b876e
DC
447 return true;
448}
449
450static void
451xfs_agfl_read_verify(
452 struct xfs_buf *bp)
453{
454 struct xfs_mount *mp = bp->b_target->bt_mount;
455 int agfl_ok = 1;
456
457 /*
458 * There is no verification of non-crc AGFLs because mkfs does not
459 * initialise the AGFL to zero or NULL. Hence the only valid part of the
460 * AGFL is what the AGF says is active. We can't get to the AGF, so we
461 * can't verify just those entries are valid.
462 */
463 if (!xfs_sb_version_hascrc(&mp->m_sb))
464 return;
465
d21ca64d 466 agfl_ok = xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF);
dd5b876e
DC
467
468 agfl_ok = agfl_ok && xfs_agfl_verify(bp);
a2ceac1f
DC
469
470 if (!agfl_ok) {
dd5b876e 471 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
a2ceac1f
DC
472 xfs_buf_ioerror(bp, EFSCORRUPTED);
473 }
a2ceac1f
DC
474}
475
476static void
477xfs_agfl_write_verify(
478 struct xfs_buf *bp)
479{
dd5b876e
DC
480 struct xfs_mount *mp = bp->b_target->bt_mount;
481 struct xfs_buf_log_item *bip = bp->b_fspriv;
a2ceac1f 482
dd5b876e
DC
483 /* no verification of non-crc AGFLs */
484 if (!xfs_sb_version_hascrc(&mp->m_sb))
485 return;
486
487 if (!xfs_agfl_verify(bp)) {
488 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
489 xfs_buf_ioerror(bp, EFSCORRUPTED);
490 return;
491 }
492
493 if (bip)
494 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
495
3faddc54 496 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_AGFL_CRC_OFF);
a2ceac1f
DC
497}
498
499const struct xfs_buf_ops xfs_agfl_buf_ops = {
500 .verify_read = xfs_agfl_read_verify,
501 .verify_write = xfs_agfl_write_verify,
502};
503
2bd0ea18
NS
504/*
505 * Read in the allocation group free block array.
506 */
507STATIC int /* error */
508xfs_alloc_read_agfl(
509 xfs_mount_t *mp, /* mount point structure */
510 xfs_trans_t *tp, /* transaction pointer */
511 xfs_agnumber_t agno, /* allocation group number */
512 xfs_buf_t **bpp) /* buffer for the ag free block array */
513{
514 xfs_buf_t *bp; /* return value */
2bd0ea18
NS
515 int error;
516
517 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
518 error = xfs_trans_read_buf(
519 mp, tp, mp->m_ddev_targp,
520 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
a2ceac1f 521 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
9440d84d 522 if (error)
2bd0ea18 523 return error;
a2ceac1f
DC
524 ASSERT(!xfs_buf_geterror(bp));
525 xfs_buf_set_ref(bp, XFS_AGFL_REF);
2bd0ea18
NS
526 *bpp = bp;
527 return 0;
528}
529
a2ceac1f
DC
530STATIC int
531xfs_alloc_update_counters(
532 struct xfs_trans *tp,
533 struct xfs_perag *pag,
534 struct xfs_buf *agbp,
535 long len)
536{
537 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
538
539 pag->pagf_freeblks += len;
540 be32_add_cpu(&agf->agf_freeblks, len);
541
542 xfs_trans_agblocks_delta(tp, len);
543 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
544 be32_to_cpu(agf->agf_length)))
545 return EFSCORRUPTED;
546
547 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
548 return 0;
549}
550
2bd0ea18
NS
551/*
552 * Allocation group level functions.
553 */
554
555/*
556 * Allocate a variable extent in the allocation group agno.
557 * Type and bno are used to determine where in the allocation group the
558 * extent will start.
559 * Extent's length (returned in *len) will be between minlen and maxlen,
560 * and of the form k * prod + mod unless there's nothing that large.
561 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
562 */
563STATIC int /* error */
564xfs_alloc_ag_vextent(
dfc130f3 565 xfs_alloc_arg_t *args) /* argument structure for allocation */
2bd0ea18 566{
0e266570 567 int error=0;
2bd0ea18
NS
568
569 ASSERT(args->minlen > 0);
570 ASSERT(args->maxlen > 0);
571 ASSERT(args->minlen <= args->maxlen);
572 ASSERT(args->mod < args->prod);
573 ASSERT(args->alignment > 0);
574 /*
575 * Branch to correct routine based on the type.
576 */
577 args->wasfromfl = 0;
578 switch (args->type) {
579 case XFS_ALLOCTYPE_THIS_AG:
580 error = xfs_alloc_ag_vextent_size(args);
581 break;
582 case XFS_ALLOCTYPE_NEAR_BNO:
583 error = xfs_alloc_ag_vextent_near(args);
584 break;
585 case XFS_ALLOCTYPE_THIS_BNO:
586 error = xfs_alloc_ag_vextent_exact(args);
587 break;
588 default:
589 ASSERT(0);
590 /* NOTREACHED */
591 }
a2ceac1f
DC
592
593 if (error || args->agbno == NULLAGBLOCK)
2bd0ea18 594 return error;
2bd0ea18 595
a2ceac1f
DC
596 ASSERT(args->len >= args->minlen);
597 ASSERT(args->len <= args->maxlen);
598 ASSERT(!args->wasfromfl || !args->isfl);
599 ASSERT(args->agbno % args->alignment == 0);
600
601 if (!args->wasfromfl) {
602 error = xfs_alloc_update_counters(args->tp, args->pag,
603 args->agbp,
604 -((long)(args->len)));
605 if (error)
606 return error;
607
608 ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
609 args->agbno, args->len));
2bd0ea18 610 }
a2ceac1f
DC
611
612 if (!args->isfl) {
613 xfs_trans_mod_sb(args->tp, args->wasdel ?
614 XFS_TRANS_SB_RES_FDBLOCKS :
615 XFS_TRANS_SB_FDBLOCKS,
616 -((long)(args->len)));
617 }
618
619 XFS_STATS_INC(xs_allocx);
620 XFS_STATS_ADD(xs_allocb, args->len);
621 return error;
2bd0ea18
NS
622}
623
624/*
625 * Allocate a variable extent at exactly agno/bno.
626 * Extent's length (returned in *len) will be between minlen and maxlen,
627 * and of the form k * prod + mod unless there's nothing that large.
628 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
629 */
630STATIC int /* error */
631xfs_alloc_ag_vextent_exact(
dfc130f3 632 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 633{
dfc130f3
RC
634 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
635 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
2bd0ea18
NS
636 int error;
637 xfs_agblock_t fbno; /* start block of found extent */
2bd0ea18 638 xfs_extlen_t flen; /* length of found extent */
a2ceac1f
DC
639 xfs_agblock_t tbno; /* start block of trimmed extent */
640 xfs_extlen_t tlen; /* length of trimmed extent */
641 xfs_agblock_t tend; /* end block of trimmed extent */
2bd0ea18 642 int i; /* success/failure of operation */
2bd0ea18
NS
643
644 ASSERT(args->alignment == 1);
a2ceac1f 645
2bd0ea18
NS
646 /*
647 * Allocate/initialize a cursor for the by-number freespace btree.
648 */
b194c7d8 649 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
56b2de80
DC
650 args->agno, XFS_BTNUM_BNO);
651
2bd0ea18
NS
652 /*
653 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
654 * Look for the closest free block <= bno, it must contain bno
655 * if any free block does.
656 */
56b2de80
DC
657 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
658 if (error)
2bd0ea18 659 goto error0;
56b2de80
DC
660 if (!i)
661 goto not_found;
662
2bd0ea18
NS
663 /*
664 * Grab the freespace record.
665 */
56b2de80
DC
666 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
667 if (error)
2bd0ea18
NS
668 goto error0;
669 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
670 ASSERT(fbno <= args->agbno);
56b2de80 671
5000d01d 672 /*
a2ceac1f
DC
673 * Check for overlapping busy extents.
674 */
675 xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
676
677 /*
678 * Give up if the start of the extent is busy, or the freespace isn't
679 * long enough for the minimum request.
2bd0ea18 680 */
a2ceac1f
DC
681 if (tbno > args->agbno)
682 goto not_found;
683 if (tlen < args->minlen)
684 goto not_found;
685 tend = tbno + tlen;
686 if (tend < args->agbno + args->minlen)
56b2de80
DC
687 goto not_found;
688
2bd0ea18
NS
689 /*
690 * End of extent will be smaller of the freespace end and the
691 * maximal requested end.
56b2de80 692 *
2bd0ea18
NS
693 * Fix the length according to mod and prod if given.
694 */
a2ceac1f
DC
695 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
696 - args->agbno;
2bd0ea18 697 xfs_alloc_fix_len(args);
56b2de80
DC
698 if (!xfs_alloc_fix_minleft(args))
699 goto not_found;
700
a2ceac1f 701 ASSERT(args->agbno + args->len <= tend);
56b2de80 702
2bd0ea18 703 /*
a2ceac1f 704 * We are allocating agbno for args->len
2bd0ea18
NS
705 * Allocate/initialize a cursor for the by-size btree.
706 */
b194c7d8
BN
707 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
708 args->agno, XFS_BTNUM_CNT);
2bd0ea18 709 ASSERT(args->agbno + args->len <=
6e3140c7 710 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
56b2de80
DC
711 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
712 args->len, XFSA_FIXUP_BNO_OK);
713 if (error) {
2bd0ea18
NS
714 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
715 goto error0;
716 }
a2ceac1f 717
2bd0ea18
NS
718 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
719 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
a2ceac1f 720
2bd0ea18 721 args->wasfromfl = 0;
56b2de80
DC
722 trace_xfs_alloc_exact_done(args);
723 return 0;
724
725not_found:
726 /* Didn't find it, return null. */
727 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
728 args->agbno = NULLAGBLOCK;
729 trace_xfs_alloc_exact_notfound(args);
2bd0ea18
NS
730 return 0;
731
732error0:
733 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
56b2de80
DC
734 trace_xfs_alloc_exact_error(args);
735 return error;
736}
737
738/*
739 * Search the btree in a given direction via the search cursor and compare
740 * the records found against the good extent we've already found.
741 */
742STATIC int
743xfs_alloc_find_best_extent(
744 struct xfs_alloc_arg *args, /* allocation argument structure */
745 struct xfs_btree_cur **gcur, /* good cursor */
746 struct xfs_btree_cur **scur, /* searching cursor */
747 xfs_agblock_t gdiff, /* difference for search comparison */
748 xfs_agblock_t *sbno, /* extent found by search */
a2ceac1f
DC
749 xfs_extlen_t *slen, /* extent length */
750 xfs_agblock_t *sbnoa, /* aligned extent found by search */
751 xfs_extlen_t *slena, /* aligned extent length */
56b2de80
DC
752 int dir) /* 0 = search right, 1 = search left */
753{
56b2de80
DC
754 xfs_agblock_t new;
755 xfs_agblock_t sdiff;
756 int error;
757 int i;
758
759 /* The good extent is perfect, no need to search. */
760 if (!gdiff)
761 goto out_use_good;
762
763 /*
764 * Look until we find a better one, run out of space or run off the end.
765 */
766 do {
767 error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
768 if (error)
769 goto error0;
770 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
a2ceac1f 771 xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
56b2de80
DC
772
773 /*
774 * The good extent is closer than this one.
775 */
776 if (!dir) {
a2ceac1f 777 if (*sbnoa >= args->agbno + gdiff)
56b2de80
DC
778 goto out_use_good;
779 } else {
a2ceac1f 780 if (*sbnoa <= args->agbno - gdiff)
56b2de80
DC
781 goto out_use_good;
782 }
783
784 /*
785 * Same distance, compare length and pick the best.
786 */
787 if (*slena >= args->minlen) {
788 args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
789 xfs_alloc_fix_len(args);
790
791 sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
84a62eea
DC
792 args->alignment,
793 args->userdata, *sbnoa,
a2ceac1f 794 *slena, &new);
56b2de80
DC
795
796 /*
797 * Choose closer size and invalidate other cursor.
798 */
799 if (sdiff < gdiff)
800 goto out_use_search;
801 goto out_use_good;
802 }
803
804 if (!dir)
805 error = xfs_btree_increment(*scur, 0, &i);
806 else
807 error = xfs_btree_decrement(*scur, 0, &i);
808 if (error)
809 goto error0;
810 } while (i);
811
812out_use_good:
813 xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
814 *scur = NULL;
815 return 0;
816
817out_use_search:
818 xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
819 *gcur = NULL;
820 return 0;
821
822error0:
823 /* caller invalidates cursors */
2bd0ea18
NS
824 return error;
825}
826
827/*
828 * Allocate a variable extent near bno in the allocation group agno.
829 * Extent's length (returned in len) will be between minlen and maxlen,
830 * and of the form k * prod + mod unless there's nothing that large.
831 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
832 */
833STATIC int /* error */
834xfs_alloc_ag_vextent_near(
dfc130f3 835 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 836{
dfc130f3
RC
837 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */
838 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */
839 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */
2bd0ea18
NS
840 xfs_agblock_t gtbno; /* start bno of right side entry */
841 xfs_agblock_t gtbnoa; /* aligned ... */
842 xfs_extlen_t gtdiff; /* difference to right side entry */
843 xfs_extlen_t gtlen; /* length of right side entry */
a2ceac1f 844 xfs_extlen_t gtlena; /* aligned ... */
2bd0ea18
NS
845 xfs_agblock_t gtnew; /* useful start bno of right side */
846 int error; /* error code */
847 int i; /* result code, temporary */
848 int j; /* result code, temporary */
849 xfs_agblock_t ltbno; /* start bno of left side entry */
850 xfs_agblock_t ltbnoa; /* aligned ... */
851 xfs_extlen_t ltdiff; /* difference to left side entry */
2bd0ea18 852 xfs_extlen_t ltlen; /* length of left side entry */
a2ceac1f 853 xfs_extlen_t ltlena; /* aligned ... */
2bd0ea18
NS
854 xfs_agblock_t ltnew; /* useful start bno of left side */
855 xfs_extlen_t rlen; /* length of returned extent */
a2ceac1f 856 int forced = 0;
6beba453 857#ifdef DEBUG
2bd0ea18
NS
858 /*
859 * Randomly don't execute the first algorithm.
860 */
2bd0ea18 861 int dofirst; /* set to do first algorithm */
2bd0ea18 862
49f693fa 863 dofirst = prandom_u32() & 1;
2bd0ea18 864#endif
a2ceac1f
DC
865
866restart:
867 bno_cur_lt = NULL;
868 bno_cur_gt = NULL;
869 ltlen = 0;
870 gtlena = 0;
871 ltlena = 0;
872
2bd0ea18
NS
873 /*
874 * Get a cursor for the by-size btree.
875 */
b194c7d8
BN
876 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
877 args->agno, XFS_BTNUM_CNT);
a2ceac1f 878
2bd0ea18
NS
879 /*
880 * See if there are any free extents as big as maxlen.
881 */
0e266570 882 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
2bd0ea18
NS
883 goto error0;
884 /*
885 * If none, then pick up the last entry in the tree unless the
886 * tree is empty.
5000d01d 887 */
2bd0ea18 888 if (!i) {
0e266570
NS
889 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
890 &ltlen, &i)))
2bd0ea18
NS
891 goto error0;
892 if (i == 0 || ltlen == 0) {
893 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
a2ceac1f 894 trace_xfs_alloc_near_noentry(args);
2bd0ea18
NS
895 return 0;
896 }
897 ASSERT(i == 1);
898 }
899 args->wasfromfl = 0;
a2ceac1f 900
5000d01d 901 /*
2bd0ea18
NS
902 * First algorithm.
903 * If the requested extent is large wrt the freespaces available
904 * in this a.g., then the cursor will be pointing to a btree entry
905 * near the right edge of the tree. If it's in the last btree leaf
906 * block, then we just examine all the entries in that block
907 * that are big enough, and pick the best one.
908 * This is written as a while loop so we can break out of it,
909 * but we never loop back to the top.
910 */
911 while (xfs_btree_islastblock(cnt_cur, 0)) {
912 xfs_extlen_t bdiff;
0e266570
NS
913 int besti=0;
914 xfs_extlen_t blen=0;
915 xfs_agblock_t bnew=0;
2bd0ea18 916
6beba453
DC
917#ifdef DEBUG
918 if (dofirst)
2bd0ea18
NS
919 break;
920#endif
921 /*
922 * Start from the entry that lookup found, sequence through
923 * all larger free blocks. If we're actually pointing at a
924 * record smaller than maxlen, go to the start of this block,
925 * and skip all those smaller than minlen.
926 */
927 if (ltlen || args->alignment > 1) {
928 cnt_cur->bc_ptrs[0] = 1;
929 do {
0e266570
NS
930 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
931 &ltlen, &i)))
2bd0ea18
NS
932 goto error0;
933 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
934 if (ltlen >= args->minlen)
935 break;
b194c7d8 936 if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
2bd0ea18
NS
937 goto error0;
938 } while (i);
939 ASSERT(ltlen >= args->minlen);
940 if (!i)
941 break;
942 }
943 i = cnt_cur->bc_ptrs[0];
944 for (j = 1, blen = 0, bdiff = 0;
945 !error && j && (blen < args->maxlen || bdiff > 0);
b194c7d8 946 error = xfs_btree_increment(cnt_cur, 0, &j)) {
2bd0ea18
NS
947 /*
948 * For each entry, decide if it's better than
949 * the previous best entry.
950 */
0e266570 951 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
2bd0ea18
NS
952 goto error0;
953 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
a2ceac1f
DC
954 xfs_alloc_compute_aligned(args, ltbno, ltlen,
955 &ltbnoa, &ltlena);
5e656dbb 956 if (ltlena < args->minlen)
2bd0ea18
NS
957 continue;
958 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
959 xfs_alloc_fix_len(args);
960 ASSERT(args->len >= args->minlen);
961 if (args->len < blen)
962 continue;
963 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
84a62eea
DC
964 args->alignment, args->userdata, ltbnoa,
965 ltlena, &ltnew);
2bd0ea18
NS
966 if (ltnew != NULLAGBLOCK &&
967 (args->len > blen || ltdiff < bdiff)) {
968 bdiff = ltdiff;
969 bnew = ltnew;
970 blen = args->len;
971 besti = cnt_cur->bc_ptrs[0];
972 }
973 }
974 /*
975 * It didn't work. We COULD be in a case where
976 * there's a good record somewhere, so try again.
977 */
978 if (blen == 0)
979 break;
980 /*
981 * Point at the best entry, and retrieve it again.
982 */
983 cnt_cur->bc_ptrs[0] = besti;
0e266570 984 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
2bd0ea18
NS
985 goto error0;
986 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
56b2de80 987 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
2bd0ea18
NS
988 args->len = blen;
989 if (!xfs_alloc_fix_minleft(args)) {
990 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
56b2de80 991 trace_xfs_alloc_near_nominleft(args);
2bd0ea18
NS
992 return 0;
993 }
994 blen = args->len;
995 /*
996 * We are allocating starting at bnew for blen blocks.
997 */
998 args->agbno = bnew;
999 ASSERT(bnew >= ltbno);
56b2de80 1000 ASSERT(bnew + blen <= ltbno + ltlen);
2bd0ea18
NS
1001 /*
1002 * Set up a cursor for the by-bno tree.
1003 */
b194c7d8
BN
1004 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1005 args->agbp, args->agno, XFS_BTNUM_BNO);
2bd0ea18
NS
1006 /*
1007 * Fix up the btree entries.
1008 */
0e266570
NS
1009 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1010 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
2bd0ea18
NS
1011 goto error0;
1012 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1013 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
56b2de80
DC
1014
1015 trace_xfs_alloc_near_first(args);
2bd0ea18
NS
1016 return 0;
1017 }
1018 /*
1019 * Second algorithm.
1020 * Search in the by-bno tree to the left and to the right
1021 * simultaneously, until in each case we find a space big enough,
1022 * or run into the edge of the tree. When we run into the edge,
1023 * we deallocate that cursor.
1024 * If both searches succeed, we compare the two spaces and pick
1025 * the better one.
1026 * With alignment, it's possible for both to fail; the upper
1027 * level algorithm that picks allocation groups for allocations
1028 * is not supposed to do this.
1029 */
1030 /*
1031 * Allocate and initialize the cursor for the leftward search.
1032 */
b194c7d8
BN
1033 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1034 args->agno, XFS_BTNUM_BNO);
2bd0ea18
NS
1035 /*
1036 * Lookup <= bno to find the leftward search's starting point.
1037 */
0e266570 1038 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
2bd0ea18
NS
1039 goto error0;
1040 if (!i) {
1041 /*
1042 * Didn't find anything; use this cursor for the rightward
1043 * search.
1044 */
1045 bno_cur_gt = bno_cur_lt;
062998e3 1046 bno_cur_lt = NULL;
2bd0ea18
NS
1047 }
1048 /*
1049 * Found something. Duplicate the cursor for the rightward search.
1050 */
0e266570 1051 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
2bd0ea18
NS
1052 goto error0;
1053 /*
1054 * Increment the cursor, so we will point at the entry just right
1055 * of the leftward entry if any, or to the leftmost entry.
1056 */
b194c7d8 1057 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
2bd0ea18
NS
1058 goto error0;
1059 if (!i) {
1060 /*
1061 * It failed, there are no rightward entries.
1062 */
1063 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1064 bno_cur_gt = NULL;
1065 }
1066 /*
1067 * Loop going left with the leftward cursor, right with the
1068 * rightward cursor, until either both directions give up or
1069 * we find an entry at least as big as minlen.
1070 */
1071 do {
1072 if (bno_cur_lt) {
0e266570 1073 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
2bd0ea18
NS
1074 goto error0;
1075 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
a2ceac1f
DC
1076 xfs_alloc_compute_aligned(args, ltbno, ltlen,
1077 &ltbnoa, &ltlena);
5e656dbb 1078 if (ltlena >= args->minlen)
2bd0ea18 1079 break;
b194c7d8 1080 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
2bd0ea18
NS
1081 goto error0;
1082 if (!i) {
1083 xfs_btree_del_cursor(bno_cur_lt,
1084 XFS_BTREE_NOERROR);
1085 bno_cur_lt = NULL;
1086 }
1087 }
1088 if (bno_cur_gt) {
0e266570 1089 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
2bd0ea18
NS
1090 goto error0;
1091 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
a2ceac1f
DC
1092 xfs_alloc_compute_aligned(args, gtbno, gtlen,
1093 &gtbnoa, &gtlena);
5e656dbb 1094 if (gtlena >= args->minlen)
2bd0ea18 1095 break;
b194c7d8 1096 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
2bd0ea18
NS
1097 goto error0;
1098 if (!i) {
1099 xfs_btree_del_cursor(bno_cur_gt,
1100 XFS_BTREE_NOERROR);
1101 bno_cur_gt = NULL;
1102 }
1103 }
1104 } while (bno_cur_lt || bno_cur_gt);
56b2de80 1105
2bd0ea18
NS
1106 /*
1107 * Got both cursors still active, need to find better entry.
1108 */
1109 if (bno_cur_lt && bno_cur_gt) {
2bd0ea18
NS
1110 if (ltlena >= args->minlen) {
1111 /*
56b2de80 1112 * Left side is good, look for a right side entry.
2bd0ea18
NS
1113 */
1114 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1115 xfs_alloc_fix_len(args);
56b2de80 1116 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
84a62eea
DC
1117 args->alignment, args->userdata, ltbnoa,
1118 ltlena, &ltnew);
56b2de80
DC
1119
1120 error = xfs_alloc_find_best_extent(args,
1121 &bno_cur_lt, &bno_cur_gt,
a2ceac1f
DC
1122 ltdiff, &gtbno, &gtlen,
1123 &gtbnoa, &gtlena,
56b2de80
DC
1124 0 /* search right */);
1125 } else {
1126 ASSERT(gtlena >= args->minlen);
1127
2bd0ea18 1128 /*
56b2de80 1129 * Right side is good, look for a left side entry.
2bd0ea18
NS
1130 */
1131 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1132 xfs_alloc_fix_len(args);
56b2de80 1133 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
84a62eea
DC
1134 args->alignment, args->userdata, gtbnoa,
1135 gtlena, &gtnew);
56b2de80
DC
1136
1137 error = xfs_alloc_find_best_extent(args,
1138 &bno_cur_gt, &bno_cur_lt,
a2ceac1f
DC
1139 gtdiff, &ltbno, &ltlen,
1140 &ltbnoa, &ltlena,
56b2de80 1141 1 /* search left */);
2bd0ea18 1142 }
56b2de80
DC
1143
1144 if (error)
1145 goto error0;
2bd0ea18 1146 }
56b2de80 1147
2bd0ea18
NS
1148 /*
1149 * If we couldn't get anything, give up.
1150 */
1151 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
a2ceac1f
DC
1152 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1153
1154 if (!forced++) {
1155 trace_xfs_alloc_near_busy(args);
1156 xfs_log_force(args->mp, XFS_LOG_SYNC);
1157 goto restart;
1158 }
56b2de80 1159 trace_xfs_alloc_size_neither(args);
2bd0ea18
NS
1160 args->agbno = NULLAGBLOCK;
1161 return 0;
1162 }
56b2de80 1163
2bd0ea18
NS
1164 /*
1165 * At this point we have selected a freespace entry, either to the
1166 * left or to the right. If it's on the right, copy all the
1167 * useful variables to the "left" set so we only have one
1168 * copy of this code.
1169 */
1170 if (bno_cur_gt) {
1171 bno_cur_lt = bno_cur_gt;
1172 bno_cur_gt = NULL;
1173 ltbno = gtbno;
1174 ltbnoa = gtbnoa;
1175 ltlen = gtlen;
1176 ltlena = gtlena;
1177 j = 1;
1178 } else
1179 j = 0;
56b2de80 1180
2bd0ea18
NS
1181 /*
1182 * Fix up the length and compute the useful address.
1183 */
2bd0ea18
NS
1184 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1185 xfs_alloc_fix_len(args);
1186 if (!xfs_alloc_fix_minleft(args)) {
56b2de80 1187 trace_xfs_alloc_near_nominleft(args);
2bd0ea18
NS
1188 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1189 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1190 return 0;
1191 }
1192 rlen = args->len;
a2ceac1f 1193 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
84a62eea 1194 args->userdata, ltbnoa, ltlena, &ltnew);
2bd0ea18 1195 ASSERT(ltnew >= ltbno);
a2ceac1f 1196 ASSERT(ltnew + rlen <= ltbnoa + ltlena);
6e3140c7 1197 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
2bd0ea18 1198 args->agbno = ltnew;
a2ceac1f 1199
0e266570
NS
1200 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1201 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
2bd0ea18 1202 goto error0;
56b2de80
DC
1203
1204 if (j)
1205 trace_xfs_alloc_near_greater(args);
1206 else
1207 trace_xfs_alloc_near_lesser(args);
1208
2bd0ea18
NS
1209 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1210 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1211 return 0;
1212
1213 error0:
56b2de80 1214 trace_xfs_alloc_near_error(args);
2bd0ea18
NS
1215 if (cnt_cur != NULL)
1216 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1217 if (bno_cur_lt != NULL)
1218 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1219 if (bno_cur_gt != NULL)
1220 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1221 return error;
1222}
1223
1224/*
1225 * Allocate a variable extent anywhere in the allocation group agno.
1226 * Extent's length (returned in len) will be between minlen and maxlen,
1227 * and of the form k * prod + mod unless there's nothing that large.
1228 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1229 */
1230STATIC int /* error */
1231xfs_alloc_ag_vextent_size(
dfc130f3 1232 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 1233{
dfc130f3
RC
1234 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */
1235 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */
2bd0ea18
NS
1236 int error; /* error result */
1237 xfs_agblock_t fbno; /* start of found freespace */
1238 xfs_extlen_t flen; /* length of found freespace */
2bd0ea18
NS
1239 int i; /* temp status variable */
1240 xfs_agblock_t rbno; /* returned block number */
1241 xfs_extlen_t rlen; /* length of returned extent */
a2ceac1f 1242 int forced = 0;
2bd0ea18 1243
a2ceac1f 1244restart:
2bd0ea18
NS
1245 /*
1246 * Allocate and initialize a cursor for the by-size btree.
1247 */
b194c7d8
BN
1248 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1249 args->agno, XFS_BTNUM_CNT);
2bd0ea18 1250 bno_cur = NULL;
a2ceac1f 1251
2bd0ea18
NS
1252 /*
1253 * Look for an entry >= maxlen+alignment-1 blocks.
1254 */
0e266570
NS
1255 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1256 args->maxlen + args->alignment - 1, &i)))
2bd0ea18 1257 goto error0;
a2ceac1f 1258
2bd0ea18 1259 /*
a2ceac1f
DC
1260 * If none or we have busy extents that we cannot allocate from, then
1261 * we have to settle for a smaller extent. In the case that there are
1262 * no large extents, this will return the last entry in the tree unless
1263 * the tree is empty. In the case that there are only busy large
1264 * extents, this will return the largest small extent unless there
1265 * are no smaller extents available.
5000d01d 1266 */
a2ceac1f
DC
1267 if (!i || forced > 1) {
1268 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1269 &fbno, &flen, &i);
1270 if (error)
2bd0ea18
NS
1271 goto error0;
1272 if (i == 0 || flen == 0) {
1273 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
56b2de80 1274 trace_xfs_alloc_size_noentry(args);
2bd0ea18
NS
1275 return 0;
1276 }
1277 ASSERT(i == 1);
a2ceac1f
DC
1278 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1279 } else {
1280 /*
1281 * Search for a non-busy extent that is large enough.
1282 * If we are at low space, don't check, or if we fall of
1283 * the end of the btree, turn off the busy check and
1284 * restart.
1285 */
1286 for (;;) {
1287 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1288 if (error)
1289 goto error0;
1290 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1291
1292 xfs_alloc_compute_aligned(args, fbno, flen,
1293 &rbno, &rlen);
1294
1295 if (rlen >= args->maxlen)
1296 break;
1297
1298 error = xfs_btree_increment(cnt_cur, 0, &i);
1299 if (error)
1300 goto error0;
1301 if (i == 0) {
1302 /*
1303 * Our only valid extents must have been busy.
1304 * Make it unbusy by forcing the log out and
1305 * retrying. If we've been here before, forcing
1306 * the log isn't making the extents available,
1307 * which means they have probably been freed in
1308 * this transaction. In that case, we have to
1309 * give up on them and we'll attempt a minlen
1310 * allocation the next time around.
1311 */
1312 xfs_btree_del_cursor(cnt_cur,
1313 XFS_BTREE_NOERROR);
1314 trace_xfs_alloc_size_busy(args);
1315 if (!forced++)
1316 xfs_log_force(args->mp, XFS_LOG_SYNC);
1317 goto restart;
1318 }
1319 }
2bd0ea18 1320 }
a2ceac1f 1321
2bd0ea18
NS
1322 /*
1323 * In the first case above, we got the last entry in the
1324 * by-size btree. Now we check to see if the space hits maxlen
1325 * once aligned; if not, we search left for something better.
1326 * This can't happen in the second case above.
1327 */
2bd0ea18 1328 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
5000d01d 1329 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
2bd0ea18
NS
1330 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1331 if (rlen < args->maxlen) {
1332 xfs_agblock_t bestfbno;
1333 xfs_extlen_t bestflen;
1334 xfs_agblock_t bestrbno;
1335 xfs_extlen_t bestrlen;
1336
1337 bestrlen = rlen;
1338 bestrbno = rbno;
1339 bestflen = flen;
1340 bestfbno = fbno;
1341 for (;;) {
b194c7d8 1342 if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
2bd0ea18
NS
1343 goto error0;
1344 if (i == 0)
1345 break;
0e266570
NS
1346 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1347 &i)))
2bd0ea18
NS
1348 goto error0;
1349 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1350 if (flen < bestrlen)
1351 break;
a2ceac1f
DC
1352 xfs_alloc_compute_aligned(args, fbno, flen,
1353 &rbno, &rlen);
2bd0ea18
NS
1354 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1355 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1356 (rlen <= flen && rbno + rlen <= fbno + flen),
1357 error0);
1358 if (rlen > bestrlen) {
1359 bestrlen = rlen;
1360 bestrbno = rbno;
1361 bestflen = flen;
1362 bestfbno = fbno;
1363 if (rlen == args->maxlen)
1364 break;
1365 }
5000d01d 1366 }
0e266570
NS
1367 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1368 &i)))
2bd0ea18
NS
1369 goto error0;
1370 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1371 rlen = bestrlen;
1372 rbno = bestrbno;
1373 flen = bestflen;
1374 fbno = bestfbno;
1375 }
1376 args->wasfromfl = 0;
1377 /*
1378 * Fix up the length.
1379 */
1380 args->len = rlen;
a2ceac1f
DC
1381 if (rlen < args->minlen) {
1382 if (!forced++) {
1383 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1384 trace_xfs_alloc_size_busy(args);
1385 xfs_log_force(args->mp, XFS_LOG_SYNC);
1386 goto restart;
1387 }
1388 goto out_nominleft;
2bd0ea18 1389 }
a2ceac1f
DC
1390 xfs_alloc_fix_len(args);
1391
1392 if (!xfs_alloc_fix_minleft(args))
1393 goto out_nominleft;
2bd0ea18
NS
1394 rlen = args->len;
1395 XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
1396 /*
1397 * Allocate and initialize a cursor for the by-block tree.
1398 */
b194c7d8
BN
1399 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1400 args->agno, XFS_BTNUM_BNO);
0e266570
NS
1401 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1402 rbno, rlen, XFSA_FIXUP_CNT_OK)))
2bd0ea18
NS
1403 goto error0;
1404 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1405 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1406 cnt_cur = bno_cur = NULL;
1407 args->len = rlen;
1408 args->agbno = rbno;
1409 XFS_WANT_CORRUPTED_GOTO(
1410 args->agbno + args->len <=
6e3140c7 1411 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
2bd0ea18 1412 error0);
56b2de80 1413 trace_xfs_alloc_size_done(args);
2bd0ea18
NS
1414 return 0;
1415
1416error0:
56b2de80 1417 trace_xfs_alloc_size_error(args);
2bd0ea18
NS
1418 if (cnt_cur)
1419 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1420 if (bno_cur)
1421 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1422 return error;
a2ceac1f
DC
1423
1424out_nominleft:
1425 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1426 trace_xfs_alloc_size_nominleft(args);
1427 args->agbno = NULLAGBLOCK;
1428 return 0;
2bd0ea18
NS
1429}
1430
1431/*
1432 * Deal with the case where only small freespaces remain.
1433 * Either return the contents of the last freespace record,
1434 * or allocate space from the freelist if there is nothing in the tree.
1435 */
1436STATIC int /* error */
1437xfs_alloc_ag_vextent_small(
dfc130f3
RC
1438 xfs_alloc_arg_t *args, /* allocation argument structure */
1439 xfs_btree_cur_t *ccur, /* by-size cursor */
1440 xfs_agblock_t *fbnop, /* result block number */
1441 xfs_extlen_t *flenp, /* result length */
2bd0ea18
NS
1442 int *stat) /* status: 0-freelist, 1-normal/none */
1443{
1444 int error;
1445 xfs_agblock_t fbno;
1446 xfs_extlen_t flen;
2bd0ea18
NS
1447 int i;
1448
b194c7d8 1449 if ((error = xfs_btree_decrement(ccur, 0, &i)))
2bd0ea18
NS
1450 goto error0;
1451 if (i) {
0e266570 1452 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
2bd0ea18
NS
1453 goto error0;
1454 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1455 }
1456 /*
1457 * Nothing in the btree, try the freelist. Make sure
1458 * to respect minleft even when pulling from the
1459 * freelist.
1460 */
1461 else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
6e3140c7
NS
1462 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1463 > args->minleft)) {
cdded3d8
DC
1464 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1465 if (error)
2bd0ea18
NS
1466 goto error0;
1467 if (fbno != NULLAGBLOCK) {
a2ceac1f
DC
1468 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1469 args->userdata);
1470
2bd0ea18
NS
1471 if (args->userdata) {
1472 xfs_buf_t *bp;
1473
1474 bp = xfs_btree_get_bufs(args->mp, args->tp,
1475 args->agno, fbno, 0);
1476 xfs_trans_binval(args->tp, bp);
2bd0ea18
NS
1477 }
1478 args->len = 1;
1479 args->agbno = fbno;
1480 XFS_WANT_CORRUPTED_GOTO(
1481 args->agbno + args->len <=
6e3140c7 1482 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
2bd0ea18
NS
1483 error0);
1484 args->wasfromfl = 1;
56b2de80 1485 trace_xfs_alloc_small_freelist(args);
2bd0ea18
NS
1486 *stat = 0;
1487 return 0;
1488 }
1489 /*
1490 * Nothing in the freelist.
1491 */
1492 else
1493 flen = 0;
1494 }
1495 /*
1496 * Can't allocate from the freelist for some reason.
1497 */
5e656dbb
BN
1498 else {
1499 fbno = NULLAGBLOCK;
2bd0ea18 1500 flen = 0;
5e656dbb 1501 }
2bd0ea18
NS
1502 /*
1503 * Can't do the allocation, give up.
1504 */
1505 if (flen < args->minlen) {
1506 args->agbno = NULLAGBLOCK;
56b2de80 1507 trace_xfs_alloc_small_notenough(args);
2bd0ea18
NS
1508 flen = 0;
1509 }
1510 *fbnop = fbno;
1511 *flenp = flen;
1512 *stat = 1;
56b2de80 1513 trace_xfs_alloc_small_done(args);
2bd0ea18
NS
1514 return 0;
1515
1516error0:
56b2de80 1517 trace_xfs_alloc_small_error(args);
2bd0ea18
NS
1518 return error;
1519}
1520
1521/*
1522 * Free the extent starting at agno/bno for length.
1523 */
1524STATIC int /* error */
1525xfs_free_ag_extent(
1526 xfs_trans_t *tp, /* transaction pointer */
dfc130f3 1527 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2bd0ea18
NS
1528 xfs_agnumber_t agno, /* allocation group number */
1529 xfs_agblock_t bno, /* starting block number */
1530 xfs_extlen_t len, /* length of extent */
1531 int isfl) /* set if is freelist blocks - no sb acctg */
1532{
dfc130f3
RC
1533 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
1534 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
2bd0ea18 1535 int error; /* error return value */
2bd0ea18
NS
1536 xfs_agblock_t gtbno; /* start of right neighbor block */
1537 xfs_extlen_t gtlen; /* length of right neighbor block */
1538 int haveleft; /* have a left neighbor block */
1539 int haveright; /* have a right neighbor block */
1540 int i; /* temp, result code */
1541 xfs_agblock_t ltbno; /* start of left neighbor block */
1542 xfs_extlen_t ltlen; /* length of left neighbor block */
1543 xfs_mount_t *mp; /* mount point struct for filesystem */
1544 xfs_agblock_t nbno; /* new starting block of freespace */
1545 xfs_extlen_t nlen; /* new length of freespace */
a2ceac1f 1546 xfs_perag_t *pag; /* per allocation group data */
2bd0ea18
NS
1547
1548 mp = tp->t_mountp;
5000d01d 1549 /*
2bd0ea18
NS
1550 * Allocate and initialize a cursor for the by-block btree.
1551 */
b194c7d8 1552 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
2bd0ea18 1553 cnt_cur = NULL;
5000d01d 1554 /*
2bd0ea18
NS
1555 * Look for a neighboring block on the left (lower block numbers)
1556 * that is contiguous with this space.
1557 */
0e266570 1558 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
2bd0ea18
NS
1559 goto error0;
1560 if (haveleft) {
1561 /*
1562 * There is a block to our left.
1563 */
0e266570 1564 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
2bd0ea18
NS
1565 goto error0;
1566 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1567 /*
1568 * It's not contiguous, though.
1569 */
1570 if (ltbno + ltlen < bno)
1571 haveleft = 0;
1572 else {
1573 /*
1574 * If this failure happens the request to free this
1575 * space was invalid, it's (partly) already free.
1576 * Very bad.
1577 */
1578 XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
1579 }
1580 }
5000d01d 1581 /*
2bd0ea18
NS
1582 * Look for a neighboring block on the right (higher block numbers)
1583 * that is contiguous with this space.
1584 */
b194c7d8 1585 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
2bd0ea18
NS
1586 goto error0;
1587 if (haveright) {
1588 /*
1589 * There is a block to our right.
1590 */
0e266570 1591 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
2bd0ea18
NS
1592 goto error0;
1593 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1594 /*
1595 * It's not contiguous, though.
1596 */
1597 if (bno + len < gtbno)
1598 haveright = 0;
1599 else {
1600 /*
1601 * If this failure happens the request to free this
1602 * space was invalid, it's (partly) already free.
1603 * Very bad.
1604 */
1605 XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
1606 }
1607 }
1608 /*
1609 * Now allocate and initialize a cursor for the by-size tree.
1610 */
b194c7d8 1611 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
2bd0ea18
NS
1612 /*
1613 * Have both left and right contiguous neighbors.
1614 * Merge all three into a single free block.
1615 */
1616 if (haveleft && haveright) {
1617 /*
1618 * Delete the old by-size entry on the left.
1619 */
0e266570 1620 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18
NS
1621 goto error0;
1622 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
b194c7d8 1623 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18
NS
1624 goto error0;
1625 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1626 /*
1627 * Delete the old by-size entry on the right.
1628 */
0e266570 1629 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18
NS
1630 goto error0;
1631 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
b194c7d8 1632 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18
NS
1633 goto error0;
1634 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1635 /*
1636 * Delete the old by-block entry for the right block.
1637 */
b194c7d8 1638 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18
NS
1639 goto error0;
1640 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1641 /*
1642 * Move the by-block cursor back to the left neighbor.
1643 */
b194c7d8 1644 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18
NS
1645 goto error0;
1646 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1647#ifdef DEBUG
1648 /*
1649 * Check that this is the right record: delete didn't
1650 * mangle the cursor.
1651 */
1652 {
1653 xfs_agblock_t xxbno;
1654 xfs_extlen_t xxlen;
1655
0e266570
NS
1656 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1657 &i)))
2bd0ea18
NS
1658 goto error0;
1659 XFS_WANT_CORRUPTED_GOTO(
1660 i == 1 && xxbno == ltbno && xxlen == ltlen,
1661 error0);
1662 }
1663#endif
1664 /*
1665 * Update remaining by-block entry to the new, joined block.
1666 */
1667 nbno = ltbno;
1668 nlen = len + ltlen + gtlen;
0e266570 1669 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1670 goto error0;
1671 }
1672 /*
1673 * Have only a left contiguous neighbor.
1674 * Merge it together with the new freespace.
1675 */
1676 else if (haveleft) {
1677 /*
1678 * Delete the old by-size entry on the left.
1679 */
0e266570 1680 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18
NS
1681 goto error0;
1682 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
b194c7d8 1683 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18
NS
1684 goto error0;
1685 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1686 /*
1687 * Back up the by-block cursor to the left neighbor, and
1688 * update its length.
1689 */
b194c7d8 1690 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18
NS
1691 goto error0;
1692 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1693 nbno = ltbno;
1694 nlen = len + ltlen;
0e266570 1695 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1696 goto error0;
1697 }
1698 /*
1699 * Have only a right contiguous neighbor.
1700 * Merge it together with the new freespace.
1701 */
1702 else if (haveright) {
1703 /*
1704 * Delete the old by-size entry on the right.
1705 */
0e266570 1706 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18
NS
1707 goto error0;
1708 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
b194c7d8 1709 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18
NS
1710 goto error0;
1711 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1712 /*
5000d01d 1713 * Update the starting block and length of the right
2bd0ea18
NS
1714 * neighbor in the by-block tree.
1715 */
1716 nbno = bno;
1717 nlen = len + gtlen;
0e266570 1718 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1719 goto error0;
1720 }
1721 /*
1722 * No contiguous neighbors.
1723 * Insert the new freespace into the by-block tree.
1724 */
1725 else {
1726 nbno = bno;
1727 nlen = len;
b194c7d8 1728 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18
NS
1729 goto error0;
1730 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1731 }
1732 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1733 bno_cur = NULL;
1734 /*
1735 * In all cases we need to insert the new freespace in the by-size tree.
1736 */
0e266570 1737 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2bd0ea18
NS
1738 goto error0;
1739 XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
b194c7d8 1740 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18
NS
1741 goto error0;
1742 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1743 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1744 cnt_cur = NULL;
a2ceac1f 1745
2bd0ea18
NS
1746 /*
1747 * Update the freespace totals in the ag and superblock.
1748 */
a2ceac1f
DC
1749 pag = xfs_perag_get(mp, agno);
1750 error = xfs_alloc_update_counters(tp, pag, agbp, len);
1751 xfs_perag_put(pag);
1752 if (error)
1753 goto error0;
1754
1755 if (!isfl)
1756 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1757 XFS_STATS_INC(xs_freex);
1758 XFS_STATS_ADD(xs_freeb, len);
56b2de80
DC
1759
1760 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
3e535bba 1761
2bd0ea18
NS
1762 return 0;
1763
1764 error0:
56b2de80 1765 trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
2bd0ea18
NS
1766 if (bno_cur)
1767 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1768 if (cnt_cur)
1769 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1770 return error;
1771}
1772
5000d01d 1773/*
2bd0ea18
NS
1774 * Visible (exported) allocation/free functions.
1775 * Some of these are used just by xfs_alloc_btree.c and this file.
1776 */
1777
1778/*
1779 * Compute and fill in value of m_ag_maxlevels.
1780 */
1781void
1782xfs_alloc_compute_maxlevels(
1783 xfs_mount_t *mp) /* file system mount structure */
1784{
1785 int level;
1786 uint maxblocks;
1787 uint maxleafents;
1788 int minleafrecs;
1789 int minnoderecs;
1790
1791 maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1792 minleafrecs = mp->m_alloc_mnr[0];
1793 minnoderecs = mp->m_alloc_mnr[1];
1794 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1795 for (level = 1; maxblocks > 1; level++)
1796 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1797 mp->m_ag_maxlevels = level;
1798}
1799
56b2de80
DC
1800/*
1801 * Find the length of the longest extent in an AG.
1802 */
1803xfs_extlen_t
1804xfs_alloc_longest_free_extent(
1805 struct xfs_mount *mp,
1806 struct xfs_perag *pag)
1807{
1808 xfs_extlen_t need, delta = 0;
1809
1810 need = XFS_MIN_FREELIST_PAG(pag, mp);
1811 if (need > pag->pagf_flcount)
1812 delta = need - pag->pagf_flcount;
1813
1814 if (pag->pagf_longest > delta)
1815 return pag->pagf_longest - delta;
1816 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1817}
1818
2bd0ea18
NS
1819/*
1820 * Decide whether to use this allocation group for this allocation.
1821 * If so, fix up the btree freelist's size.
5e656dbb
BN
1822 *
1823 * Note: This is public so mkfs can call it
2bd0ea18
NS
1824 */
1825int /* error */
1826xfs_alloc_fix_freelist(
dfc130f3 1827 xfs_alloc_arg_t *args, /* allocation argument structure */
2bd0ea18
NS
1828 int flags) /* XFS_ALLOC_FLAG_... */
1829{
1830 xfs_buf_t *agbp; /* agf buffer pointer */
1831 xfs_agf_t *agf; /* a.g. freespace structure pointer */
1832 xfs_buf_t *agflbp;/* agfl buffer pointer */
1833 xfs_agblock_t bno; /* freelist block */
1834 xfs_extlen_t delta; /* new blocks needed in freelist */
1835 int error; /* error result code */
1836 xfs_extlen_t longest;/* longest extent in allocation group */
1837 xfs_mount_t *mp; /* file system mount point structure */
1838 xfs_extlen_t need; /* total blocks needed in freelist */
1839 xfs_perag_t *pag; /* per-ag information structure */
dfc130f3 1840 xfs_alloc_arg_t targs; /* local allocation arguments */
2bd0ea18
NS
1841 xfs_trans_t *tp; /* transaction pointer */
1842
1843 mp = args->mp;
1844
1845 pag = args->pag;
1846 tp = args->tp;
1847 if (!pag->pagf_init) {
0e266570
NS
1848 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1849 &agbp)))
2bd0ea18
NS
1850 return error;
1851 if (!pag->pagf_init) {
5e656dbb
BN
1852 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1853 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2bd0ea18
NS
1854 args->agbp = NULL;
1855 return 0;
1856 }
1857 } else
1858 agbp = NULL;
34317449 1859
5e656dbb
BN
1860 /*
1861 * If this is a metadata preferred pag and we are user data
34317449
NS
1862 * then try somewhere else if we are not being asked to
1863 * try harder at this point
1864 */
5e656dbb
BN
1865 if (pag->pagf_metadata && args->userdata &&
1866 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1867 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
34317449
NS
1868 args->agbp = NULL;
1869 return 0;
1870 }
1871
5e656dbb 1872 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
5e656dbb
BN
1873 /*
1874 * If it looks like there isn't a long enough extent, or enough
1875 * total blocks, reject it.
1876 */
56b2de80
DC
1877 need = XFS_MIN_FREELIST_PAG(pag, mp);
1878 longest = xfs_alloc_longest_free_extent(mp, pag);
5e656dbb
BN
1879 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1880 longest ||
1881 ((int)(pag->pagf_freeblks + pag->pagf_flcount -
1882 need - args->total) < (int)args->minleft)) {
1883 if (agbp)
1884 xfs_trans_brelse(tp, agbp);
1885 args->agbp = NULL;
1886 return 0;
1887 }
2bd0ea18 1888 }
5e656dbb 1889
2bd0ea18
NS
1890 /*
1891 * Get the a.g. freespace buffer.
1892 * Can fail if we're not blocking on locks, and it's held.
1893 */
1894 if (agbp == NULL) {
0e266570
NS
1895 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1896 &agbp)))
2bd0ea18
NS
1897 return error;
1898 if (agbp == NULL) {
5e656dbb
BN
1899 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1900 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2bd0ea18
NS
1901 args->agbp = NULL;
1902 return 0;
1903 }
1904 }
1905 /*
1906 * Figure out how many blocks we should have in the freelist.
1907 */
1908 agf = XFS_BUF_TO_AGF(agbp);
1909 need = XFS_MIN_FREELIST(agf, mp);
2bd0ea18
NS
1910 /*
1911 * If there isn't enough total or single-extent, reject it.
1912 */
5e656dbb
BN
1913 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1914 delta = need > be32_to_cpu(agf->agf_flcount) ?
1915 (need - be32_to_cpu(agf->agf_flcount)) : 0;
1916 longest = be32_to_cpu(agf->agf_longest);
1917 longest = (longest > delta) ? (longest - delta) :
1918 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1919 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1920 longest ||
1921 ((int)(be32_to_cpu(agf->agf_freeblks) +
1922 be32_to_cpu(agf->agf_flcount) - need - args->total) <
1923 (int)args->minleft)) {
1924 xfs_trans_brelse(tp, agbp);
1925 args->agbp = NULL;
1926 return 0;
1927 }
2bd0ea18
NS
1928 }
1929 /*
1930 * Make the freelist shorter if it's too long.
1931 */
6e3140c7 1932 while (be32_to_cpu(agf->agf_flcount) > need) {
2bd0ea18
NS
1933 xfs_buf_t *bp;
1934
5e656dbb
BN
1935 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
1936 if (error)
2bd0ea18 1937 return error;
0e266570 1938 if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
2bd0ea18
NS
1939 return error;
1940 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
1941 xfs_trans_binval(tp, bp);
2bd0ea18
NS
1942 }
1943 /*
1944 * Initialize the args structure.
1945 */
a2ceac1f 1946 memset(&targs, 0, sizeof(targs));
2bd0ea18
NS
1947 targs.tp = tp;
1948 targs.mp = mp;
1949 targs.agbp = agbp;
1950 targs.agno = args->agno;
2bd0ea18
NS
1951 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1952 targs.type = XFS_ALLOCTYPE_THIS_AG;
1953 targs.pag = pag;
0e266570 1954 if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
2bd0ea18
NS
1955 return error;
1956 /*
1957 * Make the freelist longer if it's too short.
1958 */
6e3140c7 1959 while (be32_to_cpu(agf->agf_flcount) < need) {
2bd0ea18 1960 targs.agbno = 0;
6e3140c7 1961 targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
2bd0ea18
NS
1962 /*
1963 * Allocate as many blocks as possible at once.
1964 */
cb4deb22
NS
1965 if ((error = xfs_alloc_ag_vextent(&targs))) {
1966 xfs_trans_brelse(tp, agflbp);
2bd0ea18 1967 return error;
cb4deb22 1968 }
2bd0ea18 1969 /*
dfc130f3
RC
1970 * Stop if we run out. Won't happen if callers are obeying
1971 * the restrictions correctly. Can happen for free calls
2bd0ea18
NS
1972 * on a completely full ag.
1973 */
5e656dbb
BN
1974 if (targs.agbno == NULLAGBLOCK) {
1975 if (flags & XFS_ALLOC_FLAG_FREEING)
1976 break;
1977 xfs_trans_brelse(tp, agflbp);
1978 args->agbp = NULL;
1979 return 0;
1980 }
2bd0ea18
NS
1981 /*
1982 * Put each allocated block on the list.
1983 */
1984 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
5e656dbb
BN
1985 error = xfs_alloc_put_freelist(tp, agbp,
1986 agflbp, bno, 0);
1987 if (error)
2bd0ea18
NS
1988 return error;
1989 }
1990 }
cb4deb22 1991 xfs_trans_brelse(tp, agflbp);
2bd0ea18
NS
1992 args->agbp = agbp;
1993 return 0;
1994}
1995
1996/*
1997 * Get a block from the freelist.
1998 * Returns with the buffer for the block gotten.
1999 */
2000int /* error */
2001xfs_alloc_get_freelist(
2002 xfs_trans_t *tp, /* transaction pointer */
2003 xfs_buf_t *agbp, /* buffer containing the agf structure */
cdded3d8
DC
2004 xfs_agblock_t *bnop, /* block address retrieved from freelist */
2005 int btreeblk) /* destination is a AGF btree */
2bd0ea18
NS
2006{
2007 xfs_agf_t *agf; /* a.g. freespace structure */
2bd0ea18
NS
2008 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
2009 xfs_agblock_t bno; /* block number returned */
dd5b876e 2010 __be32 *agfl_bno;
2bd0ea18 2011 int error;
cdded3d8 2012 int logflags;
dd5b876e 2013 xfs_mount_t *mp = tp->t_mountp;
2bd0ea18
NS
2014 xfs_perag_t *pag; /* per allocation group data */
2015
2bd0ea18
NS
2016 /*
2017 * Freelist is empty, give up.
2018 */
dd5b876e 2019 agf = XFS_BUF_TO_AGF(agbp);
46eca962 2020 if (!agf->agf_flcount) {
2bd0ea18
NS
2021 *bnop = NULLAGBLOCK;
2022 return 0;
2023 }
2024 /*
2025 * Read the array of free blocks.
2026 */
dd5b876e
DC
2027 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2028 &agflbp);
2029 if (error)
2bd0ea18 2030 return error;
dd5b876e
DC
2031
2032
2bd0ea18
NS
2033 /*
2034 * Get the block number and update the data structures.
2035 */
dd5b876e
DC
2036 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2037 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
5e656dbb 2038 be32_add_cpu(&agf->agf_flfirst, 1);
2bd0ea18 2039 xfs_trans_brelse(tp, agflbp);
6e3140c7 2040 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
46eca962 2041 agf->agf_flfirst = 0;
56b2de80
DC
2042
2043 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
5e656dbb 2044 be32_add_cpu(&agf->agf_flcount, -1);
2bd0ea18
NS
2045 xfs_trans_agflist_delta(tp, -1);
2046 pag->pagf_flcount--;
56b2de80 2047 xfs_perag_put(pag);
cdded3d8
DC
2048
2049 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2050 if (btreeblk) {
5e656dbb 2051 be32_add_cpu(&agf->agf_btreeblks, 1);
cdded3d8
DC
2052 pag->pagf_btreeblks++;
2053 logflags |= XFS_AGF_BTREEBLKS;
2054 }
2055
cdded3d8 2056 xfs_alloc_log_agf(tp, agbp, logflags);
2bd0ea18 2057 *bnop = bno;
3e535bba 2058
2bd0ea18
NS
2059 return 0;
2060}
2061
2062/*
2063 * Log the given fields from the agf structure.
2064 */
2065void
2066xfs_alloc_log_agf(
2067 xfs_trans_t *tp, /* transaction pointer */
2068 xfs_buf_t *bp, /* buffer for a.g. freelist header */
dfc130f3 2069 int fields) /* mask of fields to be logged (XFS_AGF_...) */
2bd0ea18
NS
2070{
2071 int first; /* first byte offset */
2072 int last; /* last byte offset */
2073 static const short offsets[] = {
2074 offsetof(xfs_agf_t, agf_magicnum),
2075 offsetof(xfs_agf_t, agf_versionnum),
2076 offsetof(xfs_agf_t, agf_seqno),
2077 offsetof(xfs_agf_t, agf_length),
2078 offsetof(xfs_agf_t, agf_roots[0]),
2079 offsetof(xfs_agf_t, agf_levels[0]),
2080 offsetof(xfs_agf_t, agf_flfirst),
2081 offsetof(xfs_agf_t, agf_fllast),
2082 offsetof(xfs_agf_t, agf_flcount),
2083 offsetof(xfs_agf_t, agf_freeblks),
2084 offsetof(xfs_agf_t, agf_longest),
cdded3d8 2085 offsetof(xfs_agf_t, agf_btreeblks),
dd5b876e 2086 offsetof(xfs_agf_t, agf_uuid),
2bd0ea18
NS
2087 sizeof(xfs_agf_t)
2088 };
2089
56b2de80
DC
2090 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2091
bdc16ee5 2092 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
dd5b876e 2093
2bd0ea18
NS
2094 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2095 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2096}
2097
2098/*
2099 * Interface for inode allocation to force the pag data to be initialized.
2100 */
2101int /* error */
2102xfs_alloc_pagf_init(
2103 xfs_mount_t *mp, /* file system mount structure */
2104 xfs_trans_t *tp, /* transaction pointer */
2105 xfs_agnumber_t agno, /* allocation group number */
2106 int flags) /* XFS_ALLOC_FLAGS_... */
2107{
7a3bffe4 2108 xfs_buf_t *bp;
2bd0ea18
NS
2109 int error;
2110
0e266570 2111 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2bd0ea18
NS
2112 return error;
2113 if (bp)
2114 xfs_trans_brelse(tp, bp);
2115 return 0;
2116}
2117
2118/*
2119 * Put the block on the freelist for the allocation group.
2120 */
2121int /* error */
2122xfs_alloc_put_freelist(
2123 xfs_trans_t *tp, /* transaction pointer */
2124 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2125 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
cdded3d8
DC
2126 xfs_agblock_t bno, /* block being freed */
2127 int btreeblk) /* block came from a AGF btree */
2bd0ea18
NS
2128{
2129 xfs_agf_t *agf; /* a.g. freespace structure */
5e656dbb 2130 __be32 *blockp;/* pointer to array entry */
2bd0ea18 2131 int error;
cdded3d8 2132 int logflags;
2bd0ea18
NS
2133 xfs_mount_t *mp; /* mount structure */
2134 xfs_perag_t *pag; /* per allocation group data */
dd5b876e
DC
2135 __be32 *agfl_bno;
2136 int startoff;
2bd0ea18
NS
2137
2138 agf = XFS_BUF_TO_AGF(agbp);
2139 mp = tp->t_mountp;
2140
2141 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
6e3140c7 2142 be32_to_cpu(agf->agf_seqno), &agflbp)))
2bd0ea18 2143 return error;
5e656dbb 2144 be32_add_cpu(&agf->agf_fllast, 1);
6e3140c7 2145 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
46eca962 2146 agf->agf_fllast = 0;
56b2de80
DC
2147
2148 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
5e656dbb 2149 be32_add_cpu(&agf->agf_flcount, 1);
2bd0ea18
NS
2150 xfs_trans_agflist_delta(tp, 1);
2151 pag->pagf_flcount++;
cdded3d8
DC
2152
2153 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2154 if (btreeblk) {
5e656dbb 2155 be32_add_cpu(&agf->agf_btreeblks, -1);
cdded3d8
DC
2156 pag->pagf_btreeblks--;
2157 logflags |= XFS_AGF_BTREEBLKS;
2158 }
56b2de80 2159 xfs_perag_put(pag);
cdded3d8 2160
5e656dbb
BN
2161 xfs_alloc_log_agf(tp, agbp, logflags);
2162
6e3140c7 2163 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
dd5b876e
DC
2164
2165 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2166 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
5e656dbb 2167 *blockp = cpu_to_be32(bno);
dd5b876e
DC
2168 startoff = (char *)blockp - (char *)agflbp->b_addr;
2169
cdded3d8 2170 xfs_alloc_log_agf(tp, agbp, logflags);
dd5b876e 2171
bdc16ee5 2172 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
dd5b876e
DC
2173 xfs_trans_log_buf(tp, agflbp, startoff,
2174 startoff + sizeof(xfs_agblock_t) - 1);
2bd0ea18
NS
2175 return 0;
2176}
2177
dd5b876e 2178static bool
a2ceac1f 2179xfs_agf_verify(
dd5b876e 2180 struct xfs_mount *mp,
a2ceac1f
DC
2181 struct xfs_buf *bp)
2182 {
dd5b876e 2183 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
a2ceac1f 2184
dd5b876e 2185 if (xfs_sb_version_hascrc(&mp->m_sb) &&
84a62eea 2186 !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid))
dd5b876e 2187 return false;
a2ceac1f 2188
dd5b876e
DC
2189 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2190 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2191 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2192 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2193 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2194 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2195 return false;
a2ceac1f
DC
2196
2197 /*
2198 * during growfs operations, the perag is not fully initialised,
2199 * so we can't use it for any useful checking. growfs ensures we can't
2200 * use it by using uncached buffers that don't have the perag attached
2201 * so we can detect and avoid this problem.
2202 */
dd5b876e
DC
2203 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2204 return false;
a2ceac1f 2205
dd5b876e
DC
2206 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2207 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2208 return false;
2209
2210 return true;;
a2ceac1f 2211
a2ceac1f
DC
2212}
2213
2214static void
2215xfs_agf_read_verify(
2216 struct xfs_buf *bp)
2217{
dd5b876e
DC
2218 struct xfs_mount *mp = bp->b_target->bt_mount;
2219 int agf_ok = 1;
2220
2221 if (xfs_sb_version_hascrc(&mp->m_sb))
d21ca64d 2222 agf_ok = xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF);
dd5b876e
DC
2223
2224 agf_ok = agf_ok && xfs_agf_verify(mp, bp);
2225
2226 if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
2227 XFS_RANDOM_ALLOC_READ_AGF))) {
2228 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
2229 xfs_buf_ioerror(bp, EFSCORRUPTED);
2230 }
a2ceac1f
DC
2231}
2232
2233static void
2234xfs_agf_write_verify(
2235 struct xfs_buf *bp)
2236{
dd5b876e
DC
2237 struct xfs_mount *mp = bp->b_target->bt_mount;
2238 struct xfs_buf_log_item *bip = bp->b_fspriv;
2239
2240 if (!xfs_agf_verify(mp, bp)) {
2241 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
2242 xfs_buf_ioerror(bp, EFSCORRUPTED);
2243 return;
2244 }
2245
2246 if (!xfs_sb_version_hascrc(&mp->m_sb))
2247 return;
2248
2249 if (bip)
2250 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2251
3faddc54 2252 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_AGF_CRC_OFF);
a2ceac1f
DC
2253}
2254
2255const struct xfs_buf_ops xfs_agf_buf_ops = {
2256 .verify_read = xfs_agf_read_verify,
2257 .verify_write = xfs_agf_write_verify,
2258};
2259
2bd0ea18
NS
2260/*
2261 * Read in the allocation group header (free/alloc section).
2262 */
2263int /* error */
56b2de80
DC
2264xfs_read_agf(
2265 struct xfs_mount *mp, /* mount point structure */
2266 struct xfs_trans *tp, /* transaction pointer */
2267 xfs_agnumber_t agno, /* allocation group number */
2268 int flags, /* XFS_BUF_ */
2269 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2bd0ea18 2270{
9440d84d 2271 int error;
2bd0ea18
NS
2272
2273 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
2274 error = xfs_trans_read_buf(
2275 mp, tp, mp->m_ddev_targp,
2276 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
a2ceac1f 2277 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
9440d84d 2278 if (error)
2bd0ea18 2279 return error;
56b2de80 2280 if (!*bpp)
2bd0ea18 2281 return 0;
56b2de80 2282
a2ceac1f
DC
2283 ASSERT(!(*bpp)->b_error);
2284 xfs_buf_set_ref(*bpp, XFS_AGF_REF);
56b2de80
DC
2285 return 0;
2286}
2287
2288/*
2289 * Read in the allocation group header (free/alloc section).
2290 */
2291int /* error */
2292xfs_alloc_read_agf(
2293 struct xfs_mount *mp, /* mount point structure */
2294 struct xfs_trans *tp, /* transaction pointer */
2295 xfs_agnumber_t agno, /* allocation group number */
2296 int flags, /* XFS_ALLOC_FLAG_... */
2297 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2298{
2299 struct xfs_agf *agf; /* ag freelist header */
2300 struct xfs_perag *pag; /* per allocation group data */
2301 int error;
2302
2303 ASSERT(agno != NULLAGNUMBER);
2304
2305 error = xfs_read_agf(mp, tp, agno,
2306 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2307 bpp);
2308 if (error)
2309 return error;
2310 if (!*bpp)
2311 return 0;
a2ceac1f 2312 ASSERT(!(*bpp)->b_error);
56b2de80
DC
2313
2314 agf = XFS_BUF_TO_AGF(*bpp);
2315 pag = xfs_perag_get(mp, agno);
2bd0ea18 2316 if (!pag->pagf_init) {
6e3140c7 2317 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
cdded3d8 2318 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
6e3140c7
NS
2319 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2320 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2bd0ea18 2321 pag->pagf_levels[XFS_BTNUM_BNOi] =
6e3140c7 2322 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2bd0ea18 2323 pag->pagf_levels[XFS_BTNUM_CNTi] =
6e3140c7 2324 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
5e656dbb 2325 spin_lock_init(&pag->pagb_lock);
56b2de80 2326 pag->pagb_count = 0;
2bd0ea18
NS
2327 pag->pagf_init = 1;
2328 }
2329#ifdef DEBUG
2330 else if (!XFS_FORCED_SHUTDOWN(mp)) {
6e3140c7 2331 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
cdded3d8 2332 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
6e3140c7
NS
2333 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2334 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2bd0ea18 2335 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
6e3140c7 2336 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2bd0ea18 2337 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
6e3140c7 2338 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2bd0ea18
NS
2339 }
2340#endif
56b2de80 2341 xfs_perag_put(pag);
2bd0ea18
NS
2342 return 0;
2343}
2344
2345/*
2346 * Allocate an extent (variable-size).
2347 * Depending on the allocation type, we either look in a single allocation
2348 * group or loop over the allocation groups to find the result.
2349 */
2350int /* error */
2351xfs_alloc_vextent(
dfc130f3 2352 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 2353{
dfc130f3 2354 xfs_agblock_t agsize; /* allocation group size */
2bd0ea18
NS
2355 int error;
2356 int flags; /* XFS_ALLOC_FLAG_... locking flags */
2bd0ea18
NS
2357 xfs_extlen_t minleft;/* minimum left value, temp copy */
2358 xfs_mount_t *mp; /* mount structure pointer */
2359 xfs_agnumber_t sagno; /* starting allocation group number */
dfc130f3 2360 xfs_alloctype_t type; /* input allocation type */
34317449 2361 int bump_rotor = 0;
9baa549b 2362 int no_min = 0;
46eca962 2363 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2bd0ea18
NS
2364
2365 mp = args->mp;
2366 type = args->otype = args->type;
2367 args->agbno = NULLAGBLOCK;
2368 /*
2369 * Just fix this up, for the case where the last a.g. is shorter
2370 * (or there's only one a.g.) and the caller couldn't easily figure
2371 * that out (xfs_bmap_alloc).
2372 */
2373 agsize = mp->m_sb.sb_agblocks;
2374 if (args->maxlen > agsize)
2375 args->maxlen = agsize;
2376 if (args->alignment == 0)
2377 args->alignment = 1;
2378 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2379 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2380 ASSERT(args->minlen <= args->maxlen);
2381 ASSERT(args->minlen <= agsize);
2382 ASSERT(args->mod < args->prod);
2383 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2384 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2385 args->minlen > args->maxlen || args->minlen > agsize ||
2386 args->mod >= args->prod) {
2387 args->fsbno = NULLFSBLOCK;
56b2de80 2388 trace_xfs_alloc_vextent_badargs(args);
2bd0ea18
NS
2389 return 0;
2390 }
9baa549b
SL
2391 minleft = args->minleft;
2392
2bd0ea18
NS
2393 switch (type) {
2394 case XFS_ALLOCTYPE_THIS_AG:
2395 case XFS_ALLOCTYPE_NEAR_BNO:
2396 case XFS_ALLOCTYPE_THIS_BNO:
2397 /*
2398 * These three force us into a single a.g.
2399 */
2400 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
56b2de80 2401 args->pag = xfs_perag_get(mp, args->agno);
2bd0ea18
NS
2402 args->minleft = 0;
2403 error = xfs_alloc_fix_freelist(args, 0);
2404 args->minleft = minleft;
2405 if (error) {
56b2de80 2406 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
2407 goto error0;
2408 }
2409 if (!args->agbp) {
56b2de80 2410 trace_xfs_alloc_vextent_noagbp(args);
2bd0ea18
NS
2411 break;
2412 }
2413 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
0e266570 2414 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 2415 goto error0;
2bd0ea18
NS
2416 break;
2417 case XFS_ALLOCTYPE_START_BNO:
2418 /*
2419 * Try near allocation first, then anywhere-in-ag after
2420 * the first a.g. fails.
2421 */
34317449
NS
2422 if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) &&
2423 (mp->m_flags & XFS_MOUNT_32BITINODES)) {
46eca962
NS
2424 args->fsbno = XFS_AGB_TO_FSB(mp,
2425 ((mp->m_agfrotor / rotorstep) %
2426 mp->m_sb.sb_agcount), 0);
34317449
NS
2427 bump_rotor = 1;
2428 }
2bd0ea18
NS
2429 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2430 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2431 /* FALLTHROUGH */
2432 case XFS_ALLOCTYPE_ANY_AG:
2433 case XFS_ALLOCTYPE_START_AG:
2434 case XFS_ALLOCTYPE_FIRST_AG:
2435 /*
2436 * Rotate through the allocation groups looking for a winner.
2437 */
2438 if (type == XFS_ALLOCTYPE_ANY_AG) {
2439 /*
2440 * Start with the last place we left off.
2441 */
46eca962
NS
2442 args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2443 mp->m_sb.sb_agcount;
2bd0ea18
NS
2444 args->type = XFS_ALLOCTYPE_THIS_AG;
2445 flags = XFS_ALLOC_FLAG_TRYLOCK;
2446 } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2447 /*
2448 * Start with allocation group given by bno.
2449 */
2450 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2451 args->type = XFS_ALLOCTYPE_THIS_AG;
2452 sagno = 0;
2453 flags = 0;
2454 } else {
2455 if (type == XFS_ALLOCTYPE_START_AG)
2456 args->type = XFS_ALLOCTYPE_THIS_AG;
2457 /*
2458 * Start with the given allocation group.
2459 */
2460 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2461 flags = XFS_ALLOC_FLAG_TRYLOCK;
2462 }
2463 /*
2464 * Loop over allocation groups twice; first time with
2465 * trylock set, second time without.
2466 */
2467 for (;;) {
56b2de80 2468 args->pag = xfs_perag_get(mp, args->agno);
9baa549b
SL
2469 if (no_min) args->minleft = 0;
2470 error = xfs_alloc_fix_freelist(args, flags);
2471 args->minleft = minleft;
2472 if (error) {
56b2de80 2473 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
2474 goto error0;
2475 }
2476 /*
2477 * If we get a buffer back then the allocation will fly.
2478 */
2479 if (args->agbp) {
0e266570 2480 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 2481 goto error0;
2bd0ea18
NS
2482 break;
2483 }
56b2de80
DC
2484
2485 trace_xfs_alloc_vextent_loopfailed(args);
2486
2bd0ea18
NS
2487 /*
2488 * Didn't work, figure out the next iteration.
2489 */
2490 if (args->agno == sagno &&
2491 type == XFS_ALLOCTYPE_START_BNO)
2492 args->type = XFS_ALLOCTYPE_THIS_AG;
5e656dbb
BN
2493 /*
2494 * For the first allocation, we can try any AG to get
2495 * space. However, if we already have allocated a
2496 * block, we don't want to try AGs whose number is below
2497 * sagno. Otherwise, we may end up with out-of-order
2498 * locking of AGF, which might cause deadlock.
2499 */
2500 if (++(args->agno) == mp->m_sb.sb_agcount) {
2501 if (args->firstblock != NULLFSBLOCK)
2502 args->agno = sagno;
2503 else
2504 args->agno = 0;
2505 }
5000d01d 2506 /*
2bd0ea18
NS
2507 * Reached the starting a.g., must either be done
2508 * or switch to non-trylock mode.
2509 */
2510 if (args->agno == sagno) {
9baa549b 2511 if (no_min == 1) {
2bd0ea18 2512 args->agbno = NULLAGBLOCK;
56b2de80 2513 trace_xfs_alloc_vextent_allfailed(args);
2bd0ea18
NS
2514 break;
2515 }
9baa549b
SL
2516 if (flags == 0) {
2517 no_min = 1;
2518 } else {
2519 flags = 0;
2520 if (type == XFS_ALLOCTYPE_START_BNO) {
2521 args->agbno = XFS_FSB_TO_AGBNO(mp,
2522 args->fsbno);
2523 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2524 }
2bd0ea18
NS
2525 }
2526 }
56b2de80 2527 xfs_perag_put(args->pag);
2bd0ea18 2528 }
46eca962
NS
2529 if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2530 if (args->agno == sagno)
2531 mp->m_agfrotor = (mp->m_agfrotor + 1) %
2532 (mp->m_sb.sb_agcount * rotorstep);
2533 else
2534 mp->m_agfrotor = (args->agno * rotorstep + 1) %
2535 (mp->m_sb.sb_agcount * rotorstep);
2536 }
2bd0ea18
NS
2537 break;
2538 default:
2539 ASSERT(0);
2540 /* NOTREACHED */
2541 }
2542 if (args->agbno == NULLAGBLOCK)
2543 args->fsbno = NULLFSBLOCK;
2544 else {
2545 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2546#ifdef DEBUG
2547 ASSERT(args->len >= args->minlen);
2548 ASSERT(args->len <= args->maxlen);
2549 ASSERT(args->agbno % args->alignment == 0);
2550 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2551 args->len);
2552#endif
2553 }
56b2de80 2554 xfs_perag_put(args->pag);
2bd0ea18
NS
2555 return 0;
2556error0:
56b2de80 2557 xfs_perag_put(args->pag);
2bd0ea18
NS
2558 return error;
2559}
2560
2561/*
2562 * Free an extent.
2563 * Just break up the extent address and hand off to xfs_free_ag_extent
2564 * after fixing up the freelist.
2565 */
2566int /* error */
2567xfs_free_extent(
2568 xfs_trans_t *tp, /* transaction pointer */
2569 xfs_fsblock_t bno, /* starting block number of extent */
2570 xfs_extlen_t len) /* length of extent */
2571{
5e656dbb 2572 xfs_alloc_arg_t args;
2bd0ea18
NS
2573 int error;
2574
2575 ASSERT(len != 0);
5e656dbb 2576 memset(&args, 0, sizeof(xfs_alloc_arg_t));
2bd0ea18
NS
2577 args.tp = tp;
2578 args.mp = tp->t_mountp;
a2ceac1f
DC
2579
2580 /*
2581 * validate that the block number is legal - the enables us to detect
2582 * and handle a silent filesystem corruption rather than crashing.
2583 */
2bd0ea18 2584 args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
a2ceac1f
DC
2585 if (args.agno >= args.mp->m_sb.sb_agcount)
2586 return EFSCORRUPTED;
2587
2bd0ea18 2588 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
a2ceac1f
DC
2589 if (args.agbno >= args.mp->m_sb.sb_agblocks)
2590 return EFSCORRUPTED;
2591
56b2de80 2592 args.pag = xfs_perag_get(args.mp, args.agno);
a2ceac1f
DC
2593 ASSERT(args.pag);
2594
2595 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2596 if (error)
2bd0ea18 2597 goto error0;
a2ceac1f
DC
2598
2599 /* validate the extent size is legal now we have the agf locked */
2600 if (args.agbno + len >
2601 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
2602 error = EFSCORRUPTED;
2603 goto error0;
2604 }
2605
5e656dbb 2606 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
a2ceac1f
DC
2607 if (!error)
2608 xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
2bd0ea18 2609error0:
56b2de80 2610 xfs_perag_put(args.pag);
2bd0ea18
NS
2611 return error;
2612}