]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxfs/xfs_alloc.c
libxfs: restructure to match kernel layout
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_alloc.c
CommitLineData
2bd0ea18 1/*
5e656dbb 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
da23017d 3 * All Rights Reserved.
5000d01d 4 *
da23017d
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
2bd0ea18 7 * published by the Free Software Foundation.
5000d01d 8 *
da23017d
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
5000d01d 13 *
da23017d
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2bd0ea18 17 */
2bd0ea18
NS
18#include <xfs.h>
19
ff105f75
DC
20struct workqueue_struct *xfs_alloc_wq;
21
2bd0ea18 22#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
5e656dbb
BN
23
24#define XFSA_FIXUP_BNO_OK 1
25#define XFSA_FIXUP_CNT_OK 2
26
5e656dbb
BN
27STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
28STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
29STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
30STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
a2ceac1f 31 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
2bd0ea18 32
b194c7d8
BN
33/*
34 * Lookup the record equal to [bno, len] in the btree given by cur.
35 */
36STATIC int /* error */
37xfs_alloc_lookup_eq(
38 struct xfs_btree_cur *cur, /* btree cursor */
39 xfs_agblock_t bno, /* starting block of extent */
40 xfs_extlen_t len, /* length of extent */
41 int *stat) /* success/failure */
42{
43 cur->bc_rec.a.ar_startblock = bno;
44 cur->bc_rec.a.ar_blockcount = len;
45 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
46}
47
48/*
49 * Lookup the first record greater than or equal to [bno, len]
50 * in the btree given by cur.
51 */
a2ceac1f 52int /* error */
b194c7d8
BN
53xfs_alloc_lookup_ge(
54 struct xfs_btree_cur *cur, /* btree cursor */
55 xfs_agblock_t bno, /* starting block of extent */
56 xfs_extlen_t len, /* length of extent */
57 int *stat) /* success/failure */
58{
59 cur->bc_rec.a.ar_startblock = bno;
60 cur->bc_rec.a.ar_blockcount = len;
61 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
62}
63
64/*
65 * Lookup the first record less than or equal to [bno, len]
66 * in the btree given by cur.
67 */
a2ceac1f 68int /* error */
b194c7d8
BN
69xfs_alloc_lookup_le(
70 struct xfs_btree_cur *cur, /* btree cursor */
71 xfs_agblock_t bno, /* starting block of extent */
72 xfs_extlen_t len, /* length of extent */
73 int *stat) /* success/failure */
74{
75 cur->bc_rec.a.ar_startblock = bno;
76 cur->bc_rec.a.ar_blockcount = len;
77 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
78}
79
80/*
81 * Update the record referred to by cur to the value given
82 * by [bno, len].
83 * This either works (return 0) or gets an EFSCORRUPTED error.
84 */
85STATIC int /* error */
86xfs_alloc_update(
87 struct xfs_btree_cur *cur, /* btree cursor */
88 xfs_agblock_t bno, /* starting block of extent */
89 xfs_extlen_t len) /* length of extent */
90{
91 union xfs_btree_rec rec;
92
93 rec.alloc.ar_startblock = cpu_to_be32(bno);
94 rec.alloc.ar_blockcount = cpu_to_be32(len);
95 return xfs_btree_update(cur, &rec);
96}
97
98/*
99 * Get the data from the pointed-to record.
100 */
a2ceac1f 101int /* error */
b194c7d8
BN
102xfs_alloc_get_rec(
103 struct xfs_btree_cur *cur, /* btree cursor */
104 xfs_agblock_t *bno, /* output: starting block of extent */
105 xfs_extlen_t *len, /* output: length of extent */
106 int *stat) /* output: success/failure */
107{
108 union xfs_btree_rec *rec;
109 int error;
110
111 error = xfs_btree_get_rec(cur, &rec, stat);
112 if (!error && *stat == 1) {
113 *bno = be32_to_cpu(rec->alloc.ar_startblock);
114 *len = be32_to_cpu(rec->alloc.ar_blockcount);
115 }
116 return error;
117}
118
2bd0ea18
NS
119/*
120 * Compute aligned version of the found extent.
121 * Takes alignment and min length into account.
122 */
5e656dbb 123STATIC void
2bd0ea18 124xfs_alloc_compute_aligned(
a2ceac1f 125 xfs_alloc_arg_t *args, /* allocation argument structure */
2bd0ea18
NS
126 xfs_agblock_t foundbno, /* starting block in found extent */
127 xfs_extlen_t foundlen, /* length in found extent */
2bd0ea18
NS
128 xfs_agblock_t *resbno, /* result block number */
129 xfs_extlen_t *reslen) /* result length */
130{
131 xfs_agblock_t bno;
2bd0ea18
NS
132 xfs_extlen_t len;
133
a2ceac1f
DC
134 /* Trim busy sections out of found extent */
135 xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
136
137 if (args->alignment > 1 && len >= args->minlen) {
138 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
139 xfs_extlen_t diff = aligned_bno - bno;
140
141 *resbno = aligned_bno;
142 *reslen = diff >= len ? 0 : len - diff;
2bd0ea18 143 } else {
a2ceac1f
DC
144 *resbno = bno;
145 *reslen = len;
2bd0ea18 146 }
2bd0ea18
NS
147}
148
149/*
150 * Compute best start block and diff for "near" allocations.
151 * freelen >= wantlen already checked by caller.
152 */
153STATIC xfs_extlen_t /* difference value (absolute) */
154xfs_alloc_compute_diff(
155 xfs_agblock_t wantbno, /* target starting block */
156 xfs_extlen_t wantlen, /* target length */
157 xfs_extlen_t alignment, /* target alignment */
84a62eea 158 char userdata, /* are we allocating data? */
2bd0ea18
NS
159 xfs_agblock_t freebno, /* freespace's starting block */
160 xfs_extlen_t freelen, /* freespace's length */
161 xfs_agblock_t *newbnop) /* result: best start block from free */
162{
163 xfs_agblock_t freeend; /* end of freespace extent */
164 xfs_agblock_t newbno1; /* return block number */
165 xfs_agblock_t newbno2; /* other new block number */
0e266570
NS
166 xfs_extlen_t newlen1=0; /* length with newbno1 */
167 xfs_extlen_t newlen2=0; /* length with newbno2 */
2bd0ea18
NS
168 xfs_agblock_t wantend; /* end of target extent */
169
170 ASSERT(freelen >= wantlen);
171 freeend = freebno + freelen;
172 wantend = wantbno + wantlen;
84a62eea
DC
173 /*
174 * We want to allocate from the start of a free extent if it is past
175 * the desired block or if we are allocating user data and the free
176 * extent is before desired block. The second case is there to allow
177 * for contiguous allocation from the remaining free space if the file
178 * grows in the short term.
179 */
180 if (freebno >= wantbno || (userdata && freeend < wantend)) {
2bd0ea18
NS
181 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
182 newbno1 = NULLAGBLOCK;
183 } else if (freeend >= wantend && alignment > 1) {
184 newbno1 = roundup(wantbno, alignment);
185 newbno2 = newbno1 - alignment;
186 if (newbno1 >= freeend)
187 newbno1 = NULLAGBLOCK;
188 else
189 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
190 if (newbno2 < freebno)
191 newbno2 = NULLAGBLOCK;
192 else
193 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
194 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
195 if (newlen1 < newlen2 ||
196 (newlen1 == newlen2 &&
197 XFS_ABSDIFF(newbno1, wantbno) >
198 XFS_ABSDIFF(newbno2, wantbno)))
199 newbno1 = newbno2;
200 } else if (newbno2 != NULLAGBLOCK)
201 newbno1 = newbno2;
202 } else if (freeend >= wantend) {
203 newbno1 = wantbno;
204 } else if (alignment > 1) {
205 newbno1 = roundup(freeend - wantlen, alignment);
206 if (newbno1 > freeend - wantlen &&
207 newbno1 - alignment >= freebno)
208 newbno1 -= alignment;
209 else if (newbno1 >= freeend)
210 newbno1 = NULLAGBLOCK;
211 } else
212 newbno1 = freeend - wantlen;
213 *newbnop = newbno1;
214 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
215}
216
217/*
218 * Fix up the length, based on mod and prod.
219 * len should be k * prod + mod for some k.
220 * If len is too small it is returned unchanged.
221 * If len hits maxlen it is left alone.
222 */
223STATIC void
224xfs_alloc_fix_len(
dfc130f3 225 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18
NS
226{
227 xfs_extlen_t k;
228 xfs_extlen_t rlen;
229
230 ASSERT(args->mod < args->prod);
231 rlen = args->len;
232 ASSERT(rlen >= args->minlen);
233 ASSERT(rlen <= args->maxlen);
234 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
235 (args->mod == 0 && rlen < args->prod))
236 return;
237 k = rlen % args->prod;
238 if (k == args->mod)
239 return;
ff105f75
DC
240 if (k > args->mod)
241 rlen = rlen - (k - args->mod);
242 else
243 rlen = rlen - args->prod + (args->mod - k);
244 if ((int)rlen < (int)args->minlen)
245 return;
246 ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
247 ASSERT(rlen % args->prod == args->mod);
2bd0ea18
NS
248 args->len = rlen;
249}
250
251/*
252 * Fix up length if there is too little space left in the a.g.
253 * Return 1 if ok, 0 if too little, should give up.
254 */
255STATIC int
256xfs_alloc_fix_minleft(
dfc130f3 257 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18
NS
258{
259 xfs_agf_t *agf; /* a.g. freelist header */
260 int diff; /* free space difference */
261
262 if (args->minleft == 0)
263 return 1;
264 agf = XFS_BUF_TO_AGF(args->agbp);
6e3140c7 265 diff = be32_to_cpu(agf->agf_freeblks)
2bd0ea18
NS
266 - args->len - args->minleft;
267 if (diff >= 0)
268 return 1;
269 args->len += diff; /* shrink the allocated space */
270 if (args->len >= args->minlen)
271 return 1;
272 args->agbno = NULLAGBLOCK;
273 return 0;
274}
275
276/*
277 * Update the two btrees, logically removing from freespace the extent
278 * starting at rbno, rlen blocks. The extent is contained within the
279 * actual (current) free extent fbno for flen blocks.
280 * Flags are passed in indicating whether the cursors are set to the
281 * relevant records.
282 */
283STATIC int /* error code */
284xfs_alloc_fixup_trees(
dfc130f3
RC
285 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
286 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
2bd0ea18
NS
287 xfs_agblock_t fbno, /* starting block of free extent */
288 xfs_extlen_t flen, /* length of free extent */
289 xfs_agblock_t rbno, /* starting block of returned extent */
290 xfs_extlen_t rlen, /* length of returned extent */
291 int flags) /* flags, XFSA_FIXUP_... */
292{
293 int error; /* error code */
294 int i; /* operation results */
295 xfs_agblock_t nfbno1; /* first new free startblock */
296 xfs_agblock_t nfbno2; /* second new free startblock */
0e266570
NS
297 xfs_extlen_t nflen1=0; /* first new free length */
298 xfs_extlen_t nflen2=0; /* second new free length */
2bd0ea18
NS
299
300 /*
301 * Look up the record in the by-size tree if necessary.
302 */
303 if (flags & XFSA_FIXUP_CNT_OK) {
304#ifdef DEBUG
0e266570 305 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
2bd0ea18
NS
306 return error;
307 XFS_WANT_CORRUPTED_RETURN(
308 i == 1 && nfbno1 == fbno && nflen1 == flen);
309#endif
310 } else {
0e266570 311 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
2bd0ea18
NS
312 return error;
313 XFS_WANT_CORRUPTED_RETURN(i == 1);
314 }
315 /*
316 * Look up the record in the by-block tree if necessary.
317 */
318 if (flags & XFSA_FIXUP_BNO_OK) {
319#ifdef DEBUG
0e266570 320 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
2bd0ea18
NS
321 return error;
322 XFS_WANT_CORRUPTED_RETURN(
323 i == 1 && nfbno1 == fbno && nflen1 == flen);
324#endif
325 } else {
0e266570 326 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
2bd0ea18
NS
327 return error;
328 XFS_WANT_CORRUPTED_RETURN(i == 1);
329 }
b3563c19 330
2bd0ea18 331#ifdef DEBUG
b3563c19
BN
332 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
333 struct xfs_btree_block *bnoblock;
334 struct xfs_btree_block *cntblock;
335
336 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
337 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
2bd0ea18 338
b3563c19
BN
339 XFS_WANT_CORRUPTED_RETURN(
340 bnoblock->bb_numrecs == cntblock->bb_numrecs);
2bd0ea18
NS
341 }
342#endif
b3563c19 343
2bd0ea18
NS
344 /*
345 * Deal with all four cases: the allocated record is contained
346 * within the freespace record, so we can have new freespace
347 * at either (or both) end, or no freespace remaining.
348 */
349 if (rbno == fbno && rlen == flen)
350 nfbno1 = nfbno2 = NULLAGBLOCK;
351 else if (rbno == fbno) {
352 nfbno1 = rbno + rlen;
353 nflen1 = flen - rlen;
354 nfbno2 = NULLAGBLOCK;
355 } else if (rbno + rlen == fbno + flen) {
356 nfbno1 = fbno;
357 nflen1 = flen - rlen;
358 nfbno2 = NULLAGBLOCK;
359 } else {
360 nfbno1 = fbno;
361 nflen1 = rbno - fbno;
362 nfbno2 = rbno + rlen;
363 nflen2 = (fbno + flen) - nfbno2;
364 }
365 /*
366 * Delete the entry from the by-size btree.
367 */
b194c7d8 368 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18
NS
369 return error;
370 XFS_WANT_CORRUPTED_RETURN(i == 1);
371 /*
372 * Add new by-size btree entry(s).
373 */
374 if (nfbno1 != NULLAGBLOCK) {
0e266570 375 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
2bd0ea18
NS
376 return error;
377 XFS_WANT_CORRUPTED_RETURN(i == 0);
b194c7d8 378 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18
NS
379 return error;
380 XFS_WANT_CORRUPTED_RETURN(i == 1);
381 }
382 if (nfbno2 != NULLAGBLOCK) {
0e266570 383 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
2bd0ea18
NS
384 return error;
385 XFS_WANT_CORRUPTED_RETURN(i == 0);
b194c7d8 386 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18
NS
387 return error;
388 XFS_WANT_CORRUPTED_RETURN(i == 1);
389 }
390 /*
391 * Fix up the by-block btree entry(s).
392 */
393 if (nfbno1 == NULLAGBLOCK) {
394 /*
395 * No remaining freespace, just delete the by-block tree entry.
396 */
b194c7d8 397 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18
NS
398 return error;
399 XFS_WANT_CORRUPTED_RETURN(i == 1);
400 } else {
401 /*
402 * Update the by-block entry to start later|be shorter.
403 */
0e266570 404 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
2bd0ea18
NS
405 return error;
406 }
407 if (nfbno2 != NULLAGBLOCK) {
408 /*
409 * 2 resulting free entries, need to add one.
410 */
0e266570 411 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
2bd0ea18
NS
412 return error;
413 XFS_WANT_CORRUPTED_RETURN(i == 0);
b194c7d8 414 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18
NS
415 return error;
416 XFS_WANT_CORRUPTED_RETURN(i == 1);
417 }
418 return 0;
419}
420
dd5b876e 421static bool
a2ceac1f
DC
422xfs_agfl_verify(
423 struct xfs_buf *bp)
424{
a2ceac1f
DC
425 struct xfs_mount *mp = bp->b_target->bt_mount;
426 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
a2ceac1f
DC
427 int i;
428
dd5b876e
DC
429 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid))
430 return false;
431 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
432 return false;
433 /*
434 * during growfs operations, the perag is not fully initialised,
435 * so we can't use it for any useful checking. growfs ensures we can't
436 * use it by using uncached buffers that don't have the perag attached
437 * so we can detect and avoid this problem.
438 */
439 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
440 return false;
441
a2ceac1f 442 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
dd5b876e 443 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
a2ceac1f 444 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
dd5b876e 445 return false;
a2ceac1f 446 }
dd5b876e
DC
447 return true;
448}
449
450static void
451xfs_agfl_read_verify(
452 struct xfs_buf *bp)
453{
454 struct xfs_mount *mp = bp->b_target->bt_mount;
dd5b876e
DC
455
456 /*
457 * There is no verification of non-crc AGFLs because mkfs does not
458 * initialise the AGFL to zero or NULL. Hence the only valid part of the
459 * AGFL is what the AGF says is active. We can't get to the AGF, so we
460 * can't verify just those entries are valid.
461 */
462 if (!xfs_sb_version_hascrc(&mp->m_sb))
463 return;
464
45922933 465 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
12b53197 466 xfs_buf_ioerror(bp, -EFSBADCRC);
45922933 467 else if (!xfs_agfl_verify(bp))
12b53197 468 xfs_buf_ioerror(bp, -EFSCORRUPTED);
45922933
DC
469
470 if (bp->b_error)
471 xfs_verifier_error(bp);
a2ceac1f
DC
472}
473
474static void
475xfs_agfl_write_verify(
476 struct xfs_buf *bp)
477{
dd5b876e
DC
478 struct xfs_mount *mp = bp->b_target->bt_mount;
479 struct xfs_buf_log_item *bip = bp->b_fspriv;
a2ceac1f 480
dd5b876e
DC
481 /* no verification of non-crc AGFLs */
482 if (!xfs_sb_version_hascrc(&mp->m_sb))
483 return;
484
485 if (!xfs_agfl_verify(bp)) {
12b53197 486 xfs_buf_ioerror(bp, -EFSCORRUPTED);
45922933 487 xfs_verifier_error(bp);
dd5b876e
DC
488 return;
489 }
490
491 if (bip)
492 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
493
43b5aeed 494 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
a2ceac1f
DC
495}
496
497const struct xfs_buf_ops xfs_agfl_buf_ops = {
498 .verify_read = xfs_agfl_read_verify,
499 .verify_write = xfs_agfl_write_verify,
500};
501
2bd0ea18
NS
502/*
503 * Read in the allocation group free block array.
504 */
505STATIC int /* error */
506xfs_alloc_read_agfl(
507 xfs_mount_t *mp, /* mount point structure */
508 xfs_trans_t *tp, /* transaction pointer */
509 xfs_agnumber_t agno, /* allocation group number */
510 xfs_buf_t **bpp) /* buffer for the ag free block array */
511{
512 xfs_buf_t *bp; /* return value */
2bd0ea18
NS
513 int error;
514
515 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
516 error = xfs_trans_read_buf(
517 mp, tp, mp->m_ddev_targp,
518 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
a2ceac1f 519 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
9440d84d 520 if (error)
2bd0ea18 521 return error;
a2ceac1f 522 xfs_buf_set_ref(bp, XFS_AGFL_REF);
2bd0ea18
NS
523 *bpp = bp;
524 return 0;
525}
526
a2ceac1f
DC
527STATIC int
528xfs_alloc_update_counters(
529 struct xfs_trans *tp,
530 struct xfs_perag *pag,
531 struct xfs_buf *agbp,
532 long len)
533{
534 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
535
536 pag->pagf_freeblks += len;
537 be32_add_cpu(&agf->agf_freeblks, len);
538
539 xfs_trans_agblocks_delta(tp, len);
540 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
541 be32_to_cpu(agf->agf_length)))
12b53197 542 return -EFSCORRUPTED;
a2ceac1f
DC
543
544 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
545 return 0;
546}
547
2bd0ea18
NS
548/*
549 * Allocation group level functions.
550 */
551
552/*
553 * Allocate a variable extent in the allocation group agno.
554 * Type and bno are used to determine where in the allocation group the
555 * extent will start.
556 * Extent's length (returned in *len) will be between minlen and maxlen,
557 * and of the form k * prod + mod unless there's nothing that large.
558 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
559 */
560STATIC int /* error */
561xfs_alloc_ag_vextent(
dfc130f3 562 xfs_alloc_arg_t *args) /* argument structure for allocation */
2bd0ea18 563{
0e266570 564 int error=0;
2bd0ea18
NS
565
566 ASSERT(args->minlen > 0);
567 ASSERT(args->maxlen > 0);
568 ASSERT(args->minlen <= args->maxlen);
569 ASSERT(args->mod < args->prod);
570 ASSERT(args->alignment > 0);
571 /*
572 * Branch to correct routine based on the type.
573 */
574 args->wasfromfl = 0;
575 switch (args->type) {
576 case XFS_ALLOCTYPE_THIS_AG:
577 error = xfs_alloc_ag_vextent_size(args);
578 break;
579 case XFS_ALLOCTYPE_NEAR_BNO:
580 error = xfs_alloc_ag_vextent_near(args);
581 break;
582 case XFS_ALLOCTYPE_THIS_BNO:
583 error = xfs_alloc_ag_vextent_exact(args);
584 break;
585 default:
586 ASSERT(0);
587 /* NOTREACHED */
588 }
a2ceac1f
DC
589
590 if (error || args->agbno == NULLAGBLOCK)
2bd0ea18 591 return error;
2bd0ea18 592
a2ceac1f
DC
593 ASSERT(args->len >= args->minlen);
594 ASSERT(args->len <= args->maxlen);
595 ASSERT(!args->wasfromfl || !args->isfl);
596 ASSERT(args->agbno % args->alignment == 0);
597
598 if (!args->wasfromfl) {
599 error = xfs_alloc_update_counters(args->tp, args->pag,
600 args->agbp,
601 -((long)(args->len)));
602 if (error)
603 return error;
604
605 ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
606 args->agbno, args->len));
2bd0ea18 607 }
a2ceac1f
DC
608
609 if (!args->isfl) {
610 xfs_trans_mod_sb(args->tp, args->wasdel ?
611 XFS_TRANS_SB_RES_FDBLOCKS :
612 XFS_TRANS_SB_FDBLOCKS,
613 -((long)(args->len)));
614 }
615
616 XFS_STATS_INC(xs_allocx);
617 XFS_STATS_ADD(xs_allocb, args->len);
618 return error;
2bd0ea18
NS
619}
620
621/*
622 * Allocate a variable extent at exactly agno/bno.
623 * Extent's length (returned in *len) will be between minlen and maxlen,
624 * and of the form k * prod + mod unless there's nothing that large.
625 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
626 */
627STATIC int /* error */
628xfs_alloc_ag_vextent_exact(
dfc130f3 629 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 630{
dfc130f3
RC
631 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
632 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
2bd0ea18
NS
633 int error;
634 xfs_agblock_t fbno; /* start block of found extent */
2bd0ea18 635 xfs_extlen_t flen; /* length of found extent */
a2ceac1f
DC
636 xfs_agblock_t tbno; /* start block of trimmed extent */
637 xfs_extlen_t tlen; /* length of trimmed extent */
638 xfs_agblock_t tend; /* end block of trimmed extent */
2bd0ea18 639 int i; /* success/failure of operation */
2bd0ea18
NS
640
641 ASSERT(args->alignment == 1);
a2ceac1f 642
2bd0ea18
NS
643 /*
644 * Allocate/initialize a cursor for the by-number freespace btree.
645 */
b194c7d8 646 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
56b2de80
DC
647 args->agno, XFS_BTNUM_BNO);
648
2bd0ea18
NS
649 /*
650 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
651 * Look for the closest free block <= bno, it must contain bno
652 * if any free block does.
653 */
56b2de80
DC
654 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
655 if (error)
2bd0ea18 656 goto error0;
56b2de80
DC
657 if (!i)
658 goto not_found;
659
2bd0ea18
NS
660 /*
661 * Grab the freespace record.
662 */
56b2de80
DC
663 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
664 if (error)
2bd0ea18
NS
665 goto error0;
666 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
667 ASSERT(fbno <= args->agbno);
56b2de80 668
5000d01d 669 /*
a2ceac1f
DC
670 * Check for overlapping busy extents.
671 */
672 xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
673
674 /*
675 * Give up if the start of the extent is busy, or the freespace isn't
676 * long enough for the minimum request.
2bd0ea18 677 */
a2ceac1f
DC
678 if (tbno > args->agbno)
679 goto not_found;
680 if (tlen < args->minlen)
681 goto not_found;
682 tend = tbno + tlen;
683 if (tend < args->agbno + args->minlen)
56b2de80
DC
684 goto not_found;
685
2bd0ea18
NS
686 /*
687 * End of extent will be smaller of the freespace end and the
688 * maximal requested end.
56b2de80 689 *
2bd0ea18
NS
690 * Fix the length according to mod and prod if given.
691 */
a2ceac1f
DC
692 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
693 - args->agbno;
2bd0ea18 694 xfs_alloc_fix_len(args);
56b2de80
DC
695 if (!xfs_alloc_fix_minleft(args))
696 goto not_found;
697
a2ceac1f 698 ASSERT(args->agbno + args->len <= tend);
56b2de80 699
2bd0ea18 700 /*
a2ceac1f 701 * We are allocating agbno for args->len
2bd0ea18
NS
702 * Allocate/initialize a cursor for the by-size btree.
703 */
b194c7d8
BN
704 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
705 args->agno, XFS_BTNUM_CNT);
2bd0ea18 706 ASSERT(args->agbno + args->len <=
6e3140c7 707 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
56b2de80
DC
708 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
709 args->len, XFSA_FIXUP_BNO_OK);
710 if (error) {
2bd0ea18
NS
711 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
712 goto error0;
713 }
a2ceac1f 714
2bd0ea18
NS
715 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
716 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
a2ceac1f 717
2bd0ea18 718 args->wasfromfl = 0;
56b2de80
DC
719 trace_xfs_alloc_exact_done(args);
720 return 0;
721
722not_found:
723 /* Didn't find it, return null. */
724 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
725 args->agbno = NULLAGBLOCK;
726 trace_xfs_alloc_exact_notfound(args);
2bd0ea18
NS
727 return 0;
728
729error0:
730 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
56b2de80
DC
731 trace_xfs_alloc_exact_error(args);
732 return error;
733}
734
735/*
736 * Search the btree in a given direction via the search cursor and compare
737 * the records found against the good extent we've already found.
738 */
739STATIC int
740xfs_alloc_find_best_extent(
741 struct xfs_alloc_arg *args, /* allocation argument structure */
742 struct xfs_btree_cur **gcur, /* good cursor */
743 struct xfs_btree_cur **scur, /* searching cursor */
744 xfs_agblock_t gdiff, /* difference for search comparison */
745 xfs_agblock_t *sbno, /* extent found by search */
a2ceac1f
DC
746 xfs_extlen_t *slen, /* extent length */
747 xfs_agblock_t *sbnoa, /* aligned extent found by search */
748 xfs_extlen_t *slena, /* aligned extent length */
56b2de80
DC
749 int dir) /* 0 = search right, 1 = search left */
750{
56b2de80
DC
751 xfs_agblock_t new;
752 xfs_agblock_t sdiff;
753 int error;
754 int i;
755
756 /* The good extent is perfect, no need to search. */
757 if (!gdiff)
758 goto out_use_good;
759
760 /*
761 * Look until we find a better one, run out of space or run off the end.
762 */
763 do {
764 error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
765 if (error)
766 goto error0;
767 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
a2ceac1f 768 xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
56b2de80
DC
769
770 /*
771 * The good extent is closer than this one.
772 */
773 if (!dir) {
a2ceac1f 774 if (*sbnoa >= args->agbno + gdiff)
56b2de80
DC
775 goto out_use_good;
776 } else {
a2ceac1f 777 if (*sbnoa <= args->agbno - gdiff)
56b2de80
DC
778 goto out_use_good;
779 }
780
781 /*
782 * Same distance, compare length and pick the best.
783 */
784 if (*slena >= args->minlen) {
785 args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
786 xfs_alloc_fix_len(args);
787
788 sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
84a62eea
DC
789 args->alignment,
790 args->userdata, *sbnoa,
a2ceac1f 791 *slena, &new);
56b2de80
DC
792
793 /*
794 * Choose closer size and invalidate other cursor.
795 */
796 if (sdiff < gdiff)
797 goto out_use_search;
798 goto out_use_good;
799 }
800
801 if (!dir)
802 error = xfs_btree_increment(*scur, 0, &i);
803 else
804 error = xfs_btree_decrement(*scur, 0, &i);
805 if (error)
806 goto error0;
807 } while (i);
808
809out_use_good:
810 xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
811 *scur = NULL;
812 return 0;
813
814out_use_search:
815 xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
816 *gcur = NULL;
817 return 0;
818
819error0:
820 /* caller invalidates cursors */
2bd0ea18
NS
821 return error;
822}
823
824/*
825 * Allocate a variable extent near bno in the allocation group agno.
826 * Extent's length (returned in len) will be between minlen and maxlen,
827 * and of the form k * prod + mod unless there's nothing that large.
828 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
829 */
830STATIC int /* error */
831xfs_alloc_ag_vextent_near(
dfc130f3 832 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 833{
dfc130f3
RC
834 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */
835 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */
836 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */
2bd0ea18
NS
837 xfs_agblock_t gtbno; /* start bno of right side entry */
838 xfs_agblock_t gtbnoa; /* aligned ... */
839 xfs_extlen_t gtdiff; /* difference to right side entry */
840 xfs_extlen_t gtlen; /* length of right side entry */
a2ceac1f 841 xfs_extlen_t gtlena; /* aligned ... */
2bd0ea18
NS
842 xfs_agblock_t gtnew; /* useful start bno of right side */
843 int error; /* error code */
844 int i; /* result code, temporary */
845 int j; /* result code, temporary */
846 xfs_agblock_t ltbno; /* start bno of left side entry */
847 xfs_agblock_t ltbnoa; /* aligned ... */
848 xfs_extlen_t ltdiff; /* difference to left side entry */
2bd0ea18 849 xfs_extlen_t ltlen; /* length of left side entry */
a2ceac1f 850 xfs_extlen_t ltlena; /* aligned ... */
2bd0ea18
NS
851 xfs_agblock_t ltnew; /* useful start bno of left side */
852 xfs_extlen_t rlen; /* length of returned extent */
a2ceac1f 853 int forced = 0;
6beba453 854#ifdef DEBUG
2bd0ea18
NS
855 /*
856 * Randomly don't execute the first algorithm.
857 */
2bd0ea18 858 int dofirst; /* set to do first algorithm */
2bd0ea18 859
49f693fa 860 dofirst = prandom_u32() & 1;
2bd0ea18 861#endif
a2ceac1f
DC
862
863restart:
864 bno_cur_lt = NULL;
865 bno_cur_gt = NULL;
866 ltlen = 0;
867 gtlena = 0;
868 ltlena = 0;
869
2bd0ea18
NS
870 /*
871 * Get a cursor for the by-size btree.
872 */
b194c7d8
BN
873 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
874 args->agno, XFS_BTNUM_CNT);
a2ceac1f 875
2bd0ea18
NS
876 /*
877 * See if there are any free extents as big as maxlen.
878 */
0e266570 879 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
2bd0ea18
NS
880 goto error0;
881 /*
882 * If none, then pick up the last entry in the tree unless the
883 * tree is empty.
5000d01d 884 */
2bd0ea18 885 if (!i) {
0e266570
NS
886 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
887 &ltlen, &i)))
2bd0ea18
NS
888 goto error0;
889 if (i == 0 || ltlen == 0) {
890 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
a2ceac1f 891 trace_xfs_alloc_near_noentry(args);
2bd0ea18
NS
892 return 0;
893 }
894 ASSERT(i == 1);
895 }
896 args->wasfromfl = 0;
a2ceac1f 897
5000d01d 898 /*
2bd0ea18
NS
899 * First algorithm.
900 * If the requested extent is large wrt the freespaces available
901 * in this a.g., then the cursor will be pointing to a btree entry
902 * near the right edge of the tree. If it's in the last btree leaf
903 * block, then we just examine all the entries in that block
904 * that are big enough, and pick the best one.
905 * This is written as a while loop so we can break out of it,
906 * but we never loop back to the top.
907 */
908 while (xfs_btree_islastblock(cnt_cur, 0)) {
909 xfs_extlen_t bdiff;
0e266570
NS
910 int besti=0;
911 xfs_extlen_t blen=0;
912 xfs_agblock_t bnew=0;
2bd0ea18 913
6beba453
DC
914#ifdef DEBUG
915 if (dofirst)
2bd0ea18
NS
916 break;
917#endif
918 /*
919 * Start from the entry that lookup found, sequence through
920 * all larger free blocks. If we're actually pointing at a
921 * record smaller than maxlen, go to the start of this block,
922 * and skip all those smaller than minlen.
923 */
924 if (ltlen || args->alignment > 1) {
925 cnt_cur->bc_ptrs[0] = 1;
926 do {
0e266570
NS
927 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
928 &ltlen, &i)))
2bd0ea18
NS
929 goto error0;
930 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
931 if (ltlen >= args->minlen)
932 break;
b194c7d8 933 if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
2bd0ea18
NS
934 goto error0;
935 } while (i);
936 ASSERT(ltlen >= args->minlen);
937 if (!i)
938 break;
939 }
940 i = cnt_cur->bc_ptrs[0];
941 for (j = 1, blen = 0, bdiff = 0;
942 !error && j && (blen < args->maxlen || bdiff > 0);
b194c7d8 943 error = xfs_btree_increment(cnt_cur, 0, &j)) {
2bd0ea18
NS
944 /*
945 * For each entry, decide if it's better than
946 * the previous best entry.
947 */
0e266570 948 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
2bd0ea18
NS
949 goto error0;
950 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
a2ceac1f
DC
951 xfs_alloc_compute_aligned(args, ltbno, ltlen,
952 &ltbnoa, &ltlena);
5e656dbb 953 if (ltlena < args->minlen)
2bd0ea18
NS
954 continue;
955 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
956 xfs_alloc_fix_len(args);
957 ASSERT(args->len >= args->minlen);
958 if (args->len < blen)
959 continue;
960 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
84a62eea
DC
961 args->alignment, args->userdata, ltbnoa,
962 ltlena, &ltnew);
2bd0ea18
NS
963 if (ltnew != NULLAGBLOCK &&
964 (args->len > blen || ltdiff < bdiff)) {
965 bdiff = ltdiff;
966 bnew = ltnew;
967 blen = args->len;
968 besti = cnt_cur->bc_ptrs[0];
969 }
970 }
971 /*
972 * It didn't work. We COULD be in a case where
973 * there's a good record somewhere, so try again.
974 */
975 if (blen == 0)
976 break;
977 /*
978 * Point at the best entry, and retrieve it again.
979 */
980 cnt_cur->bc_ptrs[0] = besti;
0e266570 981 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
2bd0ea18
NS
982 goto error0;
983 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
56b2de80 984 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
2bd0ea18
NS
985 args->len = blen;
986 if (!xfs_alloc_fix_minleft(args)) {
987 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
56b2de80 988 trace_xfs_alloc_near_nominleft(args);
2bd0ea18
NS
989 return 0;
990 }
991 blen = args->len;
992 /*
993 * We are allocating starting at bnew for blen blocks.
994 */
995 args->agbno = bnew;
996 ASSERT(bnew >= ltbno);
56b2de80 997 ASSERT(bnew + blen <= ltbno + ltlen);
2bd0ea18
NS
998 /*
999 * Set up a cursor for the by-bno tree.
1000 */
b194c7d8
BN
1001 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
1002 args->agbp, args->agno, XFS_BTNUM_BNO);
2bd0ea18
NS
1003 /*
1004 * Fix up the btree entries.
1005 */
0e266570
NS
1006 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
1007 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
2bd0ea18
NS
1008 goto error0;
1009 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1010 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
56b2de80
DC
1011
1012 trace_xfs_alloc_near_first(args);
2bd0ea18
NS
1013 return 0;
1014 }
1015 /*
1016 * Second algorithm.
1017 * Search in the by-bno tree to the left and to the right
1018 * simultaneously, until in each case we find a space big enough,
1019 * or run into the edge of the tree. When we run into the edge,
1020 * we deallocate that cursor.
1021 * If both searches succeed, we compare the two spaces and pick
1022 * the better one.
1023 * With alignment, it's possible for both to fail; the upper
1024 * level algorithm that picks allocation groups for allocations
1025 * is not supposed to do this.
1026 */
1027 /*
1028 * Allocate and initialize the cursor for the leftward search.
1029 */
b194c7d8
BN
1030 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1031 args->agno, XFS_BTNUM_BNO);
2bd0ea18
NS
1032 /*
1033 * Lookup <= bno to find the leftward search's starting point.
1034 */
0e266570 1035 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
2bd0ea18
NS
1036 goto error0;
1037 if (!i) {
1038 /*
1039 * Didn't find anything; use this cursor for the rightward
1040 * search.
1041 */
1042 bno_cur_gt = bno_cur_lt;
062998e3 1043 bno_cur_lt = NULL;
2bd0ea18
NS
1044 }
1045 /*
1046 * Found something. Duplicate the cursor for the rightward search.
1047 */
0e266570 1048 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
2bd0ea18
NS
1049 goto error0;
1050 /*
1051 * Increment the cursor, so we will point at the entry just right
1052 * of the leftward entry if any, or to the leftmost entry.
1053 */
b194c7d8 1054 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
2bd0ea18
NS
1055 goto error0;
1056 if (!i) {
1057 /*
1058 * It failed, there are no rightward entries.
1059 */
1060 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1061 bno_cur_gt = NULL;
1062 }
1063 /*
1064 * Loop going left with the leftward cursor, right with the
1065 * rightward cursor, until either both directions give up or
1066 * we find an entry at least as big as minlen.
1067 */
1068 do {
1069 if (bno_cur_lt) {
0e266570 1070 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
2bd0ea18
NS
1071 goto error0;
1072 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
a2ceac1f
DC
1073 xfs_alloc_compute_aligned(args, ltbno, ltlen,
1074 &ltbnoa, &ltlena);
5e656dbb 1075 if (ltlena >= args->minlen)
2bd0ea18 1076 break;
b194c7d8 1077 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
2bd0ea18
NS
1078 goto error0;
1079 if (!i) {
1080 xfs_btree_del_cursor(bno_cur_lt,
1081 XFS_BTREE_NOERROR);
1082 bno_cur_lt = NULL;
1083 }
1084 }
1085 if (bno_cur_gt) {
0e266570 1086 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
2bd0ea18
NS
1087 goto error0;
1088 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
a2ceac1f
DC
1089 xfs_alloc_compute_aligned(args, gtbno, gtlen,
1090 &gtbnoa, &gtlena);
5e656dbb 1091 if (gtlena >= args->minlen)
2bd0ea18 1092 break;
b194c7d8 1093 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
2bd0ea18
NS
1094 goto error0;
1095 if (!i) {
1096 xfs_btree_del_cursor(bno_cur_gt,
1097 XFS_BTREE_NOERROR);
1098 bno_cur_gt = NULL;
1099 }
1100 }
1101 } while (bno_cur_lt || bno_cur_gt);
56b2de80 1102
2bd0ea18
NS
1103 /*
1104 * Got both cursors still active, need to find better entry.
1105 */
1106 if (bno_cur_lt && bno_cur_gt) {
2bd0ea18
NS
1107 if (ltlena >= args->minlen) {
1108 /*
56b2de80 1109 * Left side is good, look for a right side entry.
2bd0ea18
NS
1110 */
1111 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1112 xfs_alloc_fix_len(args);
56b2de80 1113 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
84a62eea
DC
1114 args->alignment, args->userdata, ltbnoa,
1115 ltlena, &ltnew);
56b2de80
DC
1116
1117 error = xfs_alloc_find_best_extent(args,
1118 &bno_cur_lt, &bno_cur_gt,
a2ceac1f
DC
1119 ltdiff, &gtbno, &gtlen,
1120 &gtbnoa, &gtlena,
56b2de80
DC
1121 0 /* search right */);
1122 } else {
1123 ASSERT(gtlena >= args->minlen);
1124
2bd0ea18 1125 /*
56b2de80 1126 * Right side is good, look for a left side entry.
2bd0ea18
NS
1127 */
1128 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1129 xfs_alloc_fix_len(args);
56b2de80 1130 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
84a62eea
DC
1131 args->alignment, args->userdata, gtbnoa,
1132 gtlena, &gtnew);
56b2de80
DC
1133
1134 error = xfs_alloc_find_best_extent(args,
1135 &bno_cur_gt, &bno_cur_lt,
a2ceac1f
DC
1136 gtdiff, &ltbno, &ltlen,
1137 &ltbnoa, &ltlena,
56b2de80 1138 1 /* search left */);
2bd0ea18 1139 }
56b2de80
DC
1140
1141 if (error)
1142 goto error0;
2bd0ea18 1143 }
56b2de80 1144
2bd0ea18
NS
1145 /*
1146 * If we couldn't get anything, give up.
1147 */
1148 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
a2ceac1f
DC
1149 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1150
1151 if (!forced++) {
1152 trace_xfs_alloc_near_busy(args);
1153 xfs_log_force(args->mp, XFS_LOG_SYNC);
1154 goto restart;
1155 }
56b2de80 1156 trace_xfs_alloc_size_neither(args);
2bd0ea18
NS
1157 args->agbno = NULLAGBLOCK;
1158 return 0;
1159 }
56b2de80 1160
2bd0ea18
NS
1161 /*
1162 * At this point we have selected a freespace entry, either to the
1163 * left or to the right. If it's on the right, copy all the
1164 * useful variables to the "left" set so we only have one
1165 * copy of this code.
1166 */
1167 if (bno_cur_gt) {
1168 bno_cur_lt = bno_cur_gt;
1169 bno_cur_gt = NULL;
1170 ltbno = gtbno;
1171 ltbnoa = gtbnoa;
1172 ltlen = gtlen;
1173 ltlena = gtlena;
1174 j = 1;
1175 } else
1176 j = 0;
56b2de80 1177
2bd0ea18
NS
1178 /*
1179 * Fix up the length and compute the useful address.
1180 */
2bd0ea18
NS
1181 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1182 xfs_alloc_fix_len(args);
1183 if (!xfs_alloc_fix_minleft(args)) {
56b2de80 1184 trace_xfs_alloc_near_nominleft(args);
2bd0ea18
NS
1185 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1186 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1187 return 0;
1188 }
1189 rlen = args->len;
a2ceac1f 1190 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
84a62eea 1191 args->userdata, ltbnoa, ltlena, &ltnew);
2bd0ea18 1192 ASSERT(ltnew >= ltbno);
a2ceac1f 1193 ASSERT(ltnew + rlen <= ltbnoa + ltlena);
6e3140c7 1194 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
2bd0ea18 1195 args->agbno = ltnew;
a2ceac1f 1196
0e266570
NS
1197 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1198 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
2bd0ea18 1199 goto error0;
56b2de80
DC
1200
1201 if (j)
1202 trace_xfs_alloc_near_greater(args);
1203 else
1204 trace_xfs_alloc_near_lesser(args);
1205
2bd0ea18
NS
1206 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1207 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1208 return 0;
1209
1210 error0:
56b2de80 1211 trace_xfs_alloc_near_error(args);
2bd0ea18
NS
1212 if (cnt_cur != NULL)
1213 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1214 if (bno_cur_lt != NULL)
1215 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1216 if (bno_cur_gt != NULL)
1217 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1218 return error;
1219}
1220
1221/*
1222 * Allocate a variable extent anywhere in the allocation group agno.
1223 * Extent's length (returned in len) will be between minlen and maxlen,
1224 * and of the form k * prod + mod unless there's nothing that large.
1225 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1226 */
1227STATIC int /* error */
1228xfs_alloc_ag_vextent_size(
dfc130f3 1229 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 1230{
dfc130f3
RC
1231 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */
1232 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */
2bd0ea18
NS
1233 int error; /* error result */
1234 xfs_agblock_t fbno; /* start of found freespace */
1235 xfs_extlen_t flen; /* length of found freespace */
2bd0ea18
NS
1236 int i; /* temp status variable */
1237 xfs_agblock_t rbno; /* returned block number */
1238 xfs_extlen_t rlen; /* length of returned extent */
a2ceac1f 1239 int forced = 0;
2bd0ea18 1240
a2ceac1f 1241restart:
2bd0ea18
NS
1242 /*
1243 * Allocate and initialize a cursor for the by-size btree.
1244 */
b194c7d8
BN
1245 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1246 args->agno, XFS_BTNUM_CNT);
2bd0ea18 1247 bno_cur = NULL;
a2ceac1f 1248
2bd0ea18
NS
1249 /*
1250 * Look for an entry >= maxlen+alignment-1 blocks.
1251 */
0e266570
NS
1252 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1253 args->maxlen + args->alignment - 1, &i)))
2bd0ea18 1254 goto error0;
a2ceac1f 1255
2bd0ea18 1256 /*
a2ceac1f
DC
1257 * If none or we have busy extents that we cannot allocate from, then
1258 * we have to settle for a smaller extent. In the case that there are
1259 * no large extents, this will return the last entry in the tree unless
1260 * the tree is empty. In the case that there are only busy large
1261 * extents, this will return the largest small extent unless there
1262 * are no smaller extents available.
5000d01d 1263 */
a2ceac1f
DC
1264 if (!i || forced > 1) {
1265 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1266 &fbno, &flen, &i);
1267 if (error)
2bd0ea18
NS
1268 goto error0;
1269 if (i == 0 || flen == 0) {
1270 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
56b2de80 1271 trace_xfs_alloc_size_noentry(args);
2bd0ea18
NS
1272 return 0;
1273 }
1274 ASSERT(i == 1);
a2ceac1f
DC
1275 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1276 } else {
1277 /*
1278 * Search for a non-busy extent that is large enough.
1279 * If we are at low space, don't check, or if we fall of
1280 * the end of the btree, turn off the busy check and
1281 * restart.
1282 */
1283 for (;;) {
1284 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1285 if (error)
1286 goto error0;
1287 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1288
1289 xfs_alloc_compute_aligned(args, fbno, flen,
1290 &rbno, &rlen);
1291
1292 if (rlen >= args->maxlen)
1293 break;
1294
1295 error = xfs_btree_increment(cnt_cur, 0, &i);
1296 if (error)
1297 goto error0;
1298 if (i == 0) {
1299 /*
1300 * Our only valid extents must have been busy.
1301 * Make it unbusy by forcing the log out and
1302 * retrying. If we've been here before, forcing
1303 * the log isn't making the extents available,
1304 * which means they have probably been freed in
1305 * this transaction. In that case, we have to
1306 * give up on them and we'll attempt a minlen
1307 * allocation the next time around.
1308 */
1309 xfs_btree_del_cursor(cnt_cur,
1310 XFS_BTREE_NOERROR);
1311 trace_xfs_alloc_size_busy(args);
1312 if (!forced++)
1313 xfs_log_force(args->mp, XFS_LOG_SYNC);
1314 goto restart;
1315 }
1316 }
2bd0ea18 1317 }
a2ceac1f 1318
2bd0ea18
NS
1319 /*
1320 * In the first case above, we got the last entry in the
1321 * by-size btree. Now we check to see if the space hits maxlen
1322 * once aligned; if not, we search left for something better.
1323 * This can't happen in the second case above.
1324 */
2bd0ea18 1325 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
5000d01d 1326 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
2bd0ea18
NS
1327 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1328 if (rlen < args->maxlen) {
1329 xfs_agblock_t bestfbno;
1330 xfs_extlen_t bestflen;
1331 xfs_agblock_t bestrbno;
1332 xfs_extlen_t bestrlen;
1333
1334 bestrlen = rlen;
1335 bestrbno = rbno;
1336 bestflen = flen;
1337 bestfbno = fbno;
1338 for (;;) {
b194c7d8 1339 if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
2bd0ea18
NS
1340 goto error0;
1341 if (i == 0)
1342 break;
0e266570
NS
1343 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1344 &i)))
2bd0ea18
NS
1345 goto error0;
1346 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1347 if (flen < bestrlen)
1348 break;
a2ceac1f
DC
1349 xfs_alloc_compute_aligned(args, fbno, flen,
1350 &rbno, &rlen);
2bd0ea18
NS
1351 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1352 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1353 (rlen <= flen && rbno + rlen <= fbno + flen),
1354 error0);
1355 if (rlen > bestrlen) {
1356 bestrlen = rlen;
1357 bestrbno = rbno;
1358 bestflen = flen;
1359 bestfbno = fbno;
1360 if (rlen == args->maxlen)
1361 break;
1362 }
5000d01d 1363 }
0e266570
NS
1364 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1365 &i)))
2bd0ea18
NS
1366 goto error0;
1367 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1368 rlen = bestrlen;
1369 rbno = bestrbno;
1370 flen = bestflen;
1371 fbno = bestfbno;
1372 }
1373 args->wasfromfl = 0;
1374 /*
1375 * Fix up the length.
1376 */
1377 args->len = rlen;
a2ceac1f
DC
1378 if (rlen < args->minlen) {
1379 if (!forced++) {
1380 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1381 trace_xfs_alloc_size_busy(args);
1382 xfs_log_force(args->mp, XFS_LOG_SYNC);
1383 goto restart;
1384 }
1385 goto out_nominleft;
2bd0ea18 1386 }
a2ceac1f
DC
1387 xfs_alloc_fix_len(args);
1388
1389 if (!xfs_alloc_fix_minleft(args))
1390 goto out_nominleft;
2bd0ea18
NS
1391 rlen = args->len;
1392 XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
1393 /*
1394 * Allocate and initialize a cursor for the by-block tree.
1395 */
b194c7d8
BN
1396 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1397 args->agno, XFS_BTNUM_BNO);
0e266570
NS
1398 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1399 rbno, rlen, XFSA_FIXUP_CNT_OK)))
2bd0ea18
NS
1400 goto error0;
1401 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1402 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1403 cnt_cur = bno_cur = NULL;
1404 args->len = rlen;
1405 args->agbno = rbno;
1406 XFS_WANT_CORRUPTED_GOTO(
1407 args->agbno + args->len <=
6e3140c7 1408 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
2bd0ea18 1409 error0);
56b2de80 1410 trace_xfs_alloc_size_done(args);
2bd0ea18
NS
1411 return 0;
1412
1413error0:
56b2de80 1414 trace_xfs_alloc_size_error(args);
2bd0ea18
NS
1415 if (cnt_cur)
1416 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1417 if (bno_cur)
1418 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1419 return error;
a2ceac1f
DC
1420
1421out_nominleft:
1422 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1423 trace_xfs_alloc_size_nominleft(args);
1424 args->agbno = NULLAGBLOCK;
1425 return 0;
2bd0ea18
NS
1426}
1427
1428/*
1429 * Deal with the case where only small freespaces remain.
1430 * Either return the contents of the last freespace record,
1431 * or allocate space from the freelist if there is nothing in the tree.
1432 */
1433STATIC int /* error */
1434xfs_alloc_ag_vextent_small(
dfc130f3
RC
1435 xfs_alloc_arg_t *args, /* allocation argument structure */
1436 xfs_btree_cur_t *ccur, /* by-size cursor */
1437 xfs_agblock_t *fbnop, /* result block number */
1438 xfs_extlen_t *flenp, /* result length */
2bd0ea18
NS
1439 int *stat) /* status: 0-freelist, 1-normal/none */
1440{
1441 int error;
1442 xfs_agblock_t fbno;
1443 xfs_extlen_t flen;
2bd0ea18
NS
1444 int i;
1445
b194c7d8 1446 if ((error = xfs_btree_decrement(ccur, 0, &i)))
2bd0ea18
NS
1447 goto error0;
1448 if (i) {
0e266570 1449 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
2bd0ea18
NS
1450 goto error0;
1451 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1452 }
1453 /*
1454 * Nothing in the btree, try the freelist. Make sure
1455 * to respect minleft even when pulling from the
1456 * freelist.
1457 */
1458 else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
6e3140c7
NS
1459 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1460 > args->minleft)) {
cdded3d8
DC
1461 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1462 if (error)
2bd0ea18
NS
1463 goto error0;
1464 if (fbno != NULLAGBLOCK) {
a2ceac1f
DC
1465 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1466 args->userdata);
1467
2bd0ea18
NS
1468 if (args->userdata) {
1469 xfs_buf_t *bp;
1470
1471 bp = xfs_btree_get_bufs(args->mp, args->tp,
1472 args->agno, fbno, 0);
1473 xfs_trans_binval(args->tp, bp);
2bd0ea18
NS
1474 }
1475 args->len = 1;
1476 args->agbno = fbno;
1477 XFS_WANT_CORRUPTED_GOTO(
1478 args->agbno + args->len <=
6e3140c7 1479 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
2bd0ea18
NS
1480 error0);
1481 args->wasfromfl = 1;
56b2de80 1482 trace_xfs_alloc_small_freelist(args);
2bd0ea18
NS
1483 *stat = 0;
1484 return 0;
1485 }
1486 /*
1487 * Nothing in the freelist.
1488 */
1489 else
1490 flen = 0;
1491 }
1492 /*
1493 * Can't allocate from the freelist for some reason.
1494 */
5e656dbb
BN
1495 else {
1496 fbno = NULLAGBLOCK;
2bd0ea18 1497 flen = 0;
5e656dbb 1498 }
2bd0ea18
NS
1499 /*
1500 * Can't do the allocation, give up.
1501 */
1502 if (flen < args->minlen) {
1503 args->agbno = NULLAGBLOCK;
56b2de80 1504 trace_xfs_alloc_small_notenough(args);
2bd0ea18
NS
1505 flen = 0;
1506 }
1507 *fbnop = fbno;
1508 *flenp = flen;
1509 *stat = 1;
56b2de80 1510 trace_xfs_alloc_small_done(args);
2bd0ea18
NS
1511 return 0;
1512
1513error0:
56b2de80 1514 trace_xfs_alloc_small_error(args);
2bd0ea18
NS
1515 return error;
1516}
1517
1518/*
1519 * Free the extent starting at agno/bno for length.
1520 */
1521STATIC int /* error */
1522xfs_free_ag_extent(
1523 xfs_trans_t *tp, /* transaction pointer */
dfc130f3 1524 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2bd0ea18
NS
1525 xfs_agnumber_t agno, /* allocation group number */
1526 xfs_agblock_t bno, /* starting block number */
1527 xfs_extlen_t len, /* length of extent */
1528 int isfl) /* set if is freelist blocks - no sb acctg */
1529{
dfc130f3
RC
1530 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
1531 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
2bd0ea18 1532 int error; /* error return value */
2bd0ea18
NS
1533 xfs_agblock_t gtbno; /* start of right neighbor block */
1534 xfs_extlen_t gtlen; /* length of right neighbor block */
1535 int haveleft; /* have a left neighbor block */
1536 int haveright; /* have a right neighbor block */
1537 int i; /* temp, result code */
1538 xfs_agblock_t ltbno; /* start of left neighbor block */
1539 xfs_extlen_t ltlen; /* length of left neighbor block */
1540 xfs_mount_t *mp; /* mount point struct for filesystem */
1541 xfs_agblock_t nbno; /* new starting block of freespace */
1542 xfs_extlen_t nlen; /* new length of freespace */
a2ceac1f 1543 xfs_perag_t *pag; /* per allocation group data */
2bd0ea18
NS
1544
1545 mp = tp->t_mountp;
5000d01d 1546 /*
2bd0ea18
NS
1547 * Allocate and initialize a cursor for the by-block btree.
1548 */
b194c7d8 1549 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
2bd0ea18 1550 cnt_cur = NULL;
5000d01d 1551 /*
2bd0ea18
NS
1552 * Look for a neighboring block on the left (lower block numbers)
1553 * that is contiguous with this space.
1554 */
0e266570 1555 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
2bd0ea18
NS
1556 goto error0;
1557 if (haveleft) {
1558 /*
1559 * There is a block to our left.
1560 */
0e266570 1561 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
2bd0ea18
NS
1562 goto error0;
1563 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1564 /*
1565 * It's not contiguous, though.
1566 */
1567 if (ltbno + ltlen < bno)
1568 haveleft = 0;
1569 else {
1570 /*
1571 * If this failure happens the request to free this
1572 * space was invalid, it's (partly) already free.
1573 * Very bad.
1574 */
1575 XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
1576 }
1577 }
5000d01d 1578 /*
2bd0ea18
NS
1579 * Look for a neighboring block on the right (higher block numbers)
1580 * that is contiguous with this space.
1581 */
b194c7d8 1582 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
2bd0ea18
NS
1583 goto error0;
1584 if (haveright) {
1585 /*
1586 * There is a block to our right.
1587 */
0e266570 1588 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
2bd0ea18
NS
1589 goto error0;
1590 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1591 /*
1592 * It's not contiguous, though.
1593 */
1594 if (bno + len < gtbno)
1595 haveright = 0;
1596 else {
1597 /*
1598 * If this failure happens the request to free this
1599 * space was invalid, it's (partly) already free.
1600 * Very bad.
1601 */
1602 XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
1603 }
1604 }
1605 /*
1606 * Now allocate and initialize a cursor for the by-size tree.
1607 */
b194c7d8 1608 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
2bd0ea18
NS
1609 /*
1610 * Have both left and right contiguous neighbors.
1611 * Merge all three into a single free block.
1612 */
1613 if (haveleft && haveright) {
1614 /*
1615 * Delete the old by-size entry on the left.
1616 */
0e266570 1617 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18
NS
1618 goto error0;
1619 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
b194c7d8 1620 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18
NS
1621 goto error0;
1622 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1623 /*
1624 * Delete the old by-size entry on the right.
1625 */
0e266570 1626 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18
NS
1627 goto error0;
1628 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
b194c7d8 1629 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18
NS
1630 goto error0;
1631 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1632 /*
1633 * Delete the old by-block entry for the right block.
1634 */
b194c7d8 1635 if ((error = xfs_btree_delete(bno_cur, &i)))
2bd0ea18
NS
1636 goto error0;
1637 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1638 /*
1639 * Move the by-block cursor back to the left neighbor.
1640 */
b194c7d8 1641 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18
NS
1642 goto error0;
1643 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1644#ifdef DEBUG
1645 /*
1646 * Check that this is the right record: delete didn't
1647 * mangle the cursor.
1648 */
1649 {
1650 xfs_agblock_t xxbno;
1651 xfs_extlen_t xxlen;
1652
0e266570
NS
1653 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1654 &i)))
2bd0ea18
NS
1655 goto error0;
1656 XFS_WANT_CORRUPTED_GOTO(
1657 i == 1 && xxbno == ltbno && xxlen == ltlen,
1658 error0);
1659 }
1660#endif
1661 /*
1662 * Update remaining by-block entry to the new, joined block.
1663 */
1664 nbno = ltbno;
1665 nlen = len + ltlen + gtlen;
0e266570 1666 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1667 goto error0;
1668 }
1669 /*
1670 * Have only a left contiguous neighbor.
1671 * Merge it together with the new freespace.
1672 */
1673 else if (haveleft) {
1674 /*
1675 * Delete the old by-size entry on the left.
1676 */
0e266570 1677 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2bd0ea18
NS
1678 goto error0;
1679 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
b194c7d8 1680 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18
NS
1681 goto error0;
1682 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1683 /*
1684 * Back up the by-block cursor to the left neighbor, and
1685 * update its length.
1686 */
b194c7d8 1687 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2bd0ea18
NS
1688 goto error0;
1689 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1690 nbno = ltbno;
1691 nlen = len + ltlen;
0e266570 1692 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1693 goto error0;
1694 }
1695 /*
1696 * Have only a right contiguous neighbor.
1697 * Merge it together with the new freespace.
1698 */
1699 else if (haveright) {
1700 /*
1701 * Delete the old by-size entry on the right.
1702 */
0e266570 1703 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2bd0ea18
NS
1704 goto error0;
1705 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
b194c7d8 1706 if ((error = xfs_btree_delete(cnt_cur, &i)))
2bd0ea18
NS
1707 goto error0;
1708 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1709 /*
5000d01d 1710 * Update the starting block and length of the right
2bd0ea18
NS
1711 * neighbor in the by-block tree.
1712 */
1713 nbno = bno;
1714 nlen = len + gtlen;
0e266570 1715 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2bd0ea18
NS
1716 goto error0;
1717 }
1718 /*
1719 * No contiguous neighbors.
1720 * Insert the new freespace into the by-block tree.
1721 */
1722 else {
1723 nbno = bno;
1724 nlen = len;
b194c7d8 1725 if ((error = xfs_btree_insert(bno_cur, &i)))
2bd0ea18
NS
1726 goto error0;
1727 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1728 }
1729 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1730 bno_cur = NULL;
1731 /*
1732 * In all cases we need to insert the new freespace in the by-size tree.
1733 */
0e266570 1734 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2bd0ea18
NS
1735 goto error0;
1736 XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
b194c7d8 1737 if ((error = xfs_btree_insert(cnt_cur, &i)))
2bd0ea18
NS
1738 goto error0;
1739 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1740 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1741 cnt_cur = NULL;
a2ceac1f 1742
2bd0ea18
NS
1743 /*
1744 * Update the freespace totals in the ag and superblock.
1745 */
a2ceac1f
DC
1746 pag = xfs_perag_get(mp, agno);
1747 error = xfs_alloc_update_counters(tp, pag, agbp, len);
1748 xfs_perag_put(pag);
1749 if (error)
1750 goto error0;
1751
1752 if (!isfl)
1753 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1754 XFS_STATS_INC(xs_freex);
1755 XFS_STATS_ADD(xs_freeb, len);
56b2de80
DC
1756
1757 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
3e535bba 1758
2bd0ea18
NS
1759 return 0;
1760
1761 error0:
56b2de80 1762 trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
2bd0ea18
NS
1763 if (bno_cur)
1764 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1765 if (cnt_cur)
1766 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1767 return error;
1768}
1769
5000d01d 1770/*
2bd0ea18
NS
1771 * Visible (exported) allocation/free functions.
1772 * Some of these are used just by xfs_alloc_btree.c and this file.
1773 */
1774
1775/*
1776 * Compute and fill in value of m_ag_maxlevels.
1777 */
1778void
1779xfs_alloc_compute_maxlevels(
1780 xfs_mount_t *mp) /* file system mount structure */
1781{
1782 int level;
1783 uint maxblocks;
1784 uint maxleafents;
1785 int minleafrecs;
1786 int minnoderecs;
1787
1788 maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1789 minleafrecs = mp->m_alloc_mnr[0];
1790 minnoderecs = mp->m_alloc_mnr[1];
1791 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1792 for (level = 1; maxblocks > 1; level++)
1793 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1794 mp->m_ag_maxlevels = level;
1795}
1796
56b2de80
DC
1797/*
1798 * Find the length of the longest extent in an AG.
1799 */
1800xfs_extlen_t
1801xfs_alloc_longest_free_extent(
1802 struct xfs_mount *mp,
1803 struct xfs_perag *pag)
1804{
1805 xfs_extlen_t need, delta = 0;
1806
1807 need = XFS_MIN_FREELIST_PAG(pag, mp);
1808 if (need > pag->pagf_flcount)
1809 delta = need - pag->pagf_flcount;
1810
1811 if (pag->pagf_longest > delta)
1812 return pag->pagf_longest - delta;
1813 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1814}
1815
2bd0ea18
NS
1816/*
1817 * Decide whether to use this allocation group for this allocation.
1818 * If so, fix up the btree freelist's size.
2bd0ea18 1819 */
ff105f75 1820int /* error */
2bd0ea18 1821xfs_alloc_fix_freelist(
dfc130f3 1822 xfs_alloc_arg_t *args, /* allocation argument structure */
2bd0ea18
NS
1823 int flags) /* XFS_ALLOC_FLAG_... */
1824{
1825 xfs_buf_t *agbp; /* agf buffer pointer */
1826 xfs_agf_t *agf; /* a.g. freespace structure pointer */
1827 xfs_buf_t *agflbp;/* agfl buffer pointer */
1828 xfs_agblock_t bno; /* freelist block */
1829 xfs_extlen_t delta; /* new blocks needed in freelist */
1830 int error; /* error result code */
1831 xfs_extlen_t longest;/* longest extent in allocation group */
1832 xfs_mount_t *mp; /* file system mount point structure */
1833 xfs_extlen_t need; /* total blocks needed in freelist */
1834 xfs_perag_t *pag; /* per-ag information structure */
dfc130f3 1835 xfs_alloc_arg_t targs; /* local allocation arguments */
2bd0ea18
NS
1836 xfs_trans_t *tp; /* transaction pointer */
1837
1838 mp = args->mp;
1839
1840 pag = args->pag;
1841 tp = args->tp;
1842 if (!pag->pagf_init) {
0e266570
NS
1843 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1844 &agbp)))
2bd0ea18
NS
1845 return error;
1846 if (!pag->pagf_init) {
5e656dbb
BN
1847 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1848 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2bd0ea18
NS
1849 args->agbp = NULL;
1850 return 0;
1851 }
1852 } else
1853 agbp = NULL;
34317449 1854
5e656dbb
BN
1855 /*
1856 * If this is a metadata preferred pag and we are user data
34317449
NS
1857 * then try somewhere else if we are not being asked to
1858 * try harder at this point
1859 */
5e656dbb
BN
1860 if (pag->pagf_metadata && args->userdata &&
1861 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1862 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
34317449
NS
1863 args->agbp = NULL;
1864 return 0;
1865 }
1866
5e656dbb 1867 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
5e656dbb
BN
1868 /*
1869 * If it looks like there isn't a long enough extent, or enough
1870 * total blocks, reject it.
1871 */
56b2de80
DC
1872 need = XFS_MIN_FREELIST_PAG(pag, mp);
1873 longest = xfs_alloc_longest_free_extent(mp, pag);
5e656dbb
BN
1874 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1875 longest ||
1876 ((int)(pag->pagf_freeblks + pag->pagf_flcount -
1877 need - args->total) < (int)args->minleft)) {
1878 if (agbp)
1879 xfs_trans_brelse(tp, agbp);
1880 args->agbp = NULL;
1881 return 0;
1882 }
2bd0ea18 1883 }
5e656dbb 1884
2bd0ea18
NS
1885 /*
1886 * Get the a.g. freespace buffer.
1887 * Can fail if we're not blocking on locks, and it's held.
1888 */
1889 if (agbp == NULL) {
0e266570
NS
1890 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1891 &agbp)))
2bd0ea18
NS
1892 return error;
1893 if (agbp == NULL) {
5e656dbb
BN
1894 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1895 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
2bd0ea18
NS
1896 args->agbp = NULL;
1897 return 0;
1898 }
1899 }
1900 /*
1901 * Figure out how many blocks we should have in the freelist.
1902 */
1903 agf = XFS_BUF_TO_AGF(agbp);
1904 need = XFS_MIN_FREELIST(agf, mp);
2bd0ea18
NS
1905 /*
1906 * If there isn't enough total or single-extent, reject it.
1907 */
5e656dbb
BN
1908 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1909 delta = need > be32_to_cpu(agf->agf_flcount) ?
1910 (need - be32_to_cpu(agf->agf_flcount)) : 0;
1911 longest = be32_to_cpu(agf->agf_longest);
1912 longest = (longest > delta) ? (longest - delta) :
1913 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1914 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1915 longest ||
1916 ((int)(be32_to_cpu(agf->agf_freeblks) +
1917 be32_to_cpu(agf->agf_flcount) - need - args->total) <
1918 (int)args->minleft)) {
1919 xfs_trans_brelse(tp, agbp);
1920 args->agbp = NULL;
1921 return 0;
1922 }
2bd0ea18
NS
1923 }
1924 /*
1925 * Make the freelist shorter if it's too long.
1926 */
6e3140c7 1927 while (be32_to_cpu(agf->agf_flcount) > need) {
2bd0ea18
NS
1928 xfs_buf_t *bp;
1929
5e656dbb
BN
1930 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
1931 if (error)
2bd0ea18 1932 return error;
0e266570 1933 if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
2bd0ea18
NS
1934 return error;
1935 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
1936 xfs_trans_binval(tp, bp);
2bd0ea18
NS
1937 }
1938 /*
1939 * Initialize the args structure.
1940 */
a2ceac1f 1941 memset(&targs, 0, sizeof(targs));
2bd0ea18
NS
1942 targs.tp = tp;
1943 targs.mp = mp;
1944 targs.agbp = agbp;
1945 targs.agno = args->agno;
2bd0ea18
NS
1946 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1947 targs.type = XFS_ALLOCTYPE_THIS_AG;
1948 targs.pag = pag;
0e266570 1949 if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
2bd0ea18
NS
1950 return error;
1951 /*
1952 * Make the freelist longer if it's too short.
1953 */
6e3140c7 1954 while (be32_to_cpu(agf->agf_flcount) < need) {
2bd0ea18 1955 targs.agbno = 0;
6e3140c7 1956 targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
2bd0ea18
NS
1957 /*
1958 * Allocate as many blocks as possible at once.
1959 */
cb4deb22
NS
1960 if ((error = xfs_alloc_ag_vextent(&targs))) {
1961 xfs_trans_brelse(tp, agflbp);
2bd0ea18 1962 return error;
cb4deb22 1963 }
2bd0ea18 1964 /*
dfc130f3
RC
1965 * Stop if we run out. Won't happen if callers are obeying
1966 * the restrictions correctly. Can happen for free calls
2bd0ea18
NS
1967 * on a completely full ag.
1968 */
5e656dbb
BN
1969 if (targs.agbno == NULLAGBLOCK) {
1970 if (flags & XFS_ALLOC_FLAG_FREEING)
1971 break;
1972 xfs_trans_brelse(tp, agflbp);
1973 args->agbp = NULL;
1974 return 0;
1975 }
2bd0ea18
NS
1976 /*
1977 * Put each allocated block on the list.
1978 */
1979 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
5e656dbb
BN
1980 error = xfs_alloc_put_freelist(tp, agbp,
1981 agflbp, bno, 0);
1982 if (error)
2bd0ea18
NS
1983 return error;
1984 }
1985 }
cb4deb22 1986 xfs_trans_brelse(tp, agflbp);
2bd0ea18
NS
1987 args->agbp = agbp;
1988 return 0;
1989}
1990
1991/*
1992 * Get a block from the freelist.
1993 * Returns with the buffer for the block gotten.
1994 */
1995int /* error */
1996xfs_alloc_get_freelist(
1997 xfs_trans_t *tp, /* transaction pointer */
1998 xfs_buf_t *agbp, /* buffer containing the agf structure */
cdded3d8
DC
1999 xfs_agblock_t *bnop, /* block address retrieved from freelist */
2000 int btreeblk) /* destination is a AGF btree */
2bd0ea18
NS
2001{
2002 xfs_agf_t *agf; /* a.g. freespace structure */
2bd0ea18
NS
2003 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
2004 xfs_agblock_t bno; /* block number returned */
dd5b876e 2005 __be32 *agfl_bno;
2bd0ea18 2006 int error;
cdded3d8 2007 int logflags;
dd5b876e 2008 xfs_mount_t *mp = tp->t_mountp;
2bd0ea18
NS
2009 xfs_perag_t *pag; /* per allocation group data */
2010
2bd0ea18
NS
2011 /*
2012 * Freelist is empty, give up.
2013 */
dd5b876e 2014 agf = XFS_BUF_TO_AGF(agbp);
46eca962 2015 if (!agf->agf_flcount) {
2bd0ea18
NS
2016 *bnop = NULLAGBLOCK;
2017 return 0;
2018 }
2019 /*
2020 * Read the array of free blocks.
2021 */
dd5b876e
DC
2022 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2023 &agflbp);
2024 if (error)
2bd0ea18 2025 return error;
dd5b876e
DC
2026
2027
2bd0ea18
NS
2028 /*
2029 * Get the block number and update the data structures.
2030 */
dd5b876e
DC
2031 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2032 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
5e656dbb 2033 be32_add_cpu(&agf->agf_flfirst, 1);
2bd0ea18 2034 xfs_trans_brelse(tp, agflbp);
6e3140c7 2035 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
46eca962 2036 agf->agf_flfirst = 0;
56b2de80
DC
2037
2038 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
5e656dbb 2039 be32_add_cpu(&agf->agf_flcount, -1);
2bd0ea18
NS
2040 xfs_trans_agflist_delta(tp, -1);
2041 pag->pagf_flcount--;
56b2de80 2042 xfs_perag_put(pag);
cdded3d8
DC
2043
2044 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2045 if (btreeblk) {
5e656dbb 2046 be32_add_cpu(&agf->agf_btreeblks, 1);
cdded3d8
DC
2047 pag->pagf_btreeblks++;
2048 logflags |= XFS_AGF_BTREEBLKS;
2049 }
2050
cdded3d8 2051 xfs_alloc_log_agf(tp, agbp, logflags);
2bd0ea18 2052 *bnop = bno;
3e535bba 2053
2bd0ea18
NS
2054 return 0;
2055}
2056
2057/*
2058 * Log the given fields from the agf structure.
2059 */
2060void
2061xfs_alloc_log_agf(
2062 xfs_trans_t *tp, /* transaction pointer */
2063 xfs_buf_t *bp, /* buffer for a.g. freelist header */
dfc130f3 2064 int fields) /* mask of fields to be logged (XFS_AGF_...) */
2bd0ea18
NS
2065{
2066 int first; /* first byte offset */
2067 int last; /* last byte offset */
2068 static const short offsets[] = {
2069 offsetof(xfs_agf_t, agf_magicnum),
2070 offsetof(xfs_agf_t, agf_versionnum),
2071 offsetof(xfs_agf_t, agf_seqno),
2072 offsetof(xfs_agf_t, agf_length),
2073 offsetof(xfs_agf_t, agf_roots[0]),
2074 offsetof(xfs_agf_t, agf_levels[0]),
2075 offsetof(xfs_agf_t, agf_flfirst),
2076 offsetof(xfs_agf_t, agf_fllast),
2077 offsetof(xfs_agf_t, agf_flcount),
2078 offsetof(xfs_agf_t, agf_freeblks),
2079 offsetof(xfs_agf_t, agf_longest),
cdded3d8 2080 offsetof(xfs_agf_t, agf_btreeblks),
dd5b876e 2081 offsetof(xfs_agf_t, agf_uuid),
2bd0ea18
NS
2082 sizeof(xfs_agf_t)
2083 };
2084
56b2de80
DC
2085 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2086
bdc16ee5 2087 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
dd5b876e 2088
2bd0ea18
NS
2089 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2090 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2091}
2092
2093/*
2094 * Interface for inode allocation to force the pag data to be initialized.
2095 */
2096int /* error */
2097xfs_alloc_pagf_init(
2098 xfs_mount_t *mp, /* file system mount structure */
2099 xfs_trans_t *tp, /* transaction pointer */
2100 xfs_agnumber_t agno, /* allocation group number */
2101 int flags) /* XFS_ALLOC_FLAGS_... */
2102{
7a3bffe4 2103 xfs_buf_t *bp;
2bd0ea18
NS
2104 int error;
2105
0e266570 2106 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2bd0ea18
NS
2107 return error;
2108 if (bp)
2109 xfs_trans_brelse(tp, bp);
2110 return 0;
2111}
2112
2113/*
2114 * Put the block on the freelist for the allocation group.
2115 */
2116int /* error */
2117xfs_alloc_put_freelist(
2118 xfs_trans_t *tp, /* transaction pointer */
2119 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2120 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
cdded3d8
DC
2121 xfs_agblock_t bno, /* block being freed */
2122 int btreeblk) /* block came from a AGF btree */
2bd0ea18
NS
2123{
2124 xfs_agf_t *agf; /* a.g. freespace structure */
5e656dbb 2125 __be32 *blockp;/* pointer to array entry */
2bd0ea18 2126 int error;
cdded3d8 2127 int logflags;
2bd0ea18
NS
2128 xfs_mount_t *mp; /* mount structure */
2129 xfs_perag_t *pag; /* per allocation group data */
dd5b876e
DC
2130 __be32 *agfl_bno;
2131 int startoff;
2bd0ea18
NS
2132
2133 agf = XFS_BUF_TO_AGF(agbp);
2134 mp = tp->t_mountp;
2135
2136 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
6e3140c7 2137 be32_to_cpu(agf->agf_seqno), &agflbp)))
2bd0ea18 2138 return error;
5e656dbb 2139 be32_add_cpu(&agf->agf_fllast, 1);
6e3140c7 2140 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
46eca962 2141 agf->agf_fllast = 0;
56b2de80
DC
2142
2143 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
5e656dbb 2144 be32_add_cpu(&agf->agf_flcount, 1);
2bd0ea18
NS
2145 xfs_trans_agflist_delta(tp, 1);
2146 pag->pagf_flcount++;
cdded3d8
DC
2147
2148 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2149 if (btreeblk) {
5e656dbb 2150 be32_add_cpu(&agf->agf_btreeblks, -1);
cdded3d8
DC
2151 pag->pagf_btreeblks--;
2152 logflags |= XFS_AGF_BTREEBLKS;
2153 }
56b2de80 2154 xfs_perag_put(pag);
cdded3d8 2155
5e656dbb
BN
2156 xfs_alloc_log_agf(tp, agbp, logflags);
2157
6e3140c7 2158 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
dd5b876e
DC
2159
2160 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2161 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
5e656dbb 2162 *blockp = cpu_to_be32(bno);
dd5b876e
DC
2163 startoff = (char *)blockp - (char *)agflbp->b_addr;
2164
cdded3d8 2165 xfs_alloc_log_agf(tp, agbp, logflags);
dd5b876e 2166
bdc16ee5 2167 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
dd5b876e
DC
2168 xfs_trans_log_buf(tp, agflbp, startoff,
2169 startoff + sizeof(xfs_agblock_t) - 1);
2bd0ea18
NS
2170 return 0;
2171}
2172
dd5b876e 2173static bool
a2ceac1f 2174xfs_agf_verify(
dd5b876e 2175 struct xfs_mount *mp,
a2ceac1f
DC
2176 struct xfs_buf *bp)
2177 {
dd5b876e 2178 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
a2ceac1f 2179
dd5b876e 2180 if (xfs_sb_version_hascrc(&mp->m_sb) &&
84a62eea 2181 !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid))
dd5b876e 2182 return false;
a2ceac1f 2183
dd5b876e
DC
2184 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2185 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2186 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2187 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2188 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2189 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
2190 return false;
a2ceac1f
DC
2191
2192 /*
2193 * during growfs operations, the perag is not fully initialised,
2194 * so we can't use it for any useful checking. growfs ensures we can't
2195 * use it by using uncached buffers that don't have the perag attached
2196 * so we can detect and avoid this problem.
2197 */
dd5b876e
DC
2198 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
2199 return false;
a2ceac1f 2200
dd5b876e
DC
2201 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2202 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
2203 return false;
2204
2205 return true;;
a2ceac1f 2206
a2ceac1f
DC
2207}
2208
2209static void
2210xfs_agf_read_verify(
2211 struct xfs_buf *bp)
2212{
dd5b876e 2213 struct xfs_mount *mp = bp->b_target->bt_mount;
dd5b876e 2214
45922933
DC
2215 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2216 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
12b53197 2217 xfs_buf_ioerror(bp, -EFSBADCRC);
45922933
DC
2218 else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2219 XFS_ERRTAG_ALLOC_READ_AGF,
2220 XFS_RANDOM_ALLOC_READ_AGF))
12b53197 2221 xfs_buf_ioerror(bp, -EFSCORRUPTED);
45922933
DC
2222
2223 if (bp->b_error)
2224 xfs_verifier_error(bp);
a2ceac1f
DC
2225}
2226
2227static void
2228xfs_agf_write_verify(
2229 struct xfs_buf *bp)
2230{
dd5b876e
DC
2231 struct xfs_mount *mp = bp->b_target->bt_mount;
2232 struct xfs_buf_log_item *bip = bp->b_fspriv;
2233
2234 if (!xfs_agf_verify(mp, bp)) {
12b53197 2235 xfs_buf_ioerror(bp, -EFSCORRUPTED);
45922933 2236 xfs_verifier_error(bp);
dd5b876e
DC
2237 return;
2238 }
2239
2240 if (!xfs_sb_version_hascrc(&mp->m_sb))
2241 return;
2242
2243 if (bip)
2244 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2245
43b5aeed 2246 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
a2ceac1f
DC
2247}
2248
2249const struct xfs_buf_ops xfs_agf_buf_ops = {
2250 .verify_read = xfs_agf_read_verify,
2251 .verify_write = xfs_agf_write_verify,
2252};
2253
2bd0ea18
NS
2254/*
2255 * Read in the allocation group header (free/alloc section).
2256 */
2257int /* error */
56b2de80
DC
2258xfs_read_agf(
2259 struct xfs_mount *mp, /* mount point structure */
2260 struct xfs_trans *tp, /* transaction pointer */
2261 xfs_agnumber_t agno, /* allocation group number */
2262 int flags, /* XFS_BUF_ */
2263 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2bd0ea18 2264{
9440d84d 2265 int error;
2bd0ea18 2266
ff105f75
DC
2267 trace_xfs_read_agf(mp, agno);
2268
2bd0ea18 2269 ASSERT(agno != NULLAGNUMBER);
9440d84d
NS
2270 error = xfs_trans_read_buf(
2271 mp, tp, mp->m_ddev_targp,
2272 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
a2ceac1f 2273 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
9440d84d 2274 if (error)
2bd0ea18 2275 return error;
56b2de80 2276 if (!*bpp)
2bd0ea18 2277 return 0;
56b2de80 2278
a2ceac1f
DC
2279 ASSERT(!(*bpp)->b_error);
2280 xfs_buf_set_ref(*bpp, XFS_AGF_REF);
56b2de80
DC
2281 return 0;
2282}
2283
2284/*
2285 * Read in the allocation group header (free/alloc section).
2286 */
2287int /* error */
2288xfs_alloc_read_agf(
2289 struct xfs_mount *mp, /* mount point structure */
2290 struct xfs_trans *tp, /* transaction pointer */
2291 xfs_agnumber_t agno, /* allocation group number */
2292 int flags, /* XFS_ALLOC_FLAG_... */
2293 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2294{
2295 struct xfs_agf *agf; /* ag freelist header */
2296 struct xfs_perag *pag; /* per allocation group data */
2297 int error;
2298
ff105f75 2299 trace_xfs_alloc_read_agf(mp, agno);
56b2de80 2300
ff105f75 2301 ASSERT(agno != NULLAGNUMBER);
56b2de80
DC
2302 error = xfs_read_agf(mp, tp, agno,
2303 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2304 bpp);
2305 if (error)
2306 return error;
2307 if (!*bpp)
2308 return 0;
a2ceac1f 2309 ASSERT(!(*bpp)->b_error);
56b2de80
DC
2310
2311 agf = XFS_BUF_TO_AGF(*bpp);
2312 pag = xfs_perag_get(mp, agno);
2bd0ea18 2313 if (!pag->pagf_init) {
6e3140c7 2314 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
cdded3d8 2315 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
6e3140c7
NS
2316 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2317 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2bd0ea18 2318 pag->pagf_levels[XFS_BTNUM_BNOi] =
6e3140c7 2319 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2bd0ea18 2320 pag->pagf_levels[XFS_BTNUM_CNTi] =
6e3140c7 2321 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
5e656dbb 2322 spin_lock_init(&pag->pagb_lock);
56b2de80 2323 pag->pagb_count = 0;
ff105f75
DC
2324 /* XXX: pagb_tree doesn't exist in userspace */
2325 //pag->pagb_tree = RB_ROOT;
2bd0ea18
NS
2326 pag->pagf_init = 1;
2327 }
2328#ifdef DEBUG
2329 else if (!XFS_FORCED_SHUTDOWN(mp)) {
6e3140c7 2330 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
cdded3d8 2331 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
6e3140c7
NS
2332 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2333 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2bd0ea18 2334 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
6e3140c7 2335 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2bd0ea18 2336 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
6e3140c7 2337 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2bd0ea18
NS
2338 }
2339#endif
56b2de80 2340 xfs_perag_put(pag);
2bd0ea18
NS
2341 return 0;
2342}
2343
2344/*
2345 * Allocate an extent (variable-size).
2346 * Depending on the allocation type, we either look in a single allocation
2347 * group or loop over the allocation groups to find the result.
2348 */
2349int /* error */
2350xfs_alloc_vextent(
dfc130f3 2351 xfs_alloc_arg_t *args) /* allocation argument structure */
2bd0ea18 2352{
dfc130f3 2353 xfs_agblock_t agsize; /* allocation group size */
2bd0ea18
NS
2354 int error;
2355 int flags; /* XFS_ALLOC_FLAG_... locking flags */
2bd0ea18
NS
2356 xfs_extlen_t minleft;/* minimum left value, temp copy */
2357 xfs_mount_t *mp; /* mount structure pointer */
2358 xfs_agnumber_t sagno; /* starting allocation group number */
dfc130f3 2359 xfs_alloctype_t type; /* input allocation type */
34317449 2360 int bump_rotor = 0;
9baa549b 2361 int no_min = 0;
46eca962 2362 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2bd0ea18
NS
2363
2364 mp = args->mp;
2365 type = args->otype = args->type;
2366 args->agbno = NULLAGBLOCK;
2367 /*
2368 * Just fix this up, for the case where the last a.g. is shorter
2369 * (or there's only one a.g.) and the caller couldn't easily figure
2370 * that out (xfs_bmap_alloc).
2371 */
2372 agsize = mp->m_sb.sb_agblocks;
2373 if (args->maxlen > agsize)
2374 args->maxlen = agsize;
2375 if (args->alignment == 0)
2376 args->alignment = 1;
2377 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2378 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2379 ASSERT(args->minlen <= args->maxlen);
2380 ASSERT(args->minlen <= agsize);
2381 ASSERT(args->mod < args->prod);
2382 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2383 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2384 args->minlen > args->maxlen || args->minlen > agsize ||
2385 args->mod >= args->prod) {
2386 args->fsbno = NULLFSBLOCK;
56b2de80 2387 trace_xfs_alloc_vextent_badargs(args);
2bd0ea18
NS
2388 return 0;
2389 }
9baa549b
SL
2390 minleft = args->minleft;
2391
2bd0ea18
NS
2392 switch (type) {
2393 case XFS_ALLOCTYPE_THIS_AG:
2394 case XFS_ALLOCTYPE_NEAR_BNO:
2395 case XFS_ALLOCTYPE_THIS_BNO:
2396 /*
2397 * These three force us into a single a.g.
2398 */
2399 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
56b2de80 2400 args->pag = xfs_perag_get(mp, args->agno);
2bd0ea18
NS
2401 args->minleft = 0;
2402 error = xfs_alloc_fix_freelist(args, 0);
2403 args->minleft = minleft;
2404 if (error) {
56b2de80 2405 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
2406 goto error0;
2407 }
2408 if (!args->agbp) {
56b2de80 2409 trace_xfs_alloc_vextent_noagbp(args);
2bd0ea18
NS
2410 break;
2411 }
2412 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
0e266570 2413 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 2414 goto error0;
2bd0ea18
NS
2415 break;
2416 case XFS_ALLOCTYPE_START_BNO:
2417 /*
2418 * Try near allocation first, then anywhere-in-ag after
2419 * the first a.g. fails.
2420 */
34317449
NS
2421 if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) &&
2422 (mp->m_flags & XFS_MOUNT_32BITINODES)) {
46eca962
NS
2423 args->fsbno = XFS_AGB_TO_FSB(mp,
2424 ((mp->m_agfrotor / rotorstep) %
2425 mp->m_sb.sb_agcount), 0);
34317449
NS
2426 bump_rotor = 1;
2427 }
2bd0ea18
NS
2428 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2429 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2430 /* FALLTHROUGH */
2431 case XFS_ALLOCTYPE_ANY_AG:
2432 case XFS_ALLOCTYPE_START_AG:
2433 case XFS_ALLOCTYPE_FIRST_AG:
2434 /*
2435 * Rotate through the allocation groups looking for a winner.
2436 */
2437 if (type == XFS_ALLOCTYPE_ANY_AG) {
2438 /*
2439 * Start with the last place we left off.
2440 */
46eca962
NS
2441 args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2442 mp->m_sb.sb_agcount;
2bd0ea18
NS
2443 args->type = XFS_ALLOCTYPE_THIS_AG;
2444 flags = XFS_ALLOC_FLAG_TRYLOCK;
2445 } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2446 /*
2447 * Start with allocation group given by bno.
2448 */
2449 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2450 args->type = XFS_ALLOCTYPE_THIS_AG;
2451 sagno = 0;
2452 flags = 0;
2453 } else {
2454 if (type == XFS_ALLOCTYPE_START_AG)
2455 args->type = XFS_ALLOCTYPE_THIS_AG;
2456 /*
2457 * Start with the given allocation group.
2458 */
2459 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2460 flags = XFS_ALLOC_FLAG_TRYLOCK;
2461 }
2462 /*
2463 * Loop over allocation groups twice; first time with
2464 * trylock set, second time without.
2465 */
2466 for (;;) {
56b2de80 2467 args->pag = xfs_perag_get(mp, args->agno);
9baa549b
SL
2468 if (no_min) args->minleft = 0;
2469 error = xfs_alloc_fix_freelist(args, flags);
2470 args->minleft = minleft;
2471 if (error) {
56b2de80 2472 trace_xfs_alloc_vextent_nofix(args);
2bd0ea18
NS
2473 goto error0;
2474 }
2475 /*
2476 * If we get a buffer back then the allocation will fly.
2477 */
2478 if (args->agbp) {
0e266570 2479 if ((error = xfs_alloc_ag_vextent(args)))
2bd0ea18 2480 goto error0;
2bd0ea18
NS
2481 break;
2482 }
56b2de80
DC
2483
2484 trace_xfs_alloc_vextent_loopfailed(args);
2485
2bd0ea18
NS
2486 /*
2487 * Didn't work, figure out the next iteration.
2488 */
2489 if (args->agno == sagno &&
2490 type == XFS_ALLOCTYPE_START_BNO)
2491 args->type = XFS_ALLOCTYPE_THIS_AG;
5e656dbb
BN
2492 /*
2493 * For the first allocation, we can try any AG to get
2494 * space. However, if we already have allocated a
2495 * block, we don't want to try AGs whose number is below
2496 * sagno. Otherwise, we may end up with out-of-order
2497 * locking of AGF, which might cause deadlock.
2498 */
2499 if (++(args->agno) == mp->m_sb.sb_agcount) {
2500 if (args->firstblock != NULLFSBLOCK)
2501 args->agno = sagno;
2502 else
2503 args->agno = 0;
2504 }
5000d01d 2505 /*
2bd0ea18
NS
2506 * Reached the starting a.g., must either be done
2507 * or switch to non-trylock mode.
2508 */
2509 if (args->agno == sagno) {
9baa549b 2510 if (no_min == 1) {
2bd0ea18 2511 args->agbno = NULLAGBLOCK;
56b2de80 2512 trace_xfs_alloc_vextent_allfailed(args);
2bd0ea18
NS
2513 break;
2514 }
9baa549b
SL
2515 if (flags == 0) {
2516 no_min = 1;
2517 } else {
2518 flags = 0;
2519 if (type == XFS_ALLOCTYPE_START_BNO) {
2520 args->agbno = XFS_FSB_TO_AGBNO(mp,
2521 args->fsbno);
2522 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2523 }
2bd0ea18
NS
2524 }
2525 }
56b2de80 2526 xfs_perag_put(args->pag);
2bd0ea18 2527 }
46eca962
NS
2528 if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2529 if (args->agno == sagno)
2530 mp->m_agfrotor = (mp->m_agfrotor + 1) %
2531 (mp->m_sb.sb_agcount * rotorstep);
2532 else
2533 mp->m_agfrotor = (args->agno * rotorstep + 1) %
2534 (mp->m_sb.sb_agcount * rotorstep);
2535 }
2bd0ea18
NS
2536 break;
2537 default:
2538 ASSERT(0);
2539 /* NOTREACHED */
2540 }
2541 if (args->agbno == NULLAGBLOCK)
2542 args->fsbno = NULLFSBLOCK;
2543 else {
2544 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2545#ifdef DEBUG
2546 ASSERT(args->len >= args->minlen);
2547 ASSERT(args->len <= args->maxlen);
2548 ASSERT(args->agbno % args->alignment == 0);
2549 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2550 args->len);
2551#endif
2552 }
56b2de80 2553 xfs_perag_put(args->pag);
2bd0ea18
NS
2554 return 0;
2555error0:
56b2de80 2556 xfs_perag_put(args->pag);
2bd0ea18
NS
2557 return error;
2558}
2559
2560/*
2561 * Free an extent.
2562 * Just break up the extent address and hand off to xfs_free_ag_extent
2563 * after fixing up the freelist.
2564 */
2565int /* error */
2566xfs_free_extent(
2567 xfs_trans_t *tp, /* transaction pointer */
2568 xfs_fsblock_t bno, /* starting block number of extent */
2569 xfs_extlen_t len) /* length of extent */
2570{
5e656dbb 2571 xfs_alloc_arg_t args;
2bd0ea18
NS
2572 int error;
2573
2574 ASSERT(len != 0);
5e656dbb 2575 memset(&args, 0, sizeof(xfs_alloc_arg_t));
2bd0ea18
NS
2576 args.tp = tp;
2577 args.mp = tp->t_mountp;
a2ceac1f
DC
2578
2579 /*
2580 * validate that the block number is legal - the enables us to detect
2581 * and handle a silent filesystem corruption rather than crashing.
2582 */
2bd0ea18 2583 args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
a2ceac1f 2584 if (args.agno >= args.mp->m_sb.sb_agcount)
12b53197 2585 return -EFSCORRUPTED;
a2ceac1f 2586
2bd0ea18 2587 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
a2ceac1f 2588 if (args.agbno >= args.mp->m_sb.sb_agblocks)
12b53197 2589 return -EFSCORRUPTED;
a2ceac1f 2590
56b2de80 2591 args.pag = xfs_perag_get(args.mp, args.agno);
a2ceac1f
DC
2592 ASSERT(args.pag);
2593
2594 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2595 if (error)
2bd0ea18 2596 goto error0;
a2ceac1f
DC
2597
2598 /* validate the extent size is legal now we have the agf locked */
2599 if (args.agbno + len >
2600 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
12b53197 2601 error = -EFSCORRUPTED;
a2ceac1f
DC
2602 goto error0;
2603 }
2604
5e656dbb 2605 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
a2ceac1f
DC
2606 if (!error)
2607 xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
2bd0ea18 2608error0:
56b2de80 2609 xfs_perag_put(args.pag);
2bd0ea18
NS
2610 return error;
2611}