]> git.ipfire.org Git - people/ms/linux.git/blame - fs/xfs/libxfs/xfs_alloc.c
xfs: get rid of the log item descriptor
[people/ms/linux.git] / fs / xfs / libxfs / xfs_alloc.c
CommitLineData
1da177e4 1/*
7b718769
NS
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
1da177e4 4 *
7b718769
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
1da177e4
LT
7 * published by the Free Software Foundation.
8 *
7b718769
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
1da177e4 13 *
7b718769
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1da177e4 17 */
1da177e4 18#include "xfs.h"
a844f451 19#include "xfs_fs.h"
70a9883c 20#include "xfs_format.h"
239880ef 21#include "xfs_log_format.h"
70a9883c 22#include "xfs_shared.h"
239880ef 23#include "xfs_trans_resv.h"
a844f451 24#include "xfs_bit.h"
1da177e4 25#include "xfs_sb.h"
1da177e4 26#include "xfs_mount.h"
3ab78df2 27#include "xfs_defer.h"
a844f451 28#include "xfs_inode.h"
1da177e4 29#include "xfs_btree.h"
673930c3 30#include "xfs_rmap.h"
a4fbe6ab 31#include "xfs_alloc_btree.h"
1da177e4 32#include "xfs_alloc.h"
efc27b52 33#include "xfs_extent_busy.h"
e9e899a2 34#include "xfs_errortag.h"
1da177e4 35#include "xfs_error.h"
4e0e6040 36#include "xfs_cksum.h"
0b1b213f 37#include "xfs_trace.h"
239880ef 38#include "xfs_trans.h"
4e0e6040 39#include "xfs_buf_item.h"
239880ef 40#include "xfs_log.h"
3fd129b6 41#include "xfs_ag_resv.h"
f8f2835a
BF
42#include "xfs_bmap.h"
43
44extern kmem_zone_t *xfs_bmap_free_item_zone;
1da177e4 45
c999a223 46struct workqueue_struct *xfs_alloc_wq;
1da177e4
LT
47
48#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
49
50#define XFSA_FIXUP_BNO_OK 1
51#define XFSA_FIXUP_CNT_OK 2
52
1da177e4
LT
53STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
54STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
55STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
56STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
e26f0501 57 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
1da177e4 58
a78ee256
DC
59/*
60 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
61 * the beginning of the block for a proper header with the location information
62 * and CRC.
63 */
64unsigned int
65xfs_agfl_size(
66 struct xfs_mount *mp)
67{
68 unsigned int size = mp->m_sb.sb_sectsize;
69
70 if (xfs_sb_version_hascrc(&mp->m_sb))
71 size -= sizeof(struct xfs_agfl);
72
73 return size / sizeof(xfs_agblock_t);
74}
75
af30dfa1
DW
76unsigned int
77xfs_refc_block(
78 struct xfs_mount *mp)
79{
80 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
81 return XFS_RMAP_BLOCK(mp) + 1;
82 if (xfs_sb_version_hasfinobt(&mp->m_sb))
83 return XFS_FIBT_BLOCK(mp) + 1;
84 return XFS_IBT_BLOCK(mp) + 1;
85}
86
8018026e
DW
87xfs_extlen_t
88xfs_prealloc_blocks(
89 struct xfs_mount *mp)
90{
af30dfa1
DW
91 if (xfs_sb_version_hasreflink(&mp->m_sb))
92 return xfs_refc_block(mp) + 1;
8018026e
DW
93 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
94 return XFS_RMAP_BLOCK(mp) + 1;
95 if (xfs_sb_version_hasfinobt(&mp->m_sb))
96 return XFS_FIBT_BLOCK(mp) + 1;
97 return XFS_IBT_BLOCK(mp) + 1;
98}
99
52548852
DW
100/*
101 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
102 * AGF buffer (PV 947395), we place constraints on the relationship among
103 * actual allocations for data blocks, freelist blocks, and potential file data
104 * bmap btree blocks. However, these restrictions may result in no actual space
105 * allocated for a delayed extent, for example, a data block in a certain AG is
106 * allocated but there is no additional block for the additional bmap btree
107 * block due to a split of the bmap btree of the file. The result of this may
108 * lead to an infinite loop when the file gets flushed to disk and all delayed
109 * extents need to be actually allocated. To get around this, we explicitly set
110 * aside a few blocks which will not be reserved in delayed allocation.
111 *
3fd129b6
DW
112 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
113 * potential split of the file's bmap btree.
52548852
DW
114 */
115unsigned int
116xfs_alloc_set_aside(
117 struct xfs_mount *mp)
118{
5149fd32 119 return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
52548852
DW
120}
121
122/*
123 * When deciding how much space to allocate out of an AG, we limit the
124 * allocation maximum size to the size the AG. However, we cannot use all the
125 * blocks in the AG - some are permanently used by metadata. These
126 * blocks are generally:
127 * - the AG superblock, AGF, AGI and AGFL
128 * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
129 * the AGI free inode and rmap btree root blocks.
130 * - blocks on the AGFL according to xfs_alloc_set_aside() limits
131 * - the rmapbt root block
132 *
133 * The AG headers are sector sized, so the amount of space they take up is
134 * dependent on filesystem geometry. The others are all single blocks.
135 */
136unsigned int
137xfs_alloc_ag_max_usable(
138 struct xfs_mount *mp)
139{
140 unsigned int blocks;
141
142 blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
143 blocks += XFS_ALLOC_AGFL_RESERVE;
144 blocks += 3; /* AGF, AGI btree root blocks */
145 if (xfs_sb_version_hasfinobt(&mp->m_sb))
146 blocks++; /* finobt root block */
147 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
148 blocks++; /* rmap root block */
d0e853f3
DW
149 if (xfs_sb_version_hasreflink(&mp->m_sb))
150 blocks++; /* refcount root block */
52548852
DW
151
152 return mp->m_sb.sb_agblocks - blocks;
153}
154
fe033cc8
CH
155/*
156 * Lookup the record equal to [bno, len] in the btree given by cur.
157 */
158STATIC int /* error */
159xfs_alloc_lookup_eq(
160 struct xfs_btree_cur *cur, /* btree cursor */
161 xfs_agblock_t bno, /* starting block of extent */
162 xfs_extlen_t len, /* length of extent */
163 int *stat) /* success/failure */
164{
165 cur->bc_rec.a.ar_startblock = bno;
166 cur->bc_rec.a.ar_blockcount = len;
167 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
168}
169
170/*
171 * Lookup the first record greater than or equal to [bno, len]
172 * in the btree given by cur.
173 */
a66d6363 174int /* error */
fe033cc8
CH
175xfs_alloc_lookup_ge(
176 struct xfs_btree_cur *cur, /* btree cursor */
177 xfs_agblock_t bno, /* starting block of extent */
178 xfs_extlen_t len, /* length of extent */
179 int *stat) /* success/failure */
180{
181 cur->bc_rec.a.ar_startblock = bno;
182 cur->bc_rec.a.ar_blockcount = len;
183 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
184}
185
186/*
187 * Lookup the first record less than or equal to [bno, len]
188 * in the btree given by cur.
189 */
ce1d802e 190int /* error */
fe033cc8
CH
191xfs_alloc_lookup_le(
192 struct xfs_btree_cur *cur, /* btree cursor */
193 xfs_agblock_t bno, /* starting block of extent */
194 xfs_extlen_t len, /* length of extent */
195 int *stat) /* success/failure */
196{
197 cur->bc_rec.a.ar_startblock = bno;
198 cur->bc_rec.a.ar_blockcount = len;
199 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
200}
201
278d0ca1
CH
202/*
203 * Update the record referred to by cur to the value given
204 * by [bno, len].
205 * This either works (return 0) or gets an EFSCORRUPTED error.
206 */
207STATIC int /* error */
208xfs_alloc_update(
209 struct xfs_btree_cur *cur, /* btree cursor */
210 xfs_agblock_t bno, /* starting block of extent */
211 xfs_extlen_t len) /* length of extent */
212{
213 union xfs_btree_rec rec;
214
215 rec.alloc.ar_startblock = cpu_to_be32(bno);
216 rec.alloc.ar_blockcount = cpu_to_be32(len);
217 return xfs_btree_update(cur, &rec);
218}
fe033cc8 219
8cc938fe
CH
220/*
221 * Get the data from the pointed-to record.
222 */
a46db608 223int /* error */
8cc938fe
CH
224xfs_alloc_get_rec(
225 struct xfs_btree_cur *cur, /* btree cursor */
226 xfs_agblock_t *bno, /* output: starting block of extent */
227 xfs_extlen_t *len, /* output: length of extent */
228 int *stat) /* output: success/failure */
229{
230 union xfs_btree_rec *rec;
231 int error;
232
233 error = xfs_btree_get_rec(cur, &rec, stat);
234 if (!error && *stat == 1) {
235 *bno = be32_to_cpu(rec->alloc.ar_startblock);
236 *len = be32_to_cpu(rec->alloc.ar_blockcount);
237 }
238 return error;
239}
240
1da177e4
LT
241/*
242 * Compute aligned version of the found extent.
243 * Takes alignment and min length into account.
244 */
ebf55872 245STATIC bool
1da177e4 246xfs_alloc_compute_aligned(
86fa8af6 247 xfs_alloc_arg_t *args, /* allocation argument structure */
1da177e4
LT
248 xfs_agblock_t foundbno, /* starting block in found extent */
249 xfs_extlen_t foundlen, /* length in found extent */
1da177e4 250 xfs_agblock_t *resbno, /* result block number */
ebf55872
CH
251 xfs_extlen_t *reslen, /* result length */
252 unsigned *busy_gen)
1da177e4 253{
ebf55872
CH
254 xfs_agblock_t bno = foundbno;
255 xfs_extlen_t len = foundlen;
bfe46d4e 256 xfs_extlen_t diff;
ebf55872 257 bool busy;
1da177e4 258
e26f0501 259 /* Trim busy sections out of found extent */
ebf55872 260 busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
e26f0501 261
bfe46d4e
BF
262 /*
263 * If we have a largish extent that happens to start before min_agbno,
264 * see if we can shift it into range...
265 */
266 if (bno < args->min_agbno && bno + len > args->min_agbno) {
267 diff = args->min_agbno - bno;
268 if (len > diff) {
269 bno += diff;
270 len -= diff;
271 }
272 }
273
e26f0501
CH
274 if (args->alignment > 1 && len >= args->minlen) {
275 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
bfe46d4e
BF
276
277 diff = aligned_bno - bno;
e26f0501
CH
278
279 *resbno = aligned_bno;
280 *reslen = diff >= len ? 0 : len - diff;
1da177e4 281 } else {
e26f0501
CH
282 *resbno = bno;
283 *reslen = len;
1da177e4 284 }
ebf55872
CH
285
286 return busy;
1da177e4
LT
287}
288
289/*
290 * Compute best start block and diff for "near" allocations.
291 * freelen >= wantlen already checked by caller.
292 */
293STATIC xfs_extlen_t /* difference value (absolute) */
294xfs_alloc_compute_diff(
295 xfs_agblock_t wantbno, /* target starting block */
296 xfs_extlen_t wantlen, /* target length */
297 xfs_extlen_t alignment, /* target alignment */
292378ed 298 int datatype, /* are we allocating data? */
1da177e4
LT
299 xfs_agblock_t freebno, /* freespace's starting block */
300 xfs_extlen_t freelen, /* freespace's length */
301 xfs_agblock_t *newbnop) /* result: best start block from free */
302{
303 xfs_agblock_t freeend; /* end of freespace extent */
304 xfs_agblock_t newbno1; /* return block number */
305 xfs_agblock_t newbno2; /* other new block number */
306 xfs_extlen_t newlen1=0; /* length with newbno1 */
307 xfs_extlen_t newlen2=0; /* length with newbno2 */
308 xfs_agblock_t wantend; /* end of target extent */
292378ed 309 bool userdata = xfs_alloc_is_userdata(datatype);
1da177e4
LT
310
311 ASSERT(freelen >= wantlen);
312 freeend = freebno + freelen;
313 wantend = wantbno + wantlen;
211d022c
JK
314 /*
315 * We want to allocate from the start of a free extent if it is past
316 * the desired block or if we are allocating user data and the free
317 * extent is before desired block. The second case is there to allow
318 * for contiguous allocation from the remaining free space if the file
319 * grows in the short term.
320 */
321 if (freebno >= wantbno || (userdata && freeend < wantend)) {
1da177e4
LT
322 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
323 newbno1 = NULLAGBLOCK;
324 } else if (freeend >= wantend && alignment > 1) {
325 newbno1 = roundup(wantbno, alignment);
326 newbno2 = newbno1 - alignment;
327 if (newbno1 >= freeend)
328 newbno1 = NULLAGBLOCK;
329 else
330 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
331 if (newbno2 < freebno)
332 newbno2 = NULLAGBLOCK;
333 else
334 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
335 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
336 if (newlen1 < newlen2 ||
337 (newlen1 == newlen2 &&
338 XFS_ABSDIFF(newbno1, wantbno) >
339 XFS_ABSDIFF(newbno2, wantbno)))
340 newbno1 = newbno2;
341 } else if (newbno2 != NULLAGBLOCK)
342 newbno1 = newbno2;
343 } else if (freeend >= wantend) {
344 newbno1 = wantbno;
345 } else if (alignment > 1) {
346 newbno1 = roundup(freeend - wantlen, alignment);
347 if (newbno1 > freeend - wantlen &&
348 newbno1 - alignment >= freebno)
349 newbno1 -= alignment;
350 else if (newbno1 >= freeend)
351 newbno1 = NULLAGBLOCK;
352 } else
353 newbno1 = freeend - wantlen;
354 *newbnop = newbno1;
355 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
356}
357
358/*
359 * Fix up the length, based on mod and prod.
360 * len should be k * prod + mod for some k.
361 * If len is too small it is returned unchanged.
362 * If len hits maxlen it is left alone.
363 */
364STATIC void
365xfs_alloc_fix_len(
366 xfs_alloc_arg_t *args) /* allocation argument structure */
367{
368 xfs_extlen_t k;
369 xfs_extlen_t rlen;
370
371 ASSERT(args->mod < args->prod);
372 rlen = args->len;
373 ASSERT(rlen >= args->minlen);
374 ASSERT(rlen <= args->maxlen);
375 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
376 (args->mod == 0 && rlen < args->prod))
377 return;
378 k = rlen % args->prod;
379 if (k == args->mod)
380 return;
30265117
JK
381 if (k > args->mod)
382 rlen = rlen - (k - args->mod);
383 else
384 rlen = rlen - args->prod + (args->mod - k);
3790a8cd 385 /* casts to (int) catch length underflows */
30265117
JK
386 if ((int)rlen < (int)args->minlen)
387 return;
388 ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
389 ASSERT(rlen % args->prod == args->mod);
54fee133
CH
390 ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
391 rlen + args->minleft);
1da177e4
LT
392 args->len = rlen;
393}
394
1da177e4
LT
395/*
396 * Update the two btrees, logically removing from freespace the extent
397 * starting at rbno, rlen blocks. The extent is contained within the
398 * actual (current) free extent fbno for flen blocks.
399 * Flags are passed in indicating whether the cursors are set to the
400 * relevant records.
401 */
402STATIC int /* error code */
403xfs_alloc_fixup_trees(
404 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
405 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
406 xfs_agblock_t fbno, /* starting block of free extent */
407 xfs_extlen_t flen, /* length of free extent */
408 xfs_agblock_t rbno, /* starting block of returned extent */
409 xfs_extlen_t rlen, /* length of returned extent */
410 int flags) /* flags, XFSA_FIXUP_... */
411{
412 int error; /* error code */
413 int i; /* operation results */
414 xfs_agblock_t nfbno1; /* first new free startblock */
415 xfs_agblock_t nfbno2; /* second new free startblock */
416 xfs_extlen_t nflen1=0; /* first new free length */
417 xfs_extlen_t nflen2=0; /* second new free length */
5fb5aeee
ES
418 struct xfs_mount *mp;
419
420 mp = cnt_cur->bc_mp;
1da177e4
LT
421
422 /*
423 * Look up the record in the by-size tree if necessary.
424 */
425 if (flags & XFSA_FIXUP_CNT_OK) {
426#ifdef DEBUG
427 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
428 return error;
5fb5aeee 429 XFS_WANT_CORRUPTED_RETURN(mp,
1da177e4
LT
430 i == 1 && nfbno1 == fbno && nflen1 == flen);
431#endif
432 } else {
433 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
434 return error;
5fb5aeee 435 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
1da177e4
LT
436 }
437 /*
438 * Look up the record in the by-block tree if necessary.
439 */
440 if (flags & XFSA_FIXUP_BNO_OK) {
441#ifdef DEBUG
442 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
443 return error;
5fb5aeee 444 XFS_WANT_CORRUPTED_RETURN(mp,
1da177e4
LT
445 i == 1 && nfbno1 == fbno && nflen1 == flen);
446#endif
447 } else {
448 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
449 return error;
5fb5aeee 450 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
1da177e4 451 }
7cc95a82 452
1da177e4 453#ifdef DEBUG
7cc95a82
CH
454 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
455 struct xfs_btree_block *bnoblock;
456 struct xfs_btree_block *cntblock;
457
458 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
459 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
1da177e4 460
5fb5aeee 461 XFS_WANT_CORRUPTED_RETURN(mp,
7cc95a82 462 bnoblock->bb_numrecs == cntblock->bb_numrecs);
1da177e4
LT
463 }
464#endif
7cc95a82 465
1da177e4
LT
466 /*
467 * Deal with all four cases: the allocated record is contained
468 * within the freespace record, so we can have new freespace
469 * at either (or both) end, or no freespace remaining.
470 */
471 if (rbno == fbno && rlen == flen)
472 nfbno1 = nfbno2 = NULLAGBLOCK;
473 else if (rbno == fbno) {
474 nfbno1 = rbno + rlen;
475 nflen1 = flen - rlen;
476 nfbno2 = NULLAGBLOCK;
477 } else if (rbno + rlen == fbno + flen) {
478 nfbno1 = fbno;
479 nflen1 = flen - rlen;
480 nfbno2 = NULLAGBLOCK;
481 } else {
482 nfbno1 = fbno;
483 nflen1 = rbno - fbno;
484 nfbno2 = rbno + rlen;
485 nflen2 = (fbno + flen) - nfbno2;
486 }
487 /*
488 * Delete the entry from the by-size btree.
489 */
91cca5df 490 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 491 return error;
5fb5aeee 492 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
1da177e4
LT
493 /*
494 * Add new by-size btree entry(s).
495 */
496 if (nfbno1 != NULLAGBLOCK) {
497 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
498 return error;
5fb5aeee 499 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
4b22a571 500 if ((error = xfs_btree_insert(cnt_cur, &i)))
1da177e4 501 return error;
5fb5aeee 502 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
1da177e4
LT
503 }
504 if (nfbno2 != NULLAGBLOCK) {
505 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
506 return error;
5fb5aeee 507 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
4b22a571 508 if ((error = xfs_btree_insert(cnt_cur, &i)))
1da177e4 509 return error;
5fb5aeee 510 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
1da177e4
LT
511 }
512 /*
513 * Fix up the by-block btree entry(s).
514 */
515 if (nfbno1 == NULLAGBLOCK) {
516 /*
517 * No remaining freespace, just delete the by-block tree entry.
518 */
91cca5df 519 if ((error = xfs_btree_delete(bno_cur, &i)))
1da177e4 520 return error;
5fb5aeee 521 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
1da177e4
LT
522 } else {
523 /*
524 * Update the by-block entry to start later|be shorter.
525 */
526 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
527 return error;
528 }
529 if (nfbno2 != NULLAGBLOCK) {
530 /*
531 * 2 resulting free entries, need to add one.
532 */
533 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
534 return error;
5fb5aeee 535 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
4b22a571 536 if ((error = xfs_btree_insert(bno_cur, &i)))
1da177e4 537 return error;
5fb5aeee 538 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
1da177e4
LT
539 }
540 return 0;
541}
542
a6a781a5 543static xfs_failaddr_t
612cfbfe 544xfs_agfl_verify(
bb80c6d7
DC
545 struct xfs_buf *bp)
546{
bb80c6d7
DC
547 struct xfs_mount *mp = bp->b_target->bt_mount;
548 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
bb80c6d7
DC
549 int i;
550
b5572597
DW
551 /*
552 * There is no verification of non-crc AGFLs because mkfs does not
553 * initialise the AGFL to zero or NULL. Hence the only valid part of the
554 * AGFL is what the AGF says is active. We can't get to the AGF, so we
555 * can't verify just those entries are valid.
556 */
557 if (!xfs_sb_version_hascrc(&mp->m_sb))
558 return NULL;
559
ce748eaa 560 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
a6a781a5 561 return __this_address;
77c95bba 562 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
a6a781a5 563 return __this_address;
77c95bba
CH
564 /*
565 * during growfs operations, the perag is not fully initialised,
566 * so we can't use it for any useful checking. growfs ensures we can't
567 * use it by using uncached buffers that don't have the perag attached
568 * so we can detect and avoid this problem.
569 */
570 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
a6a781a5 571 return __this_address;
77c95bba 572
a78ee256 573 for (i = 0; i < xfs_agfl_size(mp); i++) {
77c95bba 574 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
bb80c6d7 575 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
a6a781a5 576 return __this_address;
bb80c6d7 577 }
a45086e2 578
a6a781a5
DW
579 if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
580 return __this_address;
581 return NULL;
77c95bba
CH
582}
583
584static void
585xfs_agfl_read_verify(
586 struct xfs_buf *bp)
587{
588 struct xfs_mount *mp = bp->b_target->bt_mount;
bc1a09b8 589 xfs_failaddr_t fa;
77c95bba
CH
590
591 /*
592 * There is no verification of non-crc AGFLs because mkfs does not
593 * initialise the AGFL to zero or NULL. Hence the only valid part of the
594 * AGFL is what the AGF says is active. We can't get to the AGF, so we
595 * can't verify just those entries are valid.
596 */
597 if (!xfs_sb_version_hascrc(&mp->m_sb))
598 return;
599
ce5028cf 600 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
bc1a09b8
DW
601 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
602 else {
603 fa = xfs_agfl_verify(bp);
604 if (fa)
605 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
606 }
612cfbfe
DC
607}
608
1813dd64 609static void
612cfbfe
DC
610xfs_agfl_write_verify(
611 struct xfs_buf *bp)
612{
fb1755a6
CM
613 struct xfs_mount *mp = bp->b_target->bt_mount;
614 struct xfs_buf_log_item *bip = bp->b_log_item;
bc1a09b8 615 xfs_failaddr_t fa;
612cfbfe 616
77c95bba
CH
617 /* no verification of non-crc AGFLs */
618 if (!xfs_sb_version_hascrc(&mp->m_sb))
619 return;
620
bc1a09b8
DW
621 fa = xfs_agfl_verify(bp);
622 if (fa) {
623 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
77c95bba
CH
624 return;
625 }
626
627 if (bip)
628 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
629
f1dbcd7e 630 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
bb80c6d7
DC
631}
632
1813dd64 633const struct xfs_buf_ops xfs_agfl_buf_ops = {
233135b7 634 .name = "xfs_agfl",
1813dd64
DC
635 .verify_read = xfs_agfl_read_verify,
636 .verify_write = xfs_agfl_write_verify,
b5572597 637 .verify_struct = xfs_agfl_verify,
1813dd64
DC
638};
639
1da177e4
LT
640/*
641 * Read in the allocation group free block array.
642 */
26788097 643int /* error */
1da177e4
LT
644xfs_alloc_read_agfl(
645 xfs_mount_t *mp, /* mount point structure */
646 xfs_trans_t *tp, /* transaction pointer */
647 xfs_agnumber_t agno, /* allocation group number */
648 xfs_buf_t **bpp) /* buffer for the ag free block array */
649{
650 xfs_buf_t *bp; /* return value */
651 int error;
652
653 ASSERT(agno != NULLAGNUMBER);
654 error = xfs_trans_read_buf(
655 mp, tp, mp->m_ddev_targp,
656 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
1813dd64 657 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
1da177e4
LT
658 if (error)
659 return error;
38f23232 660 xfs_buf_set_ref(bp, XFS_AGFL_REF);
1da177e4
LT
661 *bpp = bp;
662 return 0;
663}
664
ecb6928f
CH
665STATIC int
666xfs_alloc_update_counters(
667 struct xfs_trans *tp,
668 struct xfs_perag *pag,
669 struct xfs_buf *agbp,
670 long len)
671{
672 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
673
674 pag->pagf_freeblks += len;
675 be32_add_cpu(&agf->agf_freeblks, len);
676
677 xfs_trans_agblocks_delta(tp, len);
678 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
679 be32_to_cpu(agf->agf_length)))
2451337d 680 return -EFSCORRUPTED;
ecb6928f
CH
681
682 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
683 return 0;
684}
685
1da177e4
LT
686/*
687 * Allocation group level functions.
688 */
689
690/*
691 * Allocate a variable extent in the allocation group agno.
692 * Type and bno are used to determine where in the allocation group the
693 * extent will start.
694 * Extent's length (returned in *len) will be between minlen and maxlen,
695 * and of the form k * prod + mod unless there's nothing that large.
696 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
697 */
698STATIC int /* error */
699xfs_alloc_ag_vextent(
700 xfs_alloc_arg_t *args) /* argument structure for allocation */
701{
702 int error=0;
1da177e4
LT
703
704 ASSERT(args->minlen > 0);
705 ASSERT(args->maxlen > 0);
706 ASSERT(args->minlen <= args->maxlen);
707 ASSERT(args->mod < args->prod);
708 ASSERT(args->alignment > 0);
3fd129b6 709
1da177e4
LT
710 /*
711 * Branch to correct routine based on the type.
712 */
713 args->wasfromfl = 0;
714 switch (args->type) {
715 case XFS_ALLOCTYPE_THIS_AG:
716 error = xfs_alloc_ag_vextent_size(args);
717 break;
718 case XFS_ALLOCTYPE_NEAR_BNO:
719 error = xfs_alloc_ag_vextent_near(args);
720 break;
721 case XFS_ALLOCTYPE_THIS_BNO:
722 error = xfs_alloc_ag_vextent_exact(args);
723 break;
724 default:
725 ASSERT(0);
726 /* NOTREACHED */
727 }
ecb6928f
CH
728
729 if (error || args->agbno == NULLAGBLOCK)
1da177e4 730 return error;
ecb6928f
CH
731
732 ASSERT(args->len >= args->minlen);
733 ASSERT(args->len <= args->maxlen);
0ab32086 734 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
ecb6928f
CH
735 ASSERT(args->agbno % args->alignment == 0);
736
673930c3 737 /* if not file data, insert new block into the reverse map btree */
33df3a9c 738 if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
673930c3
DW
739 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
740 args->agbno, args->len, &args->oinfo);
741 if (error)
742 return error;
743 }
744
ecb6928f
CH
745 if (!args->wasfromfl) {
746 error = xfs_alloc_update_counters(args->tp, args->pag,
747 args->agbp,
748 -((long)(args->len)));
749 if (error)
750 return error;
751
4ecbfe63 752 ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
e26f0501 753 args->agbno, args->len));
1da177e4 754 }
ecb6928f 755
3fd129b6 756 xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
ecb6928f 757
ff6d6af2
BD
758 XFS_STATS_INC(args->mp, xs_allocx);
759 XFS_STATS_ADD(args->mp, xs_allocb, args->len);
ecb6928f 760 return error;
1da177e4
LT
761}
762
763/*
764 * Allocate a variable extent at exactly agno/bno.
765 * Extent's length (returned in *len) will be between minlen and maxlen,
766 * and of the form k * prod + mod unless there's nothing that large.
767 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
768 */
769STATIC int /* error */
770xfs_alloc_ag_vextent_exact(
771 xfs_alloc_arg_t *args) /* allocation argument structure */
772{
773 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
774 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
1da177e4
LT
775 int error;
776 xfs_agblock_t fbno; /* start block of found extent */
1da177e4 777 xfs_extlen_t flen; /* length of found extent */
ebf55872
CH
778 xfs_agblock_t tbno; /* start block of busy extent */
779 xfs_extlen_t tlen; /* length of busy extent */
780 xfs_agblock_t tend; /* end block of busy extent */
1da177e4 781 int i; /* success/failure of operation */
ebf55872 782 unsigned busy_gen;
1da177e4
LT
783
784 ASSERT(args->alignment == 1);
9f9baab3 785
1da177e4
LT
786 /*
787 * Allocate/initialize a cursor for the by-number freespace btree.
788 */
561f7d17 789 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
9f9baab3
CH
790 args->agno, XFS_BTNUM_BNO);
791
1da177e4
LT
792 /*
793 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
794 * Look for the closest free block <= bno, it must contain bno
795 * if any free block does.
796 */
9f9baab3
CH
797 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
798 if (error)
1da177e4 799 goto error0;
9f9baab3
CH
800 if (!i)
801 goto not_found;
802
1da177e4
LT
803 /*
804 * Grab the freespace record.
805 */
9f9baab3
CH
806 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
807 if (error)
1da177e4 808 goto error0;
c29aad41 809 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1da177e4 810 ASSERT(fbno <= args->agbno);
9f9baab3 811
1da177e4 812 /*
e26f0501 813 * Check for overlapping busy extents.
1da177e4 814 */
ebf55872
CH
815 tbno = fbno;
816 tlen = flen;
817 xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
e26f0501
CH
818
819 /*
820 * Give up if the start of the extent is busy, or the freespace isn't
821 * long enough for the minimum request.
822 */
823 if (tbno > args->agbno)
824 goto not_found;
825 if (tlen < args->minlen)
826 goto not_found;
827 tend = tbno + tlen;
828 if (tend < args->agbno + args->minlen)
9f9baab3
CH
829 goto not_found;
830
1da177e4
LT
831 /*
832 * End of extent will be smaller of the freespace end and the
833 * maximal requested end.
9f9baab3 834 *
1da177e4
LT
835 * Fix the length according to mod and prod if given.
836 */
81463b1c
CS
837 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
838 - args->agbno;
1da177e4 839 xfs_alloc_fix_len(args);
81463b1c 840 ASSERT(args->agbno + args->len <= tend);
9f9baab3 841
1da177e4 842 /*
81463b1c 843 * We are allocating agbno for args->len
1da177e4
LT
844 * Allocate/initialize a cursor for the by-size btree.
845 */
561f7d17
CH
846 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
847 args->agno, XFS_BTNUM_CNT);
1da177e4 848 ASSERT(args->agbno + args->len <=
16259e7d 849 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
9f9baab3
CH
850 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
851 args->len, XFSA_FIXUP_BNO_OK);
852 if (error) {
1da177e4
LT
853 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
854 goto error0;
855 }
9f9baab3 856
1da177e4
LT
857 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
858 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
0b1b213f 859
1da177e4 860 args->wasfromfl = 0;
9f9baab3
CH
861 trace_xfs_alloc_exact_done(args);
862 return 0;
863
864not_found:
865 /* Didn't find it, return null. */
866 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
867 args->agbno = NULLAGBLOCK;
868 trace_xfs_alloc_exact_notfound(args);
1da177e4
LT
869 return 0;
870
871error0:
872 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
0b1b213f 873 trace_xfs_alloc_exact_error(args);
1da177e4
LT
874 return error;
875}
876
489a150f
CH
/*
 * Search the btree in a given direction via the search cursor and compare
 * the records found against the good extent we've already found.
 *
 * On success exactly one of the two cursors is deleted and NULLed out:
 * *scur if the already-found ("good") extent is kept, *gcur if the search
 * found a strictly closer candidate.  The caller continues with whichever
 * cursor is still live.  On error neither cursor is torn down here; the
 * caller invalidates both (see the error0 label below).
 *
 * NOTE(review): busy_gen filled in by xfs_alloc_compute_aligned() is
 * discarded here — presumably busy-extent flushing is handled by the
 * caller's restart logic; confirm against xfs_alloc_ag_vextent_near().
 */
STATIC int
xfs_alloc_find_best_extent(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	struct xfs_btree_cur	**gcur,	/* good cursor */
	struct xfs_btree_cur	**scur,	/* searching cursor */
	xfs_agblock_t		gdiff,	/* difference for search comparison */
	xfs_agblock_t		*sbno,	/* extent found by search */
	xfs_extlen_t		*slen,	/* extent length */
	xfs_agblock_t		*sbnoa,	/* aligned extent found by search */
	xfs_extlen_t		*slena,	/* aligned extent length */
	int			dir)	/* 0 = search right, 1 = search left */
{
	xfs_agblock_t		new;
	xfs_agblock_t		sdiff;
	int			error;
	int			i;
	unsigned		busy_gen;

	/* The good extent is perfect, no need to search. */
	if (!gdiff)
		goto out_use_good;

	/*
	 * Look until we find a better one, run out of space or run off the end.
	 */
	do {
		error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
		if (error)
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
		xfs_alloc_compute_aligned(args, *sbno, *slen,
				sbnoa, slena, &busy_gen);

		/*
		 * The good extent is closer than this one.
		 *
		 * Searching right: once the aligned start is at or beyond
		 * agbno + gdiff (or past max_agbno) nothing further right
		 * can beat the good extent.  Mirror logic when searching
		 * left against agbno - gdiff and min_agbno.
		 */
		if (!dir) {
			if (*sbnoa > args->max_agbno)
				goto out_use_good;
			if (*sbnoa >= args->agbno + gdiff)
				goto out_use_good;
		} else {
			if (*sbnoa < args->min_agbno)
				goto out_use_good;
			if (*sbnoa <= args->agbno - gdiff)
				goto out_use_good;
		}

		/*
		 * Same distance, compare length and pick the best.
		 */
		if (*slena >= args->minlen) {
			args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
			xfs_alloc_fix_len(args);

			sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
						       args->alignment,
						       args->datatype, *sbnoa,
						       *slena, &new);

			/*
			 * Choose closer size and invalidate other cursor.
			 */
			if (sdiff < gdiff)
				goto out_use_search;
			goto out_use_good;
		}

		/* Candidate too short after alignment: keep walking. */
		if (!dir)
			error = xfs_btree_increment(*scur, 0, &i);
		else
			error = xfs_btree_decrement(*scur, 0, &i);
		if (error)
			goto error0;
	} while (i);

out_use_good:
	xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
	*scur = NULL;
	return 0;

out_use_search:
	xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
	*gcur = NULL;
	return 0;

error0:
	/* caller invalidates cursors */
	return error;
}
971
1da177e4
LT
/*
 * Allocate a variable extent near bno in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 *
 * Two strategies are tried in order:
 *   1. If the requested size lands in the last leaf of the by-size btree,
 *      scan that leaf and take the candidate closest to args->agbno.
 *   2. Otherwise walk the by-bno btree left and right from args->agbno
 *      simultaneously and keep the nearer of the two fits.
 * If only busy extents were usable, the busy list is flushed and the
 * whole search restarts.
 */
STATIC int				/* error */
xfs_alloc_ag_vextent_near(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur_gt;	/* cursor for bno btree, right side */
	xfs_btree_cur_t	*bno_cur_lt;	/* cursor for bno btree, left side */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for count btree */
	xfs_agblock_t	gtbno;		/* start bno of right side entry */
	xfs_agblock_t	gtbnoa;		/* aligned ... */
	xfs_extlen_t	gtdiff;		/* difference to right side entry */
	xfs_extlen_t	gtlen;		/* length of right side entry */
	xfs_extlen_t	gtlena;		/* aligned ... */
	xfs_agblock_t	gtnew;		/* useful start bno of right side */
	int		error;		/* error code */
	int		i;		/* result code, temporary */
	int		j;		/* result code, temporary */
	xfs_agblock_t	ltbno;		/* start bno of left side entry */
	xfs_agblock_t	ltbnoa;		/* aligned ... */
	xfs_extlen_t	ltdiff;		/* difference to left side entry */
	xfs_extlen_t	ltlen;		/* length of left side entry */
	xfs_extlen_t	ltlena;		/* aligned ... */
	xfs_agblock_t	ltnew;		/* useful start bno of left side */
	xfs_extlen_t	rlen;		/* length of returned extent */
	bool		busy;		/* any usable candidate was busy */
	unsigned	busy_gen;	/* busy-list generation to flush on restart */
#ifdef DEBUG
	/*
	 * Randomly don't execute the first algorithm.
	 * This exercises the second-algorithm path in debug builds even
	 * when the first algorithm would normally apply.
	 */
	int		dofirst;	/* set to do first algorithm */

	dofirst = prandom_u32() & 1;
#endif
	/* handle uninitialized agbno range so caller doesn't have to */
	if (!args->min_agbno && !args->max_agbno)
		args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
	ASSERT(args->min_agbno <= args->max_agbno);

	/* clamp agbno to the range if it's outside */
	if (args->agbno < args->min_agbno)
		args->agbno = args->min_agbno;
	if (args->agbno > args->max_agbno)
		args->agbno = args->max_agbno;

restart:
	bno_cur_lt = NULL;
	bno_cur_gt = NULL;
	ltlen = 0;
	gtlena = 0;
	ltlena = 0;
	busy = false;

	/*
	 * Get a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);

	/*
	 * See if there are any free extents as big as maxlen.
	 */
	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
		goto error0;
	/*
	 * If none, then pick up the last entry in the tree unless the
	 * tree is empty.
	 */
	if (!i) {
		if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
				&ltlen, &i)))
			goto error0;
		if (i == 0 || ltlen == 0) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_near_noentry(args);
			return 0;
		}
		ASSERT(i == 1);
	}
	args->wasfromfl = 0;

	/*
	 * First algorithm.
	 * If the requested extent is large wrt the freespaces available
	 * in this a.g., then the cursor will be pointing to a btree entry
	 * near the right edge of the tree.  If it's in the last btree leaf
	 * block, then we just examine all the entries in that block
	 * that are big enough, and pick the best one.
	 * This is written as a while loop so we can break out of it,
	 * but we never loop back to the top.
	 */
	while (xfs_btree_islastblock(cnt_cur, 0)) {
		xfs_extlen_t	bdiff;
		int		besti=0;
		xfs_extlen_t	blen=0;
		xfs_agblock_t	bnew=0;

#ifdef DEBUG
		if (dofirst)
			break;
#endif
		/*
		 * Start from the entry that lookup found, sequence through
		 * all larger free blocks.  If we're actually pointing at a
		 * record smaller than maxlen, go to the start of this block,
		 * and skip all those smaller than minlen.
		 */
		if (ltlen || args->alignment > 1) {
			cnt_cur->bc_ptrs[0] = 1;
			do {
				if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
						&ltlen, &i)))
					goto error0;
				XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
				if (ltlen >= args->minlen)
					break;
				if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
					goto error0;
			} while (i);
			ASSERT(ltlen >= args->minlen);
			if (!i)
				break;
		}
		i = cnt_cur->bc_ptrs[0];
		for (j = 1, blen = 0, bdiff = 0;
		     !error && j && (blen < args->maxlen || bdiff > 0);
		     error = xfs_btree_increment(cnt_cur, 0, &j)) {
			/*
			 * For each entry, decide if it's better than
			 * the previous best entry.
			 */
			if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
			busy = xfs_alloc_compute_aligned(args, ltbno, ltlen,
					&ltbnoa, &ltlena, &busy_gen);
			if (ltlena < args->minlen)
				continue;
			if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
				continue;
			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
			xfs_alloc_fix_len(args);
			ASSERT(args->len >= args->minlen);
			if (args->len < blen)
				continue;
			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
				args->alignment, args->datatype, ltbnoa,
				ltlena, &ltnew);
			if (ltnew != NULLAGBLOCK &&
			    (args->len > blen || ltdiff < bdiff)) {
				/* New best: remember record and position. */
				bdiff = ltdiff;
				bnew = ltnew;
				blen = args->len;
				besti = cnt_cur->bc_ptrs[0];
			}
		}
		/*
		 * It didn't work.  We COULD be in a case where
		 * there's a good record somewhere, so try again.
		 */
		if (blen == 0)
			break;
		/*
		 * Point at the best entry, and retrieve it again.
		 */
		cnt_cur->bc_ptrs[0] = besti;
		if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
		ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
		args->len = blen;

		/*
		 * We are allocating starting at bnew for blen blocks.
		 */
		args->agbno = bnew;
		ASSERT(bnew >= ltbno);
		ASSERT(bnew + blen <= ltbno + ltlen);
		/*
		 * Set up a cursor for the by-bno tree.
		 */
		bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
			args->agbp, args->agno, XFS_BTNUM_BNO);
		/*
		 * Fix up the btree entries.
		 */
		if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
				ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
			goto error0;
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);

		trace_xfs_alloc_near_first(args);
		return 0;
	}
	/*
	 * Second algorithm.
	 * Search in the by-bno tree to the left and to the right
	 * simultaneously, until in each case we find a space big enough,
	 * or run into the edge of the tree.  When we run into the edge,
	 * we deallocate that cursor.
	 * If both searches succeed, we compare the two spaces and pick
	 * the better one.
	 * With alignment, it's possible for both to fail; the upper
	 * level algorithm that picks allocation groups for allocations
	 * is not supposed to do this.
	 */
	/*
	 * Allocate and initialize the cursor for the leftward search.
	 */
	bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_BNO);
	/*
	 * Lookup <= bno to find the leftward search's starting point.
	 */
	if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
		goto error0;
	if (!i) {
		/*
		 * Didn't find anything; use this cursor for the rightward
		 * search.
		 */
		bno_cur_gt = bno_cur_lt;
		bno_cur_lt = NULL;
	}
	/*
	 * Found something.  Duplicate the cursor for the rightward search.
	 */
	else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
		goto error0;
	/*
	 * Increment the cursor, so we will point at the entry just right
	 * of the leftward entry if any, or to the leftmost entry.
	 */
	if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
		goto error0;
	if (!i) {
		/*
		 * It failed, there are no rightward entries.
		 */
		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
		bno_cur_gt = NULL;
	}
	/*
	 * Loop going left with the leftward cursor, right with the
	 * rightward cursor, until either both directions give up or
	 * we find an entry at least as big as minlen.
	 */
	do {
		if (bno_cur_lt) {
			if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
			busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen,
					&ltbnoa, &ltlena, &busy_gen);
			if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
				break;
			if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
				goto error0;
			/* Ran off the left edge (or below min_agbno): stop. */
			if (!i || ltbnoa < args->min_agbno) {
				xfs_btree_del_cursor(bno_cur_lt,
						     XFS_BTREE_NOERROR);
				bno_cur_lt = NULL;
			}
		}
		if (bno_cur_gt) {
			if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
			busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen,
					&gtbnoa, &gtlena, &busy_gen);
			if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
				break;
			if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
				goto error0;
			/* Ran off the right edge (or above max_agbno): stop. */
			if (!i || gtbnoa > args->max_agbno) {
				xfs_btree_del_cursor(bno_cur_gt,
						     XFS_BTREE_NOERROR);
				bno_cur_gt = NULL;
			}
		}
	} while (bno_cur_lt || bno_cur_gt);

	/*
	 * Got both cursors still active, need to find better entry.
	 */
	if (bno_cur_lt && bno_cur_gt) {
		if (ltlena >= args->minlen) {
			/*
			 * Left side is good, look for a right side entry.
			 */
			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
			xfs_alloc_fix_len(args);
			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
				args->alignment, args->datatype, ltbnoa,
				ltlena, &ltnew);

			error = xfs_alloc_find_best_extent(args,
						&bno_cur_lt, &bno_cur_gt,
						ltdiff, &gtbno, &gtlen,
						&gtbnoa, &gtlena,
						0 /* search right */);
		} else {
			ASSERT(gtlena >= args->minlen);

			/*
			 * Right side is good, look for a left side entry.
			 */
			args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
			xfs_alloc_fix_len(args);
			gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
				args->alignment, args->datatype, gtbnoa,
				gtlena, &gtnew);

			error = xfs_alloc_find_best_extent(args,
						&bno_cur_gt, &bno_cur_lt,
						gtdiff, &ltbno, &ltlen,
						&ltbnoa, &ltlena,
						1 /* search left */);
		}

		if (error)
			goto error0;
	}

	/*
	 * If we couldn't get anything, give up.
	 */
	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

		/*
		 * Every usable candidate was busy: force those busy
		 * extents out and retry the whole search from scratch.
		 */
		if (busy) {
			trace_xfs_alloc_near_busy(args);
			xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
			goto restart;
		}
		trace_xfs_alloc_size_neither(args);
		args->agbno = NULLAGBLOCK;
		return 0;
	}

	/*
	 * At this point we have selected a freespace entry, either to the
	 * left or to the right.  If it's on the right, copy all the
	 * useful variables to the "left" set so we only have one
	 * copy of this code.
	 */
	if (bno_cur_gt) {
		bno_cur_lt = bno_cur_gt;
		bno_cur_gt = NULL;
		ltbno = gtbno;
		ltbnoa = gtbnoa;
		ltlen = gtlen;
		ltlena = gtlena;
		j = 1;		/* j selects the "greater" trace point below */
	} else
		j = 0;

	/*
	 * Fix up the length and compute the useful address.
	 */
	args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
	xfs_alloc_fix_len(args);
	rlen = args->len;
	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
				     args->datatype, ltbnoa, ltlena, &ltnew);
	ASSERT(ltnew >= ltbno);
	ASSERT(ltnew + rlen <= ltbnoa + ltlena);
	ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
	ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
	args->agbno = ltnew;

	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
			ltnew, rlen, XFSA_FIXUP_BNO_OK)))
		goto error0;

	if (j)
		trace_xfs_alloc_near_greater(args);
	else
		trace_xfs_alloc_near_lesser(args);

	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
	return 0;

 error0:
	trace_xfs_alloc_near_error(args);
	if (cnt_cur != NULL)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	if (bno_cur_lt != NULL)
		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
	if (bno_cur_gt != NULL)
		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
	return error;
}
1373
/*
 * Allocate a variable extent anywhere in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 *
 * Searches the by-size (cnt) btree for the largest aligned fit; if only
 * busy extents qualify, the busy list is flushed and the search restarts.
 */
STATIC int				/* error */
xfs_alloc_ag_vextent_size(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur;	/* cursor for bno btree */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for cnt btree */
	int		error;		/* error result */
	xfs_agblock_t	fbno;		/* start of found freespace */
	xfs_extlen_t	flen;		/* length of found freespace */
	int		i;		/* temp status variable */
	xfs_agblock_t	rbno;		/* returned block number */
	xfs_extlen_t	rlen;		/* length of returned extent */
	bool		busy;		/* extent considered was busy */
	unsigned	busy_gen;	/* busy-list generation to flush */

restart:
	/*
	 * Allocate and initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);
	bno_cur = NULL;
	busy = false;

	/*
	 * Look for an entry >= maxlen+alignment-1 blocks.
	 * The extra alignment-1 guarantees maxlen usable blocks survive
	 * alignment trimming.
	 */
	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
			args->maxlen + args->alignment - 1, &i)))
		goto error0;

	/*
	 * If none then we have to settle for a smaller extent.  In the case that
	 * there are no large extents, this will return the last entry in the
	 * tree unless the tree is empty.  In the case that there are only busy
	 * large extents, this will return the largest small extent unless there
	 * are no smaller extents available.
	 */
	if (!i) {
		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
						   &fbno, &flen, &i);
		if (error)
			goto error0;
		if (i == 0 || flen == 0) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_noentry(args);
			return 0;
		}
		ASSERT(i == 1);
		busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
				&rlen, &busy_gen);
	} else {
		/*
		 * Search for a non-busy extent that is large enough.
		 */
		for (;;) {
			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
			if (error)
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);

			busy = xfs_alloc_compute_aligned(args, fbno, flen,
					&rbno, &rlen, &busy_gen);

			if (rlen >= args->maxlen)
				break;

			error = xfs_btree_increment(cnt_cur, 0, &i);
			if (error)
				goto error0;
			if (i == 0) {
				/*
				 * Our only valid extents must have been busy.
				 * Make it unbusy by forcing the log out and
				 * retrying.
				 */
				xfs_btree_del_cursor(cnt_cur,
						     XFS_BTREE_NOERROR);
				trace_xfs_alloc_size_busy(args);
				xfs_extent_busy_flush(args->mp,
							args->pag, busy_gen);
				goto restart;
			}
		}
	}

	/*
	 * In the first case above, we got the last entry in the
	 * by-size btree.  Now we check to see if the space hits maxlen
	 * once aligned; if not, we search left for something better.
	 * This can't happen in the second case above.
	 */
	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
			(rlen <= flen && rbno + rlen <= fbno + flen), error0);
	if (rlen < args->maxlen) {
		xfs_agblock_t	bestfbno;
		xfs_extlen_t	bestflen;
		xfs_agblock_t	bestrbno;
		xfs_extlen_t	bestrlen;

		bestrlen = rlen;
		bestrbno = rbno;
		bestflen = flen;
		bestfbno = fbno;
		for (;;) {
			if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
				goto error0;
			if (i == 0)
				break;
			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
					&i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
			/*
			 * By-size ordering: once raw length drops below the
			 * best aligned length nothing to the left can win.
			 */
			if (flen < bestrlen)
				break;
			busy = xfs_alloc_compute_aligned(args, fbno, flen,
					&rbno, &rlen, &busy_gen);
			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
			XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
				(rlen <= flen && rbno + rlen <= fbno + flen),
				error0);
			if (rlen > bestrlen) {
				bestrlen = rlen;
				bestrbno = rbno;
				bestflen = flen;
				bestfbno = fbno;
				if (rlen == args->maxlen)
					break;
			}
		}
		/* Reposition the cursor on the best record found. */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
				&i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
		rlen = bestrlen;
		rbno = bestrbno;
		flen = bestflen;
		fbno = bestfbno;
	}
	args->wasfromfl = 0;
	/*
	 * Fix up the length.
	 */
	args->len = rlen;
	if (rlen < args->minlen) {
		if (busy) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_busy(args);
			xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
			goto restart;
		}
		goto out_nominleft;
	}
	xfs_alloc_fix_len(args);

	rlen = args->len;
	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
	/*
	 * Allocate and initialize a cursor for the by-block tree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_BNO);
	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
			rbno, rlen, XFSA_FIXUP_CNT_OK)))
		goto error0;
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	cnt_cur = bno_cur = NULL;
	args->len = rlen;
	args->agbno = rbno;
	XFS_WANT_CORRUPTED_GOTO(args->mp,
		args->agbno + args->len <=
			be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
		error0);
	trace_xfs_alloc_size_done(args);
	return 0;

error0:
	trace_xfs_alloc_size_error(args);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	return error;

out_nominleft:
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	trace_xfs_alloc_size_nominleft(args);
	args->agbno = NULLAGBLOCK;
	return 0;
}
1572
/*
 * Deal with the case where only small freespaces remain.
 * Either return the contents of the last freespace record,
 * or allocate space from the freelist if there is nothing in the tree.
 *
 * On return *stat distinguishes the outcome: 0 means a single block was
 * taken from the AGFL (args->agbno/len already set and the rmap updated);
 * 1 means a normal record (or nothing — *flenp == 0) was found and the
 * caller finishes the allocation itself.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_small(
	xfs_alloc_arg_t	*args,	/* allocation argument structure */
	xfs_btree_cur_t	*ccur,	/* by-size cursor */
	xfs_agblock_t	*fbnop,	/* result block number */
	xfs_extlen_t	*flenp,	/* result length */
	int		*stat)	/* status: 0-freelist, 1-normal/none */
{
	struct xfs_owner_info	oinfo;
	int		error;
	xfs_agblock_t	fbno;
	xfs_extlen_t	flen;
	int		i;

	/* Step back to the last (largest remaining) record, if any. */
	if ((error = xfs_btree_decrement(ccur, 0, &i)))
		goto error0;
	if (i) {
		if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
	}
	/*
	 * Nothing in the btree, try the freelist.  Make sure
	 * to respect minleft even when pulling from the
	 * freelist.  Only a plain single-block request (minlen and
	 * alignment both 1) that isn't itself refilling the AGFL
	 * may consume a freelist block.
	 */
	else if (args->minlen == 1 && args->alignment == 1 &&
		 args->resv != XFS_AG_RESV_AGFL &&
		 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
		  > args->minleft)) {
		error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
		if (error)
			goto error0;
		if (fbno != NULLAGBLOCK) {
			xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
			      xfs_alloc_allow_busy_reuse(args->datatype));

			if (xfs_alloc_is_userdata(args->datatype)) {
				xfs_buf_t	*bp;

				/*
				 * Invalidate any stale metadata buffer for
				 * this block before handing it to user data.
				 */
				bp = xfs_btree_get_bufs(args->mp, args->tp,
					args->agno, fbno, 0);
				if (!bp) {
					error = -EFSCORRUPTED;
					goto error0;
				}
				xfs_trans_binval(args->tp, bp);
			}
			args->len = 1;
			args->agbno = fbno;
			XFS_WANT_CORRUPTED_GOTO(args->mp,
				args->agbno + args->len <=
				be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
				error0);
			args->wasfromfl = 1;
			trace_xfs_alloc_small_freelist(args);

			/*
			 * If we're feeding an AGFL block to something that
			 * doesn't live in the free space, we need to clear
			 * out the OWN_AG rmap.
			 */
			xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
			error = xfs_rmap_free(args->tp, args->agbp, args->agno,
					fbno, 1, &oinfo);
			if (error)
				goto error0;

			*stat = 0;
			return 0;
		}
		/*
		 * Nothing in the freelist.
		 */
		else
			flen = 0;
	}
	/*
	 * Can't allocate from the freelist for some reason.
	 */
	else {
		fbno = NULLAGBLOCK;
		flen = 0;
	}
	/*
	 * Can't do the allocation, give up.
	 */
	if (flen < args->minlen) {
		args->agbno = NULLAGBLOCK;
		trace_xfs_alloc_small_notenough(args);
		flen = 0;
	}
	*fbnop = fbno;
	*flenp = flen;
	*stat = 1;
	trace_xfs_alloc_small_done(args);
	return 0;

error0:
	trace_xfs_alloc_small_error(args);
	return error;
}
1680
1681/*
1682 * Free the extent starting at agno/bno for length.
1683 */
340785cc 1684STATIC int
1da177e4 1685xfs_free_ag_extent(
340785cc
DW
1686 xfs_trans_t *tp,
1687 xfs_buf_t *agbp,
1688 xfs_agnumber_t agno,
1689 xfs_agblock_t bno,
1690 xfs_extlen_t len,
1691 struct xfs_owner_info *oinfo,
3fd129b6 1692 enum xfs_ag_resv_type type)
1da177e4
LT
1693{
1694 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
1695 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
1696 int error; /* error return value */
1da177e4
LT
1697 xfs_agblock_t gtbno; /* start of right neighbor block */
1698 xfs_extlen_t gtlen; /* length of right neighbor block */
1699 int haveleft; /* have a left neighbor block */
1700 int haveright; /* have a right neighbor block */
1701 int i; /* temp, result code */
1702 xfs_agblock_t ltbno; /* start of left neighbor block */
1703 xfs_extlen_t ltlen; /* length of left neighbor block */
1704 xfs_mount_t *mp; /* mount point struct for filesystem */
1705 xfs_agblock_t nbno; /* new starting block of freespace */
1706 xfs_extlen_t nlen; /* new length of freespace */
ecb6928f 1707 xfs_perag_t *pag; /* per allocation group data */
1da177e4 1708
673930c3 1709 bno_cur = cnt_cur = NULL;
1da177e4 1710 mp = tp->t_mountp;
673930c3 1711
33df3a9c 1712 if (!xfs_rmap_should_skip_owner_update(oinfo)) {
673930c3
DW
1713 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1714 if (error)
1715 goto error0;
1716 }
1717
1da177e4
LT
1718 /*
1719 * Allocate and initialize a cursor for the by-block btree.
1720 */
561f7d17 1721 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1da177e4
LT
1722 /*
1723 * Look for a neighboring block on the left (lower block numbers)
1724 * that is contiguous with this space.
1725 */
1726 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1727 goto error0;
1728 if (haveleft) {
1729 /*
1730 * There is a block to our left.
1731 */
1732 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1733 goto error0;
c29aad41 1734 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1735 /*
1736 * It's not contiguous, though.
1737 */
1738 if (ltbno + ltlen < bno)
1739 haveleft = 0;
1740 else {
1741 /*
1742 * If this failure happens the request to free this
1743 * space was invalid, it's (partly) already free.
1744 * Very bad.
1745 */
c29aad41
ES
1746 XFS_WANT_CORRUPTED_GOTO(mp,
1747 ltbno + ltlen <= bno, error0);
1da177e4
LT
1748 }
1749 }
1750 /*
1751 * Look for a neighboring block on the right (higher block numbers)
1752 * that is contiguous with this space.
1753 */
637aa50f 1754 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1da177e4
LT
1755 goto error0;
1756 if (haveright) {
1757 /*
1758 * There is a block to our right.
1759 */
1760 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1761 goto error0;
c29aad41 1762 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1763 /*
1764 * It's not contiguous, though.
1765 */
1766 if (bno + len < gtbno)
1767 haveright = 0;
1768 else {
1769 /*
1770 * If this failure happens the request to free this
1771 * space was invalid, it's (partly) already free.
1772 * Very bad.
1773 */
c29aad41 1774 XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
1da177e4
LT
1775 }
1776 }
1777 /*
1778 * Now allocate and initialize a cursor for the by-size tree.
1779 */
561f7d17 1780 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1da177e4
LT
1781 /*
1782 * Have both left and right contiguous neighbors.
1783 * Merge all three into a single free block.
1784 */
1785 if (haveleft && haveright) {
1786 /*
1787 * Delete the old by-size entry on the left.
1788 */
1789 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1790 goto error0;
c29aad41 1791 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
91cca5df 1792 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 1793 goto error0;
c29aad41 1794 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1795 /*
1796 * Delete the old by-size entry on the right.
1797 */
1798 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1799 goto error0;
c29aad41 1800 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
91cca5df 1801 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 1802 goto error0;
c29aad41 1803 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1804 /*
1805 * Delete the old by-block entry for the right block.
1806 */
91cca5df 1807 if ((error = xfs_btree_delete(bno_cur, &i)))
1da177e4 1808 goto error0;
c29aad41 1809 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1810 /*
1811 * Move the by-block cursor back to the left neighbor.
1812 */
8df4da4a 1813 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1da177e4 1814 goto error0;
c29aad41 1815 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1816#ifdef DEBUG
1817 /*
1818 * Check that this is the right record: delete didn't
1819 * mangle the cursor.
1820 */
1821 {
1822 xfs_agblock_t xxbno;
1823 xfs_extlen_t xxlen;
1824
1825 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1826 &i)))
1827 goto error0;
c29aad41 1828 XFS_WANT_CORRUPTED_GOTO(mp,
1da177e4
LT
1829 i == 1 && xxbno == ltbno && xxlen == ltlen,
1830 error0);
1831 }
1832#endif
1833 /*
1834 * Update remaining by-block entry to the new, joined block.
1835 */
1836 nbno = ltbno;
1837 nlen = len + ltlen + gtlen;
1838 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1839 goto error0;
1840 }
1841 /*
1842 * Have only a left contiguous neighbor.
1843 * Merge it together with the new freespace.
1844 */
1845 else if (haveleft) {
1846 /*
1847 * Delete the old by-size entry on the left.
1848 */
1849 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1850 goto error0;
c29aad41 1851 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
91cca5df 1852 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 1853 goto error0;
c29aad41 1854 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1855 /*
1856 * Back up the by-block cursor to the left neighbor, and
1857 * update its length.
1858 */
8df4da4a 1859 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1da177e4 1860 goto error0;
c29aad41 1861 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1862 nbno = ltbno;
1863 nlen = len + ltlen;
1864 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1865 goto error0;
1866 }
1867 /*
1868 * Have only a right contiguous neighbor.
1869 * Merge it together with the new freespace.
1870 */
1871 else if (haveright) {
1872 /*
1873 * Delete the old by-size entry on the right.
1874 */
1875 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1876 goto error0;
c29aad41 1877 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
91cca5df 1878 if ((error = xfs_btree_delete(cnt_cur, &i)))
1da177e4 1879 goto error0;
c29aad41 1880 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1881 /*
1882 * Update the starting block and length of the right
1883 * neighbor in the by-block tree.
1884 */
1885 nbno = bno;
1886 nlen = len + gtlen;
1887 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1888 goto error0;
1889 }
1890 /*
1891 * No contiguous neighbors.
1892 * Insert the new freespace into the by-block tree.
1893 */
1894 else {
1895 nbno = bno;
1896 nlen = len;
4b22a571 1897 if ((error = xfs_btree_insert(bno_cur, &i)))
1da177e4 1898 goto error0;
c29aad41 1899 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1900 }
1901 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1902 bno_cur = NULL;
1903 /*
1904 * In all cases we need to insert the new freespace in the by-size tree.
1905 */
1906 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1907 goto error0;
c29aad41 1908 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
4b22a571 1909 if ((error = xfs_btree_insert(cnt_cur, &i)))
1da177e4 1910 goto error0;
c29aad41 1911 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1da177e4
LT
1912 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1913 cnt_cur = NULL;
ecb6928f 1914
1da177e4
LT
1915 /*
1916 * Update the freespace totals in the ag and superblock.
1917 */
ecb6928f
CH
1918 pag = xfs_perag_get(mp, agno);
1919 error = xfs_alloc_update_counters(tp, pag, agbp, len);
3fd129b6 1920 xfs_ag_resv_free_extent(pag, type, tp, len);
ecb6928f
CH
1921 xfs_perag_put(pag);
1922 if (error)
1923 goto error0;
1924
ff6d6af2
BD
1925 XFS_STATS_INC(mp, xs_freex);
1926 XFS_STATS_ADD(mp, xs_freeb, len);
0b1b213f 1927
21592863 1928 trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
1da177e4 1929
1da177e4
LT
1930 return 0;
1931
1932 error0:
21592863 1933 trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
1da177e4
LT
1934 if (bno_cur)
1935 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1936 if (cnt_cur)
1937 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1938 return error;
1939}
1940
1941/*
1942 * Visible (exported) allocation/free functions.
1943 * Some of these are used just by xfs_alloc_btree.c and this file.
1944 */
1945
1946/*
1947 * Compute and fill in value of m_ag_maxlevels.
1948 */
1949void
1950xfs_alloc_compute_maxlevels(
1951 xfs_mount_t *mp) /* file system mount structure */
1952{
a1f69417 1953 mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
19b54ee6 1954 (mp->m_sb.sb_agblocks + 1) / 2);
1da177e4
LT
1955}
1956
6cc87645 1957/*
3fd129b6
DW
1958 * Find the length of the longest extent in an AG. The 'need' parameter
1959 * specifies how much space we're going to need for the AGFL and the
1960 * 'reserved' parameter tells us how many blocks in this AG are reserved for
1961 * other callers.
6cc87645
DC
1962 */
1963xfs_extlen_t
1964xfs_alloc_longest_free_extent(
50adbcb4 1965 struct xfs_perag *pag,
3fd129b6
DW
1966 xfs_extlen_t need,
1967 xfs_extlen_t reserved)
6cc87645 1968{
50adbcb4 1969 xfs_extlen_t delta = 0;
6cc87645 1970
3fd129b6
DW
1971 /*
1972 * If the AGFL needs a recharge, we'll have to subtract that from the
1973 * longest extent.
1974 */
6cc87645
DC
1975 if (need > pag->pagf_flcount)
1976 delta = need - pag->pagf_flcount;
1977
3fd129b6
DW
1978 /*
1979 * If we cannot maintain others' reservations with space from the
1980 * not-longest freesp extents, we'll have to subtract /that/ from
1981 * the longest extent too.
1982 */
1983 if (pag->pagf_freeblks - pag->pagf_longest < reserved)
1984 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
1985
1986 /*
1987 * If the longest extent is long enough to satisfy all the
1988 * reservations and AGFL rules in place, we can return this extent.
1989 */
6cc87645
DC
1990 if (pag->pagf_longest > delta)
1991 return pag->pagf_longest - delta;
3fd129b6
DW
1992
1993 /* Otherwise, let the caller try for 1 block if there's space. */
6cc87645
DC
1994 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1995}
1996
496817b4
DC
1997unsigned int
1998xfs_alloc_min_freelist(
1999 struct xfs_mount *mp,
2000 struct xfs_perag *pag)
2001{
2002 unsigned int min_free;
2003
2004 /* space needed by-bno freespace btree */
2005 min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
2006 mp->m_ag_maxlevels);
2007 /* space needed by-size freespace btree */
2008 min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
2009 mp->m_ag_maxlevels);
52548852
DW
2010 /* space needed reverse mapping used space btree */
2011 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
2012 min_free += min_t(unsigned int,
2013 pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
2014 mp->m_rmap_maxlevels);
496817b4
DC
2015
2016 return min_free;
2017}
2018
72d55285
DC
2019/*
2020 * Check if the operation we are fixing up the freelist for should go ahead or
2021 * not. If we are freeing blocks, we always allow it, otherwise the allocation
2022 * is dependent on whether the size and shape of free space available will
2023 * permit the requested allocation to take place.
2024 */
2025static bool
2026xfs_alloc_space_available(
2027 struct xfs_alloc_arg *args,
2028 xfs_extlen_t min_free,
2029 int flags)
2030{
2031 struct xfs_perag *pag = args->pag;
12ef8301 2032 xfs_extlen_t alloc_len, longest;
3fd129b6 2033 xfs_extlen_t reservation; /* blocks that are still reserved */
72d55285
DC
2034 int available;
2035
2036 if (flags & XFS_ALLOC_FLAG_FREEING)
2037 return true;
2038
3fd129b6
DW
2039 reservation = xfs_ag_resv_needed(pag, args->resv);
2040
72d55285 2041 /* do we have enough contiguous free space for the allocation? */
12ef8301 2042 alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
a1f69417 2043 longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
12ef8301 2044 if (longest < alloc_len)
72d55285
DC
2045 return false;
2046
3fd129b6 2047 /* do we have enough free space remaining for the allocation? */
72d55285 2048 available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
54fee133 2049 reservation - min_free - args->minleft);
12ef8301 2050 if (available < (int)max(args->total, alloc_len))
72d55285
DC
2051 return false;
2052
54fee133
CH
2053 /*
2054 * Clamp maxlen to the amount of free space available for the actual
2055 * extent allocation.
2056 */
2057 if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
2058 args->maxlen = available;
2059 ASSERT(args->maxlen > 0);
2060 ASSERT(args->maxlen >= args->minlen);
2061 }
2062
72d55285
DC
2063 return true;
2064}
2065
4223f659
BF
2066int
2067xfs_free_agfl_block(
2068 struct xfs_trans *tp,
2069 xfs_agnumber_t agno,
2070 xfs_agblock_t agbno,
2071 struct xfs_buf *agbp,
2072 struct xfs_owner_info *oinfo)
2073{
2074 int error;
2075 struct xfs_buf *bp;
2076
2077 error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
2078 XFS_AG_RESV_AGFL);
2079 if (error)
2080 return error;
2081
2082 bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno, 0);
2083 if (!bp)
2084 return -EFSCORRUPTED;
2085 xfs_trans_binval(tp, bp);
2086
2087 return 0;
2088}
2089
a27ba260
BF
2090/*
2091 * Check the agfl fields of the agf for inconsistency or corruption. The purpose
2092 * is to detect an agfl header padding mismatch between current and early v5
2093 * kernels. This problem manifests as a 1-slot size difference between the
2094 * on-disk flcount and the active [first, last] range of a wrapped agfl. This
2095 * may also catch variants of agfl count corruption unrelated to padding. Either
2096 * way, we'll reset the agfl and warn the user.
2097 *
2098 * Return true if a reset is required before the agfl can be used, false
2099 * otherwise.
2100 */
2101static bool
2102xfs_agfl_needs_reset(
2103 struct xfs_mount *mp,
2104 struct xfs_agf *agf)
2105{
2106 uint32_t f = be32_to_cpu(agf->agf_flfirst);
2107 uint32_t l = be32_to_cpu(agf->agf_fllast);
2108 uint32_t c = be32_to_cpu(agf->agf_flcount);
2109 int agfl_size = xfs_agfl_size(mp);
2110 int active;
2111
2112 /* no agfl header on v4 supers */
2113 if (!xfs_sb_version_hascrc(&mp->m_sb))
2114 return false;
2115
2116 /*
2117 * The agf read verifier catches severe corruption of these fields.
2118 * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2119 * the verifier allows it.
2120 */
2121 if (f >= agfl_size || l >= agfl_size)
2122 return true;
2123 if (c > agfl_size)
2124 return true;
2125
2126 /*
2127 * Check consistency between the on-disk count and the active range. An
2128 * agfl padding mismatch manifests as an inconsistent flcount.
2129 */
2130 if (c && l >= f)
2131 active = l - f + 1;
2132 else if (c)
2133 active = agfl_size - f + l + 1;
2134 else
2135 active = 0;
2136
2137 return active != c;
2138}
2139
/*
 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
 * agfl content cannot be trusted. Warn the user that a repair is required to
 * recover leaked blocks.
 *
 * The purpose of this mechanism is to handle filesystems affected by the agfl
 * header padding mismatch problem. A reset keeps the filesystem online with a
 * relatively minor free space accounting inconsistency rather than suffer the
 * inevitable crash from use of an invalid agfl block.
 */
static void
xfs_agfl_reset(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_perag	*pag)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);

	/* Caller must have already flagged this AG for a reset. */
	ASSERT(pag->pagf_agflreset);
	trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);

	xfs_warn(mp,
	       "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
	       "Please unmount and run xfs_repair.",
	         pag->pag_agno, pag->pagf_flcount);

	/* An empty wrapped window: first slot 0, last slot is the final one. */
	agf->agf_flfirst = 0;
	agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
	agf->agf_flcount = 0;
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
				    XFS_AGF_FLCOUNT);

	/* Mirror the on-disk reset into the in-core perag state. */
	pag->pagf_flcount = 0;
	pag->pagf_agflreset = false;
}
2176
f8f2835a
BF
2177/*
2178 * Defer an AGFL block free. This is effectively equivalent to
2179 * xfs_bmap_add_free() with some special handling particular to AGFL blocks.
2180 *
2181 * Deferring AGFL frees helps prevent log reservation overruns due to too many
2182 * allocation operations in a transaction. AGFL frees are prone to this problem
2183 * because for one they are always freed one at a time. Further, an immediate
2184 * AGFL block free can cause a btree join and require another block free before
2185 * the real allocation can proceed. Deferring the free disconnects freeing up
2186 * the AGFL slot from freeing the block.
2187 */
2188STATIC void
2189xfs_defer_agfl_block(
2190 struct xfs_mount *mp,
2191 struct xfs_defer_ops *dfops,
2192 xfs_agnumber_t agno,
2193 xfs_fsblock_t agbno,
2194 struct xfs_owner_info *oinfo)
2195{
2196 struct xfs_extent_free_item *new; /* new element */
2197
2198 ASSERT(xfs_bmap_free_item_zone != NULL);
2199 ASSERT(oinfo != NULL);
2200
2201 new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
2202 new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
2203 new->xefi_blockcount = 1;
2204 new->xefi_oinfo = *oinfo;
2205
2206 trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
2207
2208 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
2209}
2210
/*
 * Decide whether to use this allocation group for this allocation.
 * If so, fix up the btree freelist's size.  On success args->agbp points
 * at the locked AGF buffer; on failure (or rejection) it is set to NULL.
 */
int			/* error */
xfs_alloc_fix_freelist(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	int			flags)	/* XFS_ALLOC_FLAG_... */
{
	struct xfs_mount	*mp = args->mp;
	struct xfs_perag	*pag = args->pag;
	struct xfs_trans	*tp = args->tp;
	struct xfs_buf		*agbp = NULL;
	struct xfs_buf		*agflbp = NULL;
	struct xfs_alloc_arg	targs;	/* local allocation arguments */
	xfs_agblock_t		bno;	/* freelist block */
	xfs_extlen_t		need;	/* total blocks needed in freelist */
	int			error = 0;

	/* Read the AGF if the perag data has not been initialised yet. */
	if (!pag->pagf_init) {
		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
		if (error)
			goto out_no_agbp;
		/* Still uninitialised: trylock failed to get the AGF. */
		if (!pag->pagf_init) {
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			goto out_agbp_relse;
		}
	}

	/*
	 * If this is a metadata preferred pag and we are user data then try
	 * somewhere else if we are not being asked to try harder at this
	 * point
	 */
	if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) &&
	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
		goto out_agbp_relse;
	}

	/* Cheap first pass: reject from cached perag data, no clamping. */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, flags |
			XFS_ALLOC_FLAG_CHECK))
		goto out_agbp_relse;

	/*
	 * Get the a.g. freespace buffer.
	 * Can fail if we're not blocking on locks, and it's held.
	 */
	if (!agbp) {
		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
		if (error)
			goto out_no_agbp;
		if (!agbp) {
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			goto out_no_agbp;
		}
	}

	/* reset a padding mismatched agfl before final free space check */
	if (pag->pagf_agflreset)
		xfs_agfl_reset(tp, agbp, pag);

	/* If there isn't enough total space or single-extent, reject it. */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, flags))
		goto out_agbp_relse;

	/*
	 * Make the freelist shorter if it's too long.
	 *
	 * Note that from this point onwards, we will always release the agf and
	 * agfl buffers on error. This handles the case where we error out and
	 * the buffers are clean or may not have been joined to the transaction
	 * and hence need to be released manually. If they have been joined to
	 * the transaction, then xfs_trans_brelse() will handle them
	 * appropriately based on the recursion count and dirty state of the
	 * buffer.
	 *
	 * XXX (dgc): When we have lots of free space, does this buy us
	 * anything other than extra overhead when we need to put more blocks
	 * back on the free list? Maybe we should only do this when space is
	 * getting low or the AGFL is more than half full?
	 *
	 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
	 * big; the NORMAP flag prevents AGFL expand/shrink operations from
	 * updating the rmapbt. Both flags are used in xfs_repair while we're
	 * rebuilding the rmapbt, and neither are used by the kernel. They're
	 * both required to ensure that rmaps are correctly recorded for the
	 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
	 * repair/rmap.c in xfsprogs for details.
	 */
	memset(&targs, 0, sizeof(targs));
	if (flags & XFS_ALLOC_FLAG_NORMAP)
		xfs_rmap_skip_owner_update(&targs.oinfo);
	else
		xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
	while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
		if (error)
			goto out_agbp_relse;

		/* defer agfl frees if dfops is provided */
		if (tp->t_agfl_dfops) {
			xfs_defer_agfl_block(mp, tp->t_agfl_dfops, args->agno,
					     bno, &targs.oinfo);
		} else {
			error = xfs_free_agfl_block(tp, args->agno, bno, agbp,
						    &targs.oinfo);
			if (error)
				goto out_agbp_relse;
		}
	}

	/* Set up a THIS_AG allocation request for refilling the freelist. */
	targs.tp = tp;
	targs.mp = mp;
	targs.agbp = agbp;
	targs.agno = args->agno;
	targs.alignment = targs.minlen = targs.prod = 1;
	targs.type = XFS_ALLOCTYPE_THIS_AG;
	targs.pag = pag;
	error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
	if (error)
		goto out_agbp_relse;

	/* Make the freelist longer if it's too short. */
	while (pag->pagf_flcount < need) {
		targs.agbno = 0;
		targs.maxlen = need - pag->pagf_flcount;
		targs.resv = XFS_AG_RESV_AGFL;

		/* Allocate as many blocks as possible at once. */
		error = xfs_alloc_ag_vextent(&targs);
		if (error)
			goto out_agflbp_relse;

		/*
		 * Stop if we run out.  Won't happen if callers are obeying
		 * the restrictions correctly.  Can happen for free calls
		 * on a completely full ag.
		 */
		if (targs.agbno == NULLAGBLOCK) {
			if (flags & XFS_ALLOC_FLAG_FREEING)
				break;
			goto out_agflbp_relse;
		}
		/*
		 * Put each allocated block on the list.
		 */
		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
			error = xfs_alloc_put_freelist(tp, agbp,
							agflbp, bno, 0);
			if (error)
				goto out_agflbp_relse;
		}
	}
	xfs_trans_brelse(tp, agflbp);
	args->agbp = agbp;
	return 0;

out_agflbp_relse:
	xfs_trans_brelse(tp, agflbp);
out_agbp_relse:
	if (agbp)
		xfs_trans_brelse(tp, agbp);
out_no_agbp:
	args->agbp = NULL;
	return error;
}
2382
2383/*
2384 * Get a block from the freelist.
2385 * Returns with the buffer for the block gotten.
2386 */
2387int /* error */
2388xfs_alloc_get_freelist(
2389 xfs_trans_t *tp, /* transaction pointer */
2390 xfs_buf_t *agbp, /* buffer containing the agf structure */
92821e2b
DC
2391 xfs_agblock_t *bnop, /* block address retrieved from freelist */
2392 int btreeblk) /* destination is a AGF btree */
1da177e4
LT
2393{
2394 xfs_agf_t *agf; /* a.g. freespace structure */
1da177e4
LT
2395 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
2396 xfs_agblock_t bno; /* block number returned */
77c95bba 2397 __be32 *agfl_bno;
1da177e4 2398 int error;
92821e2b 2399 int logflags;
77c95bba 2400 xfs_mount_t *mp = tp->t_mountp;
1da177e4
LT
2401 xfs_perag_t *pag; /* per allocation group data */
2402
1da177e4
LT
2403 /*
2404 * Freelist is empty, give up.
2405 */
77c95bba 2406 agf = XFS_BUF_TO_AGF(agbp);
1da177e4
LT
2407 if (!agf->agf_flcount) {
2408 *bnop = NULLAGBLOCK;
2409 return 0;
2410 }
2411 /*
2412 * Read the array of free blocks.
2413 */
77c95bba
CH
2414 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
2415 &agflbp);
2416 if (error)
1da177e4 2417 return error;
77c95bba
CH
2418
2419
1da177e4
LT
2420 /*
2421 * Get the block number and update the data structures.
2422 */
77c95bba
CH
2423 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2424 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
413d57c9 2425 be32_add_cpu(&agf->agf_flfirst, 1);
1da177e4 2426 xfs_trans_brelse(tp, agflbp);
a78ee256 2427 if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
1da177e4 2428 agf->agf_flfirst = 0;
a862e0fd
DC
2429
2430 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
a27ba260 2431 ASSERT(!pag->pagf_agflreset);
413d57c9 2432 be32_add_cpu(&agf->agf_flcount, -1);
1da177e4
LT
2433 xfs_trans_agflist_delta(tp, -1);
2434 pag->pagf_flcount--;
a862e0fd 2435 xfs_perag_put(pag);
92821e2b
DC
2436
2437 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2438 if (btreeblk) {
413d57c9 2439 be32_add_cpu(&agf->agf_btreeblks, 1);
92821e2b
DC
2440 pag->pagf_btreeblks++;
2441 logflags |= XFS_AGF_BTREEBLKS;
2442 }
2443
92821e2b 2444 xfs_alloc_log_agf(tp, agbp, logflags);
1da177e4
LT
2445 *bnop = bno;
2446
1da177e4
LT
2447 return 0;
2448}
2449
/*
 * Log the given fields from the agf structure.
 */
void
xfs_alloc_log_agf(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
{
	int		first;		/* first byte offset */
	int		last;		/* last byte offset */
	/*
	 * NOTE(review): entries appear to correspond positionally to the
	 * XFS_AGF_* field bits consumed via XFS_AGF_NUM_BITS below — keep
	 * the table in sync with those flag definitions when adding fields.
	 */
	static const short	offsets[] = {
		offsetof(xfs_agf_t, agf_magicnum),
		offsetof(xfs_agf_t, agf_versionnum),
		offsetof(xfs_agf_t, agf_seqno),
		offsetof(xfs_agf_t, agf_length),
		offsetof(xfs_agf_t, agf_roots[0]),
		offsetof(xfs_agf_t, agf_levels[0]),
		offsetof(xfs_agf_t, agf_flfirst),
		offsetof(xfs_agf_t, agf_fllast),
		offsetof(xfs_agf_t, agf_flcount),
		offsetof(xfs_agf_t, agf_freeblks),
		offsetof(xfs_agf_t, agf_longest),
		offsetof(xfs_agf_t, agf_btreeblks),
		offsetof(xfs_agf_t, agf_uuid),
		offsetof(xfs_agf_t, agf_rmap_blocks),
		offsetof(xfs_agf_t, agf_refcount_blocks),
		offsetof(xfs_agf_t, agf_refcount_root),
		offsetof(xfs_agf_t, agf_refcount_level),
		/* needed so that we don't log the whole rest of the structure: */
		offsetof(xfs_agf_t, agf_spare64),
		sizeof(xfs_agf_t)
	};

	trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);

	/* Type the buffer so log recovery knows how to re-verify it. */
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);

	/* Translate the field bitmask into a contiguous byte range to log. */
	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}
2491
2492/*
2493 * Interface for inode allocation to force the pag data to be initialized.
2494 */
2495int /* error */
2496xfs_alloc_pagf_init(
2497 xfs_mount_t *mp, /* file system mount structure */
2498 xfs_trans_t *tp, /* transaction pointer */
2499 xfs_agnumber_t agno, /* allocation group number */
2500 int flags) /* XFS_ALLOC_FLAGS_... */
2501{
2502 xfs_buf_t *bp;
2503 int error;
2504
2505 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2506 return error;
2507 if (bp)
2508 xfs_trans_brelse(tp, bp);
2509 return 0;
2510}
2511
2512/*
2513 * Put the block on the freelist for the allocation group.
2514 */
2515int /* error */
2516xfs_alloc_put_freelist(
2517 xfs_trans_t *tp, /* transaction pointer */
2518 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2519 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
92821e2b
DC
2520 xfs_agblock_t bno, /* block being freed */
2521 int btreeblk) /* block came from a AGF btree */
1da177e4
LT
2522{
2523 xfs_agf_t *agf; /* a.g. freespace structure */
e2101005 2524 __be32 *blockp;/* pointer to array entry */
1da177e4 2525 int error;
92821e2b 2526 int logflags;
1da177e4
LT
2527 xfs_mount_t *mp; /* mount structure */
2528 xfs_perag_t *pag; /* per allocation group data */
77c95bba
CH
2529 __be32 *agfl_bno;
2530 int startoff;
1da177e4
LT
2531
2532 agf = XFS_BUF_TO_AGF(agbp);
2533 mp = tp->t_mountp;
2534
2535 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
16259e7d 2536 be32_to_cpu(agf->agf_seqno), &agflbp)))
1da177e4 2537 return error;
413d57c9 2538 be32_add_cpu(&agf->agf_fllast, 1);
a78ee256 2539 if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
1da177e4 2540 agf->agf_fllast = 0;
a862e0fd
DC
2541
2542 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
a27ba260 2543 ASSERT(!pag->pagf_agflreset);
413d57c9 2544 be32_add_cpu(&agf->agf_flcount, 1);
1da177e4
LT
2545 xfs_trans_agflist_delta(tp, 1);
2546 pag->pagf_flcount++;
92821e2b
DC
2547
2548 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2549 if (btreeblk) {
413d57c9 2550 be32_add_cpu(&agf->agf_btreeblks, -1);
92821e2b
DC
2551 pag->pagf_btreeblks--;
2552 logflags |= XFS_AGF_BTREEBLKS;
2553 }
a862e0fd 2554 xfs_perag_put(pag);
92821e2b 2555
92821e2b
DC
2556 xfs_alloc_log_agf(tp, agbp, logflags);
2557
a78ee256 2558 ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
77c95bba
CH
2559
2560 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
2561 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
e2101005 2562 *blockp = cpu_to_be32(bno);
77c95bba
CH
2563 startoff = (char *)blockp - (char *)agflbp->b_addr;
2564
92821e2b 2565 xfs_alloc_log_agf(tp, agbp, logflags);
77c95bba 2566
61fe135c 2567 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
77c95bba
CH
2568 xfs_trans_log_buf(tp, agflbp, startoff,
2569 startoff + sizeof(xfs_agblock_t) - 1);
1da177e4
LT
2570 return 0;
2571}
2572
a6a781a5 2573static xfs_failaddr_t
612cfbfe 2574xfs_agf_verify(
b5572597
DW
2575 struct xfs_buf *bp)
2576{
2577 struct xfs_mount *mp = bp->b_target->bt_mount;
2578 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
5d5f527d 2579
a45086e2
BF
2580 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2581 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
a6a781a5 2582 return __this_address;
a45086e2
BF
2583 if (!xfs_log_check_lsn(mp,
2584 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
a6a781a5 2585 return __this_address;
a45086e2 2586 }
5d5f527d 2587
4e0e6040
DC
2588 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2589 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2590 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
a78ee256
DC
2591 be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
2592 be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
2593 be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
a6a781a5 2594 return __this_address;
5d5f527d 2595
d2a047f3
DW
2596 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
2597 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
2598 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
e1b05723 2599 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
a6a781a5 2600 return __this_address;
e1b05723 2601
b8704944 2602 if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
d2a047f3
DW
2603 (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
2604 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
a6a781a5 2605 return __this_address;
b8704944 2606
5d5f527d
DC
2607 /*
2608 * during growfs operations, the perag is not fully initialised,
2609 * so we can't use it for any useful checking. growfs ensures we can't
2610 * use it by using uncached buffers that don't have the perag attached
2611 * so we can detect and avoid this problem.
2612 */
4e0e6040 2613 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
a6a781a5 2614 return __this_address;
5d5f527d 2615
4e0e6040
DC
2616 if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
2617 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
a6a781a5 2618 return __this_address;
4e0e6040 2619
46eeb521 2620 if (xfs_sb_version_hasreflink(&mp->m_sb) &&
d2a047f3
DW
2621 (be32_to_cpu(agf->agf_refcount_level) < 1 ||
2622 be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
a6a781a5 2623 return __this_address;
46eeb521 2624
a6a781a5 2625 return NULL;
5d5f527d 2626
612cfbfe
DC
2627}
2628
1813dd64
DC
2629static void
2630xfs_agf_read_verify(
612cfbfe
DC
2631 struct xfs_buf *bp)
2632{
4e0e6040 2633 struct xfs_mount *mp = bp->b_target->bt_mount;
bc1a09b8 2634 xfs_failaddr_t fa;
4e0e6040 2635
ce5028cf
ES
2636 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2637 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
bc1a09b8
DW
2638 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
2639 else {
b5572597 2640 fa = xfs_agf_verify(bp);
bc1a09b8
DW
2641 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
2642 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2643 }
612cfbfe 2644}
5d5f527d 2645
b0f539de 2646static void
1813dd64 2647xfs_agf_write_verify(
612cfbfe
DC
2648 struct xfs_buf *bp)
2649{
fb1755a6
CM
2650 struct xfs_mount *mp = bp->b_target->bt_mount;
2651 struct xfs_buf_log_item *bip = bp->b_log_item;
bc1a09b8 2652 xfs_failaddr_t fa;
4e0e6040 2653
b5572597 2654 fa = xfs_agf_verify(bp);
bc1a09b8
DW
2655 if (fa) {
2656 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
4e0e6040
DC
2657 return;
2658 }
2659
2660 if (!xfs_sb_version_hascrc(&mp->m_sb))
2661 return;
2662
2663 if (bip)
2664 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2665
f1dbcd7e 2666 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
5d5f527d
DC
2667}
2668
/* Buffer verifier operations for AGF header buffers. */
const struct xfs_buf_ops xfs_agf_buf_ops = {
	.name = "xfs_agf",
	.verify_read = xfs_agf_read_verify,
	.verify_write = xfs_agf_write_verify,
	.verify_struct = xfs_agf_verify,
};
2675
1da177e4
LT
2676/*
2677 * Read in the allocation group header (free/alloc section).
2678 */
2679int /* error */
4805621a
CH
2680xfs_read_agf(
2681 struct xfs_mount *mp, /* mount point structure */
2682 struct xfs_trans *tp, /* transaction pointer */
2683 xfs_agnumber_t agno, /* allocation group number */
2684 int flags, /* XFS_BUF_ */
2685 struct xfs_buf **bpp) /* buffer for the ag freelist header */
1da177e4 2686{
1da177e4
LT
2687 int error;
2688
d123031a
DC
2689 trace_xfs_read_agf(mp, agno);
2690
1da177e4
LT
2691 ASSERT(agno != NULLAGNUMBER);
2692 error = xfs_trans_read_buf(
2693 mp, tp, mp->m_ddev_targp,
2694 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
1813dd64 2695 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
1da177e4
LT
2696 if (error)
2697 return error;
4805621a 2698 if (!*bpp)
1da177e4 2699 return 0;
4805621a 2700
5a52c2a5 2701 ASSERT(!(*bpp)->b_error);
38f23232 2702 xfs_buf_set_ref(*bpp, XFS_AGF_REF);
4805621a
CH
2703 return 0;
2704}
2705
/*
 * Read in the allocation group header (free/alloc section), initialising the
 * in-core perag free space data from it on first read.
 */
int				/* error */
xfs_alloc_read_agf(
	struct xfs_mount	*mp,	/* mount point structure */
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags,	/* XFS_ALLOC_FLAG_... */
	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
{
	struct xfs_agf		*agf;	/* ag freelist header */
	struct xfs_perag	*pag;	/* per allocation group data */
	int			error;

	trace_xfs_alloc_read_agf(mp, agno);

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_read_agf(mp, tp, agno,
			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
			bpp);
	if (error)
		return error;
	/* Trylock failure: no buffer, not an error. */
	if (!*bpp)
		return 0;
	ASSERT(!(*bpp)->b_error);

	agf = XFS_BUF_TO_AGF(*bpp);
	pag = xfs_perag_get(mp, agno);
	if (!pag->pagf_init) {
		/* First read of this AGF: seed the perag from the disk copy. */
		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
		pag->pagf_levels[XFS_BTNUM_BNOi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
		pag->pagf_levels[XFS_BTNUM_RMAPi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
		spin_lock_init(&pag->pagb_lock);
		pag->pagb_count = 0;
		pag->pagb_tree = RB_ROOT;
		pag->pagf_init = 1;
		/* Flag a padding-mismatched AGFL for a later reset. */
		pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
	}
#ifdef DEBUG
	else if (!XFS_FORCED_SHUTDOWN(mp)) {
		/* Already initialised: cross-check perag against the AGF. */
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
	}
#endif
	xfs_perag_put(pag);
	return 0;
}
2768
/*
 * Allocate an extent (variable-size).
 * Depending on the allocation type, we either look in a single allocation
 * group or loop over the allocation groups to find the result.
 *
 * On success args->fsbno/args->len describe the allocated extent; on a
 * benign failure (no space, bad args) args->fsbno is NULLFSBLOCK and 0 is
 * returned.  A non-zero return is a real error.
 */
int				/* error */
xfs_alloc_vextent(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	xfs_agblock_t	agsize;	/* allocation group size */
	int		error;
	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
	xfs_mount_t	*mp;	/* mount structure pointer */
	xfs_agnumber_t	sagno;	/* starting allocation group number */
	xfs_alloctype_t	type;	/* input allocation type */
	int		bump_rotor = 0;
	xfs_agnumber_t	rotorstep = xfs_rotorstep; /* inode32 agf stepper */

	mp = args->mp;
	type = args->otype = args->type;
	args->agbno = NULLAGBLOCK;
	/*
	 * Just fix this up, for the case where the last a.g. is shorter
	 * (or there's only one a.g.) and the caller couldn't easily figure
	 * that out (xfs_bmap_alloc).
	 */
	agsize = mp->m_sb.sb_agblocks;
	if (args->maxlen > agsize)
		args->maxlen = agsize;
	if (args->alignment == 0)
		args->alignment = 1;
	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->minlen <= agsize);
	ASSERT(args->mod < args->prod);
	/*
	 * Same checks as the ASSERTs above, but handled gracefully on
	 * non-DEBUG kernels: bad arguments are reported via a tracepoint
	 * and treated as "no space", not as a hard error.
	 */
	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
	    args->minlen > args->maxlen || args->minlen > agsize ||
	    args->mod >= args->prod) {
		args->fsbno = NULLFSBLOCK;
		trace_xfs_alloc_vextent_badargs(args);
		return 0;
	}

	switch (type) {
	case XFS_ALLOCTYPE_THIS_AG:
	case XFS_ALLOCTYPE_NEAR_BNO:
	case XFS_ALLOCTYPE_THIS_BNO:
		/*
		 * These three force us into a single a.g.
		 */
		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
		args->pag = xfs_perag_get(mp, args->agno);
		error = xfs_alloc_fix_freelist(args, 0);
		if (error) {
			trace_xfs_alloc_vextent_nofix(args);
			goto error0;
		}
		if (!args->agbp) {
			/* Freelist could not be fixed up: no allocation. */
			trace_xfs_alloc_vextent_noagbp(args);
			break;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		if ((error = xfs_alloc_ag_vextent(args)))
			goto error0;
		break;
	case XFS_ALLOCTYPE_START_BNO:
		/*
		 * Try near allocation first, then anywhere-in-ag after
		 * the first a.g. fails.
		 */
		if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
			/*
			 * inode32: spread initial user data over the AGs
			 * using the per-mount rotor instead of the hint.
			 */
			args->fsbno = XFS_AGB_TO_FSB(mp,
					((mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount), 0);
			bump_rotor = 1;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		args->type = XFS_ALLOCTYPE_NEAR_BNO;
		/* FALLTHROUGH */
	case XFS_ALLOCTYPE_FIRST_AG:
		/*
		 * Rotate through the allocation groups looking for a winner.
		 */
		if (type == XFS_ALLOCTYPE_FIRST_AG) {
			/*
			 * Start with allocation group given by bno.
			 */
			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			args->type = XFS_ALLOCTYPE_THIS_AG;
			sagno = 0;
			flags = 0;
		} else {
			/*
			 * Start with the given allocation group.
			 */
			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			flags = XFS_ALLOC_FLAG_TRYLOCK;
		}
		/*
		 * Loop over allocation groups twice; first time with
		 * trylock set, second time without.
		 */
		for (;;) {
			args->pag = xfs_perag_get(mp, args->agno);
			error = xfs_alloc_fix_freelist(args, flags);
			if (error) {
				trace_xfs_alloc_vextent_nofix(args);
				goto error0;
			}
			/*
			 * If we get a buffer back then the allocation will fly.
			 */
			if (args->agbp) {
				if ((error = xfs_alloc_ag_vextent(args)))
					goto error0;
				break;
			}

			trace_xfs_alloc_vextent_loopfailed(args);

			/*
			 * Didn't work, figure out the next iteration.
			 */
			if (args->agno == sagno &&
			    type == XFS_ALLOCTYPE_START_BNO)
				args->type = XFS_ALLOCTYPE_THIS_AG;
			/*
			 * For the first allocation, we can try any AG to get
			 * space.  However, if we already have allocated a
			 * block, we don't want to try AGs whose number is below
			 * sagno. Otherwise, we may end up with out-of-order
			 * locking of AGF, which might cause deadlock.
			 */
			if (++(args->agno) == mp->m_sb.sb_agcount) {
				if (args->firstblock != NULLFSBLOCK)
					args->agno = sagno;
				else
					args->agno = 0;
			}
			/*
			 * Reached the starting a.g., must either be done
			 * or switch to non-trylock mode.
			 */
			if (args->agno == sagno) {
				if (flags == 0) {
					args->agbno = NULLAGBLOCK;
					trace_xfs_alloc_vextent_allfailed(args);
					break;
				}

				flags = 0;
				if (type == XFS_ALLOCTYPE_START_BNO) {
					/* Second pass: restore the bno hint. */
					args->agbno = XFS_FSB_TO_AGBNO(mp,
						args->fsbno);
					args->type = XFS_ALLOCTYPE_NEAR_BNO;
				}
			}
			xfs_perag_put(args->pag);
		}
		if (bump_rotor) {
			/* Advance the inode32 rotor past the AG we used. */
			if (args->agno == sagno)
				mp->m_agfrotor = (mp->m_agfrotor + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
			else
				mp->m_agfrotor = (args->agno * rotorstep + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
		}
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}
	if (args->agbno == NULLAGBLOCK)
		args->fsbno = NULLFSBLOCK;
	else {
		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
#ifdef DEBUG
		ASSERT(args->len >= args->minlen);
		ASSERT(args->len <= args->maxlen);
		ASSERT(args->agbno % args->alignment == 0);
		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
			args->len);
#endif

		/* Zero the extent if we were asked to do so */
		if (args->datatype & XFS_ALLOC_USERDATA_ZERO) {
			error = xfs_zero_extent(args->ip, args->fsbno, args->len);
			if (error)
				goto error0;
		}

	}
	/* Drop the perag reference taken above on both exit paths. */
	xfs_perag_put(args->pag);
	return 0;
error0:
	xfs_perag_put(args->pag);
	return error;
}
2970
4d89e20b
DC
2971/* Ensure that the freelist is at full capacity. */
2972int
2973xfs_free_extent_fix_freelist(
2974 struct xfs_trans *tp,
2975 xfs_agnumber_t agno,
2976 struct xfs_buf **agbp)
1da177e4 2977{
4d89e20b
DC
2978 struct xfs_alloc_arg args;
2979 int error;
1da177e4 2980
4d89e20b 2981 memset(&args, 0, sizeof(struct xfs_alloc_arg));
1da177e4
LT
2982 args.tp = tp;
2983 args.mp = tp->t_mountp;
4d89e20b 2984 args.agno = agno;
be65b18a
DC
2985
2986 /*
2987 * validate that the block number is legal - the enables us to detect
2988 * and handle a silent filesystem corruption rather than crashing.
2989 */
be65b18a 2990 if (args.agno >= args.mp->m_sb.sb_agcount)
2451337d 2991 return -EFSCORRUPTED;
be65b18a 2992
a862e0fd 2993 args.pag = xfs_perag_get(args.mp, args.agno);
be65b18a
DC
2994 ASSERT(args.pag);
2995
2996 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2997 if (error)
4d89e20b
DC
2998 goto out;
2999
3000 *agbp = args.agbp;
3001out:
3002 xfs_perag_put(args.pag);
3003 return error;
3004}
3005
/*
 * Free an extent.
 * Just break up the extent address and hand off to xfs_free_ag_extent
 * after fixing up the freelist.
 *
 * @tp:    transaction to log the free under
 * @bno:   starting fsblock of the extent
 * @len:   length of the extent in blocks (must be non-zero)
 * @oinfo: reverse-mapping owner of the extent
 * @type:  AG reservation pool the blocks return to (not AGFL)
 */
int				/* error */
xfs_free_extent(
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_fsblock_t		bno,	/* starting block number of extent */
	xfs_extlen_t		len,	/* length of extent */
	struct xfs_owner_info	*oinfo,	/* extent owner */
	enum xfs_ag_resv_type	type)	/* block reservation type */
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_buf		*agbp;
	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, bno);
	xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(mp, bno);
	int			error;

	ASSERT(len != 0);
	ASSERT(type != XFS_AG_RESV_AGFL);

	/* Error-injection hook for testing free-extent failure handling. */
	if (XFS_TEST_ERROR(false, mp,
			XFS_ERRTAG_FREE_EXTENT))
		return -EIO;

	error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
	if (error)
		return error;

	/* agbno must lie inside the AG; corruption otherwise. */
	XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err);

	/* validate the extent size is legal now we have the agf locked */
	XFS_WANT_CORRUPTED_GOTO(mp,
		agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
		err);

	error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
	if (error)
		goto err;

	/*
	 * Mark the range busy so it is not reused until the transaction
	 * freeing it commits to disk.
	 */
	xfs_extent_busy_insert(tp, agno, agbno, len, 0);
	return 0;

err:
	/* Release the AGF buffer taken by the freelist fixup above. */
	xfs_trans_brelse(tp, agbp);
	return error;
}
2d520bfa
DW
3054
/* Carries the caller's callback and opaque data through a btree query. */
struct xfs_alloc_query_range_info {
	xfs_alloc_query_range_fn	fn;	/* called once per record */
	void				*priv;	/* caller's private data */
};
3059
3060/* Format btree record and pass to our callback. */
3061STATIC int
3062xfs_alloc_query_range_helper(
3063 struct xfs_btree_cur *cur,
3064 union xfs_btree_rec *rec,
3065 void *priv)
3066{
3067 struct xfs_alloc_query_range_info *query = priv;
3068 struct xfs_alloc_rec_incore irec;
3069
3070 irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
3071 irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
3072 return query->fn(cur, &irec, query->priv);
3073}
3074
3075/* Find all free space within a given range of blocks. */
3076int
3077xfs_alloc_query_range(
3078 struct xfs_btree_cur *cur,
3079 struct xfs_alloc_rec_incore *low_rec,
3080 struct xfs_alloc_rec_incore *high_rec,
3081 xfs_alloc_query_range_fn fn,
3082 void *priv)
3083{
3084 union xfs_btree_irec low_brec;
3085 union xfs_btree_irec high_brec;
3086 struct xfs_alloc_query_range_info query;
3087
3088 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3089 low_brec.a = *low_rec;
3090 high_brec.a = *high_rec;
3091 query.priv = priv;
3092 query.fn = fn;
3093 return xfs_btree_query_range(cur, &low_brec, &high_brec,
3094 xfs_alloc_query_range_helper, &query);
3095}
e9a2599a
DW
3096
3097/* Find all free space records. */
3098int
3099xfs_alloc_query_all(
3100 struct xfs_btree_cur *cur,
3101 xfs_alloc_query_range_fn fn,
3102 void *priv)
3103{
3104 struct xfs_alloc_query_range_info query;
3105
3106 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3107 query.priv = priv;
3108 query.fn = fn;
3109 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3110}
21ec5416
DW
3111
3112/* Find the size of the AG, in blocks. */
3113xfs_agblock_t
3114xfs_ag_block_count(
3115 struct xfs_mount *mp,
3116 xfs_agnumber_t agno)
3117{
3118 ASSERT(agno < mp->m_sb.sb_agcount);
3119
3120 if (agno < mp->m_sb.sb_agcount - 1)
3121 return mp->m_sb.sb_agblocks;
3122 return mp->m_sb.sb_dblocks - (agno * mp->m_sb.sb_agblocks);
3123}
3124
3125/*
3126 * Verify that an AG block number pointer neither points outside the AG
3127 * nor points at static metadata.
3128 */
3129bool
3130xfs_verify_agbno(
3131 struct xfs_mount *mp,
3132 xfs_agnumber_t agno,
3133 xfs_agblock_t agbno)
3134{
3135 xfs_agblock_t eoag;
3136
3137 eoag = xfs_ag_block_count(mp, agno);
3138 if (agbno >= eoag)
3139 return false;
3140 if (agbno <= XFS_AGFL_BLOCK(mp))
3141 return false;
3142 return true;
3143}
3144
3145/*
3146 * Verify that an FS block number pointer neither points outside the
3147 * filesystem nor points at static AG metadata.
3148 */
3149bool
3150xfs_verify_fsbno(
3151 struct xfs_mount *mp,
3152 xfs_fsblock_t fsbno)
3153{
3154 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno);
3155
3156 if (agno >= mp->m_sb.sb_agcount)
3157 return false;
3158 return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno));
3159}
ce1d802e
DW
3160
3161/* Is there a record covering a given extent? */
3162int
3163xfs_alloc_has_record(
3164 struct xfs_btree_cur *cur,
3165 xfs_agblock_t bno,
3166 xfs_extlen_t len,
3167 bool *exists)
3168{
3169 union xfs_btree_irec low;
3170 union xfs_btree_irec high;
3171
3172 memset(&low, 0, sizeof(low));
3173 low.a.ar_startblock = bno;
3174 memset(&high, 0xFF, sizeof(high));
3175 high.a.ar_startblock = bno + len - 1;
3176
3177 return xfs_btree_has_record(cur, &low, &high, exists);
3178}