]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - libxfs/xfs_bmap_btree.c
xfs: try any AG when allocating the first btree block when reflinking
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_bmap_btree.c
1 /*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18 #include "libxfs_priv.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_bit.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_inode.h"
28 #include "xfs_trans.h"
29 #include "xfs_alloc.h"
30 #include "xfs_btree.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_bmap.h"
33 #include "xfs_trace.h"
34 #include "xfs_cksum.h"
35 #include "xfs_rmap.h"
36
37 /*
38 * Determine the extent state.
39 */
40 /* ARGSUSED */
41 STATIC xfs_exntst_t
42 xfs_extent_state(
43 xfs_filblks_t blks,
44 int extent_flag)
45 {
46 if (extent_flag) {
47 ASSERT(blks != 0); /* saved for DMIG */
48 return XFS_EXT_UNWRITTEN;
49 }
50 return XFS_EXT_NORM;
51 }
52
53 /*
54 * Convert on-disk form of btree root to in-memory form.
55 */
56 void
57 xfs_bmdr_to_bmbt(
58 struct xfs_inode *ip,
59 xfs_bmdr_block_t *dblock,
60 int dblocklen,
61 struct xfs_btree_block *rblock,
62 int rblocklen)
63 {
64 struct xfs_mount *mp = ip->i_mount;
65 int dmxr;
66 xfs_bmbt_key_t *fkp;
67 __be64 *fpp;
68 xfs_bmbt_key_t *tkp;
69 __be64 *tpp;
70
71 xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
72 XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
73 XFS_BTREE_LONG_PTRS);
74 rblock->bb_level = dblock->bb_level;
75 ASSERT(be16_to_cpu(rblock->bb_level) > 0);
76 rblock->bb_numrecs = dblock->bb_numrecs;
77 dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
78 fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
79 tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
80 fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
81 tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
82 dmxr = be16_to_cpu(dblock->bb_numrecs);
83 memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
84 memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
85 }
86
87 /*
88 * Convert a compressed bmap extent record to an uncompressed form.
89 * This code must be in sync with the routines xfs_bmbt_get_startoff,
90 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
91 */
92 STATIC void
93 __xfs_bmbt_get_all(
94 __uint64_t l0,
95 __uint64_t l1,
96 xfs_bmbt_irec_t *s)
97 {
98 int ext_flag;
99 xfs_exntst_t st;
100
101 ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN));
102 s->br_startoff = ((xfs_fileoff_t)l0 &
103 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
104 s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) |
105 (((xfs_fsblock_t)l1) >> 21);
106 s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21));
107 /* This is xfs_extent_state() in-line */
108 if (ext_flag) {
109 ASSERT(s->br_blockcount != 0); /* saved for DMIG */
110 st = XFS_EXT_UNWRITTEN;
111 } else
112 st = XFS_EXT_NORM;
113 s->br_state = st;
114 }
115
116 void
117 xfs_bmbt_get_all(
118 xfs_bmbt_rec_host_t *r,
119 xfs_bmbt_irec_t *s)
120 {
121 __xfs_bmbt_get_all(r->l0, r->l1, s);
122 }
123
124 /*
125 * Extract the blockcount field from an in memory bmap extent record.
126 */
127 xfs_filblks_t
128 xfs_bmbt_get_blockcount(
129 xfs_bmbt_rec_host_t *r)
130 {
131 return (xfs_filblks_t)(r->l1 & xfs_mask64lo(21));
132 }
133
134 /*
135 * Extract the startblock field from an in memory bmap extent record.
136 */
137 xfs_fsblock_t
138 xfs_bmbt_get_startblock(
139 xfs_bmbt_rec_host_t *r)
140 {
141 return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) |
142 (((xfs_fsblock_t)r->l1) >> 21);
143 }
144
145 /*
146 * Extract the startoff field from an in memory bmap extent record.
147 */
148 xfs_fileoff_t
149 xfs_bmbt_get_startoff(
150 xfs_bmbt_rec_host_t *r)
151 {
152 return ((xfs_fileoff_t)r->l0 &
153 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
154 }
155
156 xfs_exntst_t
157 xfs_bmbt_get_state(
158 xfs_bmbt_rec_host_t *r)
159 {
160 int ext_flag;
161
162 ext_flag = (int)((r->l0) >> (64 - BMBT_EXNTFLAG_BITLEN));
163 return xfs_extent_state(xfs_bmbt_get_blockcount(r),
164 ext_flag);
165 }
166
167 /*
168 * Extract the blockcount field from an on disk bmap extent record.
169 */
170 xfs_filblks_t
171 xfs_bmbt_disk_get_blockcount(
172 xfs_bmbt_rec_t *r)
173 {
174 return (xfs_filblks_t)(be64_to_cpu(r->l1) & xfs_mask64lo(21));
175 }
176
177 /*
178 * Extract the startoff field from a disk format bmap extent record.
179 */
180 xfs_fileoff_t
181 xfs_bmbt_disk_get_startoff(
182 xfs_bmbt_rec_t *r)
183 {
184 return ((xfs_fileoff_t)be64_to_cpu(r->l0) &
185 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
186 }
187
188
189 /*
190 * Set all the fields in a bmap extent record from the arguments.
191 */
192 void
193 xfs_bmbt_set_allf(
194 xfs_bmbt_rec_host_t *r,
195 xfs_fileoff_t startoff,
196 xfs_fsblock_t startblock,
197 xfs_filblks_t blockcount,
198 xfs_exntst_t state)
199 {
200 int extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
201
202 ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
203 ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
204 ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
205
206 ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
207
208 r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
209 ((xfs_bmbt_rec_base_t)startoff << 9) |
210 ((xfs_bmbt_rec_base_t)startblock >> 43);
211 r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
212 ((xfs_bmbt_rec_base_t)blockcount &
213 (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
214 }
215
216 /*
217 * Set all the fields in a bmap extent record from the uncompressed form.
218 */
219 void
220 xfs_bmbt_set_all(
221 xfs_bmbt_rec_host_t *r,
222 xfs_bmbt_irec_t *s)
223 {
224 xfs_bmbt_set_allf(r, s->br_startoff, s->br_startblock,
225 s->br_blockcount, s->br_state);
226 }
227
228
229 /*
230 * Set all the fields in a disk format bmap extent record from the arguments.
231 */
232 void
233 xfs_bmbt_disk_set_allf(
234 xfs_bmbt_rec_t *r,
235 xfs_fileoff_t startoff,
236 xfs_fsblock_t startblock,
237 xfs_filblks_t blockcount,
238 xfs_exntst_t state)
239 {
240 int extent_flag = (state == XFS_EXT_NORM) ? 0 : 1;
241
242 ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
243 ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
244 ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
245 ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
246
247 r->l0 = cpu_to_be64(
248 ((xfs_bmbt_rec_base_t)extent_flag << 63) |
249 ((xfs_bmbt_rec_base_t)startoff << 9) |
250 ((xfs_bmbt_rec_base_t)startblock >> 43));
251 r->l1 = cpu_to_be64(
252 ((xfs_bmbt_rec_base_t)startblock << 21) |
253 ((xfs_bmbt_rec_base_t)blockcount &
254 (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
255 }
256
257 /*
258 * Set all the fields in a bmap extent record from the uncompressed form.
259 */
260 STATIC void
261 xfs_bmbt_disk_set_all(
262 xfs_bmbt_rec_t *r,
263 xfs_bmbt_irec_t *s)
264 {
265 xfs_bmbt_disk_set_allf(r, s->br_startoff, s->br_startblock,
266 s->br_blockcount, s->br_state);
267 }
268
269 /*
270 * Set the blockcount field in a bmap extent record.
271 */
272 void
273 xfs_bmbt_set_blockcount(
274 xfs_bmbt_rec_host_t *r,
275 xfs_filblks_t v)
276 {
277 ASSERT((v & xfs_mask64hi(43)) == 0);
278 r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64hi(43)) |
279 (xfs_bmbt_rec_base_t)(v & xfs_mask64lo(21));
280 }
281
282 /*
283 * Set the startblock field in a bmap extent record.
284 */
285 void
286 xfs_bmbt_set_startblock(
287 xfs_bmbt_rec_host_t *r,
288 xfs_fsblock_t v)
289 {
290 ASSERT((v & xfs_mask64hi(12)) == 0);
291 r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) |
292 (xfs_bmbt_rec_base_t)(v >> 43);
293 r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) |
294 (xfs_bmbt_rec_base_t)(v << 21);
295 }
296
297 /*
298 * Set the startoff field in a bmap extent record.
299 */
300 void
301 xfs_bmbt_set_startoff(
302 xfs_bmbt_rec_host_t *r,
303 xfs_fileoff_t v)
304 {
305 ASSERT((v & xfs_mask64hi(9)) == 0);
306 r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) xfs_mask64hi(1)) |
307 ((xfs_bmbt_rec_base_t)v << 9) |
308 (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64lo(9));
309 }
310
311 /*
312 * Set the extent state field in a bmap extent record.
313 */
314 void
315 xfs_bmbt_set_state(
316 xfs_bmbt_rec_host_t *r,
317 xfs_exntst_t v)
318 {
319 ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN);
320 if (v == XFS_EXT_NORM)
321 r->l0 &= xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN);
322 else
323 r->l0 |= xfs_mask64hi(BMBT_EXNTFLAG_BITLEN);
324 }
325
326 /*
327 * Convert in-memory form of btree root to on-disk form.
328 */
329 void
330 xfs_bmbt_to_bmdr(
331 struct xfs_mount *mp,
332 struct xfs_btree_block *rblock,
333 int rblocklen,
334 xfs_bmdr_block_t *dblock,
335 int dblocklen)
336 {
337 int dmxr;
338 xfs_bmbt_key_t *fkp;
339 __be64 *fpp;
340 xfs_bmbt_key_t *tkp;
341 __be64 *tpp;
342
343 if (xfs_sb_version_hascrc(&mp->m_sb)) {
344 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC));
345 ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid,
346 &mp->m_sb.sb_meta_uuid));
347 ASSERT(rblock->bb_u.l.bb_blkno ==
348 cpu_to_be64(XFS_BUF_DADDR_NULL));
349 } else
350 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
351 ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK));
352 ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK));
353 ASSERT(rblock->bb_level != 0);
354 dblock->bb_level = rblock->bb_level;
355 dblock->bb_numrecs = rblock->bb_numrecs;
356 dmxr = xfs_bmdr_maxrecs(dblocklen, 0);
357 fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
358 tkp = XFS_BMDR_KEY_ADDR(dblock, 1);
359 fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
360 tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
361 dmxr = be16_to_cpu(dblock->bb_numrecs);
362 memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
363 memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
364 }
365
366 /*
367 * Check extent records, which have just been read, for
368 * any bit in the extent flag field. ASSERT on debug
369 * kernels, as this condition should not occur.
370 * Return an error condition (1) if any flags found,
371 * otherwise return 0.
372 */
373
374 int
375 xfs_check_nostate_extents(
376 xfs_ifork_t *ifp,
377 xfs_extnum_t idx,
378 xfs_extnum_t num)
379 {
380 for (; num > 0; num--, idx++) {
381 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
382 if ((ep->l0 >>
383 (64 - BMBT_EXNTFLAG_BITLEN)) != 0) {
384 ASSERT(0);
385 return 1;
386 }
387 }
388 return 0;
389 }
390
391
392 STATIC struct xfs_btree_cur *
393 xfs_bmbt_dup_cursor(
394 struct xfs_btree_cur *cur)
395 {
396 struct xfs_btree_cur *new;
397
398 new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp,
399 cur->bc_private.b.ip, cur->bc_private.b.whichfork);
400
401 /*
402 * Copy the firstblock, dfops, and flags values,
403 * since init cursor doesn't get them.
404 */
405 new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
406 new->bc_private.b.dfops = cur->bc_private.b.dfops;
407 new->bc_private.b.flags = cur->bc_private.b.flags;
408
409 return new;
410 }
411
412 STATIC void
413 xfs_bmbt_update_cursor(
414 struct xfs_btree_cur *src,
415 struct xfs_btree_cur *dst)
416 {
417 ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) ||
418 (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME));
419 ASSERT(dst->bc_private.b.dfops == src->bc_private.b.dfops);
420
421 dst->bc_private.b.allocated += src->bc_private.b.allocated;
422 dst->bc_private.b.firstblock = src->bc_private.b.firstblock;
423
424 src->bc_private.b.allocated = 0;
425 }
426
427 STATIC int
428 xfs_bmbt_alloc_block(
429 struct xfs_btree_cur *cur,
430 union xfs_btree_ptr *start,
431 union xfs_btree_ptr *new,
432 int *stat)
433 {
434 xfs_alloc_arg_t args; /* block allocation args */
435 int error; /* error return value */
436
437 memset(&args, 0, sizeof(args));
438 args.tp = cur->bc_tp;
439 args.mp = cur->bc_mp;
440 args.fsbno = cur->bc_private.b.firstblock;
441 args.firstblock = args.fsbno;
442 xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_private.b.ip->i_ino,
443 cur->bc_private.b.whichfork);
444
445 if (args.fsbno == NULLFSBLOCK) {
446 args.fsbno = be64_to_cpu(start->l);
447 args.type = XFS_ALLOCTYPE_START_BNO;
448 try_another_ag:
449 /*
450 * Make sure there is sufficient room left in the AG to
451 * complete a full tree split for an extent insert. If
452 * we are converting the middle part of an extent then
453 * we may need space for two tree splits.
454 *
455 * We are relying on the caller to make the correct block
456 * reservation for this operation to succeed. If the
457 * reservation amount is insufficient then we may fail a
458 * block allocation here and corrupt the filesystem.
459 */
460 args.minleft = args.tp->t_blk_res;
461 } else if (cur->bc_private.b.dfops->dop_low) {
462 args.type = XFS_ALLOCTYPE_START_BNO;
463 } else {
464 args.type = XFS_ALLOCTYPE_NEAR_BNO;
465 }
466
467 args.minlen = args.maxlen = args.prod = 1;
468 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
469 if (!args.wasdel && args.tp->t_blk_res == 0) {
470 error = -ENOSPC;
471 goto error0;
472 }
473 error = xfs_alloc_vextent(&args);
474 if (error)
475 goto error0;
476
477 /*
478 * During a CoW operation, the allocation and bmbt updates occur in
479 * different transactions. The mapping code tries to put new bmbt
480 * blocks near extents being mapped, but the only way to guarantee this
481 * is if the alloc and the mapping happen in a single transaction that
482 * has a block reservation. That isn't the case here, so if we run out
483 * of space we'll try again with another AG.
484 */
485 if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
486 args.fsbno == NULLFSBLOCK &&
487 args.type == XFS_ALLOCTYPE_NEAR_BNO) {
488 args.fsbno = cur->bc_private.b.firstblock;
489 args.type = XFS_ALLOCTYPE_FIRST_AG;
490 goto try_another_ag;
491 }
492
493 if (args.fsbno == NULLFSBLOCK && args.minleft) {
494 /*
495 * Could not find an AG with enough free space to satisfy
496 * a full btree split. Try again and if
497 * successful activate the lowspace algorithm.
498 */
499 args.fsbno = 0;
500 args.type = XFS_ALLOCTYPE_FIRST_AG;
501 error = xfs_alloc_vextent(&args);
502 if (error)
503 goto error0;
504 cur->bc_private.b.dfops->dop_low = true;
505 }
506 if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
507 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
508 *stat = 0;
509 return 0;
510 }
511 ASSERT(args.len == 1);
512 cur->bc_private.b.firstblock = args.fsbno;
513 cur->bc_private.b.allocated++;
514 cur->bc_private.b.ip->i_d.di_nblocks++;
515 xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
516 xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
517 XFS_TRANS_DQ_BCOUNT, 1L);
518
519 new->l = cpu_to_be64(args.fsbno);
520
521 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
522 *stat = 1;
523 return 0;
524
525 error0:
526 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
527 return error;
528 }
529
530 STATIC int
531 xfs_bmbt_free_block(
532 struct xfs_btree_cur *cur,
533 struct xfs_buf *bp)
534 {
535 struct xfs_mount *mp = cur->bc_mp;
536 struct xfs_inode *ip = cur->bc_private.b.ip;
537 struct xfs_trans *tp = cur->bc_tp;
538 xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
539 struct xfs_owner_info oinfo;
540
541 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_private.b.whichfork);
542 xfs_bmap_add_free(mp, cur->bc_private.b.dfops, fsbno, 1, &oinfo);
543 ip->i_d.di_nblocks--;
544
545 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
546 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
547 return 0;
548 }
549
550 STATIC int
551 xfs_bmbt_get_minrecs(
552 struct xfs_btree_cur *cur,
553 int level)
554 {
555 if (level == cur->bc_nlevels - 1) {
556 struct xfs_ifork *ifp;
557
558 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
559 cur->bc_private.b.whichfork);
560
561 return xfs_bmbt_maxrecs(cur->bc_mp,
562 ifp->if_broot_bytes, level == 0) / 2;
563 }
564
565 return cur->bc_mp->m_bmap_dmnr[level != 0];
566 }
567
568 int
569 xfs_bmbt_get_maxrecs(
570 struct xfs_btree_cur *cur,
571 int level)
572 {
573 if (level == cur->bc_nlevels - 1) {
574 struct xfs_ifork *ifp;
575
576 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
577 cur->bc_private.b.whichfork);
578
579 return xfs_bmbt_maxrecs(cur->bc_mp,
580 ifp->if_broot_bytes, level == 0);
581 }
582
583 return cur->bc_mp->m_bmap_dmxr[level != 0];
584
585 }
586
587 /*
588 * Get the maximum records we could store in the on-disk format.
589 *
590 * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but
591 * for the root node this checks the available space in the dinode fork
592 * so that we can resize the in-memory buffer to match it. After a
593 * resize to the maximum size this function returns the same value
594 * as xfs_bmbt_get_maxrecs for the root node, too.
595 */
596 STATIC int
597 xfs_bmbt_get_dmaxrecs(
598 struct xfs_btree_cur *cur,
599 int level)
600 {
601 if (level != cur->bc_nlevels - 1)
602 return cur->bc_mp->m_bmap_dmxr[level != 0];
603 return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0);
604 }
605
606 STATIC void
607 xfs_bmbt_init_key_from_rec(
608 union xfs_btree_key *key,
609 union xfs_btree_rec *rec)
610 {
611 key->bmbt.br_startoff =
612 cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt));
613 }
614
615 STATIC void
616 xfs_bmbt_init_rec_from_cur(
617 struct xfs_btree_cur *cur,
618 union xfs_btree_rec *rec)
619 {
620 xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b);
621 }
622
623 STATIC void
624 xfs_bmbt_init_ptr_from_cur(
625 struct xfs_btree_cur *cur,
626 union xfs_btree_ptr *ptr)
627 {
628 ptr->l = 0;
629 }
630
631 STATIC __int64_t
632 xfs_bmbt_key_diff(
633 struct xfs_btree_cur *cur,
634 union xfs_btree_key *key)
635 {
636 return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) -
637 cur->bc_rec.b.br_startoff;
638 }
639
640 static bool
641 xfs_bmbt_verify(
642 struct xfs_buf *bp)
643 {
644 struct xfs_mount *mp = bp->b_target->bt_mount;
645 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
646 unsigned int level;
647
648 switch (block->bb_magic) {
649 case cpu_to_be32(XFS_BMAP_CRC_MAGIC):
650 if (!xfs_sb_version_hascrc(&mp->m_sb))
651 return false;
652 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
653 return false;
654 if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn)
655 return false;
656 /*
657 * XXX: need a better way of verifying the owner here. Right now
658 * just make sure there has been one set.
659 */
660 if (be64_to_cpu(block->bb_u.l.bb_owner) == 0)
661 return false;
662 /* fall through */
663 case cpu_to_be32(XFS_BMAP_MAGIC):
664 break;
665 default:
666 return false;
667 }
668
669 /*
670 * numrecs and level verification.
671 *
672 * We don't know what fork we belong to, so just verify that the level
673 * is less than the maximum of the two. Later checks will be more
674 * precise.
675 */
676 level = be16_to_cpu(block->bb_level);
677 if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]))
678 return false;
679 if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
680 return false;
681
682 /* sibling pointer verification */
683 if (!block->bb_u.l.bb_leftsib ||
684 (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
685 !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
686 return false;
687 if (!block->bb_u.l.bb_rightsib ||
688 (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
689 !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
690 return false;
691
692 return true;
693 }
694
695 static void
696 xfs_bmbt_read_verify(
697 struct xfs_buf *bp)
698 {
699 if (!xfs_btree_lblock_verify_crc(bp))
700 xfs_buf_ioerror(bp, -EFSBADCRC);
701 else if (!xfs_bmbt_verify(bp))
702 xfs_buf_ioerror(bp, -EFSCORRUPTED);
703
704 if (bp->b_error) {
705 trace_xfs_btree_corrupt(bp, _RET_IP_);
706 xfs_verifier_error(bp);
707 }
708 }
709
710 static void
711 xfs_bmbt_write_verify(
712 struct xfs_buf *bp)
713 {
714 if (!xfs_bmbt_verify(bp)) {
715 trace_xfs_btree_corrupt(bp, _RET_IP_);
716 xfs_buf_ioerror(bp, -EFSCORRUPTED);
717 xfs_verifier_error(bp);
718 return;
719 }
720 xfs_btree_lblock_calc_crc(bp);
721 }
722
723 const struct xfs_buf_ops xfs_bmbt_buf_ops = {
724 .name = "xfs_bmbt",
725 .verify_read = xfs_bmbt_read_verify,
726 .verify_write = xfs_bmbt_write_verify,
727 };
728
729
#if defined(DEBUG) || defined(XFS_WARN)
/*
 * Return non-zero if key @k1 sorts strictly before key @k2, comparing
 * start offsets.
 */
STATIC int
xfs_bmbt_keys_inorder(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*k1,
	union xfs_btree_key	*k2)
{
	return be64_to_cpu(k2->bmbt.br_startoff) >
	       be64_to_cpu(k1->bmbt.br_startoff);
}

/*
 * Return non-zero if record @r1 ends at or before the start of record
 * @r2, i.e. the two records are in order.
 */
STATIC int
xfs_bmbt_recs_inorder(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*r1,
	union xfs_btree_rec	*r2)
{
	xfs_fileoff_t		first_off = xfs_bmbt_disk_get_startoff(&r1->bmbt);
	xfs_filblks_t		first_len = xfs_bmbt_disk_get_blockcount(&r1->bmbt);
	xfs_fileoff_t		second_off = xfs_bmbt_disk_get_startoff(&r2->bmbt);

	return first_off + first_len <= second_off;
}
#endif	/* DEBUG */
752
753 static const struct xfs_btree_ops xfs_bmbt_ops = {
754 .rec_len = sizeof(xfs_bmbt_rec_t),
755 .key_len = sizeof(xfs_bmbt_key_t),
756
757 .dup_cursor = xfs_bmbt_dup_cursor,
758 .update_cursor = xfs_bmbt_update_cursor,
759 .alloc_block = xfs_bmbt_alloc_block,
760 .free_block = xfs_bmbt_free_block,
761 .get_maxrecs = xfs_bmbt_get_maxrecs,
762 .get_minrecs = xfs_bmbt_get_minrecs,
763 .get_dmaxrecs = xfs_bmbt_get_dmaxrecs,
764 .init_key_from_rec = xfs_bmbt_init_key_from_rec,
765 .init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
766 .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
767 .key_diff = xfs_bmbt_key_diff,
768 .buf_ops = &xfs_bmbt_buf_ops,
769 #if defined(DEBUG) || defined(XFS_WARN)
770 .keys_inorder = xfs_bmbt_keys_inorder,
771 .recs_inorder = xfs_bmbt_recs_inorder,
772 #endif
773 };
774
775 /*
776 * Allocate a new bmap btree cursor.
777 */
778 struct xfs_btree_cur * /* new bmap btree cursor */
779 xfs_bmbt_init_cursor(
780 struct xfs_mount *mp, /* file system mount point */
781 struct xfs_trans *tp, /* transaction pointer */
782 struct xfs_inode *ip, /* inode owning the btree */
783 int whichfork) /* data or attr fork */
784 {
785 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
786 struct xfs_btree_cur *cur;
787 ASSERT(whichfork != XFS_COW_FORK);
788
789 cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
790
791 cur->bc_tp = tp;
792 cur->bc_mp = mp;
793 cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
794 cur->bc_btnum = XFS_BTNUM_BMAP;
795 cur->bc_blocklog = mp->m_sb.sb_blocklog;
796 cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2);
797
798 cur->bc_ops = &xfs_bmbt_ops;
799 cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
800 if (xfs_sb_version_hascrc(&mp->m_sb))
801 cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
802
803 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
804 cur->bc_private.b.ip = ip;
805 cur->bc_private.b.firstblock = NULLFSBLOCK;
806 cur->bc_private.b.dfops = NULL;
807 cur->bc_private.b.allocated = 0;
808 cur->bc_private.b.flags = 0;
809 cur->bc_private.b.whichfork = whichfork;
810
811 return cur;
812 }
813
814 /*
815 * Calculate number of records in a bmap btree block.
816 */
817 int
818 xfs_bmbt_maxrecs(
819 struct xfs_mount *mp,
820 int blocklen,
821 int leaf)
822 {
823 blocklen -= XFS_BMBT_BLOCK_LEN(mp);
824
825 if (leaf)
826 return blocklen / sizeof(xfs_bmbt_rec_t);
827 return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t));
828 }
829
830 /*
831 * Calculate number of records in a bmap btree inode root.
832 */
833 int
834 xfs_bmdr_maxrecs(
835 int blocklen,
836 int leaf)
837 {
838 blocklen -= sizeof(xfs_bmdr_block_t);
839
840 if (leaf)
841 return blocklen / sizeof(xfs_bmdr_rec_t);
842 return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t));
843 }
844
845 /*
846 * Change the owner of a btree format fork fo the inode passed in. Change it to
847 * the owner of that is passed in so that we can change owners before or after
848 * we switch forks between inodes. The operation that the caller is doing will
849 * determine whether is needs to change owner before or after the switch.
850 *
851 * For demand paged transactional modification, the fork switch should be done
852 * after reading in all the blocks, modifying them and pinning them in the
853 * transaction. For modification when the buffers are already pinned in memory,
854 * the fork switch can be done before changing the owner as we won't need to
855 * validate the owner until the btree buffers are unpinned and writes can occur
856 * again.
857 *
858 * For recovery based ownership change, there is no transactional context and
859 * so a buffer list must be supplied so that we can record the buffers that we
860 * modified for the caller to issue IO on.
861 */
862 int
863 xfs_bmbt_change_owner(
864 struct xfs_trans *tp,
865 struct xfs_inode *ip,
866 int whichfork,
867 xfs_ino_t new_owner,
868 struct list_head *buffer_list)
869 {
870 struct xfs_btree_cur *cur;
871 int error;
872
873 ASSERT(tp || buffer_list);
874 ASSERT(!(tp && buffer_list));
875 if (whichfork == XFS_DATA_FORK)
876 ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE);
877 else
878 ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE);
879
880 cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
881 if (!cur)
882 return -ENOMEM;
883
884 error = xfs_btree_change_owner(cur, new_owner, buffer_list);
885 xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
886 return error;
887 }