// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "libxfs_priv.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_errortag.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_attr_leaf.h"
#include "xfs_quota_defs.h"
#include "xfs_rmap.h"
#include "xfs_ag_resv.h"
#include "xfs_refcount.h"


kmem_zone_t		*xfs_bmap_free_item_zone;

/*
 * Miscellaneous helper functions
 */

/*
 * Compute and fill in the value of the maximum depth of a bmap btree
 * in this filesystem.  Done once, during mount.
 */
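/*
 * Worked example (illustrative only; the per-mount record counts below are
 * hypothetical): with minleafrecs = 125, minnoderecs = 250 and
 * maxrootrecs = 9, the 2^31 - 1 (MAXEXTNUM) possible data fork extents
 * need ceil((2^31 - 1) / 125) = 17179870 leaf blocks, which collapse level
 * by level to 68720, 275 and 2 blocks; 2 records then fit in the inode
 * root, so the loop below computes m_bm_maxlevels = 5.
 */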
void
xfs_bmap_compute_maxlevels(
	xfs_mount_t	*mp,		/* file system mount structure */
	int		whichfork)	/* data or attr fork */
{
	int		level;		/* btree level */
	uint		maxblocks;	/* max blocks at this level */
	uint		maxleafents;	/* max leaf entries possible */
	int		maxrootrecs;	/* max records in root block */
	int		minleafrecs;	/* min records in leaf block */
	int		minnoderecs;	/* min records in node block */
	int		sz;		/* root block size */

	/*
	 * The maximum number of extents in a file, hence the maximum
	 * number of leaf entries, is controlled by the type of di_nextents
	 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
	 * (a signed 16-bit number, xfs_aextnum_t).
	 *
	 * Note that we can no longer assume that if we are in ATTR1 that
	 * the fork offset of all the inodes will be
	 * (xfs_default_attroffset(ip) >> 3) because we could have mounted
	 * with ATTR2 and then mounted back with ATTR1, keeping the
	 * di_forkoff's fixed but probably at various positions. Therefore,
	 * for both ATTR1 and ATTR2 we have to assume the worst case scenario
	 * of a minimum size available.
	 */
	if (whichfork == XFS_DATA_FORK) {
		maxleafents = MAXEXTNUM;
		sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
	} else {
		maxleafents = MAXAEXTNUM;
		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
	}
	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
	minleafrecs = mp->m_bmap_dmnr[0];
	minnoderecs = mp->m_bmap_dmnr[1];
	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
	for (level = 1; maxblocks > 1; level++) {
		if (maxblocks <= maxrootrecs)
			maxblocks = 1;
		else
			maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
	}
	mp->m_bm_maxlevels[whichfork] = level;
}

STATIC int				/* error */
xfs_bmbt_lookup_eq(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*irec,
	int			*stat)	/* success/failure */
{
	cur->bc_rec.b = *irec;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

STATIC int				/* error */
xfs_bmbt_lookup_first(
	struct xfs_btree_cur	*cur,
	int			*stat)	/* success/failure */
{
	cur->bc_rec.b.br_startoff = 0;
	cur->bc_rec.b.br_startblock = 0;
	cur->bc_rec.b.br_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}
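/*
 * Note: searching greater-than-or-equal for the all-zeroes key above
 * positions the cursor at the first (leftmost) record in the bmap btree,
 * which is why the caller never needs to supply a key.
 */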

/*
 * Check if the inode needs to be converted to btree format.
 */
static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
{
	return whichfork != XFS_COW_FORK &&
		XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
		XFS_IFORK_NEXTENTS(ip, whichfork) >
			XFS_IFORK_MAXEXT(ip, whichfork);
}

/*
 * Check if the inode should be converted to extent format.
 */
static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
{
	return whichfork != XFS_COW_FORK &&
		XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
		XFS_IFORK_NEXTENTS(ip, whichfork) <=
			XFS_IFORK_MAXEXT(ip, whichfork);
}
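/*
 * Together these two helpers implement the extents <-> btree conversion
 * threshold: a fork is promoted to btree format once its extent count
 * exceeds what fits inline in the inode (XFS_IFORK_MAXEXT), and demoted
 * back to extent format as soon as the count fits inline again.
 */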

/*
 * Update the record referred to by cur to the value given by irec.
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_bmbt_update(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*irec)
{
	union xfs_btree_rec	rec;

	xfs_bmbt_disk_set_all(&rec.bmbt, irec);
	return xfs_btree_update(cur, &rec);
}

/*
 * Compute the worst-case number of indirect blocks that will be used
 * for ip's delayed extent of length "len".
 */
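/*
 * Worked example (illustrative; the per-level record limits are
 * hypothetical): for len = 1000 blocks with m_bmap_dmxr[0] = 125,
 * m_bmap_dmxr[1] = 250 and 5 maximum bmap btree levels, the loop counts
 * ceil(1000 / 125) = 8 leaf blocks, then ceil(8 / 250) = 1 node block,
 * and returns 8 + 1 + (5 - 1 - 1) = 12 worst-case indirect blocks.
 */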
STATIC xfs_filblks_t
xfs_bmap_worst_indlen(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_filblks_t	len)		/* delayed extent length */
{
	int		level;		/* btree level number */
	int		maxrecs;	/* maximum record count at this level */
	xfs_mount_t	*mp;		/* mount structure */
	xfs_filblks_t	rval;		/* return value */

	mp = ip->i_mount;
	maxrecs = mp->m_bmap_dmxr[0];
	for (level = 0, rval = 0;
	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
	     level++) {
		len += maxrecs - 1;
		do_div(len, maxrecs);
		rval += len;
		if (len == 1)
			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
				level - 1;
		if (level == 0)
			maxrecs = mp->m_bmap_dmxr[1];
	}
	return rval;
}

/*
 * Calculate the default attribute fork offset for newly created inodes.
 */
uint
xfs_default_attroffset(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	uint			offset;

	if (mp->m_sb.sb_inodesize == 256) {
		offset = XFS_LITINO(mp, ip->i_d.di_version) -
				XFS_BMDR_SPACE_CALC(MINABTPTRS);
	} else {
		offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
	}

	ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version));
	return offset;
}
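/*
 * Note that di_forkoff is stored in units of 8 bytes, which is why the
 * byte offset computed above is shifted right by 3 before being compared
 * with or assigned to di_forkoff (see xfs_bmap_forkoff_reset() below and
 * xfs_bmap_add_attrfork()).
 */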

/*
 * Helper routine to reset inode di_forkoff field when switching
 * attribute fork from local to extent format - we reset it where
 * possible to make space available for inline data fork extents.
 */
STATIC void
xfs_bmap_forkoff_reset(
	xfs_inode_t	*ip,
	int		whichfork)
{
	if (whichfork == XFS_ATTR_FORK &&
	    ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
	    ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;

		if (dfl_forkoff > ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = dfl_forkoff;
	}
}

#ifdef DEBUG
STATIC struct xfs_buf *
xfs_bmap_get_bp(
	struct xfs_btree_cur	*cur,
	xfs_fsblock_t		bno)
{
	struct xfs_log_item	*lip;
	int			i;

	if (!cur)
		return NULL;

	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
		if (!cur->bc_bufs[i])
			break;
		if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
			return cur->bc_bufs[i];
	}

	/* Chase down all the log items to see if the bp is there */
	list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
		struct xfs_buf_log_item	*bip = (struct xfs_buf_log_item *)lip;

		if (bip->bli_item.li_type == XFS_LI_BUF &&
		    XFS_BUF_ADDR(bip->bli_buf) == bno)
			return bip->bli_buf;
	}

	return NULL;
}

STATIC void
xfs_check_block(
	struct xfs_btree_block	*block,
	xfs_mount_t		*mp,
	int			root,
	short			sz)
{
	int			i, j, dmxr;
	__be64			*pp, *thispa;	/* pointer to block address */
	xfs_bmbt_key_t		*prevp, *keyp;

	ASSERT(be16_to_cpu(block->bb_level) > 0);

	prevp = NULL;
	for (i = 1; i <= xfs_btree_get_numrecs(block); i++) {
		dmxr = mp->m_bmap_dmxr[0];
		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);

		if (prevp) {
			ASSERT(be64_to_cpu(prevp->br_startoff) <
			       be64_to_cpu(keyp->br_startoff));
		}
		prevp = keyp;

		/*
		 * Compare the block numbers to see if there are dups.
		 */
		if (root)
			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
		else
			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);

		for (j = i + 1; j <= be16_to_cpu(block->bb_numrecs); j++) {
			if (root)
				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
			else
				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
			if (*thispa == *pp) {
				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
					__func__, j, i,
					(unsigned long long)be64_to_cpu(*thispa));
				xfs_err(mp, "%s: ptrs are equal in node\n",
					__func__);
				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			}
		}
	}
}

/*
 * Check that the extents for the inode ip are in the right order in all
 * btree leaves. This becomes prohibitively expensive for large extent count
 * files, so don't bother with inodes that have more than 10,000 extents in
 * them. The btree record ordering checks will still be done, so for such
 * large bmapbt constructs those checks will catch most corruptions.
 */
STATIC void
xfs_bmap_check_leaf_extents(
	xfs_btree_cur_t		*cur,	/* btree cursor or null */
	xfs_inode_t		*ip,	/* incore inode pointer */
	int			whichfork) /* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_extnum_t		i = 0, j; /* index into the extents list */
	struct xfs_ifork	*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
	int			bp_release = 0;

	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
		return;

	/* skip large extent count inodes */
	if (ip->i_d.di_nextents > 10000)
		return;

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	ASSERT(bno != NULLFSBLOCK);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		/* See if buf is in cur first */
		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
		if (level == 0)
			break;

		/*
		 * Check this block for basic sanity (increasing keys and
		 * no duplicate blocks).
		 */

		xfs_check_block(block, mp, 0, 0);
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(mp,
					xfs_verify_fsbno(mp, bno), error0);
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	i = 0;

	/*
	 * Loop over all leaf nodes checking that all extents are in the
	 * right order.
	 */
	for (;;) {
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;

		num_recs = xfs_btree_get_numrecs(block);

		/*
		 * Read-ahead the next leaf block, if any.
		 */

		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);

		/*
		 * Check all the extents to make sure they are OK.
		 * If we had a previous block, the last entry should
		 * conform with the first entry in this one.
		 */

		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
		if (i) {
			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
			       xfs_bmbt_disk_get_blockcount(&last) <=
			       xfs_bmbt_disk_get_startoff(ep));
		}
		for (j = 1; j < num_recs; j++) {
			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
			       xfs_bmbt_disk_get_blockcount(ep) <=
			       xfs_bmbt_disk_get_startoff(nextp));
			ep = nextp;
		}

		last = *ep;
		i += num_recs;
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;

		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
	}

	return;

error0:
	xfs_warn(mp, "%s: at error0", __func__);
	if (bp_release)
		xfs_trans_brelse(NULL, bp);
error_norelse:
	xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
		__func__, i);
	xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	return;
}

/*
 * Validate that the bmbt_irecs being returned from bmapi are valid
 * given the caller's original parameters.  Specifically check the
 * ranges of the returned irecs to ensure that they only extend beyond
 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 */
STATIC void
xfs_bmap_validate_ret(
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	int			flags,
	xfs_bmbt_irec_t		*mval,
	int			nmap,
	int			ret_nmap)
{
	int			i;	/* index to map values */

	ASSERT(ret_nmap <= nmap);

	for (i = 0; i < ret_nmap; i++) {
		ASSERT(mval[i].br_blockcount > 0);
		if (!(flags & XFS_BMAPI_ENTIRE)) {
			ASSERT(mval[i].br_startoff >= bno);
			ASSERT(mval[i].br_blockcount <= len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
			       bno + len);
		} else {
			ASSERT(mval[i].br_startoff < bno + len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
			       bno);
		}
		ASSERT(i == 0 ||
		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
		       mval[i].br_startoff);
		ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
		       mval[i].br_startblock != HOLESTARTBLOCK);
		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
		       mval[i].br_state == XFS_EXT_UNWRITTEN);
	}
}

#else
#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
#define xfs_bmap_validate_ret(bno, len, flags, mval, onmap, nmap) do { } while (0)
#endif /* DEBUG */

/*
 * bmap free list manipulation functions
 */

/*
 * Add the extent to the list of extents to be freed at transaction end.
 * The list is maintained sorted (by block number).
 */
void
__xfs_bmap_add_free(
	struct xfs_trans		*tp,
	xfs_fsblock_t			bno,
	xfs_filblks_t			len,
	struct xfs_owner_info		*oinfo,
	bool				skip_discard)
{
	struct xfs_extent_free_item	*new;		/* new element */
#ifdef DEBUG
	struct xfs_mount		*mp = tp->t_mountp;
	xfs_agnumber_t			agno;
	xfs_agblock_t			agbno;

	ASSERT(bno != NULLFSBLOCK);
	ASSERT(len > 0);
	ASSERT(len <= MAXEXTLEN);
	ASSERT(!isnullstartblock(bno));
	agno = XFS_FSB_TO_AGNO(mp, bno);
	agbno = XFS_FSB_TO_AGBNO(mp, bno);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno < mp->m_sb.sb_agblocks);
	ASSERT(len < mp->m_sb.sb_agblocks);
	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
#endif
	ASSERT(xfs_bmap_free_item_zone != NULL);

	new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
	new->xefi_startblock = bno;
	new->xefi_blockcount = (xfs_extlen_t)len;
	if (oinfo)
		new->xefi_oinfo = *oinfo;
	else
		xfs_rmap_skip_owner_update(&new->xefi_oinfo);
	new->xefi_skip_discard = skip_discard;
	trace_xfs_bmap_free_defer(tp->t_mountp,
			XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
			XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
}
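/*
 * Callers normally go through the xfs_bmap_add_free() wrapper, which
 * passes skip_discard == false; see for example the freeing of the old
 * bmbt child block in xfs_bmap_btree_to_extents() below.  Note that the
 * extent is not freed here - the deferred op is only processed when the
 * transaction chain is finished.
 */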

/*
 * Inode fork format manipulation functions
 */

/*
 * Transform a btree format file with only one leaf node, where the
 * extents list will fit in the inode, into an extents format file.
 * Since the file extents are already in-core, all we have to do is
 * give up the space for the btree root and pitch the leaf block.
 */
STATIC int				/* error */
xfs_bmap_btree_to_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			*logflagsp, /* inode logging flags */
	int			whichfork)  /* data or attr fork */
{
	/* REFERENCED */
	struct xfs_btree_block	*cblock;/* child btree block */
	xfs_fsblock_t		cbno;	/* child block number */
	xfs_buf_t		*cbp;	/* child block's buffer */
	int			error;	/* error return value */
	struct xfs_ifork	*ifp;	/* inode fork data */
	xfs_mount_t		*mp;	/* mount point structure */
	__be64			*pp;	/* ptr to block address */
	struct xfs_btree_block	*rblock;/* root btree block */
	struct xfs_owner_info	oinfo;

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(whichfork != XFS_COW_FORK);
	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
	rblock = ifp->if_broot;
	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
	cbno = be64_to_cpu(*pp);
	*logflagsp = 0;
#ifdef DEBUG
	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
			xfs_btree_check_lptr(cur, cbno, 1));
#endif
	error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
				&xfs_bmbt_buf_ops);
	if (error)
		return error;
	cblock = XFS_BUF_TO_BLOCK(cbp);
	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
		return error;
	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
	xfs_bmap_add_free(cur->bc_tp, cbno, 1, &oinfo);
	ip->i_d.di_nblocks--;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
	xfs_trans_binval(tp, cbp);
	if (cur->bc_bufs[0] == cbp)
		cur->bc_bufs[0] = NULL;
	xfs_iroot_realloc(ip, -1, whichfork);
	ASSERT(ifp->if_broot == NULL);
	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
	return 0;
}

/*
 * Convert an extents-format file into a btree-format file.
 * The new file will have a root block (in the inode) and a single child block.
 */
STATIC int					/* error */
xfs_bmap_extents_to_btree(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode pointer */
	struct xfs_btree_cur	**curp,		/* cursor returned to caller */
	int			wasdel,		/* converting a delayed alloc */
	int			*logflagsp,	/* inode logging flags */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
	struct xfs_buf		*abp;		/* buffer for ablock */
	struct xfs_alloc_arg	args;		/* allocation arguments */
	struct xfs_bmbt_rec	*arp;		/* child record pointer */
	struct xfs_btree_block	*block;		/* btree root block */
	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
	int			error;		/* error return value */
	struct xfs_ifork	*ifp;		/* inode fork pointer */
	struct xfs_bmbt_key	*kp;		/* root block key pointer */
	struct xfs_mount	*mp;		/* mount structure */
	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	rec;
	xfs_extnum_t		cnt = 0;

	mp = ip->i_mount;
	ASSERT(whichfork != XFS_COW_FORK);
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);

	/*
	 * Make space in the inode incore.
	 */
	xfs_iroot_realloc(ip, 1, whichfork);
	ifp->if_flags |= XFS_IFBROOT;

	/*
	 * Fill in the root.
	 */
	block = ifp->if_broot;
	xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
				 XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
				 XFS_BTREE_LONG_PTRS);
	/*
	 * Need a cursor.  Can't allocate until bb_level is filled in.
	 */
	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
	/*
	 * Convert to a btree with two levels, one record in root.
	 */
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = mp;
	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
	if (tp->t_firstblock == NULLFSBLOCK) {
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
	} else if (tp->t_flags & XFS_TRANS_LOWMODE) {
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = tp->t_firstblock;
	} else {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.fsbno = tp->t_firstblock;
	}
	args.minlen = args.maxlen = args.prod = 1;
	args.wasdel = wasdel;
	*logflagsp = 0;
	if ((error = xfs_alloc_vextent(&args))) {
		xfs_iroot_realloc(ip, -1, whichfork);
		ASSERT(ifp->if_broot == NULL);
		XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
		return error;
	}

	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
		xfs_iroot_realloc(ip, -1, whichfork);
		ASSERT(ifp->if_broot == NULL);
		XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
		return -ENOSPC;
	}
	/*
	 * Allocation can't fail, the space was reserved.
	 */
	ASSERT(tp->t_firstblock == NULLFSBLOCK ||
	       args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock));
	tp->t_firstblock = args.fsbno;
	cur->bc_private.b.allocated++;
	ip->i_d.di_nblocks++;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
	abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
	/*
	 * Fill in the child block.
	 */
	abp->b_ops = &xfs_bmbt_buf_ops;
	ablock = XFS_BUF_TO_BLOCK(abp);
	xfs_btree_init_block_int(mp, ablock, abp->b_bn,
				 XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
				 XFS_BTREE_LONG_PTRS);

	for_each_xfs_iext(ifp, &icur, &rec) {
		if (isnullstartblock(rec.br_startblock))
			continue;
		arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
		xfs_bmbt_disk_set_all(arp, &rec);
		cnt++;
	}
	ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
	xfs_btree_set_numrecs(ablock, cnt);

	/*
	 * Fill in the root key and pointer.
	 */
	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
						be16_to_cpu(block->bb_level)));
	*pp = cpu_to_be64(args.fsbno);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
	ASSERT(*curp == NULL);
	*curp = cur;
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
	return 0;
}

/*
 * Convert a local file to an extents file.
 * This code is out of bounds for data forks of regular files,
 * since the file data needs to get logged so things will stay consistent.
 * (The bmap-level manipulations are ok, though).
 */
void
xfs_bmap_local_to_extents_empty(
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);

	ASSERT(whichfork != XFS_COW_FORK);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
	ASSERT(ifp->if_bytes == 0);
	ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);

	xfs_bmap_forkoff_reset(ip, whichfork);
	ifp->if_flags &= ~XFS_IFINLINE;
	ifp->if_flags |= XFS_IFEXTENTS;
	ifp->if_u1.if_root = NULL;
	ifp->if_height = 0;
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
}


STATIC int				/* error */
xfs_bmap_local_to_extents(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_extlen_t	total,		/* total blocks needed by transaction */
	int		*logflagsp,	/* inode logging flags */
	int		whichfork,
	void		(*init_fn)(struct xfs_trans *tp,
				   struct xfs_buf *bp,
				   struct xfs_inode *ip,
				   struct xfs_ifork *ifp))
{
	int		error = 0;
	int		flags;		/* logging flags returned */
	struct xfs_ifork *ifp;		/* inode fork pointer */
	xfs_alloc_arg_t	args;		/* allocation arguments */
	xfs_buf_t	*bp;		/* buffer for extent block */
	struct xfs_bmbt_irec rec;
	struct xfs_iext_cursor icur;

	/*
	 * We don't want to deal with the case of keeping inode data inline
	 * yet, so passing in the data fork of a regular inode is invalid.
	 */
	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);

	if (!ifp->if_bytes) {
		xfs_bmap_local_to_extents_empty(ip, whichfork);
		flags = XFS_ILOG_CORE;
		goto done;
	}

	flags = 0;
	error = 0;
	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS)) == XFS_IFINLINE);
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = ip->i_mount;
	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
	/*
	 * Allocate a block.  We know we need only one, since the
	 * file currently fits in an inode.
	 */
	if (tp->t_firstblock == NULLFSBLOCK) {
		args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
		args.type = XFS_ALLOCTYPE_START_BNO;
	} else {
		args.fsbno = tp->t_firstblock;
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
	}
	args.total = total;
	args.minlen = args.maxlen = args.prod = 1;
	error = xfs_alloc_vextent(&args);
	if (error)
		goto done;

	/* Can't fail, the space was reserved. */
	ASSERT(args.fsbno != NULLFSBLOCK);
	ASSERT(args.len == 1);
	tp->t_firstblock = args.fsbno;
	bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);

	/*
	 * Initialize the block, copy the data and log the remote buffer.
	 *
	 * The callout is responsible for logging because the remote format
	 * might differ from the local format and thus we don't know how much
	 * to log here. Note that init_fn must also set the buffer log item
	 * type correctly.
	 */
	init_fn(tp, bp, ip, ifp);

	/* account for the change in fork size */
	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
	xfs_bmap_local_to_extents_empty(ip, whichfork);
	flags |= XFS_ILOG_CORE;

	ifp->if_u1.if_root = NULL;
	ifp->if_height = 0;

	rec.br_startoff = 0;
	rec.br_startblock = args.fsbno;
	rec.br_blockcount = 1;
	rec.br_state = XFS_EXT_NORM;
	xfs_iext_first(ifp, &icur);
	xfs_iext_insert(ip, &icur, &rec, 0);

	XFS_IFORK_NEXT_SET(ip, whichfork, 1);
	ip->i_d.di_nblocks = 1;
	xfs_trans_mod_dquot_byino(tp, ip,
		XFS_TRANS_DQ_BCOUNT, 1L);
	flags |= xfs_ilog_fext(whichfork);

done:
	*logflagsp = flags;
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle btree format files.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_btree(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			*flags)		/* inode logging flags */
{
	xfs_btree_cur_t		*cur;		/* btree cursor */
	int			error;		/* error return value */
	xfs_mount_t		*mp;		/* file system mount struct */
	int			stat;		/* newroot status */

	mp = ip->i_mount;
	if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
		*flags |= XFS_ILOG_DBROOT;
	else {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
		error = xfs_bmbt_lookup_first(cur, &stat);
		if (error)
			goto error0;
		/* must be at least one entry */
		XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
			goto error0;
		if (stat == 0) {
			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
			return -ENOSPC;
		}
		cur->bc_private.b.allocated = 0;
		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	}
	return 0;
error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle extents format files.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_extents(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode pointer */
	int			*flags)		/* inode logging flags */
{
	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
	int			error;		/* error return value */

	if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
		return 0;
	cur = NULL;
	error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
					  XFS_DATA_FORK);
	if (cur) {
		cur->bc_private.b.allocated = 0;
		xfs_btree_del_cursor(cur, error);
	}
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle local format files. Each
 * different data fork content type needs a different callout to do the
 * conversion. Some are basic and only require special block initialisation
 * callouts for the data formatting, others (directories) are so specialised
 * they handle everything themselves.
 *
 * XXX (dgc): investigate whether directory conversion can use the generic
 * formatting callout. It should be possible - it's just a very complex
 * formatter.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_local(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode pointer */
	int			*flags)		/* inode logging flags */
{
	struct xfs_da_args	dargs;		/* args for dir/attr code */

	if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
		return 0;

	if (S_ISDIR(VFS_I(ip)->i_mode)) {
		memset(&dargs, 0, sizeof(dargs));
		dargs.geo = ip->i_mount->m_dir_geo;
		dargs.dp = ip;
		dargs.total = dargs.geo->fsbcount;
		dargs.whichfork = XFS_DATA_FORK;
		dargs.trans = tp;
		return xfs_dir2_sf_to_block(&dargs);
	}

	if (S_ISLNK(VFS_I(ip)->i_mode))
		return xfs_bmap_local_to_extents(tp, ip, 1, flags,
						 XFS_DATA_FORK,
						 xfs_symlink_local_to_remote);

	/* should only be called for types that support local format data */
	ASSERT(0);
	return -EFSCORRUPTED;
}

/*
 * Convert inode from non-attributed to attributed.
 * Must not be in a transaction, ip must not be locked.
 */
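/*
 * The sequence below is, roughly: allocate a transaction with space for
 * the new fork (XFS_ADDAFORK_SPACE_RES), reserve quota, pick di_forkoff
 * based on the current data fork format, allocate the incore attr fork
 * (i_afp) in extents format, shrink the data fork's inline area and
 * convert its format where necessary via the local/extents/btree helpers
 * above, and finally bump the superblock ATTR/ATTR2 feature bits if
 * needed before committing.
 */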
int						/* error code */
xfs_bmap_add_attrfork(
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			size,		/* space new attribute needs */
	int			rsvd)		/* xact may use reserved blks */
{
	xfs_mount_t		*mp;		/* mount structure */
	xfs_trans_t		*tp;		/* transaction pointer */
	int			blks;		/* space reservation */
	int			version = 1;	/* superblock attr version */
	int			logflags;	/* logging flags */
	int			error;		/* error return value */

	ASSERT(XFS_IFORK_Q(ip) == 0);

	mp = ip->i_mount;
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));

	blks = XFS_ADDAFORK_SPACE_RES(mp);

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
			rsvd ? XFS_TRANS_RESERVE : 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
			XFS_QMOPT_RES_REGBLKS);
	if (error)
		goto trans_cancel;
	if (XFS_IFORK_Q(ip))
		goto trans_cancel;
	if (ip->i_d.di_anextents != 0) {
		error = -EFSCORRUPTED;
		goto trans_cancel;
	}
	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
		/*
		 * For inodes coming from pre-6.2 filesystems.
		 */
		ASSERT(ip->i_d.di_aformat == 0);
		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	}

	xfs_trans_ijoin(tp, ip, 0);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_DEV:
		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
		if (!ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
		else if (mp->m_flags & XFS_MOUNT_ATTR2)
			version = 2;
		break;
	default:
		ASSERT(0);
		error = -EINVAL;
		goto trans_cancel;
	}

	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
	ip->i_afp->if_flags = XFS_IFEXTENTS;
	logflags = 0;
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_LOCAL:
		error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
		break;
	default:
		error = 0;
		break;
	}
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		goto trans_cancel;
	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
	    (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
		bool log_sb = false;

		spin_lock(&mp->m_sb_lock);
		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
			xfs_sb_version_addattr(&mp->m_sb);
			log_sb = true;
		}
		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
			xfs_sb_version_addattr2(&mp->m_sb);
			log_sb = true;
		}
		spin_unlock(&mp->m_sb_lock);
		if (log_sb)
			xfs_log_sb(tp);
	}

	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

trans_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * Internal and external extent tree search functions.
 */

/*
 * Read in extents from a btree-format inode.
 */
int
xfs_iread_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			state = xfs_bmap_fork_to_state(whichfork);
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	xfs_extnum_t		nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
	struct xfs_btree_block	*block = ifp->if_broot;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	new;
	xfs_fsblock_t		bno;
	struct xfs_buf		*bp;
	xfs_extnum_t		i, j;
	int			level;
	__be64			*pp;
	int			error;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			goto out;
		block = XFS_BUF_TO_BLOCK(bp);
		if (level == 0)
			break;
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(mp,
			xfs_verify_fsbno(mp, bno), out_brelse);
		xfs_trans_brelse(tp, bp);
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	i = 0;
	xfs_iext_first(ifp, &icur);

	/*
	 * Loop over all leaf nodes.  Copy information to the extent records.
	 */
	for (;;) {
		xfs_bmbt_rec_t	*frp;
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;

		num_recs = xfs_btree_get_numrecs(block);
		if (unlikely(i + num_recs > nextents)) {
			xfs_warn(ip->i_mount,
				"corrupt dinode %Lu, (btree extents).",
				(unsigned long long) ip->i_ino);
			xfs_inode_verifier_error(ip, -EFSCORRUPTED,
					__func__, block, sizeof(*block),
					__this_address);
			error = -EFSCORRUPTED;
			goto out_brelse;
		}
		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		if (nextbno != NULLFSBLOCK)
			xfs_btree_reada_bufl(mp, nextbno, 1,
					     &xfs_bmbt_buf_ops);
		/*
		 * Copy records into the extent records.
		 */
		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
		for (j = 0; j < num_recs; j++, frp++, i++) {
			xfs_failaddr_t	fa;

			xfs_bmbt_disk_get_all(frp, &new);
			fa = xfs_bmap_validate_extent(ip, whichfork, &new);
			if (fa) {
				error = -EFSCORRUPTED;
				xfs_inode_verifier_error(ip, error,
						"xfs_iread_extents(2)",
						frp, sizeof(*frp), fa);
				goto out_brelse;
			}
			xfs_iext_insert(ip, &icur, &new, state);
			trace_xfs_read_extent(ip, &icur, state, _THIS_IP_);
			xfs_iext_next(ifp, &icur);
		}
		xfs_trans_brelse(tp, bp);
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			goto out;
		block = XFS_BUF_TO_BLOCK(bp);
	}

	if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) {
		error = -EFSCORRUPTED;
		goto out;
	}
	ASSERT(i == xfs_iext_count(ifp));

	ifp->if_flags |= XFS_IFEXTENTS;
	return 0;

out_brelse:
	xfs_trans_brelse(tp, bp);
out:
	xfs_iext_destroy(ifp);
	return error;
}
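/*
 * Callers that may see a btree-format fork whose extents are not yet
 * in-core follow the pattern used throughout this file:
 *
 *	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
 *		error = xfs_iread_extents(tp, ip, whichfork);
 *		if (error)
 *			return error;
 *	}
 */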

/*
 * Returns the relative block number of the first unused block(s) in the given
 * fork with at least "len" logically contiguous blocks free. This is the
 * lowest-address hole if the fork has holes, else the first block past the
 * end of the fork. Return 0 if the fork is currently local (in-inode).
 */
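/*
 * Example (illustrative): a fork mapping blocks [0, 5) and [10, 13) has a
 * hole at [5, 10).  Asking for len = 4 returns 5 (the hole is big enough);
 * asking for len = 6 skips the hole and returns 13, the first block past
 * the end of the fork.
 */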
int						/* error */
xfs_bmap_first_unused(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_extlen_t		len,		/* size of hole to find */
	xfs_fileoff_t		*first_unused,	/* unused block */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	icur;
	xfs_fileoff_t		lastaddr = 0;
	xfs_fileoff_t		lowest, max;
	int			error;

	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);

	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		*first_unused = 0;
		return 0;
	}

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	lowest = max = *first_unused;
	for_each_xfs_iext(ifp, &icur, &got) {
		/*
		 * See if the hole before this extent will work.
		 */
		if (got.br_startoff >= lowest + len &&
		    got.br_startoff - max >= len)
			break;
		lastaddr = got.br_startoff + got.br_blockcount;
		max = XFS_FILEOFF_MAX(lastaddr, lowest);
	}

	*first_unused = max;
	return 0;
}

/*
 * Returns the file-relative block number of the last block - 1 before
 * last_block (input value) in the file.
 * This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 */
int						/* error */
xfs_bmap_last_before(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		*last_block,	/* last block */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	icur;
	int			error;

	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
	case XFS_DINODE_FMT_LOCAL:
		*last_block = 0;
		return 0;
	case XFS_DINODE_FMT_BTREE:
	case XFS_DINODE_FMT_EXTENTS:
		break;
	default:
		return -EIO;
	}

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
		*last_block = 0;
	return 0;
}

int
xfs_bmap_last_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*rec,
	int			*is_empty)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_iext_cursor	icur;
	int			error;

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	xfs_iext_last(ifp, &icur);
	if (!xfs_iext_get_extent(ifp, &icur, rec))
		*is_empty = 1;
	else
		*is_empty = 0;
	return 0;
}

/*
 * Check the last inode extent to determine whether this allocation will result
 * in blocks being allocated at the end of the file. When we allocate new data
 * blocks at the end of the file which do not start at the previous data block,
 * we will try to align the new blocks at stripe unit boundaries.
 *
 * Returns 1 in bma->aeof if the file (fork) is empty, as any new write will
 * then be at, or past, EOF.
 */
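/*
 * Example (illustrative): if the fork's last extent maps [100, 150) and the
 * caller wants to allocate at offset 150 or beyond, the write appends to the
 * file and bma->aeof is set; a write at offset 120 into a delayed (not yet
 * real) last extent also counts as "at EOF" for alignment purposes.
 */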
STATIC int
xfs_bmap_isaeof(
	struct xfs_bmalloca	*bma,
	int			whichfork)
{
	struct xfs_bmbt_irec	rec;
	int			is_empty;
	int			error;

	bma->aeof = false;
	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
				     &is_empty);
	if (error)
		return error;

	if (is_empty) {
		bma->aeof = true;
		return 0;
	}

	/*
	 * Check if we are allocating at or past the last extent, or at
	 * least into the last delayed allocated extent.
	 */
	bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
		(bma->offset >= rec.br_startoff &&
		 isnullstartblock(rec.br_startblock));
	return 0;
}

/*
 * Returns the file-relative block number of the first block past eof in
 * the file.  This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 */
int
xfs_bmap_last_offset(
	struct xfs_inode	*ip,
	xfs_fileoff_t		*last_block,
	int			whichfork)
{
	struct xfs_bmbt_irec	rec;
	int			is_empty;
	int			error;

	*last_block = 0;

	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
		return 0;

	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		return -EIO;

	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
	if (error || is_empty)
		return error;

	*last_block = rec.br_startoff + rec.br_blockcount;
	return 0;
}

/*
 * Returns whether the selected fork of the inode has exactly one
 * block or not.  For the data fork we check this matches di_size,
 * implying the file's range is 0..bsize-1.
 */
int					/* 1=>1 block, 0=>otherwise */
xfs_bmap_one_block(
	xfs_inode_t	*ip,		/* incore inode */
	int		whichfork)	/* data or attr fork */
{
	struct xfs_ifork *ifp;		/* inode fork pointer */
	int		rval;		/* return value */
	xfs_bmbt_irec_t	s;		/* internal version of extent */
	struct xfs_iext_cursor icur;

#ifndef DEBUG
	if (whichfork == XFS_DATA_FORK)
		return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
#endif	/* !DEBUG */
	if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
		return 0;
	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		return 0;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
	xfs_iext_first(ifp, &icur);
	xfs_iext_get_extent(ifp, &icur, &s);
	rval = s.br_startoff == 0 && s.br_blockcount == 1;
	if (rval && whichfork == XFS_DATA_FORK)
		ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
	return rval;
}

/*
 * Extent tree manipulation functions used during allocation.
 */

/*
 * Convert a delayed allocation to a real allocation.
 */
STATIC int				/* error */
xfs_bmap_add_extent_delay_real(
	struct xfs_bmalloca	*bma,
	int			whichfork)
{
	struct xfs_bmbt_irec	*new = &bma->got;
	int			error;	/* error return value */
	int			i;	/* temp state */
	struct xfs_ifork	*ifp;	/* inode fork pointer */
	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
					/* left is 0, right is 1, prev is 2 */
	int			rval = 0;	/* return value (logging flags) */
	int			state = xfs_bmap_fork_to_state(whichfork);
	xfs_filblks_t		da_new;	/* new count del alloc blocks used */
	xfs_filblks_t		da_old;	/* old count del alloc blocks used */
	xfs_filblks_t		temp = 0; /* value for da_new calculations */
	int			tmp_rval;	/* partial logging flags */
	struct xfs_mount	*mp;
	xfs_extnum_t		*nextents;
	struct xfs_bmbt_irec	old;

	mp = bma->ip->i_mount;
	ifp = XFS_IFORK_PTR(bma->ip, whichfork);
	ASSERT(whichfork != XFS_ATTR_FORK);
	nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents :
						&bma->ip->i_d.di_nextents);

	ASSERT(!isnullstartblock(new->br_startblock));
	ASSERT(!bma->cur ||
	       (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));

	XFS_STATS_INC(mp, xs_add_exlist);

#define	LEFT		r[0]
#define	RIGHT		r[1]
#define	PREV		r[2]

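	/*
	 * The four state bits BMAP_{LEFT,RIGHT}_FILLING and
	 * BMAP_{LEFT,RIGHT}_CONTIG give sixteen combinations.  Nine are
	 * handled in the switch below; the remaining seven are impossible
	 * because a neighbor can only be contiguous with the new allocation
	 * when the corresponding end of the delayed extent is being filled.
	 */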
1534 /*
1535 * Set up a bunch of variables to make the tests simpler.
1536 */
1537 xfs_iext_get_extent(ifp, &bma->icur, &PREV);
1538 new_endoff = new->br_startoff + new->br_blockcount;
1539 ASSERT(isnullstartblock(PREV.br_startblock));
1540 ASSERT(PREV.br_startoff <= new->br_startoff);
1541 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1542
1543 da_old = startblockval(PREV.br_startblock);
1544 da_new = 0;
1545
1546 /*
1547 * Set flags determining what part of the previous delayed allocation
1548 * extent is being replaced by a real allocation.
1549 */
1550 if (PREV.br_startoff == new->br_startoff)
1551 state |= BMAP_LEFT_FILLING;
1552 if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1553 state |= BMAP_RIGHT_FILLING;
1554
1555 /*
1556 * Check and set flags if this segment has a left neighbor.
1557 * Don't set contiguous if the combined extent would be too large.
1558 */
1559 if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
1560 state |= BMAP_LEFT_VALID;
1561 if (isnullstartblock(LEFT.br_startblock))
1562 state |= BMAP_LEFT_DELAY;
1563 }
1564
1565 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1566 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1567 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1568 LEFT.br_state == new->br_state &&
1569 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1570 state |= BMAP_LEFT_CONTIG;
1571
1572 /*
1573 * Check and set flags if this segment has a right neighbor.
1574 * Don't set contiguous if the combined extent would be too large.
1575 * Also check for all-three-contiguous being too large.
1576 */
1577 if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
1578 state |= BMAP_RIGHT_VALID;
1579 if (isnullstartblock(RIGHT.br_startblock))
1580 state |= BMAP_RIGHT_DELAY;
1581 }
1582
1583 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1584 new_endoff == RIGHT.br_startoff &&
1585 new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1586 new->br_state == RIGHT.br_state &&
1587 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1588 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1589 BMAP_RIGHT_FILLING)) !=
1590 (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1591 BMAP_RIGHT_FILLING) ||
1592 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1593 <= MAXEXTLEN))
1594 state |= BMAP_RIGHT_CONTIG;
1595
1596 error = 0;
1597 /*
1598 * Switch out based on the FILLING and CONTIG state bits.
1599 */
1600 switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1601 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1602 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1603 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1604 /*
1605 * Filling in all of a previously delayed allocation extent.
1606 * The left and right neighbors are both contiguous with new.
1607 */
1608 LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
1609
1610 xfs_iext_remove(bma->ip, &bma->icur, state);
1611 xfs_iext_remove(bma->ip, &bma->icur, state);
1612 xfs_iext_prev(ifp, &bma->icur);
1613 xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1614 (*nextents)--;
1615
1616 if (bma->cur == NULL)
1617 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1618 else {
1619 rval = XFS_ILOG_CORE;
1620 error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1621 if (error)
1622 goto done;
1623 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1624 error = xfs_btree_delete(bma->cur, &i);
1625 if (error)
1626 goto done;
1627 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1628 error = xfs_btree_decrement(bma->cur, 0, &i);
1629 if (error)
1630 goto done;
1631 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1632 error = xfs_bmbt_update(bma->cur, &LEFT);
1633 if (error)
1634 goto done;
1635 }
1636 break;
1637
1638 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1639 /*
1640 * Filling in all of a previously delayed allocation extent.
1641 * The left neighbor is contiguous, the right is not.
1642 */
1643 old = LEFT;
1644 LEFT.br_blockcount += PREV.br_blockcount;
1645
1646 xfs_iext_remove(bma->ip, &bma->icur, state);
1647 xfs_iext_prev(ifp, &bma->icur);
1648 xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1649
1650 if (bma->cur == NULL)
1651 rval = XFS_ILOG_DEXT;
1652 else {
1653 rval = 0;
1654 error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1655 if (error)
1656 goto done;
1657 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1658 error = xfs_bmbt_update(bma->cur, &LEFT);
1659 if (error)
1660 goto done;
1661 }
1662 break;
1663
1664 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1665 /*
1666 * Filling in all of a previously delayed allocation extent.
1667 * The right neighbor is contiguous, the left is not.
1668 */
1669 PREV.br_startblock = new->br_startblock;
1670 PREV.br_blockcount += RIGHT.br_blockcount;
1671
1672 xfs_iext_next(ifp, &bma->icur);
1673 xfs_iext_remove(bma->ip, &bma->icur, state);
1674 xfs_iext_prev(ifp, &bma->icur);
1675 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1676
1677 if (bma->cur == NULL)
1678 rval = XFS_ILOG_DEXT;
1679 else {
1680 rval = 0;
1681 error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1682 if (error)
1683 goto done;
1684 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1685 error = xfs_bmbt_update(bma->cur, &PREV);
1686 if (error)
1687 goto done;
1688 }
1689 break;
1690
1691 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1692 /*
1693 * Filling in all of a previously delayed allocation extent.
1694 * Neither the left nor right neighbors are contiguous with
1695 * the new one.
1696 */
1697 PREV.br_startblock = new->br_startblock;
1698 PREV.br_state = new->br_state;
1699 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1700
1701 (*nextents)++;
1702 if (bma->cur == NULL)
1703 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1704 else {
1705 rval = XFS_ILOG_CORE;
1706 error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1707 if (error)
1708 goto done;
1709 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1710 error = xfs_btree_insert(bma->cur, &i);
1711 if (error)
1712 goto done;
1713 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1714 }
1715 break;
1716
1717 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1718 /*
1719 * Filling in the first part of a previous delayed allocation.
1720 * The left neighbor is contiguous.
1721 */
1722 old = LEFT;
1723 temp = PREV.br_blockcount - new->br_blockcount;
1724 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1725 startblockval(PREV.br_startblock));
1726
1727 LEFT.br_blockcount += new->br_blockcount;
1728
1729 PREV.br_blockcount = temp;
1730 PREV.br_startoff += new->br_blockcount;
1731 PREV.br_startblock = nullstartblock(da_new);
1732
1733 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1734 xfs_iext_prev(ifp, &bma->icur);
1735 xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
1736
1737 if (bma->cur == NULL)
1738 rval = XFS_ILOG_DEXT;
1739 else {
1740 rval = 0;
1741 error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1742 if (error)
1743 goto done;
1744 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1745 error = xfs_bmbt_update(bma->cur, &LEFT);
1746 if (error)
1747 goto done;
1748 }
1749 break;
1750
1751 case BMAP_LEFT_FILLING:
1752 /*
1753 * Filling in the first part of a previous delayed allocation.
1754 * The left neighbor is not contiguous.
1755 */
1756 xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1757 (*nextents)++;
1758 if (bma->cur == NULL)
1759 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1760 else {
1761 rval = XFS_ILOG_CORE;
1762 error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1763 if (error)
1764 goto done;
1765 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1766 error = xfs_btree_insert(bma->cur, &i);
1767 if (error)
1768 goto done;
1769 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1770 }
1771
1772 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1773 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1774 &bma->cur, 1, &tmp_rval, whichfork);
1775 rval |= tmp_rval;
1776 if (error)
1777 goto done;
1778 }
1779
1780 temp = PREV.br_blockcount - new->br_blockcount;
1781 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1782 startblockval(PREV.br_startblock) -
1783 (bma->cur ? bma->cur->bc_private.b.allocated : 0));
1784
1785 PREV.br_startoff = new_endoff;
1786 PREV.br_blockcount = temp;
1787 PREV.br_startblock = nullstartblock(da_new);
1788 xfs_iext_next(ifp, &bma->icur);
1789 xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1790 xfs_iext_prev(ifp, &bma->icur);
1791 break;
1792
1793 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1794 /*
1795 * Filling in the last part of a previous delayed allocation.
1796 * The right neighbor is contiguous with the new allocation.
1797 */
1798 old = RIGHT;
1799 RIGHT.br_startoff = new->br_startoff;
1800 RIGHT.br_startblock = new->br_startblock;
1801 RIGHT.br_blockcount += new->br_blockcount;
1802
1803 if (bma->cur == NULL)
1804 rval = XFS_ILOG_DEXT;
1805 else {
1806 rval = 0;
1807 error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1808 if (error)
1809 goto done;
1810 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1811 error = xfs_bmbt_update(bma->cur, &RIGHT);
1812 if (error)
1813 goto done;
1814 }
1815
1816 temp = PREV.br_blockcount - new->br_blockcount;
1817 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1818 startblockval(PREV.br_startblock));
1819
1820 PREV.br_blockcount = temp;
1821 PREV.br_startblock = nullstartblock(da_new);
1822
1823 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1824 xfs_iext_next(ifp, &bma->icur);
1825 xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
1826 break;
1827
1828 case BMAP_RIGHT_FILLING:
1829 /*
1830 * Filling in the last part of a previous delayed allocation.
1831 * The right neighbor is not contiguous.
1832 */
1833 xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
1834 (*nextents)++;
1835 if (bma->cur == NULL)
1836 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1837 else {
1838 rval = XFS_ILOG_CORE;
1839 error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1840 if (error)
1841 goto done;
1842 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1843 error = xfs_btree_insert(bma->cur, &i);
1844 if (error)
1845 goto done;
1846 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1847 }
1848
1849 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1850 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1851 &bma->cur, 1, &tmp_rval, whichfork);
1852 rval |= tmp_rval;
1853 if (error)
1854 goto done;
1855 }
1856
1857 temp = PREV.br_blockcount - new->br_blockcount;
1858 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1859 startblockval(PREV.br_startblock) -
1860 (bma->cur ? bma->cur->bc_private.b.allocated : 0));
1861
1862 PREV.br_startblock = nullstartblock(da_new);
1863 PREV.br_blockcount = temp;
1864 xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
1865 xfs_iext_next(ifp, &bma->icur);
1866 break;
1867
1868 case 0:
1869 /*
1870 * Filling in the middle part of a previous delayed allocation.
1871 * Contiguity is impossible here.
1872 * This case is avoided almost all the time.
1873 *
1874 * We start with a delayed allocation:
1875 *
1876 	 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1877 	 *  PREV @ idx
1878 	 *
1879 	 * and we are allocating:
1880 	 *                     +rrrrrrrrrrrrrrrrr+
1881 	 *                            new
1882 	 *
1883 	 * and we set it up for insertion as:
1884 	 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1885 	 *                            new
1886 	 *  PREV @ idx          LEFT              RIGHT
1887 	 *                      inserted at idx + 1
1888 */
1889 old = PREV;
1890
1891 /* LEFT is the new middle */
1892 LEFT = *new;
1893
1894 /* RIGHT is the new right */
1895 RIGHT.br_state = PREV.br_state;
1896 RIGHT.br_startoff = new_endoff;
1897 RIGHT.br_blockcount =
1898 PREV.br_startoff + PREV.br_blockcount - new_endoff;
1899 RIGHT.br_startblock =
1900 nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1901 RIGHT.br_blockcount));
1902
1903 /* truncate PREV */
1904 PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
1905 PREV.br_startblock =
1906 nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1907 PREV.br_blockcount));
1908 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1909
1910 xfs_iext_next(ifp, &bma->icur);
1911 xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
1912 xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
1913 (*nextents)++;
1914
1915 if (bma->cur == NULL)
1916 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1917 else {
1918 rval = XFS_ILOG_CORE;
1919 error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1920 if (error)
1921 goto done;
1922 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1923 error = xfs_btree_insert(bma->cur, &i);
1924 if (error)
1925 goto done;
1926 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1927 }
1928
1929 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1930 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1931 &bma->cur, 1, &tmp_rval, whichfork);
1932 rval |= tmp_rval;
1933 if (error)
1934 goto done;
1935 }
1936
1937 da_new = startblockval(PREV.br_startblock) +
1938 startblockval(RIGHT.br_startblock);
1939 break;
1940
1941 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1942 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1943 case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1944 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1945 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1946 case BMAP_LEFT_CONTIG:
1947 case BMAP_RIGHT_CONTIG:
1948 /*
1949 * These cases are all impossible.
1950 */
1951 ASSERT(0);
1952 }
1953
1954 /* add reverse mapping unless caller opted out */
1955 if (!(bma->flags & XFS_BMAPI_NORMAP)) {
1956 error = xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
1957 if (error)
1958 goto done;
1959 }
1960
1961 /* convert to a btree if necessary */
1962 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1963 int tmp_logflags; /* partial log flag return val */
1964
1965 ASSERT(bma->cur == NULL);
1966 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1967 &bma->cur, da_old > 0, &tmp_logflags,
1968 whichfork);
1969 bma->logflags |= tmp_logflags;
1970 if (error)
1971 goto done;
1972 }
1973
1974 if (bma->cur) {
1975 da_new += bma->cur->bc_private.b.allocated;
1976 bma->cur->bc_private.b.allocated = 0;
1977 }
1978
1979 /* adjust for changes in reserved delayed indirect blocks */
1980 if (da_new != da_old) {
1981 ASSERT(state == 0 || da_new < da_old);
1982 error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
1983 false);
1984 }
1985
1986 xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
1987 done:
1988 if (whichfork != XFS_COW_FORK)
1989 bma->logflags |= rval;
1990 return error;
1991 #undef LEFT
1992 #undef RIGHT
1993 #undef PREV
1994 }
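
/*
 * Worked example of the reservation reconciliation above (illustrative
 * numbers): if the delalloc extent originally carried da_old = 4 blocks
 * of worst-case indirect reservation and the delalloc pieces left
 * behind only need da_new = 1, the surplus 3 blocks go back to the
 * in-core free space counter via xfs_mod_fdblocks(mp, 3, false).
 */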
1995
1996 /*
1997 * Convert an unwritten allocation to a real allocation or vice versa.
1998 */
1999 STATIC int /* error */
2000 xfs_bmap_add_extent_unwritten_real(
2001 struct xfs_trans *tp,
2002 xfs_inode_t *ip, /* incore inode pointer */
2003 int whichfork,
2004 struct xfs_iext_cursor *icur,
2005 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
2006 xfs_bmbt_irec_t *new, /* new data to add to file extents */
2007 int *logflagsp) /* inode logging flags */
2008 {
2009 xfs_btree_cur_t *cur; /* btree cursor */
2010 int error; /* error return value */
2011 int i; /* temp state */
2012 struct xfs_ifork *ifp; /* inode fork pointer */
2013 xfs_fileoff_t new_endoff; /* end offset of new entry */
2014 xfs_bmbt_irec_t r[3]; /* neighbor extent entries */
2015 /* left is 0, right is 1, prev is 2 */
2016 int rval=0; /* return value (logging flags) */
2017 int state = xfs_bmap_fork_to_state(whichfork);
2018 struct xfs_mount *mp = ip->i_mount;
2019 struct xfs_bmbt_irec old;
2020
2021 *logflagsp = 0;
2022
2023 cur = *curp;
2024 ifp = XFS_IFORK_PTR(ip, whichfork);
2025
2026 ASSERT(!isnullstartblock(new->br_startblock));
2027
2028 XFS_STATS_INC(mp, xs_add_exlist);
2029
2030 #define LEFT r[0]
2031 #define RIGHT r[1]
2032 #define PREV r[2]
2033
2034 /*
2035 * Set up a bunch of variables to make the tests simpler.
2036 */
2037 error = 0;
2038 xfs_iext_get_extent(ifp, icur, &PREV);
2039 ASSERT(new->br_state != PREV.br_state);
2040 new_endoff = new->br_startoff + new->br_blockcount;
2041 ASSERT(PREV.br_startoff <= new->br_startoff);
2042 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2043
2044 /*
2045 * Set flags determining what part of the previous oldext allocation
2046 * extent is being replaced by a newext allocation.
2047 */
2048 if (PREV.br_startoff == new->br_startoff)
2049 state |= BMAP_LEFT_FILLING;
2050 if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2051 state |= BMAP_RIGHT_FILLING;
2052
2053 /*
2054 * Check and set flags if this segment has a left neighbor.
2055 * Don't set contiguous if the combined extent would be too large.
2056 */
2057 if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
2058 state |= BMAP_LEFT_VALID;
2059 if (isnullstartblock(LEFT.br_startblock))
2060 state |= BMAP_LEFT_DELAY;
2061 }
2062
2063 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2064 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2065 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2066 LEFT.br_state == new->br_state &&
2067 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2068 state |= BMAP_LEFT_CONTIG;
2069
2070 /*
2071 * Check and set flags if this segment has a right neighbor.
2072 * Don't set contiguous if the combined extent would be too large.
2073 * Also check for all-three-contiguous being too large.
2074 */
2075 if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
2076 state |= BMAP_RIGHT_VALID;
2077 if (isnullstartblock(RIGHT.br_startblock))
2078 state |= BMAP_RIGHT_DELAY;
2079 }
2080
2081 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2082 new_endoff == RIGHT.br_startoff &&
2083 new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2084 new->br_state == RIGHT.br_state &&
2085 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2086 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2087 BMAP_RIGHT_FILLING)) !=
2088 (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2089 BMAP_RIGHT_FILLING) ||
2090 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2091 <= MAXEXTLEN))
2092 state |= BMAP_RIGHT_CONTIG;
2093
2094 /*
2095 * Switch out based on the FILLING and CONTIG state bits.
2096 */
2097 switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2098 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2099 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2100 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2101 /*
2102 * Setting all of a previous oldext extent to newext.
2103 * The left and right neighbors are both contiguous with new.
2104 */
2105 LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
2106
2107 xfs_iext_remove(ip, icur, state);
2108 xfs_iext_remove(ip, icur, state);
2109 xfs_iext_prev(ifp, icur);
2110 xfs_iext_update_extent(ip, state, icur, &LEFT);
2111 XFS_IFORK_NEXT_SET(ip, whichfork,
2112 XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
2113 if (cur == NULL)
2114 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2115 else {
2116 rval = XFS_ILOG_CORE;
2117 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2118 if (error)
2119 goto done;
2120 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2121 if ((error = xfs_btree_delete(cur, &i)))
2122 goto done;
2123 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2124 if ((error = xfs_btree_decrement(cur, 0, &i)))
2125 goto done;
2126 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2127 if ((error = xfs_btree_delete(cur, &i)))
2128 goto done;
2129 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2130 if ((error = xfs_btree_decrement(cur, 0, &i)))
2131 goto done;
2132 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2133 error = xfs_bmbt_update(cur, &LEFT);
2134 if (error)
2135 goto done;
2136 }
2137 break;
2138
2139 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2140 /*
2141 * Setting all of a previous oldext extent to newext.
2142 * The left neighbor is contiguous, the right is not.
2143 */
2144 LEFT.br_blockcount += PREV.br_blockcount;
2145
2146 xfs_iext_remove(ip, icur, state);
2147 xfs_iext_prev(ifp, icur);
2148 xfs_iext_update_extent(ip, state, icur, &LEFT);
2149 XFS_IFORK_NEXT_SET(ip, whichfork,
2150 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2151 if (cur == NULL)
2152 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2153 else {
2154 rval = XFS_ILOG_CORE;
2155 error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
2156 if (error)
2157 goto done;
2158 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2159 if ((error = xfs_btree_delete(cur, &i)))
2160 goto done;
2161 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2162 if ((error = xfs_btree_decrement(cur, 0, &i)))
2163 goto done;
2164 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2165 error = xfs_bmbt_update(cur, &LEFT);
2166 if (error)
2167 goto done;
2168 }
2169 break;
2170
2171 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2172 /*
2173 * Setting all of a previous oldext extent to newext.
2174 * The right neighbor is contiguous, the left is not.
2175 */
2176 PREV.br_blockcount += RIGHT.br_blockcount;
2177 PREV.br_state = new->br_state;
2178
2179 xfs_iext_next(ifp, icur);
2180 xfs_iext_remove(ip, icur, state);
2181 xfs_iext_prev(ifp, icur);
2182 xfs_iext_update_extent(ip, state, icur, &PREV);
2183
2184 XFS_IFORK_NEXT_SET(ip, whichfork,
2185 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2186 if (cur == NULL)
2187 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2188 else {
2189 rval = XFS_ILOG_CORE;
2190 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2191 if (error)
2192 goto done;
2193 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2194 if ((error = xfs_btree_delete(cur, &i)))
2195 goto done;
2196 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2197 if ((error = xfs_btree_decrement(cur, 0, &i)))
2198 goto done;
2199 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2200 error = xfs_bmbt_update(cur, &PREV);
2201 if (error)
2202 goto done;
2203 }
2204 break;
2205
2206 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2207 /*
2208 * Setting all of a previous oldext extent to newext.
2209 * Neither the left nor right neighbors are contiguous with
2210 * the new one.
2211 */
2212 PREV.br_state = new->br_state;
2213 xfs_iext_update_extent(ip, state, icur, &PREV);
2214
2215 if (cur == NULL)
2216 rval = XFS_ILOG_DEXT;
2217 else {
2218 rval = 0;
2219 error = xfs_bmbt_lookup_eq(cur, new, &i);
2220 if (error)
2221 goto done;
2222 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2223 error = xfs_bmbt_update(cur, &PREV);
2224 if (error)
2225 goto done;
2226 }
2227 break;
2228
2229 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2230 /*
2231 * Setting the first part of a previous oldext extent to newext.
2232 * The left neighbor is contiguous.
2233 */
2234 LEFT.br_blockcount += new->br_blockcount;
2235
2236 old = PREV;
2237 PREV.br_startoff += new->br_blockcount;
2238 PREV.br_startblock += new->br_blockcount;
2239 PREV.br_blockcount -= new->br_blockcount;
2240
2241 xfs_iext_update_extent(ip, state, icur, &PREV);
2242 xfs_iext_prev(ifp, icur);
2243 xfs_iext_update_extent(ip, state, icur, &LEFT);
2244
2245 if (cur == NULL)
2246 rval = XFS_ILOG_DEXT;
2247 else {
2248 rval = 0;
2249 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2250 if (error)
2251 goto done;
2252 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2253 error = xfs_bmbt_update(cur, &PREV);
2254 if (error)
2255 goto done;
2256 error = xfs_btree_decrement(cur, 0, &i);
2257 if (error)
2258 goto done;
2259 error = xfs_bmbt_update(cur, &LEFT);
2260 if (error)
2261 goto done;
2262 }
2263 break;
2264
2265 case BMAP_LEFT_FILLING:
2266 /*
2267 * Setting the first part of a previous oldext extent to newext.
2268 * The left neighbor is not contiguous.
2269 */
2270 old = PREV;
2271 PREV.br_startoff += new->br_blockcount;
2272 PREV.br_startblock += new->br_blockcount;
2273 PREV.br_blockcount -= new->br_blockcount;
2274
2275 xfs_iext_update_extent(ip, state, icur, &PREV);
2276 xfs_iext_insert(ip, icur, new, state);
2277 XFS_IFORK_NEXT_SET(ip, whichfork,
2278 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2279 if (cur == NULL)
2280 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2281 else {
2282 rval = XFS_ILOG_CORE;
2283 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2284 if (error)
2285 goto done;
2286 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2287 error = xfs_bmbt_update(cur, &PREV);
2288 if (error)
2289 goto done;
2290 cur->bc_rec.b = *new;
2291 if ((error = xfs_btree_insert(cur, &i)))
2292 goto done;
2293 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2294 }
2295 break;
2296
2297 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2298 /*
2299 * Setting the last part of a previous oldext extent to newext.
2300 * The right neighbor is contiguous with the new allocation.
2301 */
2302 old = PREV;
2303 PREV.br_blockcount -= new->br_blockcount;
2304
2305 RIGHT.br_startoff = new->br_startoff;
2306 RIGHT.br_startblock = new->br_startblock;
2307 RIGHT.br_blockcount += new->br_blockcount;
2308
2309 xfs_iext_update_extent(ip, state, icur, &PREV);
2310 xfs_iext_next(ifp, icur);
2311 xfs_iext_update_extent(ip, state, icur, &RIGHT);
2312
2313 if (cur == NULL)
2314 rval = XFS_ILOG_DEXT;
2315 else {
2316 rval = 0;
2317 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2318 if (error)
2319 goto done;
2320 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2321 error = xfs_bmbt_update(cur, &PREV);
2322 if (error)
2323 goto done;
2324 error = xfs_btree_increment(cur, 0, &i);
2325 if (error)
2326 goto done;
2327 error = xfs_bmbt_update(cur, &RIGHT);
2328 if (error)
2329 goto done;
2330 }
2331 break;
2332
2333 case BMAP_RIGHT_FILLING:
2334 /*
2335 * Setting the last part of a previous oldext extent to newext.
2336 * The right neighbor is not contiguous.
2337 */
2338 old = PREV;
2339 PREV.br_blockcount -= new->br_blockcount;
2340
2341 xfs_iext_update_extent(ip, state, icur, &PREV);
2342 xfs_iext_next(ifp, icur);
2343 xfs_iext_insert(ip, icur, new, state);
2344
2345 XFS_IFORK_NEXT_SET(ip, whichfork,
2346 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2347 if (cur == NULL)
2348 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2349 else {
2350 rval = XFS_ILOG_CORE;
2351 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2352 if (error)
2353 goto done;
2354 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2355 error = xfs_bmbt_update(cur, &PREV);
2356 if (error)
2357 goto done;
2358 error = xfs_bmbt_lookup_eq(cur, new, &i);
2359 if (error)
2360 goto done;
2361 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2362 if ((error = xfs_btree_insert(cur, &i)))
2363 goto done;
2364 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2365 }
2366 break;
2367
2368 case 0:
2369 /*
2370 * Setting the middle part of a previous oldext extent to
2371 * newext. Contiguity is impossible here.
2372 * One extent becomes three extents.
2373 */
2374 old = PREV;
2375 PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
2376
2377 r[0] = *new;
2378 r[1].br_startoff = new_endoff;
2379 r[1].br_blockcount =
2380 old.br_startoff + old.br_blockcount - new_endoff;
2381 r[1].br_startblock = new->br_startblock + new->br_blockcount;
2382 r[1].br_state = PREV.br_state;
2383
2384 xfs_iext_update_extent(ip, state, icur, &PREV);
2385 xfs_iext_next(ifp, icur);
2386 xfs_iext_insert(ip, icur, &r[1], state);
2387 xfs_iext_insert(ip, icur, &r[0], state);
2388
2389 XFS_IFORK_NEXT_SET(ip, whichfork,
2390 XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
2391 if (cur == NULL)
2392 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2393 else {
2394 rval = XFS_ILOG_CORE;
2395 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2396 if (error)
2397 goto done;
2398 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2399 /* new right extent - oldext */
2400 error = xfs_bmbt_update(cur, &r[1]);
2401 if (error)
2402 goto done;
2403 /* new left extent - oldext */
2404 cur->bc_rec.b = PREV;
2405 if ((error = xfs_btree_insert(cur, &i)))
2406 goto done;
2407 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2408 /*
2409 * Reset the cursor to the position of the new extent
2410 * we are about to insert as we can't trust it after
2411 * the previous insert.
2412 */
2413 error = xfs_bmbt_lookup_eq(cur, new, &i);
2414 if (error)
2415 goto done;
2416 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2417 /* new middle extent - newext */
2418 if ((error = xfs_btree_insert(cur, &i)))
2419 goto done;
2420 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2421 }
2422 break;
2423
2424 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2425 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2426 case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2427 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2428 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2429 case BMAP_LEFT_CONTIG:
2430 case BMAP_RIGHT_CONTIG:
2431 /*
2432 * These cases are all impossible.
2433 */
2434 ASSERT(0);
2435 }
2436
2437 /* update reverse mappings */
2438 error = xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
2439 if (error)
2440 goto done;
2441
2442 /* convert to a btree if necessary */
2443 if (xfs_bmap_needs_btree(ip, whichfork)) {
2444 int tmp_logflags; /* partial log flag return val */
2445
2446 ASSERT(cur == NULL);
2447 error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
2448 &tmp_logflags, whichfork);
2449 *logflagsp |= tmp_logflags;
2450 if (error)
2451 goto done;
2452 }
2453
2454 /* clear out the allocated field, done with it now in any case. */
2455 if (cur) {
2456 cur->bc_private.b.allocated = 0;
2457 *curp = cur;
2458 }
2459
2460 xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
2461 done:
2462 *logflagsp |= rval;
2463 return error;
2464 #undef LEFT
2465 #undef RIGHT
2466 #undef PREV
2467 }
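
/*
 * A minimal sketch of a conversion call (illustrative only; the example
 * function and its arguments are hypothetical, and the caller is
 * assumed to hold the ILOCK with icur positioned at the unwritten
 * extent covering offset 100):
 */
#if 0
static int
xfs_bmap_unwritten_example(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	struct xfs_iext_cursor	*icur,
	struct xfs_btree_cur	**curp,
	xfs_fsblock_t		fsbno)
{
	struct xfs_bmbt_irec	new = {
		.br_startoff	= 100,
		.br_startblock	= fsbno,
		.br_blockcount	= 1,
		.br_state	= XFS_EXT_NORM,
	};
	int			logflags = 0;

	/* flip one unwritten block at file offset 100 to written */
	return xfs_bmap_add_extent_unwritten_real(tp, ip, XFS_DATA_FORK,
			icur, curp, &new, &logflags);
}
#endif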
2468
2469 /*
2470 * Convert a hole to a delayed allocation.
2471 */
2472 STATIC void
2473 xfs_bmap_add_extent_hole_delay(
2474 xfs_inode_t *ip, /* incore inode pointer */
2475 int whichfork,
2476 struct xfs_iext_cursor *icur,
2477 xfs_bmbt_irec_t *new) /* new data to add to file extents */
2478 {
2479 struct xfs_ifork *ifp; /* inode fork pointer */
2480 xfs_bmbt_irec_t left; /* left neighbor extent entry */
2481 xfs_filblks_t newlen=0; /* new indirect size */
2482 xfs_filblks_t oldlen=0; /* old indirect size */
2483 xfs_bmbt_irec_t right; /* right neighbor extent entry */
2484 int state = xfs_bmap_fork_to_state(whichfork);
2485 xfs_filblks_t temp; /* temp for indirect calculations */
2486
2487 ifp = XFS_IFORK_PTR(ip, whichfork);
2488 ASSERT(isnullstartblock(new->br_startblock));
2489
2490 /*
2491 * Check and set flags if this segment has a left neighbor
2492 */
2493 if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2494 state |= BMAP_LEFT_VALID;
2495 if (isnullstartblock(left.br_startblock))
2496 state |= BMAP_LEFT_DELAY;
2497 }
2498
2499 /*
2500 * Check and set flags if the current (right) segment exists.
2501 * If it doesn't exist, we're converting the hole at end-of-file.
2502 */
2503 if (xfs_iext_get_extent(ifp, icur, &right)) {
2504 state |= BMAP_RIGHT_VALID;
2505 if (isnullstartblock(right.br_startblock))
2506 state |= BMAP_RIGHT_DELAY;
2507 }
2508
2509 /*
2510 * Set contiguity flags on the left and right neighbors.
2511 * Don't let extents get too large, even if the pieces are contiguous.
2512 */
2513 if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2514 left.br_startoff + left.br_blockcount == new->br_startoff &&
2515 left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2516 state |= BMAP_LEFT_CONTIG;
2517
2518 if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2519 new->br_startoff + new->br_blockcount == right.br_startoff &&
2520 new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2521 (!(state & BMAP_LEFT_CONTIG) ||
2522 (left.br_blockcount + new->br_blockcount +
2523 right.br_blockcount <= MAXEXTLEN)))
2524 state |= BMAP_RIGHT_CONTIG;
2525
2526 /*
2527 * Switch out based on the contiguity flags.
2528 */
2529 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2530 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2531 /*
2532 * New allocation is contiguous with delayed allocations
2533 * on the left and on the right.
2534 * Merge all three into a single extent record.
2535 */
2536 temp = left.br_blockcount + new->br_blockcount +
2537 right.br_blockcount;
2538
2539 oldlen = startblockval(left.br_startblock) +
2540 startblockval(new->br_startblock) +
2541 startblockval(right.br_startblock);
2542 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2543 oldlen);
2544 left.br_startblock = nullstartblock(newlen);
2545 left.br_blockcount = temp;
2546
2547 xfs_iext_remove(ip, icur, state);
2548 xfs_iext_prev(ifp, icur);
2549 xfs_iext_update_extent(ip, state, icur, &left);
2550 break;
2551
2552 case BMAP_LEFT_CONTIG:
2553 /*
2554 * New allocation is contiguous with a delayed allocation
2555 * on the left.
2556 * Merge the new allocation with the left neighbor.
2557 */
2558 temp = left.br_blockcount + new->br_blockcount;
2559
2560 oldlen = startblockval(left.br_startblock) +
2561 startblockval(new->br_startblock);
2562 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2563 oldlen);
2564 left.br_blockcount = temp;
2565 left.br_startblock = nullstartblock(newlen);
2566
2567 xfs_iext_prev(ifp, icur);
2568 xfs_iext_update_extent(ip, state, icur, &left);
2569 break;
2570
2571 case BMAP_RIGHT_CONTIG:
2572 /*
2573 * New allocation is contiguous with a delayed allocation
2574 * on the right.
2575 * Merge the new allocation with the right neighbor.
2576 */
2577 temp = new->br_blockcount + right.br_blockcount;
2578 oldlen = startblockval(new->br_startblock) +
2579 startblockval(right.br_startblock);
2580 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2581 oldlen);
2582 right.br_startoff = new->br_startoff;
2583 right.br_startblock = nullstartblock(newlen);
2584 right.br_blockcount = temp;
2585 xfs_iext_update_extent(ip, state, icur, &right);
2586 break;
2587
2588 case 0:
2589 /*
2590 * New allocation is not contiguous with another
2591 * delayed allocation.
2592 * Insert a new entry.
2593 */
2594 oldlen = newlen = 0;
2595 xfs_iext_insert(ip, icur, new, state);
2596 break;
2597 }
2598 if (oldlen != newlen) {
2599 ASSERT(oldlen > newlen);
2600 xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2601 false);
2602 /*
2603 * Nothing to do for disk quota accounting here.
2604 */
2605 }
2606 }
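
/*
 * Worked example of the merge accounting above (illustrative numbers):
 * if the left, new and right delalloc pieces had reserved oldlen = 6
 * indirect blocks in total but the worst-case need of the combined
 * extent is only newlen = 2, the surplus 4 blocks are returned to the
 * in-core free block counter by the xfs_mod_fdblocks() call above.
 */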
2607
2608 /*
2609 * Convert a hole to a real allocation.
2610 */
2611 STATIC int /* error */
2612 xfs_bmap_add_extent_hole_real(
2613 struct xfs_trans *tp,
2614 struct xfs_inode *ip,
2615 int whichfork,
2616 struct xfs_iext_cursor *icur,
2617 struct xfs_btree_cur **curp,
2618 struct xfs_bmbt_irec *new,
2619 int *logflagsp,
2620 int flags)
2621 {
2622 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
2623 struct xfs_mount *mp = ip->i_mount;
2624 struct xfs_btree_cur *cur = *curp;
2625 int error; /* error return value */
2626 int i; /* temp state */
2627 xfs_bmbt_irec_t left; /* left neighbor extent entry */
2628 xfs_bmbt_irec_t right; /* right neighbor extent entry */
2629 int rval=0; /* return value (logging flags) */
2630 int state = xfs_bmap_fork_to_state(whichfork);
2631 struct xfs_bmbt_irec old;
2632
2633 ASSERT(!isnullstartblock(new->br_startblock));
2634 ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
2635
2636 XFS_STATS_INC(mp, xs_add_exlist);
2637
2638 /*
2639 * Check and set flags if this segment has a left neighbor.
2640 */
2641 if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2642 state |= BMAP_LEFT_VALID;
2643 if (isnullstartblock(left.br_startblock))
2644 state |= BMAP_LEFT_DELAY;
2645 }
2646
2647 /*
2648 * Check and set flags if this segment has a current value.
2649 * Not true if we're inserting into the "hole" at eof.
2650 */
2651 if (xfs_iext_get_extent(ifp, icur, &right)) {
2652 state |= BMAP_RIGHT_VALID;
2653 if (isnullstartblock(right.br_startblock))
2654 state |= BMAP_RIGHT_DELAY;
2655 }
2656
2657 /*
2658 * We're inserting a real allocation between "left" and "right".
2659 * Set the contiguity flags. Don't let extents get too large.
2660 */
2661 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2662 left.br_startoff + left.br_blockcount == new->br_startoff &&
2663 left.br_startblock + left.br_blockcount == new->br_startblock &&
2664 left.br_state == new->br_state &&
2665 left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2666 state |= BMAP_LEFT_CONTIG;
2667
2668 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2669 new->br_startoff + new->br_blockcount == right.br_startoff &&
2670 new->br_startblock + new->br_blockcount == right.br_startblock &&
2671 new->br_state == right.br_state &&
2672 new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2673 (!(state & BMAP_LEFT_CONTIG) ||
2674 left.br_blockcount + new->br_blockcount +
2675 right.br_blockcount <= MAXEXTLEN))
2676 state |= BMAP_RIGHT_CONTIG;
2677
2678 error = 0;
2679 /*
2680 * Select which case we're in here, and implement it.
2681 */
2682 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2683 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2684 /*
2685 * New allocation is contiguous with real allocations on the
2686 * left and on the right.
2687 * Merge all three into a single extent record.
2688 */
2689 left.br_blockcount += new->br_blockcount + right.br_blockcount;
2690
2691 xfs_iext_remove(ip, icur, state);
2692 xfs_iext_prev(ifp, icur);
2693 xfs_iext_update_extent(ip, state, icur, &left);
2694
2695 XFS_IFORK_NEXT_SET(ip, whichfork,
2696 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2697 if (cur == NULL) {
2698 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2699 } else {
2700 rval = XFS_ILOG_CORE;
2701 error = xfs_bmbt_lookup_eq(cur, &right, &i);
2702 if (error)
2703 goto done;
2704 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2705 error = xfs_btree_delete(cur, &i);
2706 if (error)
2707 goto done;
2708 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2709 error = xfs_btree_decrement(cur, 0, &i);
2710 if (error)
2711 goto done;
2712 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2713 error = xfs_bmbt_update(cur, &left);
2714 if (error)
2715 goto done;
2716 }
2717 break;
2718
2719 case BMAP_LEFT_CONTIG:
2720 /*
2721 * New allocation is contiguous with a real allocation
2722 * on the left.
2723 * Merge the new allocation with the left neighbor.
2724 */
2725 old = left;
2726 left.br_blockcount += new->br_blockcount;
2727
2728 xfs_iext_prev(ifp, icur);
2729 xfs_iext_update_extent(ip, state, icur, &left);
2730
2731 if (cur == NULL) {
2732 rval = xfs_ilog_fext(whichfork);
2733 } else {
2734 rval = 0;
2735 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2736 if (error)
2737 goto done;
2738 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2739 error = xfs_bmbt_update(cur, &left);
2740 if (error)
2741 goto done;
2742 }
2743 break;
2744
2745 case BMAP_RIGHT_CONTIG:
2746 /*
2747 * New allocation is contiguous with a real allocation
2748 * on the right.
2749 * Merge the new allocation with the right neighbor.
2750 */
2751 old = right;
2752
2753 right.br_startoff = new->br_startoff;
2754 right.br_startblock = new->br_startblock;
2755 right.br_blockcount += new->br_blockcount;
2756 xfs_iext_update_extent(ip, state, icur, &right);
2757
2758 if (cur == NULL) {
2759 rval = xfs_ilog_fext(whichfork);
2760 } else {
2761 rval = 0;
2762 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2763 if (error)
2764 goto done;
2765 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2766 error = xfs_bmbt_update(cur, &right);
2767 if (error)
2768 goto done;
2769 }
2770 break;
2771
2772 case 0:
2773 /*
2774 * New allocation is not contiguous with another
2775 * real allocation.
2776 * Insert a new entry.
2777 */
2778 xfs_iext_insert(ip, icur, new, state);
2779 XFS_IFORK_NEXT_SET(ip, whichfork,
2780 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2781 if (cur == NULL) {
2782 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2783 } else {
2784 rval = XFS_ILOG_CORE;
2785 error = xfs_bmbt_lookup_eq(cur, new, &i);
2786 if (error)
2787 goto done;
2788 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2789 error = xfs_btree_insert(cur, &i);
2790 if (error)
2791 goto done;
2792 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2793 }
2794 break;
2795 }
2796
2797 /* add reverse mapping unless caller opted out */
2798 if (!(flags & XFS_BMAPI_NORMAP)) {
2799 error = xfs_rmap_map_extent(tp, ip, whichfork, new);
2800 if (error)
2801 goto done;
2802 }
2803
2804 /* convert to a btree if necessary */
2805 if (xfs_bmap_needs_btree(ip, whichfork)) {
2806 int tmp_logflags; /* partial log flag return val */
2807
2808 ASSERT(cur == NULL);
2809 error = xfs_bmap_extents_to_btree(tp, ip, curp, 0,
2810 &tmp_logflags, whichfork);
2811 *logflagsp |= tmp_logflags;
2812 cur = *curp;
2813 if (error)
2814 goto done;
2815 }
2816
2817 /* clear out the allocated field, done with it now in any case. */
2818 if (cur)
2819 cur->bc_private.b.allocated = 0;
2820
2821 xfs_bmap_check_leaf_extents(cur, ip, whichfork);
2822 done:
2823 *logflagsp |= rval;
2824 return error;
2825 }
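
/*
 * Note on the logging flags returned above: with no btree cursor the
 * extent list lives in the inode fork, so an insert returns
 * XFS_ILOG_CORE plus the fork's extent flag; with a cursor, only
 * XFS_ILOG_CORE is needed when di_nextents changed, and a pure merge
 * that modifies nothing but btree blocks returns no inode flags at all.
 */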
2826
2827 /*
2828 * Functions used in the extent read, allocate and remove paths
2829 */
2830
2831 /*
2832 * Adjust the size of the new extent based on di_extsize and rt extsize.
2833 */
2834 int
2835 xfs_bmap_extsize_align(
2836 xfs_mount_t *mp,
2837 xfs_bmbt_irec_t *gotp, /* next extent pointer */
2838 xfs_bmbt_irec_t *prevp, /* previous extent pointer */
2839 xfs_extlen_t extsz, /* align to this extent size */
2840 int rt, /* is this a realtime inode? */
2841 int eof, /* is extent at end-of-file? */
2842 int delay, /* creating delalloc extent? */
2843 int convert, /* overwriting unwritten extent? */
2844 xfs_fileoff_t *offp, /* in/out: aligned offset */
2845 xfs_extlen_t *lenp) /* in/out: aligned length */
2846 {
2847 xfs_fileoff_t orig_off; /* original offset */
2848 xfs_extlen_t orig_alen; /* original length */
2849 xfs_fileoff_t orig_end; /* original off+len */
2850 xfs_fileoff_t nexto; /* next file offset */
2851 xfs_fileoff_t prevo; /* previous file offset */
2852 xfs_fileoff_t align_off; /* temp for offset */
2853 xfs_extlen_t align_alen; /* temp for length */
2854 xfs_extlen_t temp; /* temp for calculations */
2855
2856 if (convert)
2857 return 0;
2858
2859 orig_off = align_off = *offp;
2860 orig_alen = align_alen = *lenp;
2861 orig_end = orig_off + orig_alen;
2862
2863 /*
2864 * If this request overlaps an existing extent, then don't
2865 * attempt to perform any additional alignment.
2866 */
2867 if (!delay && !eof &&
2868 (orig_off >= gotp->br_startoff) &&
2869 (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
2870 return 0;
2871 }
2872
2873 /*
2874 * If the file offset is unaligned vs. the extent size
2875 * we need to align it. This will be possible unless
2876 * the file was previously written with a kernel that didn't
2877 * perform this alignment, or if a truncate shot us in the
2878 * foot.
2879 */
2880 div_u64_rem(orig_off, extsz, &temp);
2881 if (temp) {
2882 align_alen += temp;
2883 align_off -= temp;
2884 }
2885
2886 /* Same adjustment for the end of the requested area. */
2887 temp = (align_alen % extsz);
2888 if (temp)
2889 align_alen += extsz - temp;
2890
2891 /*
2892 * For large extent hint sizes, the aligned extent might be larger than
2893 * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
2894 * the length back under MAXEXTLEN. The outer allocation loops handle
2895 * short allocation just fine, so it is safe to do this. We only want to
2896 * do it when we are forced to, though, because it means more allocation
2897 * operations are required.
2898 */
2899 while (align_alen > MAXEXTLEN)
2900 align_alen -= extsz;
2901 ASSERT(align_alen <= MAXEXTLEN);
2902
2903 /*
2904 * If the previous block overlaps with this proposed allocation
2905 * then move the start forward without adjusting the length.
2906 */
2907 if (prevp->br_startoff != NULLFILEOFF) {
2908 if (prevp->br_startblock == HOLESTARTBLOCK)
2909 prevo = prevp->br_startoff;
2910 else
2911 prevo = prevp->br_startoff + prevp->br_blockcount;
2912 } else
2913 prevo = 0;
2914 if (align_off != orig_off && align_off < prevo)
2915 align_off = prevo;
2916 /*
2917 * If the next block overlaps with this proposed allocation
2918 * then move the start back without adjusting the length,
2919 * but not before offset 0.
2920 	 * This may of course make the start overlap the previous block,
2921 * and if we hit the offset 0 limit then the next block
2922 * can still overlap too.
2923 */
2924 if (!eof && gotp->br_startoff != NULLFILEOFF) {
2925 if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
2926 (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
2927 nexto = gotp->br_startoff + gotp->br_blockcount;
2928 else
2929 nexto = gotp->br_startoff;
2930 } else
2931 nexto = NULLFILEOFF;
2932 if (!eof &&
2933 align_off + align_alen != orig_end &&
2934 align_off + align_alen > nexto)
2935 align_off = nexto > align_alen ? nexto - align_alen : 0;
2936 /*
2937 * If we're now overlapping the next or previous extent that
2938 * means we can't fit an extsz piece in this hole. Just move
2939 * the start forward to the first valid spot and set
2940 * the length so we hit the end.
2941 */
2942 if (align_off != orig_off && align_off < prevo)
2943 align_off = prevo;
2944 if (align_off + align_alen != orig_end &&
2945 align_off + align_alen > nexto &&
2946 nexto != NULLFILEOFF) {
2947 ASSERT(nexto > prevo);
2948 align_alen = nexto - align_off;
2949 }
2950
2951 /*
2952 * If realtime, and the result isn't a multiple of the realtime
2953 	 * extent size, we need to remove blocks until it is.
2954 */
2955 if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
2956 /*
2957 * We're not covering the original request, or
2958 * we won't be able to once we fix the length.
2959 */
2960 if (orig_off < align_off ||
2961 orig_end > align_off + align_alen ||
2962 align_alen - temp < orig_alen)
2963 return -EINVAL;
2964 /*
2965 * Try to fix it by moving the start up.
2966 */
2967 if (align_off + temp <= orig_off) {
2968 align_alen -= temp;
2969 align_off += temp;
2970 }
2971 /*
2972 * Try to fix it by moving the end in.
2973 */
2974 else if (align_off + align_alen - temp >= orig_end)
2975 align_alen -= temp;
2976 /*
2977 * Set the start to the minimum then trim the length.
2978 */
2979 else {
2980 align_alen -= orig_off - align_off;
2981 align_off = orig_off;
2982 align_alen -= align_alen % mp->m_sb.sb_rextsize;
2983 }
2984 /*
2985 * Result doesn't cover the request, fail it.
2986 */
2987 if (orig_off < align_off || orig_end > align_off + align_alen)
2988 return -EINVAL;
2989 } else {
2990 ASSERT(orig_off >= align_off);
2991 /* see MAXEXTLEN handling above */
2992 ASSERT(orig_end <= align_off + align_alen ||
2993 align_alen + extsz > MAXEXTLEN);
2994 }
2995
2996 #ifdef DEBUG
2997 if (!eof && gotp->br_startoff != NULLFILEOFF)
2998 ASSERT(align_off + align_alen <= gotp->br_startoff);
2999 if (prevp->br_startoff != NULLFILEOFF)
3000 ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3001 #endif
3002
3003 *lenp = align_alen;
3004 *offp = align_off;
3005 return 0;
3006 }
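
/*
 * A minimal worked example (illustrative only; the function name and
 * the mount argument are hypothetical): with a 16-block extent size
 * hint, a request for offset 5, length 10 comes back as offset 0,
 * length 16, since the start is rounded down and the end rounded up.
 */
#if 0
static void
xfs_bmap_extsize_align_example(
	struct xfs_mount	*mp)
{
	struct xfs_bmbt_irec	got = { 0 };
	struct xfs_bmbt_irec	prev = { .br_startoff = NULLFILEOFF };
	xfs_fileoff_t		off = 5;
	xfs_extlen_t		len = 10;

	/* extsz = 16, not realtime, at EOF, not delalloc, not convert */
	if (!xfs_bmap_extsize_align(mp, &got, &prev, 16, 0, 1, 0, 0,
			&off, &len)) {
		/* now off == 0 and len == 16 */
	}
}
#endif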
3007
3008 #define XFS_ALLOC_GAP_UNITS 4
3009
3010 void
3011 xfs_bmap_adjacent(
3012 struct xfs_bmalloca *ap) /* bmap alloc argument struct */
3013 {
3014 xfs_fsblock_t adjust; /* adjustment to block numbers */
3015 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
3016 xfs_mount_t *mp; /* mount point structure */
3017 int nullfb; /* true if ap->firstblock isn't set */
3018 int rt; /* true if inode is realtime */
3019
3020 #define ISVALID(x,y) \
3021 (rt ? \
3022 (x) < mp->m_sb.sb_rblocks : \
3023 XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3024 XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3025 XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3026
3027 mp = ap->ip->i_mount;
3028 nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3029 rt = XFS_IS_REALTIME_INODE(ap->ip) &&
3030 xfs_alloc_is_userdata(ap->datatype);
3031 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
3032 ap->tp->t_firstblock);
3033 /*
3034 * If allocating at eof, and there's a previous real block,
3035 * try to use its last block as our starting point.
3036 */
3037 if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3038 !isnullstartblock(ap->prev.br_startblock) &&
3039 ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3040 ap->prev.br_startblock)) {
3041 ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3042 /*
3043 * Adjust for the gap between prevp and us.
3044 */
3045 adjust = ap->offset -
3046 (ap->prev.br_startoff + ap->prev.br_blockcount);
3047 if (adjust &&
3048 ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3049 ap->blkno += adjust;
3050 }
3051 /*
3052 * If not at eof, then compare the two neighbor blocks.
3053 * Figure out whether either one gives us a good starting point,
3054 * and pick the better one.
3055 */
3056 else if (!ap->eof) {
3057 xfs_fsblock_t gotbno; /* right side block number */
3058 xfs_fsblock_t gotdiff=0; /* right side difference */
3059 xfs_fsblock_t prevbno; /* left side block number */
3060 xfs_fsblock_t prevdiff=0; /* left side difference */
3061
3062 /*
3063 * If there's a previous (left) block, select a requested
3064 * start block based on it.
3065 */
3066 if (ap->prev.br_startoff != NULLFILEOFF &&
3067 !isnullstartblock(ap->prev.br_startblock) &&
3068 (prevbno = ap->prev.br_startblock +
3069 ap->prev.br_blockcount) &&
3070 ISVALID(prevbno, ap->prev.br_startblock)) {
3071 /*
3072 * Calculate gap to end of previous block.
3073 */
3074 adjust = prevdiff = ap->offset -
3075 (ap->prev.br_startoff +
3076 ap->prev.br_blockcount);
3077 /*
3078 * Figure the startblock based on the previous block's
3079 * end and the gap size.
3080 * Heuristic!
3081 * If the gap is large relative to the piece we're
3082 * allocating, or using it gives us an invalid block
3083 * number, then just use the end of the previous block.
3084 */
3085 if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3086 ISVALID(prevbno + prevdiff,
3087 ap->prev.br_startblock))
3088 prevbno += adjust;
3089 else
3090 prevdiff += adjust;
3091 /*
3092 * If the firstblock forbids it, can't use it,
3093 * must use default.
3094 */
3095 if (!rt && !nullfb &&
3096 XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
3097 prevbno = NULLFSBLOCK;
3098 }
3099 /*
3100 * No previous block or can't follow it, just default.
3101 */
3102 else
3103 prevbno = NULLFSBLOCK;
3104 /*
3105 * If there's a following (right) block, select a requested
3106 * start block based on it.
3107 */
3108 if (!isnullstartblock(ap->got.br_startblock)) {
3109 /*
3110 * Calculate gap to start of next block.
3111 */
3112 adjust = gotdiff = ap->got.br_startoff - ap->offset;
3113 /*
3114 * Figure the startblock based on the next block's
3115 * start and the gap size.
3116 */
3117 gotbno = ap->got.br_startblock;
3118 /*
3119 * Heuristic!
3120 * If the gap is large relative to the piece we're
3121 * allocating, or using it gives us an invalid block
3122 * number, then just use the start of the next block
3123 * offset by our length.
3124 */
3125 if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3126 ISVALID(gotbno - gotdiff, gotbno))
3127 gotbno -= adjust;
3128 else if (ISVALID(gotbno - ap->length, gotbno)) {
3129 gotbno -= ap->length;
3130 gotdiff += adjust - ap->length;
3131 } else
3132 gotdiff += adjust;
3133 /*
3134 * If the firstblock forbids it, can't use it,
3135 * must use default.
3136 */
3137 if (!rt && !nullfb &&
3138 XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
3139 gotbno = NULLFSBLOCK;
3140 }
3141 /*
3142 * No next block, just default.
3143 */
3144 else
3145 gotbno = NULLFSBLOCK;
3146 /*
3147 * If both valid, pick the better one, else the only good
3148 * one, else ap->blkno is already set (to 0 or the inode block).
3149 */
3150 if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
3151 ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3152 else if (prevbno != NULLFSBLOCK)
3153 ap->blkno = prevbno;
3154 else if (gotbno != NULLFSBLOCK)
3155 ap->blkno = gotbno;
3156 }
3157 #undef ISVALID
3158 }
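
/*
 * Worked example of the XFS_ALLOC_GAP_UNITS heuristic above
 * (illustrative numbers): when allocating 8 blocks with a 16-block gap
 * between the end of the previous extent and this file offset,
 * 16 <= 4 * 8 holds, so the target is placed 16 blocks past the
 * previous extent's end to keep the file offset delta and the disk
 * block delta equal; a 64-block gap fails the test and the target
 * falls back to the block immediately after the previous extent.
 */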
3159
3160 static int
3161 xfs_bmap_longest_free_extent(
3162 struct xfs_trans *tp,
3163 xfs_agnumber_t ag,
3164 xfs_extlen_t *blen,
3165 int *notinit)
3166 {
3167 struct xfs_mount *mp = tp->t_mountp;
3168 struct xfs_perag *pag;
3169 xfs_extlen_t longest;
3170 int error = 0;
3171
3172 pag = xfs_perag_get(mp, ag);
3173 if (!pag->pagf_init) {
3174 error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3175 if (error)
3176 goto out;
3177
3178 if (!pag->pagf_init) {
3179 *notinit = 1;
3180 goto out;
3181 }
3182 }
3183
3184 longest = xfs_alloc_longest_free_extent(pag,
3185 xfs_alloc_min_freelist(mp, pag),
3186 xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3187 if (*blen < longest)
3188 *blen = longest;
3189
3190 out:
3191 xfs_perag_put(pag);
3192 return error;
3193 }
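
/*
 * Because the AGF read above passes XFS_ALLOC_FLAG_TRYLOCK, a contended
 * AG is skipped rather than waited on; *notinit tells the caller that
 * at least one AG went unexamined, so the best length in *blen may
 * understate the free space actually available.
 */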
3194
3195 static void
3196 xfs_bmap_select_minlen(
3197 struct xfs_bmalloca *ap,
3198 struct xfs_alloc_arg *args,
3199 xfs_extlen_t *blen,
3200 int notinit)
3201 {
3202 if (notinit || *blen < ap->minlen) {
3203 /*
3204 		 * Since the AG scan used XFS_ALLOC_FLAG_TRYLOCK, locked AGs
3205 		 * were skipped, so there may still be space for this request.
3206 */
3207 args->minlen = ap->minlen;
3208 } else if (*blen < args->maxlen) {
3209 /*
3210 * If the best seen length is less than the request length,
3211 * use the best as the minimum.
3212 */
3213 args->minlen = *blen;
3214 } else {
3215 /*
3216 * Otherwise we've seen an extent as big as maxlen, use that
3217 * as the minimum.
3218 */
3219 args->minlen = args->maxlen;
3220 }
3221 }
3222
3223 STATIC int
3224 xfs_bmap_btalloc_nullfb(
3225 struct xfs_bmalloca *ap,
3226 struct xfs_alloc_arg *args,
3227 xfs_extlen_t *blen)
3228 {
3229 struct xfs_mount *mp = ap->ip->i_mount;
3230 xfs_agnumber_t ag, startag;
3231 int notinit = 0;
3232 int error;
3233
3234 args->type = XFS_ALLOCTYPE_START_BNO;
3235 args->total = ap->total;
3236
3237 startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3238 if (startag == NULLAGNUMBER)
3239 startag = ag = 0;
3240
3241 while (*blen < args->maxlen) {
3242 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3243 &notinit);
3244 if (error)
3245 return error;
3246
3247 if (++ag == mp->m_sb.sb_agcount)
3248 ag = 0;
3249 if (ag == startag)
3250 break;
3251 }
3252
3253 xfs_bmap_select_minlen(ap, args, blen, notinit);
3254 return 0;
3255 }
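
/*
 * The loop above visits each AG at most once: it starts at the AG of
 * the hinted fsbno, wraps around at sb_agcount, and stops early once
 * *blen reaches maxlen, i.e. once some AG can satisfy the request with
 * a single free extent.
 */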
3256
3257 STATIC int
3258 xfs_bmap_btalloc_filestreams(
3259 struct xfs_bmalloca *ap,
3260 struct xfs_alloc_arg *args,
3261 xfs_extlen_t *blen)
3262 {
3263 struct xfs_mount *mp = ap->ip->i_mount;
3264 xfs_agnumber_t ag;
3265 int notinit = 0;
3266 int error;
3267
3268 args->type = XFS_ALLOCTYPE_NEAR_BNO;
3269 args->total = ap->total;
3270
3271 ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3272 if (ag == NULLAGNUMBER)
3273 ag = 0;
3274
3275 error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3276 if (error)
3277 return error;
3278
3279 if (*blen < args->maxlen) {
3280 error = xfs_filestream_new_ag(ap, &ag);
3281 if (error)
3282 return error;
3283
3284 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3285 &notinit);
3286 if (error)
3287 return error;
3288
3289 }
3290
3291 xfs_bmap_select_minlen(ap, args, blen, notinit);
3292
3293 /*
3294 	 * Set the failure fallback case to look in the selected AG as the
3295 	 * stream may have moved.
3296 */
3297 ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3298 return 0;
3299 }
3300
3301 /* Update all inode and quota accounting for the allocation we just did. */
3302 static void
3303 xfs_bmap_btalloc_accounting(
3304 struct xfs_bmalloca *ap,
3305 struct xfs_alloc_arg *args)
3306 {
3307 if (ap->flags & XFS_BMAPI_COWFORK) {
3308 /*
3309 * COW fork blocks are in-core only and thus are treated as
3310 * in-core quota reservation (like delalloc blocks) even when
3311 * converted to real blocks. The quota reservation is not
3312 * accounted to disk until blocks are remapped to the data
3313 * fork. So if these blocks were previously delalloc, we
3314 * already have quota reservation and there's nothing to do
3315 * yet.
3316 */
3317 if (ap->wasdel)
3318 return;
3319
3320 /*
3321 * Otherwise, we've allocated blocks in a hole. The transaction
3322 * has acquired in-core quota reservation for this extent.
3323 * Rather than account these as real blocks, however, we reduce
3324 * the transaction quota reservation based on the allocation.
3325 * This essentially transfers the transaction quota reservation
3326 * to that of a delalloc extent.
3327 */
3328 ap->ip->i_delayed_blks += args->len;
3329 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS,
3330 -(long)args->len);
3331 return;
3332 }
3333
3334 /* data/attr fork only */
3335 ap->ip->i_d.di_nblocks += args->len;
3336 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3337 if (ap->wasdel)
3338 ap->ip->i_delayed_blks -= args->len;
3339 xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
3340 ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT,
3341 args->len);
3342 }
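
/*
 * Worked example of the COW fork branch above (illustrative numbers):
 * an 8-block allocation into a COW fork hole adds 8 to i_delayed_blks
 * and hands 8 blocks of the transaction's quota reservation back, so
 * the extent stays accounted like delalloc until it is remapped into
 * the data fork.
 */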
3343
3344 STATIC int
3345 xfs_bmap_btalloc(
3346 struct xfs_bmalloca *ap) /* bmap alloc argument struct */
3347 {
3348 xfs_mount_t *mp; /* mount point structure */
3349 xfs_alloctype_t atype = 0; /* type for allocation routines */
3350 xfs_extlen_t align = 0; /* minimum allocation alignment */
3351 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
3352 xfs_agnumber_t ag;
3353 xfs_alloc_arg_t args;
3354 xfs_fileoff_t orig_offset;
3355 xfs_extlen_t orig_length;
3356 xfs_extlen_t blen;
3357 xfs_extlen_t nextminlen = 0;
3358 int nullfb; /* true if ap->firstblock isn't set */
3359 int isaligned;
3360 int tryagain;
3361 int error;
3362 int stripe_align;
3363
3364 ASSERT(ap->length);
3365 orig_offset = ap->offset;
3366 orig_length = ap->length;
3367
3368 mp = ap->ip->i_mount;
3369
3370 /* stripe alignment for allocation is determined by mount parameters */
3371 stripe_align = 0;
3372 if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
3373 stripe_align = mp->m_swidth;
3374 else if (mp->m_dalign)
3375 stripe_align = mp->m_dalign;
3376
3377 if (ap->flags & XFS_BMAPI_COWFORK)
3378 align = xfs_get_cowextsz_hint(ap->ip);
3379 else if (xfs_alloc_is_userdata(ap->datatype))
3380 align = xfs_get_extsz_hint(ap->ip);
3381 if (align) {
3382 error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
3383 align, 0, ap->eof, 0, ap->conv,
3384 &ap->offset, &ap->length);
3385 ASSERT(!error);
3386 ASSERT(ap->length);
3387 }
3388 
3390 nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
3391 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
3392 ap->tp->t_firstblock);
3393 if (nullfb) {
3394 if (xfs_alloc_is_userdata(ap->datatype) &&
3395 xfs_inode_is_filestream(ap->ip)) {
3396 ag = xfs_filestream_lookup_ag(ap->ip);
3397 ag = (ag != NULLAGNUMBER) ? ag : 0;
3398 ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
3399 } else {
3400 ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
3401 }
3402 } else
3403 ap->blkno = ap->tp->t_firstblock;
3404
3405 xfs_bmap_adjacent(ap);
3406
3407 /*
3408 * If allowed, use ap->blkno; otherwise must use firstblock since
3409 * it's in the right allocation group.
3410 */
3411 	if (!nullfb && XFS_FSB_TO_AGNO(mp, ap->blkno) != fb_agno)
3412 		ap->blkno = ap->tp->t_firstblock;
3415 /*
3416 * Normal allocation, done through xfs_alloc_vextent.
3417 */
3418 tryagain = isaligned = 0;
3419 memset(&args, 0, sizeof(args));
3420 args.tp = ap->tp;
3421 args.mp = mp;
3422 args.fsbno = ap->blkno;
3423 xfs_rmap_skip_owner_update(&args.oinfo);
3424
3425 /* Trim the allocation back to the maximum an AG can fit. */
3426 args.maxlen = min(ap->length, mp->m_ag_max_usable);
3427 blen = 0;
3428 if (nullfb) {
3429 /*
3430 * Search for an allocation group with a single extent large
3431 * enough for the request. If one isn't found, then adjust
3432 * the minimum allocation size to the largest space found.
3433 */
3434 if (xfs_alloc_is_userdata(ap->datatype) &&
3435 xfs_inode_is_filestream(ap->ip))
3436 error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
3437 else
3438 error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
3439 if (error)
3440 return error;
3441 } else if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3442 if (xfs_inode_is_filestream(ap->ip))
3443 args.type = XFS_ALLOCTYPE_FIRST_AG;
3444 else
3445 args.type = XFS_ALLOCTYPE_START_BNO;
3446 args.total = args.minlen = ap->minlen;
3447 } else {
3448 args.type = XFS_ALLOCTYPE_NEAR_BNO;
3449 args.total = ap->total;
3450 args.minlen = ap->minlen;
3451 }
3452 /* apply extent size hints if obtained earlier */
3453 if (align) {
3454 args.prod = align;
3455 div_u64_rem(ap->offset, args.prod, &args.mod);
3456 if (args.mod)
3457 args.mod = args.prod - args.mod;
3458 } else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
3459 args.prod = 1;
3460 args.mod = 0;
3461 } else {
3462 args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
3463 div_u64_rem(ap->offset, args.prod, &args.mod);
3464 if (args.mod)
3465 args.mod = args.prod - args.mod;
3466 }
3467 /*
3468 	 * If we are not low on available data blocks, and the
3469 	 * underlying logical volume is striped, and the file
3470 	 * offset is zero, then try to allocate data blocks on a
3471 	 * stripe unit boundary.
3472 * NOTE: ap->aeof is only set if the allocation length
3473 * is >= the stripe unit and the allocation offset is
3474 * at the end of file.
3475 */
3476 if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
3477 if (!ap->offset) {
3478 args.alignment = stripe_align;
3479 atype = args.type;
3480 isaligned = 1;
3481 /*
3482 * Adjust for alignment
3483 */
3484 if (blen > args.alignment && blen <= args.maxlen)
3485 args.minlen = blen - args.alignment;
3486 args.minalignslop = 0;
3487 } else {
3488 /*
3489 * First try an exact bno allocation.
3490 * If it fails then do a near or start bno
3491 * allocation with alignment turned on.
3492 */
3493 atype = args.type;
3494 tryagain = 1;
3495 args.type = XFS_ALLOCTYPE_THIS_BNO;
3496 args.alignment = 1;
3497 /*
3498 * Compute the minlen+alignment for the
3499 * next case. Set slop so that the value
3500 * of minlen+alignment+slop doesn't go up
3501 * between the calls.
3502 */
3503 if (blen > stripe_align && blen <= args.maxlen)
3504 nextminlen = blen - stripe_align;
3505 else
3506 nextminlen = args.minlen;
3507 if (nextminlen + stripe_align > args.minlen + 1)
3508 args.minalignslop =
3509 nextminlen + stripe_align -
3510 args.minlen - 1;
3511 else
3512 args.minalignslop = 0;
3513 }
3514 } else {
3515 args.alignment = 1;
3516 args.minalignslop = 0;
3517 }
3518 args.minleft = ap->minleft;
3519 args.wasdel = ap->wasdel;
3520 args.resv = XFS_AG_RESV_NONE;
3521 args.datatype = ap->datatype;
3522 if (ap->datatype & XFS_ALLOC_USERDATA_ZERO)
3523 args.ip = ap->ip;
3524
3525 error = xfs_alloc_vextent(&args);
3526 if (error)
3527 return error;
3528
3529 if (tryagain && args.fsbno == NULLFSBLOCK) {
3530 /*
3531 * Exact allocation failed. Now try with alignment
3532 * turned on.
3533 */
3534 args.type = atype;
3535 args.fsbno = ap->blkno;
3536 args.alignment = stripe_align;
3537 args.minlen = nextminlen;
3538 args.minalignslop = 0;
3539 isaligned = 1;
3540 if ((error = xfs_alloc_vextent(&args)))
3541 return error;
3542 }
3543 if (isaligned && args.fsbno == NULLFSBLOCK) {
3544 /*
3545 * allocation failed, so turn off alignment and
3546 * try again.
3547 */
3548 args.type = atype;
3549 args.fsbno = ap->blkno;
3550 args.alignment = 0;
3551 if ((error = xfs_alloc_vextent(&args)))
3552 return error;
3553 }
3554 if (args.fsbno == NULLFSBLOCK && nullfb &&
3555 args.minlen > ap->minlen) {
3556 args.minlen = ap->minlen;
3557 args.type = XFS_ALLOCTYPE_START_BNO;
3558 args.fsbno = ap->blkno;
3559 if ((error = xfs_alloc_vextent(&args)))
3560 return error;
3561 }
3562 if (args.fsbno == NULLFSBLOCK && nullfb) {
3563 args.fsbno = 0;
3564 args.type = XFS_ALLOCTYPE_FIRST_AG;
3565 args.total = ap->minlen;
3566 if ((error = xfs_alloc_vextent(&args)))
3567 return error;
3568 ap->tp->t_flags |= XFS_TRANS_LOWMODE;
3569 }
3570 if (args.fsbno != NULLFSBLOCK) {
3571 /*
3572 * check the allocation happened at the same or higher AG than
3573 * the first block that was allocated.
3574 */
3575 ASSERT(ap->tp->t_firstblock == NULLFSBLOCK ||
3576 XFS_FSB_TO_AGNO(mp, ap->tp->t_firstblock) <=
3577 XFS_FSB_TO_AGNO(mp, args.fsbno));
3578
3579 ap->blkno = args.fsbno;
3580 if (ap->tp->t_firstblock == NULLFSBLOCK)
3581 ap->tp->t_firstblock = args.fsbno;
3582 ASSERT(nullfb || fb_agno <= args.agno);
3583 ap->length = args.len;
3584 /*
3585 * If the extent size hint is active, we tried to round the
3586 * caller's allocation request offset down to extsz and the
3587 * length up to another extsz boundary. If we found a free
3588 * extent we mapped it in starting at this new offset. If the
3589 * newly mapped space isn't long enough to cover any of the
3590 * range of offsets that was originally requested, move the
3591 * mapping up so that we can fill as much of the caller's
3592 * original request as possible. Free space is apparently
3593 * very fragmented so we're unlikely to be able to satisfy the
3594 * hints anyway.
3595 */
3596 if (ap->length <= orig_length)
3597 ap->offset = orig_offset;
3598 else if (ap->offset + ap->length < orig_offset + orig_length)
3599 ap->offset = orig_offset + orig_length - ap->length;
3600 xfs_bmap_btalloc_accounting(ap, &args);
3601 } else {
3602 ap->blkno = NULLFSBLOCK;
3603 ap->length = 0;
3604 }
3605 return 0;
3606 }
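
/*
 * Summary of the fallback ladder above: the first xfs_alloc_vextent()
 * call uses the type chosen earlier (an exact XFS_ALLOCTYPE_THIS_BNO
 * attempt when appending aligned at EOF); failures then retry with
 * stripe alignment, then with alignment turned off, then with minlen
 * relaxed back to ap->minlen, and finally as XFS_ALLOCTYPE_FIRST_AG
 * with total = ap->minlen, which also drops the transaction into low
 * space mode.
 */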
3607
3608 /*
3609 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
3610 * It figures out where to ask the underlying allocator to put the new extent.
3611 */
3612 STATIC int
3613 xfs_bmap_alloc(
3614 struct xfs_bmalloca *ap) /* bmap alloc argument struct */
3615 {
3616 if (XFS_IS_REALTIME_INODE(ap->ip) &&
3617 xfs_alloc_is_userdata(ap->datatype))
3618 return xfs_bmap_rtalloc(ap);
3619 return xfs_bmap_btalloc(ap);
3620 }
3621
3622 /* Trim extent to fit a logical block range. */
3623 void
3624 xfs_trim_extent(
3625 struct xfs_bmbt_irec *irec,
3626 xfs_fileoff_t bno,
3627 xfs_filblks_t len)
3628 {
3629 xfs_fileoff_t distance;
3630 xfs_fileoff_t end = bno + len;
3631
3632 if (irec->br_startoff + irec->br_blockcount <= bno ||
3633 irec->br_startoff >= end) {
3634 irec->br_blockcount = 0;
3635 return;
3636 }
3637
3638 if (irec->br_startoff < bno) {
3639 distance = bno - irec->br_startoff;
3640 if (isnullstartblock(irec->br_startblock))
3641 irec->br_startblock = DELAYSTARTBLOCK;
3642 if (irec->br_startblock != DELAYSTARTBLOCK &&
3643 irec->br_startblock != HOLESTARTBLOCK)
3644 irec->br_startblock += distance;
3645 irec->br_startoff += distance;
3646 irec->br_blockcount -= distance;
3647 }
3648
3649 if (end < irec->br_startoff + irec->br_blockcount) {
3650 distance = irec->br_startoff + irec->br_blockcount - end;
3651 irec->br_blockcount -= distance;
3652 }
3653 }
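
/*
 * A minimal worked example (illustrative only; the function name is
 * hypothetical): a 20-block mapping at file offset 10, disk block 1000,
 * trimmed to the range [15, 20), keeps five blocks and advances
 * br_startblock by the same distance as br_startoff.
 */
#if 0
static void
xfs_trim_extent_example(void)
{
	struct xfs_bmbt_irec	irec = {
		.br_startoff	= 10,
		.br_startblock	= 1000,
		.br_blockcount	= 20,
		.br_state	= XFS_EXT_NORM,
	};

	xfs_trim_extent(&irec, 15, 5);
	/* now: br_startoff == 15, br_startblock == 1005, br_blockcount == 5 */
}
#endif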
3654
3655 /* trim extent to within eof */
3656 void
3657 xfs_trim_extent_eof(
3658 struct xfs_bmbt_irec *irec,
3659 struct xfs_inode *ip)
3660
3661 {
3662 xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount,
3663 i_size_read(VFS_I(ip))));
3664 }
3665
3666 /*
3667 * Trim the returned map to the required bounds
3668 */
3669 STATIC void
3670 xfs_bmapi_trim_map(
3671 struct xfs_bmbt_irec *mval,
3672 struct xfs_bmbt_irec *got,
3673 xfs_fileoff_t *bno,
3674 xfs_filblks_t len,
3675 xfs_fileoff_t obno,
3676 xfs_fileoff_t end,
3677 int n,
3678 int flags)
3679 {
3680 if ((flags & XFS_BMAPI_ENTIRE) ||
3681 got->br_startoff + got->br_blockcount <= obno) {
3682 *mval = *got;
3683 if (isnullstartblock(got->br_startblock))
3684 mval->br_startblock = DELAYSTARTBLOCK;
3685 return;
3686 }
3687
3688 if (obno > *bno)
3689 *bno = obno;
3690 ASSERT((*bno >= obno) || (n == 0));
3691 ASSERT(*bno < end);
3692 mval->br_startoff = *bno;
3693 if (isnullstartblock(got->br_startblock))
3694 mval->br_startblock = DELAYSTARTBLOCK;
3695 else
3696 mval->br_startblock = got->br_startblock +
3697 (*bno - got->br_startoff);
3698 /*
3699 * Return the minimum of what we got and what we asked for as
3700 * the length. We can use the len variable here because it is
3701 * updated below on each pass through the mapping loop, so it
3702 * may already have been trimmed on an earlier iteration if the
3703 * first part of the allocation didn't overlap what was asked for.
3704 */
3705 mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3706 got->br_blockcount - (*bno - got->br_startoff));
3707 mval->br_state = got->br_state;
3708 ASSERT(mval->br_blockcount <= len);
3709 return;
3710 }
3711
3712 /*
3713 * Update and validate the extent map to return
3714 */
3715 STATIC void
3716 xfs_bmapi_update_map(
3717 struct xfs_bmbt_irec **map,
3718 xfs_fileoff_t *bno,
3719 xfs_filblks_t *len,
3720 xfs_fileoff_t obno,
3721 xfs_fileoff_t end,
3722 int *n,
3723 int flags)
3724 {
3725 xfs_bmbt_irec_t *mval = *map;
3726
3727 ASSERT((flags & XFS_BMAPI_ENTIRE) ||
3728 ((mval->br_startoff + mval->br_blockcount) <= end));
3729 ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
3730 (mval->br_startoff < obno));
3731
3732 *bno = mval->br_startoff + mval->br_blockcount;
3733 *len = end - *bno;
3734 if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
3735 /* update previous map with new information */
3736 ASSERT(mval->br_startblock == mval[-1].br_startblock);
3737 ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
3738 ASSERT(mval->br_state == mval[-1].br_state);
3739 mval[-1].br_blockcount = mval->br_blockcount;
3740 mval[-1].br_state = mval->br_state;
3741 } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
3742 mval[-1].br_startblock != DELAYSTARTBLOCK &&
3743 mval[-1].br_startblock != HOLESTARTBLOCK &&
3744 mval->br_startblock == mval[-1].br_startblock +
3745 mval[-1].br_blockcount &&
3746 mval[-1].br_state == mval->br_state) {
3747 ASSERT(mval->br_startoff ==
3748 mval[-1].br_startoff + mval[-1].br_blockcount);
3749 mval[-1].br_blockcount += mval->br_blockcount;
3750 } else if (*n > 0 &&
3751 mval->br_startblock == DELAYSTARTBLOCK &&
3752 mval[-1].br_startblock == DELAYSTARTBLOCK &&
3753 mval->br_startoff ==
3754 mval[-1].br_startoff + mval[-1].br_blockcount) {
3755 mval[-1].br_blockcount += mval->br_blockcount;
3756 mval[-1].br_state = mval->br_state;
3757 } else if (!((*n == 0) &&
3758 ((mval->br_startoff + mval->br_blockcount) <=
3759 obno))) {
3760 mval++;
3761 (*n)++;
3762 }
3763 *map = mval;
3764 }
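/*
 * A minimal sketch (hypothetical values, compiled out) of the merge rule
 * above: a new mapping is folded into the previous one when it starts
 * exactly where that one ends, both in the file and on disk, and the
 * extent states match.
 */
#if 0
static void xfs_bmapi_update_map_example(void)
{
	struct xfs_bmbt_irec mval[2] = {
		/* file [0, 8) -> fsblocks [100, 108) */
		{ .br_startoff = 0, .br_startblock = 100,
		  .br_blockcount = 8, .br_state = XFS_EXT_NORM },
		/* file [8, 12) -> fsblocks [108, 112), contiguous */
		{ .br_startoff = 8, .br_startblock = 108,
		  .br_blockcount = 4, .br_state = XFS_EXT_NORM },
	};
	struct xfs_bmbt_irec *map = &mval[1];
	xfs_fileoff_t bno = 8;
	xfs_filblks_t len = 4;
	int n = 1;

	/* mval[0] absorbs mval[1]: br_blockcount becomes 12, n stays 1. */
	xfs_bmapi_update_map(&map, &bno, &len, 0, 12, &n, 0);
}
#endif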
3765
3766 /*
3767 * Map file blocks to filesystem blocks without allocation.
3768 */
3769 int
3770 xfs_bmapi_read(
3771 struct xfs_inode *ip,
3772 xfs_fileoff_t bno,
3773 xfs_filblks_t len,
3774 struct xfs_bmbt_irec *mval,
3775 int *nmap,
3776 int flags)
3777 {
3778 struct xfs_mount *mp = ip->i_mount;
3779 struct xfs_ifork *ifp;
3780 struct xfs_bmbt_irec got;
3781 xfs_fileoff_t obno;
3782 xfs_fileoff_t end;
3783 struct xfs_iext_cursor icur;
3784 int error;
3785 bool eof = false;
3786 int n = 0;
3787 int whichfork = xfs_bmapi_whichfork(flags);
3788
3789 ASSERT(*nmap >= 1);
3790 ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
3791 XFS_BMAPI_COWFORK)));
3792 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
3793
3794 if (unlikely(XFS_TEST_ERROR(
3795 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
3796 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
3797 mp, XFS_ERRTAG_BMAPIFORMAT))) {
3798 XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
3799 return -EFSCORRUPTED;
3800 }
3801
3802 if (XFS_FORCED_SHUTDOWN(mp))
3803 return -EIO;
3804
3805 XFS_STATS_INC(mp, xs_blk_mapr);
3806
3807 ifp = XFS_IFORK_PTR(ip, whichfork);
3808
3809 /* No CoW fork? Return a hole. */
3810 if (whichfork == XFS_COW_FORK && !ifp) {
3811 mval->br_startoff = bno;
3812 mval->br_startblock = HOLESTARTBLOCK;
3813 mval->br_blockcount = len;
3814 mval->br_state = XFS_EXT_NORM;
3815 *nmap = 1;
3816 return 0;
3817 }
3818
3819 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
3820 error = xfs_iread_extents(NULL, ip, whichfork);
3821 if (error)
3822 return error;
3823 }
3824
3825 if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
3826 eof = true;
3827 end = bno + len;
3828 obno = bno;
3829
3830 while (bno < end && n < *nmap) {
3831 /* Reading past eof, act as though there's a hole up to end. */
3832 if (eof)
3833 got.br_startoff = end;
3834 if (got.br_startoff > bno) {
3835 /* Reading in a hole. */
3836 mval->br_startoff = bno;
3837 mval->br_startblock = HOLESTARTBLOCK;
3838 mval->br_blockcount =
3839 XFS_FILBLKS_MIN(len, got.br_startoff - bno);
3840 mval->br_state = XFS_EXT_NORM;
3841 bno += mval->br_blockcount;
3842 len -= mval->br_blockcount;
3843 mval++;
3844 n++;
3845 continue;
3846 }
3847
3848 /* set up the extent map to return. */
3849 xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
3850 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
3851
3852 /* If we're done, stop now. */
3853 if (bno >= end || n >= *nmap)
3854 break;
3855
3856 /* Else go on to the next record. */
3857 if (!xfs_iext_next_extent(ifp, &icur, &got))
3858 eof = true;
3859 }
3860 *nmap = n;
3861 return 0;
3862 }
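/*
 * A minimal usage sketch for xfs_bmapi_read() (hypothetical caller,
 * compiled out): map the first 16 blocks of a file into a small batch of
 * records. The inode is assumed to be held with at least the shared
 * ilock.
 */
#if 0
static int xfs_bmapi_read_example(struct xfs_inode *ip)
{
	struct xfs_bmbt_irec mval[4];
	int nmap = 4;
	int error;

	error = xfs_bmapi_read(ip, 0, 16, mval, &nmap, 0);
	if (error)
		return error;

	/*
	 * On return nmap holds how many of the 4 records were filled in.
	 * Holes come back with br_startblock == HOLESTARTBLOCK and
	 * delalloc ranges with DELAYSTARTBLOCK.
	 */
	return 0;
}
#endif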
3863
3864 /*
3865 * Add a delayed allocation extent to an inode. Blocks are reserved from the
3866 * global pool and the extent inserted into the inode in-core extent tree.
3867 *
3868 * On entry, got refers to the first extent beyond the offset of the extent to
3869 * allocate or eof is specified if no such extent exists. On return, got refers
3870 * to the extent record that was inserted to the inode fork.
3871 *
3872 * Note that the allocated extent may have been merged with contiguous extents
3873 * during insertion into the inode fork. Thus, got does not reflect the current
3874 * state of the inode fork on return. If necessary, the caller can use icur to
3875 * look up the updated record in the inode fork.
3876 */
3877 int
3878 xfs_bmapi_reserve_delalloc(
3879 struct xfs_inode *ip,
3880 int whichfork,
3881 xfs_fileoff_t off,
3882 xfs_filblks_t len,
3883 xfs_filblks_t prealloc,
3884 struct xfs_bmbt_irec *got,
3885 struct xfs_iext_cursor *icur,
3886 int eof)
3887 {
3888 struct xfs_mount *mp = ip->i_mount;
3889 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
3890 xfs_extlen_t alen;
3891 xfs_extlen_t indlen;
3892 int error;
3893 xfs_fileoff_t aoff = off;
3894
3895 /*
3896 * Cap the alloc length. Keep track of prealloc so we know whether to
3897 * tag the inode before we return.
3898 */
3899 alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
3900 if (!eof)
3901 alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
3902 if (prealloc && alen >= len)
3903 prealloc = alen - len;
3904
3905 /* Figure out the extent size, adjust alen */
3906 if (whichfork == XFS_COW_FORK) {
3907 struct xfs_bmbt_irec prev;
3908 xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip);
3909
3910 if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
3911 prev.br_startoff = NULLFILEOFF;
3912
3913 error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
3914 1, 0, &aoff, &alen);
3915 ASSERT(!error);
3916 }
3917
3918 /*
3919 * Make a transaction-less quota reservation for delayed allocation
3920 * blocks. This number gets adjusted later. If the reservation
3921 * fails, we bail out now, before any blocks have been allocated.
3922 */
3923 error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
3924 XFS_QMOPT_RES_REGBLKS);
3925 if (error)
3926 return error;
3927
3928 /*
3929 * Update the superblock counters for alen and indlen separately,
3930 * since they could be coming from different places.
3931 */
3932 indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
3933 ASSERT(indlen > 0);
3934
3935 error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
3936 if (error)
3937 goto out_unreserve_quota;
3938
3939 error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
3940 if (error)
3941 goto out_unreserve_blocks;
3942
3944 ip->i_delayed_blks += alen;
3945
3946 got->br_startoff = aoff;
3947 got->br_startblock = nullstartblock(indlen);
3948 got->br_blockcount = alen;
3949 got->br_state = XFS_EXT_NORM;
3950
3951 xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
3952
3953 /*
3954 * Tag the inode if blocks were preallocated. Note that COW fork
3955 * preallocation can occur at the start or end of the extent, even when
3956 * prealloc == 0, so we must also check the aligned offset and length.
3957 */
3958 if (whichfork == XFS_DATA_FORK && prealloc)
3959 xfs_inode_set_eofblocks_tag(ip);
3960 if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
3961 xfs_inode_set_cowblocks_tag(ip);
3962
3963 return 0;
3964
3965 out_unreserve_blocks:
3966 xfs_mod_fdblocks(mp, alen, false);
3967 out_unreserve_quota:
3968 if (XFS_IS_QUOTA_ON(mp))
3969 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0,
3970 XFS_QMOPT_RES_REGBLKS);
3971 return error;
3972 }
3973
3974 static int
3975 xfs_bmapi_allocate(
3976 struct xfs_bmalloca *bma)
3977 {
3978 struct xfs_mount *mp = bma->ip->i_mount;
3979 int whichfork = xfs_bmapi_whichfork(bma->flags);
3980 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
3981 int tmp_logflags = 0;
3982 int error;
3983
3984 ASSERT(bma->length > 0);
3985
3986 /*
3987 * For the wasdelay case, we could also just allocate the stuff asked
3988 * for in this bmap call but that wouldn't be as good.
3989 */
3990 if (bma->wasdel) {
3991 bma->length = (xfs_extlen_t)bma->got.br_blockcount;
3992 bma->offset = bma->got.br_startoff;
3993 xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev);
3994 } else {
3995 bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
3996 if (!bma->eof)
3997 bma->length = XFS_FILBLKS_MIN(bma->length,
3998 bma->got.br_startoff - bma->offset);
3999 }
4000
4001 /*
4002 * Set the data type being allocated. For the data fork, the first data
4003 * in the file is treated differently to all other allocations. For the
4004 * attribute fork, we only need to ensure the allocated range is not on
4005 * the busy list.
4006 */
4007 if (!(bma->flags & XFS_BMAPI_METADATA)) {
4008 bma->datatype = XFS_ALLOC_NOBUSY;
4009 if (whichfork == XFS_DATA_FORK) {
4010 if (bma->offset == 0)
4011 bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4012 else
4013 bma->datatype |= XFS_ALLOC_USERDATA;
4014 }
4015 if (bma->flags & XFS_BMAPI_ZERO)
4016 bma->datatype |= XFS_ALLOC_USERDATA_ZERO;
4017 }
4018
4019 bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
4020
4021 /*
4022 * Only want to do the alignment at the eof if it is userdata and
4023 * the allocation length is larger than a stripe unit.
4024 */
4025 if (mp->m_dalign && bma->length >= mp->m_dalign &&
4026 !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
4027 error = xfs_bmap_isaeof(bma, whichfork);
4028 if (error)
4029 return error;
4030 }
4031
4032 error = xfs_bmap_alloc(bma);
4033 if (error)
4034 return error;
4035
4036 if (bma->blkno == NULLFSBLOCK)
4037 return 0;
4038 if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur)
4039 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4040 /*
4041 * Bump the number of extents we've allocated
4042 * in this call.
4043 */
4044 bma->nallocs++;
4045
4046 if (bma->cur)
4047 bma->cur->bc_private.b.flags =
4048 bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
4049
4050 bma->got.br_startoff = bma->offset;
4051 bma->got.br_startblock = bma->blkno;
4052 bma->got.br_blockcount = bma->length;
4053 bma->got.br_state = XFS_EXT_NORM;
4054
4055 /*
4056 * In the data fork, a wasdelay extent has been initialized, so
4057 * it shouldn't be flagged as unwritten.
4058 *
4059 * For the cow fork, however, we convert delalloc reservations
4060 * (extents allocated for speculative preallocation) to
4061 * allocated unwritten extents, and only convert the unwritten
4062 * extents to real extents when we're about to write the data.
4063 */
4064 if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
4065 (bma->flags & XFS_BMAPI_PREALLOC) &&
4066 xfs_sb_version_hasextflgbit(&mp->m_sb))
4067 bma->got.br_state = XFS_EXT_UNWRITTEN;
4068
4069 if (bma->wasdel)
4070 error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4071 else
4072 error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4073 whichfork, &bma->icur, &bma->cur, &bma->got,
4074 &bma->logflags, bma->flags);
4075
4076 bma->logflags |= tmp_logflags;
4077 if (error)
4078 return error;
4079
4080 /*
4081 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4082 * or xfs_bmap_add_extent_hole_real might have merged it into one of
4083 * the neighbouring ones.
4084 */
4085 xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4086
4087 ASSERT(bma->got.br_startoff <= bma->offset);
4088 ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4089 bma->offset + bma->length);
4090 ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4091 bma->got.br_state == XFS_EXT_UNWRITTEN);
4092 return 0;
4093 }
4094
4095 STATIC int
4096 xfs_bmapi_convert_unwritten(
4097 struct xfs_bmalloca *bma,
4098 struct xfs_bmbt_irec *mval,
4099 xfs_filblks_t len,
4100 int flags)
4101 {
4102 int whichfork = xfs_bmapi_whichfork(flags);
4103 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4104 int tmp_logflags = 0;
4105 int error;
4106
4107 /* check if we need to do unwritten->real conversion */
4108 if (mval->br_state == XFS_EXT_UNWRITTEN &&
4109 (flags & XFS_BMAPI_PREALLOC))
4110 return 0;
4111
4112 /* check if we need to do real->unwritten conversion */
4113 if (mval->br_state == XFS_EXT_NORM &&
4114 (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4115 (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4116 return 0;
4117
4118 /*
4119 * Toggle the extent between the written and unwritten states.
4120 */
4121 ASSERT(mval->br_blockcount <= len);
4122 if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4123 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4124 bma->ip, whichfork);
4125 }
4126 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4127 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4128
4129 /*
4130 * Before insertion into the bmbt, zero the range being converted
4131 * if required.
4132 */
4133 if (flags & XFS_BMAPI_ZERO) {
4134 error = xfs_zero_extent(bma->ip, mval->br_startblock,
4135 mval->br_blockcount);
4136 if (error)
4137 return error;
4138 }
4139
4140 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4141 &bma->icur, &bma->cur, mval, &tmp_logflags);
4142 /*
4143 * Log the inode core unconditionally in the unwritten extent conversion
4144 * path because the conversion might not have done so (e.g., if the
4145 * extent count hasn't changed). We need to make sure the inode is dirty
4146 * in the transaction for the sake of fsync(), even if nothing has
4147 * changed, because fsync() will not force the log for this transaction
4148 * unless it sees the inode pinned.
4149 *
4150 * Note: If we're only converting cow fork extents, there aren't
4151 * any on-disk updates to make, so we don't need to log anything.
4152 */
4153 if (whichfork != XFS_COW_FORK)
4154 bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4155 if (error)
4156 return error;
4157
4158 /*
4159 * Update our extent pointer, given that
4160 * xfs_bmap_add_extent_unwritten_real might have merged it into one
4161 * of the neighbouring ones.
4162 */
4163 xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4164
4165 /*
4166 * We may have combined previously unwritten space with written space,
4167 * so generate another request.
4168 */
4169 if (mval->br_blockcount < len)
4170 return -EAGAIN;
4171 return 0;
4172 }
4173
4174 /*
4175 * Map file blocks to filesystem blocks, and allocate blocks or convert the
4176 * extent state if necessary. Detailed behaviour is controlled by the flags
4177 * parameter. Only allocates blocks from a single allocation group, to avoid
4178 * locking problems.
4179 */
4180 int
4181 xfs_bmapi_write(
4182 struct xfs_trans *tp, /* transaction pointer */
4183 struct xfs_inode *ip, /* incore inode */
4184 xfs_fileoff_t bno, /* starting file offs. mapped */
4185 xfs_filblks_t len, /* length to map in file */
4186 int flags, /* XFS_BMAPI_... */
4187 xfs_extlen_t total, /* total blocks needed */
4188 struct xfs_bmbt_irec *mval, /* output: map values */
4189 int *nmap) /* i/o: mval size/count */
4190 {
4191 struct xfs_mount *mp = ip->i_mount;
4192 struct xfs_ifork *ifp;
4193 struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */
4194 xfs_fileoff_t end; /* end of mapped file region */
4195 bool eof = false; /* after the end of extents */
4196 int error; /* error return */
4197 int n; /* current extent index */
4198 xfs_fileoff_t obno; /* old block number (offset) */
4199 int whichfork; /* data or attr fork */
4200
4201 #ifdef DEBUG
4202 xfs_fileoff_t orig_bno; /* original block number value */
4203 int orig_flags; /* original flags arg value */
4204 xfs_filblks_t orig_len; /* original value of len arg */
4205 struct xfs_bmbt_irec *orig_mval; /* original value of mval */
4206 int orig_nmap; /* original value of *nmap */
4207
4208 orig_bno = bno;
4209 orig_len = len;
4210 orig_flags = flags;
4211 orig_mval = mval;
4212 orig_nmap = *nmap;
4213 #endif
4214 whichfork = xfs_bmapi_whichfork(flags);
4215
4216 ASSERT(*nmap >= 1);
4217 ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4218 ASSERT(tp != NULL ||
4219 (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) ==
4220 (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK));
4221 ASSERT(len > 0);
4222 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
4223 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4224 ASSERT(!(flags & XFS_BMAPI_REMAP));
4225
4226 /* zeroing is currently only for data extents, not metadata */
4227 ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4228 (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4229 /*
4230 * We can allocate unwritten extents or pre-zero allocated blocks,
4231 * but it makes no sense to do both at once. That would result in
4232 * zeroing the unwritten extent twice while it still remained an
4233 * unwritten extent.
4234 */
4235 ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4236 (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4237
4238 if (unlikely(XFS_TEST_ERROR(
4239 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4240 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
4241 mp, XFS_ERRTAG_BMAPIFORMAT))) {
4242 XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
4243 return -EFSCORRUPTED;
4244 }
4245
4246 if (XFS_FORCED_SHUTDOWN(mp))
4247 return -EIO;
4248
4249 ifp = XFS_IFORK_PTR(ip, whichfork);
4250
4251 XFS_STATS_INC(mp, xs_blk_mapw);
4252
4253 if (!tp || tp->t_firstblock == NULLFSBLOCK) {
4254 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
4255 bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
4256 else
4257 bma.minleft = 1;
4258 } else {
4259 bma.minleft = 0;
4260 }
4261
4262 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4263 error = xfs_iread_extents(tp, ip, whichfork);
4264 if (error)
4265 goto error0;
4266 }
4267
4268 n = 0;
4269 end = bno + len;
4270 obno = bno;
4271
4272 if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
4273 eof = true;
4274 if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4275 bma.prev.br_startoff = NULLFILEOFF;
4276 bma.tp = tp;
4277 bma.ip = ip;
4278 bma.total = total;
4279 bma.datatype = 0;
4280 ASSERT(!tp || tp->t_dfops);
4281
4282 while (bno < end && n < *nmap) {
4283 bool need_alloc = false, wasdelay = false;
4284
4285 /* in hole or beyond EOF? */
4286 if (eof || bma.got.br_startoff > bno) {
4287 /*
4288 * CoW fork conversions should /never/ hit EOF or
4289 * holes. There should always be something for us
4290 * to work on.
4291 */
4292 ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
4293 (flags & XFS_BMAPI_COWFORK)));
4294
4295 if (flags & XFS_BMAPI_DELALLOC) {
4296 /*
4297 * For the COW fork we can reasonably get a
4298 * request for converting an extent that races
4299 * with other threads already having converted
4300 * part of it, because converting COW to
4301 * regular blocks is not protected by the
4302 * IOLOCK.
4303 */
4304 ASSERT(flags & XFS_BMAPI_COWFORK);
4305 if (!(flags & XFS_BMAPI_COWFORK)) {
4306 error = -EIO;
4307 goto error0;
4308 }
4309
4310 if (eof || bno >= end)
4311 break;
4312 } else {
4313 need_alloc = true;
4314 }
4315 } else if (isnullstartblock(bma.got.br_startblock)) {
4316 wasdelay = true;
4317 }
4318
4319 /*
4320 * First, deal with the hole before the allocated space
4321 * that we found, if any.
4322 */
4323 if ((need_alloc || wasdelay) &&
4324 !(flags & XFS_BMAPI_CONVERT_ONLY)) {
4325 bma.eof = eof;
4326 bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4327 bma.wasdel = wasdelay;
4328 bma.offset = bno;
4329 bma.flags = flags;
4330
4331 /*
4332 * There's a 32/64 bit type mismatch between the
4333 * allocation length request (which can be 64 bits in
4334 * length) and the bma length request, which is
4335 * xfs_extlen_t and therefore 32 bits. Hence we have to
4336 * check for 32-bit overflows and handle them here.
4337 */
4338 if (len > (xfs_filblks_t)MAXEXTLEN)
4339 bma.length = MAXEXTLEN;
4340 else
4341 bma.length = len;
4342
4343 ASSERT(len > 0);
4344 ASSERT(bma.length > 0);
4345 error = xfs_bmapi_allocate(&bma);
4346 if (error)
4347 goto error0;
4348 if (bma.blkno == NULLFSBLOCK)
4349 break;
4350
4351 /*
4352 * If this is a CoW allocation, record the data in
4353 * the refcount btree for orphan recovery.
4354 */
4355 if (whichfork == XFS_COW_FORK) {
4356 error = xfs_refcount_alloc_cow_extent(tp,
4357 bma.blkno, bma.length);
4358 if (error)
4359 goto error0;
4360 }
4361 }
4362
4363 /* Deal with the allocated space we found. */
4364 xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4365 end, n, flags);
4366
4367 /* Execute unwritten extent conversion if necessary */
4368 error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4369 if (error == -EAGAIN)
4370 continue;
4371 if (error)
4372 goto error0;
4373
4374 /* update the extent map to return */
4375 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4376
4377 /*
4378 * If we're done, stop now. Stop when we've allocated
4379 * XFS_BMAP_MAX_NMAP extents no matter what. Otherwise
4380 * the transaction may get too big.
4381 */
4382 if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4383 break;
4384
4385 /* Else go on to the next record. */
4386 bma.prev = bma.got;
4387 if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
4388 eof = true;
4389 }
4390 *nmap = n;
4391
4392 /*
4393 * Transform from btree to extents, give it cur.
4394 */
4395 if (xfs_bmap_wants_extents(ip, whichfork)) {
4396 int tmp_logflags = 0;
4397
4398 ASSERT(bma.cur);
4399 error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
4400 &tmp_logflags, whichfork);
4401 bma.logflags |= tmp_logflags;
4402 if (error)
4403 goto error0;
4404 }
4405
4406 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
4407 XFS_IFORK_NEXTENTS(ip, whichfork) >
4408 XFS_IFORK_MAXEXT(ip, whichfork));
4409 error = 0;
4410 error0:
4411 /*
4412 * Log everything. Do this after conversion; there's no point in
4413 * logging the extent records if we've converted to btree format.
4414 */
4415 if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
4416 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
4417 bma.logflags &= ~xfs_ilog_fext(whichfork);
4418 else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
4419 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
4420 bma.logflags &= ~xfs_ilog_fbroot(whichfork);
4421 /*
4422 * Log whatever the flags say, even on error. Otherwise we might miss
4423 * detecting a case where the data is changed, there's an error,
4424 * and it's not logged so we don't shut down when we should.
4425 */
4426 if (bma.logflags)
4427 xfs_trans_log_inode(tp, ip, bma.logflags);
4428
4429 if (bma.cur)
4430 xfs_btree_del_cursor(bma.cur, error);
4431
4432 if (!error)
4433 xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4434 orig_nmap, *nmap);
4435 return error;
4436 }
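/*
 * A minimal usage sketch for xfs_bmapi_write() (hypothetical caller,
 * compiled out): preallocate the file range [off, off + count) inside an
 * existing transaction. Assumes the caller reserved "resblks" blocks in
 * the transaction and holds the ilock exclusively.
 */
#if 0
static int xfs_bmapi_write_example(struct xfs_trans *tp, struct xfs_inode *ip,
	xfs_fileoff_t off, xfs_filblks_t count, xfs_extlen_t resblks)
{
	struct xfs_bmbt_irec map;
	int nmap = 1;

	/* Ask for one mapping back; real callers loop until done. */
	return xfs_bmapi_write(tp, ip, off, count, XFS_BMAPI_PREALLOC,
			resblks, &map, &nmap);
}
#endif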
4437
4438 int
4439 xfs_bmapi_remap(
4440 struct xfs_trans *tp,
4441 struct xfs_inode *ip,
4442 xfs_fileoff_t bno,
4443 xfs_filblks_t len,
4444 xfs_fsblock_t startblock,
4445 int flags)
4446 {
4447 struct xfs_mount *mp = ip->i_mount;
4448 struct xfs_ifork *ifp;
4449 struct xfs_btree_cur *cur = NULL;
4450 struct xfs_bmbt_irec got;
4451 struct xfs_iext_cursor icur;
4452 int whichfork = xfs_bmapi_whichfork(flags);
4453 int logflags = 0, error;
4454
4455 ifp = XFS_IFORK_PTR(ip, whichfork);
4456 ASSERT(len > 0);
4457 ASSERT(len <= (xfs_filblks_t)MAXEXTLEN);
4458 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4459 ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
4460 XFS_BMAPI_NORMAP)));
4461 ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
4462 (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
4463
4464 if (unlikely(XFS_TEST_ERROR(
4465 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4466 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
4467 mp, XFS_ERRTAG_BMAPIFORMAT))) {
4468 XFS_ERROR_REPORT("xfs_bmapi_remap", XFS_ERRLEVEL_LOW, mp);
4469 return -EFSCORRUPTED;
4470 }
4471
4472 if (XFS_FORCED_SHUTDOWN(mp))
4473 return -EIO;
4474
4475 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4476 error = xfs_iread_extents(tp, ip, whichfork);
4477 if (error)
4478 return error;
4479 }
4480
4481 if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4482 /* make sure we only reflink into a hole. */
4483 ASSERT(got.br_startoff > bno);
4484 ASSERT(got.br_startoff - bno >= len);
4485 }
4486
4487 ip->i_d.di_nblocks += len;
4488 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4489
4490 if (ifp->if_flags & XFS_IFBROOT) {
4491 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
4492 cur->bc_private.b.flags = 0;
4493 }
4494
4495 got.br_startoff = bno;
4496 got.br_startblock = startblock;
4497 got.br_blockcount = len;
4498 if (flags & XFS_BMAPI_PREALLOC)
4499 got.br_state = XFS_EXT_UNWRITTEN;
4500 else
4501 got.br_state = XFS_EXT_NORM;
4502
4503 error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
4504 &cur, &got, &logflags, flags);
4505 if (error)
4506 goto error0;
4507
4508 if (xfs_bmap_wants_extents(ip, whichfork)) {
4509 int tmp_logflags = 0;
4510
4511 error = xfs_bmap_btree_to_extents(tp, ip, cur,
4512 &tmp_logflags, whichfork);
4513 logflags |= tmp_logflags;
4514 }
4515
4516 error0:
4517 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS)
4518 logflags &= ~XFS_ILOG_DEXT;
4519 else if (ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
4520 logflags &= ~XFS_ILOG_DBROOT;
4521
4522 if (logflags)
4523 xfs_trans_log_inode(tp, ip, logflags);
4524 if (cur)
4525 xfs_btree_del_cursor(cur, error);
4526 return error;
4527 }
4528
4529 /*
4530 * When a delalloc extent is split (e.g., due to a hole punch), the original
4531 * indlen reservation must be shared across the two new extents that are left
4532 * behind.
4533 *
4534 * Given the original reservation and the worst case indlen for the two new
4535 * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4536 * reservation fairly across the two new extents. If necessary, steal available
4537 * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4538 * ores == 1). The number of stolen blocks is returned. The availability and
4539 * subsequent accounting of stolen blocks is the responsibility of the caller.
4540 */
4541 static xfs_filblks_t
4542 xfs_bmap_split_indlen(
4543 xfs_filblks_t ores, /* original res. */
4544 xfs_filblks_t *indlen1, /* ext1 worst indlen */
4545 xfs_filblks_t *indlen2, /* ext2 worst indlen */
4546 xfs_filblks_t avail) /* stealable blocks */
4547 {
4548 xfs_filblks_t len1 = *indlen1;
4549 xfs_filblks_t len2 = *indlen2;
4550 xfs_filblks_t nres = len1 + len2; /* new total res. */
4551 xfs_filblks_t stolen = 0;
4552 xfs_filblks_t resfactor;
4553
4554 /*
4555 * Steal as many blocks as we can to try and satisfy the worst case
4556 * indlen for both new extents.
4557 */
4558 if (ores < nres && avail)
4559 stolen = XFS_FILBLKS_MIN(nres - ores, avail);
4560 ores += stolen;
4561
4562 /* nothing else to do if we've satisfied the new reservation */
4563 if (ores >= nres)
4564 return stolen;
4565
4566 /*
4567 * We can't meet the total required reservation for the two extents.
4568 * Calculate the percent of the overall shortage between both extents
4569 * and apply this percentage to each of the requested indlen values.
4570 * This distributes the shortage fairly and reduces the chances that one
4571 * of the two extents is left with nothing when extents are repeatedly
4572 * split.
4573 */
4574 resfactor = (ores * 100);
4575 do_div(resfactor, nres);
4576 len1 *= resfactor;
4577 do_div(len1, 100);
4578 len2 *= resfactor;
4579 do_div(len2, 100);
4580 ASSERT(len1 + len2 <= ores);
4581 ASSERT(len1 < *indlen1 && len2 < *indlen2);
4582
4583 /*
4584 * Hand out the remainder to each extent. If one of the two reservations
4585 * is zero, we want to make sure that one gets a block first. The loop
4586 * below starts with len1, so hand len2 a block right off the bat if it
4587 * is zero.
4588 */
4589 ores -= (len1 + len2);
4590 ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
4591 if (ores && !len2 && *indlen2) {
4592 len2++;
4593 ores--;
4594 }
4595 while (ores) {
4596 if (len1 < *indlen1) {
4597 len1++;
4598 ores--;
4599 }
4600 if (!ores)
4601 break;
4602 if (len2 < *indlen2) {
4603 len2++;
4604 ores--;
4605 }
4606 }
4607
4608 *indlen1 = len1;
4609 *indlen2 = len2;
4610
4611 return stolen;
4612 }
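/*
 * A worked example of the split above with hypothetical numbers
 * (compiled out): an original reservation of 5 blocks, worst-case
 * demands of 4 and 3 for the two new extents, and 1 stealable block.
 */
#if 0
static void xfs_bmap_split_indlen_example(void)
{
	xfs_filblks_t indlen1 = 4, indlen2 = 3;
	xfs_filblks_t stolen;

	/*
	 * nres = 7 and ores becomes 5 + 1 stolen = 6. The 6/7 shortage
	 * factor scales the requests down to 3 and 2, then the leftover
	 * block is handed to the first extent: indlen1 = 4, indlen2 = 2.
	 */
	stolen = xfs_bmap_split_indlen(5, &indlen1, &indlen2, 1);
	ASSERT(stolen == 1 && indlen1 == 4 && indlen2 == 2);
}
#endif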
4613
4614 int
4615 xfs_bmap_del_extent_delay(
4616 struct xfs_inode *ip,
4617 int whichfork,
4618 struct xfs_iext_cursor *icur,
4619 struct xfs_bmbt_irec *got,
4620 struct xfs_bmbt_irec *del)
4621 {
4622 struct xfs_mount *mp = ip->i_mount;
4623 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
4624 struct xfs_bmbt_irec new;
4625 int64_t da_old, da_new, da_diff = 0;
4626 xfs_fileoff_t del_endoff, got_endoff;
4627 xfs_filblks_t got_indlen, new_indlen, stolen;
4628 int state = xfs_bmap_fork_to_state(whichfork);
4629 int error = 0;
4630 bool isrt;
4631
4632 XFS_STATS_INC(mp, xs_del_exlist);
4633
4634 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
4635 del_endoff = del->br_startoff + del->br_blockcount;
4636 got_endoff = got->br_startoff + got->br_blockcount;
4637 da_old = startblockval(got->br_startblock);
4638 da_new = 0;
4639
4640 ASSERT(del->br_blockcount > 0);
4641 ASSERT(got->br_startoff <= del->br_startoff);
4642 ASSERT(got_endoff >= del_endoff);
4643
4644 if (isrt) {
4645 uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
4646
4647 do_div(rtexts, mp->m_sb.sb_rextsize);
4648 xfs_mod_frextents(mp, rtexts);
4649 }
4650
4651 /*
4652 * Update the inode delalloc counter now and wait to update the
4653 * sb counters as we might have to borrow some blocks for the
4654 * indirect block accounting.
4655 */
4656 error = xfs_trans_reserve_quota_nblks(NULL, ip,
4657 -((long)del->br_blockcount), 0,
4658 isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4659 if (error)
4660 return error;
4661 ip->i_delayed_blks -= del->br_blockcount;
4662
4663 if (got->br_startoff == del->br_startoff)
4664 state |= BMAP_LEFT_FILLING;
4665 if (got_endoff == del_endoff)
4666 state |= BMAP_RIGHT_FILLING;
4667
4668 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4669 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4670 /*
4671 * Matches the whole extent. Delete the entry.
4672 */
4673 xfs_iext_remove(ip, icur, state);
4674 xfs_iext_prev(ifp, icur);
4675 break;
4676 case BMAP_LEFT_FILLING:
4677 /*
4678 * Deleting the first part of the extent.
4679 */
4680 got->br_startoff = del_endoff;
4681 got->br_blockcount -= del->br_blockcount;
4682 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4683 got->br_blockcount), da_old);
4684 got->br_startblock = nullstartblock((int)da_new);
4685 xfs_iext_update_extent(ip, state, icur, got);
4686 break;
4687 case BMAP_RIGHT_FILLING:
4688 /*
4689 * Deleting the last part of the extent.
4690 */
4691 got->br_blockcount = got->br_blockcount - del->br_blockcount;
4692 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4693 got->br_blockcount), da_old);
4694 got->br_startblock = nullstartblock((int)da_new);
4695 xfs_iext_update_extent(ip, state, icur, got);
4696 break;
4697 case 0:
4698 /*
4699 * Deleting the middle of the extent.
4700 *
4701 * Distribute the original indlen reservation across the two new
4702 * extents. Steal blocks from the deleted extent if necessary.
4703 * Stealing blocks simply fudges the fdblocks accounting below.
4704 * Warn if either of the new indlen reservations is zero as this
4705 * can lead to delalloc problems.
4706 */
4707 got->br_blockcount = del->br_startoff - got->br_startoff;
4708 got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4709
4710 new.br_blockcount = got_endoff - del_endoff;
4711 new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
4712
4713 WARN_ON_ONCE(!got_indlen || !new_indlen);
4714 stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
4715 del->br_blockcount);
4716
4717 got->br_startblock = nullstartblock((int)got_indlen);
4718
4719 new.br_startoff = del_endoff;
4720 new.br_state = got->br_state;
4721 new.br_startblock = nullstartblock((int)new_indlen);
4722
4723 xfs_iext_update_extent(ip, state, icur, got);
4724 xfs_iext_next(ifp, icur);
4725 xfs_iext_insert(ip, icur, &new, state);
4726
4727 da_new = got_indlen + new_indlen - stolen;
4728 del->br_blockcount -= stolen;
4729 break;
4730 }
4731
4732 ASSERT(da_old >= da_new);
4733 da_diff = da_old - da_new;
4734 if (!isrt)
4735 da_diff += del->br_blockcount;
4736 if (da_diff)
4737 xfs_mod_fdblocks(mp, da_diff, false);
4738 return error;
4739 }
4740
4741 void
4742 xfs_bmap_del_extent_cow(
4743 struct xfs_inode *ip,
4744 struct xfs_iext_cursor *icur,
4745 struct xfs_bmbt_irec *got,
4746 struct xfs_bmbt_irec *del)
4747 {
4748 struct xfs_mount *mp = ip->i_mount;
4749 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
4750 struct xfs_bmbt_irec new;
4751 xfs_fileoff_t del_endoff, got_endoff;
4752 int state = BMAP_COWFORK;
4753
4754 XFS_STATS_INC(mp, xs_del_exlist);
4755
4756 del_endoff = del->br_startoff + del->br_blockcount;
4757 got_endoff = got->br_startoff + got->br_blockcount;
4758
4759 ASSERT(del->br_blockcount > 0);
4760 ASSERT(got->br_startoff <= del->br_startoff);
4761 ASSERT(got_endoff >= del_endoff);
4762 ASSERT(!isnullstartblock(got->br_startblock));
4763
4764 if (got->br_startoff == del->br_startoff)
4765 state |= BMAP_LEFT_FILLING;
4766 if (got_endoff == del_endoff)
4767 state |= BMAP_RIGHT_FILLING;
4768
4769 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4770 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4771 /*
4772 * Matches the whole extent. Delete the entry.
4773 */
4774 xfs_iext_remove(ip, icur, state);
4775 xfs_iext_prev(ifp, icur);
4776 break;
4777 case BMAP_LEFT_FILLING:
4778 /*
4779 * Deleting the first part of the extent.
4780 */
4781 got->br_startoff = del_endoff;
4782 got->br_blockcount -= del->br_blockcount;
4783 got->br_startblock = del->br_startblock + del->br_blockcount;
4784 xfs_iext_update_extent(ip, state, icur, got);
4785 break;
4786 case BMAP_RIGHT_FILLING:
4787 /*
4788 * Deleting the last part of the extent.
4789 */
4790 got->br_blockcount -= del->br_blockcount;
4791 xfs_iext_update_extent(ip, state, icur, got);
4792 break;
4793 case 0:
4794 /*
4795 * Deleting the middle of the extent.
4796 */
4797 got->br_blockcount = del->br_startoff - got->br_startoff;
4798
4799 new.br_startoff = del_endoff;
4800 new.br_blockcount = got_endoff - del_endoff;
4801 new.br_state = got->br_state;
4802 new.br_startblock = del->br_startblock + del->br_blockcount;
4803
4804 xfs_iext_update_extent(ip, state, icur, got);
4805 xfs_iext_next(ifp, icur);
4806 xfs_iext_insert(ip, icur, &new, state);
4807 break;
4808 }
4809 ip->i_delayed_blks -= del->br_blockcount;
4810 }
4811
4812 /*
4813 * Called by xfs_bmapi to update file extent records and the btree
4814 * after removing space.
4815 */
4816 STATIC int /* error */
4817 xfs_bmap_del_extent_real(
4818 xfs_inode_t *ip, /* incore inode pointer */
4819 xfs_trans_t *tp, /* current transaction pointer */
4820 struct xfs_iext_cursor *icur,
4821 xfs_btree_cur_t *cur, /* if null, not a btree */
4822 xfs_bmbt_irec_t *del, /* data to remove from extents */
4823 int *logflagsp, /* inode logging flags */
4824 int whichfork, /* data or attr fork */
4825 int bflags) /* bmapi flags */
4826 {
4827 xfs_fsblock_t del_endblock = 0; /* first block past del */
4828 xfs_fileoff_t del_endoff; /* first offset past del */
4829 int do_fx; /* free extent at end of routine */
4830 int error; /* error return value */
4831 int flags = 0; /* inode logging flags */
4832 struct xfs_bmbt_irec got; /* current extent entry */
4833 xfs_fileoff_t got_endoff; /* first offset past got */
4834 int i; /* temp state */
4835 struct xfs_ifork *ifp; /* inode fork pointer */
4836 xfs_mount_t *mp; /* mount structure */
4837 xfs_filblks_t nblks; /* quota/sb block count */
4838 xfs_bmbt_irec_t new; /* new record to be inserted */
4839 /* REFERENCED */
4840 uint qfield; /* quota field to update */
4841 int state = xfs_bmap_fork_to_state(whichfork);
4842 struct xfs_bmbt_irec old;
4843
4844 mp = ip->i_mount;
4845 XFS_STATS_INC(mp, xs_del_exlist);
4846
4847 ifp = XFS_IFORK_PTR(ip, whichfork);
4848 ASSERT(del->br_blockcount > 0);
4849 xfs_iext_get_extent(ifp, icur, &got);
4850 ASSERT(got.br_startoff <= del->br_startoff);
4851 del_endoff = del->br_startoff + del->br_blockcount;
4852 got_endoff = got.br_startoff + got.br_blockcount;
4853 ASSERT(got_endoff >= del_endoff);
4854 ASSERT(!isnullstartblock(got.br_startblock));
4855 qfield = 0;
4856 error = 0;
4857
4858 /*
4859 * If the directory code is running with no block
4860 * reservation, and the deleted block is in the middle of its extent,
4861 * and the resulting insert of an extent would cause transformation to
4862 * btree format, then reject it. The calling code will then swap blocks
4863 * around instead. We have to do this now, rather than waiting for the
4864 * conversion to btree format, since the transaction will be dirty then.
4865 */
4866 if (tp->t_blk_res == 0 &&
4867 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
4868 XFS_IFORK_NEXTENTS(ip, whichfork) >=
4869 XFS_IFORK_MAXEXT(ip, whichfork) &&
4870 del->br_startoff > got.br_startoff && del_endoff < got_endoff)
4871 return -ENOSPC;
4872
4873 flags = XFS_ILOG_CORE;
4874 if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
4875 xfs_fsblock_t bno;
4876 xfs_filblks_t len;
4877 xfs_extlen_t mod;
4878
4879 bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize,
4880 &mod);
4881 ASSERT(mod == 0);
4882 len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
4883 &mod);
4884 ASSERT(mod == 0);
4885
4886 error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
4887 if (error)
4888 goto done;
4889 do_fx = 0;
4890 nblks = len * mp->m_sb.sb_rextsize;
4891 qfield = XFS_TRANS_DQ_RTBCOUNT;
4892 } else {
4893 do_fx = 1;
4894 nblks = del->br_blockcount;
4895 qfield = XFS_TRANS_DQ_BCOUNT;
4896 }
4897
4898 del_endblock = del->br_startblock + del->br_blockcount;
4899 if (cur) {
4900 error = xfs_bmbt_lookup_eq(cur, &got, &i);
4901 if (error)
4902 goto done;
4903 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4904 }
4905
4906 if (got.br_startoff == del->br_startoff)
4907 state |= BMAP_LEFT_FILLING;
4908 if (got_endoff == del_endoff)
4909 state |= BMAP_RIGHT_FILLING;
4910
4911 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4912 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4913 /*
4914 * Matches the whole extent. Delete the entry.
4915 */
4916 xfs_iext_remove(ip, icur, state);
4917 xfs_iext_prev(ifp, icur);
4918 XFS_IFORK_NEXT_SET(ip, whichfork,
4919 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
4920 flags |= XFS_ILOG_CORE;
4921 if (!cur) {
4922 flags |= xfs_ilog_fext(whichfork);
4923 break;
4924 }
4925 if ((error = xfs_btree_delete(cur, &i)))
4926 goto done;
4927 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4928 break;
4929 case BMAP_LEFT_FILLING:
4930 /*
4931 * Deleting the first part of the extent.
4932 */
4933 got.br_startoff = del_endoff;
4934 got.br_startblock = del_endblock;
4935 got.br_blockcount -= del->br_blockcount;
4936 xfs_iext_update_extent(ip, state, icur, &got);
4937 if (!cur) {
4938 flags |= xfs_ilog_fext(whichfork);
4939 break;
4940 }
4941 error = xfs_bmbt_update(cur, &got);
4942 if (error)
4943 goto done;
4944 break;
4945 case BMAP_RIGHT_FILLING:
4946 /*
4947 * Deleting the last part of the extent.
4948 */
4949 got.br_blockcount -= del->br_blockcount;
4950 xfs_iext_update_extent(ip, state, icur, &got);
4951 if (!cur) {
4952 flags |= xfs_ilog_fext(whichfork);
4953 break;
4954 }
4955 error = xfs_bmbt_update(cur, &got);
4956 if (error)
4957 goto done;
4958 break;
4959 case 0:
4960 /*
4961 * Deleting the middle of the extent.
4962 */
4963 old = got;
4964
4965 got.br_blockcount = del->br_startoff - got.br_startoff;
4966 xfs_iext_update_extent(ip, state, icur, &got);
4967
4968 new.br_startoff = del_endoff;
4969 new.br_blockcount = got_endoff - del_endoff;
4970 new.br_state = got.br_state;
4971 new.br_startblock = del_endblock;
4972
4973 flags |= XFS_ILOG_CORE;
4974 if (cur) {
4975 error = xfs_bmbt_update(cur, &got);
4976 if (error)
4977 goto done;
4978 error = xfs_btree_increment(cur, 0, &i);
4979 if (error)
4980 goto done;
4981 cur->bc_rec.b = new;
4982 error = xfs_btree_insert(cur, &i);
4983 if (error && error != -ENOSPC)
4984 goto done;
4985 /*
4986 * If we get ENOSPC back from the btree insert, it tried
4987 * a split but we have a zero block reservation. Fix up
4988 * our state and return the error.
4989 */
4990 if (error == -ENOSPC) {
4991 /*
4992 * Reset the cursor, don't trust it after any
4993 * insert operation.
4994 */
4995 error = xfs_bmbt_lookup_eq(cur, &got, &i);
4996 if (error)
4997 goto done;
4998 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4999 /*
5000 * Update the btree record back
5001 * to the original value.
5002 */
5003 error = xfs_bmbt_update(cur, &old);
5004 if (error)
5005 goto done;
5006 /*
5007 * Reset the extent record back
5008 * to the original value.
5009 */
5010 xfs_iext_update_extent(ip, state, icur, &old);
5011 flags = 0;
5012 error = -ENOSPC;
5013 goto done;
5014 }
5015 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
5016 } else
5017 flags |= xfs_ilog_fext(whichfork);
5018 XFS_IFORK_NEXT_SET(ip, whichfork,
5019 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
5020 xfs_iext_next(ifp, icur);
5021 xfs_iext_insert(ip, icur, &new, state);
5022 break;
5023 }
5024
5025 /* remove reverse mapping */
5026 error = xfs_rmap_unmap_extent(tp, ip, whichfork, del);
5027 if (error)
5028 goto done;
5029
5030 /*
5031 * If we need to, add to list of extents to delete.
5032 */
5033 if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
5034 if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
5035 error = xfs_refcount_decrease_extent(tp, del);
5036 if (error)
5037 goto done;
5038 } else {
5039 __xfs_bmap_add_free(tp, del->br_startblock,
5040 del->br_blockcount, NULL,
5041 (bflags & XFS_BMAPI_NODISCARD) ||
5042 del->br_state == XFS_EXT_UNWRITTEN);
5043 }
5044 }
5045
5046 /*
5047 * Adjust inode # blocks in the file.
5048 */
5049 if (nblks)
5050 ip->i_d.di_nblocks -= nblks;
5051 /*
5052 * Adjust quota data.
5053 */
5054 if (qfield && !(bflags & XFS_BMAPI_REMAP))
5055 xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5056
5057 done:
5058 *logflagsp = flags;
5059 return error;
5060 }
5061
5062 /*
5063 * Unmap (remove) blocks from a file.
5064 * If nexts is nonzero then the number of extents to remove is limited to
5065 * that value. If not all extents in the block range can be removed then
5066 * *rlen is set to the length that remains.
5067 */
5068 int /* error */
5069 __xfs_bunmapi(
5070 struct xfs_trans *tp, /* transaction pointer */
5071 struct xfs_inode *ip, /* incore inode */
5072 xfs_fileoff_t start, /* first file offset deleted */
5073 xfs_filblks_t *rlen, /* i/o: amount remaining */
5074 int flags, /* misc flags */
5075 xfs_extnum_t nexts) /* number of extents max */
5076 {
5077 struct xfs_btree_cur *cur; /* bmap btree cursor */
5078 struct xfs_bmbt_irec del; /* extent being deleted */
5079 int error; /* error return value */
5080 xfs_extnum_t extno; /* extent number in list */
5081 struct xfs_bmbt_irec got; /* current extent record */
5082 struct xfs_ifork *ifp; /* inode fork pointer */
5083 int isrt; /* freeing in rt area */
5084 int logflags; /* transaction logging flags */
5085 xfs_extlen_t mod; /* rt extent offset */
5086 struct xfs_mount *mp; /* mount structure */
5087 int tmp_logflags; /* partial logging flags */
5088 int wasdel; /* was a delayed alloc extent */
5089 int whichfork; /* data or attribute fork */
5090 xfs_fsblock_t sum;
5091 xfs_filblks_t len = *rlen; /* length to unmap in file */
5092 xfs_fileoff_t max_len;
5093 xfs_agnumber_t prev_agno = NULLAGNUMBER, agno;
5094 xfs_fileoff_t end;
5095 struct xfs_iext_cursor icur;
5096 bool done = false;
5097
5098 trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
5099
5100 whichfork = xfs_bmapi_whichfork(flags);
5101 ASSERT(whichfork != XFS_COW_FORK);
5102 ifp = XFS_IFORK_PTR(ip, whichfork);
5103 if (unlikely(
5104 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5105 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
5106 XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
5107 ip->i_mount);
5108 return -EFSCORRUPTED;
5109 }
5110 mp = ip->i_mount;
5111 if (XFS_FORCED_SHUTDOWN(mp))
5112 return -EIO;
5113
5114 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5115 ASSERT(len > 0);
5116 ASSERT(nexts >= 0);
5117
5118 /*
5119 * Guesstimate how many blocks we can unmap without running the risk of
5120 * blowing out the transaction with a mix of EFIs and reflink
5121 * adjustments.
5122 */
5123 if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
5124 max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
5125 else
5126 max_len = len;
5127
5128 if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5129 (error = xfs_iread_extents(tp, ip, whichfork)))
5130 return error;
5131 if (xfs_iext_count(ifp) == 0) {
5132 *rlen = 0;
5133 return 0;
5134 }
5135 XFS_STATS_INC(mp, xs_blk_unmap);
5136 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5137 end = start + len;
5138
5139 if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
5140 *rlen = 0;
5141 return 0;
5142 }
5143 end--;
5144
5145 logflags = 0;
5146 if (ifp->if_flags & XFS_IFBROOT) {
5147 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
5148 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5149 cur->bc_private.b.flags = 0;
5150 } else
5151 cur = NULL;
5152
5153 if (isrt) {
5154 /*
5155 * Synchronize by locking the bitmap inode.
5156 */
5157 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5158 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5159 xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5160 xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5161 }
5162
5163 extno = 0;
5164 while (end != (xfs_fileoff_t)-1 && end >= start &&
5165 (nexts == 0 || extno < nexts) && max_len > 0) {
5166 /*
5167 * Is the found extent after a hole in which end lives?
5168 * Just back up to the previous extent, if so.
5169 */
5170 if (got.br_startoff > end &&
5171 !xfs_iext_prev_extent(ifp, &icur, &got)) {
5172 done = true;
5173 break;
5174 }
5175 /*
5176 * Is the last block of this extent before the range
5177 * we're supposed to delete? If so, we're done.
5178 */
5179 end = XFS_FILEOFF_MIN(end,
5180 got.br_startoff + got.br_blockcount - 1);
5181 if (end < start)
5182 break;
5183 /*
5184 * Then deal with the (possibly delayed) allocated space
5185 * we found.
5186 */
5187 del = got;
5188 wasdel = isnullstartblock(del.br_startblock);
5189
5190 /*
5191 * Make sure we don't touch multiple AGF headers out of order
5192 * in a single transaction, as that could cause AB-BA deadlocks.
5193 */
5194 if (!wasdel) {
5195 agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
5196 if (prev_agno != NULLAGNUMBER && prev_agno > agno)
5197 break;
5198 prev_agno = agno;
5199 }
5200 if (got.br_startoff < start) {
5201 del.br_startoff = start;
5202 del.br_blockcount -= start - got.br_startoff;
5203 if (!wasdel)
5204 del.br_startblock += start - got.br_startoff;
5205 }
5206 if (del.br_startoff + del.br_blockcount > end + 1)
5207 del.br_blockcount = end + 1 - del.br_startoff;
5208
5209 /* How much can we safely unmap? */
5210 if (max_len < del.br_blockcount) {
5211 del.br_startoff += del.br_blockcount - max_len;
5212 if (!wasdel)
5213 del.br_startblock += del.br_blockcount - max_len;
5214 del.br_blockcount = max_len;
5215 }
5216
5217 if (!isrt)
5218 goto delete;
5219
5220 sum = del.br_startblock + del.br_blockcount;
5221 div_u64_rem(sum, mp->m_sb.sb_rextsize, &mod);
5222 if (mod) {
5223 /*
5224 * Realtime extent not lined up at the end.
5225 * The extent could have been split into written
5226 * and unwritten pieces, or we could just be
5227 * unmapping part of it. But we can't really
5228 * get rid of part of a realtime extent.
5229 */
5230 if (del.br_state == XFS_EXT_UNWRITTEN ||
5231 !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5232 /*
5233 * This piece is unwritten, or we're not
5234 * using unwritten extents. Skip over it.
5235 */
5236 ASSERT(end >= mod);
5237 end -= mod > del.br_blockcount ?
5238 del.br_blockcount : mod;
5239 if (end < got.br_startoff &&
5240 !xfs_iext_prev_extent(ifp, &icur, &got)) {
5241 done = true;
5242 break;
5243 }
5244 continue;
5245 }
5246 /*
5247 * It's written, turn it unwritten.
5248 * This is better than zeroing it.
5249 */
5250 ASSERT(del.br_state == XFS_EXT_NORM);
5251 ASSERT(tp->t_blk_res > 0);
5252 /*
5253 * If this spans a realtime extent boundary,
5254 * chop it back to the start of the one we end at.
5255 */
5256 if (del.br_blockcount > mod) {
5257 del.br_startoff += del.br_blockcount - mod;
5258 del.br_startblock += del.br_blockcount - mod;
5259 del.br_blockcount = mod;
5260 }
5261 del.br_state = XFS_EXT_UNWRITTEN;
5262 error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5263 whichfork, &icur, &cur, &del,
5264 &logflags);
5265 if (error)
5266 goto error0;
5267 goto nodelete;
5268 }
5269 div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
5270 if (mod) {
5271 /*
5272 * Realtime extent is lined up at the end but not
5273 * at the front. We'll get rid of full extents if
5274 * we can.
5275 */
5276 mod = mp->m_sb.sb_rextsize - mod;
5277 if (del.br_blockcount > mod) {
5278 del.br_blockcount -= mod;
5279 del.br_startoff += mod;
5280 del.br_startblock += mod;
5281 } else if ((del.br_startoff == start &&
5282 (del.br_state == XFS_EXT_UNWRITTEN ||
5283 tp->t_blk_res == 0)) ||
5284 !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5285 /*
5286 * Can't make it unwritten. There isn't
5287 * a full extent here so just skip it.
5288 */
5289 ASSERT(end >= del.br_blockcount);
5290 end -= del.br_blockcount;
5291 if (got.br_startoff > end &&
5292 !xfs_iext_prev_extent(ifp, &icur, &got)) {
5293 done = true;
5294 break;
5295 }
5296 continue;
5297 } else if (del.br_state == XFS_EXT_UNWRITTEN) {
5298 struct xfs_bmbt_irec prev;
5299
5300 /*
5301 * This one is already unwritten.
5302 * It must have a written left neighbor.
5303 * Unwrite the killed part of that one and
5304 * try again.
5305 */
5306 if (!xfs_iext_prev_extent(ifp, &icur, &prev))
5307 ASSERT(0);
5308 ASSERT(prev.br_state == XFS_EXT_NORM);
5309 ASSERT(!isnullstartblock(prev.br_startblock));
5310 ASSERT(del.br_startblock ==
5311 prev.br_startblock + prev.br_blockcount);
5312 if (prev.br_startoff < start) {
5313 mod = start - prev.br_startoff;
5314 prev.br_blockcount -= mod;
5315 prev.br_startblock += mod;
5316 prev.br_startoff = start;
5317 }
5318 prev.br_state = XFS_EXT_UNWRITTEN;
5319 error = xfs_bmap_add_extent_unwritten_real(tp,
5320 ip, whichfork, &icur, &cur,
5321 &prev, &logflags);
5322 if (error)
5323 goto error0;
5324 goto nodelete;
5325 } else {
5326 ASSERT(del.br_state == XFS_EXT_NORM);
5327 del.br_state = XFS_EXT_UNWRITTEN;
5328 error = xfs_bmap_add_extent_unwritten_real(tp,
5329 ip, whichfork, &icur, &cur,
5330 &del, &logflags);
5331 if (error)
5332 goto error0;
5333 goto nodelete;
5334 }
5335 }
5336
5337 delete:
5338 if (wasdel) {
5339 error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
5340 &got, &del);
5341 } else {
5342 error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
5343 &del, &tmp_logflags, whichfork,
5344 flags);
5345 logflags |= tmp_logflags;
5346 }
5347
5348 if (error)
5349 goto error0;
5350
5351 max_len -= del.br_blockcount;
5352 end = del.br_startoff - 1;
5353 nodelete:
5354 /*
5355 * If not done go on to the next (previous) record.
5356 */
5357 if (end != (xfs_fileoff_t)-1 && end >= start) {
5358 if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5359 (got.br_startoff > end &&
5360 !xfs_iext_prev_extent(ifp, &icur, &got))) {
5361 done = true;
5362 break;
5363 }
5364 extno++;
5365 }
5366 }
5367 if (done || end == (xfs_fileoff_t)-1 || end < start)
5368 *rlen = 0;
5369 else
5370 *rlen = end - start + 1;
5371
5372 /*
5373 * Convert to a btree if necessary.
5374 */
5375 if (xfs_bmap_needs_btree(ip, whichfork)) {
5376 ASSERT(cur == NULL);
5377 error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
5378 &tmp_logflags, whichfork);
5379 logflags |= tmp_logflags;
5380 if (error)
5381 goto error0;
5382 }
5383 /*
5384 * transform from btree to extents, give it cur
5385 */
5386 else if (xfs_bmap_wants_extents(ip, whichfork)) {
5387 ASSERT(cur != NULL);
5388 error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
5389 whichfork);
5390 logflags |= tmp_logflags;
5391 if (error)
5392 goto error0;
5393 }
5394 /*
5395 * transform from extents to local?
5396 */
5397 error = 0;
5398 error0:
5399 /*
5400 * Log everything. Do this after conversion; there's no point in
5401 * logging the extent records if we've converted to btree format.
5402 */
5403 if ((logflags & xfs_ilog_fext(whichfork)) &&
5404 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
5405 logflags &= ~xfs_ilog_fext(whichfork);
5406 else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5407 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
5408 logflags &= ~xfs_ilog_fbroot(whichfork);
5409 /*
5410 * Log the inode even in the error case; if the transaction
5411 * is dirty we'll need to shut down the filesystem.
5412 */
5413 if (logflags)
5414 xfs_trans_log_inode(tp, ip, logflags);
5415 if (cur) {
5416 if (!error)
5417 cur->bc_private.b.allocated = 0;
5418 xfs_btree_del_cursor(cur, error);
5419 }
5420 return error;
5421 }
5422
5423 /* Unmap a range of a file. */
5424 int
5425 xfs_bunmapi(
5426 xfs_trans_t *tp,
5427 struct xfs_inode *ip,
5428 xfs_fileoff_t bno,
5429 xfs_filblks_t len,
5430 int flags,
5431 xfs_extnum_t nexts,
5432 int *done)
5433 {
5434 int error;
5435
5436 error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts);
5437 *done = (len == 0);
5438 return error;
5439 }
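/*
 * A minimal usage sketch for xfs_bunmapi() (hypothetical caller,
 * compiled out): unmap the file range [off, off + count), removing at
 * most two extents per call. Assumes "tp" carries an adequate
 * reservation and the ilock is held exclusively.
 */
#if 0
static int xfs_bunmapi_example(struct xfs_trans *tp, struct xfs_inode *ip,
	xfs_fileoff_t off, xfs_filblks_t count)
{
	int done = 0;
	int error;

	error = xfs_bunmapi(tp, ip, off, count, 0, 2, &done);
	if (error)
		return error;

	/* If !done, roll the transaction and call again. */
	return 0;
}
#endif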
5440
5441 /*
5442 * Determine whether an extent shift can be accomplished by a merge with the
5443 * extent that precedes the target hole of the shift.
5444 */
5445 STATIC bool
5446 xfs_bmse_can_merge(
5447 struct xfs_bmbt_irec *left, /* preceding extent */
5448 struct xfs_bmbt_irec *got, /* current extent to shift */
5449 xfs_fileoff_t shift) /* shift fsb */
5450 {
5451 xfs_fileoff_t startoff;
5452
5453 startoff = got->br_startoff - shift;
5454
5455 /*
5456 * The extent, once shifted, must be adjacent in-file and on-disk with
5457 * the preceding extent.
5458 */
5459 if ((left->br_startoff + left->br_blockcount != startoff) ||
5460 (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5461 (left->br_state != got->br_state) ||
5462 (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
5463 return false;
5464
5465 return true;
5466 }
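/*
 * A small worked example of the merge test (hypothetical values,
 * compiled out): shifting the extent at file offset 20 left by 5 lands
 * it exactly at the end of its neighbour, both in the file and on disk,
 * so the two may be merged.
 */
#if 0
static void xfs_bmse_can_merge_example(void)
{
	struct xfs_bmbt_irec left = {
		.br_startoff = 5, .br_startblock = 100,
		.br_blockcount = 10, .br_state = XFS_EXT_NORM,
	};
	struct xfs_bmbt_irec got = {
		.br_startoff = 20, .br_startblock = 110,
		.br_blockcount = 8, .br_state = XFS_EXT_NORM,
	};

	ASSERT(xfs_bmse_can_merge(&left, &got, 5));
}
#endif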
5467
5468 /*
5469 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5470 * hole in the file. If an extent shift would result in the extent being fully
5471 * adjacent to the extent that currently precedes the hole, we can merge with
5472 * the preceding extent rather than do the shift.
5473 *
5474 * This function assumes the caller has verified a shift-by-merge is possible
5475 * with the provided extents via xfs_bmse_can_merge().
5476 */
5477 STATIC int
5478 xfs_bmse_merge(
5479 struct xfs_trans *tp,
5480 struct xfs_inode *ip,
5481 int whichfork,
5482 xfs_fileoff_t shift, /* shift fsb */
5483 struct xfs_iext_cursor *icur,
5484 struct xfs_bmbt_irec *got, /* extent to shift */
5485 struct xfs_bmbt_irec *left, /* preceding extent */
5486 struct xfs_btree_cur *cur,
5487 int *logflags) /* output */
5488 {
5489 struct xfs_bmbt_irec new;
5490 xfs_filblks_t blockcount;
5491 int error, i;
5492 struct xfs_mount *mp = ip->i_mount;
5493
5494 blockcount = left->br_blockcount + got->br_blockcount;
5495
5496 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5497 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5498 ASSERT(xfs_bmse_can_merge(left, got, shift));
5499
5500 new = *left;
5501 new.br_blockcount = blockcount;
5502
5503 /*
5504 * Update the on-disk extent count, update the btree if necessary, and
5505 * log the inode.
5506 */
5507 XFS_IFORK_NEXT_SET(ip, whichfork,
5508 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
5509 *logflags |= XFS_ILOG_CORE;
5510 if (!cur) {
5511 *logflags |= XFS_ILOG_DEXT;
5512 goto done;
5513 }
5514
5515 /* lookup and remove the extent to merge */
5516 error = xfs_bmbt_lookup_eq(cur, got, &i);
5517 if (error)
5518 return error;
5519 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5520
5521 error = xfs_btree_delete(cur, &i);
5522 if (error)
5523 return error;
5524 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5525
5526 /* lookup and update size of the previous extent */
5527 error = xfs_bmbt_lookup_eq(cur, left, &i);
5528 if (error)
5529 return error;
5530 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5531
5532 error = xfs_bmbt_update(cur, &new);
5533 if (error)
5534 return error;
5535
5536 done:
5537 xfs_iext_remove(ip, icur, 0);
5538 xfs_iext_prev(XFS_IFORK_PTR(ip, whichfork), icur);
5539 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5540 &new);
5541
5542 /* Update the reverse mapping; the rmap functions merge the rmaps for us. */
5543 error = xfs_rmap_unmap_extent(tp, ip, whichfork, got);
5544 if (error)
5545 return error;
5546 memcpy(&new, got, sizeof(new));
5547 new.br_startoff = left->br_startoff + left->br_blockcount;
5548 return xfs_rmap_map_extent(tp, ip, whichfork, &new);
5549 }
5550
5551 static int
5552 xfs_bmap_shift_update_extent(
5553 struct xfs_trans *tp,
5554 struct xfs_inode *ip,
5555 int whichfork,
5556 struct xfs_iext_cursor *icur,
5557 struct xfs_bmbt_irec *got,
5558 struct xfs_btree_cur *cur,
5559 int *logflags,
5560 xfs_fileoff_t startoff)
5561 {
5562 struct xfs_mount *mp = ip->i_mount;
5563 struct xfs_bmbt_irec prev = *got;
5564 int error, i;
5565
5566 *logflags |= XFS_ILOG_CORE;
5567
5568 got->br_startoff = startoff;
5569
5570 if (cur) {
5571 error = xfs_bmbt_lookup_eq(cur, &prev, &i);
5572 if (error)
5573 return error;
5574 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5575
5576 error = xfs_bmbt_update(cur, got);
5577 if (error)
5578 return error;
5579 } else {
5580 *logflags |= XFS_ILOG_DEXT;
5581 }
5582
5583 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5584 got);
5585
5586 /* update reverse mapping */
5587 error = xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
5588 if (error)
5589 return error;
5590 return xfs_rmap_map_extent(tp, ip, whichfork, got);
5591 }
5592
5593 int
5594 xfs_bmap_collapse_extents(
5595 struct xfs_trans *tp,
5596 struct xfs_inode *ip,
5597 xfs_fileoff_t *next_fsb,
5598 xfs_fileoff_t offset_shift_fsb,
5599 bool *done)
5600 {
5601 int whichfork = XFS_DATA_FORK;
5602 struct xfs_mount *mp = ip->i_mount;
5603 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
5604 struct xfs_btree_cur *cur = NULL;
5605 struct xfs_bmbt_irec got, prev;
5606 struct xfs_iext_cursor icur;
5607 xfs_fileoff_t new_startoff;
5608 int error = 0;
5609 int logflags = 0;
5610
5611 if (unlikely(XFS_TEST_ERROR(
5612 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5613 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5614 mp, XFS_ERRTAG_BMAPIFORMAT))) {
5615 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
5616 return -EFSCORRUPTED;
5617 }
5618
5619 if (XFS_FORCED_SHUTDOWN(mp))
5620 return -EIO;
5621
5622 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5623
5624 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5625 error = xfs_iread_extents(tp, ip, whichfork);
5626 if (error)
5627 return error;
5628 }
5629
5630 if (ifp->if_flags & XFS_IFBROOT) {
5631 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5632 cur->bc_private.b.flags = 0;
5633 }
5634
5635 if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5636 *done = true;
5637 goto del_cursor;
5638 }
5639 XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
5640 del_cursor);
5641
5642 new_startoff = got.br_startoff - offset_shift_fsb;
5643 if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
5644 if (new_startoff < prev.br_startoff + prev.br_blockcount) {
5645 error = -EINVAL;
5646 goto del_cursor;
5647 }
5648
5649 if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
5650 error = xfs_bmse_merge(tp, ip, whichfork,
5651 offset_shift_fsb, &icur, &got, &prev,
5652 cur, &logflags);
5653 if (error)
5654 goto del_cursor;
5655 goto done;
5656 }
5657 } else {
5658 if (got.br_startoff < offset_shift_fsb) {
5659 error = -EINVAL;
5660 goto del_cursor;
5661 }
5662 }
5663
5664 error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5665 cur, &logflags, new_startoff);
5666 if (error)
5667 goto del_cursor;
5668
5669 done:
5670 if (!xfs_iext_next_extent(ifp, &icur, &got)) {
5671 *done = true;
5672 goto del_cursor;
5673 }
5674
5675 *next_fsb = got.br_startoff;
5676 del_cursor:
5677 if (cur)
5678 xfs_btree_del_cursor(cur, error);
5679 if (logflags)
5680 xfs_trans_log_inode(tp, ip, logflags);
5681 return error;
5682 }
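
/*
 * Collapsing shifts each extent at or after *next_fsb down the file by
 * offset_shift_fsb, one extent (or merge) per call, and hands the next
 * starting offset back through *next_fsb. Sketched with a four-block hole:
 *
 *	before:	|AAAA....BBBB|		shift = 4
 *	after:	|AAAABBBB....|
 *
 * An illustrative caller loop only (not a real caller; those also roll
 * the transaction between iterations):
 */
#if 0
	bool	done = false;

	while (!done) {
		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb,
				offset_shift_fsb, &done);
		if (error)
			break;
	}
#endif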
5683
5684 /* Make sure we won't be right-shifting an extent past the maximum file offset. */
5685 int
5686 xfs_bmap_can_insert_extents(
5687 struct xfs_inode *ip,
5688 xfs_fileoff_t off,
5689 xfs_fileoff_t shift)
5690 {
5691 struct xfs_bmbt_irec got;
5692 int is_empty;
5693 int error = 0;
5694
5695 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5696
5697 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
5698 return -EIO;
5699
5700 xfs_ilock(ip, XFS_ILOCK_EXCL);
5701 error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
5702 if (!error && !is_empty && got.br_startoff >= off &&
5703 ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
5704 error = -EINVAL;
5705 xfs_iunlock(ip, XFS_ILOCK_EXCL);
5706
5707 return error;
5708 }
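
/*
 * The masked addition above catches arithmetic wrap past the on-disk
 * startoff field. For example, if the last extent starts at
 * BMBT_STARTOFF_MASK - 1 and shift is 4, then (startoff + shift) &
 * BMBT_STARTOFF_MASK comes out to 2, which is less than the original
 * startoff, so the shift is rejected with -EINVAL before any extent can
 * be pushed past the largest representable file offset.
 */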
5709
5710 int
5711 xfs_bmap_insert_extents(
5712 struct xfs_trans *tp,
5713 struct xfs_inode *ip,
5714 xfs_fileoff_t *next_fsb,
5715 xfs_fileoff_t offset_shift_fsb,
5716 bool *done,
5717 xfs_fileoff_t stop_fsb)
5718 {
5719 int whichfork = XFS_DATA_FORK;
5720 struct xfs_mount *mp = ip->i_mount;
5721 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
5722 struct xfs_btree_cur *cur = NULL;
5723 struct xfs_bmbt_irec got, next;
5724 struct xfs_iext_cursor icur;
5725 xfs_fileoff_t new_startoff;
5726 int error = 0;
5727 int logflags = 0;
5728
5729 if (unlikely(XFS_TEST_ERROR(
5730 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5731 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5732 mp, XFS_ERRTAG_BMAPIFORMAT))) {
5733 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
5734 return -EFSCORRUPTED;
5735 }
5736
5737 if (XFS_FORCED_SHUTDOWN(mp))
5738 return -EIO;
5739
5740 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5741
5742 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5743 error = xfs_iread_extents(tp, ip, whichfork);
5744 if (error)
5745 return error;
5746 }
5747
5748 if (ifp->if_flags & XFS_IFBROOT) {
5749 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5750 cur->bc_private.b.flags = 0;
5751 }
5752
5753 if (*next_fsb == NULLFSBLOCK) {
5754 xfs_iext_last(ifp, &icur);
5755 if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5756 stop_fsb > got.br_startoff) {
5757 *done = true;
5758 goto del_cursor;
5759 }
5760 } else {
5761 if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5762 *done = true;
5763 goto del_cursor;
5764 }
5765 }
5766 XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
5767 del_cursor);
5768
5769 if (stop_fsb >= got.br_startoff + got.br_blockcount) {
5770 error = -EIO;
5771 goto del_cursor;
5772 }
5773
5774 new_startoff = got.br_startoff + offset_shift_fsb;
5775 if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
5776 if (new_startoff + got.br_blockcount > next.br_startoff) {
5777 error = -EINVAL;
5778 goto del_cursor;
5779 }
5780
5781 /*
5782 * Unlike a left shift (which involves a hole punch), a right
5783 * shift does not modify extent neighbors in any way. We should
5784 * never find mergeable extents in this scenario. Check anyway
5785 * and warn if we encounter two extents that could be one.
5786 */
5787 if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
5788 WARN_ON_ONCE(1);
5789 }
5790
5791 error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
5792 cur, &logflags, new_startoff);
5793 if (error)
5794 goto del_cursor;
5795
5796 if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
5797 stop_fsb >= got.br_startoff + got.br_blockcount) {
5798 *done = true;
5799 goto del_cursor;
5800 }
5801
5802 *next_fsb = got.br_startoff;
5803 del_cursor:
5804 if (cur)
5805 xfs_btree_del_cursor(cur, error);
5806 if (logflags)
5807 xfs_trans_log_inode(tp, ip, logflags);
5808 return error;
5809 }
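
/*
 * An insert-range shift walks the extent list from the highest file
 * offset downward (hence the xfs_iext_last() lookup on the first call,
 * when *next_fsb is NULLFSBLOCK), moving each extent up by
 * offset_shift_fsb until stop_fsb is reached:
 *
 *	before:	|AAAA|BBBB|		stop_fsb after A, shift = 4
 *	after:	|AAAA|....|BBBB|
 *
 * Shifting from the end guarantees that an extent never collides with
 * its successor, which has already been moved out of the way.
 */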
5810
5811 /*
5812 * Splits an extent into two extents at split_fsb block such that split_fsb
5813 * becomes the first block of the new extent. @split_fsb is the block at
5814 * which the extent is split. If split_fsb lies in a hole or at the first
5815 * block of an existing extent, there is nothing to split, so return 0.
5816 */
5817 STATIC int
5818 xfs_bmap_split_extent_at(
5819 struct xfs_trans *tp,
5820 struct xfs_inode *ip,
5821 xfs_fileoff_t split_fsb)
5822 {
5823 int whichfork = XFS_DATA_FORK;
5824 struct xfs_btree_cur *cur = NULL;
5825 struct xfs_bmbt_irec got;
5826 struct xfs_bmbt_irec new; /* split extent */
5827 struct xfs_mount *mp = ip->i_mount;
5828 struct xfs_ifork *ifp;
5829 xfs_fsblock_t gotblkcnt; /* new block count for got */
5830 struct xfs_iext_cursor icur;
5831 int error = 0;
5832 int logflags = 0;
5833 int i = 0;
5834
5835 if (unlikely(XFS_TEST_ERROR(
5836 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5837 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5838 mp, XFS_ERRTAG_BMAPIFORMAT))) {
5839 XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
5840 XFS_ERRLEVEL_LOW, mp);
5841 return -EFSCORRUPTED;
5842 }
5843
5844 if (XFS_FORCED_SHUTDOWN(mp))
5845 return -EIO;
5846
5847 ifp = XFS_IFORK_PTR(ip, whichfork);
5848 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5849 /* Read in all the extents */
5850 error = xfs_iread_extents(tp, ip, whichfork);
5851 if (error)
5852 return error;
5853 }
5854
5855 /*
5856 * If there are no extents, or split_fsb lies in a hole, we are done.
5857 */
5858 if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
5859 got.br_startoff >= split_fsb)
5860 return 0;
5861
5862 gotblkcnt = split_fsb - got.br_startoff;
5863 new.br_startoff = split_fsb;
5864 new.br_startblock = got.br_startblock + gotblkcnt;
5865 new.br_blockcount = got.br_blockcount - gotblkcnt;
5866 new.br_state = got.br_state;
5867
5868 if (ifp->if_flags & XFS_IFBROOT) {
5869 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5870 cur->bc_private.b.flags = 0;
5871 error = xfs_bmbt_lookup_eq(cur, &got, &i);
5872 if (error)
5873 goto del_cursor;
5874 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5875 }
5876
5877 got.br_blockcount = gotblkcnt;
5878 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
5879 &got);
5880
5881 logflags = XFS_ILOG_CORE;
5882 if (cur) {
5883 error = xfs_bmbt_update(cur, &got);
5884 if (error)
5885 goto del_cursor;
5886 } else
5887 logflags |= XFS_ILOG_DEXT;
5888
5889 /* Add new extent */
5890 xfs_iext_next(ifp, &icur);
5891 xfs_iext_insert(ip, &icur, &new, 0);
5892 XFS_IFORK_NEXT_SET(ip, whichfork,
5893 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
5894
5895 if (cur) {
5896 error = xfs_bmbt_lookup_eq(cur, &new, &i);
5897 if (error)
5898 goto del_cursor;
5899 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
5900 error = xfs_btree_insert(cur, &i);
5901 if (error)
5902 goto del_cursor;
5903 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5904 }
5905
5906 /*
5907 * Convert to a btree if necessary.
5908 */
5909 if (xfs_bmap_needs_btree(ip, whichfork)) {
5910 int tmp_logflags; /* partial log flag return val */
5911
5912 ASSERT(cur == NULL);
5913 error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
5914 &tmp_logflags, whichfork);
5915 logflags |= tmp_logflags;
5916 }
5917
5918 del_cursor:
5919 if (cur) {
5920 cur->bc_private.b.allocated = 0;
5921 xfs_btree_del_cursor(cur, error);
5922 }
5923
5924 if (logflags)
5925 xfs_trans_log_inode(tp, ip, logflags);
5926 return error;
5927 }
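
/*
 * A worked instance of the split arithmetic above, with made-up numbers:
 *
 *	got = { .br_startoff = 10, .br_startblock = 200, .br_blockcount = 8 }
 *	split_fsb = 13
 *
 * gotblkcnt = 13 - 10 = 3, so @got is trimmed to 3 blocks and the new
 * extent becomes { .br_startoff = 13, .br_startblock = 203,
 * .br_blockcount = 5 } with br_state preserved. Together the two pieces
 * map exactly the same disk blocks as the original extent.
 */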
5928
5929 int
5930 xfs_bmap_split_extent(
5931 struct xfs_inode *ip,
5932 xfs_fileoff_t split_fsb)
5933 {
5934 struct xfs_mount *mp = ip->i_mount;
5935 struct xfs_trans *tp;
5936 int error;
5937
5938 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
5939 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
5940 if (error)
5941 return error;
5942
5943 xfs_ilock(ip, XFS_ILOCK_EXCL);
5944 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
5945
5946 error = xfs_bmap_split_extent_at(tp, ip, split_fsb);
5947 if (error)
5948 goto out;
5949
5950 return xfs_trans_commit(tp);
5951
5952 out:
5953 xfs_trans_cancel(tp);
5954 return error;
5955 }
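
/*
 * Note that xfs_bmap_split_extent() allocates and commits its own
 * transaction, so it must not be called with a transaction already in
 * progress; a caller that is mid-transaction would invoke
 * xfs_bmap_split_extent_at() directly instead.
 */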
5956
5957 /* Deferred mapping is only for real extents in the data fork. */
5958 static bool
5959 xfs_bmap_is_update_needed(
5960 struct xfs_bmbt_irec *bmap)
5961 {
5962 return bmap->br_startblock != HOLESTARTBLOCK &&
5963 bmap->br_startblock != DELAYSTARTBLOCK;
5964 }
5965
5966 /* Record a bmap intent. */
5967 static int
5968 __xfs_bmap_add(
5969 struct xfs_trans *tp,
5970 enum xfs_bmap_intent_type type,
5971 struct xfs_inode *ip,
5972 int whichfork,
5973 struct xfs_bmbt_irec *bmap)
5974 {
5975 struct xfs_bmap_intent *bi;
5976
5977 trace_xfs_bmap_defer(tp->t_mountp,
5978 XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
5979 type,
5980 XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
5981 ip->i_ino, whichfork,
5982 bmap->br_startoff,
5983 bmap->br_blockcount,
5984 bmap->br_state);
5985
5986 bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_SLEEP | KM_NOFS);
5987 INIT_LIST_HEAD(&bi->bi_list);
5988 bi->bi_type = type;
5989 bi->bi_owner = ip;
5990 bi->bi_whichfork = whichfork;
5991 bi->bi_bmap = *bmap;
5992
5993 xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
5994 return 0;
5995 }
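
/*
 * The intent record above is allocated KM_NOFS because we are already
 * inside a transaction; allowing the allocator to recurse into
 * filesystem reclaim here could deadlock.
 */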
5996
5997 /* Map an extent into a file. */
5998 int
5999 xfs_bmap_map_extent(
6000 struct xfs_trans *tp,
6001 struct xfs_inode *ip,
6002 struct xfs_bmbt_irec *PREV)
6003 {
6004 if (!xfs_bmap_is_update_needed(PREV))
6005 return 0;
6006
6007 return __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV);
6008 }
6009
6010 /* Unmap an extent out of a file. */
6011 int
6012 xfs_bmap_unmap_extent(
6013 struct xfs_trans *tp,
6014 struct xfs_inode *ip,
6015 struct xfs_bmbt_irec *PREV)
6016 {
6017 if (!xfs_bmap_is_update_needed(PREV))
6018 return 0;
6019
6020 return __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV);
6021 }
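
/*
 * A minimal sketch of how the two helpers above pair up to move a mapped
 * extent between files: queue an unmap intent against the donor and a map
 * intent against the target in the same transaction, and let the deferred
 * ops machinery replay both through xfs_bmap_finish_one(). Illustrative
 * only; real users add quota, locking and state handling around this.
 */
#if 0
static int
example_move_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*donor,
	struct xfs_inode	*target,
	struct xfs_bmbt_irec	*irec)
{
	int			error;

	error = xfs_bmap_unmap_extent(tp, donor, irec);
	if (error)
		return error;
	return xfs_bmap_map_extent(tp, target, irec);
}
#endif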
6022
6023 /*
6024 * Process one of the deferred bmap operations. On return, *blockcount
6025 * holds the number of blocks still left to process for this intent.
6026 */
6027 int
6028 xfs_bmap_finish_one(
6029 struct xfs_trans *tp,
6030 struct xfs_inode *ip,
6031 enum xfs_bmap_intent_type type,
6032 int whichfork,
6033 xfs_fileoff_t startoff,
6034 xfs_fsblock_t startblock,
6035 xfs_filblks_t *blockcount,
6036 xfs_exntst_t state)
6037 {
6038 int error = 0;
6039
6040 ASSERT(tp->t_firstblock == NULLFSBLOCK);
6041
6042 trace_xfs_bmap_deferred(tp->t_mountp,
6043 XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
6044 XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
6045 ip->i_ino, whichfork, startoff, *blockcount, state);
6046
6047 if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
6048 return -EFSCORRUPTED;
6049
6050 if (XFS_TEST_ERROR(false, tp->t_mountp,
6051 XFS_ERRTAG_BMAP_FINISH_ONE))
6052 return -EIO;
6053
6054 switch (type) {
6055 case XFS_BMAP_MAP:
6056 error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
6057 startblock, 0);
6058 *blockcount = 0;
6059 break;
6060 case XFS_BMAP_UNMAP:
6061 error = __xfs_bunmapi(tp, ip, startoff, blockcount,
6062 XFS_BMAPI_REMAP, 1);
6063 break;
6064 default:
6065 ASSERT(0);
6066 error = -EFSCORRUPTED;
6067 }
6068
6069 return error;
6070 }
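
/*
 * Note the *blockcount contract: a MAP request always completes in one
 * call (*blockcount is zeroed above), while an UNMAP may make only
 * partial progress, leaving the count of blocks still to be unmapped in
 * *blockcount so that the deferred-op machinery can requeue the intent
 * for the remainder.
 */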
6071
6072 /* Check that an inode's extent does not have invalid flags or bad ranges. */
6073 xfs_failaddr_t
6074 xfs_bmap_validate_extent(
6075 struct xfs_inode *ip,
6076 int whichfork,
6077 struct xfs_bmbt_irec *irec)
6078 {
6079 struct xfs_mount *mp = ip->i_mount;
6080 xfs_fsblock_t endfsb;
6081 bool isrt;
6082
6083 isrt = XFS_IS_REALTIME_INODE(ip);
6084 endfsb = irec->br_startblock + irec->br_blockcount - 1;
6085 if (isrt) {
6086 if (!xfs_verify_rtbno(mp, irec->br_startblock))
6087 return __this_address;
6088 if (!xfs_verify_rtbno(mp, endfsb))
6089 return __this_address;
6090 } else {
6091 if (!xfs_verify_fsbno(mp, irec->br_startblock))
6092 return __this_address;
6093 if (!xfs_verify_fsbno(mp, endfsb))
6094 return __this_address;
6095 if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
6096 XFS_FSB_TO_AGNO(mp, endfsb))
6097 return __this_address;
6098 }
6099 if (irec->br_state != XFS_EXT_NORM) {
6100 if (whichfork != XFS_DATA_FORK)
6101 return __this_address;
6102 if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
6103 return __this_address;
6104 }
6105 return NULL;
6106 }
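
/*
 * The AG check above matters because xfs_fsblock_t packs an AG number and
 * an AG-relative block number into a single value, so a physically
 * contiguous extent can never span an AG boundary. If br_startblock and
 * endfsb decode to different AGs, the record describes an impossible run
 * even though each endpoint verifies on its own.
 */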