/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libxfs_priv.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_errortag.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_attr_leaf.h"
#include "xfs_quota_defs.h"
#include "xfs_rmap.h"
#include "xfs_ag_resv.h"
#include "xfs_refcount.h"


kmem_zone_t		*xfs_bmap_free_item_zone;

/*
 * Miscellaneous helper functions
 */

/*
 * Compute and fill in the value of the maximum depth of a bmap btree
 * in this filesystem.  Done once, during mount.
 */
void
xfs_bmap_compute_maxlevels(
	xfs_mount_t	*mp,		/* file system mount structure */
	int		whichfork)	/* data or attr fork */
{
	int		level;		/* btree level */
	uint		maxblocks;	/* max blocks at this level */
	uint		maxleafents;	/* max leaf entries possible */
	int		maxrootrecs;	/* max records in root block */
	int		minleafrecs;	/* min records in leaf block */
	int		minnoderecs;	/* min records in node block */
	int		sz;		/* root block size */

	/*
	 * The maximum number of extents in a file, hence the maximum
	 * number of leaf entries, is controlled by the type of di_nextents
	 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
	 * (a signed 16-bit number, xfs_aextnum_t).
	 *
	 * Note that we can no longer assume that if we are in ATTR1 that
	 * the fork offset of all the inodes will be
	 * (xfs_default_attroffset(ip) >> 3) because we could have mounted
	 * with ATTR2 and then mounted back with ATTR1, keeping the
	 * di_forkoff's fixed but probably at various positions. Therefore,
	 * for both ATTR1 and ATTR2 we have to assume the worst case scenario
	 * of a minimum size available.
	 */
	if (whichfork == XFS_DATA_FORK) {
		maxleafents = MAXEXTNUM;
		sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
	} else {
		maxleafents = MAXAEXTNUM;
		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
	}
	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
	minleafrecs = mp->m_bmap_dmnr[0];
	minnoderecs = mp->m_bmap_dmnr[1];
	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
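	/*
	 * Walk up from the leaf level, dividing by the minimum fanout at
	 * each level, until everything fits in a single (root) block; the
	 * number of iterations is the worst-case tree depth.
	 */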
	for (level = 1; maxblocks > 1; level++) {
		if (maxblocks <= maxrootrecs)
			maxblocks = 1;
		else
			maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
	}
	mp->m_bm_maxlevels[whichfork] = level;
}

STATIC int				/* error */
xfs_bmbt_lookup_eq(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*irec,
	int			*stat)	/* success/failure */
{
	cur->bc_rec.b = *irec;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

STATIC int				/* error */
xfs_bmbt_lookup_first(
	struct xfs_btree_cur	*cur,
	int			*stat)	/* success/failure */
{
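	/*
	 * A key of all zeroes sorts before any real bmbt record, so a
	 * greater-than-or-equal lookup from here lands on the leftmost
	 * record in the tree.
	 */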
	cur->bc_rec.b.br_startoff = 0;
	cur->bc_rec.b.br_startblock = 0;
	cur->bc_rec.b.br_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Check if the inode needs to be converted to btree format.
 */
static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
{
	return whichfork != XFS_COW_FORK &&
		XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
		XFS_IFORK_NEXTENTS(ip, whichfork) >
			XFS_IFORK_MAXEXT(ip, whichfork);
}

/*
 * Check if the inode should be converted to extent format.
 */
static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
{
	return whichfork != XFS_COW_FORK &&
		XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
		XFS_IFORK_NEXTENTS(ip, whichfork) <=
			XFS_IFORK_MAXEXT(ip, whichfork);
}

/*
 * Update the record referred to by cur to the value given by irec
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_bmbt_update(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*irec)
{
	union xfs_btree_rec	rec;

	xfs_bmbt_disk_set_all(&rec.bmbt, irec);
	return xfs_btree_update(cur, &rec);
}

/*
 * Compute the worst-case number of indirect blocks that will be used
 * for ip's delayed extent of length "len".
 */
STATIC xfs_filblks_t
xfs_bmap_worst_indlen(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_filblks_t	len)		/* delayed extent length */
{
	int		level;		/* btree level number */
	int		maxrecs;	/* maximum record count at this level */
	xfs_mount_t	*mp;		/* mount structure */
	xfs_filblks_t	rval;		/* return value */

	mp = ip->i_mount;
	maxrecs = mp->m_bmap_dmxr[0];
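	/*
	 * Worst case: the delayed extent fragments into "len" single-block
	 * extents, i.e. one record per block.  Count the btree blocks needed
	 * to hold that many records: ceil(len / maxrecs) leaves, then the
	 * same ceiling again at each node level above, summed.
	 */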
	for (level = 0, rval = 0;
	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
	     level++) {
		len += maxrecs - 1;
		do_div(len, maxrecs);
		rval += len;
		if (len == 1)
			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
				level - 1;
		if (level == 0)
			maxrecs = mp->m_bmap_dmxr[1];
	}
	return rval;
}

/*
 * Calculate the default attribute fork offset for newly created inodes.
 */
uint
xfs_default_attroffset(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	uint			offset;

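	/*
	 * 256 byte inodes leave the attr fork only the minimal btree-root
	 * space at the end of the literal area; larger inodes place the
	 * fork boundary at a fixed, more generous offset.
	 */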
	if (mp->m_sb.sb_inodesize == 256) {
		offset = XFS_LITINO(mp, ip->i_d.di_version) -
				XFS_BMDR_SPACE_CALC(MINABTPTRS);
	} else {
		offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
	}

	ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version));
	return offset;
}

/*
 * Helper routine to reset inode di_forkoff field when switching
 * attribute fork from local to extent format - we reset it where
 * possible to make space available for inline data fork extents.
 */
STATIC void
xfs_bmap_forkoff_reset(
	xfs_inode_t	*ip,
	int		whichfork)
{
	if (whichfork == XFS_ATTR_FORK &&
	    ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
	    ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;

		if (dfl_forkoff > ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = dfl_forkoff;
	}
}

#ifdef DEBUG
STATIC struct xfs_buf *
xfs_bmap_get_bp(
	struct xfs_btree_cur	*cur,
	xfs_fsblock_t		bno)
{
	struct xfs_log_item_desc *lidp;
	int			i;

	if (!cur)
		return NULL;

	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
		if (!cur->bc_bufs[i])
			break;
		if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
			return cur->bc_bufs[i];
	}

	/* Chase down all the log items to see if the bp is there */
	list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
		struct xfs_buf_log_item	*bip;
		bip = (struct xfs_buf_log_item *)lidp->lid_item;
		if (bip->bli_item.li_type == XFS_LI_BUF &&
		    XFS_BUF_ADDR(bip->bli_buf) == bno)
			return bip->bli_buf;
	}

	return NULL;
}

STATIC void
xfs_check_block(
	struct xfs_btree_block	*block,
	xfs_mount_t		*mp,
	int			root,
	short			sz)
{
	int			i, j, dmxr;
	__be64			*pp, *thispa;	/* pointer to block address */
	xfs_bmbt_key_t		*prevp, *keyp;

	ASSERT(be16_to_cpu(block->bb_level) > 0);

	prevp = NULL;
	for (i = 1; i <= xfs_btree_get_numrecs(block); i++) {
		dmxr = mp->m_bmap_dmxr[0];
		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);

		if (prevp) {
			ASSERT(be64_to_cpu(prevp->br_startoff) <
			       be64_to_cpu(keyp->br_startoff));
		}
		prevp = keyp;

		/*
		 * Compare the block numbers to see if there are dups.
		 */
		if (root)
			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
		else
			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);

		for (j = i + 1; j <= be16_to_cpu(block->bb_numrecs); j++) {
			if (root)
				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
			else
				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
			if (*thispa == *pp) {
				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
					__func__, j, i,
					(unsigned long long)be64_to_cpu(*thispa));
				panic("%s: ptrs are equal in node\n",
					__func__);
			}
		}
	}
}

/*
 * Check that the extents for the inode ip are in the right order in all
 * btree leaves. This becomes prohibitively expensive for large extent count
 * files, so don't bother with inodes that have more than 10,000 extents in
 * them. The btree record ordering checks will still be done, so for such
 * large bmapbt constructs those checks will catch most corruptions.
 */
STATIC void
xfs_bmap_check_leaf_extents(
	xfs_btree_cur_t		*cur,	/* btree cursor or null */
	xfs_inode_t		*ip,	/* incore inode pointer */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_extnum_t		i=0, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
	int			bp_release = 0;

	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
		return;
	}

	/* skip large extent count inodes */
	if (ip->i_d.di_nextents > 10000)
		return;

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	ASSERT(bno != NULLFSBLOCK);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		/* See if buf is in cur first */
		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
		if (level == 0)
			break;

		/*
		 * Check this block for basic sanity (increasing keys and
		 * no duplicate blocks).
		 */

		xfs_check_block(block, mp, 0, 0);
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(mp,
					xfs_verify_fsbno(mp, bno), error0);
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	i = 0;

	/*
	 * Loop over all leaf nodes checking that all extents are in the
	 * right order.
	 */
	for (;;) {
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;

		num_recs = xfs_btree_get_numrecs(block);

		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);

		/*
		 * Check all the extents to make sure they are OK.
		 * If we had a previous block, the last entry should
		 * conform with the first entry in this one.
		 */
		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
		if (i) {
			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
			       xfs_bmbt_disk_get_blockcount(&last) <=
			       xfs_bmbt_disk_get_startoff(ep));
		}
		for (j = 1; j < num_recs; j++) {
			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
			       xfs_bmbt_disk_get_blockcount(ep) <=
			       xfs_bmbt_disk_get_startoff(nextp));
			ep = nextp;
		}

		last = *ep;
		i += num_recs;
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;

		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
	}

	return;

error0:
	xfs_warn(mp, "%s: at error0", __func__);
	if (bp_release)
		xfs_trans_brelse(NULL, bp);
error_norelse:
	xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
		__func__, i);
	panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
	return;
}

/*
 * Validate that the bmbt_irecs being returned from bmapi are valid
 * given the caller's original parameters.  Specifically check the
 * ranges of the returned irecs to ensure that they only extend beyond
 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 */
STATIC void
xfs_bmap_validate_ret(
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	int			flags,
	xfs_bmbt_irec_t		*mval,
	int			nmap,
	int			ret_nmap)
{
	int			i;	/* index to map values */

	ASSERT(ret_nmap <= nmap);

	for (i = 0; i < ret_nmap; i++) {
		ASSERT(mval[i].br_blockcount > 0);
		if (!(flags & XFS_BMAPI_ENTIRE)) {
			ASSERT(mval[i].br_startoff >= bno);
			ASSERT(mval[i].br_blockcount <= len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
			       bno + len);
		} else {
			ASSERT(mval[i].br_startoff < bno + len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
			       bno);
		}
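		/*
		 * Returned mappings must be adjacent: each one begins at
		 * the file offset where the previous one ended.
		 */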
		ASSERT(i == 0 ||
		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
		       mval[i].br_startoff);
		ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
		       mval[i].br_startblock != HOLESTARTBLOCK);
		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
		       mval[i].br_state == XFS_EXT_UNWRITTEN);
	}
}

#else
#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
#define	xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)	do { } while (0)
#endif /* DEBUG */

/*
 * bmap free list manipulation functions
 */

/*
 * Add the extent to the list of extents to be freed at transaction end.
 * The list is maintained sorted (by block number).
 */
void
xfs_bmap_add_free(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	xfs_fsblock_t			bno,
	xfs_filblks_t			len,
	struct xfs_owner_info		*oinfo)
{
	struct xfs_extent_free_item	*new;		/* new element */
#ifdef DEBUG
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;

	ASSERT(bno != NULLFSBLOCK);
	ASSERT(len > 0);
	ASSERT(len <= MAXEXTLEN);
	ASSERT(!isnullstartblock(bno));
	agno = XFS_FSB_TO_AGNO(mp, bno);
	agbno = XFS_FSB_TO_AGBNO(mp, bno);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno < mp->m_sb.sb_agblocks);
	ASSERT(len < mp->m_sb.sb_agblocks);
	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
#endif
	ASSERT(xfs_bmap_free_item_zone != NULL);

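	/*
	 * The extent is queued as a deferred operation; the space is not
	 * actually freed until the deferred ops are finished at
	 * transaction commit.
	 */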
	new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
	new->xefi_startblock = bno;
	new->xefi_blockcount = (xfs_extlen_t)len;
	if (oinfo)
		new->xefi_oinfo = *oinfo;
	else
		xfs_rmap_skip_owner_update(&new->xefi_oinfo);
	trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(mp, bno), 0,
			XFS_FSB_TO_AGBNO(mp, bno), len);
	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
}

/*
 * Inode fork format manipulation functions
 */

/*
 * Transform a btree format file with only one leaf node, where the
 * extents list will fit in the inode, into an extents format file.
 * Since the file extents are already in-core, all we have to do is
 * give up the space for the btree root and pitch the leaf block.
 */
STATIC int				/* error */
xfs_bmap_btree_to_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			*logflagsp, /* inode logging flags */
	int			whichfork)  /* data or attr fork */
{
	/* REFERENCED */
	struct xfs_btree_block	*cblock;/* child btree block */
	xfs_fsblock_t		cbno;	/* child block number */
	xfs_buf_t		*cbp;	/* child block's buffer */
	int			error;	/* error return value */
	xfs_ifork_t		*ifp;	/* inode fork data */
	xfs_mount_t		*mp;	/* mount point structure */
	__be64			*pp;	/* ptr to block address */
	struct xfs_btree_block	*rblock;/* root btree block */
	struct xfs_owner_info	oinfo;

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(whichfork != XFS_COW_FORK);
	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
	rblock = ifp->if_broot;
	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
	cbno = be64_to_cpu(*pp);
	*logflagsp = 0;
#ifdef DEBUG
	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
			xfs_btree_check_lptr(cur, cbno, 1));
#endif
	error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
				&xfs_bmbt_buf_ops);
	if (error)
		return error;
	cblock = XFS_BUF_TO_BLOCK(cbp);
	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
		return error;
	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
	xfs_bmap_add_free(mp, cur->bc_private.b.dfops, cbno, 1, &oinfo);
	ip->i_d.di_nblocks--;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
	xfs_trans_binval(tp, cbp);
	if (cur->bc_bufs[0] == cbp)
		cur->bc_bufs[0] = NULL;
	xfs_iroot_realloc(ip, -1, whichfork);
	ASSERT(ifp->if_broot == NULL);
	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
	return 0;
}

/*
 * Convert an extents-format file into a btree-format file.
 * The new file will have a root block (in the inode) and a single child block.
 */
STATIC int					/* error */
xfs_bmap_extents_to_btree(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first-block-allocated */
	struct xfs_defer_ops	*dfops,		/* blocks freed in xaction */
	xfs_btree_cur_t		**curp,		/* cursor returned to caller */
	int			wasdel,		/* converting a delayed alloc */
	int			*logflagsp,	/* inode logging flags */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
	xfs_buf_t		*abp;		/* buffer for ablock */
	xfs_alloc_arg_t		args;		/* allocation arguments */
	xfs_bmbt_rec_t		*arp;		/* child record pointer */
	struct xfs_btree_block	*block;		/* btree root block */
	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
	int			error;		/* error return value */
	xfs_ifork_t		*ifp;		/* inode fork pointer */
	xfs_bmbt_key_t		*kp;		/* root block key pointer */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	rec;
	xfs_extnum_t		cnt = 0;

	mp = ip->i_mount;
	ASSERT(whichfork != XFS_COW_FORK);
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);

	/*
	 * Make space in the inode incore.
	 */
	xfs_iroot_realloc(ip, 1, whichfork);
	ifp->if_flags |= XFS_IFBROOT;

	/*
	 * Fill in the root.
	 */
	block = ifp->if_broot;
	xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
				 XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
				 XFS_BTREE_LONG_PTRS);
	/*
	 * Need a cursor.  Can't allocate until bb_level is filled in.
	 */
	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
	cur->bc_private.b.firstblock = *firstblock;
	cur->bc_private.b.dfops = dfops;
	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
	/*
	 * Convert to a btree with two levels, one record in root.
	 */
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = mp;
	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
	args.firstblock = *firstblock;
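	/*
	 * Pick an allocation target: near the inode if nothing has been
	 * allocated in this transaction yet, the low-space policy if we
	 * are running short, otherwise near the previously allocated block.
	 */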
	if (*firstblock == NULLFSBLOCK) {
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
	} else if (dfops->dop_low) {
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = *firstblock;
	} else {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.fsbno = *firstblock;
	}
	args.minlen = args.maxlen = args.prod = 1;
	args.wasdel = wasdel;
	*logflagsp = 0;
	if ((error = xfs_alloc_vextent(&args))) {
		xfs_iroot_realloc(ip, -1, whichfork);
		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
		return error;
	}

	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
		xfs_iroot_realloc(ip, -1, whichfork);
		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
		return -ENOSPC;
	}
	/*
	 * Allocation can't fail, the space was reserved.
	 */
	ASSERT(*firstblock == NULLFSBLOCK ||
	       args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
	cur->bc_private.b.allocated++;
	ip->i_d.di_nblocks++;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
	abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
	/*
	 * Fill in the child block.
	 */
	abp->b_ops = &xfs_bmbt_buf_ops;
	ablock = XFS_BUF_TO_BLOCK(abp);
	xfs_btree_init_block_int(mp, ablock, abp->b_bn,
				XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
				XFS_BTREE_LONG_PTRS);

	for_each_xfs_iext(ifp, &icur, &rec) {
		if (isnullstartblock(rec.br_startblock))
			continue;
		arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
		xfs_bmbt_disk_set_all(arp, &rec);
		cnt++;
	}
	ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
	xfs_btree_set_numrecs(ablock, cnt);

	/*
	 * Fill in the root key and pointer.
	 */
	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
						be16_to_cpu(block->bb_level)));
	*pp = cpu_to_be64(args.fsbno);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
	ASSERT(*curp == NULL);
	*curp = cur;
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
	return 0;
}

/*
 * Convert a local file to an extents file.
 * This code is out of bounds for data forks of regular files,
 * since the file data needs to get logged so things will stay consistent.
 * (The bmap-level manipulations are ok, though).
 */
void
xfs_bmap_local_to_extents_empty(
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);

	ASSERT(whichfork != XFS_COW_FORK);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
	ASSERT(ifp->if_bytes == 0);
	ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);

	xfs_bmap_forkoff_reset(ip, whichfork);
	ifp->if_flags &= ~XFS_IFINLINE;
	ifp->if_flags |= XFS_IFEXTENTS;
	ifp->if_u1.if_root = NULL;
	ifp->if_height = 0;
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
}


STATIC int				/* error */
xfs_bmap_local_to_extents(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_fsblock_t	*firstblock,	/* first block allocated in xaction */
	xfs_extlen_t	total,		/* total blocks needed by transaction */
	int		*logflagsp,	/* inode logging flags */
	int		whichfork,
	void		(*init_fn)(struct xfs_trans *tp,
				   struct xfs_buf *bp,
				   struct xfs_inode *ip,
				   struct xfs_ifork *ifp))
{
	int		error = 0;
	int		flags;		/* logging flags returned */
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_alloc_arg_t	args;		/* allocation arguments */
	xfs_buf_t	*bp;		/* buffer for extent block */
	struct xfs_bmbt_irec rec;
	struct xfs_iext_cursor icur;

	/*
	 * We don't want to deal with the case of keeping inode data inline yet.
	 * So sending the data fork of a regular inode is invalid.
	 */
	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);

	if (!ifp->if_bytes) {
		xfs_bmap_local_to_extents_empty(ip, whichfork);
		flags = XFS_ILOG_CORE;
		goto done;
	}

	flags = 0;
	error = 0;
	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS)) == XFS_IFINLINE);
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = ip->i_mount;
	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
	args.firstblock = *firstblock;
	/*
	 * Allocate a block.  We know we need only one, since the
	 * file currently fits in an inode.
	 */
	if (*firstblock == NULLFSBLOCK) {
		args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
		args.type = XFS_ALLOCTYPE_START_BNO;
	} else {
		args.fsbno = *firstblock;
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
	}
	args.total = total;
	args.minlen = args.maxlen = args.prod = 1;
	error = xfs_alloc_vextent(&args);
	if (error)
		goto done;

	/* Can't fail, the space was reserved. */
	ASSERT(args.fsbno != NULLFSBLOCK);
	ASSERT(args.len == 1);
	*firstblock = args.fsbno;
	bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);

	/*
	 * Initialize the block, copy the data and log the remote buffer.
	 *
	 * The callout is responsible for logging because the remote format
	 * might differ from the local format and thus we don't know how much
	 * to log here. Note that init_fn must also set the buffer log item
	 * type correctly.
	 */
	init_fn(tp, bp, ip, ifp);

	/* account for the change in fork size */
	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
	xfs_bmap_local_to_extents_empty(ip, whichfork);
	flags |= XFS_ILOG_CORE;

	ifp->if_u1.if_root = NULL;
	ifp->if_height = 0;

	rec.br_startoff = 0;
	rec.br_startblock = args.fsbno;
	rec.br_blockcount = 1;
	rec.br_state = XFS_EXT_NORM;
	xfs_iext_first(ifp, &icur);
	xfs_iext_insert(ip, &icur, &rec, 0);

	XFS_IFORK_NEXT_SET(ip, whichfork, 1);
	ip->i_d.di_nblocks = 1;
	xfs_trans_mod_dquot_byino(tp, ip,
		XFS_TRANS_DQ_BCOUNT, 1L);
	flags |= xfs_ilog_fext(whichfork);

done:
	*logflagsp = flags;
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle btree format files.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_btree(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first block allocated */
	struct xfs_defer_ops	*dfops,		/* blocks to free at commit */
	int			*flags)		/* inode logging flags */
{
	xfs_btree_cur_t		*cur;		/* btree cursor */
	int			error;		/* error return value */
	xfs_mount_t		*mp;		/* file system mount struct */
	int			stat;		/* newroot status */

	mp = ip->i_mount;
	if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
		*flags |= XFS_ILOG_DBROOT;
	else {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
		cur->bc_private.b.dfops = dfops;
		cur->bc_private.b.firstblock = *firstblock;
		error = xfs_bmbt_lookup_first(cur, &stat);
		if (error)
			goto error0;
		/* must be at least one entry */
		XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
			goto error0;
		if (stat == 0) {
			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
			return -ENOSPC;
		}
		*firstblock = cur->bc_private.b.firstblock;
		cur->bc_private.b.allocated = 0;
		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	}
	return 0;
error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle extents format files.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_extents(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first block allocated */
	struct xfs_defer_ops	*dfops,		/* blocks to free at commit */
	int			*flags)		/* inode logging flags */
{
	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
	int			error;		/* error return value */

	if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
		return 0;
	cur = NULL;
	error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops, &cur, 0,
		flags, XFS_DATA_FORK);
	if (cur) {
		cur->bc_private.b.allocated = 0;
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle local format files. Each
 * different data fork content type needs a different callout to do the
 * conversion. Some are basic and only require special block initialisation
 * callouts for the data formatting, others (directories) are so specialised
 * they handle everything themselves.
 *
 * XXX (dgc): investigate whether directory conversion can use the generic
 * formatting callout. It should be possible - it's just a very complex
 * formatter.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_local(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first block allocated */
	struct xfs_defer_ops	*dfops,		/* blocks to free at commit */
	int			*flags)		/* inode logging flags */
{
	xfs_da_args_t		dargs;		/* args for dir/attr code */

	if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
		return 0;

	if (S_ISDIR(VFS_I(ip)->i_mode)) {
		memset(&dargs, 0, sizeof(dargs));
		dargs.geo = ip->i_mount->m_dir_geo;
		dargs.dp = ip;
		dargs.firstblock = firstblock;
		dargs.dfops = dfops;
		dargs.total = dargs.geo->fsbcount;
		dargs.whichfork = XFS_DATA_FORK;
		dargs.trans = tp;
		return xfs_dir2_sf_to_block(&dargs);
	}

	if (S_ISLNK(VFS_I(ip)->i_mode))
		return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
						 flags, XFS_DATA_FORK,
						 xfs_symlink_local_to_remote);

	/* should only be called for types that support local format data */
	ASSERT(0);
	return -EFSCORRUPTED;
}

/*
 * Convert inode from non-attributed to attributed.
 * Must not be in a transaction, ip must not be locked.
 */
int						/* error code */
xfs_bmap_add_attrfork(
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			size,		/* space new attribute needs */
	int			rsvd)		/* xact may use reserved blks */
{
	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
	struct xfs_defer_ops	dfops;		/* freed extent records */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_trans_t		*tp;		/* transaction pointer */
	int			blks;		/* space reservation */
	int			version = 1;	/* superblock attr version */
	int			logflags;	/* logging flags */
	int			error;		/* error return value */

	ASSERT(XFS_IFORK_Q(ip) == 0);

	mp = ip->i_mount;
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));

	blks = XFS_ADDAFORK_SPACE_RES(mp);

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
			rsvd ? XFS_TRANS_RESERVE : 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
			XFS_QMOPT_RES_REGBLKS);
	if (error)
		goto trans_cancel;
	if (XFS_IFORK_Q(ip))
		goto trans_cancel;
	if (ip->i_d.di_anextents != 0) {
		error = -EFSCORRUPTED;
		goto trans_cancel;
	}
	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
		/*
		 * For inodes coming from pre-6.2 filesystems.
		 */
		ASSERT(ip->i_d.di_aformat == 0);
		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	}

	xfs_trans_ijoin(tp, ip, 0);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

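	/*
	 * Compute where the new attribute fork will start, based on the
	 * current data fork format and the size the caller asked for.
	 */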
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_DEV:
		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
		if (!ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
		else if (mp->m_flags & XFS_MOUNT_ATTR2)
			version = 2;
		break;
	default:
		ASSERT(0);
		error = -EINVAL;
		goto trans_cancel;
	}

	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
	ip->i_afp->if_flags = XFS_IFEXTENTS;
	logflags = 0;
	xfs_defer_init(&dfops, &firstblock);
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_LOCAL:
		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &dfops,
			&logflags);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
			&dfops, &logflags);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &dfops,
			&logflags);
		break;
	default:
		error = 0;
		break;
	}
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		goto bmap_cancel;
	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
		bool log_sb = false;

		spin_lock(&mp->m_sb_lock);
		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
			xfs_sb_version_addattr(&mp->m_sb);
			log_sb = true;
		}
		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
			xfs_sb_version_addattr2(&mp->m_sb);
			log_sb = true;
		}
		spin_unlock(&mp->m_sb_lock);
		if (log_sb)
			xfs_log_sb(tp);
	}

	error = xfs_defer_finish(&tp, &dfops);
	if (error)
		goto bmap_cancel;
	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

bmap_cancel:
	xfs_defer_cancel(&dfops);
trans_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * Internal and external extent tree search functions.
 */

/*
 * Read in extents from a btree-format inode.
 */
int
xfs_iread_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			state = xfs_bmap_fork_to_state(whichfork);
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	xfs_extnum_t		nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
	struct xfs_btree_block	*block = ifp->if_broot;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	new;
	xfs_fsblock_t		bno;
	struct xfs_buf		*bp;
	xfs_extnum_t		i, j;
	int			level;
	__be64			*pp;
	int			error;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			goto out;
		block = XFS_BUF_TO_BLOCK(bp);
		if (level == 0)
			break;
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(mp,
			xfs_verify_fsbno(mp, bno), out_brelse);
		xfs_trans_brelse(tp, bp);
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	i = 0;
	xfs_iext_first(ifp, &icur);

	/*
	 * Loop over all leaf nodes.  Copy information to the extent records.
	 */
	for (;;) {
		xfs_bmbt_rec_t	*frp;
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;

		num_recs = xfs_btree_get_numrecs(block);
		if (unlikely(i + num_recs > nextents)) {
			ASSERT(i + num_recs <= nextents);
			xfs_warn(ip->i_mount,
				"corrupt dinode %Lu, (btree extents).",
				(unsigned long long) ip->i_ino);
			XFS_CORRUPTION_ERROR(__func__,
				XFS_ERRLEVEL_LOW, ip->i_mount, block);
			error = -EFSCORRUPTED;
			goto out_brelse;
		}
		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		if (nextbno != NULLFSBLOCK)
			xfs_btree_reada_bufl(mp, nextbno, 1,
					     &xfs_bmbt_buf_ops);
		/*
		 * Copy records into the extent records.
		 */
		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
		for (j = 0; j < num_recs; j++, frp++, i++) {
			xfs_bmbt_disk_get_all(frp, &new);
			if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) {
				XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
						 XFS_ERRLEVEL_LOW, mp);
				error = -EFSCORRUPTED;
				goto out_brelse;
			}
			xfs_iext_insert(ip, &icur, &new, state);
			trace_xfs_read_extent(ip, &icur, state, _THIS_IP_);
			xfs_iext_next(ifp, &icur);
		}
		xfs_trans_brelse(tp, bp);
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			goto out;
		block = XFS_BUF_TO_BLOCK(bp);
	}

	if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) {
		error = -EFSCORRUPTED;
		goto out;
	}
	ASSERT(i == xfs_iext_count(ifp));

	ifp->if_flags |= XFS_IFEXTENTS;
	return 0;

out_brelse:
	xfs_trans_brelse(tp, bp);
out:
	xfs_iext_destroy(ifp);
	return error;
}

/*
 * Returns the file-relative block number of the first unused block(s) in the
 * given fork with at least "len" logically contiguous blocks free.  This is
 * the lowest-address hole if the fork has holes, else the first block past
 * the end of fork.  Returns 0 if the fork is currently local (in-inode).
 */
int						/* error */
xfs_bmap_first_unused(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_extlen_t		len,		/* size of hole to find */
	xfs_fileoff_t		*first_unused,	/* unused block */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	icur;
	xfs_fileoff_t		lastaddr = 0;
	xfs_fileoff_t		lowest, max;
	int			error;

	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);

	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		*first_unused = 0;
		return 0;
	}

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

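	/*
	 * Walk the extents in offset order; "max" tracks the first offset
	 * past everything seen so far, so a gap of at least "len" blocks
	 * before the next extent is the hole we want.
	 */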
	lowest = max = *first_unused;
	for_each_xfs_iext(ifp, &icur, &got) {
		/*
		 * See if the hole before this extent will work.
		 */
		if (got.br_startoff >= lowest + len &&
		    got.br_startoff - max >= len)
			break;
		lastaddr = got.br_startoff + got.br_blockcount;
		max = XFS_FILEOFF_MAX(lastaddr, lowest);
	}

	*first_unused = max;
	return 0;
}

/*
 * Returns the file-relative block number of the last block - 1 before
 * "last_block" (the input value) in the file.
 * This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 */
int						/* error */
xfs_bmap_last_before(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		*last_block,	/* last block */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	icur;
	int			error;

	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
	case XFS_DINODE_FMT_LOCAL:
		*last_block = 0;
		return 0;
	case XFS_DINODE_FMT_BTREE:
	case XFS_DINODE_FMT_EXTENTS:
		break;
	default:
		return -EIO;
	}

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
		*last_block = 0;
	return 0;
}

int
xfs_bmap_last_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*rec,
	int			*is_empty)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_iext_cursor	icur;
	int			error;

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	xfs_iext_last(ifp, &icur);
	if (!xfs_iext_get_extent(ifp, &icur, rec))
		*is_empty = 1;
	else
		*is_empty = 0;
	return 0;
}

/*
 * Check the last inode extent to determine whether this allocation will result
 * in blocks being allocated at the end of the file. When we allocate new data
 * blocks at the end of the file which do not start at the previous data block,
 * we will try to align the new blocks at stripe unit boundaries.
 *
 * Returns 1 in bma->aeof if the file (fork) is empty, as any new write will
 * then be at, or past, the EOF.
 */
STATIC int
xfs_bmap_isaeof(
	struct xfs_bmalloca	*bma,
	int			whichfork)
{
	struct xfs_bmbt_irec	rec;
	int			is_empty;
	int			error;

	bma->aeof = false;
	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
				     &is_empty);
	if (error)
		return error;

	if (is_empty) {
		bma->aeof = true;
		return 0;
	}

	/*
	 * Check if we are allocating at or past the last extent, or at least
	 * into the last delayed allocated extent.
	 */
	bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
		(bma->offset >= rec.br_startoff &&
		 isnullstartblock(rec.br_startblock));
	return 0;
}

/*
 * Returns the file-relative block number of the first block past eof in
 * the file.  This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 */
int
xfs_bmap_last_offset(
	struct xfs_inode	*ip,
	xfs_fileoff_t		*last_block,
	int			whichfork)
{
	struct xfs_bmbt_irec	rec;
	int			is_empty;
	int			error;

	*last_block = 0;

	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
		return 0;

	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		return -EIO;

	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
	if (error || is_empty)
		return error;

	*last_block = rec.br_startoff + rec.br_blockcount;
	return 0;
}

/*
 * Returns whether the selected fork of the inode has exactly one
 * block or not.  For the data fork we check this matches di_size,
 * implying the file's range is 0..bsize-1.
 */
int					/* 1=>1 block, 0=>otherwise */
xfs_bmap_one_block(
	xfs_inode_t	*ip,		/* incore inode */
	int		whichfork)	/* data or attr fork */
{
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	int		rval;		/* return value */
	xfs_bmbt_irec_t	s;		/* internal version of extent */
	struct xfs_iext_cursor icur;

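	/*
	 * Outside of DEBUG builds the data fork answer can be derived from
	 * the file size alone; DEBUG builds fall through and cross-check it
	 * against the extent record below.
	 */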
#ifndef DEBUG
	if (whichfork == XFS_DATA_FORK)
		return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
#endif	/* !DEBUG */
	if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
		return 0;
	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		return 0;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
	xfs_iext_first(ifp, &icur);
	xfs_iext_get_extent(ifp, &icur, &s);
	rval = s.br_startoff == 0 && s.br_blockcount == 1;
	if (rval && whichfork == XFS_DATA_FORK)
		ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
	return rval;
}

/*
 * Extent tree manipulation functions used during allocation.
 */

/*
 * Convert a delayed allocation to a real allocation.
 */
STATIC int				/* error */
xfs_bmap_add_extent_delay_real(
	struct xfs_bmalloca	*bma,
	int			whichfork)
{
	struct xfs_bmbt_irec	*new = &bma->got;
	int			error;	/* error return value */
	int			i;	/* temp state */
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
					/* left is 0, right is 1, prev is 2 */
	int			rval=0;	/* return value (logging flags) */
	int			state = xfs_bmap_fork_to_state(whichfork);
	xfs_filblks_t		da_new;	/* new count del alloc blocks used */
	xfs_filblks_t		da_old;	/* old count del alloc blocks used */
	xfs_filblks_t		temp=0;	/* value for da_new calculations */
	int			tmp_rval;	/* partial logging flags */
	struct xfs_mount	*mp;
	xfs_extnum_t		*nextents;
	struct xfs_bmbt_irec	old;

	mp = bma->ip->i_mount;
	ifp = XFS_IFORK_PTR(bma->ip, whichfork);
	ASSERT(whichfork != XFS_ATTR_FORK);
	nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents :
						&bma->ip->i_d.di_nextents);

	ASSERT(!isnullstartblock(new->br_startblock));
	ASSERT(!bma->cur ||
	       (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));

	XFS_STATS_INC(mp, xs_add_exlist);

#define	LEFT		r[0]
#define	RIGHT		r[1]
#define	PREV		r[2]

	/*
	 * Set up a bunch of variables to make the tests simpler.
	 */
	xfs_iext_get_extent(ifp, &bma->icur, &PREV);
	new_endoff = new->br_startoff + new->br_blockcount;
	ASSERT(isnullstartblock(PREV.br_startblock));
	ASSERT(PREV.br_startoff <= new->br_startoff);
	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);

	da_old = startblockval(PREV.br_startblock);
	da_new = 0;
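	/*
	 * da_old is the indirect-block reservation carried by the delalloc
	 * extent being converted; da_new is recomputed below for whatever
	 * delalloc remains after this allocation.
	 */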

	/*
	 * Set flags determining what part of the previous delayed allocation
	 * extent is being replaced by a real allocation.
	 */
	if (PREV.br_startoff == new->br_startoff)
		state |= BMAP_LEFT_FILLING;
	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
		state |= BMAP_RIGHT_FILLING;

	/*
	 * Check and set flags if this segment has a left neighbor.
	 * Don't set contiguous if the combined extent would be too large.
	 */
	if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
		state |= BMAP_LEFT_VALID;
		if (isnullstartblock(LEFT.br_startblock))
			state |= BMAP_LEFT_DELAY;
	}

	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
	    LEFT.br_state == new->br_state &&
	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
		state |= BMAP_LEFT_CONTIG;

	/*
	 * Check and set flags if this segment has a right neighbor.
	 * Don't set contiguous if the combined extent would be too large.
	 * Also check for all-three-contiguous being too large.
	 */
	if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
		state |= BMAP_RIGHT_VALID;
		if (isnullstartblock(RIGHT.br_startblock))
			state |= BMAP_RIGHT_DELAY;
	}

	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
	    new_endoff == RIGHT.br_startoff &&
	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
	    new->br_state == RIGHT.br_state &&
	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
		       BMAP_RIGHT_FILLING)) !=
		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
		       BMAP_RIGHT_FILLING) ||
	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
			<= MAXEXTLEN))
		state |= BMAP_RIGHT_CONTIG;

	error = 0;
	/*
	 * Switch out based on the FILLING and CONTIG state bits.
	 */
	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The left and right neighbors are both contiguous with new.
		 */
		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;

		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
		(*nextents)--;

		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_btree_delete(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_btree_decrement(bma->cur, 0, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, &LEFT);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The left neighbor is contiguous, the right is not.
		 */
		old = LEFT;
		LEFT.br_blockcount += PREV.br_blockcount;

		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, &LEFT);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The right neighbor is contiguous, the left is not.
		 */
		PREV.br_startblock = new->br_startblock;
		PREV.br_blockcount += RIGHT.br_blockcount;

		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, &PREV);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * Neither the left nor right neighbors are contiguous with
		 * the new one.
		 */
		PREV.br_startblock = new->br_startblock;
		PREV.br_state = new->br_state;
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);

		(*nextents)++;
		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
		/*
		 * Filling in the first part of a previous delayed allocation.
		 * The left neighbor is contiguous.
		 */
		old = LEFT;
		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
				startblockval(PREV.br_startblock));

		LEFT.br_blockcount += new->br_blockcount;

		PREV.br_blockcount = temp;
		PREV.br_startoff += new->br_blockcount;
		PREV.br_startblock = nullstartblock(da_new);

		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, &LEFT);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING:
		/*
		 * Filling in the first part of a previous delayed allocation.
		 * The left neighbor is not contiguous.
		 */
		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
		(*nextents)++;
		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}

		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
					bma->firstblock, bma->dfops,
					&bma->cur, 1, &tmp_rval, whichfork);
			rval |= tmp_rval;
			if (error)
				goto done;
		}

		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
			startblockval(PREV.br_startblock) -
			(bma->cur ? bma->cur->bc_private.b.allocated : 0));

		PREV.br_startoff = new_endoff;
		PREV.br_blockcount = temp;
		PREV.br_startblock = nullstartblock(da_new);
		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
		xfs_iext_prev(ifp, &bma->icur);
		break;

	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in the last part of a previous delayed allocation.
		 * The right neighbor is contiguous with the new allocation.
		 */
		old = RIGHT;
		RIGHT.br_startoff = new->br_startoff;
		RIGHT.br_startblock = new->br_startblock;
		RIGHT.br_blockcount += new->br_blockcount;

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, &RIGHT);
			if (error)
				goto done;
		}

		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
			startblockval(PREV.br_startblock));

		PREV.br_blockcount = temp;
		PREV.br_startblock = nullstartblock(da_new);

		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
		break;

	case BMAP_RIGHT_FILLING:
		/*
		 * Filling in the last part of a previous delayed allocation.
		 * The right neighbor is not contiguous.
		 */
		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
		(*nextents)++;
		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}

		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
				bma->firstblock, bma->dfops, &bma->cur, 1,
				&tmp_rval, whichfork);
			rval |= tmp_rval;
			if (error)
				goto done;
		}

		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
			startblockval(PREV.br_startblock) -
			(bma->cur ? bma->cur->bc_private.b.allocated : 0));

		PREV.br_startblock = nullstartblock(da_new);
		PREV.br_blockcount = temp;
		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
		xfs_iext_next(ifp, &bma->icur);
		break;

	case 0:
		/*
		 * Filling in the middle part of a previous delayed allocation.
		 * Contiguity is impossible here.
		 * This case is avoided almost all the time.
		 *
		 * We start with a delayed allocation:
		 *
		 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
		 *  PREV @ idx
		 *
		 * and we are allocating:
		 *                     +rrrrrrrrrrrrrrrrr+
		 *                            new
		 *
		 * and we set it up for insertion as:
		 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
		 *                            new
		 *  PREV @ idx          LEFT              RIGHT
		 *                      inserted at idx + 1
		 */
		old = PREV;

		/* LEFT is the new middle */
		LEFT = *new;

		/* RIGHT is the new right */
		RIGHT.br_state = PREV.br_state;
		RIGHT.br_startoff = new_endoff;
		RIGHT.br_blockcount =
			PREV.br_startoff + PREV.br_blockcount - new_endoff;
		RIGHT.br_startblock =
			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
					RIGHT.br_blockcount));

		/* truncate PREV */
		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
		PREV.br_startblock =
			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
					PREV.br_blockcount));
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);

		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
		xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
		(*nextents)++;
1945
1946 if (bma->cur == NULL)
1947 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1948 else {
1949 rval = XFS_ILOG_CORE;
1950 error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1951 if (error)
1952 goto done;
1953 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1954 error = xfs_btree_insert(bma->cur, &i);
1955 if (error)
1956 goto done;
1957 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1958 }
1959
1960 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1961 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1962 bma->firstblock, bma->dfops, &bma->cur,
1963 1, &tmp_rval, whichfork);
1964 rval |= tmp_rval;
1965 if (error)
1966 goto done;
1967 }
1968
1969 da_new = startblockval(PREV.br_startblock) +
1970 startblockval(RIGHT.br_startblock);
1971 break;
1972
1973 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1974 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1975 case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1976 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1977 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1978 case BMAP_LEFT_CONTIG:
1979 case BMAP_RIGHT_CONTIG:
1980 /*
1981 * These cases are all impossible.
1982 */
1983 ASSERT(0);
1984 }
1985
1986 /* add reverse mapping */
1987 error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
1988 if (error)
1989 goto done;
1990
1991 /* convert to a btree if necessary */
1992 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1993 int tmp_logflags; /* partial log flag return val */
1994
1995 ASSERT(bma->cur == NULL);
1996 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1997 bma->firstblock, bma->dfops, &bma->cur,
1998 da_old > 0, &tmp_logflags, whichfork);
1999 bma->logflags |= tmp_logflags;
2000 if (error)
2001 goto done;
2002 }
2003
2004 if (bma->cur) {
2005 da_new += bma->cur->bc_private.b.allocated;
2006 bma->cur->bc_private.b.allocated = 0;
2007 }
2008
2009 /* adjust for changes in reserved delayed indirect blocks */
2010 if (da_new != da_old) {
2011 ASSERT(state == 0 || da_new < da_old);
2012 error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
2013 false);
2014 }
2015
2016 xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
2017 done:
2018 if (whichfork != XFS_COW_FORK)
2019 bma->logflags |= rval;
2020 return error;
2021 #undef LEFT
2022 #undef RIGHT
2023 #undef PREV
2024 }
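/*
 * Illustrative sketch (userspace, not part of this file): the delayed
 * allocation conversion above dispatches on four state bits.  A minimal
 * model of that dispatch, with made-up flag values standing in for the
 * BMAP_* flags, might look like this:
 */
#include <stdio.h>

#define SK_LEFT_FILLING         (1 << 0)  /* new covers start of PREV */
#define SK_LEFT_CONTIG          (1 << 1)  /* new abuts the left neighbor */
#define SK_RIGHT_FILLING        (1 << 2)  /* new covers end of PREV */
#define SK_RIGHT_CONTIG         (1 << 3)  /* new abuts the right neighbor */

static const char *
sk_classify(int state)
{
        switch (state & (SK_LEFT_FILLING | SK_LEFT_CONTIG |
                         SK_RIGHT_FILLING | SK_RIGHT_CONTIG)) {
        case SK_LEFT_FILLING | SK_LEFT_CONTIG |
             SK_RIGHT_FILLING | SK_RIGHT_CONTIG:
                return "whole extent filled, merge with both neighbors";
        case SK_LEFT_FILLING | SK_RIGHT_FILLING:
                return "whole extent filled, no merge";
        case 0:
                return "middle filled, one extent becomes three";
        default:
                return "partial fill, one of the remaining cases";
        }
}

int
main(void)
{
        printf("%s\n", sk_classify(SK_LEFT_FILLING | SK_RIGHT_FILLING));
        return 0;
}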
2025
2026 /*
2027 * Convert an unwritten allocation to a real allocation or vice versa.
2028 */
2029 STATIC int /* error */
2030 xfs_bmap_add_extent_unwritten_real(
2031 struct xfs_trans *tp,
2032 xfs_inode_t *ip, /* incore inode pointer */
2033 int whichfork,
2034 struct xfs_iext_cursor *icur,
2035 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
2036 xfs_bmbt_irec_t *new, /* new data to add to file extents */
2037 xfs_fsblock_t *first, /* pointer to firstblock variable */
2038 struct xfs_defer_ops *dfops, /* list of extents to be freed */
2039 int *logflagsp) /* inode logging flags */
2040 {
2041 xfs_btree_cur_t *cur; /* btree cursor */
2042 int error; /* error return value */
2043 int i; /* temp state */
2044 xfs_ifork_t *ifp; /* inode fork pointer */
2045 xfs_fileoff_t new_endoff; /* end offset of new entry */
2046 xfs_bmbt_irec_t r[3]; /* neighbor extent entries */
2047 /* left is 0, right is 1, prev is 2 */
2048 int rval=0; /* return value (logging flags) */
2049 int state = xfs_bmap_fork_to_state(whichfork);
2050 struct xfs_mount *mp = ip->i_mount;
2051 struct xfs_bmbt_irec old;
2052
2053 *logflagsp = 0;
2054
2055 cur = *curp;
2056 ifp = XFS_IFORK_PTR(ip, whichfork);
2057
2058 ASSERT(!isnullstartblock(new->br_startblock));
2059
2060 XFS_STATS_INC(mp, xs_add_exlist);
2061
2062 #define LEFT r[0]
2063 #define RIGHT r[1]
2064 #define PREV r[2]
2065
2066 /*
2067 * Set up a bunch of variables to make the tests simpler.
2068 */
2069 error = 0;
2070 xfs_iext_get_extent(ifp, icur, &PREV);
2071 ASSERT(new->br_state != PREV.br_state);
2072 new_endoff = new->br_startoff + new->br_blockcount;
2073 ASSERT(PREV.br_startoff <= new->br_startoff);
2074 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2075
2076 /*
2077 * Set flags determining what part of the previous oldext allocation
2078 * extent is being replaced by a newext allocation.
2079 */
2080 if (PREV.br_startoff == new->br_startoff)
2081 state |= BMAP_LEFT_FILLING;
2082 if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2083 state |= BMAP_RIGHT_FILLING;
2084
2085 /*
2086 * Check and set flags if this segment has a left neighbor.
2087 * Don't set contiguous if the combined extent would be too large.
2088 */
2089 if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) {
2090 state |= BMAP_LEFT_VALID;
2091 if (isnullstartblock(LEFT.br_startblock))
2092 state |= BMAP_LEFT_DELAY;
2093 }
2094
2095 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2096 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2097 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2098 LEFT.br_state == new->br_state &&
2099 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2100 state |= BMAP_LEFT_CONTIG;
2101
2102 /*
2103 * Check and set flags if this segment has a right neighbor.
2104 * Don't set contiguous if the combined extent would be too large.
2105 * Also check for all-three-contiguous being too large.
2106 */
2107 if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) {
2108 state |= BMAP_RIGHT_VALID;
2109 if (isnullstartblock(RIGHT.br_startblock))
2110 state |= BMAP_RIGHT_DELAY;
2111 }
2112
2113 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2114 new_endoff == RIGHT.br_startoff &&
2115 new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2116 new->br_state == RIGHT.br_state &&
2117 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2118 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2119 BMAP_RIGHT_FILLING)) !=
2120 (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2121 BMAP_RIGHT_FILLING) ||
2122 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2123 <= MAXEXTLEN))
2124 state |= BMAP_RIGHT_CONTIG;
2125
2126 /*
2127 * Switch out based on the FILLING and CONTIG state bits.
2128 */
2129 switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2130 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2131 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2132 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2133 /*
2134 * Setting all of a previous oldext extent to newext.
2135 * The left and right neighbors are both contiguous with new.
2136 */
2137 LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
2138
2139 xfs_iext_remove(ip, icur, state);
2140 xfs_iext_remove(ip, icur, state);
2141 xfs_iext_prev(ifp, icur);
2142 xfs_iext_update_extent(ip, state, icur, &LEFT);
2143 XFS_IFORK_NEXT_SET(ip, whichfork,
2144 XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
2145 if (cur == NULL)
2146 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2147 else {
2148 rval = XFS_ILOG_CORE;
2149 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2150 if (error)
2151 goto done;
2152 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2153 if ((error = xfs_btree_delete(cur, &i)))
2154 goto done;
2155 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2156 if ((error = xfs_btree_decrement(cur, 0, &i)))
2157 goto done;
2158 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2159 if ((error = xfs_btree_delete(cur, &i)))
2160 goto done;
2161 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2162 if ((error = xfs_btree_decrement(cur, 0, &i)))
2163 goto done;
2164 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2165 error = xfs_bmbt_update(cur, &LEFT);
2166 if (error)
2167 goto done;
2168 }
2169 break;
2170
2171 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2172 /*
2173 * Setting all of a previous oldext extent to newext.
2174 * The left neighbor is contiguous, the right is not.
2175 */
2176 LEFT.br_blockcount += PREV.br_blockcount;
2177
2178 xfs_iext_remove(ip, icur, state);
2179 xfs_iext_prev(ifp, icur);
2180 xfs_iext_update_extent(ip, state, icur, &LEFT);
2181 XFS_IFORK_NEXT_SET(ip, whichfork,
2182 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2183 if (cur == NULL)
2184 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2185 else {
2186 rval = XFS_ILOG_CORE;
2187 error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
2188 if (error)
2189 goto done;
2190 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2191 if ((error = xfs_btree_delete(cur, &i)))
2192 goto done;
2193 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2194 if ((error = xfs_btree_decrement(cur, 0, &i)))
2195 goto done;
2196 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2197 error = xfs_bmbt_update(cur, &LEFT);
2198 if (error)
2199 goto done;
2200 }
2201 break;
2202
2203 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2204 /*
2205 * Setting all of a previous oldext extent to newext.
2206 * The right neighbor is contiguous, the left is not.
2207 */
2208 PREV.br_blockcount += RIGHT.br_blockcount;
2209 PREV.br_state = new->br_state;
2210
2211 xfs_iext_next(ifp, icur);
2212 xfs_iext_remove(ip, icur, state);
2213 xfs_iext_prev(ifp, icur);
2214 xfs_iext_update_extent(ip, state, icur, &PREV);
2215
2216 XFS_IFORK_NEXT_SET(ip, whichfork,
2217 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2218 if (cur == NULL)
2219 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2220 else {
2221 rval = XFS_ILOG_CORE;
2222 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2223 if (error)
2224 goto done;
2225 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2226 if ((error = xfs_btree_delete(cur, &i)))
2227 goto done;
2228 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2229 if ((error = xfs_btree_decrement(cur, 0, &i)))
2230 goto done;
2231 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2232 error = xfs_bmbt_update(cur, &PREV);
2233 if (error)
2234 goto done;
2235 }
2236 break;
2237
2238 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2239 /*
2240 * Setting all of a previous oldext extent to newext.
2241 * Neither the left nor right neighbors are contiguous with
2242 * the new one.
2243 */
2244 PREV.br_state = new->br_state;
2245 xfs_iext_update_extent(ip, state, icur, &PREV);
2246
2247 if (cur == NULL)
2248 rval = XFS_ILOG_DEXT;
2249 else {
2250 rval = 0;
2251 error = xfs_bmbt_lookup_eq(cur, new, &i);
2252 if (error)
2253 goto done;
2254 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2255 error = xfs_bmbt_update(cur, &PREV);
2256 if (error)
2257 goto done;
2258 }
2259 break;
2260
2261 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2262 /*
2263 * Setting the first part of a previous oldext extent to newext.
2264 * The left neighbor is contiguous.
2265 */
2266 LEFT.br_blockcount += new->br_blockcount;
2267
2268 old = PREV;
2269 PREV.br_startoff += new->br_blockcount;
2270 PREV.br_startblock += new->br_blockcount;
2271 PREV.br_blockcount -= new->br_blockcount;
2272
2273 xfs_iext_update_extent(ip, state, icur, &PREV);
2274 xfs_iext_prev(ifp, icur);
2275 xfs_iext_update_extent(ip, state, icur, &LEFT);
2276
2277 if (cur == NULL)
2278 rval = XFS_ILOG_DEXT;
2279 else {
2280 rval = 0;
2281 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2282 if (error)
2283 goto done;
2284 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2285 error = xfs_bmbt_update(cur, &PREV);
2286 if (error)
2287 goto done;
2288 error = xfs_btree_decrement(cur, 0, &i);
2289 if (error)
2290 goto done;
2291 error = xfs_bmbt_update(cur, &LEFT);
2292 if (error)
2293 goto done;
2294 }
2295 break;
2296
2297 case BMAP_LEFT_FILLING:
2298 /*
2299 * Setting the first part of a previous oldext extent to newext.
2300 * The left neighbor is not contiguous.
2301 */
2302 old = PREV;
2303 PREV.br_startoff += new->br_blockcount;
2304 PREV.br_startblock += new->br_blockcount;
2305 PREV.br_blockcount -= new->br_blockcount;
2306
2307 xfs_iext_update_extent(ip, state, icur, &PREV);
2308 xfs_iext_insert(ip, icur, new, state);
2309 XFS_IFORK_NEXT_SET(ip, whichfork,
2310 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2311 if (cur == NULL)
2312 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2313 else {
2314 rval = XFS_ILOG_CORE;
2315 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2316 if (error)
2317 goto done;
2318 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2319 error = xfs_bmbt_update(cur, &PREV);
2320 if (error)
2321 goto done;
2322 cur->bc_rec.b = *new;
2323 if ((error = xfs_btree_insert(cur, &i)))
2324 goto done;
2325 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2326 }
2327 break;
2328
2329 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2330 /*
2331 * Setting the last part of a previous oldext extent to newext.
2332 * The right neighbor is contiguous with the new allocation.
2333 */
2334 old = PREV;
2335 PREV.br_blockcount -= new->br_blockcount;
2336
2337 RIGHT.br_startoff = new->br_startoff;
2338 RIGHT.br_startblock = new->br_startblock;
2339 RIGHT.br_blockcount += new->br_blockcount;
2340
2341 xfs_iext_update_extent(ip, state, icur, &PREV);
2342 xfs_iext_next(ifp, icur);
2343 xfs_iext_update_extent(ip, state, icur, &RIGHT);
2344
2345 if (cur == NULL)
2346 rval = XFS_ILOG_DEXT;
2347 else {
2348 rval = 0;
2349 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2350 if (error)
2351 goto done;
2352 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2353 error = xfs_bmbt_update(cur, &PREV);
2354 if (error)
2355 goto done;
2356 error = xfs_btree_increment(cur, 0, &i);
2357 if (error)
2358 goto done;
2359 error = xfs_bmbt_update(cur, &RIGHT);
2360 if (error)
2361 goto done;
2362 }
2363 break;
2364
2365 case BMAP_RIGHT_FILLING:
2366 /*
2367 * Setting the last part of a previous oldext extent to newext.
2368 * The right neighbor is not contiguous.
2369 */
2370 old = PREV;
2371 PREV.br_blockcount -= new->br_blockcount;
2372
2373 xfs_iext_update_extent(ip, state, icur, &PREV);
2374 xfs_iext_next(ifp, icur);
2375 xfs_iext_insert(ip, icur, new, state);
2376
2377 XFS_IFORK_NEXT_SET(ip, whichfork,
2378 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2379 if (cur == NULL)
2380 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2381 else {
2382 rval = XFS_ILOG_CORE;
2383 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2384 if (error)
2385 goto done;
2386 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2387 error = xfs_bmbt_update(cur, &PREV);
2388 if (error)
2389 goto done;
2390 error = xfs_bmbt_lookup_eq(cur, new, &i);
2391 if (error)
2392 goto done;
2393 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2394 if ((error = xfs_btree_insert(cur, &i)))
2395 goto done;
2396 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2397 }
2398 break;
2399
2400 case 0:
2401 /*
2402 * Setting the middle part of a previous oldext extent to
2403 * newext. Contiguity is impossible here.
2404 * One extent becomes three extents.
2405 */
2406 old = PREV;
2407 PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
2408
2409 r[0] = *new;
2410 r[1].br_startoff = new_endoff;
2411 r[1].br_blockcount =
2412 old.br_startoff + old.br_blockcount - new_endoff;
2413 r[1].br_startblock = new->br_startblock + new->br_blockcount;
2414 r[1].br_state = PREV.br_state;
2415
2416 xfs_iext_update_extent(ip, state, icur, &PREV);
2417 xfs_iext_next(ifp, icur);
2418 xfs_iext_insert(ip, icur, &r[1], state);
2419 xfs_iext_insert(ip, icur, &r[0], state);
2420
2421 XFS_IFORK_NEXT_SET(ip, whichfork,
2422 XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
2423 if (cur == NULL)
2424 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2425 else {
2426 rval = XFS_ILOG_CORE;
2427 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2428 if (error)
2429 goto done;
2430 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2431 /* new right extent - oldext */
2432 error = xfs_bmbt_update(cur, &r[1]);
2433 if (error)
2434 goto done;
2435 /* new left extent - oldext */
2436 cur->bc_rec.b = PREV;
2437 if ((error = xfs_btree_insert(cur, &i)))
2438 goto done;
2439 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2440 /*
2441 * Reset the cursor to the position of the new extent
2442 * we are about to insert as we can't trust it after
2443 * the previous insert.
2444 */
2445 error = xfs_bmbt_lookup_eq(cur, new, &i);
2446 if (error)
2447 goto done;
2448 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2449 /* new middle extent - newext */
2450 if ((error = xfs_btree_insert(cur, &i)))
2451 goto done;
2452 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2453 }
2454 break;
2455
2456 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2457 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2458 case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2459 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2460 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2461 case BMAP_LEFT_CONTIG:
2462 case BMAP_RIGHT_CONTIG:
2463 /*
2464 * These cases are all impossible.
2465 */
2466 ASSERT(0);
2467 }
2468
2469 /* update reverse mappings */
2470 error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new);
2471 if (error)
2472 goto done;
2473
2474 /* convert to a btree if necessary */
2475 if (xfs_bmap_needs_btree(ip, whichfork)) {
2476 int tmp_logflags; /* partial log flag return val */
2477
2478 ASSERT(cur == NULL);
2479 error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
2480 0, &tmp_logflags, whichfork);
2481 *logflagsp |= tmp_logflags;
2482 if (error)
2483 goto done;
2484 }
2485
2486 /* clear out the allocated field, done with it now in any case. */
2487 if (cur) {
2488 cur->bc_private.b.allocated = 0;
2489 *curp = cur;
2490 }
2491
2492 xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
2493 done:
2494 *logflagsp |= rval;
2495 return error;
2496 #undef LEFT
2497 #undef RIGHT
2498 #undef PREV
2499 }
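/*
 * Illustrative sketch (userspace, hypothetical names): the case-0 path
 * above splits one extent into three.  Given that "new" lies strictly
 * inside "prev" and overlays the same physical blocks, the three pieces
 * work out as follows:
 */
struct sk_irec {
        unsigned long long      off;    /* file offset, in blocks */
        unsigned long long      bno;    /* disk block number */
        unsigned long long      len;    /* length, in blocks */
};

static void
sk_split_middle(
        const struct sk_irec    *prev,
        const struct sk_irec    *new,
        struct sk_irec          out[3])
{
        out[0].off = prev->off;                 /* left piece, old state */
        out[0].bno = prev->bno;
        out[0].len = new->off - prev->off;

        out[1] = *new;                          /* middle piece, new state */

        out[2].off = new->off + new->len;       /* right piece, old state */
        out[2].bno = new->bno + new->len;
        out[2].len = prev->off + prev->len - out[2].off;
}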
2500
2501 /*
2502 * Convert a hole to a delayed allocation.
2503 */
2504 STATIC void
2505 xfs_bmap_add_extent_hole_delay(
2506 xfs_inode_t *ip, /* incore inode pointer */
2507 int whichfork,
2508 struct xfs_iext_cursor *icur,
2509 xfs_bmbt_irec_t *new) /* new data to add to file extents */
2510 {
2511 xfs_ifork_t *ifp; /* inode fork pointer */
2512 xfs_bmbt_irec_t left; /* left neighbor extent entry */
2513 xfs_filblks_t newlen=0; /* new indirect size */
2514 xfs_filblks_t oldlen=0; /* old indirect size */
2515 xfs_bmbt_irec_t right; /* right neighbor extent entry */
2516 int state = xfs_bmap_fork_to_state(whichfork);
2517 xfs_filblks_t temp; /* temp for indirect calculations */
2518
2519 ifp = XFS_IFORK_PTR(ip, whichfork);
2520 ASSERT(isnullstartblock(new->br_startblock));
2521
2522 /*
2523 * Check and set flags if this segment has a left neighbor
2524 */
2525 if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2526 state |= BMAP_LEFT_VALID;
2527 if (isnullstartblock(left.br_startblock))
2528 state |= BMAP_LEFT_DELAY;
2529 }
2530
2531 /*
2532 * Check and set flags if the current (right) segment exists.
2533 * If it doesn't exist, we're converting the hole at end-of-file.
2534 */
2535 if (xfs_iext_get_extent(ifp, icur, &right)) {
2536 state |= BMAP_RIGHT_VALID;
2537 if (isnullstartblock(right.br_startblock))
2538 state |= BMAP_RIGHT_DELAY;
2539 }
2540
2541 /*
2542 * Set contiguity flags on the left and right neighbors.
2543 * Don't let extents get too large, even if the pieces are contiguous.
2544 */
2545 if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2546 left.br_startoff + left.br_blockcount == new->br_startoff &&
2547 left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2548 state |= BMAP_LEFT_CONTIG;
2549
2550 if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2551 new->br_startoff + new->br_blockcount == right.br_startoff &&
2552 new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2553 (!(state & BMAP_LEFT_CONTIG) ||
2554 (left.br_blockcount + new->br_blockcount +
2555 right.br_blockcount <= MAXEXTLEN)))
2556 state |= BMAP_RIGHT_CONTIG;
2557
2558 /*
2559 * Switch out based on the contiguity flags.
2560 */
2561 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2562 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2563 /*
2564 * New allocation is contiguous with delayed allocations
2565 * on the left and on the right.
2566 * Merge all three into a single extent record.
2567 */
2568 temp = left.br_blockcount + new->br_blockcount +
2569 right.br_blockcount;
2570
2571 oldlen = startblockval(left.br_startblock) +
2572 startblockval(new->br_startblock) +
2573 startblockval(right.br_startblock);
2574 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2575 oldlen);
2576 left.br_startblock = nullstartblock(newlen);
2577 left.br_blockcount = temp;
2578
2579 xfs_iext_remove(ip, icur, state);
2580 xfs_iext_prev(ifp, icur);
2581 xfs_iext_update_extent(ip, state, icur, &left);
2582 break;
2583
2584 case BMAP_LEFT_CONTIG:
2585 /*
2586 * New allocation is contiguous with a delayed allocation
2587 * on the left.
2588 * Merge the new allocation with the left neighbor.
2589 */
2590 temp = left.br_blockcount + new->br_blockcount;
2591
2592 oldlen = startblockval(left.br_startblock) +
2593 startblockval(new->br_startblock);
2594 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2595 oldlen);
2596 left.br_blockcount = temp;
2597 left.br_startblock = nullstartblock(newlen);
2598
2599 xfs_iext_prev(ifp, icur);
2600 xfs_iext_update_extent(ip, state, icur, &left);
2601 break;
2602
2603 case BMAP_RIGHT_CONTIG:
2604 /*
2605 * New allocation is contiguous with a delayed allocation
2606 * on the right.
2607 * Merge the new allocation with the right neighbor.
2608 */
2609 temp = new->br_blockcount + right.br_blockcount;
2610 oldlen = startblockval(new->br_startblock) +
2611 startblockval(right.br_startblock);
2612 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2613 oldlen);
2614 right.br_startoff = new->br_startoff;
2615 right.br_startblock = nullstartblock(newlen);
2616 right.br_blockcount = temp;
2617 xfs_iext_update_extent(ip, state, icur, &right);
2618 break;
2619
2620 case 0:
2621 /*
2622 * New allocation is not contiguous with another
2623 * delayed allocation.
2624 * Insert a new entry.
2625 */
2626 oldlen = newlen = 0;
2627 xfs_iext_insert(ip, icur, new, state);
2628 break;
2629 }
2630 if (oldlen != newlen) {
2631 ASSERT(oldlen > newlen);
2632 xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2633 false);
2634 /*
2635 * Nothing to do for disk quota accounting here.
2636 */
2637 }
2638 }
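/*
 * Illustrative sketch (userspace): the reservation arithmetic in the
 * merge cases above.  sk_worst_indlen() is a hypothetical stand-in for
 * xfs_bmap_worst_indlen(); the point is that merging never grows the
 * indirect-block reservation, and the surplus is returned to the
 * free-block pool (xfs_mod_fdblocks() in the real code).
 */
static unsigned long long
sk_worst_indlen(unsigned long long len)
{
        /* stand-in rate: one indirect block per 128 data blocks */
        return (len + 127) / 128;
}

static unsigned long long
sk_merged_indlen(
        unsigned long long      left_indlen,
        unsigned long long      new_indlen,
        unsigned long long      right_indlen,
        unsigned long long      merged_len,
        unsigned long long      *to_return)     /* blocks given back */
{
        unsigned long long      oldlen = left_indlen + new_indlen +
                                         right_indlen;
        unsigned long long      newlen = sk_worst_indlen(merged_len);

        if (newlen > oldlen)                    /* XFS_FILBLKS_MIN() */
                newlen = oldlen;
        *to_return = oldlen - newlen;
        return newlen;
}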
2639
2640 /*
2641 * Convert a hole to a real allocation.
2642 */
2643 STATIC int /* error */
2644 xfs_bmap_add_extent_hole_real(
2645 struct xfs_trans *tp,
2646 struct xfs_inode *ip,
2647 int whichfork,
2648 struct xfs_iext_cursor *icur,
2649 struct xfs_btree_cur **curp,
2650 struct xfs_bmbt_irec *new,
2651 xfs_fsblock_t *first,
2652 struct xfs_defer_ops *dfops,
2653 int *logflagsp)
2654 {
2655 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
2656 struct xfs_mount *mp = ip->i_mount;
2657 struct xfs_btree_cur *cur = *curp;
2658 int error; /* error return value */
2659 int i; /* temp state */
2660 xfs_bmbt_irec_t left; /* left neighbor extent entry */
2661 xfs_bmbt_irec_t right; /* right neighbor extent entry */
2662 int rval=0; /* return value (logging flags) */
2663 int state = xfs_bmap_fork_to_state(whichfork);
2664 struct xfs_bmbt_irec old;
2665
2666 ASSERT(!isnullstartblock(new->br_startblock));
2667 ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
2668
2669 XFS_STATS_INC(mp, xs_add_exlist);
2670
2671 /*
2672 * Check and set flags if this segment has a left neighbor.
2673 */
2674 if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
2675 state |= BMAP_LEFT_VALID;
2676 if (isnullstartblock(left.br_startblock))
2677 state |= BMAP_LEFT_DELAY;
2678 }
2679
2680 /*
2681 * Check and set flags if this segment has a current value.
2682 * Not true if we're inserting into the "hole" at eof.
2683 */
2684 if (xfs_iext_get_extent(ifp, icur, &right)) {
2685 state |= BMAP_RIGHT_VALID;
2686 if (isnullstartblock(right.br_startblock))
2687 state |= BMAP_RIGHT_DELAY;
2688 }
2689
2690 /*
2691 * We're inserting a real allocation between "left" and "right".
2692 * Set the contiguity flags. Don't let extents get too large.
2693 */
2694 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2695 left.br_startoff + left.br_blockcount == new->br_startoff &&
2696 left.br_startblock + left.br_blockcount == new->br_startblock &&
2697 left.br_state == new->br_state &&
2698 left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2699 state |= BMAP_LEFT_CONTIG;
2700
2701 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2702 new->br_startoff + new->br_blockcount == right.br_startoff &&
2703 new->br_startblock + new->br_blockcount == right.br_startblock &&
2704 new->br_state == right.br_state &&
2705 new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2706 (!(state & BMAP_LEFT_CONTIG) ||
2707 left.br_blockcount + new->br_blockcount +
2708 right.br_blockcount <= MAXEXTLEN))
2709 state |= BMAP_RIGHT_CONTIG;
2710
2711 error = 0;
2712 /*
2713 * Select which case we're in here, and implement it.
2714 */
2715 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2716 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2717 /*
2718 * New allocation is contiguous with real allocations on the
2719 * left and on the right.
2720 * Merge all three into a single extent record.
2721 */
2722 left.br_blockcount += new->br_blockcount + right.br_blockcount;
2723
2724 xfs_iext_remove(ip, icur, state);
2725 xfs_iext_prev(ifp, icur);
2726 xfs_iext_update_extent(ip, state, icur, &left);
2727
2728 XFS_IFORK_NEXT_SET(ip, whichfork,
2729 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2730 if (cur == NULL) {
2731 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2732 } else {
2733 rval = XFS_ILOG_CORE;
2734 error = xfs_bmbt_lookup_eq(cur, &right, &i);
2735 if (error)
2736 goto done;
2737 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2738 error = xfs_btree_delete(cur, &i);
2739 if (error)
2740 goto done;
2741 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2742 error = xfs_btree_decrement(cur, 0, &i);
2743 if (error)
2744 goto done;
2745 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2746 error = xfs_bmbt_update(cur, &left);
2747 if (error)
2748 goto done;
2749 }
2750 break;
2751
2752 case BMAP_LEFT_CONTIG:
2753 /*
2754 * New allocation is contiguous with a real allocation
2755 * on the left.
2756 * Merge the new allocation with the left neighbor.
2757 */
2758 old = left;
2759 left.br_blockcount += new->br_blockcount;
2760
2761 xfs_iext_prev(ifp, icur);
2762 xfs_iext_update_extent(ip, state, icur, &left);
2763
2764 if (cur == NULL) {
2765 rval = xfs_ilog_fext(whichfork);
2766 } else {
2767 rval = 0;
2768 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2769 if (error)
2770 goto done;
2771 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2772 error = xfs_bmbt_update(cur, &left);
2773 if (error)
2774 goto done;
2775 }
2776 break;
2777
2778 case BMAP_RIGHT_CONTIG:
2779 /*
2780 * New allocation is contiguous with a real allocation
2781 * on the right.
2782 * Merge the new allocation with the right neighbor.
2783 */
2784 old = right;
2785
2786 right.br_startoff = new->br_startoff;
2787 right.br_startblock = new->br_startblock;
2788 right.br_blockcount += new->br_blockcount;
2789 xfs_iext_update_extent(ip, state, icur, &right);
2790
2791 if (cur == NULL) {
2792 rval = xfs_ilog_fext(whichfork);
2793 } else {
2794 rval = 0;
2795 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2796 if (error)
2797 goto done;
2798 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2799 error = xfs_bmbt_update(cur, &right);
2800 if (error)
2801 goto done;
2802 }
2803 break;
2804
2805 case 0:
2806 /*
2807 * New allocation is not contiguous with another
2808 * real allocation.
2809 * Insert a new entry.
2810 */
2811 xfs_iext_insert(ip, icur, new, state);
2812 XFS_IFORK_NEXT_SET(ip, whichfork,
2813 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2814 if (cur == NULL) {
2815 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2816 } else {
2817 rval = XFS_ILOG_CORE;
2818 error = xfs_bmbt_lookup_eq(cur, new, &i);
2819 if (error)
2820 goto done;
2821 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2822 error = xfs_btree_insert(cur, &i);
2823 if (error)
2824 goto done;
2825 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2826 }
2827 break;
2828 }
2829
2830 /* add reverse mapping */
2831 error = xfs_rmap_map_extent(mp, dfops, ip, whichfork, new);
2832 if (error)
2833 goto done;
2834
2835 /* convert to a btree if necessary */
2836 if (xfs_bmap_needs_btree(ip, whichfork)) {
2837 int tmp_logflags; /* partial log flag return val */
2838
2839 ASSERT(cur == NULL);
2840 error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, curp,
2841 0, &tmp_logflags, whichfork);
2842 *logflagsp |= tmp_logflags;
2843 cur = *curp;
2844 if (error)
2845 goto done;
2846 }
2847
2848 /* clear out the allocated field, done with it now in any case. */
2849 if (cur)
2850 cur->bc_private.b.allocated = 0;
2851
2852 xfs_bmap_check_leaf_extents(cur, ip, whichfork);
2853 done:
2854 *logflagsp |= rval;
2855 return error;
2856 }
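/*
 * Illustrative sketch (userspace): the left-contiguity test used above,
 * reduced to a predicate over plain integers.  The real test also
 * rejects delalloc neighbors (BMAP_LEFT_DELAY) and mismatched
 * written/unwritten state; SK_MAXEXTLEN is a stand-in for MAXEXTLEN.
 */
#define SK_MAXEXTLEN    ((1ULL << 21) - 1)

static int
sk_left_contig(
        unsigned long long      left_off,
        unsigned long long      left_bno,
        unsigned long long      left_len,
        unsigned long long      new_off,
        unsigned long long      new_bno,
        unsigned long long      new_len)
{
        return left_off + left_len == new_off &&       /* logically adjacent */
               left_bno + left_len == new_bno &&       /* physically adjacent */
               left_len + new_len <= SK_MAXEXTLEN;     /* merged extent fits */
}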
2857
2858 /*
2859 * Functions used in the extent read, allocate and remove paths
2860 */
2861
2862 /*
2863 * Adjust the size of the new extent based on di_extsize and rt extsize.
2864 */
2865 int
2866 xfs_bmap_extsize_align(
2867 xfs_mount_t *mp,
2868 xfs_bmbt_irec_t *gotp, /* next extent pointer */
2869 xfs_bmbt_irec_t *prevp, /* previous extent pointer */
2870 xfs_extlen_t extsz, /* align to this extent size */
2871 int rt, /* is this a realtime inode? */
2872 int eof, /* is extent at end-of-file? */
2873 int delay, /* creating delalloc extent? */
2874 int convert, /* overwriting unwritten extent? */
2875 xfs_fileoff_t *offp, /* in/out: aligned offset */
2876 xfs_extlen_t *lenp) /* in/out: aligned length */
2877 {
2878 xfs_fileoff_t orig_off; /* original offset */
2879 xfs_extlen_t orig_alen; /* original length */
2880 xfs_fileoff_t orig_end; /* original off+len */
2881 xfs_fileoff_t nexto; /* next file offset */
2882 xfs_fileoff_t prevo; /* previous file offset */
2883 xfs_fileoff_t align_off; /* temp for offset */
2884 xfs_extlen_t align_alen; /* temp for length */
2885 xfs_extlen_t temp; /* temp for calculations */
2886
2887 if (convert)
2888 return 0;
2889
2890 orig_off = align_off = *offp;
2891 orig_alen = align_alen = *lenp;
2892 orig_end = orig_off + orig_alen;
2893
2894 /*
2895 * If this request overlaps an existing extent, then don't
2896 * attempt to perform any additional alignment.
2897 */
2898 if (!delay && !eof &&
2899 (orig_off >= gotp->br_startoff) &&
2900 (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
2901 return 0;
2902 }
2903
2904 /*
2905 * If the file offset is unaligned vs. the extent size,
2906 * we need to align it. This will be possible unless
2907 * the file was previously written with a kernel that didn't
2908 * perform this alignment, or if a truncate shot us in the
2909 * foot.
2910 */
2911 temp = do_mod(orig_off, extsz);
2912 if (temp) {
2913 align_alen += temp;
2914 align_off -= temp;
2915 }
2916
2917 /* Same adjustment for the end of the requested area. */
2918 temp = (align_alen % extsz);
2919 if (temp)
2920 align_alen += extsz - temp;
2921
2922 /*
2923 * For large extent hint sizes, the aligned extent might be larger than
2924 * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
2925 * the length back under MAXEXTLEN. The outer allocation loops handle
2926 * short allocations just fine, so it is safe to do this. We only want to
2927 * do it when we are forced to, though, because it means more allocation
2928 * operations are required.
2929 */
2930 while (align_alen > MAXEXTLEN)
2931 align_alen -= extsz;
2932 ASSERT(align_alen <= MAXEXTLEN);
2933
2934 /*
2935 * If the previous block overlaps with this proposed allocation
2936 * then move the start forward without adjusting the length.
2937 */
2938 if (prevp->br_startoff != NULLFILEOFF) {
2939 if (prevp->br_startblock == HOLESTARTBLOCK)
2940 prevo = prevp->br_startoff;
2941 else
2942 prevo = prevp->br_startoff + prevp->br_blockcount;
2943 } else
2944 prevo = 0;
2945 if (align_off != orig_off && align_off < prevo)
2946 align_off = prevo;
2947 /*
2948 * If the next block overlaps with this proposed allocation
2949 * then move the start back without adjusting the length,
2950 * but not before offset 0.
2951 * This may of course make the start overlap the previous block,
2952 * and if we hit the offset 0 limit then the next block
2953 * can still overlap too.
2954 */
2955 if (!eof && gotp->br_startoff != NULLFILEOFF) {
2956 if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
2957 (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
2958 nexto = gotp->br_startoff + gotp->br_blockcount;
2959 else
2960 nexto = gotp->br_startoff;
2961 } else
2962 nexto = NULLFILEOFF;
2963 if (!eof &&
2964 align_off + align_alen != orig_end &&
2965 align_off + align_alen > nexto)
2966 align_off = nexto > align_alen ? nexto - align_alen : 0;
2967 /*
2968 * If we're now overlapping the next or previous extent that
2969 * means we can't fit an extsz piece in this hole. Just move
2970 * the start forward to the first valid spot and set
2971 * the length so we hit the end.
2972 */
2973 if (align_off != orig_off && align_off < prevo)
2974 align_off = prevo;
2975 if (align_off + align_alen != orig_end &&
2976 align_off + align_alen > nexto &&
2977 nexto != NULLFILEOFF) {
2978 ASSERT(nexto > prevo);
2979 align_alen = nexto - align_off;
2980 }
2981
2982 /*
2983 * If realtime, and the result isn't a multiple of the realtime
2984 * extent size we need to remove blocks until it is.
2985 */
2986 if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
2987 /*
2988 * We're not covering the original request, or
2989 * we won't be able to once we fix the length.
2990 */
2991 if (orig_off < align_off ||
2992 orig_end > align_off + align_alen ||
2993 align_alen - temp < orig_alen)
2994 return -EINVAL;
2995 /*
2996 * Try to fix it by moving the start up.
2997 */
2998 if (align_off + temp <= orig_off) {
2999 align_alen -= temp;
3000 align_off += temp;
3001 }
3002 /*
3003 * Try to fix it by moving the end in.
3004 */
3005 else if (align_off + align_alen - temp >= orig_end)
3006 align_alen -= temp;
3007 /*
3008 * Set the start to the minimum then trim the length.
3009 */
3010 else {
3011 align_alen -= orig_off - align_off;
3012 align_off = orig_off;
3013 align_alen -= align_alen % mp->m_sb.sb_rextsize;
3014 }
3015 /*
3016 * Result doesn't cover the request, fail it.
3017 */
3018 if (orig_off < align_off || orig_end > align_off + align_alen)
3019 return -EINVAL;
3020 } else {
3021 ASSERT(orig_off >= align_off);
3022 /* see MAXEXTLEN handling above */
3023 ASSERT(orig_end <= align_off + align_alen ||
3024 align_alen + extsz > MAXEXTLEN);
3025 }
3026
3027 #ifdef DEBUG
3028 if (!eof && gotp->br_startoff != NULLFILEOFF)
3029 ASSERT(align_off + align_alen <= gotp->br_startoff);
3030 if (prevp->br_startoff != NULLFILEOFF)
3031 ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3032 #endif
3033
3034 *lenp = align_alen;
3035 *offp = align_off;
3036 return 0;
3037 }
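/*
 * Worked example of the initial rounding in xfs_bmap_extsize_align()
 * (userspace re-statement): round the start down and the length up so
 * the request covers whole extsz units.  With off=10, len=5, extsz=8
 * the request [10,15) grows to [8,16).
 */
static void
sk_extsize_round(
        unsigned long long      *off,
        unsigned long long      *len,
        unsigned long long      extsz)
{
        unsigned long long      temp;

        temp = *off % extsz;            /* misalignment at the start */
        if (temp) {
                *len += temp;           /* cover the rounded-down start */
                *off -= temp;
        }
        temp = *len % extsz;            /* misalignment at the end */
        if (temp)
                *len += extsz - temp;   /* round the end up */
}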
3038
3039 #define XFS_ALLOC_GAP_UNITS 4
3040
3041 void
3042 xfs_bmap_adjacent(
3043 struct xfs_bmalloca *ap) /* bmap alloc argument struct */
3044 {
3045 xfs_fsblock_t adjust; /* adjustment to block numbers */
3046 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
3047 xfs_mount_t *mp; /* mount point structure */
3048 int nullfb; /* true if ap->firstblock isn't set */
3049 int rt; /* true if inode is realtime */
3050
3051 #define ISVALID(x,y) \
3052 (rt ? \
3053 (x) < mp->m_sb.sb_rblocks : \
3054 XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3055 XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3056 XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3057
3058 mp = ap->ip->i_mount;
3059 nullfb = *ap->firstblock == NULLFSBLOCK;
3060 rt = XFS_IS_REALTIME_INODE(ap->ip) &&
3061 xfs_alloc_is_userdata(ap->datatype);
3062 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
3063 /*
3064 * If allocating at eof, and there's a previous real block,
3065 * try to use its last block as our starting point.
3066 */
3067 if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3068 !isnullstartblock(ap->prev.br_startblock) &&
3069 ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3070 ap->prev.br_startblock)) {
3071 ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3072 /*
3073 * Adjust for the gap between prevp and us.
3074 */
3075 adjust = ap->offset -
3076 (ap->prev.br_startoff + ap->prev.br_blockcount);
3077 if (adjust &&
3078 ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3079 ap->blkno += adjust;
3080 }
3081 /*
3082 * If not at eof, then compare the two neighbor blocks.
3083 * Figure out whether either one gives us a good starting point,
3084 * and pick the better one.
3085 */
3086 else if (!ap->eof) {
3087 xfs_fsblock_t gotbno; /* right side block number */
3088 xfs_fsblock_t gotdiff=0; /* right side difference */
3089 xfs_fsblock_t prevbno; /* left side block number */
3090 xfs_fsblock_t prevdiff=0; /* left side difference */
3091
3092 /*
3093 * If there's a previous (left) block, select a requested
3094 * start block based on it.
3095 */
3096 if (ap->prev.br_startoff != NULLFILEOFF &&
3097 !isnullstartblock(ap->prev.br_startblock) &&
3098 (prevbno = ap->prev.br_startblock +
3099 ap->prev.br_blockcount) &&
3100 ISVALID(prevbno, ap->prev.br_startblock)) {
3101 /*
3102 * Calculate gap to end of previous block.
3103 */
3104 adjust = prevdiff = ap->offset -
3105 (ap->prev.br_startoff +
3106 ap->prev.br_blockcount);
3107 /*
3108 * Figure the startblock based on the previous block's
3109 * end and the gap size.
3110 * Heuristic!
3111 * If the gap is large relative to the piece we're
3112 * allocating, or using it gives us an invalid block
3113 * number, then just use the end of the previous block.
3114 */
3115 if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3116 ISVALID(prevbno + prevdiff,
3117 ap->prev.br_startblock))
3118 prevbno += adjust;
3119 else
3120 prevdiff += adjust;
3121 /*
3122 * If the firstblock forbids it, can't use it,
3123 * must use default.
3124 */
3125 if (!rt && !nullfb &&
3126 XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
3127 prevbno = NULLFSBLOCK;
3128 }
3129 /*
3130 * No previous block or can't follow it, just default.
3131 */
3132 else
3133 prevbno = NULLFSBLOCK;
3134 /*
3135 * If there's a following (right) block, select a requested
3136 * start block based on it.
3137 */
3138 if (!isnullstartblock(ap->got.br_startblock)) {
3139 /*
3140 * Calculate gap to start of next block.
3141 */
3142 adjust = gotdiff = ap->got.br_startoff - ap->offset;
3143 /*
3144 * Figure the startblock based on the next block's
3145 * start and the gap size.
3146 */
3147 gotbno = ap->got.br_startblock;
3148 /*
3149 * Heuristic!
3150 * If the gap is large relative to the piece we're
3151 * allocating, or using it gives us an invalid block
3152 * number, then just use the start of the next block
3153 * offset by our length.
3154 */
3155 if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3156 ISVALID(gotbno - gotdiff, gotbno))
3157 gotbno -= adjust;
3158 else if (ISVALID(gotbno - ap->length, gotbno)) {
3159 gotbno -= ap->length;
3160 gotdiff += adjust - ap->length;
3161 } else
3162 gotdiff += adjust;
3163 /*
3164 * If the firstblock forbids it, can't use it,
3165 * must use default.
3166 */
3167 if (!rt && !nullfb &&
3168 XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
3169 gotbno = NULLFSBLOCK;
3170 }
3171 /*
3172 * No next block, just default.
3173 */
3174 else
3175 gotbno = NULLFSBLOCK;
3176 /*
3177 * If both valid, pick the better one, else the only good
3178 * one, else ap->blkno is already set (to 0 or the inode block).
3179 */
3180 if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
3181 ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3182 else if (prevbno != NULLFSBLOCK)
3183 ap->blkno = prevbno;
3184 else if (gotbno != NULLFSBLOCK)
3185 ap->blkno = gotbno;
3186 }
3187 #undef ISVALID
3188 }
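/*
 * Illustrative sketch (userspace) of the XFS_ALLOC_GAP_UNITS heuristic
 * above: when picking a target block after the previous extent, jump
 * over the logical gap only if the gap is small relative to the
 * allocation; otherwise aim at the end of the previous extent and
 * count the gap twice in the "distance" used to compare candidates.
 */
#define SK_GAP_UNITS    4               /* mirrors XFS_ALLOC_GAP_UNITS */

static unsigned long long
sk_pick_after_prev(
        unsigned long long      prev_end_bno,   /* block after prev extent */
        unsigned long long      gap,            /* file-offset gap to prev */
        unsigned long long      length,         /* length being allocated */
        unsigned long long      *distance)      /* badness of this choice */
{
        if (gap <= SK_GAP_UNITS * length) {
                *distance = gap;
                return prev_end_bno + gap;      /* land where we will map */
        }
        *distance = 2 * gap;
        return prev_end_bno;
}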
3189
3190 static int
3191 xfs_bmap_longest_free_extent(
3192 struct xfs_trans *tp,
3193 xfs_agnumber_t ag,
3194 xfs_extlen_t *blen,
3195 int *notinit)
3196 {
3197 struct xfs_mount *mp = tp->t_mountp;
3198 struct xfs_perag *pag;
3199 xfs_extlen_t longest;
3200 int error = 0;
3201
3202 pag = xfs_perag_get(mp, ag);
3203 if (!pag->pagf_init) {
3204 error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3205 if (error)
3206 goto out;
3207
3208 if (!pag->pagf_init) {
3209 *notinit = 1;
3210 goto out;
3211 }
3212 }
3213
3214 longest = xfs_alloc_longest_free_extent(mp, pag,
3215 xfs_alloc_min_freelist(mp, pag),
3216 xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3217 if (*blen < longest)
3218 *blen = longest;
3219
3220 out:
3221 xfs_perag_put(pag);
3222 return error;
3223 }
3224
3225 static void
3226 xfs_bmap_select_minlen(
3227 struct xfs_bmalloca *ap,
3228 struct xfs_alloc_arg *args,
3229 xfs_extlen_t *blen,
3230 int notinit)
3231 {
3232 if (notinit || *blen < ap->minlen) {
3233 /*
3234 * Since we did a BUF_TRYLOCK above, it is possible that
3235 * there is space for this request.
3236 */
3237 args->minlen = ap->minlen;
3238 } else if (*blen < args->maxlen) {
3239 /*
3240 * If the best seen length is less than the request length,
3241 * use the best as the minimum.
3242 */
3243 args->minlen = *blen;
3244 } else {
3245 /*
3246 * Otherwise we've seen an extent as big as maxlen, use that
3247 * as the minimum.
3248 */
3249 args->minlen = args->maxlen;
3250 }
3251 }
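/*
 * Illustrative sketch (userspace): the minlen selection above is a
 * three-way clamp.  "best" is the longest free extent seen so far,
 * "want" plays the role of args->maxlen and "minimum" of ap->minlen.
 */
static unsigned long long
sk_select_minlen(
        unsigned long long      best,
        unsigned long long      want,
        unsigned long long      minimum,
        int                     notinit)
{
        if (notinit || best < minimum)
                return minimum;         /* unexamined AGs may have space */
        if (best < want)
                return best;            /* settle for the best seen */
        return want;                    /* a full-size extent was seen */
}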
3252
3253 STATIC int
3254 xfs_bmap_btalloc_nullfb(
3255 struct xfs_bmalloca *ap,
3256 struct xfs_alloc_arg *args,
3257 xfs_extlen_t *blen)
3258 {
3259 struct xfs_mount *mp = ap->ip->i_mount;
3260 xfs_agnumber_t ag, startag;
3261 int notinit = 0;
3262 int error;
3263
3264 args->type = XFS_ALLOCTYPE_START_BNO;
3265 args->total = ap->total;
3266
3267 startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3268 if (startag == NULLAGNUMBER)
3269 startag = ag = 0;
3270
3271 while (*blen < args->maxlen) {
3272 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3273 &notinit);
3274 if (error)
3275 return error;
3276
3277 if (++ag == mp->m_sb.sb_agcount)
3278 ag = 0;
3279 if (ag == startag)
3280 break;
3281 }
3282
3283 xfs_bmap_select_minlen(ap, args, blen, notinit);
3284 return 0;
3285 }
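/*
 * Illustrative sketch (userspace) of the wrap-around AG scan above,
 * with a hypothetical visit() callback in place of
 * xfs_bmap_longest_free_extent().  The real loop also stops early once
 * *blen reaches args->maxlen; startag is assumed to be < agcount.
 */
static int
sk_scan_all_ags(
        unsigned int    startag,
        unsigned int    agcount,
        int             (*visit)(unsigned int ag, void *priv),
        void            *priv)
{
        unsigned int    ag = startag;

        do {
                int error = visit(ag, priv);

                if (error)
                        return error;
                if (++ag == agcount)    /* wrap around to AG 0 */
                        ag = 0;
        } while (ag != startag);
        return 0;
}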
3286
3287 STATIC int
3288 xfs_bmap_btalloc_filestreams(
3289 struct xfs_bmalloca *ap,
3290 struct xfs_alloc_arg *args,
3291 xfs_extlen_t *blen)
3292 {
3293 struct xfs_mount *mp = ap->ip->i_mount;
3294 xfs_agnumber_t ag;
3295 int notinit = 0;
3296 int error;
3297
3298 args->type = XFS_ALLOCTYPE_NEAR_BNO;
3299 args->total = ap->total;
3300
3301 ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3302 if (ag == NULLAGNUMBER)
3303 ag = 0;
3304
3305 error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3306 if (error)
3307 return error;
3308
3309 if (*blen < args->maxlen) {
3310 error = xfs_filestream_new_ag(ap, &ag);
3311 if (error)
3312 return error;
3313
3314 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3315 &notinit);
3316 if (error)
3317 return error;
3318
3319 }
3320
3321 xfs_bmap_select_minlen(ap, args, blen, notinit);
3322
3323 /*
3324 * Set the failure fallback case to look in the selected AG, as the
3325 * stream may have moved.
3326 */
3327 ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3328 return 0;
3329 }
3330
3331 STATIC int
3332 xfs_bmap_btalloc(
3333 struct xfs_bmalloca *ap) /* bmap alloc argument struct */
3334 {
3335 xfs_mount_t *mp; /* mount point structure */
3336 xfs_alloctype_t atype = 0; /* type for allocation routines */
3337 xfs_extlen_t align = 0; /* minimum allocation alignment */
3338 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
3339 xfs_agnumber_t ag;
3340 xfs_alloc_arg_t args;
3341 xfs_extlen_t blen;
3342 xfs_extlen_t nextminlen = 0;
3343 int nullfb; /* true if ap->firstblock isn't set */
3344 int isaligned;
3345 int tryagain;
3346 int error;
3347 int stripe_align;
3348
3349 ASSERT(ap->length);
3350
3351 mp = ap->ip->i_mount;
3352
3353 /* stripe alignment for allocation is determined by mount parameters */
3354 stripe_align = 0;
3355 if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
3356 stripe_align = mp->m_swidth;
3357 else if (mp->m_dalign)
3358 stripe_align = mp->m_dalign;
3359
3360 if (ap->flags & XFS_BMAPI_COWFORK)
3361 align = xfs_get_cowextsz_hint(ap->ip);
3362 else if (xfs_alloc_is_userdata(ap->datatype))
3363 align = xfs_get_extsz_hint(ap->ip);
3364 if (align) {
3365 error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
3366 align, 0, ap->eof, 0, ap->conv,
3367 &ap->offset, &ap->length);
3368 ASSERT(!error);
3369 ASSERT(ap->length);
3370 }
3371
3372
3373 nullfb = *ap->firstblock == NULLFSBLOCK;
3374 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
3375 if (nullfb) {
3376 if (xfs_alloc_is_userdata(ap->datatype) &&
3377 xfs_inode_is_filestream(ap->ip)) {
3378 ag = xfs_filestream_lookup_ag(ap->ip);
3379 ag = (ag != NULLAGNUMBER) ? ag : 0;
3380 ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
3381 } else {
3382 ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
3383 }
3384 } else
3385 ap->blkno = *ap->firstblock;
3386
3387 xfs_bmap_adjacent(ap);
3388
3389 /*
3390 * If allowed, use ap->blkno; otherwise must use firstblock since
3391 * it's in the right allocation group.
3392 */
3393 if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
3394 ;
3395 else
3396 ap->blkno = *ap->firstblock;
3397 /*
3398 * Normal allocation, done through xfs_alloc_vextent.
3399 */
3400 tryagain = isaligned = 0;
3401 memset(&args, 0, sizeof(args));
3402 args.tp = ap->tp;
3403 args.mp = mp;
3404 args.fsbno = ap->blkno;
3405 xfs_rmap_skip_owner_update(&args.oinfo);
3406
3407 /* Trim the allocation back to the maximum an AG can fit. */
3408 args.maxlen = MIN(ap->length, mp->m_ag_max_usable);
3409 args.firstblock = *ap->firstblock;
3410 blen = 0;
3411 if (nullfb) {
3412 /*
3413 * Search for an allocation group with a single extent large
3414 * enough for the request. If one isn't found, then adjust
3415 * the minimum allocation size to the largest space found.
3416 */
3417 if (xfs_alloc_is_userdata(ap->datatype) &&
3418 xfs_inode_is_filestream(ap->ip))
3419 error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
3420 else
3421 error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
3422 if (error)
3423 return error;
3424 } else if (ap->dfops->dop_low) {
3425 if (xfs_inode_is_filestream(ap->ip))
3426 args.type = XFS_ALLOCTYPE_FIRST_AG;
3427 else
3428 args.type = XFS_ALLOCTYPE_START_BNO;
3429 args.total = args.minlen = ap->minlen;
3430 } else {
3431 args.type = XFS_ALLOCTYPE_NEAR_BNO;
3432 args.total = ap->total;
3433 args.minlen = ap->minlen;
3434 }
3435 /* apply extent size hints if obtained earlier */
3436 if (align) {
3437 args.prod = align;
3438 if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
3439 args.mod = (xfs_extlen_t)(args.prod - args.mod);
3440 } else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
3441 args.prod = 1;
3442 args.mod = 0;
3443 } else {
3444 args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
3445 if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod))))
3446 args.mod = (xfs_extlen_t)(args.prod - args.mod);
3447 }
3448 /*
3449 * If we are not low on available data blocks, and the
3450 * underlying logical volume manager is striped, and
3451 * the file offset is zero, then try to allocate data
3452 * blocks on a stripe unit boundary.
3453 * NOTE: ap->aeof is only set if the allocation length
3454 * is >= the stripe unit and the allocation offset is
3455 * at the end of file.
3456 */
3457 if (!ap->dfops->dop_low && ap->aeof) {
3458 if (!ap->offset) {
3459 args.alignment = stripe_align;
3460 atype = args.type;
3461 isaligned = 1;
3462 /*
3463 * Adjust for alignment
3464 */
3465 if (blen > args.alignment && blen <= args.maxlen)
3466 args.minlen = blen - args.alignment;
3467 args.minalignslop = 0;
3468 } else {
3469 /*
3470 * First try an exact bno allocation.
3471 * If it fails then do a near or start bno
3472 * allocation with alignment turned on.
3473 */
3474 atype = args.type;
3475 tryagain = 1;
3476 args.type = XFS_ALLOCTYPE_THIS_BNO;
3477 args.alignment = 1;
3478 /*
3479 * Compute the minlen+alignment for the
3480 * next case. Set slop so that the value
3481 * of minlen+alignment+slop doesn't go up
3482 * between the calls.
3483 */
3484 if (blen > stripe_align && blen <= args.maxlen)
3485 nextminlen = blen - stripe_align;
3486 else
3487 nextminlen = args.minlen;
3488 if (nextminlen + stripe_align > args.minlen + 1)
3489 args.minalignslop =
3490 nextminlen + stripe_align -
3491 args.minlen - 1;
3492 else
3493 args.minalignslop = 0;
3494 }
3495 } else {
3496 args.alignment = 1;
3497 args.minalignslop = 0;
3498 }
3499 args.minleft = ap->minleft;
3500 args.wasdel = ap->wasdel;
3501 args.resv = XFS_AG_RESV_NONE;
3502 args.datatype = ap->datatype;
3503 if (ap->datatype & XFS_ALLOC_USERDATA_ZERO)
3504 args.ip = ap->ip;
3505
3506 error = xfs_alloc_vextent(&args);
3507 if (error)
3508 return error;
3509
3510 if (tryagain && args.fsbno == NULLFSBLOCK) {
3511 /*
3512 * Exact allocation failed. Now try with alignment
3513 * turned on.
3514 */
3515 args.type = atype;
3516 args.fsbno = ap->blkno;
3517 args.alignment = stripe_align;
3518 args.minlen = nextminlen;
3519 args.minalignslop = 0;
3520 isaligned = 1;
3521 if ((error = xfs_alloc_vextent(&args)))
3522 return error;
3523 }
3524 if (isaligned && args.fsbno == NULLFSBLOCK) {
3525 /*
3526 * allocation failed, so turn off alignment and
3527 * try again.
3528 */
3529 args.type = atype;
3530 args.fsbno = ap->blkno;
3531 args.alignment = 0;
3532 if ((error = xfs_alloc_vextent(&args)))
3533 return error;
3534 }
3535 if (args.fsbno == NULLFSBLOCK && nullfb &&
3536 args.minlen > ap->minlen) {
3537 args.minlen = ap->minlen;
3538 args.type = XFS_ALLOCTYPE_START_BNO;
3539 args.fsbno = ap->blkno;
3540 if ((error = xfs_alloc_vextent(&args)))
3541 return error;
3542 }
3543 if (args.fsbno == NULLFSBLOCK && nullfb) {
3544 args.fsbno = 0;
3545 args.type = XFS_ALLOCTYPE_FIRST_AG;
3546 args.total = ap->minlen;
3547 if ((error = xfs_alloc_vextent(&args)))
3548 return error;
3549 ap->dfops->dop_low = true;
3550 }
3551 if (args.fsbno != NULLFSBLOCK) {
3552 /*
3553 * Check that the allocation happened in the same or a higher AG
3554 * than the first block that was allocated.
3555 */
3556 ASSERT(*ap->firstblock == NULLFSBLOCK ||
3557 XFS_FSB_TO_AGNO(mp, *ap->firstblock) <=
3558 XFS_FSB_TO_AGNO(mp, args.fsbno));
3559
3560 ap->blkno = args.fsbno;
3561 if (*ap->firstblock == NULLFSBLOCK)
3562 *ap->firstblock = args.fsbno;
3563 ASSERT(nullfb || fb_agno <= args.agno);
3564 ap->length = args.len;
3565 if (!(ap->flags & XFS_BMAPI_COWFORK))
3566 ap->ip->i_d.di_nblocks += args.len;
3567 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3568 if (ap->wasdel)
3569 ap->ip->i_delayed_blks -= args.len;
3570 /*
3571 * Adjust the disk quota also. This was reserved
3572 * earlier.
3573 */
3574 xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
3575 ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
3576 XFS_TRANS_DQ_BCOUNT,
3577 (long) args.len);
3578 } else {
3579 ap->blkno = NULLFSBLOCK;
3580 ap->length = 0;
3581 }
3582 return 0;
3583 }
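/*
 * Illustrative sketch (userspace, hypothetical names): the retry
 * ladder in xfs_bmap_btalloc() relaxes one constraint per attempt and
 * stops at the first success.  In the real code each attempt only
 * fires when its preconditions hold; this sketch runs them in order.
 * sk_try_alloc() stands in for xfs_alloc_vextent(): it returns a
 * negative errno on a hard error and otherwise sets *fsbno, leaving
 * SK_NULLFSBLOCK there when nothing could be allocated.
 */
#define SK_NULLFSBLOCK  (~0ULL)

enum sk_attempt {
        SK_EXACT_BNO,           /* this bno exactly, no alignment slop */
        SK_ALIGNED_NEAR,        /* near bno, stripe aligned */
        SK_UNALIGNED_NEAR,      /* near bno, alignment turned off */
        SK_SMALL_MINLEN,        /* accept as little as ap->minlen */
        SK_ANY_AG,              /* first AG with space, "low" mode */
        SK_NR_ATTEMPTS
};

static int
sk_alloc_with_fallbacks(
        int                     (*sk_try_alloc)(enum sk_attempt attempt,
                                                unsigned long long *fsbno),
        unsigned long long      *fsbno)
{
        enum sk_attempt         attempt;
        int                     error;

        for (attempt = SK_EXACT_BNO; attempt < SK_NR_ATTEMPTS; attempt++) {
                error = sk_try_alloc(attempt, fsbno);
                if (error)
                        return error;           /* hard failure */
                if (*fsbno != SK_NULLFSBLOCK)
                        return 0;               /* got an extent */
        }
        return 0;               /* caller sees SK_NULLFSBLOCK */
}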
3584
3585 /*
3586 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
3587 * It figures out where to ask the underlying allocator to put the new extent.
3588 */
3589 STATIC int
3590 xfs_bmap_alloc(
3591 struct xfs_bmalloca *ap) /* bmap alloc argument struct */
3592 {
3593 if (XFS_IS_REALTIME_INODE(ap->ip) &&
3594 xfs_alloc_is_userdata(ap->datatype))
3595 return xfs_bmap_rtalloc(ap);
3596 return xfs_bmap_btalloc(ap);
3597 }
3598
3599 /* Trim extent to fit a logical block range. */
3600 void
3601 xfs_trim_extent(
3602 struct xfs_bmbt_irec *irec,
3603 xfs_fileoff_t bno,
3604 xfs_filblks_t len)
3605 {
3606 xfs_fileoff_t distance;
3607 xfs_fileoff_t end = bno + len;
3608
3609 if (irec->br_startoff + irec->br_blockcount <= bno ||
3610 irec->br_startoff >= end) {
3611 irec->br_blockcount = 0;
3612 return;
3613 }
3614
3615 if (irec->br_startoff < bno) {
3616 distance = bno - irec->br_startoff;
3617 if (isnullstartblock(irec->br_startblock))
3618 irec->br_startblock = DELAYSTARTBLOCK;
3619 if (irec->br_startblock != DELAYSTARTBLOCK &&
3620 irec->br_startblock != HOLESTARTBLOCK)
3621 irec->br_startblock += distance;
3622 irec->br_startoff += distance;
3623 irec->br_blockcount -= distance;
3624 }
3625
3626 if (end < irec->br_startoff + irec->br_blockcount) {
3627 distance = irec->br_startoff + irec->br_blockcount - end;
3628 irec->br_blockcount -= distance;
3629 }
3630 }
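/*
 * Worked example for xfs_trim_extent() (userspace re-statement): a
 * real extent at file offset 100, disk block 500, length 50, trimmed
 * to the range [120,140), becomes offset 120, disk block 520,
 * length 20.
 */
static void
sk_trim(
        unsigned long long      *off,
        unsigned long long      *bno,
        unsigned long long      *len,
        unsigned long long      tbno,
        unsigned long long      tlen)
{
        unsigned long long      end = tbno + tlen;
        unsigned long long      d;

        if (*off + *len <= tbno || *off >= end) {
                *len = 0;                       /* no overlap at all */
                return;
        }
        if (*off < tbno) {                      /* clip the front */
                d = tbno - *off;
                *bno += d;                      /* disk start moves too */
                *off += d;
                *len -= d;
        }
        if (end < *off + *len)                  /* clip the back */
                *len -= *off + *len - end;
}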
3631
3632 /* trim extent to within eof */
3633 void
3634 xfs_trim_extent_eof(
3635 struct xfs_bmbt_irec *irec,
3636 struct xfs_inode *ip)
3637
3638 {
3639 xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount,
3640 i_size_read(VFS_I(ip))));
3641 }
3642
3643 /*
3644 * Trim the returned map to the required bounds
3645 */
3646 STATIC void
3647 xfs_bmapi_trim_map(
3648 struct xfs_bmbt_irec *mval,
3649 struct xfs_bmbt_irec *got,
3650 xfs_fileoff_t *bno,
3651 xfs_filblks_t len,
3652 xfs_fileoff_t obno,
3653 xfs_fileoff_t end,
3654 int n,
3655 int flags)
3656 {
3657 if ((flags & XFS_BMAPI_ENTIRE) ||
3658 got->br_startoff + got->br_blockcount <= obno) {
3659 *mval = *got;
3660 if (isnullstartblock(got->br_startblock))
3661 mval->br_startblock = DELAYSTARTBLOCK;
3662 return;
3663 }
3664
3665 if (obno > *bno)
3666 *bno = obno;
3667 ASSERT((*bno >= obno) || (n == 0));
3668 ASSERT(*bno < end);
3669 mval->br_startoff = *bno;
3670 if (isnullstartblock(got->br_startblock))
3671 mval->br_startblock = DELAYSTARTBLOCK;
3672 else
3673 mval->br_startblock = got->br_startblock +
3674 (*bno - got->br_startoff);
3675 /*
3676 * Return the minimum of what we got and what we asked for as
3677 * the length. We can use the len variable here because it is
3678 * modified below and we could have been there before coming
3679 * here if the first part of the allocation didn't overlap what
3680 * was asked for.
3681 */
3682 mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3683 got->br_blockcount - (*bno - got->br_startoff));
3684 mval->br_state = got->br_state;
3685 ASSERT(mval->br_blockcount <= len);
3686 return;
3687 }
3688
3689 /*
3690 * Update and validate the extent map to return
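 *
 * Adjacent mappings that turn out to be contiguous on disk (or are both
 * delalloc) and share the same extent state (or XFS_BMAPI_IGSTATE is
 * set) are coalesced into the previous entry in *map rather than
 * consuming another slot in the caller's array; n is only bumped when a
 * genuinely new entry is kept.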
3691 */
3692 STATIC void
3693 xfs_bmapi_update_map(
3694 struct xfs_bmbt_irec **map,
3695 xfs_fileoff_t *bno,
3696 xfs_filblks_t *len,
3697 xfs_fileoff_t obno,
3698 xfs_fileoff_t end,
3699 int *n,
3700 int flags)
3701 {
3702 xfs_bmbt_irec_t *mval = *map;
3703
3704 ASSERT((flags & XFS_BMAPI_ENTIRE) ||
3705 ((mval->br_startoff + mval->br_blockcount) <= end));
3706 ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
3707 (mval->br_startoff < obno));
3708
3709 *bno = mval->br_startoff + mval->br_blockcount;
3710 *len = end - *bno;
3711 if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
3712 /* update previous map with new information */
3713 ASSERT(mval->br_startblock == mval[-1].br_startblock);
3714 ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
3715 ASSERT(mval->br_state == mval[-1].br_state);
3716 mval[-1].br_blockcount = mval->br_blockcount;
3717 mval[-1].br_state = mval->br_state;
3718 } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
3719 mval[-1].br_startblock != DELAYSTARTBLOCK &&
3720 mval[-1].br_startblock != HOLESTARTBLOCK &&
3721 mval->br_startblock == mval[-1].br_startblock +
3722 mval[-1].br_blockcount &&
3723 ((flags & XFS_BMAPI_IGSTATE) ||
3724 mval[-1].br_state == mval->br_state)) {
3725 ASSERT(mval->br_startoff ==
3726 mval[-1].br_startoff + mval[-1].br_blockcount);
3727 mval[-1].br_blockcount += mval->br_blockcount;
3728 } else if (*n > 0 &&
3729 mval->br_startblock == DELAYSTARTBLOCK &&
3730 mval[-1].br_startblock == DELAYSTARTBLOCK &&
3731 mval->br_startoff ==
3732 mval[-1].br_startoff + mval[-1].br_blockcount) {
3733 mval[-1].br_blockcount += mval->br_blockcount;
3734 mval[-1].br_state = mval->br_state;
3735 } else if (!((*n == 0) &&
3736 ((mval->br_startoff + mval->br_blockcount) <=
3737 obno))) {
3738 mval++;
3739 (*n)++;
3740 }
3741 *map = mval;
3742 }
3743
3744 /*
3745 * Map file blocks to filesystem blocks without allocation.
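 *
 * A typical call looks like the following sketch (caller must hold the
 * ilock in at least shared mode; names are illustrative):
 *
 *	struct xfs_bmbt_irec	map[4];
 *	int			nmap = ARRAY_SIZE(map);
 *
 *	error = xfs_bmapi_read(ip, offset_fsb, count_fsb, map, &nmap, 0);
 *
 * On return, nmap holds the number of mappings filled in.  Holes come
 * back with br_startblock == HOLESTARTBLOCK and delalloc reservations
 * with br_startblock == DELAYSTARTBLOCK.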
3746 */
3747 int
3748 xfs_bmapi_read(
3749 struct xfs_inode *ip,
3750 xfs_fileoff_t bno,
3751 xfs_filblks_t len,
3752 struct xfs_bmbt_irec *mval,
3753 int *nmap,
3754 int flags)
3755 {
3756 struct xfs_mount *mp = ip->i_mount;
3757 struct xfs_ifork *ifp;
3758 struct xfs_bmbt_irec got;
3759 xfs_fileoff_t obno;
3760 xfs_fileoff_t end;
3761 struct xfs_iext_cursor icur;
3762 int error;
3763 bool eof = false;
3764 int n = 0;
3765 int whichfork = xfs_bmapi_whichfork(flags);
3766
3767 ASSERT(*nmap >= 1);
3768 ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
3769 XFS_BMAPI_IGSTATE|XFS_BMAPI_COWFORK)));
3770 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
3771
3772 if (unlikely(XFS_TEST_ERROR(
3773 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
3774 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
3775 mp, XFS_ERRTAG_BMAPIFORMAT))) {
3776 XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
3777 return -EFSCORRUPTED;
3778 }
3779
3780 if (XFS_FORCED_SHUTDOWN(mp))
3781 return -EIO;
3782
3783 XFS_STATS_INC(mp, xs_blk_mapr);
3784
3785 ifp = XFS_IFORK_PTR(ip, whichfork);
3786
3787 /* No CoW fork? Return a hole. */
3788 if (whichfork == XFS_COW_FORK && !ifp) {
3789 mval->br_startoff = bno;
3790 mval->br_startblock = HOLESTARTBLOCK;
3791 mval->br_blockcount = len;
3792 mval->br_state = XFS_EXT_NORM;
3793 *nmap = 1;
3794 return 0;
3795 }
3796
3797 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
3798 error = xfs_iread_extents(NULL, ip, whichfork);
3799 if (error)
3800 return error;
3801 }
3802
3803 if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
3804 eof = true;
3805 end = bno + len;
3806 obno = bno;
3807
3808 while (bno < end && n < *nmap) {
3809 /* Reading past eof, act as though there's a hole up to end. */
3810 if (eof)
3811 got.br_startoff = end;
3812 if (got.br_startoff > bno) {
3813 /* Reading in a hole. */
3814 mval->br_startoff = bno;
3815 mval->br_startblock = HOLESTARTBLOCK;
3816 mval->br_blockcount =
3817 XFS_FILBLKS_MIN(len, got.br_startoff - bno);
3818 mval->br_state = XFS_EXT_NORM;
3819 bno += mval->br_blockcount;
3820 len -= mval->br_blockcount;
3821 mval++;
3822 n++;
3823 continue;
3824 }
3825
3826 /* set up the extent map to return. */
3827 xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
3828 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
3829
3830 /* If we're done, stop now. */
3831 if (bno >= end || n >= *nmap)
3832 break;
3833
3834 /* Else go on to the next record. */
3835 if (!xfs_iext_next_extent(ifp, &icur, &got))
3836 eof = true;
3837 }
3838 *nmap = n;
3839 return 0;
3840 }
3841
3842 /*
3843 * Add a delayed allocation extent to an inode. Blocks are reserved from the
3844 * global pool and the extent inserted into the inode in-core extent tree.
3845 *
3846 * On entry, got refers to the first extent beyond the offset of the extent to
3847 * allocate or eof is specified if no such extent exists. On return, got refers
3848 * to the extent record that was inserted to the inode fork.
3849 *
3850 * Note that the allocated extent may have been merged with contiguous extents
3851 * during insertion into the inode fork. Thus, got does not reflect the current
3852 * state of the inode fork on return. If necessary, the caller can use icur to
3853 * look up the updated record in the inode fork.
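 *
 * The inserted extent carries br_startblock = nullstartblock(indlen):
 * no disk blocks are assigned yet, and the worst-case indirect (bmbt
 * block) reservation is encoded in the startblock field until the
 * delalloc extent is converted to a real allocation.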
3854 */
3855 int
3856 xfs_bmapi_reserve_delalloc(
3857 struct xfs_inode *ip,
3858 int whichfork,
3859 xfs_fileoff_t off,
3860 xfs_filblks_t len,
3861 xfs_filblks_t prealloc,
3862 struct xfs_bmbt_irec *got,
3863 struct xfs_iext_cursor *icur,
3864 int eof)
3865 {
3866 struct xfs_mount *mp = ip->i_mount;
3867 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
3868 xfs_extlen_t alen;
3869 xfs_extlen_t indlen;
3870 char rt = XFS_IS_REALTIME_INODE(ip);
3871 xfs_extlen_t extsz;
3872 int error;
3873 xfs_fileoff_t aoff = off;
3874
3875 /*
3876 * Cap the alloc length. Keep track of prealloc so we know whether to
3877 * tag the inode before we return.
3878 */
3879 alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
3880 if (!eof)
3881 alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
3882 if (prealloc && alen >= len)
3883 prealloc = alen - len;
3884
3885 /* Figure out the extent size, adjust alen */
3886 if (whichfork == XFS_COW_FORK)
3887 extsz = xfs_get_cowextsz_hint(ip);
3888 else
3889 extsz = xfs_get_extsz_hint(ip);
3890 if (extsz) {
3891 struct xfs_bmbt_irec prev;
3892
3893 if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
3894 prev.br_startoff = NULLFILEOFF;
3895
3896 error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof,
3897 1, 0, &aoff, &alen);
3898 ASSERT(!error);
3899 }
3900
3901 if (rt)
3902 extsz = alen / mp->m_sb.sb_rextsize;
3903
3904 /*
3905 * Make a transaction-less quota reservation for delayed allocation
3906 * blocks.  This number gets adjusted later.  It is safe to return
3907 * early on failure because no blocks have been allocated yet.
3908 */
3909 error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
3910 rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
3911 if (error)
3912 return error;
3913
3914 /*
3915 * Split the superblock counter updates for alen and indlen since
3916 * they could be coming from different pools.
3917 */
3918 indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
3919 ASSERT(indlen > 0);
3920
3921 if (rt) {
3922 error = xfs_mod_frextents(mp, -((int64_t)extsz));
3923 } else {
3924 error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
3925 }
3926
3927 if (error)
3928 goto out_unreserve_quota;
3929
3930 error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
3931 if (error)
3932 goto out_unreserve_blocks;
3933
3935 ip->i_delayed_blks += alen;
3936
3937 got->br_startoff = aoff;
3938 got->br_startblock = nullstartblock(indlen);
3939 got->br_blockcount = alen;
3940 got->br_state = XFS_EXT_NORM;
3941
3942 xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
3943
3944 /*
3945 * Tag the inode if blocks were preallocated. Note that COW fork
3946 * preallocation can occur at the start or end of the extent, even when
3947 * prealloc == 0, so we must also check the aligned offset and length.
3948 */
3949 if (whichfork == XFS_DATA_FORK && prealloc)
3950 xfs_inode_set_eofblocks_tag(ip);
3951 if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
3952 xfs_inode_set_cowblocks_tag(ip);
3953
3954 return 0;
3955
3956 out_unreserve_blocks:
3957 if (rt)
3958 xfs_mod_frextents(mp, extsz);
3959 else
3960 xfs_mod_fdblocks(mp, alen, false);
3961 out_unreserve_quota:
3962 if (XFS_IS_QUOTA_ON(mp))
3963 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
3964 XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
3965 return error;
3966 }
3967
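/*
 * Allocate the disk blocks backing one mapping for xfs_bmapi_write():
 * size the request, call into the block allocator, and fold the result
 * into the inode fork, either converting an existing delalloc
 * reservation (bma->wasdel) or filling a hole with a real extent.
 */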
3968 static int
3969 xfs_bmapi_allocate(
3970 struct xfs_bmalloca *bma)
3971 {
3972 struct xfs_mount *mp = bma->ip->i_mount;
3973 int whichfork = xfs_bmapi_whichfork(bma->flags);
3974 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
3975 int tmp_logflags = 0;
3976 int error;
3977
3978 ASSERT(bma->length > 0);
3979
3980 /*
3981 * For the wasdelay case, we could also just allocate the blocks asked
3982 * for in this bmap call, but converting the whole delayed extent at once is more efficient.
3983 */
3984 if (bma->wasdel) {
3985 bma->length = (xfs_extlen_t)bma->got.br_blockcount;
3986 bma->offset = bma->got.br_startoff;
3987 xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev);
3988 } else {
3989 bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
3990 if (!bma->eof)
3991 bma->length = XFS_FILBLKS_MIN(bma->length,
3992 bma->got.br_startoff - bma->offset);
3993 }
3994
3995 /*
3996 * Set the data type being allocated. For the data fork, the first data
3997 * in the file is treated differently to all other allocations. For the
3998 * attribute fork, we only need to ensure the allocated range is not on
3999 * the busy list.
4000 */
4001 if (!(bma->flags & XFS_BMAPI_METADATA)) {
4002 bma->datatype = XFS_ALLOC_NOBUSY;
4003 if (whichfork == XFS_DATA_FORK) {
4004 if (bma->offset == 0)
4005 bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4006 else
4007 bma->datatype |= XFS_ALLOC_USERDATA;
4008 }
4009 if (bma->flags & XFS_BMAPI_ZERO)
4010 bma->datatype |= XFS_ALLOC_USERDATA_ZERO;
4011 }
4012
4013 bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
4014
4015 /*
4016 * We only want to do the alignment at EOF if it is userdata and the
4017 * allocation length is larger than a stripe unit.
4018 */
4019 if (mp->m_dalign && bma->length >= mp->m_dalign &&
4020 !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
4021 error = xfs_bmap_isaeof(bma, whichfork);
4022 if (error)
4023 return error;
4024 }
4025
4026 error = xfs_bmap_alloc(bma);
4027 if (error)
4028 return error;
4029
4030 if (bma->cur)
4031 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4032 if (bma->blkno == NULLFSBLOCK)
4033 return 0;
4034 if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4035 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4036 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4037 bma->cur->bc_private.b.dfops = bma->dfops;
4038 }
4039 /*
4040 * Bump the number of extents we've allocated
4041 * in this call.
4042 */
4043 bma->nallocs++;
4044
4045 if (bma->cur)
4046 bma->cur->bc_private.b.flags =
4047 bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
4048
4049 bma->got.br_startoff = bma->offset;
4050 bma->got.br_startblock = bma->blkno;
4051 bma->got.br_blockcount = bma->length;
4052 bma->got.br_state = XFS_EXT_NORM;
4053
4054 /*
4055 * In the data fork, a wasdelay extent has been initialized, so
4056 * shouldn't be flagged as unwritten.
4057 *
4058 * For the cow fork, however, we convert delalloc reservations
4059 * (extents allocated for speculative preallocation) to
4060 * allocated unwritten extents, and only convert the unwritten
4061 * extents to real extents when we're about to write the data.
4062 */
4063 if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
4064 (bma->flags & XFS_BMAPI_PREALLOC) &&
4065 xfs_sb_version_hasextflgbit(&mp->m_sb))
4066 bma->got.br_state = XFS_EXT_UNWRITTEN;
4067
4068 if (bma->wasdel)
4069 error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4070 else
4071 error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4072 whichfork, &bma->icur, &bma->cur, &bma->got,
4073 bma->firstblock, bma->dfops, &bma->logflags);
4074
4075 bma->logflags |= tmp_logflags;
4076 if (error)
4077 return error;
4078
4079 /*
4080 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4081 * or xfs_bmap_add_extent_hole_real might have merged it into one of
4082 * the neighbouring ones.
4083 */
4084 xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4085
4086 ASSERT(bma->got.br_startoff <= bma->offset);
4087 ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4088 bma->offset + bma->length);
4089 ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4090 bma->got.br_state == XFS_EXT_UNWRITTEN);
4091 return 0;
4092 }
4093
4094 STATIC int
4095 xfs_bmapi_convert_unwritten(
4096 struct xfs_bmalloca *bma,
4097 struct xfs_bmbt_irec *mval,
4098 xfs_filblks_t len,
4099 int flags)
4100 {
4101 int whichfork = xfs_bmapi_whichfork(flags);
4102 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4103 int tmp_logflags = 0;
4104 int error;
4105
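	/*
	 * A sketch of the decision below: unwritten -> real conversion
	 * happens unless XFS_BMAPI_PREALLOC asked for unwritten extents
	 * in the first place; real -> unwritten happens only when both
	 * XFS_BMAPI_PREALLOC and XFS_BMAPI_CONVERT are set.
	 */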
4106 /* check if we need to do unwritten->real conversion */
4107 if (mval->br_state == XFS_EXT_UNWRITTEN &&
4108 (flags & XFS_BMAPI_PREALLOC))
4109 return 0;
4110
4111 /* check if we need to do real->unwritten conversion */
4112 if (mval->br_state == XFS_EXT_NORM &&
4113 (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4114 (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4115 return 0;
4116
4117 /*
4118 * Flip the extent state: unwritten becomes normal and vice versa.
4119 */
4120 ASSERT(mval->br_blockcount <= len);
4121 if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4122 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4123 bma->ip, whichfork);
4124 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4125 bma->cur->bc_private.b.dfops = bma->dfops;
4126 }
4127 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4128 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4129
4130 /*
4131 * Before insertion into the bmbt, zero the range being converted
4132 * if required.
4133 */
4134 if (flags & XFS_BMAPI_ZERO) {
4135 error = xfs_zero_extent(bma->ip, mval->br_startblock,
4136 mval->br_blockcount);
4137 if (error)
4138 return error;
4139 }
4140
4141 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4142 &bma->icur, &bma->cur, mval, bma->firstblock,
4143 bma->dfops, &tmp_logflags);
4144 /*
4145 * Log the inode core unconditionally in the unwritten extent conversion
4146 * path because the conversion might not have done so (e.g., if the
4147 * extent count hasn't changed). We need to make sure the inode is dirty
4148 * in the transaction for the sake of fsync(), even if nothing has
4149 * changed, because fsync() will not force the log for this transaction
4150 * unless it sees the inode pinned.
4151 *
4152 * Note: If we're only converting cow fork extents, there aren't
4153 * any on-disk updates to make, so we don't need to log anything.
4154 */
4155 if (whichfork != XFS_COW_FORK)
4156 bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4157 if (error)
4158 return error;
4159
4160 /*
4161 * Update our extent pointer, given that
4162 * xfs_bmap_add_extent_unwritten_real might have merged it into one
4163 * of the neighbouring ones.
4164 */
4165 xfs_iext_get_extent(ifp, &bma->icur, &bma->got);
4166
4167 /*
4168 * We may have combined previously unwritten space with written space,
4169 * so generate another request.
4170 */
4171 if (mval->br_blockcount < len)
4172 return -EAGAIN;
4173 return 0;
4174 }
4175
4176 /*
4177 * Map file blocks to filesystem blocks, and allocate blocks or convert the
4178 * extent state if necessary.  Detailed behaviour is controlled by the flags
4179 * parameter. Only allocates blocks from a single allocation group, to avoid
4180 * locking problems.
4181 *
4182 * The returned value in "firstblock" from the first call in a transaction
4183 * must be remembered and presented to subsequent calls in "firstblock".
4184 * An upper bound for the number of blocks to be allocated is supplied to
4185 * the first call in "total"; if no allocation group has that many free
4186 * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
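 *
 * A sketch of the expected calling pattern within a single transaction
 * (names illustrative; error handling elided):
 *
 *	xfs_fsblock_t		firstblock = NULLFSBLOCK;
 *	struct xfs_defer_ops	dfops;
 *	struct xfs_bmbt_irec	map;
 *	int			nmap = 1;
 *
 *	xfs_defer_init(&dfops, &firstblock);
 *	error = xfs_bmapi_write(tp, ip, bno, len, 0, &firstblock,
 *			resblks, &map, &nmap, &dfops);
 *
 * Any further xfs_bmapi_write() calls in the same transaction must be
 * passed the same firstblock value that the first call returned.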
4187 */
4188 int
4189 xfs_bmapi_write(
4190 struct xfs_trans *tp, /* transaction pointer */
4191 struct xfs_inode *ip, /* incore inode */
4192 xfs_fileoff_t bno, /* starting file offs. mapped */
4193 xfs_filblks_t len, /* length to map in file */
4194 int flags, /* XFS_BMAPI_... */
4195 xfs_fsblock_t *firstblock, /* first allocated block
4196 controls a.g. for allocs */
4197 xfs_extlen_t total, /* total blocks needed */
4198 struct xfs_bmbt_irec *mval, /* output: map values */
4199 int *nmap, /* i/o: mval size/count */
4200 struct xfs_defer_ops *dfops) /* i/o: list extents to free */
4201 {
4202 struct xfs_mount *mp = ip->i_mount;
4203 struct xfs_ifork *ifp;
4204 struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */
4205 xfs_fileoff_t end; /* end of mapped file region */
4206 bool eof = false; /* after the end of extents */
4207 int error; /* error return */
4208 int n; /* current extent index */
4209 xfs_fileoff_t obno; /* old block number (offset) */
4210 int whichfork; /* data or attr fork */
4211
4212 #ifdef DEBUG
4213 xfs_fileoff_t orig_bno; /* original block number value */
4214 int orig_flags; /* original flags arg value */
4215 xfs_filblks_t orig_len; /* original value of len arg */
4216 struct xfs_bmbt_irec *orig_mval; /* original value of mval */
4217 int orig_nmap; /* original value of *nmap */
4218
4219 orig_bno = bno;
4220 orig_len = len;
4221 orig_flags = flags;
4222 orig_mval = mval;
4223 orig_nmap = *nmap;
4224 #endif
4225 whichfork = xfs_bmapi_whichfork(flags);
4226
4227 ASSERT(*nmap >= 1);
4228 ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4229 ASSERT(!(flags & XFS_BMAPI_IGSTATE));
4230 ASSERT(tp != NULL ||
4231 (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) ==
4232 (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK));
4233 ASSERT(len > 0);
4234 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
4235 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4236 ASSERT(!(flags & XFS_BMAPI_REMAP));
4237
4238 /* zeroing is currently only for data extents, not metadata */
4239 ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4240 (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4241 /*
4242 * We can allocate unwritten extents or pre-zero allocated blocks,
4243 * but it makes no sense to do both at once.  This would result in
4244 * zeroing the unwritten extent twice while leaving it an unwritten
4245 * extent anyway.
4246 */
4247 ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4248 (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4249
4250 if (unlikely(XFS_TEST_ERROR(
4251 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4252 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
4253 mp, XFS_ERRTAG_BMAPIFORMAT))) {
4254 XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
4255 return -EFSCORRUPTED;
4256 }
4257
4258 if (XFS_FORCED_SHUTDOWN(mp))
4259 return -EIO;
4260
4261 ifp = XFS_IFORK_PTR(ip, whichfork);
4262
4263 XFS_STATS_INC(mp, xs_blk_mapw);
4264
4265 if (*firstblock == NULLFSBLOCK) {
4266 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
4267 bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
4268 else
4269 bma.minleft = 1;
4270 } else {
4271 bma.minleft = 0;
4272 }
4273
4274 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4275 error = xfs_iread_extents(tp, ip, whichfork);
4276 if (error)
4277 goto error0;
4278 }
4279
4280 n = 0;
4281 end = bno + len;
4282 obno = bno;
4283
4284 if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
4285 eof = true;
4286 if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4287 bma.prev.br_startoff = NULLFILEOFF;
4288 bma.tp = tp;
4289 bma.ip = ip;
4290 bma.total = total;
4291 bma.datatype = 0;
4292 bma.dfops = dfops;
4293 bma.firstblock = firstblock;
4294
4295 while (bno < end && n < *nmap) {
4296 bool need_alloc = false, wasdelay = false;
4297
4298 /* in a hole or beyond EOF? */
4299 if (eof || bma.got.br_startoff > bno) {
4300 if (flags & XFS_BMAPI_DELALLOC) {
4301 /*
4302 * For the COW fork we can reasonably get a
4303 * request for converting an extent that races
4304 * with other threads already having converted
4305 * part of it, because converting COW to
4306 * regular blocks is not protected by the
4307 * IOLOCK.
4308 */
4309 ASSERT(flags & XFS_BMAPI_COWFORK);
4310 if (!(flags & XFS_BMAPI_COWFORK)) {
4311 error = -EIO;
4312 goto error0;
4313 }
4314
4315 if (eof || bno >= end)
4316 break;
4317 } else {
4318 need_alloc = true;
4319 }
4320 } else if (isnullstartblock(bma.got.br_startblock)) {
4321 wasdelay = true;
4322 }
4323
4324 /*
4325 * First, deal with the hole before the allocated space
4326 * that we found, if any.
4327 */
4328 if ((need_alloc || wasdelay) &&
4329 !(flags & XFS_BMAPI_CONVERT_ONLY)) {
4330 bma.eof = eof;
4331 bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4332 bma.wasdel = wasdelay;
4333 bma.offset = bno;
4334 bma.flags = flags;
4335
4336 /*
4337 * There's a 32/64 bit type mismatch between the
4338 * allocation length request (which can be 64 bits in
4339 * length) and the bma length request, which is
4340 * xfs_extlen_t and therefore 32 bits. Hence we have to
4341 * check for 32-bit overflows and handle them here.
4342 */
4343 if (len > (xfs_filblks_t)MAXEXTLEN)
4344 bma.length = MAXEXTLEN;
4345 else
4346 bma.length = len;
4347
4348 ASSERT(len > 0);
4349 ASSERT(bma.length > 0);
4350 error = xfs_bmapi_allocate(&bma);
4351 if (error)
4352 goto error0;
4353 if (bma.blkno == NULLFSBLOCK)
4354 break;
4355
4356 /*
4357 * If this is a CoW allocation, record the data in
4358 * the refcount btree for orphan recovery.
4359 */
4360 if (whichfork == XFS_COW_FORK) {
4361 error = xfs_refcount_alloc_cow_extent(mp, dfops,
4362 bma.blkno, bma.length);
4363 if (error)
4364 goto error0;
4365 }
4366 }
4367
4368 /* Deal with the allocated space we found. */
4369 xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4370 end, n, flags);
4371
4372 /* Execute unwritten extent conversion if necessary */
4373 error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4374 if (error == -EAGAIN)
4375 continue;
4376 if (error)
4377 goto error0;
4378
4379 /* update the extent map to return */
4380 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4381
4382 /*
4383 * If we're done, stop now. Stop when we've allocated
4384 * XFS_BMAP_MAX_NMAP extents no matter what. Otherwise
4385 * the transaction may get too big.
4386 */
4387 if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4388 break;
4389
4390 /* Else go on to the next record. */
4391 bma.prev = bma.got;
4392 if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
4393 eof = true;
4394 }
4395 *nmap = n;
4396
4397 /*
4398 * Transform from btree to extents, give it cur.
4399 */
4400 if (xfs_bmap_wants_extents(ip, whichfork)) {
4401 int tmp_logflags = 0;
4402
4403 ASSERT(bma.cur);
4404 error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
4405 &tmp_logflags, whichfork);
4406 bma.logflags |= tmp_logflags;
4407 if (error)
4408 goto error0;
4409 }
4410
4411 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
4412 XFS_IFORK_NEXTENTS(ip, whichfork) >
4413 XFS_IFORK_MAXEXT(ip, whichfork));
4414 error = 0;
4415 error0:
4416 /*
4417 * Log everything. Do this after conversion, there's no point in
4418 * logging the extent records if we've converted to btree format.
4419 */
4420 if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
4421 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
4422 bma.logflags &= ~xfs_ilog_fext(whichfork);
4423 else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
4424 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
4425 bma.logflags &= ~xfs_ilog_fbroot(whichfork);
4426 /*
4427 * Log whatever the flags say, even on error.  Otherwise we might miss
4428 * a case where the data changed, an error occurred, and the change was
4429 * never logged, so we would not shut down when we should.
4430 */
4431 if (bma.logflags)
4432 xfs_trans_log_inode(tp, ip, bma.logflags);
4433
4434 if (bma.cur) {
4435 if (!error) {
4436 ASSERT(*firstblock == NULLFSBLOCK ||
4437 XFS_FSB_TO_AGNO(mp, *firstblock) <=
4438 XFS_FSB_TO_AGNO(mp,
4439 bma.cur->bc_private.b.firstblock));
4440 *firstblock = bma.cur->bc_private.b.firstblock;
4441 }
4442 xfs_btree_del_cursor(bma.cur,
4443 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
4444 }
4445 if (!error)
4446 xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4447 orig_nmap, *nmap);
4448 return error;
4449 }
4450
4451 static int
4452 xfs_bmapi_remap(
4453 struct xfs_trans *tp,
4454 struct xfs_inode *ip,
4455 xfs_fileoff_t bno,
4456 xfs_filblks_t len,
4457 xfs_fsblock_t startblock,
4458 struct xfs_defer_ops *dfops)
4459 {
4460 struct xfs_mount *mp = ip->i_mount;
4461 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
4462 struct xfs_btree_cur *cur = NULL;
4463 xfs_fsblock_t firstblock = NULLFSBLOCK;
4464 struct xfs_bmbt_irec got;
4465 struct xfs_iext_cursor icur;
4466 int logflags = 0, error;
4467
4468 ASSERT(len > 0);
4469 ASSERT(len <= (xfs_filblks_t)MAXEXTLEN);
4470 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4471
4472 if (unlikely(XFS_TEST_ERROR(
4473 (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
4474 XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
4475 mp, XFS_ERRTAG_BMAPIFORMAT))) {
4476 XFS_ERROR_REPORT("xfs_bmapi_remap", XFS_ERRLEVEL_LOW, mp);
4477 return -EFSCORRUPTED;
4478 }
4479
4480 if (XFS_FORCED_SHUTDOWN(mp))
4481 return -EIO;
4482
4483 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4484 error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
4485 if (error)
4486 return error;
4487 }
4488
4489 if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4490 /* make sure we only reflink into a hole. */
4491 ASSERT(got.br_startoff > bno);
4492 ASSERT(got.br_startoff - bno >= len);
4493 }
4494
4495 ip->i_d.di_nblocks += len;
4496 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4497
4498 if (ifp->if_flags & XFS_IFBROOT) {
4499 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
4500 cur->bc_private.b.firstblock = firstblock;
4501 cur->bc_private.b.dfops = dfops;
4502 cur->bc_private.b.flags = 0;
4503 }
4504
4505 got.br_startoff = bno;
4506 got.br_startblock = startblock;
4507 got.br_blockcount = len;
4508 got.br_state = XFS_EXT_NORM;
4509
4510 error = xfs_bmap_add_extent_hole_real(tp, ip, XFS_DATA_FORK, &icur,
4511 &cur, &got, &firstblock, dfops, &logflags);
4512 if (error)
4513 goto error0;
4514
4515 if (xfs_bmap_wants_extents(ip, XFS_DATA_FORK)) {
4516 int tmp_logflags = 0;
4517
4518 error = xfs_bmap_btree_to_extents(tp, ip, cur,
4519 &tmp_logflags, XFS_DATA_FORK);
4520 logflags |= tmp_logflags;
4521 }
4522
4523 error0:
4524 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS)
4525 logflags &= ~XFS_ILOG_DEXT;
4526 else if (ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
4527 logflags &= ~XFS_ILOG_DBROOT;
4528
4529 if (logflags)
4530 xfs_trans_log_inode(tp, ip, logflags);
4531 if (cur) {
4532 xfs_btree_del_cursor(cur,
4533 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
4534 }
4535 return error;
4536 }
4537
4538 /*
4539 * When a delalloc extent is split (e.g., due to a hole punch), the original
4540 * indlen reservation must be shared across the two new extents that are left
4541 * behind.
4542 *
4543 * Given the original reservation and the worst case indlen for the two new
4544 * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4545 * reservation fairly across the two new extents. If necessary, steal available
4546 * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4547 * ores == 1). The number of stolen blocks is returned. The availability and
4548 * subsequent accounting of stolen blocks is the responsibility of the caller.
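 *
 * A worked sketch with illustrative numbers: for ores = 10,
 * *indlen1 = 8, *indlen2 = 6 and avail = 2, we steal
 * min(14 - 10, 2) = 2 blocks, leaving a shortage of 2.  Then
 * resfactor = (12 * 100) / 14 = 85, giving len1 = 6 and len2 = 5;
 * the one remaining block tops len1 up to 7, so the function returns
 * 2 with *indlen1 = 7 and *indlen2 = 5.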
4549 */
4550 static xfs_filblks_t
4551 xfs_bmap_split_indlen(
4552 xfs_filblks_t ores, /* original res. */
4553 xfs_filblks_t *indlen1, /* ext1 worst indlen */
4554 xfs_filblks_t *indlen2, /* ext2 worst indlen */
4555 xfs_filblks_t avail) /* stealable blocks */
4556 {
4557 xfs_filblks_t len1 = *indlen1;
4558 xfs_filblks_t len2 = *indlen2;
4559 xfs_filblks_t nres = len1 + len2; /* new total res. */
4560 xfs_filblks_t stolen = 0;
4561 xfs_filblks_t resfactor;
4562
4563 /*
4564 * Steal as many blocks as we can to try and satisfy the worst case
4565 * indlen for both new extents.
4566 */
4567 if (ores < nres && avail)
4568 stolen = XFS_FILBLKS_MIN(nres - ores, avail);
4569 ores += stolen;
4570
4571 /* nothing else to do if we've satisfied the new reservation */
4572 if (ores >= nres)
4573 return stolen;
4574
4575 /*
4576 * We can't meet the total required reservation for the two extents.
4577 * Calculate the percent of the overall shortage between both extents
4578 * and apply this percentage to each of the requested indlen values.
4579 * This distributes the shortage fairly and reduces the chances that one
4580 * of the two extents is left with nothing when extents are repeatedly
4581 * split.
4582 */
4583 resfactor = (ores * 100);
4584 do_div(resfactor, nres);
4585 len1 *= resfactor;
4586 do_div(len1, 100);
4587 len2 *= resfactor;
4588 do_div(len2, 100);
4589 ASSERT(len1 + len2 <= ores);
4590 ASSERT(len1 < *indlen1 && len2 < *indlen2);
4591
4592 /*
4593 * Hand out the remainder to each extent. If one of the two reservations
4594 * is zero, we want to make sure that one gets a block first. The loop
4595 * below starts with len1, so hand len2 a block right off the bat if it
4596 * is zero.
4597 */
4598 ores -= (len1 + len2);
4599 ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
4600 if (ores && !len2 && *indlen2) {
4601 len2++;
4602 ores--;
4603 }
4604 while (ores) {
4605 if (len1 < *indlen1) {
4606 len1++;
4607 ores--;
4608 }
4609 if (!ores)
4610 break;
4611 if (len2 < *indlen2) {
4612 len2++;
4613 ores--;
4614 }
4615 }
4616
4617 *indlen1 = len1;
4618 *indlen2 = len2;
4619
4620 return stolen;
4621 }
4622
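/*
 * Remove the range described by *del from the delalloc extent *got that
 * contains it, giving back the unused quota and fdblocks reservations.
 * Four cases fall out of whether del shares got's start and/or end:
 * delete the whole record, trim the front, trim the back, or split the
 * extent in two and spread the indlen reservation across the halves.
 */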
4623 int
4624 xfs_bmap_del_extent_delay(
4625 struct xfs_inode *ip,
4626 int whichfork,
4627 struct xfs_iext_cursor *icur,
4628 struct xfs_bmbt_irec *got,
4629 struct xfs_bmbt_irec *del)
4630 {
4631 struct xfs_mount *mp = ip->i_mount;
4632 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
4633 struct xfs_bmbt_irec new;
4634 int64_t da_old, da_new, da_diff = 0;
4635 xfs_fileoff_t del_endoff, got_endoff;
4636 xfs_filblks_t got_indlen, new_indlen, stolen;
4637 int state = xfs_bmap_fork_to_state(whichfork);
4638 int error = 0;
4639 bool isrt;
4640
4641 XFS_STATS_INC(mp, xs_del_exlist);
4642
4643 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
4644 del_endoff = del->br_startoff + del->br_blockcount;
4645 got_endoff = got->br_startoff + got->br_blockcount;
4646 da_old = startblockval(got->br_startblock);
4647 da_new = 0;
4648
4649 ASSERT(del->br_blockcount > 0);
4650 ASSERT(got->br_startoff <= del->br_startoff);
4651 ASSERT(got_endoff >= del_endoff);
4652
4653 if (isrt) {
4654 uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
4655
4656 do_div(rtexts, mp->m_sb.sb_rextsize);
4657 xfs_mod_frextents(mp, rtexts);
4658 }
4659
4660 /*
4661 * Update the inode delalloc counter now and wait to update the
4662 * sb counters as we might have to borrow some blocks for the
4663 * indirect block accounting.
4664 */
4665 error = xfs_trans_reserve_quota_nblks(NULL, ip,
4666 -((long)del->br_blockcount), 0,
4667 isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4668 if (error)
4669 return error;
4670 ip->i_delayed_blks -= del->br_blockcount;
4671
4672 if (got->br_startoff == del->br_startoff)
4673 state |= BMAP_LEFT_FILLING;
4674 if (got_endoff == del_endoff)
4675 state |= BMAP_RIGHT_FILLING;
4676
4677 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4678 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4679 /*
4680 * Matches the whole extent. Delete the entry.
4681 */
4682 xfs_iext_remove(ip, icur, state);
4683 xfs_iext_prev(ifp, icur);
4684 break;
4685 case BMAP_LEFT_FILLING:
4686 /*
4687 * Deleting the first part of the extent.
4688 */
4689 got->br_startoff = del_endoff;
4690 got->br_blockcount -= del->br_blockcount;
4691 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4692 got->br_blockcount), da_old);
4693 got->br_startblock = nullstartblock((int)da_new);
4694 xfs_iext_update_extent(ip, state, icur, got);
4695 break;
4696 case BMAP_RIGHT_FILLING:
4697 /*
4698 * Deleting the last part of the extent.
4699 */
4700 got->br_blockcount = got->br_blockcount - del->br_blockcount;
4701 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4702 got->br_blockcount), da_old);
4703 got->br_startblock = nullstartblock((int)da_new);
4704 xfs_iext_update_extent(ip, state, icur, got);
4705 break;
4706 case 0:
4707 /*
4708 * Deleting the middle of the extent.
4709 *
4710 * Distribute the original indlen reservation across the two new
4711 * extents. Steal blocks from the deleted extent if necessary.
4712 * Stealing blocks simply fudges the fdblocks accounting below.
4713 * Warn if either of the new indlen reservations is zero as this
4714 * can lead to delalloc problems.
4715 */
4716 got->br_blockcount = del->br_startoff - got->br_startoff;
4717 got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4718
4719 new.br_blockcount = got_endoff - del_endoff;
4720 new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
4721
4722 WARN_ON_ONCE(!got_indlen || !new_indlen);
4723 stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
4724 del->br_blockcount);
4725
4726 got->br_startblock = nullstartblock((int)got_indlen);
4727
4728 new.br_startoff = del_endoff;
4729 new.br_state = got->br_state;
4730 new.br_startblock = nullstartblock((int)new_indlen);
4731
4732 xfs_iext_update_extent(ip, state, icur, got);
4733 xfs_iext_next(ifp, icur);
4734 xfs_iext_insert(ip, icur, &new, state);
4735
4736 da_new = got_indlen + new_indlen - stolen;
4737 del->br_blockcount -= stolen;
4738 break;
4739 }
4740
4741 ASSERT(da_old >= da_new);
4742 da_diff = da_old - da_new;
4743 if (!isrt)
4744 da_diff += del->br_blockcount;
4745 if (da_diff)
4746 xfs_mod_fdblocks(mp, da_diff, false);
4747 return error;
4748 }
4749
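/*
 * Remove the range described by *del from the real CoW fork extent *got
 * that contains it.  Only the in-core extent list is updated here;
 * freeing the blocks themselves is left to the caller.
 */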
4750 void
4751 xfs_bmap_del_extent_cow(
4752 struct xfs_inode *ip,
4753 struct xfs_iext_cursor *icur,
4754 struct xfs_bmbt_irec *got,
4755 struct xfs_bmbt_irec *del)
4756 {
4757 struct xfs_mount *mp = ip->i_mount;
4758 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
4759 struct xfs_bmbt_irec new;
4760 xfs_fileoff_t del_endoff, got_endoff;
4761 int state = BMAP_COWFORK;
4762
4763 XFS_STATS_INC(mp, xs_del_exlist);
4764
4765 del_endoff = del->br_startoff + del->br_blockcount;
4766 got_endoff = got->br_startoff + got->br_blockcount;
4767
4768 ASSERT(del->br_blockcount > 0);
4769 ASSERT(got->br_startoff <= del->br_startoff);
4770 ASSERT(got_endoff >= del_endoff);
4771 ASSERT(!isnullstartblock(got->br_startblock));
4772
4773 if (got->br_startoff == del->br_startoff)
4774 state |= BMAP_LEFT_FILLING;
4775 if (got_endoff == del_endoff)
4776 state |= BMAP_RIGHT_FILLING;
4777
4778 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4779 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4780 /*
4781 * Matches the whole extent. Delete the entry.
4782 */
4783 xfs_iext_remove(ip, icur, state);
4784 xfs_iext_prev(ifp, icur);
4785 break;
4786 case BMAP_LEFT_FILLING:
4787 /*
4788 * Deleting the first part of the extent.
4789 */
4790 got->br_startoff = del_endoff;
4791 got->br_blockcount -= del->br_blockcount;
4792 got->br_startblock = del->br_startblock + del->br_blockcount;
4793 xfs_iext_update_extent(ip, state, icur, got);
4794 break;
4795 case BMAP_RIGHT_FILLING:
4796 /*
4797 * Deleting the last part of the extent.
4798 */
4799 got->br_blockcount -= del->br_blockcount;
4800 xfs_iext_update_extent(ip, state, icur, got);
4801 break;
4802 case 0:
4803 /*
4804 * Deleting the middle of the extent.
4805 */
4806 got->br_blockcount = del->br_startoff - got->br_startoff;
4807
4808 new.br_startoff = del_endoff;
4809 new.br_blockcount = got_endoff - del_endoff;
4810 new.br_state = got->br_state;
4811 new.br_startblock = del->br_startblock + del->br_blockcount;
4812
4813 xfs_iext_update_extent(ip, state, icur, got);
4814 xfs_iext_next(ifp, icur);
4815 xfs_iext_insert(ip, icur, &new, state);
4816 break;
4817 }
4818 }
4819
4820 /*
4821 * Called by xfs_bmapi to update file extent records and the btree
4822 * after removing space.
4823 */
4824 STATIC int /* error */
4825 xfs_bmap_del_extent_real(
4826 xfs_inode_t *ip, /* incore inode pointer */
4827 xfs_trans_t *tp, /* current transaction pointer */
4828 struct xfs_iext_cursor *icur,
4829 struct xfs_defer_ops *dfops, /* list of extents to be freed */
4830 xfs_btree_cur_t *cur, /* if null, not a btree */
4831 xfs_bmbt_irec_t *del, /* data to remove from extents */
4832 int *logflagsp, /* inode logging flags */
4833 int whichfork, /* data or attr fork */
4834 int bflags) /* bmapi flags */
4835 {
4836 xfs_fsblock_t del_endblock=0; /* first block past del */
4837 xfs_fileoff_t del_endoff; /* first offset past del */
4838 int do_fx; /* free extent at end of routine */
4839 int error; /* error return value */
4840 int flags = 0;/* inode logging flags */
4841 struct xfs_bmbt_irec got; /* current extent entry */
4842 xfs_fileoff_t got_endoff; /* first offset past got */
4843 int i; /* temp state */
4844 xfs_ifork_t *ifp; /* inode fork pointer */
4845 xfs_mount_t *mp; /* mount structure */
4846 xfs_filblks_t nblks; /* quota/sb block count */
4847 xfs_bmbt_irec_t new; /* new record to be inserted */
4848 /* REFERENCED */
4849 uint qfield; /* quota field to update */
4850 int state = xfs_bmap_fork_to_state(whichfork);
4851 struct xfs_bmbt_irec old;
4852
4853 mp = ip->i_mount;
4854 XFS_STATS_INC(mp, xs_del_exlist);
4855
4856 ifp = XFS_IFORK_PTR(ip, whichfork);
4857 ASSERT(del->br_blockcount > 0);
4858 xfs_iext_get_extent(ifp, icur, &got);
4859 ASSERT(got.br_startoff <= del->br_startoff);
4860 del_endoff = del->br_startoff + del->br_blockcount;
4861 got_endoff = got.br_startoff + got.br_blockcount;
4862 ASSERT(got_endoff >= del_endoff);
4863 ASSERT(!isnullstartblock(got.br_startblock));
4864 qfield = 0;
4865 error = 0;
4866
4867 /*
4868 * If the directory code is running with no block reservation, the
4869 * deleted block is in the middle of its extent, and the resulting
4870 * insert of an extent would cause transformation to btree format,
4871 * then reject it. The calling code will then swap blocks
4872 * around instead. We have to do this now, rather than waiting for the
4873 * conversion to btree format, since the transaction will be dirty then.
4874 */
4875 if (tp->t_blk_res == 0 &&
4876 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
4877 XFS_IFORK_NEXTENTS(ip, whichfork) >=
4878 XFS_IFORK_MAXEXT(ip, whichfork) &&
4879 del->br_startoff > got.br_startoff && del_endoff < got_endoff)
4880 return -ENOSPC;
4881
4882 flags = XFS_ILOG_CORE;
4883 if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
4884 xfs_fsblock_t bno;
4885 xfs_filblks_t len;
4886
4887 ASSERT(do_mod(del->br_blockcount, mp->m_sb.sb_rextsize) == 0);
4888 ASSERT(do_mod(del->br_startblock, mp->m_sb.sb_rextsize) == 0);
4889 bno = del->br_startblock;
4890 len = del->br_blockcount;
4891 do_div(bno, mp->m_sb.sb_rextsize);
4892 do_div(len, mp->m_sb.sb_rextsize);
4893 error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
4894 if (error)
4895 goto done;
4896 do_fx = 0;
4897 nblks = len * mp->m_sb.sb_rextsize;
4898 qfield = XFS_TRANS_DQ_RTBCOUNT;
4899 } else {
4900 do_fx = 1;
4901 nblks = del->br_blockcount;
4902 qfield = XFS_TRANS_DQ_BCOUNT;
4903 }
4904
4905 del_endblock = del->br_startblock + del->br_blockcount;
4906 if (cur) {
4907 error = xfs_bmbt_lookup_eq(cur, &got, &i);
4908 if (error)
4909 goto done;
4910 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4911 }
4912
4913 if (got.br_startoff == del->br_startoff)
4914 state |= BMAP_LEFT_FILLING;
4915 if (got_endoff == del_endoff)
4916 state |= BMAP_RIGHT_FILLING;
4917
4918 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4919 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4920 /*
4921 * Matches the whole extent. Delete the entry.
4922 */
4923 xfs_iext_remove(ip, icur, state);
4924 xfs_iext_prev(ifp, icur);
4925 XFS_IFORK_NEXT_SET(ip, whichfork,
4926 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
4927 flags |= XFS_ILOG_CORE;
4928 if (!cur) {
4929 flags |= xfs_ilog_fext(whichfork);
4930 break;
4931 }
4932 if ((error = xfs_btree_delete(cur, &i)))
4933 goto done;
4934 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4935 break;
4936 case BMAP_LEFT_FILLING:
4937 /*
4938 * Deleting the first part of the extent.
4939 */
4940 got.br_startoff = del_endoff;
4941 got.br_startblock = del_endblock;
4942 got.br_blockcount -= del->br_blockcount;
4943 xfs_iext_update_extent(ip, state, icur, &got);
4944 if (!cur) {
4945 flags |= xfs_ilog_fext(whichfork);
4946 break;
4947 }
4948 error = xfs_bmbt_update(cur, &got);
4949 if (error)
4950 goto done;
4951 break;
4952 case BMAP_RIGHT_FILLING:
4953 /*
4954 * Deleting the last part of the extent.
4955 */
4956 got.br_blockcount -= del->br_blockcount;
4957 xfs_iext_update_extent(ip, state, icur, &got);
4958 if (!cur) {
4959 flags |= xfs_ilog_fext(whichfork);
4960 break;
4961 }
4962 error = xfs_bmbt_update(cur, &got);
4963 if (error)
4964 goto done;
4965 break;
4966 case 0:
4967 /*
4968 * Deleting the middle of the extent.
4969 */
4970 old = got;
4971
4972 got.br_blockcount = del->br_startoff - got.br_startoff;
4973 xfs_iext_update_extent(ip, state, icur, &got);
4974
4975 new.br_startoff = del_endoff;
4976 new.br_blockcount = got_endoff - del_endoff;
4977 new.br_state = got.br_state;
4978 new.br_startblock = del_endblock;
4979
4980 flags |= XFS_ILOG_CORE;
4981 if (cur) {
4982 error = xfs_bmbt_update(cur, &got);
4983 if (error)
4984 goto done;
4985 error = xfs_btree_increment(cur, 0, &i);
4986 if (error)
4987 goto done;
4988 cur->bc_rec.b = new;
4989 error = xfs_btree_insert(cur, &i);
4990 if (error && error != -ENOSPC)
4991 goto done;
4992 /*
4993 * If we get no-space back from the btree insert, it tried
4994 * a split, and we have a zero block reservation. Fix up
4995 * our state and return the error.
4996 */
4997 if (error == -ENOSPC) {
4998 /*
4999 * Reset the cursor, don't trust it after any
5000 * insert operation.
5001 */
5002 error = xfs_bmbt_lookup_eq(cur, &got, &i);
5003 if (error)
5004 goto done;
5005 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
5006 /*
5007 * Update the btree record back
5008 * to the original value.
5009 */
5010 error = xfs_bmbt_update(cur, &old);
5011 if (error)
5012 goto done;
5013 /*
5014 * Reset the extent record back
5015 * to the original value.
5016 */
5017 xfs_iext_update_extent(ip, state, icur, &old);
5018 flags = 0;
5019 error = -ENOSPC;
5020 goto done;
5021 }
5022 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
5023 } else
5024 flags |= xfs_ilog_fext(whichfork);
5025 XFS_IFORK_NEXT_SET(ip, whichfork,
5026 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
5027 xfs_iext_next(ifp, icur);
5028 xfs_iext_insert(ip, icur, &new, state);
5029 break;
5030 }
5031
5032 /* remove reverse mapping */
5033 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del);
5034 if (error)
5035 goto done;
5036
5037 /*
5038 * If we need to, add to list of extents to delete.
5039 */
5040 if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
5041 if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
5042 error = xfs_refcount_decrease_extent(mp, dfops, del);
5043 if (error)
5044 goto done;
5045 } else
5046 xfs_bmap_add_free(mp, dfops, del->br_startblock,
5047 del->br_blockcount, NULL);
5048 }
5049
5050 /*
5051 * Adjust inode # blocks in the file.
5052 */
5053 if (nblks)
5054 ip->i_d.di_nblocks -= nblks;
5055 /*
5056 * Adjust quota data.
5057 */
5058 if (qfield && !(bflags & XFS_BMAPI_REMAP))
5059 xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5060
5061 done:
5062 *logflagsp = flags;
5063 return error;
5064 }
5065
5066 /*
5067 * Unmap (remove) blocks from a file.
5068 * If nexts is nonzero then the number of extents to remove is limited to
5069 * that value.  If not all extents in the block range can be removed
5070 * then the remaining length is returned in *rlen.
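 *
 * Callers removing a large range typically loop on the wrapper below,
 * rolling the transaction between calls (sketch; names illustrative):
 *
 *	while (!done) {
 *		error = xfs_bunmapi(tp, ip, first_fsb, unmap_len, 0, 2,
 *				&firstblock, &dfops, &done);
 *		...finish deferred ops and roll the transaction...
 *	}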
5071 */
5072 int /* error */
5073 __xfs_bunmapi(
5074 xfs_trans_t *tp, /* transaction pointer */
5075 struct xfs_inode *ip, /* incore inode */
5076 xfs_fileoff_t start, /* first file offset deleted */
5077 xfs_filblks_t *rlen, /* i/o: amount remaining */
5078 int flags, /* misc flags */
5079 xfs_extnum_t nexts, /* number of extents max */
5080 xfs_fsblock_t *firstblock, /* first allocated block
5081 controls a.g. for allocs */
5082 struct xfs_defer_ops *dfops) /* i/o: deferred updates */
5083 {
5084 xfs_btree_cur_t *cur; /* bmap btree cursor */
5085 xfs_bmbt_irec_t del; /* extent being deleted */
5086 int error; /* error return value */
5087 xfs_extnum_t extno; /* extent number in list */
5088 xfs_bmbt_irec_t got; /* current extent record */
5089 xfs_ifork_t *ifp; /* inode fork pointer */
5090 int isrt; /* freeing in rt area */
5091 int logflags; /* transaction logging flags */
5092 xfs_extlen_t mod; /* rt extent offset */
5093 xfs_mount_t *mp; /* mount structure */
5094 int tmp_logflags; /* partial logging flags */
5095 int wasdel; /* was a delayed alloc extent */
5096 int whichfork; /* data or attribute fork */
5097 xfs_fsblock_t sum;
5098 xfs_filblks_t len = *rlen; /* length to unmap in file */
5099 xfs_fileoff_t max_len;
5100 xfs_agnumber_t prev_agno = NULLAGNUMBER, agno;
5101 xfs_fileoff_t end;
5102 struct xfs_iext_cursor icur;
5103 bool done = false;
5104
5105 trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
5106
5107 whichfork = xfs_bmapi_whichfork(flags);
5108 ASSERT(whichfork != XFS_COW_FORK);
5109 ifp = XFS_IFORK_PTR(ip, whichfork);
5110 if (unlikely(
5111 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5112 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
5113 XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
5114 ip->i_mount);
5115 return -EFSCORRUPTED;
5116 }
5117 mp = ip->i_mount;
5118 if (XFS_FORCED_SHUTDOWN(mp))
5119 return -EIO;
5120
5121 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5122 ASSERT(len > 0);
5123 ASSERT(nexts >= 0);
5124
5125 /*
5126 * Guesstimate how many blocks we can unmap without running the risk of
5127 * blowing out the transaction with a mix of EFIs and reflink
5128 * adjustments.
5129 */
5130 if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
5131 max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
5132 else
5133 max_len = len;
5134
5135 if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5136 (error = xfs_iread_extents(tp, ip, whichfork)))
5137 return error;
5138 if (xfs_iext_count(ifp) == 0) {
5139 *rlen = 0;
5140 return 0;
5141 }
5142 XFS_STATS_INC(mp, xs_blk_unmap);
5143 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5144 end = start + len;
5145
5146 if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
5147 *rlen = 0;
5148 return 0;
5149 }
5150 end--;
5151
5152 logflags = 0;
5153 if (ifp->if_flags & XFS_IFBROOT) {
5154 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
5155 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5156 cur->bc_private.b.firstblock = *firstblock;
5157 cur->bc_private.b.dfops = dfops;
5158 cur->bc_private.b.flags = 0;
5159 } else
5160 cur = NULL;
5161
5162 if (isrt) {
5163 /*
5164 * Synchronize by locking the bitmap inode.
5165 */
5166 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5167 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5168 xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5169 xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5170 }
5171
5172 extno = 0;
5173 while (end != (xfs_fileoff_t)-1 && end >= start &&
5174 (nexts == 0 || extno < nexts) && max_len > 0) {
5175 /*
5176 * Is the found extent after a hole in which end lives?
5177 * Just back up to the previous extent, if so.
5178 */
5179 if (got.br_startoff > end &&
5180 !xfs_iext_prev_extent(ifp, &icur, &got)) {
5181 done = true;
5182 break;
5183 }
5184 /*
5185 * Is the last block of this extent before the range
5186 * we're supposed to delete? If so, we're done.
5187 */
5188 end = XFS_FILEOFF_MIN(end,
5189 got.br_startoff + got.br_blockcount - 1);
5190 if (end < start)
5191 break;
5192 /*
5193 * Then deal with the (possibly delayed) allocated space
5194 * we found.
5195 */
5196 del = got;
5197 wasdel = isnullstartblock(del.br_startblock);
5198
5199 /*
5200 * Make sure we don't touch multiple AGF headers out of order
5201 * in a single transaction, as that could cause AB-BA deadlocks.
5202 */
5203 if (!wasdel) {
5204 agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
5205 if (prev_agno != NULLAGNUMBER && prev_agno > agno)
5206 break;
5207 prev_agno = agno;
5208 }
5209 if (got.br_startoff < start) {
5210 del.br_startoff = start;
5211 del.br_blockcount -= start - got.br_startoff;
5212 if (!wasdel)
5213 del.br_startblock += start - got.br_startoff;
5214 }
5215 if (del.br_startoff + del.br_blockcount > end + 1)
5216 del.br_blockcount = end + 1 - del.br_startoff;
5217
5218 /* How much can we safely unmap? */
5219 if (max_len < del.br_blockcount) {
5220 del.br_startoff += del.br_blockcount - max_len;
5221 if (!wasdel)
5222 del.br_startblock += del.br_blockcount - max_len;
5223 del.br_blockcount = max_len;
5224 }
5225
5226 sum = del.br_startblock + del.br_blockcount;
5227 if (isrt &&
5228 (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
5229 /*
5230 * Realtime extent not lined up at the end.
5231 * The extent could have been split into written
5232 * and unwritten pieces, or we could just be
5233 * unmapping part of it. But we can't really
5234 * get rid of part of a realtime extent.
5235 */
5236 if (del.br_state == XFS_EXT_UNWRITTEN ||
5237 !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5238 /*
5239 * This piece is unwritten, or we're not
5240 * using unwritten extents. Skip over it.
5241 */
5242 ASSERT(end >= mod);
5243 end -= mod > del.br_blockcount ?
5244 del.br_blockcount : mod;
5245 if (end < got.br_startoff &&
5246 !xfs_iext_prev_extent(ifp, &icur, &got)) {
5247 done = true;
5248 break;
5249 }
5250 continue;
5251 }
5252 /*
5253 * It's written, turn it unwritten.
5254 * This is better than zeroing it.
5255 */
5256 ASSERT(del.br_state == XFS_EXT_NORM);
5257 ASSERT(tp->t_blk_res > 0);
5258 /*
5259 * If this spans a realtime extent boundary,
5260 * chop it back to the start of the one we end at.
5261 */
5262 if (del.br_blockcount > mod) {
5263 del.br_startoff += del.br_blockcount - mod;
5264 del.br_startblock += del.br_blockcount - mod;
5265 del.br_blockcount = mod;
5266 }
5267 del.br_state = XFS_EXT_UNWRITTEN;
5268 error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5269 whichfork, &icur, &cur, &del,
5270 firstblock, dfops, &logflags);
5271 if (error)
5272 goto error0;
5273 goto nodelete;
5274 }
5275 if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) {
5276 /*
5277 * Realtime extent is lined up at the end but not
5278 * at the front. We'll get rid of full extents if
5279 * we can.
5280 */
5281 mod = mp->m_sb.sb_rextsize - mod;
5282 if (del.br_blockcount > mod) {
5283 del.br_blockcount -= mod;
5284 del.br_startoff += mod;
5285 del.br_startblock += mod;
5286 } else if ((del.br_startoff == start &&
5287 (del.br_state == XFS_EXT_UNWRITTEN ||
5288 tp->t_blk_res == 0)) ||
5289 !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5290 /*
5291 * Can't make it unwritten. There isn't
5292 * a full extent here so just skip it.
5293 */
5294 ASSERT(end >= del.br_blockcount);
5295 end -= del.br_blockcount;
5296 if (got.br_startoff > end &&
5297 !xfs_iext_prev_extent(ifp, &icur, &got)) {
5298 done = true;
5299 break;
5300 }
5301 continue;
5302 } else if (del.br_state == XFS_EXT_UNWRITTEN) {
5303 struct xfs_bmbt_irec prev;
5304
5305 /*
5306 * This one is already unwritten.
5307 * It must have a written left neighbor.
5308 * Unwrite the killed part of that one and
5309 * try again.
5310 */
5311 if (!xfs_iext_prev_extent(ifp, &icur, &prev))
5312 ASSERT(0);
5313 ASSERT(prev.br_state == XFS_EXT_NORM);
5314 ASSERT(!isnullstartblock(prev.br_startblock));
5315 ASSERT(del.br_startblock ==
5316 prev.br_startblock + prev.br_blockcount);
5317 if (prev.br_startoff < start) {
5318 mod = start - prev.br_startoff;
5319 prev.br_blockcount -= mod;
5320 prev.br_startblock += mod;
5321 prev.br_startoff = start;
5322 }
5323 prev.br_state = XFS_EXT_UNWRITTEN;
5324 error = xfs_bmap_add_extent_unwritten_real(tp,
5325 ip, whichfork, &icur, &cur,
5326 &prev, firstblock, dfops,
5327 &logflags);
5328 if (error)
5329 goto error0;
5330 goto nodelete;
5331 } else {
5332 ASSERT(del.br_state == XFS_EXT_NORM);
5333 del.br_state = XFS_EXT_UNWRITTEN;
5334 error = xfs_bmap_add_extent_unwritten_real(tp,
5335 ip, whichfork, &icur, &cur,
5336 &del, firstblock, dfops,
5337 &logflags);
5338 if (error)
5339 goto error0;
5340 goto nodelete;
5341 }
5342 }
5343
5344 if (wasdel) {
5345 error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
5346 &got, &del);
5347 } else {
5348 error = xfs_bmap_del_extent_real(ip, tp, &icur, dfops,
5349 cur, &del, &tmp_logflags, whichfork,
5350 flags);
5351 logflags |= tmp_logflags;
5352 }
5353
5354 if (error)
5355 goto error0;
5356
5357 max_len -= del.br_blockcount;
5358 end = del.br_startoff - 1;
5359 nodelete:
5360 /*
5361 * If not done, go on to the next (previous) record.
5362 */
5363 if (end != (xfs_fileoff_t)-1 && end >= start) {
5364 if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5365 (got.br_startoff > end &&
5366 !xfs_iext_prev_extent(ifp, &icur, &got))) {
5367 done = true;
5368 break;
5369 }
5370 extno++;
5371 }
5372 }
5373 if (done || end == (xfs_fileoff_t)-1 || end < start)
5374 *rlen = 0;
5375 else
5376 *rlen = end - start + 1;
5377
5378 /*
5379 * Convert to a btree if necessary.
5380 */
5381 if (xfs_bmap_needs_btree(ip, whichfork)) {
5382 ASSERT(cur == NULL);
5383 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops,
5384 &cur, 0, &tmp_logflags, whichfork);
5385 logflags |= tmp_logflags;
5386 if (error)
5387 goto error0;
5388 }
5389 /*
5390 * transform from btree to extents, give it cur
5391 */
5392 else if (xfs_bmap_wants_extents(ip, whichfork)) {
5393 ASSERT(cur != NULL);
5394 error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
5395 whichfork);
5396 logflags |= tmp_logflags;
5397 if (error)
5398 goto error0;
5399 }
5400 /*
5401 * transform from extents to local?
5402 */
5403 error = 0;
5404 error0:
5405 /*
5406 * Log everything. Do this after conversion, there's no point in
5407 * logging the extent records if we've converted to btree format.
5408 */
5409 if ((logflags & xfs_ilog_fext(whichfork)) &&
5410 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
5411 logflags &= ~xfs_ilog_fext(whichfork);
5412 else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5413 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
5414 logflags &= ~xfs_ilog_fbroot(whichfork);
5415 /*
5416 * Log the inode even in the error case; if the transaction
5417 * is dirty we'll need to shut down the filesystem.
5418 */
5419 if (logflags)
5420 xfs_trans_log_inode(tp, ip, logflags);
5421 if (cur) {
5422 if (!error) {
5423 *firstblock = cur->bc_private.b.firstblock;
5424 cur->bc_private.b.allocated = 0;
5425 }
5426 xfs_btree_del_cursor(cur,
5427 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5428 }
5429 return error;
5430 }
5431
5432 /* Unmap a range of a file. */
5433 int
5434 xfs_bunmapi(
5435 xfs_trans_t *tp,
5436 struct xfs_inode *ip,
5437 xfs_fileoff_t bno,
5438 xfs_filblks_t len,
5439 int flags,
5440 xfs_extnum_t nexts,
5441 xfs_fsblock_t *firstblock,
5442 struct xfs_defer_ops *dfops,
5443 int *done)
5444 {
5445 int error;
5446
5447 error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts, firstblock,
5448 dfops);
5449 *done = (len == 0);
5450 return error;
5451 }
5452
5453 /*
5454 * Determine whether an extent shift can be accomplished by a merge with the
5455 * extent that precedes the target hole of the shift.
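 *
 * For example (illustrative values): left = [0, 10) at disk block 100
 * and got = [15, 20) at disk block 110 can merge under a shift of 5,
 * since the shifted start (10) lands at left's end and disk block 110
 * immediately follows left's last block (109).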
5456 */
5457 STATIC bool
5458 xfs_bmse_can_merge(
5459 struct xfs_bmbt_irec *left, /* preceding extent */
5460 struct xfs_bmbt_irec *got, /* current extent to shift */
5461 xfs_fileoff_t shift) /* shift fsb */
5462 {
5463 xfs_fileoff_t startoff;
5464
5465 startoff = got->br_startoff - shift;
5466
5467 /*
5468 * The extent, once shifted, must be adjacent in-file and on-disk with
5469 * the preceding extent.
5470 */
5471 if ((left->br_startoff + left->br_blockcount != startoff) ||
5472 (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5473 (left->br_state != got->br_state) ||
5474 (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
5475 return false;
5476
5477 return true;
5478 }
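
/*
 * Worked example with made-up numbers: take left = { br_startoff 0,
 * br_startblock 100, br_blockcount 10 } and got = { br_startoff 15,
 * br_startblock 110, br_blockcount 5 }, both in the same br_state.  A
 * shift of 5 would move got to file offset 10, which is contiguous with
 * left in-file (0 + 10 == 10) and on-disk (100 + 10 == 110), and the
 * combined length of 15 blocks is below MAXEXTLEN, so the merge is
 * allowed.
 */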
5479
5480 /*
5481 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5482 * hole in the file. If an extent shift would result in the extent being fully
5483 * adjacent to the extent that currently precedes the hole, we can merge with
5484 * the preceding extent rather than do the shift.
5485 *
5486 * This function assumes the caller has verified a shift-by-merge is possible
5487 * with the provided extents via xfs_bmse_can_merge().
5488 */
5489 STATIC int
5490 xfs_bmse_merge(
5491 struct xfs_inode *ip,
5492 int whichfork,
5493 xfs_fileoff_t shift, /* shift fsb */
5494 struct xfs_iext_cursor *icur,
5495 struct xfs_bmbt_irec *got, /* extent to shift */
5496 struct xfs_bmbt_irec *left, /* preceding extent */
5497 struct xfs_btree_cur *cur,
5498 int *logflags, /* output */
5499 struct xfs_defer_ops *dfops)
5500 {
5501 struct xfs_bmbt_irec new;
5502 xfs_filblks_t blockcount;
5503 int error, i;
5504 struct xfs_mount *mp = ip->i_mount;
5505
5506 blockcount = left->br_blockcount + got->br_blockcount;
5507
5508 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5509 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5510 ASSERT(xfs_bmse_can_merge(left, got, shift));
5511
5512 new = *left;
5513 new.br_blockcount = blockcount;
5514
5515 /*
5516 * Update the on-disk extent count, the btree if necessary and log the
5517 * inode.
5518 */
5519 XFS_IFORK_NEXT_SET(ip, whichfork,
5520 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
5521 *logflags |= XFS_ILOG_CORE;
5522 if (!cur) {
5523 *logflags |= XFS_ILOG_DEXT;
5524 goto done;
5525 }
5526
5527 /* lookup and remove the extent to merge */
5528 error = xfs_bmbt_lookup_eq(cur, got, &i);
5529 if (error)
5530 return error;
5531 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5532
5533 error = xfs_btree_delete(cur, &i);
5534 if (error)
5535 return error;
5536 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5537
5538 /* lookup and update size of the previous extent */
5539 error = xfs_bmbt_lookup_eq(cur, left, &i);
5540 if (error)
5541 return error;
5542 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5543
5544 error = xfs_bmbt_update(cur, &new);
5545 if (error)
5546 return error;
5547
5548 done:
5549 xfs_iext_remove(ip, icur, 0);
5550 xfs_iext_prev(XFS_IFORK_PTR(ip, whichfork), icur);
5551 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5552 &new);
5553
5554 /* update reverse mapping. rmap functions merge the rmaps for us */
5555 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got);
5556 if (error)
5557 return error;
5558 memcpy(&new, got, sizeof(new));
5559 new.br_startoff = left->br_startoff + left->br_blockcount;
5560 return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new);
5561 }
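
/*
 * Continuing the xfs_bmse_can_merge() example above: the merge removes
 * got's incore and btree records, grows left by got's length, and then
 * fixes up the reverse mapping by unmapping got at its old file offset
 * and re-mapping it at left's tail, where the rmap code coalesces the
 * two records.
 */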
5562
5563 static int
5564 xfs_bmap_shift_update_extent(
5565 struct xfs_inode *ip,
5566 int whichfork,
5567 struct xfs_iext_cursor *icur,
5568 struct xfs_bmbt_irec *got,
5569 struct xfs_btree_cur *cur,
5570 int *logflags,
5571 struct xfs_defer_ops *dfops,
5572 xfs_fileoff_t startoff)
5573 {
5574 struct xfs_mount *mp = ip->i_mount;
5575 struct xfs_bmbt_irec prev = *got;
5576 int error, i;
5577
5578 *logflags |= XFS_ILOG_CORE;
5579
5580 got->br_startoff = startoff;
5581
5582 if (cur) {
5583 error = xfs_bmbt_lookup_eq(cur, &prev, &i);
5584 if (error)
5585 return error;
5586 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5587
5588 error = xfs_bmbt_update(cur, got);
5589 if (error)
5590 return error;
5591 } else {
5592 *logflags |= XFS_ILOG_DEXT;
5593 }
5594
5595 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
5596 got);
5597
5598 /* update reverse mapping */
5599 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &prev);
5600 if (error)
5601 return error;
5602 return xfs_rmap_map_extent(mp, dfops, ip, whichfork, got);
5603 }
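
/*
 * For example (made-up offsets): a collapse that shifts an extent from
 * br_startoff 100 to 90 updates the incore extent and the bmbt record in
 * place and swaps the old reverse mapping for one at the new offset; the
 * extent's disk blocks themselves never move.
 */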
5604
5605 int
5606 xfs_bmap_collapse_extents(
5607 struct xfs_trans *tp,
5608 struct xfs_inode *ip,
5609 xfs_fileoff_t *next_fsb,
5610 xfs_fileoff_t offset_shift_fsb,
5611 bool *done,
5612 xfs_fileoff_t stop_fsb,
5613 xfs_fsblock_t *firstblock,
5614 struct xfs_defer_ops *dfops)
5615 {
5616 int whichfork = XFS_DATA_FORK;
5617 struct xfs_mount *mp = ip->i_mount;
5618 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
5619 struct xfs_btree_cur *cur = NULL;
5620 struct xfs_bmbt_irec got, prev;
5621 struct xfs_iext_cursor icur;
5622 xfs_fileoff_t new_startoff;
5623 int error = 0;
5624 int logflags = 0;
5625
5626 if (unlikely(XFS_TEST_ERROR(
5627 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5628 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5629 mp, XFS_ERRTAG_BMAPIFORMAT))) {
5630 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
5631 return -EFSCORRUPTED;
5632 }
5633
5634 if (XFS_FORCED_SHUTDOWN(mp))
5635 return -EIO;
5636
5637 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5638
5639 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5640 error = xfs_iread_extents(tp, ip, whichfork);
5641 if (error)
5642 return error;
5643 }
5644
5645 if (ifp->if_flags & XFS_IFBROOT) {
5646 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5647 cur->bc_private.b.firstblock = *firstblock;
5648 cur->bc_private.b.dfops = dfops;
5649 cur->bc_private.b.flags = 0;
5650 }
5651
5652 if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5653 *done = true;
5654 goto del_cursor;
5655 }
5656 XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
5657 del_cursor);
5658
5659 new_startoff = got.br_startoff - offset_shift_fsb;
5660 if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
5661 if (new_startoff < prev.br_startoff + prev.br_blockcount) {
5662 error = -EINVAL;
5663 goto del_cursor;
5664 }
5665
5666 if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
5667 error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
5668 &icur, &got, &prev, cur, &logflags,
5669 dfops);
5670 if (error)
5671 goto del_cursor;
5672 goto done;
5673 }
5674 } else {
5675 if (got.br_startoff < offset_shift_fsb) {
5676 error = -EINVAL;
5677 goto del_cursor;
5678 }
5679 }
5680
5681 error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
5682 &logflags, dfops, new_startoff);
5683 if (error)
5684 goto del_cursor;
5685
5686 done:
5687 if (!xfs_iext_next_extent(ifp, &icur, &got)) {
5688 *done = true;
5689 goto del_cursor;
5690 }
5691
5692 *next_fsb = got.br_startoff;
5693 del_cursor:
5694 if (cur)
5695 xfs_btree_del_cursor(cur,
5696 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5697 if (logflags)
5698 xfs_trans_log_inode(tp, ip, logflags);
5699 return error;
5700 }
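
/*
 * Illustrative caller sketch, with hypothetical locals: a collapse walks
 * the fork one extent per call until *done is set, finishing the deferred
 * work in between.
 *
 *	while (!done) {
 *		xfs_defer_init(&dfops, &firstblock);
 *		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb,
 *				shift_fsb, &done, stop_fsb, &firstblock,
 *				&dfops);
 *		if (error)
 *			break;
 *		error = xfs_defer_finish(&tp, &dfops);
 *		if (error)
 *			break;
 *	}
 */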
5701
5702 int
5703 xfs_bmap_insert_extents(
5704 struct xfs_trans *tp,
5705 struct xfs_inode *ip,
5706 xfs_fileoff_t *next_fsb,
5707 xfs_fileoff_t offset_shift_fsb,
5708 bool *done,
5709 xfs_fileoff_t stop_fsb,
5710 xfs_fsblock_t *firstblock,
5711 struct xfs_defer_ops *dfops)
5712 {
5713 int whichfork = XFS_DATA_FORK;
5714 struct xfs_mount *mp = ip->i_mount;
5715 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
5716 struct xfs_btree_cur *cur = NULL;
5717 struct xfs_bmbt_irec got, next;
5718 struct xfs_iext_cursor icur;
5719 xfs_fileoff_t new_startoff;
5720 int error = 0;
5721 int logflags = 0;
5722
5723 if (unlikely(XFS_TEST_ERROR(
5724 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5725 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5726 mp, XFS_ERRTAG_BMAPIFORMAT))) {
5727 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
5728 return -EFSCORRUPTED;
5729 }
5730
5731 if (XFS_FORCED_SHUTDOWN(mp))
5732 return -EIO;
5733
5734 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
5735
5736 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5737 error = xfs_iread_extents(tp, ip, whichfork);
5738 if (error)
5739 return error;
5740 }
5741
5742 if (ifp->if_flags & XFS_IFBROOT) {
5743 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5744 cur->bc_private.b.firstblock = *firstblock;
5745 cur->bc_private.b.dfops = dfops;
5746 cur->bc_private.b.flags = 0;
5747 }
5748
5749 if (*next_fsb == NULLFSBLOCK) {
5750 xfs_iext_last(ifp, &icur);
5751 if (!xfs_iext_get_extent(ifp, &icur, &got) ||
5752 stop_fsb > got.br_startoff) {
5753 *done = true;
5754 goto del_cursor;
5755 }
5756 } else {
5757 if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
5758 *done = true;
5759 goto del_cursor;
5760 }
5761 }
5762 XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
5763 del_cursor);
5764
5765 if (stop_fsb >= got.br_startoff + got.br_blockcount) {
5766 error = -EIO;
5767 goto del_cursor;
5768 }
5769
5770 new_startoff = got.br_startoff + offset_shift_fsb;
5771 if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
5772 if (new_startoff + got.br_blockcount > next.br_startoff) {
5773 error = -EINVAL;
5774 goto del_cursor;
5775 }
5776
5777 /*
5778 * Unlike a left shift (which involves a hole punch), a right
5779 * shift does not modify extent neighbors in any way. We should
5780 * never find mergeable extents in this scenario. Check anyway
5781 * and warn if we encounter two extents that could be one.
5782 */
5783 if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
5784 WARN_ON_ONCE(1);
5785 }
5786
5787 error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
5788 &logflags, dfops, new_startoff);
5789 if (error)
5790 goto del_cursor;
5791
5792 if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
5793 stop_fsb >= got.br_startoff + got.br_blockcount) {
5794 *done = true;
5795 goto del_cursor;
5796 }
5797
5798 *next_fsb = got.br_startoff;
5799 del_cursor:
5800 if (cur)
5801 xfs_btree_del_cursor(cur,
5802 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5803 if (logflags)
5804 xfs_trans_log_inode(tp, ip, logflags);
5805 return error;
5806 }
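
/*
 * Summary of the walk above: an insert-range shift runs right to left.
 * Passing *next_fsb == NULLFSBLOCK starts at the last extent in the fork,
 * and the walk finishes once it reaches an extent that ends at or before
 * stop_fsb.
 */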
5807
5808 /*
5809 * Split an extent into two extents at the block offset @split_fsb, so
5810 * that @split_fsb becomes the first block of the new second extent.
5811 * If @split_fsb lies in a hole or at the first block of an extent,
5812 * there is nothing to split and we return 0.
5813 */
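/*
 * Worked example with made-up numbers: splitting at split_fsb == 15 an
 * extent { br_startoff 10, br_startblock 100, br_blockcount 8 } trims the
 * original to 5 blocks and inserts a new extent { br_startoff 15,
 * br_startblock 105, br_blockcount 3 }.  Only the mapping records change;
 * no data blocks move.
 */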
5814 STATIC int
5815 xfs_bmap_split_extent_at(
5816 struct xfs_trans *tp,
5817 struct xfs_inode *ip,
5818 xfs_fileoff_t split_fsb,
5819 xfs_fsblock_t *firstfsb,
5820 struct xfs_defer_ops *dfops)
5821 {
5822 int whichfork = XFS_DATA_FORK;
5823 struct xfs_btree_cur *cur = NULL;
5824 struct xfs_bmbt_irec got;
5825 struct xfs_bmbt_irec new; /* split extent */
5826 struct xfs_mount *mp = ip->i_mount;
5827 struct xfs_ifork *ifp;
5828 xfs_fsblock_t gotblkcnt; /* new block count for got */
5829 struct xfs_iext_cursor icur;
5830 int error = 0;
5831 int logflags = 0;
5832 int i = 0;
5833
5834 if (unlikely(XFS_TEST_ERROR(
5835 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5836 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5837 mp, XFS_ERRTAG_BMAPIFORMAT))) {
5838 XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
5839 XFS_ERRLEVEL_LOW, mp);
5840 return -EFSCORRUPTED;
5841 }
5842
5843 if (XFS_FORCED_SHUTDOWN(mp))
5844 return -EIO;
5845
5846 ifp = XFS_IFORK_PTR(ip, whichfork);
5847 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5848 /* Read in all the extents */
5849 error = xfs_iread_extents(tp, ip, whichfork);
5850 if (error)
5851 return error;
5852 }
5853
5854 /*
5855 * If there are no extents, or if split_fsb lies in a hole, we are done.
5856 */
5857 if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
5858 got.br_startoff >= split_fsb)
5859 return 0;
5860
5861 gotblkcnt = split_fsb - got.br_startoff;
5862 new.br_startoff = split_fsb;
5863 new.br_startblock = got.br_startblock + gotblkcnt;
5864 new.br_blockcount = got.br_blockcount - gotblkcnt;
5865 new.br_state = got.br_state;
5866
5867 if (ifp->if_flags & XFS_IFBROOT) {
5868 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5869 cur->bc_private.b.firstblock = *firstfsb;
5870 cur->bc_private.b.dfops = dfops;
5871 cur->bc_private.b.flags = 0;
5872 error = xfs_bmbt_lookup_eq(cur, &got, &i);
5873 if (error)
5874 goto del_cursor;
5875 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5876 }
5877
5878 got.br_blockcount = gotblkcnt;
5879 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
5880 &got);
5881
5882 logflags = XFS_ILOG_CORE;
5883 if (cur) {
5884 error = xfs_bmbt_update(cur, &got);
5885 if (error)
5886 goto del_cursor;
5887 } else
5888 logflags |= XFS_ILOG_DEXT;
5889
5890 /* Add new extent */
5891 xfs_iext_next(ifp, &icur);
5892 xfs_iext_insert(ip, &icur, &new, 0);
5893 XFS_IFORK_NEXT_SET(ip, whichfork,
5894 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
5895
5896 if (cur) {
5897 error = xfs_bmbt_lookup_eq(cur, &new, &i);
5898 if (error)
5899 goto del_cursor;
5900 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
5901 error = xfs_btree_insert(cur, &i);
5902 if (error)
5903 goto del_cursor;
5904 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5905 }
5906
5907 /*
5908 * Convert to a btree if necessary.
5909 */
5910 if (xfs_bmap_needs_btree(ip, whichfork)) {
5911 int tmp_logflags; /* partial log flag return val */
5912
5913 ASSERT(cur == NULL);
5914 error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, dfops,
5915 &cur, 0, &tmp_logflags, whichfork);
5916 logflags |= tmp_logflags;
5917 }
5918
5919 del_cursor:
5920 if (cur) {
5921 cur->bc_private.b.allocated = 0;
5922 xfs_btree_del_cursor(cur,
5923 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5924 }
5925
5926 if (logflags)
5927 xfs_trans_log_inode(tp, ip, logflags);
5928 return error;
5929 }
5930
5931 int
5932 xfs_bmap_split_extent(
5933 struct xfs_inode *ip,
5934 xfs_fileoff_t split_fsb)
5935 {
5936 struct xfs_mount *mp = ip->i_mount;
5937 struct xfs_trans *tp;
5938 struct xfs_defer_ops dfops;
5939 xfs_fsblock_t firstfsb;
5940 int error;
5941
5942 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
5943 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
5944 if (error)
5945 return error;
5946
5947 xfs_ilock(ip, XFS_ILOCK_EXCL);
5948 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
5949
5950 xfs_defer_init(&dfops, &firstfsb);
5951
5952 error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
5953 &firstfsb, &dfops);
5954 if (error)
5955 goto out;
5956
5957 error = xfs_defer_finish(&tp, &dfops);
5958 if (error)
5959 goto out;
5960
5961 return xfs_trans_commit(tp);
5962
5963 out:
5964 xfs_defer_cancel(&dfops);
5965 xfs_trans_cancel(tp);
5966 return error;
5967 }
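
/*
 * Illustrative usage, hypothetical caller: an insert-range operation would
 * split at the insertion point before shifting extents right, e.g.:
 *
 *	error = xfs_bmap_split_extent(ip, offset_fsb);
 *	if (error)
 *		return error;
 */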
5968
5969 /* Deferred mapping is only for real extents in the data fork. */
5970 static bool
5971 xfs_bmap_is_update_needed(
5972 struct xfs_bmbt_irec *bmap)
5973 {
5974 return bmap->br_startblock != HOLESTARTBLOCK &&
5975 bmap->br_startblock != DELAYSTARTBLOCK;
5976 }
5977
5978 /* Record a bmap intent. */
5979 static int
5980 __xfs_bmap_add(
5981 struct xfs_mount *mp,
5982 struct xfs_defer_ops *dfops,
5983 enum xfs_bmap_intent_type type,
5984 struct xfs_inode *ip,
5985 int whichfork,
5986 struct xfs_bmbt_irec *bmap)
5987 {
5988 int error;
5989 struct xfs_bmap_intent *bi;
5990
5991 trace_xfs_bmap_defer(mp,
5992 XFS_FSB_TO_AGNO(mp, bmap->br_startblock),
5993 type,
5994 XFS_FSB_TO_AGBNO(mp, bmap->br_startblock),
5995 ip->i_ino, whichfork,
5996 bmap->br_startoff,
5997 bmap->br_blockcount,
5998 bmap->br_state);
5999
6000 bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_SLEEP | KM_NOFS);
6001 INIT_LIST_HEAD(&bi->bi_list);
6002 bi->bi_type = type;
6003 bi->bi_owner = ip;
6004 bi->bi_whichfork = whichfork;
6005 bi->bi_bmap = *bmap;
6006
6007 error = xfs_defer_ijoin(dfops, bi->bi_owner);
6008 if (error) {
6009 kmem_free(bi);
6010 return error;
6011 }
6012
6013 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
6014 return 0;
6015 }
6016
6017 /* Map an extent into a file. */
6018 int
6019 xfs_bmap_map_extent(
6020 struct xfs_mount *mp,
6021 struct xfs_defer_ops *dfops,
6022 struct xfs_inode *ip,
6023 struct xfs_bmbt_irec *PREV)
6024 {
6025 if (!xfs_bmap_is_update_needed(PREV))
6026 return 0;
6027
6028 return __xfs_bmap_add(mp, dfops, XFS_BMAP_MAP, ip,
6029 XFS_DATA_FORK, PREV);
6030 }
6031
6032 /* Unmap an extent out of a file. */
6033 int
6034 xfs_bmap_unmap_extent(
6035 struct xfs_mount *mp,
6036 struct xfs_defer_ops *dfops,
6037 struct xfs_inode *ip,
6038 struct xfs_bmbt_irec *PREV)
6039 {
6040 if (!xfs_bmap_is_update_needed(PREV))
6041 return 0;
6042
6043 return __xfs_bmap_add(mp, dfops, XFS_BMAP_UNMAP, ip,
6044 XFS_DATA_FORK, PREV);
6045 }
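
/*
 * Sketch of a hypothetical caller pairing the two helpers above, e.g. an
 * atomic remap that logs an unmap intent for the old mapping and a map
 * intent for the new one; both intents are processed later through
 * xfs_bmap_finish_one().
 *
 *	error = xfs_bmap_unmap_extent(mp, dfops, ip, &old_irec);
 *	if (error)
 *		return error;
 *	return xfs_bmap_map_extent(mp, dfops, ip, &new_irec);
 */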
6046
6047 /*
6048 * Process one of the deferred bmap operations.  The updated *blockcount
6049 * is passed back so that a partially completed unmap can be requeued.
6050 */
6051 int
6052 xfs_bmap_finish_one(
6053 struct xfs_trans *tp,
6054 struct xfs_defer_ops *dfops,
6055 struct xfs_inode *ip,
6056 enum xfs_bmap_intent_type type,
6057 int whichfork,
6058 xfs_fileoff_t startoff,
6059 xfs_fsblock_t startblock,
6060 xfs_filblks_t *blockcount,
6061 xfs_exntst_t state)
6062 {
6063 xfs_fsblock_t firstfsb;
6064 int error = 0;
6065
6066 /*
6067 * firstfsb is tied to the transaction lifetime and is used to
6068 * ensure correct AG locking order and schedule work item
6069 * continuations. XFS_BUI_MAX_FAST_EXTENTS (== 1) restricts us
6070 * to only making one bmap call per transaction, so it should
6071 * be safe to have it as a local variable here.
6072 */
6073 firstfsb = NULLFSBLOCK;
6074
6075 trace_xfs_bmap_deferred(tp->t_mountp,
6076 XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
6077 XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
6078 ip->i_ino, whichfork, startoff, *blockcount, state);
6079
6080 if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
6081 return -EFSCORRUPTED;
6082
6083 if (XFS_TEST_ERROR(false, tp->t_mountp,
6084 XFS_ERRTAG_BMAP_FINISH_ONE))
6085 return -EIO;
6086
6087 switch (type) {
6088 case XFS_BMAP_MAP:
6089 error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
6090 startblock, dfops);
6091 *blockcount = 0;
6092 break;
6093 case XFS_BMAP_UNMAP:
6094 error = __xfs_bunmapi(tp, ip, startoff, blockcount,
6095 XFS_BMAPI_REMAP, 1, &firstfsb, dfops);
6096 break;
6097 default:
6098 ASSERT(0);
6099 error = -EFSCORRUPTED;
6100 }
6101
6102 return error;
6103 }
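
/*
 * Note, describing caller behaviour outside this file (an assumption, not
 * taken from this source): for XFS_BMAP_UNMAP, __xfs_bunmapi() may stop
 * before removing the whole range, leaving *blockcount nonzero; the
 * deferred-op machinery is then expected to requeue the remainder as a new
 * intent instead of treating the operation as complete.
 */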