1 /*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18 #include "libxfs_priv.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_bit.h"
25 #include "xfs_sb.h"
26 #include "xfs_mount.h"
27 #include "xfs_defer.h"
28 #include "xfs_da_format.h"
29 #include "xfs_da_btree.h"
30 #include "xfs_dir2.h"
31 #include "xfs_inode.h"
32 #include "xfs_btree.h"
33 #include "xfs_trans.h"
34 #include "xfs_alloc.h"
35 #include "xfs_bmap.h"
36 #include "xfs_bmap_btree.h"
37 #include "xfs_trans_space.h"
38 #include "xfs_trace.h"
39 #include "xfs_attr_leaf.h"
40 #include "xfs_quota_defs.h"
41 #include "xfs_rmap.h"
42 #include "xfs_ag_resv.h"
43 #include "xfs_refcount.h"
44
45
46 kmem_zone_t *xfs_bmap_free_item_zone;
47
48 /*
49 * Miscellaneous helper functions
50 */
51
52 /*
53 * Compute and fill in the value of the maximum depth of a bmap btree
54 * in this filesystem. Done once, during mount.
55 */
56 void
57 xfs_bmap_compute_maxlevels(
58 xfs_mount_t *mp, /* file system mount structure */
59 int whichfork) /* data or attr fork */
60 {
61 int level; /* btree level */
62 uint maxblocks; /* max blocks at this level */
63 uint maxleafents; /* max leaf entries possible */
64 int maxrootrecs; /* max records in root block */
65 int minleafrecs; /* min records in leaf block */
66 int minnoderecs; /* min records in node block */
67 int sz; /* root block size */
68
69 /*
70 * The maximum number of extents in a file, hence the maximum
71 * number of leaf entries, is controlled by the type of di_nextents
72 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
73 * (a signed 16-bit number, xfs_aextnum_t).
74 *
75 * Note that we can no longer assume that if we are in ATTR1 that
76 * the fork offset of all the inodes will be
77 * (xfs_default_attroffset(ip) >> 3) because we could have mounted
78 * with ATTR2 and then mounted back with ATTR1, keeping the
79 * di_forkoff's fixed but probably at various positions. Therefore,
80 * for both ATTR1 and ATTR2 we have to assume the worst case scenario
81 * of a minimum size available.
82 */
83 if (whichfork == XFS_DATA_FORK) {
84 maxleafents = MAXEXTNUM;
85 sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
86 } else {
87 maxleafents = MAXAEXTNUM;
88 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
89 }
90 maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
91 minleafrecs = mp->m_bmap_dmnr[0];
92 minnoderecs = mp->m_bmap_dmnr[1];
93 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
94 for (level = 1; maxblocks > 1; level++) {
95 if (maxblocks <= maxrootrecs)
96 maxblocks = 1;
97 else
98 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
99 }
100 mp->m_bm_maxlevels[whichfork] = level;
101 }
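
/*
 * Worked example of the level computation above, using made-up geometry
 * (the real values come from mp->m_bmap_dmnr[] and the root block size):
 * with maxleafents = 1,000,000, minleafrecs = minnoderecs = 100 and,
 * say, maxrootrecs = 10:
 *
 *	maxblocks = ceil(1000000 / 100) = 10000 leaf blocks
 *	level 1: 10000 > 10 -> maxblocks = ceil(10000 / 100) = 100
 *	level 2: 100 > 10   -> maxblocks = ceil(100 / 100)   = 1
 *
 * The loop exits with level == 3: a leaf level, one interior node
 * level, and the root.
 */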
102
103 STATIC int /* error */
104 xfs_bmbt_lookup_eq(
105 struct xfs_btree_cur *cur,
106 struct xfs_bmbt_irec *irec,
107 int *stat) /* success/failure */
108 {
109 cur->bc_rec.b = *irec;
110 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
111 }
112
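/*
 * Position the cursor at the first record in the bmap btree: an
 * XFS_LOOKUP_GE search from an all-zero key lands on the lowest-keyed
 * record, if one exists, and *stat reports whether one was found.
 */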
113 STATIC int /* error */
114 xfs_bmbt_lookup_first(
115 struct xfs_btree_cur *cur,
116 int *stat) /* success/failure */
117 {
118 cur->bc_rec.b.br_startoff = 0;
119 cur->bc_rec.b.br_startblock = 0;
120 cur->bc_rec.b.br_blockcount = 0;
121 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
122 }
123
124 /*
125 * Check if the inode needs to be converted to btree format.
126 */
127 static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
128 {
129 return whichfork != XFS_COW_FORK &&
130 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
131 XFS_IFORK_NEXTENTS(ip, whichfork) >
132 XFS_IFORK_MAXEXT(ip, whichfork);
133 }
134
135 /*
136 * Check if the inode should be converted to extent format.
137 */
138 static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
139 {
140 return whichfork != XFS_COW_FORK &&
141 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
142 XFS_IFORK_NEXTENTS(ip, whichfork) <=
143 XFS_IFORK_MAXEXT(ip, whichfork);
144 }
145
146 /*
147 * Update the record referred to by cur to the value given by irec.
148 * This either works (return 0) or gets an EFSCORRUPTED error.
149 */
150 STATIC int
151 xfs_bmbt_update(
152 struct xfs_btree_cur *cur,
153 struct xfs_bmbt_irec *irec)
154 {
155 union xfs_btree_rec rec;
156
157 xfs_bmbt_disk_set_all(&rec.bmbt, irec);
158 return xfs_btree_update(cur, &rec);
159 }
160
161 /*
162 * Compute the worst-case number of indirect blocks that will be used
163 * for ip's delayed extent of length "len".
164 */
165 STATIC xfs_filblks_t
166 xfs_bmap_worst_indlen(
167 xfs_inode_t *ip, /* incore inode pointer */
168 xfs_filblks_t len) /* delayed extent length */
169 {
170 int level; /* btree level number */
171 int maxrecs; /* maximum record count at this level */
172 xfs_mount_t *mp; /* mount structure */
173 xfs_filblks_t rval; /* return value */
174
175 mp = ip->i_mount;
176 maxrecs = mp->m_bmap_dmxr[0];
177 for (level = 0, rval = 0;
178 level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
179 level++) {
180 len += maxrecs - 1;
181 do_div(len, maxrecs);
182 rval += len;
183 if (len == 1)
184 return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
185 level - 1;
186 if (level == 0)
187 maxrecs = mp->m_bmap_dmxr[1];
188 }
189 return rval;
190 }
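
/*
 * Worked example for the function above, with illustrative (not real)
 * geometry: maxrecs = 125 per leaf, 250 per node, and a 5-level
 * maximum. For a delayed extent of len = 1000 blocks:
 *
 *	level 0: ceil(1000 / 125) = 8 leaf blocks,  rval = 8
 *	level 1: ceil(8 / 250)    = 1 node block,   rval = 9
 *
 * len hit 1, so the early return adds one block for each of the
 * remaining levels: rval = 9 + (5 - 1 - 1) = 12 indirect blocks.
 */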
191
192 /*
193 * Calculate the default attribute fork offset for newly created inodes.
194 */
195 uint
196 xfs_default_attroffset(
197 struct xfs_inode *ip)
198 {
199 struct xfs_mount *mp = ip->i_mount;
200 uint offset;
201
202 if (mp->m_sb.sb_inodesize == 256) {
203 offset = XFS_LITINO(mp, ip->i_d.di_version) -
204 XFS_BMDR_SPACE_CALC(MINABTPTRS);
205 } else {
206 offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
207 }
208
209 ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version));
210 return offset;
211 }
212
213 /*
214 * Helper routine to reset inode di_forkoff field when switching
215 * attribute fork from local to extent format - we reset it where
216 * possible to make space available for inline data fork extents.
217 */
218 STATIC void
219 xfs_bmap_forkoff_reset(
220 xfs_inode_t *ip,
221 int whichfork)
222 {
223 if (whichfork == XFS_ATTR_FORK &&
224 ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
225 ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
226 ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
227 uint dfl_forkoff = xfs_default_attroffset(ip) >> 3;
228
229 if (dfl_forkoff > ip->i_d.di_forkoff)
230 ip->i_d.di_forkoff = dfl_forkoff;
231 }
232 }
233
234 #ifdef DEBUG
235 STATIC struct xfs_buf *
236 xfs_bmap_get_bp(
237 struct xfs_btree_cur *cur,
238 xfs_fsblock_t bno)
239 {
240 struct xfs_log_item_desc *lidp;
241 int i;
242
243 if (!cur)
244 return NULL;
245
246 for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
247 if (!cur->bc_bufs[i])
248 break;
249 if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
250 return cur->bc_bufs[i];
251 }
252
253 /* Chase down all the log items to see if the bp is there */
254 list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
255 struct xfs_buf_log_item *bip;
256 bip = (struct xfs_buf_log_item *)lidp->lid_item;
257 if (bip->bli_item.li_type == XFS_LI_BUF &&
258 XFS_BUF_ADDR(bip->bli_buf) == bno)
259 return bip->bli_buf;
260 }
261
262 return NULL;
263 }
264
265 STATIC void
266 xfs_check_block(
267 struct xfs_btree_block *block,
268 xfs_mount_t *mp,
269 int root,
270 short sz)
271 {
272 int i, j, dmxr;
273 __be64 *pp, *thispa; /* pointer to block address */
274 xfs_bmbt_key_t *prevp, *keyp;
275
276 ASSERT(be16_to_cpu(block->bb_level) > 0);
277
278 prevp = NULL;
279 for (i = 1; i <= xfs_btree_get_numrecs(block); i++) {
280 dmxr = mp->m_bmap_dmxr[0];
281 keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
282
283 if (prevp) {
284 ASSERT(be64_to_cpu(prevp->br_startoff) <
285 be64_to_cpu(keyp->br_startoff));
286 }
287 prevp = keyp;
288
289 /*
290 * Compare the block numbers to see if there are dups.
291 */
292 if (root)
293 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
294 else
295 pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
296
297 for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
298 if (root)
299 thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
300 else
301 thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
302 if (*thispa == *pp) {
303 xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
304 __func__, j, i,
305 (unsigned long long)be64_to_cpu(*thispa));
306 panic("%s: ptrs are equal in node\n",
307 __func__);
308 }
309 }
310 }
311 }
312
313 /*
314 * Check that the extents for the inode ip are in the right order in all
315 * btree leaves. This becomes prohibitively expensive for large extent count
316 * files, so don't bother with inodes that have more than 10,000 extents in
317 * them. The btree record ordering checks will still be done, so for such
318 * large bmapbt constructs those checks will catch most corruptions.
319 */
320 STATIC void
321 xfs_bmap_check_leaf_extents(
322 xfs_btree_cur_t *cur, /* btree cursor or null */
323 xfs_inode_t *ip, /* incore inode pointer */
324 int whichfork) /* data or attr fork */
325 {
326 struct xfs_btree_block *block; /* current btree block */
327 xfs_fsblock_t bno; /* block # of "block" */
328 xfs_buf_t *bp; /* buffer for "block" */
329 int error; /* error return value */
330 xfs_extnum_t i=0, j; /* index into the extents list */
331 xfs_ifork_t *ifp; /* fork structure */
332 int level; /* btree level, for checking */
333 xfs_mount_t *mp; /* file system mount structure */
334 __be64 *pp; /* pointer to block address */
335 xfs_bmbt_rec_t *ep; /* pointer to current extent */
336 xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */
337 xfs_bmbt_rec_t *nextp; /* pointer to next extent */
338 int bp_release = 0;
339
340 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
341 return;
342 }
343
344 /* skip large extent count inodes */
345 if (ip->i_d.di_nextents > 10000)
346 return;
347
348 bno = NULLFSBLOCK;
349 mp = ip->i_mount;
350 ifp = XFS_IFORK_PTR(ip, whichfork);
351 block = ifp->if_broot;
352 /*
353 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
354 */
355 level = be16_to_cpu(block->bb_level);
356 ASSERT(level > 0);
357 xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
358 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
359 bno = be64_to_cpu(*pp);
360
361 ASSERT(bno != NULLFSBLOCK);
362 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
363 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
364
365 /*
366 * Go down the tree until leaf level is reached, following the first
367 * pointer (leftmost) at each level.
368 */
369 while (level-- > 0) {
370 /* See if buf is in cur first */
371 bp_release = 0;
372 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
373 if (!bp) {
374 bp_release = 1;
375 error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
376 XFS_BMAP_BTREE_REF,
377 &xfs_bmbt_buf_ops);
378 if (error)
379 goto error_norelse;
380 }
381 block = XFS_BUF_TO_BLOCK(bp);
382 if (level == 0)
383 break;
384
385 /*
386 * Check this block for basic sanity (increasing keys and
387 * no duplicate blocks).
388 */
389
390 xfs_check_block(block, mp, 0, 0);
391 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
392 bno = be64_to_cpu(*pp);
393 XFS_WANT_CORRUPTED_GOTO(mp,
394 XFS_FSB_SANITY_CHECK(mp, bno), error0);
395 if (bp_release) {
396 bp_release = 0;
397 xfs_trans_brelse(NULL, bp);
398 }
399 }
400
401 /*
402 * Here with bp and block set to the leftmost leaf node in the tree.
403 */
404 i = 0;
405
406 /*
407 * Loop over all leaf nodes checking that all extents are in the right order.
408 */
409 for (;;) {
410 xfs_fsblock_t nextbno;
411 xfs_extnum_t num_recs;
412
413
414 num_recs = xfs_btree_get_numrecs(block);
415
416 /*
417 * Read-ahead the next leaf block, if any.
418 */
419
420 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
421
422 /*
423 * Check all the extents to make sure they are OK.
424 * If we had a previous block, the last entry should
425 * conform with the first entry in this one.
426 */
427
428 ep = XFS_BMBT_REC_ADDR(mp, block, 1);
429 if (i) {
430 ASSERT(xfs_bmbt_disk_get_startoff(&last) +
431 xfs_bmbt_disk_get_blockcount(&last) <=
432 xfs_bmbt_disk_get_startoff(ep));
433 }
434 for (j = 1; j < num_recs; j++) {
435 nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
436 ASSERT(xfs_bmbt_disk_get_startoff(ep) +
437 xfs_bmbt_disk_get_blockcount(ep) <=
438 xfs_bmbt_disk_get_startoff(nextp));
439 ep = nextp;
440 }
441
442 last = *ep;
443 i += num_recs;
444 if (bp_release) {
445 bp_release = 0;
446 xfs_trans_brelse(NULL, bp);
447 }
448 bno = nextbno;
449 /*
450 * If we've reached the end, stop.
451 */
452 if (bno == NULLFSBLOCK)
453 break;
454
455 bp_release = 0;
456 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
457 if (!bp) {
458 bp_release = 1;
459 error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
460 XFS_BMAP_BTREE_REF,
461 &xfs_bmbt_buf_ops);
462 if (error)
463 goto error_norelse;
464 }
465 block = XFS_BUF_TO_BLOCK(bp);
466 }
467
468 return;
469
470 error0:
471 xfs_warn(mp, "%s: at error0", __func__);
472 if (bp_release)
473 xfs_trans_brelse(NULL, bp);
474 error_norelse:
475 xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
476 __func__, i);
477 panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
478 return;
479 }
480
481 /*
482 * Validate that the bmbt_irecs being returned from bmapi are valid
483 * given the caller's original parameters. Specifically check the
484 * ranges of the returned irecs to ensure that they only extend beyond
485 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
486 */
487 STATIC void
488 xfs_bmap_validate_ret(
489 xfs_fileoff_t bno,
490 xfs_filblks_t len,
491 int flags,
492 xfs_bmbt_irec_t *mval,
493 int nmap,
494 int ret_nmap)
495 {
496 int i; /* index to map values */
497
498 ASSERT(ret_nmap <= nmap);
499
500 for (i = 0; i < ret_nmap; i++) {
501 ASSERT(mval[i].br_blockcount > 0);
502 if (!(flags & XFS_BMAPI_ENTIRE)) {
503 ASSERT(mval[i].br_startoff >= bno);
504 ASSERT(mval[i].br_blockcount <= len);
505 ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
506 bno + len);
507 } else {
508 ASSERT(mval[i].br_startoff < bno + len);
509 ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
510 bno);
511 }
512 ASSERT(i == 0 ||
513 mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
514 mval[i].br_startoff);
515 ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
516 mval[i].br_startblock != HOLESTARTBLOCK);
517 ASSERT(mval[i].br_state == XFS_EXT_NORM ||
518 mval[i].br_state == XFS_EXT_UNWRITTEN);
519 }
520 }
521
522 #else
523 #define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0)
524 #define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) do { } while (0)
525 #endif /* DEBUG */
526
527 /*
528 * bmap free list manipulation functions
529 */
530
531 /*
532 * Add the extent to the list of extents to be free at transaction end.
533 * The list is maintained sorted (by block number).
534 */
535 void
536 xfs_bmap_add_free(
537 struct xfs_mount *mp,
538 struct xfs_defer_ops *dfops,
539 xfs_fsblock_t bno,
540 xfs_filblks_t len,
541 struct xfs_owner_info *oinfo)
542 {
543 struct xfs_extent_free_item *new; /* new element */
544 #ifdef DEBUG
545 xfs_agnumber_t agno;
546 xfs_agblock_t agbno;
547
548 ASSERT(bno != NULLFSBLOCK);
549 ASSERT(len > 0);
550 ASSERT(len <= MAXEXTLEN);
551 ASSERT(!isnullstartblock(bno));
552 agno = XFS_FSB_TO_AGNO(mp, bno);
553 agbno = XFS_FSB_TO_AGBNO(mp, bno);
554 ASSERT(agno < mp->m_sb.sb_agcount);
555 ASSERT(agbno < mp->m_sb.sb_agblocks);
556 ASSERT(len < mp->m_sb.sb_agblocks);
557 ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
558 #endif
559 ASSERT(xfs_bmap_free_item_zone != NULL);
560
561 new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
562 new->xefi_startblock = bno;
563 new->xefi_blockcount = (xfs_extlen_t)len;
564 if (oinfo)
565 new->xefi_oinfo = *oinfo;
566 else
567 xfs_rmap_skip_owner_update(&new->xefi_oinfo);
568 trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(mp, bno), 0,
569 XFS_FSB_TO_AGBNO(mp, bno), len);
570 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
571 }
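
/*
 * Hypothetical caller sketch for the deferred-free API above (error
 * handling elided; "tp", "ip", "bno" and "len" are the caller's):
 *
 *	struct xfs_defer_ops	dfops;
 *	xfs_fsblock_t		firstblock;
 *
 *	xfs_defer_init(&dfops, &firstblock);
 *	xfs_bmap_add_free(mp, &dfops, bno, len, NULL);
 *	error = xfs_defer_finish(&tp, &dfops);
 *
 * Passing a NULL owner records the free with no rmap owner update, as
 * handled by the xfs_rmap_skip_owner_update() branch above.
 */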
572
573 /*
574 * Inode fork format manipulation functions
575 */
576
577 /*
578 * Transform a btree format file with only one leaf node, where the
579 * extents list will fit in the inode, into an extents format file.
580 * Since the file extents are already in-core, all we have to do is
581 * give up the space for the btree root and pitch the leaf block.
582 */
583 STATIC int /* error */
584 xfs_bmap_btree_to_extents(
585 xfs_trans_t *tp, /* transaction pointer */
586 xfs_inode_t *ip, /* incore inode pointer */
587 xfs_btree_cur_t *cur, /* btree cursor */
588 int *logflagsp, /* inode logging flags */
589 int whichfork) /* data or attr fork */
590 {
591 /* REFERENCED */
592 struct xfs_btree_block *cblock;/* child btree block */
593 xfs_fsblock_t cbno; /* child block number */
594 xfs_buf_t *cbp; /* child block's buffer */
595 int error; /* error return value */
596 xfs_ifork_t *ifp; /* inode fork data */
597 xfs_mount_t *mp; /* mount point structure */
598 __be64 *pp; /* ptr to block address */
599 struct xfs_btree_block *rblock;/* root btree block */
600 struct xfs_owner_info oinfo;
601
602 mp = ip->i_mount;
603 ifp = XFS_IFORK_PTR(ip, whichfork);
604 ASSERT(whichfork != XFS_COW_FORK);
605 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
606 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
607 rblock = ifp->if_broot;
608 ASSERT(be16_to_cpu(rblock->bb_level) == 1);
609 ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
610 ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
611 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
612 cbno = be64_to_cpu(*pp);
613 *logflagsp = 0;
614 #ifdef DEBUG
615 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
616 xfs_btree_check_lptr(cur, cbno, 1));
617 #endif
618 error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
619 &xfs_bmbt_buf_ops);
620 if (error)
621 return error;
622 cblock = XFS_BUF_TO_BLOCK(cbp);
623 if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
624 return error;
625 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
626 xfs_bmap_add_free(mp, cur->bc_private.b.dfops, cbno, 1, &oinfo);
627 ip->i_d.di_nblocks--;
628 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
629 xfs_trans_binval(tp, cbp);
630 if (cur->bc_bufs[0] == cbp)
631 cur->bc_bufs[0] = NULL;
632 xfs_iroot_realloc(ip, -1, whichfork);
633 ASSERT(ifp->if_broot == NULL);
634 ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
635 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
636 *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
637 return 0;
638 }
639
640 /*
641 * Convert an extents-format file into a btree-format file.
642 * The new file will have a root block (in the inode) and a single child block.
643 */
644 STATIC int /* error */
645 xfs_bmap_extents_to_btree(
646 xfs_trans_t *tp, /* transaction pointer */
647 xfs_inode_t *ip, /* incore inode pointer */
648 xfs_fsblock_t *firstblock, /* first-block-allocated */
649 struct xfs_defer_ops *dfops, /* blocks freed in xaction */
650 xfs_btree_cur_t **curp, /* cursor returned to caller */
651 int wasdel, /* converting a delayed alloc */
652 int *logflagsp, /* inode logging flags */
653 int whichfork) /* data or attr fork */
654 {
655 struct xfs_btree_block *ablock; /* allocated (child) bt block */
656 xfs_buf_t *abp; /* buffer for ablock */
657 xfs_alloc_arg_t args; /* allocation arguments */
658 xfs_bmbt_rec_t *arp; /* child record pointer */
659 struct xfs_btree_block *block; /* btree root block */
660 xfs_btree_cur_t *cur; /* bmap btree cursor */
661 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
662 int error; /* error return value */
663 xfs_extnum_t i, cnt; /* extent record index */
664 xfs_ifork_t *ifp; /* inode fork pointer */
665 xfs_bmbt_key_t *kp; /* root block key pointer */
666 xfs_mount_t *mp; /* mount structure */
667 xfs_extnum_t nextents; /* number of file extents */
668 xfs_bmbt_ptr_t *pp; /* root block address pointer */
669
670 mp = ip->i_mount;
671 ASSERT(whichfork != XFS_COW_FORK);
672 ifp = XFS_IFORK_PTR(ip, whichfork);
673 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
674
675 /*
676 * Make space in the inode incore.
677 */
678 xfs_iroot_realloc(ip, 1, whichfork);
679 ifp->if_flags |= XFS_IFBROOT;
680
681 /*
682 * Fill in the root.
683 */
684 block = ifp->if_broot;
685 xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
686 XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
687 XFS_BTREE_LONG_PTRS);
688 /*
689 * Need a cursor. Can't allocate until bb_level is filled in.
690 */
691 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
692 cur->bc_private.b.firstblock = *firstblock;
693 cur->bc_private.b.dfops = dfops;
694 cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
695 /*
696 * Convert to a btree with two levels, one record in root.
697 */
698 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
699 memset(&args, 0, sizeof(args));
700 args.tp = tp;
701 args.mp = mp;
702 xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
703 args.firstblock = *firstblock;
704 if (*firstblock == NULLFSBLOCK) {
705 args.type = XFS_ALLOCTYPE_START_BNO;
706 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
707 } else if (dfops->dop_low) {
708 args.type = XFS_ALLOCTYPE_START_BNO;
709 args.fsbno = *firstblock;
710 } else {
711 args.type = XFS_ALLOCTYPE_NEAR_BNO;
712 args.fsbno = *firstblock;
713 }
714 args.minlen = args.maxlen = args.prod = 1;
715 args.wasdel = wasdel;
716 *logflagsp = 0;
717 if ((error = xfs_alloc_vextent(&args))) {
718 xfs_iroot_realloc(ip, -1, whichfork);
719 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
720 return error;
721 }
722
723 if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
724 xfs_iroot_realloc(ip, -1, whichfork);
725 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
726 return -ENOSPC;
727 }
728 /*
729 * Allocation can't fail, the space was reserved.
730 */
731 ASSERT(*firstblock == NULLFSBLOCK ||
732 args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
733 *firstblock = cur->bc_private.b.firstblock = args.fsbno;
734 cur->bc_private.b.allocated++;
735 ip->i_d.di_nblocks++;
736 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
737 abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
738 /*
739 * Fill in the child block.
740 */
741 abp->b_ops = &xfs_bmbt_buf_ops;
742 ablock = XFS_BUF_TO_BLOCK(abp);
743 xfs_btree_init_block_int(mp, ablock, abp->b_bn,
744 XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
745 XFS_BTREE_LONG_PTRS);
746
747 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
748 nextents = xfs_iext_count(ifp);
749 for (cnt = i = 0; i < nextents; i++) {
750 ep = xfs_iext_get_ext(ifp, i);
751 if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
752 arp->l0 = cpu_to_be64(ep->l0);
753 arp->l1 = cpu_to_be64(ep->l1);
754 arp++; cnt++;
755 }
756 }
757 ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
758 xfs_btree_set_numrecs(ablock, cnt);
759
760 /*
761 * Fill in the root key and pointer.
762 */
763 kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
764 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
765 kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
766 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
767 be16_to_cpu(block->bb_level)));
768 *pp = cpu_to_be64(args.fsbno);
769
770 /*
771 * Do all this logging at the end so that
772 * the root is at the right level.
773 */
774 xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
775 xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
776 ASSERT(*curp == NULL);
777 *curp = cur;
778 *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
779 return 0;
780 }
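
/*
 * Shape of the result above, informally: the fork root (if_broot)
 * becomes a level-1 block holding a single key/pointer, and all the
 * in-core extent records now live in the one allocated child leaf:
 *
 *	if_broot (level 1, 1 record)
 *	    |
 *	    v
 *	ablock (level 0, nextents records)
 */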
781
782 /*
783 * Convert a local file to an extents file.
784 * This code is out of bounds for data forks of regular files,
785 * since the file data needs to get logged so things will stay consistent.
786 * (The bmap-level manipulations are ok, though).
787 */
788 void
789 xfs_bmap_local_to_extents_empty(
790 struct xfs_inode *ip,
791 int whichfork)
792 {
793 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
794
795 ASSERT(whichfork != XFS_COW_FORK);
796 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
797 ASSERT(ifp->if_bytes == 0);
798 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
799
800 xfs_bmap_forkoff_reset(ip, whichfork);
801 ifp->if_flags &= ~XFS_IFINLINE;
802 ifp->if_flags |= XFS_IFEXTENTS;
803 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
804 }
805
806
807 STATIC int /* error */
808 xfs_bmap_local_to_extents(
809 xfs_trans_t *tp, /* transaction pointer */
810 xfs_inode_t *ip, /* incore inode pointer */
811 xfs_fsblock_t *firstblock, /* first block allocated in xaction */
812 xfs_extlen_t total, /* total blocks needed by transaction */
813 int *logflagsp, /* inode logging flags */
814 int whichfork,
815 void (*init_fn)(struct xfs_trans *tp,
816 struct xfs_buf *bp,
817 struct xfs_inode *ip,
818 struct xfs_ifork *ifp))
819 {
820 int error = 0;
821 int flags; /* logging flags returned */
822 xfs_ifork_t *ifp; /* inode fork pointer */
823 xfs_alloc_arg_t args; /* allocation arguments */
824 xfs_buf_t *bp; /* buffer for extent block */
825 struct xfs_bmbt_irec rec;
826
827 /*
828 * We don't want to deal with the case of keeping inode data inline yet.
829 * So sending the data fork of a regular inode is invalid.
830 */
831 ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
832 ifp = XFS_IFORK_PTR(ip, whichfork);
833 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
834
835 if (!ifp->if_bytes) {
836 xfs_bmap_local_to_extents_empty(ip, whichfork);
837 flags = XFS_ILOG_CORE;
838 goto done;
839 }
840
841 flags = 0;
842 error = 0;
843 ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) ==
844 XFS_IFINLINE);
845 memset(&args, 0, sizeof(args));
846 args.tp = tp;
847 args.mp = ip->i_mount;
848 xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
849 args.firstblock = *firstblock;
850 /*
851 * Allocate a block. We know we need only one, since the
852 * file currently fits in an inode.
853 */
854 if (*firstblock == NULLFSBLOCK) {
855 args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
856 args.type = XFS_ALLOCTYPE_START_BNO;
857 } else {
858 args.fsbno = *firstblock;
859 args.type = XFS_ALLOCTYPE_NEAR_BNO;
860 }
861 args.total = total;
862 args.minlen = args.maxlen = args.prod = 1;
863 error = xfs_alloc_vextent(&args);
864 if (error)
865 goto done;
866
867 /* Can't fail, the space was reserved. */
868 ASSERT(args.fsbno != NULLFSBLOCK);
869 ASSERT(args.len == 1);
870 *firstblock = args.fsbno;
871 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
872
873 /*
874 * Initialize the block, copy the data and log the remote buffer.
875 *
876 * The callout is responsible for logging because the remote format
877 * might differ from the local format and thus we don't know how much to
878 * log here. Note that init_fn must also set the buffer log item type
879 * correctly.
880 */
881 init_fn(tp, bp, ip, ifp);
882
883 /* account for the change in fork size */
884 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
885 xfs_bmap_local_to_extents_empty(ip, whichfork);
886 flags |= XFS_ILOG_CORE;
887
888 rec.br_startoff = 0;
889 rec.br_startblock = args.fsbno;
890 rec.br_blockcount = 1;
891 rec.br_state = XFS_EXT_NORM;
892 xfs_iext_insert(ip, 0, 1, &rec, 0);
893
894 XFS_IFORK_NEXT_SET(ip, whichfork, 1);
895 ip->i_d.di_nblocks = 1;
896 xfs_trans_mod_dquot_byino(tp, ip,
897 XFS_TRANS_DQ_BCOUNT, 1L);
898 flags |= xfs_ilog_fext(whichfork);
899
900 done:
901 *logflagsp = flags;
902 return error;
903 }
904
905 /*
906 * Called from xfs_bmap_add_attrfork to handle btree format files.
907 */
908 STATIC int /* error */
909 xfs_bmap_add_attrfork_btree(
910 xfs_trans_t *tp, /* transaction pointer */
911 xfs_inode_t *ip, /* incore inode pointer */
912 xfs_fsblock_t *firstblock, /* first block allocated */
913 struct xfs_defer_ops *dfops, /* blocks to free at commit */
914 int *flags) /* inode logging flags */
915 {
916 xfs_btree_cur_t *cur; /* btree cursor */
917 int error; /* error return value */
918 xfs_mount_t *mp; /* file system mount struct */
919 int stat; /* newroot status */
920
921 mp = ip->i_mount;
922 if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
923 *flags |= XFS_ILOG_DBROOT;
924 else {
925 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
926 cur->bc_private.b.dfops = dfops;
927 cur->bc_private.b.firstblock = *firstblock;
928 error = xfs_bmbt_lookup_first(cur, &stat);
929 if (error)
930 goto error0;
931 /* must be at least one entry */
932 XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
933 if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
934 goto error0;
935 if (stat == 0) {
936 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
937 return -ENOSPC;
938 }
939 *firstblock = cur->bc_private.b.firstblock;
940 cur->bc_private.b.allocated = 0;
941 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
942 }
943 return 0;
944 error0:
945 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
946 return error;
947 }
948
949 /*
950 * Called from xfs_bmap_add_attrfork to handle extents format files.
951 */
952 STATIC int /* error */
953 xfs_bmap_add_attrfork_extents(
954 xfs_trans_t *tp, /* transaction pointer */
955 xfs_inode_t *ip, /* incore inode pointer */
956 xfs_fsblock_t *firstblock, /* first block allocated */
957 struct xfs_defer_ops *dfops, /* blocks to free at commit */
958 int *flags) /* inode logging flags */
959 {
960 xfs_btree_cur_t *cur; /* bmap btree cursor */
961 int error; /* error return value */
962
963 if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
964 return 0;
965 cur = NULL;
966 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops, &cur, 0,
967 flags, XFS_DATA_FORK);
968 if (cur) {
969 cur->bc_private.b.allocated = 0;
970 xfs_btree_del_cursor(cur,
971 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
972 }
973 return error;
974 }
975
976 /*
977 * Called from xfs_bmap_add_attrfork to handle local format files. Each
978 * different data fork content type needs a different callout to do the
979 * conversion. Some are basic and only require special block initialisation
980 * callouts for the data formatting; others (directories) are so specialised they
981 * handle everything themselves.
982 *
983 * XXX (dgc): investigate whether directory conversion can use the generic
984 * formatting callout. It should be possible - it's just a very complex
985 * formatter.
986 */
987 STATIC int /* error */
988 xfs_bmap_add_attrfork_local(
989 xfs_trans_t *tp, /* transaction pointer */
990 xfs_inode_t *ip, /* incore inode pointer */
991 xfs_fsblock_t *firstblock, /* first block allocated */
992 struct xfs_defer_ops *dfops, /* blocks to free at commit */
993 int *flags) /* inode logging flags */
994 {
995 xfs_da_args_t dargs; /* args for dir/attr code */
996
997 if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
998 return 0;
999
1000 if (S_ISDIR(VFS_I(ip)->i_mode)) {
1001 memset(&dargs, 0, sizeof(dargs));
1002 dargs.geo = ip->i_mount->m_dir_geo;
1003 dargs.dp = ip;
1004 dargs.firstblock = firstblock;
1005 dargs.dfops = dfops;
1006 dargs.total = dargs.geo->fsbcount;
1007 dargs.whichfork = XFS_DATA_FORK;
1008 dargs.trans = tp;
1009 return xfs_dir2_sf_to_block(&dargs);
1010 }
1011
1012 if (S_ISLNK(VFS_I(ip)->i_mode))
1013 return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
1014 flags, XFS_DATA_FORK,
1015 xfs_symlink_local_to_remote);
1016
1017 /* should only be called for types that support local format data */
1018 ASSERT(0);
1019 return -EFSCORRUPTED;
1020 }
1021
1022 /*
1023 * Convert inode from non-attributed to attributed.
1024 * Must not be in a transaction, ip must not be locked.
1025 */
1026 int /* error code */
1027 xfs_bmap_add_attrfork(
1028 xfs_inode_t *ip, /* incore inode pointer */
1029 int size, /* space new attribute needs */
1030 int rsvd) /* xact may use reserved blks */
1031 {
1032 xfs_fsblock_t firstblock; /* 1st block/ag allocated */
1033 struct xfs_defer_ops dfops; /* freed extent records */
1034 xfs_mount_t *mp; /* mount structure */
1035 xfs_trans_t *tp; /* transaction pointer */
1036 int blks; /* space reservation */
1037 int version = 1; /* superblock attr version */
1038 int logflags; /* logging flags */
1039 int error; /* error return value */
1040
1041 ASSERT(XFS_IFORK_Q(ip) == 0);
1042
1043 mp = ip->i_mount;
1044 ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1045
1046 blks = XFS_ADDAFORK_SPACE_RES(mp);
1047
1048 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
1049 rsvd ? XFS_TRANS_RESERVE : 0, &tp);
1050 if (error)
1051 return error;
1052
1053 xfs_ilock(ip, XFS_ILOCK_EXCL);
1054 error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
1055 XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
1056 XFS_QMOPT_RES_REGBLKS);
1057 if (error)
1058 goto trans_cancel;
1059 if (XFS_IFORK_Q(ip))
1060 goto trans_cancel;
1061 if (ip->i_d.di_anextents != 0) {
1062 error = -EFSCORRUPTED;
1063 goto trans_cancel;
1064 }
1065 if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
1066 /*
1067 * For inodes coming from pre-6.2 filesystems.
1068 */
1069 ASSERT(ip->i_d.di_aformat == 0);
1070 ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
1071 }
1072
1073 xfs_trans_ijoin(tp, ip, 0);
1074 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1075
1076 switch (ip->i_d.di_format) {
1077 case XFS_DINODE_FMT_DEV:
1078 ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
1079 break;
1080 case XFS_DINODE_FMT_UUID:
1081 ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
1082 break;
1083 case XFS_DINODE_FMT_LOCAL:
1084 case XFS_DINODE_FMT_EXTENTS:
1085 case XFS_DINODE_FMT_BTREE:
1086 ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
1087 if (!ip->i_d.di_forkoff)
1088 ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
1089 else if (mp->m_flags & XFS_MOUNT_ATTR2)
1090 version = 2;
1091 break;
1092 default:
1093 ASSERT(0);
1094 error = -EINVAL;
1095 goto trans_cancel;
1096 }
1097
1098 ASSERT(ip->i_afp == NULL);
1099 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
1100 ip->i_afp->if_flags = XFS_IFEXTENTS;
1101 logflags = 0;
1102 xfs_defer_init(&dfops, &firstblock);
1103 switch (ip->i_d.di_format) {
1104 case XFS_DINODE_FMT_LOCAL:
1105 error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &dfops,
1106 &logflags);
1107 break;
1108 case XFS_DINODE_FMT_EXTENTS:
1109 error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
1110 &dfops, &logflags);
1111 break;
1112 case XFS_DINODE_FMT_BTREE:
1113 error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &dfops,
1114 &logflags);
1115 break;
1116 default:
1117 error = 0;
1118 break;
1119 }
1120 if (logflags)
1121 xfs_trans_log_inode(tp, ip, logflags);
1122 if (error)
1123 goto bmap_cancel;
1124 if (!xfs_sb_version_hasattr(&mp->m_sb) ||
1125 (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
1126 bool log_sb = false;
1127
1128 spin_lock(&mp->m_sb_lock);
1129 if (!xfs_sb_version_hasattr(&mp->m_sb)) {
1130 xfs_sb_version_addattr(&mp->m_sb);
1131 log_sb = true;
1132 }
1133 if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
1134 xfs_sb_version_addattr2(&mp->m_sb);
1135 log_sb = true;
1136 }
1137 spin_unlock(&mp->m_sb_lock);
1138 if (log_sb)
1139 xfs_log_sb(tp);
1140 }
1141
1142 error = xfs_defer_finish(&tp, &dfops);
1143 if (error)
1144 goto bmap_cancel;
1145 error = xfs_trans_commit(tp);
1146 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1147 return error;
1148
1149 bmap_cancel:
1150 xfs_defer_cancel(&dfops);
1151 trans_cancel:
1152 xfs_trans_cancel(tp);
1153 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1154 return error;
1155 }
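
/*
 * Hypothetical usage sketch for the function above: a caller about to
 * create the first extended attribute on "ip" (no transaction held,
 * inode unlocked, "size" is the space the new attribute needs):
 *
 *	if (!XFS_IFORK_Q(ip)) {
 *		error = xfs_bmap_add_attrfork(ip, size, rsvd);
 *		if (error)
 *			return error;
 *	}
 */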
1156
1157 /*
1158 * Internal and external extent tree search functions.
1159 */
1160
1161 /*
1162 * Read in the extents to if_extents.
1163 * All inode fields are set up by caller, we just traverse the btree
1164 * and copy the records in. If the file system cannot contain unwritten
1165 * extents, the records are checked to verify that no "state" flags are set.
1166 */
1167 int /* error */
1168 xfs_bmap_read_extents(
1169 xfs_trans_t *tp, /* transaction pointer */
1170 xfs_inode_t *ip, /* incore inode */
1171 int whichfork) /* data or attr fork */
1172 {
1173 struct xfs_btree_block *block; /* current btree block */
1174 xfs_fsblock_t bno; /* block # of "block" */
1175 xfs_buf_t *bp; /* buffer for "block" */
1176 int error; /* error return value */
1177 xfs_extnum_t i, j; /* index into the extents list */
1178 xfs_ifork_t *ifp; /* fork structure */
1179 int level; /* btree level, for checking */
1180 xfs_mount_t *mp; /* file system mount structure */
1181 __be64 *pp; /* pointer to block address */
1182 /* REFERENCED */
1183 xfs_extnum_t room; /* number of entries there's room for */
1184 int state = xfs_bmap_fork_to_state(whichfork);
1185
1186 mp = ip->i_mount;
1187 ifp = XFS_IFORK_PTR(ip, whichfork);
1188 block = ifp->if_broot;
1189 /*
1190 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
1191 */
1192 level = be16_to_cpu(block->bb_level);
1193 ASSERT(level > 0);
1194 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
1195 bno = be64_to_cpu(*pp);
1196
1197 /*
1198 * Go down the tree until leaf level is reached, following the first
1199 * pointer (leftmost) at each level.
1200 */
1201 while (level-- > 0) {
1202 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
1203 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
1204 if (error)
1205 return error;
1206 block = XFS_BUF_TO_BLOCK(bp);
1207 if (level == 0)
1208 break;
1209 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
1210 bno = be64_to_cpu(*pp);
1211 XFS_WANT_CORRUPTED_GOTO(mp,
1212 XFS_FSB_SANITY_CHECK(mp, bno), error0);
1213 xfs_trans_brelse(tp, bp);
1214 }
1215 /*
1216 * Here with bp and block set to the leftmost leaf node in the tree.
1217 */
1218 room = xfs_iext_count(ifp);
1219 i = 0;
1220 /*
1221 * Loop over all leaf nodes. Copy information to the extent records.
1222 */
1223 for (;;) {
1224 xfs_bmbt_rec_t *frp;
1225 xfs_fsblock_t nextbno;
1226 xfs_extnum_t num_recs;
1227
1228 num_recs = xfs_btree_get_numrecs(block);
1229 if (unlikely(i + num_recs > room)) {
1230 ASSERT(i + num_recs <= room);
1231 xfs_warn(ip->i_mount,
1232 "corrupt dinode %Lu, (btree extents).",
1233 (unsigned long long) ip->i_ino);
1234 XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
1235 XFS_ERRLEVEL_LOW, ip->i_mount, block);
1236 goto error0;
1237 }
1238 /*
1239 * Read-ahead the next leaf block, if any.
1240 */
1241 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
1242 if (nextbno != NULLFSBLOCK)
1243 xfs_btree_reada_bufl(mp, nextbno, 1,
1244 &xfs_bmbt_buf_ops);
1245 /*
1246 * Copy records into the extent records.
1247 */
1248 frp = XFS_BMBT_REC_ADDR(mp, block, 1);
1249 for (j = 0; j < num_recs; j++, i++, frp++) {
1250 xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
1251 trp->l0 = be64_to_cpu(frp->l0);
1252 trp->l1 = be64_to_cpu(frp->l1);
1253 if (!xfs_bmbt_validate_extent(mp, whichfork, trp)) {
1254 XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
1255 XFS_ERRLEVEL_LOW, mp);
1256 goto error0;
1257 }
1258 trace_xfs_read_extent(ip, i, state, _THIS_IP_);
1259 }
1260 xfs_trans_brelse(tp, bp);
1261 bno = nextbno;
1262 /*
1263 * If we've reached the end, stop.
1264 */
1265 if (bno == NULLFSBLOCK)
1266 break;
1267 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
1268 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
1269 if (error)
1270 return error;
1271 block = XFS_BUF_TO_BLOCK(bp);
1272 }
1273 if (i != XFS_IFORK_NEXTENTS(ip, whichfork))
1274 return -EFSCORRUPTED;
1275 ASSERT(i == xfs_iext_count(ifp));
1276 return 0;
1277 error0:
1278 xfs_trans_brelse(tp, bp);
1279 return -EFSCORRUPTED;
1280 }
1281
1282 /*
1283 * Returns the file-relative block number of the first unused block(s)
1284 * in the file with at least "len" logically contiguous blocks free.
1285 * This is the lowest-address hole if the file has holes, else the first block
1286 * past the end of file.
1287 * Return 0 if the file is currently local (in-inode).
1288 */
1289 int /* error */
1290 xfs_bmap_first_unused(
1291 xfs_trans_t *tp, /* transaction pointer */
1292 xfs_inode_t *ip, /* incore inode */
1293 xfs_extlen_t len, /* size of hole to find */
1294 xfs_fileoff_t *first_unused, /* unused block */
1295 int whichfork) /* data or attr fork */
1296 {
1297 int error; /* error return value */
1298 int idx; /* extent record index */
1299 xfs_ifork_t *ifp; /* inode fork pointer */
1300 xfs_fileoff_t lastaddr; /* last block number seen */
1301 xfs_fileoff_t lowest; /* lowest useful block */
1302 xfs_fileoff_t max; /* starting useful block */
1303 xfs_extnum_t nextents; /* number of extent entries */
1304
1305 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
1306 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
1307 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
1308 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1309 *first_unused = 0;
1310 return 0;
1311 }
1312 ifp = XFS_IFORK_PTR(ip, whichfork);
1313 if (!(ifp->if_flags & XFS_IFEXTENTS) &&
1314 (error = xfs_iread_extents(tp, ip, whichfork)))
1315 return error;
1316 lowest = *first_unused;
1317 nextents = xfs_iext_count(ifp);
1318 for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
1319 struct xfs_bmbt_irec got;
1320
1321 xfs_iext_get_extent(ifp, idx, &got);
1322
1323 /*
1324 * See if the hole before this extent will work.
1325 */
1326 if (got.br_startoff >= lowest + len &&
1327 got.br_startoff - max >= len) {
1328 *first_unused = max;
1329 return 0;
1330 }
1331 lastaddr = got.br_startoff + got.br_blockcount;
1332 max = XFS_FILEOFF_MAX(lastaddr, lowest);
1333 }
1334 *first_unused = max;
1335 return 0;
1336 }
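
/*
 * Worked example for the hole search above, with a hypothetical fork
 * holding extents at offsets [0, 5) and [8, 12), len = 2 and
 * *first_unused = 0 on entry:
 *
 *	idx 0: startoff 0 is not past lowest + len, so no hole yet;
 *	       max becomes 5 (end of the first extent)
 *	idx 1: startoff 8 >= lowest + len and 8 - 5 >= 2, so the
 *	       hole at offset 5 fits and *first_unused = 5
 */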
1337
1338 /*
1339 * Returns the file-relative block number of the last block - 1 before
1340 * last_block (input value) in the file.
1341 * This is not based on i_size, it is based on the extent records.
1342 * Returns 0 for local files, as they do not have extent records.
1343 */
1344 int /* error */
1345 xfs_bmap_last_before(
1346 struct xfs_trans *tp, /* transaction pointer */
1347 struct xfs_inode *ip, /* incore inode */
1348 xfs_fileoff_t *last_block, /* last block */
1349 int whichfork) /* data or attr fork */
1350 {
1351 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
1352 struct xfs_bmbt_irec got;
1353 xfs_extnum_t idx;
1354 int error;
1355
1356 switch (XFS_IFORK_FORMAT(ip, whichfork)) {
1357 case XFS_DINODE_FMT_LOCAL:
1358 *last_block = 0;
1359 return 0;
1360 case XFS_DINODE_FMT_BTREE:
1361 case XFS_DINODE_FMT_EXTENTS:
1362 break;
1363 default:
1364 return -EIO;
1365 }
1366
1367 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1368 error = xfs_iread_extents(tp, ip, whichfork);
1369 if (error)
1370 return error;
1371 }
1372
1373 if (xfs_iext_lookup_extent(ip, ifp, *last_block - 1, &idx, &got)) {
1374 if (got.br_startoff <= *last_block - 1)
1375 return 0;
1376 }
1377
1378 if (xfs_iext_get_extent(ifp, idx - 1, &got)) {
1379 *last_block = got.br_startoff + got.br_blockcount;
1380 return 0;
1381 }
1382
1383 *last_block = 0;
1384 return 0;
1385 }
1386
1387 int
1388 xfs_bmap_last_extent(
1389 struct xfs_trans *tp,
1390 struct xfs_inode *ip,
1391 int whichfork,
1392 struct xfs_bmbt_irec *rec,
1393 int *is_empty)
1394 {
1395 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
1396 int error;
1397 int nextents;
1398
1399 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1400 error = xfs_iread_extents(tp, ip, whichfork);
1401 if (error)
1402 return error;
1403 }
1404
1405 nextents = xfs_iext_count(ifp);
1406 if (nextents == 0) {
1407 *is_empty = 1;
1408 return 0;
1409 }
1410
1411 xfs_iext_get_extent(ifp, nextents - 1, rec);
1412 *is_empty = 0;
1413 return 0;
1414 }
1415
1416 /*
1417 * Check the last inode extent to determine whether this allocation will result
1418 * in blocks being allocated at the end of the file. When we allocate new data
1419 * blocks at the end of the file which do not start at the previous data block,
1420 * we will try to align the new blocks at stripe unit boundaries.
1421 *
1422 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1423 * at, or past the EOF.
1424 */
1425 STATIC int
1426 xfs_bmap_isaeof(
1427 struct xfs_bmalloca *bma,
1428 int whichfork)
1429 {
1430 struct xfs_bmbt_irec rec;
1431 int is_empty;
1432 int error;
1433
1434 bma->aeof = false;
1435 error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1436 &is_empty);
1437 if (error)
1438 return error;
1439
1440 if (is_empty) {
1441 bma->aeof = true;
1442 return 0;
1443 }
1444
1445 /*
1446 * Check if we are allocating at or past the last extent, or at least into
1447 * the last delayed allocated extent.
1448 */
1449 bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1450 (bma->offset >= rec.br_startoff &&
1451 isnullstartblock(rec.br_startblock));
1452 return 0;
1453 }
1454
1455 /*
1456 * Returns the file-relative block number of the first block past eof in
1457 * the file. This is not based on i_size, it is based on the extent records.
1458 * Returns 0 for local files, as they do not have extent records.
1459 */
1460 int
1461 xfs_bmap_last_offset(
1462 struct xfs_inode *ip,
1463 xfs_fileoff_t *last_block,
1464 int whichfork)
1465 {
1466 struct xfs_bmbt_irec rec;
1467 int is_empty;
1468 int error;
1469
1470 *last_block = 0;
1471
1472 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
1473 return 0;
1474
1475 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1476 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1477 return -EIO;
1478
1479 error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1480 if (error || is_empty)
1481 return error;
1482
1483 *last_block = rec.br_startoff + rec.br_blockcount;
1484 return 0;
1485 }
1486
1487 /*
1488 * Returns whether the selected fork of the inode has exactly one
1489 * block or not. For the data fork we check this matches di_size,
1490 * implying the file's range is 0..bsize-1.
1491 */
1492 int /* 1=>1 block, 0=>otherwise */
1493 xfs_bmap_one_block(
1494 xfs_inode_t *ip, /* incore inode */
1495 int whichfork) /* data or attr fork */
1496 {
1497 xfs_ifork_t *ifp; /* inode fork pointer */
1498 int rval; /* return value */
1499 xfs_bmbt_irec_t s; /* internal version of extent */
1500
1501 #ifndef DEBUG
1502 if (whichfork == XFS_DATA_FORK)
1503 return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
1504 #endif /* !DEBUG */
1505 if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
1506 return 0;
1507 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1508 return 0;
1509 ifp = XFS_IFORK_PTR(ip, whichfork);
1510 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1511 xfs_iext_get_extent(ifp, 0, &s);
1512 rval = s.br_startoff == 0 && s.br_blockcount == 1;
1513 if (rval && whichfork == XFS_DATA_FORK)
1514 ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
1515 return rval;
1516 }
1517
1518 /*
1519 * Extent tree manipulation functions used during allocation.
1520 */
1521
1522 /*
1523 * Convert a delayed allocation to a real allocation.
1524 */
1525 STATIC int /* error */
1526 xfs_bmap_add_extent_delay_real(
1527 struct xfs_bmalloca *bma,
1528 int whichfork)
1529 {
1530 struct xfs_bmbt_irec *new = &bma->got;
1531 int error; /* error return value */
1532 int i; /* temp state */
1533 xfs_ifork_t *ifp; /* inode fork pointer */
1534 xfs_fileoff_t new_endoff; /* end offset of new entry */
1535 xfs_bmbt_irec_t r[3]; /* neighbor extent entries */
1536 /* left is 0, right is 1, prev is 2 */
1537 int rval=0; /* return value (logging flags) */
1538 int state = xfs_bmap_fork_to_state(whichfork);
1539 xfs_filblks_t da_new; /* new count del alloc blocks used */
1540 xfs_filblks_t da_old; /* old count del alloc blocks used */
1541 xfs_filblks_t temp=0; /* value for da_new calculations */
1542 int tmp_rval; /* partial logging flags */
1543 struct xfs_mount *mp;
1544 xfs_extnum_t *nextents;
1545 struct xfs_bmbt_irec old;
1546
1547 mp = bma->ip->i_mount;
1548 ifp = XFS_IFORK_PTR(bma->ip, whichfork);
1549 ASSERT(whichfork != XFS_ATTR_FORK);
1550 nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents :
1551 &bma->ip->i_d.di_nextents);
1552
1553 ASSERT(bma->idx >= 0);
1554 ASSERT(bma->idx <= xfs_iext_count(ifp));
1555 ASSERT(!isnullstartblock(new->br_startblock));
1556 ASSERT(!bma->cur ||
1557 (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
1558
1559 XFS_STATS_INC(mp, xs_add_exlist);
1560
1561 #define LEFT r[0]
1562 #define RIGHT r[1]
1563 #define PREV r[2]
1564
1565 /*
1566 * Set up a bunch of variables to make the tests simpler.
1567 */
1568 xfs_iext_get_extent(ifp, bma->idx, &PREV);
1569 new_endoff = new->br_startoff + new->br_blockcount;
1570 ASSERT(isnullstartblock(PREV.br_startblock));
1571 ASSERT(PREV.br_startoff <= new->br_startoff);
1572 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1573
1574 da_old = startblockval(PREV.br_startblock);
1575 da_new = 0;
1576
1577 /*
1578 * Set flags determining what part of the previous delayed allocation
1579 * extent is being replaced by a real allocation.
1580 */
1581 if (PREV.br_startoff == new->br_startoff)
1582 state |= BMAP_LEFT_FILLING;
1583 if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1584 state |= BMAP_RIGHT_FILLING;
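
/*
 * For instance, when the new real allocation exactly covers the
 * delayed extent, both filling flags are set and one of the first four
 * cases in the switch below runs, merging with whichever neighbors are
 * contiguous.
 */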
1585
1586 /*
1587 * Check and set flags if this segment has a left neighbor.
1588 * Don't set contiguous if the combined extent would be too large.
1589 */
1590 if (bma->idx > 0) {
1591 state |= BMAP_LEFT_VALID;
1592 xfs_iext_get_extent(ifp, bma->idx - 1, &LEFT);
1593
1594 if (isnullstartblock(LEFT.br_startblock))
1595 state |= BMAP_LEFT_DELAY;
1596 }
1597
1598 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1599 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1600 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1601 LEFT.br_state == new->br_state &&
1602 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1603 state |= BMAP_LEFT_CONTIG;
1604
1605 /*
1606 * Check and set flags if this segment has a right neighbor.
1607 * Don't set contiguous if the combined extent would be too large.
1608 * Also check for all-three-contiguous being too large.
1609 */
1610 if (bma->idx < xfs_iext_count(ifp) - 1) {
1611 state |= BMAP_RIGHT_VALID;
1612 xfs_iext_get_extent(ifp, bma->idx + 1, &RIGHT);
1613
1614 if (isnullstartblock(RIGHT.br_startblock))
1615 state |= BMAP_RIGHT_DELAY;
1616 }
1617
1618 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1619 new_endoff == RIGHT.br_startoff &&
1620 new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1621 new->br_state == RIGHT.br_state &&
1622 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1623 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1624 BMAP_RIGHT_FILLING)) !=
1625 (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1626 BMAP_RIGHT_FILLING) ||
1627 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1628 <= MAXEXTLEN))
1629 state |= BMAP_RIGHT_CONTIG;
1630
1631 error = 0;
1632 /*
1633 * Switch out based on the FILLING and CONTIG state bits.
1634 */
1635 switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1636 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1637 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1638 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1639 /*
1640 * Filling in all of a previously delayed allocation extent.
1641 * The left and right neighbors are both contiguous with new.
1642 */
1643 bma->idx--;
1644 LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
1645 xfs_iext_update_extent(bma->ip, state, bma->idx, &LEFT);
1646
1647 xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
1648 (*nextents)--;
1649 if (bma->cur == NULL)
1650 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1651 else {
1652 rval = XFS_ILOG_CORE;
1653 error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1654 if (error)
1655 goto done;
1656 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1657 error = xfs_btree_delete(bma->cur, &i);
1658 if (error)
1659 goto done;
1660 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1661 error = xfs_btree_decrement(bma->cur, 0, &i);
1662 if (error)
1663 goto done;
1664 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1665 error = xfs_bmbt_update(bma->cur, &LEFT);
1666 if (error)
1667 goto done;
1668 }
1669 break;
1670
1671 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1672 /*
1673 * Filling in all of a previously delayed allocation extent.
1674 * The left neighbor is contiguous, the right is not.
1675 */
1676 bma->idx--;
1677
1678 old = LEFT;
1679 LEFT.br_blockcount += PREV.br_blockcount;
1680 xfs_iext_update_extent(bma->ip, state, bma->idx, &LEFT);
1681
1682 xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
1683 if (bma->cur == NULL)
1684 rval = XFS_ILOG_DEXT;
1685 else {
1686 rval = 0;
1687 error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1688 if (error)
1689 goto done;
1690 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1691 error = xfs_bmbt_update(bma->cur, &LEFT);
1692 if (error)
1693 goto done;
1694 }
1695 break;
1696
1697 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1698 /*
1699 * Filling in all of a previously delayed allocation extent.
1700 * The right neighbor is contiguous, the left is not.
1701 */
1702 PREV.br_startblock = new->br_startblock;
1703 PREV.br_blockcount += RIGHT.br_blockcount;
1704 xfs_iext_update_extent(bma->ip, state, bma->idx, &PREV);
1705
1706 xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
1707 if (bma->cur == NULL)
1708 rval = XFS_ILOG_DEXT;
1709 else {
1710 rval = 0;
1711 error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
1712 if (error)
1713 goto done;
1714 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1715 error = xfs_bmbt_update(bma->cur, &PREV);
1716 if (error)
1717 goto done;
1718 }
1719 break;
1720
1721 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1722 /*
1723 * Filling in all of a previously delayed allocation extent.
1724 * Neither the left nor right neighbors are contiguous with
1725 * the new one.
1726 */
1727 PREV.br_startblock = new->br_startblock;
1728 PREV.br_state = new->br_state;
1729 xfs_iext_update_extent(bma->ip, state, bma->idx, &PREV);
1730
1731 (*nextents)++;
1732 if (bma->cur == NULL)
1733 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1734 else {
1735 rval = XFS_ILOG_CORE;
1736 error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1737 if (error)
1738 goto done;
1739 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1740 error = xfs_btree_insert(bma->cur, &i);
1741 if (error)
1742 goto done;
1743 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1744 }
1745 break;
1746
1747 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1748 /*
1749 * Filling in the first part of a previous delayed allocation.
1750 * The left neighbor is contiguous.
1751 */
1752 old = LEFT;
1753 temp = PREV.br_blockcount - new->br_blockcount;
1754 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1755 startblockval(PREV.br_startblock));
1756
1757 LEFT.br_blockcount += new->br_blockcount;
1758 xfs_iext_update_extent(bma->ip, state, bma->idx - 1, &LEFT);
1759
1760 PREV.br_blockcount = temp = PREV.br_blockcount - new->br_blockcount;
1761 PREV.br_startoff += new->br_blockcount;
1762 PREV.br_startblock = nullstartblock(da_new);
1763 xfs_iext_update_extent(bma->ip, state, bma->idx, &PREV);
1764
1765 if (bma->cur == NULL)
1766 rval = XFS_ILOG_DEXT;
1767 else {
1768 rval = 0;
1769 error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1770 if (error)
1771 goto done;
1772 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1773 error = xfs_bmbt_update(bma->cur, &LEFT);
1774 if (error)
1775 goto done;
1776 }
1777
1778 bma->idx--;
1779 break;
1780
1781 case BMAP_LEFT_FILLING:
1782 /*
1783 * Filling in the first part of a previous delayed allocation.
1784 * The left neighbor is not contiguous.
1785 */
1786 xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
1787 (*nextents)++;
1788 if (bma->cur == NULL)
1789 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1790 else {
1791 rval = XFS_ILOG_CORE;
1792 error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1793 if (error)
1794 goto done;
1795 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1796 error = xfs_btree_insert(bma->cur, &i);
1797 if (error)
1798 goto done;
1799 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1800 }
1801
1802 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1803 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1804 bma->firstblock, bma->dfops,
1805 &bma->cur, 1, &tmp_rval, whichfork);
1806 rval |= tmp_rval;
1807 if (error)
1808 goto done;
1809 }
1810
1811 temp = PREV.br_blockcount - new->br_blockcount;
1812 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1813 startblockval(PREV.br_startblock) -
1814 (bma->cur ? bma->cur->bc_private.b.allocated : 0));
1815
1816 PREV.br_startoff = new_endoff;
1817 PREV.br_blockcount = temp;
1818 PREV.br_startblock = nullstartblock(da_new);
1819 xfs_iext_update_extent(bma->ip, state, bma->idx + 1, &PREV);
1820 break;
1821
1822 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1823 /*
1824 * Filling in the last part of a previous delayed allocation.
1825 * The right neighbor is contiguous with the new allocation.
1826 */
1827 old = RIGHT;
1828 RIGHT.br_startoff = new->br_startoff;
1829 RIGHT.br_startblock = new->br_startblock;
1830 RIGHT.br_blockcount += new->br_blockcount;
1831 xfs_iext_update_extent(bma->ip, state, bma->idx + 1, &RIGHT);
1832
1833 if (bma->cur == NULL)
1834 rval = XFS_ILOG_DEXT;
1835 else {
1836 rval = 0;
1837 error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
1838 if (error)
1839 goto done;
1840 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1841 error = xfs_bmbt_update(bma->cur, &RIGHT);
1842 if (error)
1843 goto done;
1844 }
1845
1846 temp = PREV.br_blockcount - new->br_blockcount;
1847 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1848 startblockval(PREV.br_startblock));
1849
1850 PREV.br_blockcount = temp;
1851 PREV.br_startblock = nullstartblock(da_new);
1852 xfs_iext_update_extent(bma->ip, state, bma->idx, &PREV);
1853
1854 bma->idx++;
1855 break;
1856
1857 case BMAP_RIGHT_FILLING:
1858 /*
1859 * Filling in the last part of a previous delayed allocation.
1860 * The right neighbor is not contiguous.
1861 */
1862 xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state);
1863 (*nextents)++;
1864 if (bma->cur == NULL)
1865 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1866 else {
1867 rval = XFS_ILOG_CORE;
1868 error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1869 if (error)
1870 goto done;
1871 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1872 error = xfs_btree_insert(bma->cur, &i);
1873 if (error)
1874 goto done;
1875 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1876 }
1877
1878 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1879 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1880 bma->firstblock, bma->dfops, &bma->cur, 1,
1881 &tmp_rval, whichfork);
1882 rval |= tmp_rval;
1883 if (error)
1884 goto done;
1885 }
1886
1887 temp = PREV.br_blockcount - new->br_blockcount;
1888 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1889 startblockval(PREV.br_startblock) -
1890 (bma->cur ? bma->cur->bc_private.b.allocated : 0));
1891
1892 PREV.br_startblock = nullstartblock(da_new);
1893 PREV.br_blockcount = temp;
1894 xfs_iext_update_extent(bma->ip, state, bma->idx, &PREV);
1895
1896 bma->idx++;
1897 break;
1898
1899 case 0:
1900 /*
1901 * Filling in the middle part of a previous delayed allocation.
1902 * Contiguity is impossible here.
1903 * This case is avoided almost all the time.
1904 *
1905 * We start with a delayed allocation:
1906 *
1907 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1908 * PREV @ idx
1909 *
1910 * and we are allocating:
1911 * +rrrrrrrrrrrrrrrrr+
1912 * new
1913 *
1914 * and we set it up for insertion as:
1915 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1916 * new
1917 * PREV @ idx LEFT RIGHT
1918 * inserted at idx + 1
1919 */
1920 old = PREV;
1921
1922 /* LEFT is the new middle */
1923 LEFT = *new;
1924
1925 /* RIGHT is the new right */
1926 RIGHT.br_state = PREV.br_state;
1927 RIGHT.br_startoff = new_endoff;
1928 RIGHT.br_blockcount =
1929 PREV.br_startoff + PREV.br_blockcount - new_endoff;
1930 RIGHT.br_startblock =
1931 nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1932 RIGHT.br_blockcount));
1933
1934 /* truncate PREV */
1935 PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
1936 PREV.br_startblock =
1937 nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1938 PREV.br_blockcount));
1939 xfs_iext_update_extent(bma->ip, state, bma->idx, &PREV);
1940
1941 /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
1942 xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state);
1943 (*nextents)++;
1944
1945 if (bma->cur == NULL)
1946 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1947 else {
1948 rval = XFS_ILOG_CORE;
1949 error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1950 if (error)
1951 goto done;
1952 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1953 error = xfs_btree_insert(bma->cur, &i);
1954 if (error)
1955 goto done;
1956 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1957 }
1958
1959 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1960 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1961 bma->firstblock, bma->dfops, &bma->cur,
1962 1, &tmp_rval, whichfork);
1963 rval |= tmp_rval;
1964 if (error)
1965 goto done;
1966 }
1967
1968 da_new = startblockval(PREV.br_startblock) +
1969 startblockval(RIGHT.br_startblock);
1970 bma->idx++;
1971 break;
1972
1973 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1974 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1975 case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1976 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1977 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1978 case BMAP_LEFT_CONTIG:
1979 case BMAP_RIGHT_CONTIG:
1980 /*
1981 * These cases are all impossible.
1982 */
1983 ASSERT(0);
1984 }
1985
1986 /* add reverse mapping */
1987 error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
1988 if (error)
1989 goto done;
1990
1991 /* convert to a btree if necessary */
1992 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1993 int tmp_logflags; /* partial log flag return val */
1994
1995 ASSERT(bma->cur == NULL);
1996 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1997 bma->firstblock, bma->dfops, &bma->cur,
1998 da_old > 0, &tmp_logflags, whichfork);
1999 bma->logflags |= tmp_logflags;
2000 if (error)
2001 goto done;
2002 }
2003
2004 if (bma->cur) {
2005 da_new += bma->cur->bc_private.b.allocated;
2006 bma->cur->bc_private.b.allocated = 0;
2007 }
2008
2009 /* adjust for changes in reserved delayed indirect blocks */
2010 if (da_new != da_old) {
2011 ASSERT(state == 0 || da_new < da_old);
2012 error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
2013 false);
2014 }
2015
2016 xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
2017 done:
2018 if (whichfork != XFS_COW_FORK)
2019 bma->logflags |= rval;
2020 return error;
2021 #undef LEFT
2022 #undef RIGHT
2023 #undef PREV
2024 }
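/*
 * Illustrative sketch, not part of the original source: the switch
 * above is driven by four state bits that describe how the new
 * allocation lines up with the delalloc extent PREV and its neighbors.
 * A minimal model of how those bits combine, assuming the
 * BMAP_*_FILLING and BMAP_*_CONTIG flags defined elsewhere in libxfs:
 */
#if 0
static int
bmap_delay_real_case(
	bool	left_filling,	/* new starts where PREV starts */
	bool	right_filling,	/* new ends where PREV ends */
	bool	left_contig,	/* new merges with LEFT */
	bool	right_contig)	/* new merges with RIGHT */
{
	int	state = 0;

	if (left_filling)
		state |= BMAP_LEFT_FILLING;
	if (right_filling)
		state |= BMAP_RIGHT_FILLING;
	if (left_contig)
		state |= BMAP_LEFT_CONTIG;
	if (right_contig)
		state |= BMAP_RIGHT_CONTIG;
	/*
	 * A CONTIG bit is only ever set together with the FILLING bit
	 * for the same side, so nine of the sixteen combinations are
	 * reachable; the other seven hit the ASSERT(0) arm above.
	 */
	return state;
}
#endif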
2025
2026 /*
2027 * Convert an unwritten allocation to a real allocation or vice versa.
2028 */
2029 STATIC int /* error */
2030 xfs_bmap_add_extent_unwritten_real(
2031 struct xfs_trans *tp,
2032 xfs_inode_t *ip, /* incore inode pointer */
2033 int whichfork,
2034 xfs_extnum_t *idx, /* extent number to update/insert */
2035 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
2036 xfs_bmbt_irec_t *new, /* new data to add to file extents */
2037 xfs_fsblock_t *first, /* pointer to firstblock variable */
2038 struct xfs_defer_ops *dfops, /* list of deferred operations */
2039 int *logflagsp) /* inode logging flags */
2040 {
2041 xfs_btree_cur_t *cur; /* btree cursor */
2042 int error; /* error return value */
2043 int i; /* temp state */
2044 xfs_ifork_t *ifp; /* inode fork pointer */
2045 xfs_fileoff_t new_endoff; /* end offset of new entry */
2046 xfs_bmbt_irec_t r[3]; /* neighbor extent entries */
2047 /* left is 0, right is 1, prev is 2 */
2048 int rval=0; /* return value (logging flags) */
2049 int state = xfs_bmap_fork_to_state(whichfork);
2050 struct xfs_mount *mp = ip->i_mount;
2051 struct xfs_bmbt_irec old;
2052
2053 *logflagsp = 0;
2054
2055 cur = *curp;
2056 ifp = XFS_IFORK_PTR(ip, whichfork);
2057
2058 ASSERT(*idx >= 0);
2059 ASSERT(*idx <= xfs_iext_count(ifp));
2060 ASSERT(!isnullstartblock(new->br_startblock));
2061
2062 XFS_STATS_INC(mp, xs_add_exlist);
2063
2064 #define LEFT r[0]
2065 #define RIGHT r[1]
2066 #define PREV r[2]
2067
2068 /*
2069 * Set up a bunch of variables to make the tests simpler.
2070 */
2071 error = 0;
2072 xfs_iext_get_extent(ifp, *idx, &PREV);
2073 ASSERT(new->br_state != PREV.br_state);
2074 new_endoff = new->br_startoff + new->br_blockcount;
2075 ASSERT(PREV.br_startoff <= new->br_startoff);
2076 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2077
2078 /*
2079 * Set flags determining what part of the previous oldext allocation
2080 * extent is being replaced by a newext allocation.
2081 */
2082 if (PREV.br_startoff == new->br_startoff)
2083 state |= BMAP_LEFT_FILLING;
2084 if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2085 state |= BMAP_RIGHT_FILLING;
2086
2087 /*
2088 * Check and set flags if this segment has a left neighbor.
2089 * Don't set contiguous if the combined extent would be too large.
2090 */
2091 if (*idx > 0) {
2092 state |= BMAP_LEFT_VALID;
2093 xfs_iext_get_extent(ifp, *idx - 1, &LEFT);
2094
2095 if (isnullstartblock(LEFT.br_startblock))
2096 state |= BMAP_LEFT_DELAY;
2097 }
2098
2099 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2100 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2101 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2102 LEFT.br_state == new->br_state &&
2103 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2104 state |= BMAP_LEFT_CONTIG;
2105
2106 /*
2107 * Check and set flags if this segment has a right neighbor.
2108 * Don't set contiguous if the combined extent would be too large.
2109 * Also check for all-three-contiguous being too large.
2110 */
2111 if (*idx < xfs_iext_count(ifp) - 1) {
2112 state |= BMAP_RIGHT_VALID;
2113 xfs_iext_get_extent(ifp, *idx + 1, &RIGHT);
2114 if (isnullstartblock(RIGHT.br_startblock))
2115 state |= BMAP_RIGHT_DELAY;
2116 }
2117
2118 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2119 new_endoff == RIGHT.br_startoff &&
2120 new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2121 new->br_state == RIGHT.br_state &&
2122 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2123 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2124 BMAP_RIGHT_FILLING)) !=
2125 (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2126 BMAP_RIGHT_FILLING) ||
2127 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2128 <= MAXEXTLEN))
2129 state |= BMAP_RIGHT_CONTIG;
2130
2131 /*
2132 * Switch out based on the FILLING and CONTIG state bits.
2133 */
2134 switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2135 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2136 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2137 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2138 /*
2139 * Setting all of a previous oldext extent to newext.
2140 * The left and right neighbors are both contiguous with new.
2141 */
2142 --*idx;
2143
2144 LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;
2145 xfs_iext_update_extent(ip, state, *idx, &LEFT);
2146
2147 xfs_iext_remove(ip, *idx + 1, 2, state);
2148 XFS_IFORK_NEXT_SET(ip, whichfork,
2149 XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
2150 if (cur == NULL)
2151 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2152 else {
2153 rval = XFS_ILOG_CORE;
2154 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2155 if (error)
2156 goto done;
2157 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2158 if ((error = xfs_btree_delete(cur, &i)))
2159 goto done;
2160 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2161 if ((error = xfs_btree_decrement(cur, 0, &i)))
2162 goto done;
2163 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2164 if ((error = xfs_btree_delete(cur, &i)))
2165 goto done;
2166 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2167 if ((error = xfs_btree_decrement(cur, 0, &i)))
2168 goto done;
2169 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2170 error = xfs_bmbt_update(cur, &LEFT);
2171 if (error)
2172 goto done;
2173 }
2174 break;
2175
2176 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2177 /*
2178 * Setting all of a previous oldext extent to newext.
2179 * The left neighbor is contiguous, the right is not.
2180 */
2181 --*idx;
2182
2183 LEFT.br_blockcount += PREV.br_blockcount;
2184 xfs_iext_update_extent(ip, state, *idx, &LEFT);
2185
2186 xfs_iext_remove(ip, *idx + 1, 1, state);
2187 XFS_IFORK_NEXT_SET(ip, whichfork,
2188 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2189 if (cur == NULL)
2190 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2191 else {
2192 rval = XFS_ILOG_CORE;
2193 error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
2194 if (error)
2195 goto done;
2196 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2197 if ((error = xfs_btree_delete(cur, &i)))
2198 goto done;
2199 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2200 if ((error = xfs_btree_decrement(cur, 0, &i)))
2201 goto done;
2202 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2203 error = xfs_bmbt_update(cur, &LEFT);
2204 if (error)
2205 goto done;
2206 }
2207 break;
2208
2209 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2210 /*
2211 * Setting all of a previous oldext extent to newext.
2212 * The right neighbor is contiguous, the left is not.
2213 */
2214 PREV.br_blockcount += RIGHT.br_blockcount;
2215 PREV.br_state = new->br_state;
2216 xfs_iext_update_extent(ip, state, *idx, &PREV);
2217
2218 xfs_iext_remove(ip, *idx + 1, 1, state);
2219 XFS_IFORK_NEXT_SET(ip, whichfork,
2220 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2221 if (cur == NULL)
2222 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2223 else {
2224 rval = XFS_ILOG_CORE;
2225 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
2226 if (error)
2227 goto done;
2228 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2229 if ((error = xfs_btree_delete(cur, &i)))
2230 goto done;
2231 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2232 if ((error = xfs_btree_decrement(cur, 0, &i)))
2233 goto done;
2234 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2235 error = xfs_bmbt_update(cur, &PREV);
2236 if (error)
2237 goto done;
2238 }
2239 break;
2240
2241 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2242 /*
2243 * Setting all of a previous oldext extent to newext.
2244 * Neither the left nor right neighbors are contiguous with
2245 * the new one.
2246 */
2247 PREV.br_state = new->br_state;
2248 xfs_iext_update_extent(ip, state, *idx, &PREV);
2249
2250 if (cur == NULL)
2251 rval = XFS_ILOG_DEXT;
2252 else {
2253 rval = 0;
2254 error = xfs_bmbt_lookup_eq(cur, new, &i);
2255 if (error)
2256 goto done;
2257 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2258 error = xfs_bmbt_update(cur, &PREV);
2259 if (error)
2260 goto done;
2261 }
2262 break;
2263
2264 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2265 /*
2266 * Setting the first part of a previous oldext extent to newext.
2267 * The left neighbor is contiguous.
2268 */
2269 LEFT.br_blockcount += new->br_blockcount;
2270 xfs_iext_update_extent(ip, state, *idx - 1, &LEFT);
2271
2272 old = PREV;
2273 PREV.br_startoff += new->br_blockcount;
2274 PREV.br_startblock += new->br_blockcount;
2275 PREV.br_blockcount -= new->br_blockcount;
2276 xfs_iext_update_extent(ip, state, *idx, &PREV);
2277
2278 --*idx;
2279
2280 if (cur == NULL)
2281 rval = XFS_ILOG_DEXT;
2282 else {
2283 rval = 0;
2284 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2285 if (error)
2286 goto done;
2287 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2288 error = xfs_bmbt_update(cur, &PREV);
2289 if (error)
2290 goto done;
2291 error = xfs_btree_decrement(cur, 0, &i);
2292 if (error)
2293 goto done;
2294 error = xfs_bmbt_update(cur, &LEFT);
2295 if (error)
2296 goto done;
2297 }
2298 break;
2299
2300 case BMAP_LEFT_FILLING:
2301 /*
2302 * Setting the first part of a previous oldext extent to newext.
2303 * The left neighbor is not contiguous.
2304 */
2305 old = PREV;
2306 PREV.br_startoff += new->br_blockcount;
2307 PREV.br_startblock += new->br_blockcount;
2308 PREV.br_blockcount -= new->br_blockcount;
2309 xfs_iext_update_extent(ip, state, *idx, &PREV);
2310
2311 xfs_iext_insert(ip, *idx, 1, new, state);
2312 XFS_IFORK_NEXT_SET(ip, whichfork,
2313 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2314 if (cur == NULL)
2315 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2316 else {
2317 rval = XFS_ILOG_CORE;
2318 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2319 if (error)
2320 goto done;
2321 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2322 error = xfs_bmbt_update(cur, &PREV);
2323 if (error)
2324 goto done;
2325 cur->bc_rec.b = *new;
2326 if ((error = xfs_btree_insert(cur, &i)))
2327 goto done;
2328 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2329 }
2330 break;
2331
2332 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2333 /*
2334 * Setting the last part of a previous oldext extent to newext.
2335 * The right neighbor is contiguous with the new allocation.
2336 */
2337 old = PREV;
2338 PREV.br_blockcount -= new->br_blockcount;
2339 xfs_iext_update_extent(ip, state, *idx, &PREV);
2340
2341 ++*idx;
2342
2343 RIGHT.br_startoff = new->br_startoff;
2344 RIGHT.br_startblock = new->br_startblock;
2345 RIGHT.br_blockcount += new->br_blockcount;
2346 xfs_iext_update_extent(ip, state, *idx, &RIGHT);
2347
2348 if (cur == NULL)
2349 rval = XFS_ILOG_DEXT;
2350 else {
2351 rval = 0;
2352 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2353 if (error)
2354 goto done;
2355 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2356 error = xfs_bmbt_update(cur, &PREV);
2357 if (error)
2358 goto done;
2359 error = xfs_btree_increment(cur, 0, &i);
2360 if (error)
2361 goto done;
2362 error = xfs_bmbt_update(cur, &RIGHT);
2363 if (error)
2364 goto done;
2365 }
2366 break;
2367
2368 case BMAP_RIGHT_FILLING:
2369 /*
2370 * Setting the last part of a previous oldext extent to newext.
2371 * The right neighbor is not contiguous.
2372 */
2373 old = PREV;
2374 PREV.br_blockcount -= new->br_blockcount;
2375 xfs_iext_update_extent(ip, state, *idx, &PREV);
2376
2377 ++*idx;
2378 xfs_iext_insert(ip, *idx, 1, new, state);
2379
2380 XFS_IFORK_NEXT_SET(ip, whichfork,
2381 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2382 if (cur == NULL)
2383 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2384 else {
2385 rval = XFS_ILOG_CORE;
2386 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2387 if (error)
2388 goto done;
2389 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2390 error = xfs_bmbt_update(cur, &PREV);
2391 if (error)
2392 goto done;
2393 error = xfs_bmbt_lookup_eq(cur, new, &i);
2394 if (error)
2395 goto done;
2396 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2397 if ((error = xfs_btree_insert(cur, &i)))
2398 goto done;
2399 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2400 }
2401 break;
2402
2403 case 0:
2404 /*
2405 * Setting the middle part of a previous oldext extent to
2406 * newext. Contiguity is impossible here.
2407 * One extent becomes three extents.
2408 */
2409 old = PREV;
2410 PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
2411 xfs_iext_update_extent(ip, state, *idx, &PREV);
2412
2413 r[0] = *new;
2414 r[1].br_startoff = new_endoff;
2415 r[1].br_blockcount =
2416 old.br_startoff + old.br_blockcount - new_endoff;
2417 r[1].br_startblock = new->br_startblock + new->br_blockcount;
2418 r[1].br_state = PREV.br_state;
2419
2420 ++*idx;
2421 xfs_iext_insert(ip, *idx, 2, &r[0], state);
2422
2423 XFS_IFORK_NEXT_SET(ip, whichfork,
2424 XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
2425 if (cur == NULL)
2426 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2427 else {
2428 rval = XFS_ILOG_CORE;
2429 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2430 if (error)
2431 goto done;
2432 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2433 /* new right extent - oldext */
2434 error = xfs_bmbt_update(cur, &r[1]);
2435 if (error)
2436 goto done;
2437 /* new left extent - oldext */
2438 cur->bc_rec.b = PREV;
2439 if ((error = xfs_btree_insert(cur, &i)))
2440 goto done;
2441 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2442 /*
2443 * Reset the cursor to the position of the new extent
2444 * we are about to insert as we can't trust it after
2445 * the previous insert.
2446 */
2447 error = xfs_bmbt_lookup_eq(cur, new, &i);
2448 if (error)
2449 goto done;
2450 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2451 /* new middle extent - newext */
2452 if ((error = xfs_btree_insert(cur, &i)))
2453 goto done;
2454 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2455 }
2456 break;
2457
2458 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2459 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2460 case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2461 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2462 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2463 case BMAP_LEFT_CONTIG:
2464 case BMAP_RIGHT_CONTIG:
2465 /*
2466 * These cases are all impossible.
2467 */
2468 ASSERT(0);
2469 }
2470
2471 /* update reverse mappings */
2472 error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new);
2473 if (error)
2474 goto done;
2475
2476 /* convert to a btree if necessary */
2477 if (xfs_bmap_needs_btree(ip, whichfork)) {
2478 int tmp_logflags; /* partial log flag return val */
2479
2480 ASSERT(cur == NULL);
2481 error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
2482 0, &tmp_logflags, whichfork);
2483 *logflagsp |= tmp_logflags;
2484 if (error)
2485 goto done;
2486 }
2487
2488 /* clear out the allocated field, done with it now in any case. */
2489 if (cur) {
2490 cur->bc_private.b.allocated = 0;
2491 *curp = cur;
2492 }
2493
2494 xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
2495 done:
2496 *logflagsp |= rval;
2497 return error;
2498 #undef LEFT
2499 #undef RIGHT
2500 #undef PREV
2501 }
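/*
 * Illustrative sketch, not part of the original source: the "case 0"
 * arm above carves a single extent into three when the converted range
 * sits strictly inside PREV. The same geometry as a standalone helper
 * over struct xfs_bmbt_irec (the middle piece is simply *new):
 */
#if 0
static void
bmap_split_for_convert(
	const struct xfs_bmbt_irec	*prev,	/* extent being converted */
	const struct xfs_bmbt_irec	*new,	/* middle piece, new state */
	struct xfs_bmbt_irec		*left,	/* out: keeps old state */
	struct xfs_bmbt_irec		*right)	/* out: keeps old state */
{
	xfs_fileoff_t	new_endoff = new->br_startoff + new->br_blockcount;

	*left = *prev;
	left->br_blockcount = new->br_startoff - prev->br_startoff;

	right->br_startoff = new_endoff;
	right->br_startblock = new->br_startblock + new->br_blockcount;
	right->br_blockcount = prev->br_startoff + prev->br_blockcount -
			       new_endoff;
	right->br_state = prev->br_state;
}
#endif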
2502
2503 /*
2504 * Convert a hole to a delayed allocation.
2505 */
2506 STATIC void
2507 xfs_bmap_add_extent_hole_delay(
2508 xfs_inode_t *ip, /* incore inode pointer */
2509 int whichfork,
2510 xfs_extnum_t *idx, /* extent number to update/insert */
2511 xfs_bmbt_irec_t *new) /* new data to add to file extents */
2512 {
2513 xfs_ifork_t *ifp; /* inode fork pointer */
2514 xfs_bmbt_irec_t left; /* left neighbor extent entry */
2515 xfs_filblks_t newlen=0; /* new indirect size */
2516 xfs_filblks_t oldlen=0; /* old indirect size */
2517 xfs_bmbt_irec_t right; /* right neighbor extent entry */
2518 int state = xfs_bmap_fork_to_state(whichfork);
2519 xfs_filblks_t temp; /* temp for indirect calculations */
2520
2521 ifp = XFS_IFORK_PTR(ip, whichfork);
2522 ASSERT(isnullstartblock(new->br_startblock));
2523
2524 /*
2525 * Check and set flags if this segment has a left neighbor
2526 */
2527 if (*idx > 0) {
2528 state |= BMAP_LEFT_VALID;
2529 xfs_iext_get_extent(ifp, *idx - 1, &left);
2530
2531 if (isnullstartblock(left.br_startblock))
2532 state |= BMAP_LEFT_DELAY;
2533 }
2534
2535 /*
2536 * Check and set flags if the current (right) segment exists.
2537 * If it doesn't exist, we're converting the hole at end-of-file.
2538 */
2539 if (*idx < xfs_iext_count(ifp)) {
2540 state |= BMAP_RIGHT_VALID;
2541 xfs_iext_get_extent(ifp, *idx, &right);
2542
2543 if (isnullstartblock(right.br_startblock))
2544 state |= BMAP_RIGHT_DELAY;
2545 }
2546
2547 /*
2548 * Set contiguity flags on the left and right neighbors.
2549 * Don't let extents get too large, even if the pieces are contiguous.
2550 */
2551 if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
2552 left.br_startoff + left.br_blockcount == new->br_startoff &&
2553 left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2554 state |= BMAP_LEFT_CONTIG;
2555
2556 if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
2557 new->br_startoff + new->br_blockcount == right.br_startoff &&
2558 new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2559 (!(state & BMAP_LEFT_CONTIG) ||
2560 (left.br_blockcount + new->br_blockcount +
2561 right.br_blockcount <= MAXEXTLEN)))
2562 state |= BMAP_RIGHT_CONTIG;
2563
2564 /*
2565 * Switch out based on the contiguity flags.
2566 */
2567 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2568 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2569 /*
2570 * New allocation is contiguous with delayed allocations
2571 * on the left and on the right.
2572 * Merge all three into a single extent record.
2573 */
2574 --*idx;
2575 temp = left.br_blockcount + new->br_blockcount +
2576 right.br_blockcount;
2577
2578 oldlen = startblockval(left.br_startblock) +
2579 startblockval(new->br_startblock) +
2580 startblockval(right.br_startblock);
2581 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2582 oldlen);
2583 left.br_startblock = nullstartblock(newlen);
2584 left.br_blockcount = temp;
2585 xfs_iext_update_extent(ip, state, *idx, &left);
2586
2587 xfs_iext_remove(ip, *idx + 1, 1, state);
2588 break;
2589
2590 case BMAP_LEFT_CONTIG:
2591 /*
2592 * New allocation is contiguous with a delayed allocation
2593 * on the left.
2594 * Merge the new allocation with the left neighbor.
2595 */
2596 --*idx;
2597 temp = left.br_blockcount + new->br_blockcount;
2598
2599 oldlen = startblockval(left.br_startblock) +
2600 startblockval(new->br_startblock);
2601 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2602 oldlen);
2603 left.br_blockcount = temp;
2604 left.br_startblock = nullstartblock(newlen);
2605 xfs_iext_update_extent(ip, state, *idx, &left);
2606 break;
2607
2608 case BMAP_RIGHT_CONTIG:
2609 /*
2610 * New allocation is contiguous with a delayed allocation
2611 * on the right.
2612 * Merge the new allocation with the right neighbor.
2613 */
2614 temp = new->br_blockcount + right.br_blockcount;
2615 oldlen = startblockval(new->br_startblock) +
2616 startblockval(right.br_startblock);
2617 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2618 oldlen);
2619 right.br_startoff = new->br_startoff;
2620 right.br_startblock = nullstartblock(newlen);
2621 right.br_blockcount = temp;
2622 xfs_iext_update_extent(ip, state, *idx, &right);
2623 break;
2624
2625 case 0:
2626 /*
2627 * New allocation is not contiguous with another
2628 * delayed allocation.
2629 * Insert a new entry.
2630 */
2631 oldlen = newlen = 0;
2632 xfs_iext_insert(ip, *idx, 1, new, state);
2633 break;
2634 }
2635 if (oldlen != newlen) {
2636 ASSERT(oldlen > newlen);
2637 xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2638 false);
2639 /*
2640 * Nothing to do for disk quota accounting here.
2641 */
2642 }
2643 }
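/*
 * Illustrative note, not part of the original source: delalloc extents
 * carry no real start block, so the worst-case number of indirect
 * (bmap btree) blocks reserved for them is encoded into br_startblock.
 * The round-trip used throughout the function above, assuming the
 * encoders from the libxfs headers:
 */
#if 0
	xfs_fsblock_t	enc = nullstartblock(7);	/* reserve 7 blocks */

	ASSERT(isnullstartblock(enc));
	ASSERT(startblockval(enc) == 7);
#endif
/*
 * Because newlen is clamped to oldlen with XFS_FILBLKS_MIN(), a merge
 * can only release reservation, never grow it, which is why the
 * xfs_mod_fdblocks() call above is always handed a non-negative delta.
 */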
2644
2645 /*
2646 * Convert a hole to a real allocation.
2647 */
2648 STATIC int /* error */
2649 xfs_bmap_add_extent_hole_real(
2650 struct xfs_trans *tp,
2651 struct xfs_inode *ip,
2652 int whichfork,
2653 xfs_extnum_t *idx,
2654 struct xfs_btree_cur **curp,
2655 struct xfs_bmbt_irec *new,
2656 xfs_fsblock_t *first,
2657 struct xfs_defer_ops *dfops,
2658 int *logflagsp)
2659 {
2660 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
2661 struct xfs_mount *mp = ip->i_mount;
2662 struct xfs_btree_cur *cur = *curp;
2663 int error; /* error return value */
2664 int i; /* temp state */
2665 xfs_bmbt_irec_t left; /* left neighbor extent entry */
2666 xfs_bmbt_irec_t right; /* right neighbor extent entry */
2667 int rval=0; /* return value (logging flags) */
2668 int state = xfs_bmap_fork_to_state(whichfork);
2669 struct xfs_bmbt_irec old;
2670
2671 ASSERT(*idx >= 0);
2672 ASSERT(*idx <= xfs_iext_count(ifp));
2673 ASSERT(!isnullstartblock(new->br_startblock));
2674 ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
2675
2676 XFS_STATS_INC(mp, xs_add_exlist);
2677
2678 /*
2679 * Check and set flags if this segment has a left neighbor.
2680 */
2681 if (*idx > 0) {
2682 state |= BMAP_LEFT_VALID;
2683 xfs_iext_get_extent(ifp, *idx - 1, &left);
2684 if (isnullstartblock(left.br_startblock))
2685 state |= BMAP_LEFT_DELAY;
2686 }
2687
2688 /*
2689 * Check and set flags if this segment has a current value.
2690 * Not true if we're inserting into the "hole" at eof.
2691 */
2692 if (*idx < xfs_iext_count(ifp)) {
2693 state |= BMAP_RIGHT_VALID;
2694 xfs_iext_get_extent(ifp, *idx, &right);
2695 if (isnullstartblock(right.br_startblock))
2696 state |= BMAP_RIGHT_DELAY;
2697 }
2698
2699 /*
2700 * We're inserting a real allocation between "left" and "right".
2701 * Set the contiguity flags. Don't let extents get too large.
2702 */
2703 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2704 left.br_startoff + left.br_blockcount == new->br_startoff &&
2705 left.br_startblock + left.br_blockcount == new->br_startblock &&
2706 left.br_state == new->br_state &&
2707 left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2708 state |= BMAP_LEFT_CONTIG;
2709
2710 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2711 new->br_startoff + new->br_blockcount == right.br_startoff &&
2712 new->br_startblock + new->br_blockcount == right.br_startblock &&
2713 new->br_state == right.br_state &&
2714 new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2715 (!(state & BMAP_LEFT_CONTIG) ||
2716 left.br_blockcount + new->br_blockcount +
2717 right.br_blockcount <= MAXEXTLEN))
2718 state |= BMAP_RIGHT_CONTIG;
2719
2720 error = 0;
2721 /*
2722 * Select which case we're in here, and implement it.
2723 */
2724 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
2725 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2726 /*
2727 * New allocation is contiguous with real allocations on the
2728 * left and on the right.
2729 * Merge all three into a single extent record.
2730 */
2731 --*idx;
2732 left.br_blockcount += new->br_blockcount + right.br_blockcount;
2733 xfs_iext_update_extent(ip, state, *idx, &left);
2734
2735 xfs_iext_remove(ip, *idx + 1, 1, state);
2736
2737 XFS_IFORK_NEXT_SET(ip, whichfork,
2738 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2739 if (cur == NULL) {
2740 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2741 } else {
2742 rval = XFS_ILOG_CORE;
2743 error = xfs_bmbt_lookup_eq(cur, &right, &i);
2744 if (error)
2745 goto done;
2746 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2747 error = xfs_btree_delete(cur, &i);
2748 if (error)
2749 goto done;
2750 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2751 error = xfs_btree_decrement(cur, 0, &i);
2752 if (error)
2753 goto done;
2754 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2755 error = xfs_bmbt_update(cur, &left);
2756 if (error)
2757 goto done;
2758 }
2759 break;
2760
2761 case BMAP_LEFT_CONTIG:
2762 /*
2763 * New allocation is contiguous with a real allocation
2764 * on the left.
2765 * Merge the new allocation with the left neighbor.
2766 */
2767 --*idx;
2768 old = left;
2769
2770 left.br_blockcount += new->br_blockcount;
2771 xfs_iext_update_extent(ip, state, *idx, &left);
2772
2773 if (cur == NULL) {
2774 rval = xfs_ilog_fext(whichfork);
2775 } else {
2776 rval = 0;
2777 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2778 if (error)
2779 goto done;
2780 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2781 error = xfs_bmbt_update(cur, &left);
2782 if (error)
2783 goto done;
2784 }
2785 break;
2786
2787 case BMAP_RIGHT_CONTIG:
2788 /*
2789 * New allocation is contiguous with a real allocation
2790 * on the right.
2791 * Merge the new allocation with the right neighbor.
2792 */
2793 old = right;
2794
2795 right.br_startoff = new->br_startoff;
2796 right.br_startblock = new->br_startblock;
2797 right.br_blockcount += new->br_blockcount;
2798 xfs_iext_update_extent(ip, state, *idx, &right);
2799
2800 if (cur == NULL) {
2801 rval = xfs_ilog_fext(whichfork);
2802 } else {
2803 rval = 0;
2804 error = xfs_bmbt_lookup_eq(cur, &old, &i);
2805 if (error)
2806 goto done;
2807 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2808 error = xfs_bmbt_update(cur, &right);
2809 if (error)
2810 goto done;
2811 }
2812 break;
2813
2814 case 0:
2815 /*
2816 * New allocation is not contiguous with another
2817 * real allocation.
2818 * Insert a new entry.
2819 */
2820 xfs_iext_insert(ip, *idx, 1, new, state);
2821 XFS_IFORK_NEXT_SET(ip, whichfork,
2822 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2823 if (cur == NULL) {
2824 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2825 } else {
2826 rval = XFS_ILOG_CORE;
2827 error = xfs_bmbt_lookup_eq(cur, new, &i);
2828 if (error)
2829 goto done;
2830 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2831 error = xfs_btree_insert(cur, &i);
2832 if (error)
2833 goto done;
2834 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2835 }
2836 break;
2837 }
2838
2839 /* add reverse mapping */
2840 error = xfs_rmap_map_extent(mp, dfops, ip, whichfork, new);
2841 if (error)
2842 goto done;
2843
2844 /* convert to a btree if necessary */
2845 if (xfs_bmap_needs_btree(ip, whichfork)) {
2846 int tmp_logflags; /* partial log flag return val */
2847
2848 ASSERT(cur == NULL);
2849 error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, curp,
2850 0, &tmp_logflags, whichfork);
2851 *logflagsp |= tmp_logflags;
2852 cur = *curp;
2853 if (error)
2854 goto done;
2855 }
2856
2857 /* clear out the allocated field, done with it now in any case. */
2858 if (cur)
2859 cur->bc_private.b.allocated = 0;
2860
2861 xfs_bmap_check_leaf_extents(cur, ip, whichfork);
2862 done:
2863 *logflagsp |= rval;
2864 return error;
2865 }
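/*
 * Illustrative sketch, not part of the original source: the left
 * contiguity test above, factored out as a predicate. Two real extents
 * can merge only if they abut in both file space and block space,
 * share the written/unwritten state, and the merged length stays
 * within MAXEXTLEN:
 */
#if 0
static bool
bmap_extents_mergeable(
	const struct xfs_bmbt_irec	*left,
	const struct xfs_bmbt_irec	*new)
{
	return left->br_startoff + left->br_blockcount == new->br_startoff &&
	       left->br_startblock + left->br_blockcount ==
							new->br_startblock &&
	       left->br_state == new->br_state &&
	       left->br_blockcount + new->br_blockcount <= MAXEXTLEN;
}
#endif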
2866
2867 /*
2868 * Functions used in the extent read, allocate and remove paths
2869 */
2870
2871 /*
2872 * Adjust the size of the new extent based on di_extsize and rt extsize.
2873 */
2874 int
2875 xfs_bmap_extsize_align(
2876 xfs_mount_t *mp,
2877 xfs_bmbt_irec_t *gotp, /* next extent pointer */
2878 xfs_bmbt_irec_t *prevp, /* previous extent pointer */
2879 xfs_extlen_t extsz, /* align to this extent size */
2880 int rt, /* is this a realtime inode? */
2881 int eof, /* is extent at end-of-file? */
2882 int delay, /* creating delalloc extent? */
2883 int convert, /* overwriting unwritten extent? */
2884 xfs_fileoff_t *offp, /* in/out: aligned offset */
2885 xfs_extlen_t *lenp) /* in/out: aligned length */
2886 {
2887 xfs_fileoff_t orig_off; /* original offset */
2888 xfs_extlen_t orig_alen; /* original length */
2889 xfs_fileoff_t orig_end; /* original off+len */
2890 xfs_fileoff_t nexto; /* next file offset */
2891 xfs_fileoff_t prevo; /* previous file offset */
2892 xfs_fileoff_t align_off; /* temp for offset */
2893 xfs_extlen_t align_alen; /* temp for length */
2894 xfs_extlen_t temp; /* temp for calculations */
2895
2896 if (convert)
2897 return 0;
2898
2899 orig_off = align_off = *offp;
2900 orig_alen = align_alen = *lenp;
2901 orig_end = orig_off + orig_alen;
2902
2903 /*
2904 * If this request overlaps an existing extent, then don't
2905 * attempt to perform any additional alignment.
2906 */
2907 if (!delay && !eof &&
2908 (orig_off >= gotp->br_startoff) &&
2909 (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
2910 return 0;
2911 }
2912
2913 /*
2914 * If the file offset is unaligned vs. the extent size
2915 * we need to align it. This will be possible unless
2916 * the file was previously written with a kernel that didn't
2917 * perform this alignment, or if a truncate shot us in the
2918 * foot.
2919 */
2920 temp = do_mod(orig_off, extsz);
2921 if (temp) {
2922 align_alen += temp;
2923 align_off -= temp;
2924 }
2925
2926 /* Same adjustment for the end of the requested area. */
2927 temp = (align_alen % extsz);
2928 if (temp)
2929 align_alen += extsz - temp;
2930
2931 /*
2932 * For large extent hint sizes, the aligned extent might be larger than
2933 * MAXEXTLEN. In that case, reduce the size in whole-extsz steps until
2934 * the length is back under MAXEXTLEN. The outer allocation loops handle
2935 * short allocations just fine, so it is safe to do this. We only want to
2936 * do it when we are forced to, though, because it means more allocation
2937 * operations are required.
2938 */
2939 while (align_alen > MAXEXTLEN)
2940 align_alen -= extsz;
2941 ASSERT(align_alen <= MAXEXTLEN);
2942
2943 /*
2944 * If the previous block overlaps with this proposed allocation
2945 * then move the start forward without adjusting the length.
2946 */
2947 if (prevp->br_startoff != NULLFILEOFF) {
2948 if (prevp->br_startblock == HOLESTARTBLOCK)
2949 prevo = prevp->br_startoff;
2950 else
2951 prevo = prevp->br_startoff + prevp->br_blockcount;
2952 } else
2953 prevo = 0;
2954 if (align_off != orig_off && align_off < prevo)
2955 align_off = prevo;
2956 /*
2957 * If the next block overlaps with this proposed allocation
2958 * then move the start back without adjusting the length,
2959 * but not before offset 0.
2960 * This may of course make the start overlap the previous block,
2961 * and if we hit the offset 0 limit then the next block
2962 * can still overlap too.
2963 */
2964 if (!eof && gotp->br_startoff != NULLFILEOFF) {
2965 if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
2966 (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
2967 nexto = gotp->br_startoff + gotp->br_blockcount;
2968 else
2969 nexto = gotp->br_startoff;
2970 } else
2971 nexto = NULLFILEOFF;
2972 if (!eof &&
2973 align_off + align_alen != orig_end &&
2974 align_off + align_alen > nexto)
2975 align_off = nexto > align_alen ? nexto - align_alen : 0;
2976 /*
2977 * If we're now overlapping the next or previous extent that
2978 * means we can't fit an extsz piece in this hole. Just move
2979 * the start forward to the first valid spot and set
2980 * the length so we hit the end.
2981 */
2982 if (align_off != orig_off && align_off < prevo)
2983 align_off = prevo;
2984 if (align_off + align_alen != orig_end &&
2985 align_off + align_alen > nexto &&
2986 nexto != NULLFILEOFF) {
2987 ASSERT(nexto > prevo);
2988 align_alen = nexto - align_off;
2989 }
2990
2991 /*
2992 * If realtime, and the result isn't a multiple of the realtime
2993 * extent size we need to remove blocks until it is.
2994 */
2995 if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
2996 /*
2997 * We're not covering the original request, or
2998 * we won't be able to once we fix the length.
2999 */
3000 if (orig_off < align_off ||
3001 orig_end > align_off + align_alen ||
3002 align_alen - temp < orig_alen)
3003 return -EINVAL;
3004 /*
3005 * Try to fix it by moving the start up.
3006 */
3007 if (align_off + temp <= orig_off) {
3008 align_alen -= temp;
3009 align_off += temp;
3010 }
3011 /*
3012 * Try to fix it by moving the end in.
3013 */
3014 else if (align_off + align_alen - temp >= orig_end)
3015 align_alen -= temp;
3016 /*
3017 * Set the start to the minimum then trim the length.
3018 */
3019 else {
3020 align_alen -= orig_off - align_off;
3021 align_off = orig_off;
3022 align_alen -= align_alen % mp->m_sb.sb_rextsize;
3023 }
3024 /*
3025 * Result doesn't cover the request, fail it.
3026 */
3027 if (orig_off < align_off || orig_end > align_off + align_alen)
3028 return -EINVAL;
3029 } else {
3030 ASSERT(orig_off >= align_off);
3031 /* see MAXEXTLEN handling above */
3032 ASSERT(orig_end <= align_off + align_alen ||
3033 align_alen + extsz > MAXEXTLEN);
3034 }
3035
3036 #ifdef DEBUG
3037 if (!eof && gotp->br_startoff != NULLFILEOFF)
3038 ASSERT(align_off + align_alen <= gotp->br_startoff);
3039 if (prevp->br_startoff != NULLFILEOFF)
3040 ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3041 #endif
3042
3043 *lenp = align_alen;
3044 *offp = align_off;
3045 return 0;
3046 }
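/*
 * Illustrative worked example, not part of the original source: with
 * an extent size hint of extsz = 16 blocks, a request for offset 21,
 * length 10 is widened by the code above as follows:
 *
 *	do_mod(21, 16) = 5	->	align_off = 16, align_alen = 15
 *	15 % 16 = 15		->	align_alen = 15 + (16 - 15) = 16
 *
 * giving a proposed allocation of [16, 32) that fully covers the
 * original [21, 31) request on extsz boundaries; the neighbor and
 * realtime checks above may then shift or shrink it again.
 */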
3047
3048 #define XFS_ALLOC_GAP_UNITS 4
3049
3050 void
3051 xfs_bmap_adjacent(
3052 struct xfs_bmalloca *ap) /* bmap alloc argument struct */
3053 {
3054 xfs_fsblock_t adjust; /* adjustment to block numbers */
3055 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
3056 xfs_mount_t *mp; /* mount point structure */
3057 int nullfb; /* true if ap->firstblock isn't set */
3058 int rt; /* true if inode is realtime */
3059
3060 #define ISVALID(x,y) \
3061 (rt ? \
3062 (x) < mp->m_sb.sb_rblocks : \
3063 XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3064 XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3065 XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3066
3067 mp = ap->ip->i_mount;
3068 nullfb = *ap->firstblock == NULLFSBLOCK;
3069 rt = XFS_IS_REALTIME_INODE(ap->ip) &&
3070 xfs_alloc_is_userdata(ap->datatype);
3071 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
3072 /*
3073 * If allocating at eof, and there's a previous real block,
3074 * try to use its last block as our starting point.
3075 */
3076 if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3077 !isnullstartblock(ap->prev.br_startblock) &&
3078 ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3079 ap->prev.br_startblock)) {
3080 ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3081 /*
3082 * Adjust for the gap between prevp and us.
3083 */
3084 adjust = ap->offset -
3085 (ap->prev.br_startoff + ap->prev.br_blockcount);
3086 if (adjust &&
3087 ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3088 ap->blkno += adjust;
3089 }
3090 /*
3091 * If not at eof, then compare the two neighbor blocks.
3092 * Figure out whether either one gives us a good starting point,
3093 * and pick the better one.
3094 */
3095 else if (!ap->eof) {
3096 xfs_fsblock_t gotbno; /* right side block number */
3097 xfs_fsblock_t gotdiff=0; /* right side difference */
3098 xfs_fsblock_t prevbno; /* left side block number */
3099 xfs_fsblock_t prevdiff=0; /* left side difference */
3100
3101 /*
3102 * If there's a previous (left) block, select a requested
3103 * start block based on it.
3104 */
3105 if (ap->prev.br_startoff != NULLFILEOFF &&
3106 !isnullstartblock(ap->prev.br_startblock) &&
3107 (prevbno = ap->prev.br_startblock +
3108 ap->prev.br_blockcount) &&
3109 ISVALID(prevbno, ap->prev.br_startblock)) {
3110 /*
3111 * Calculate gap to end of previous block.
3112 */
3113 adjust = prevdiff = ap->offset -
3114 (ap->prev.br_startoff +
3115 ap->prev.br_blockcount);
3116 /*
3117 * Figure the startblock based on the previous block's
3118 * end and the gap size.
3119 * Heuristic!
3120 * If the gap is large relative to the piece we're
3121 * allocating, or using it gives us an invalid block
3122 * number, then just use the end of the previous block.
3123 */
3124 if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3125 ISVALID(prevbno + prevdiff,
3126 ap->prev.br_startblock))
3127 prevbno += adjust;
3128 else
3129 prevdiff += adjust;
3130 /*
3131 * If the firstblock forbids it, can't use it,
3132 * must use default.
3133 */
3134 if (!rt && !nullfb &&
3135 XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
3136 prevbno = NULLFSBLOCK;
3137 }
3138 /*
3139 * No previous block or can't follow it, just default.
3140 */
3141 else
3142 prevbno = NULLFSBLOCK;
3143 /*
3144 * If there's a following (right) block, select a requested
3145 * start block based on it.
3146 */
3147 if (!isnullstartblock(ap->got.br_startblock)) {
3148 /*
3149 * Calculate gap to start of next block.
3150 */
3151 adjust = gotdiff = ap->got.br_startoff - ap->offset;
3152 /*
3153 * Figure the startblock based on the next block's
3154 * start and the gap size.
3155 */
3156 gotbno = ap->got.br_startblock;
3157 /*
3158 * Heuristic!
3159 * If the gap is large relative to the piece we're
3160 * allocating, or using it gives us an invalid block
3161 * number, then just use the start of the next block
3162 * offset by our length.
3163 */
3164 if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3165 ISVALID(gotbno - gotdiff, gotbno))
3166 gotbno -= adjust;
3167 else if (ISVALID(gotbno - ap->length, gotbno)) {
3168 gotbno -= ap->length;
3169 gotdiff += adjust - ap->length;
3170 } else
3171 gotdiff += adjust;
3172 /*
3173 * If the firstblock forbids it, can't use it,
3174 * must use default.
3175 */
3176 if (!rt && !nullfb &&
3177 XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
3178 gotbno = NULLFSBLOCK;
3179 }
3180 /*
3181 * No next block, just default.
3182 */
3183 else
3184 gotbno = NULLFSBLOCK;
3185 /*
3186 * If both valid, pick the better one, else the only good
3187 * one, else ap->blkno is already set (to 0 or the inode block).
3188 */
3189 if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
3190 ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3191 else if (prevbno != NULLFSBLOCK)
3192 ap->blkno = prevbno;
3193 else if (gotbno != NULLFSBLOCK)
3194 ap->blkno = gotbno;
3195 }
3196 #undef ISVALID
3197 }
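/*
 * Illustrative worked example, not part of the original source: with
 * XFS_ALLOC_GAP_UNITS == 4, suppose the previous extent maps file
 * offsets [30, 50) to blocks [980, 1000) and we want 8 blocks at file
 * offset 52. Then:
 *
 *	prevdiff = 52 - 50 = 2 blocks of hole before us
 *	2 <= 4 * 8, so ap->blkno = 1000 + 2 = 1002
 *
 * i.e. the target lands exactly "gap" blocks past the previous extent,
 * so that filling the hole later keeps everything physically
 * contiguous. A gap wider than 4x the request would fall back to the
 * block right after the previous extent instead.
 */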
3198
3199 static int
3200 xfs_bmap_longest_free_extent(
3201 struct xfs_trans *tp,
3202 xfs_agnumber_t ag,
3203 xfs_extlen_t *blen,
3204 int *notinit)
3205 {
3206 struct xfs_mount *mp = tp->t_mountp;
3207 struct xfs_perag *pag;
3208 xfs_extlen_t longest;
3209 int error = 0;
3210
3211 pag = xfs_perag_get(mp, ag);
3212 if (!pag->pagf_init) {
3213 error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3214 if (error)
3215 goto out;
3216
3217 if (!pag->pagf_init) {
3218 *notinit = 1;
3219 goto out;
3220 }
3221 }
3222
3223 longest = xfs_alloc_longest_free_extent(mp, pag,
3224 xfs_alloc_min_freelist(mp, pag),
3225 xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3226 if (*blen < longest)
3227 *blen = longest;
3228
3229 out:
3230 xfs_perag_put(pag);
3231 return error;
3232 }
3233
3234 static void
3235 xfs_bmap_select_minlen(
3236 struct xfs_bmalloca *ap,
3237 struct xfs_alloc_arg *args,
3238 xfs_extlen_t *blen,
3239 int notinit)
3240 {
3241 if (notinit || *blen < ap->minlen) {
3242 /*
3243 * Since the AGF was read with XFS_ALLOC_FLAG_TRYLOCK above, some
3244 * AGs may not have been examined, so there may still be space
3245 */
3246 args->minlen = ap->minlen;
3247 } else if (*blen < args->maxlen) {
3248 /*
3249 * If the best seen length is less than the request length,
3250 * use the best as the minimum.
3251 */
3252 args->minlen = *blen;
3253 } else {
3254 /*
3255 * Otherwise we've seen an extent as big as maxlen, use that
3256 * as the minimum.
3257 */
3258 args->minlen = args->maxlen;
3259 }
3260 }
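/*
 * Illustrative summary, not part of the original source, of the minlen
 * choice above for a request with args->maxlen = 64:
 *
 *	longest free extent seen	args->minlen
 *	------------------------	------------
 *	unknown (notinit) or		ap->minlen (stay optimistic,
 *	  below ap->minlen		  the scan was best-effort)
 *	32 (below maxlen)		32 (take what is there)
 *	64 or more			64 (demand the full request)
 */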
3261
3262 STATIC int
3263 xfs_bmap_btalloc_nullfb(
3264 struct xfs_bmalloca *ap,
3265 struct xfs_alloc_arg *args,
3266 xfs_extlen_t *blen)
3267 {
3268 struct xfs_mount *mp = ap->ip->i_mount;
3269 xfs_agnumber_t ag, startag;
3270 int notinit = 0;
3271 int error;
3272
3273 args->type = XFS_ALLOCTYPE_START_BNO;
3274 args->total = ap->total;
3275
3276 startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3277 if (startag == NULLAGNUMBER)
3278 startag = ag = 0;
3279
3280 while (*blen < args->maxlen) {
3281 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3282 &notinit);
3283 if (error)
3284 return error;
3285
3286 if (++ag == mp->m_sb.sb_agcount)
3287 ag = 0;
3288 if (ag == startag)
3289 break;
3290 }
3291
3292 xfs_bmap_select_minlen(ap, args, blen, notinit);
3293 return 0;
3294 }
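/*
 * Illustrative sketch, not part of the original source: the scan above
 * is a wrap-around walk over every AG starting from the hinted one.
 * Its control flow, with examine() and "satisfied" as placeholders for
 * the longest-extent probe and the *blen >= args->maxlen test:
 */
#if 0
	ag = startag;
	while (!satisfied) {
		examine(ag);
		if (++ag == mp->m_sb.sb_agcount)
			ag = 0;			/* wrap to AG 0 */
		if (ag == startag)
			break;			/* seen every AG once */
	}
#endif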
3295
3296 STATIC int
3297 xfs_bmap_btalloc_filestreams(
3298 struct xfs_bmalloca *ap,
3299 struct xfs_alloc_arg *args,
3300 xfs_extlen_t *blen)
3301 {
3302 struct xfs_mount *mp = ap->ip->i_mount;
3303 xfs_agnumber_t ag;
3304 int notinit = 0;
3305 int error;
3306
3307 args->type = XFS_ALLOCTYPE_NEAR_BNO;
3308 args->total = ap->total;
3309
3310 ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3311 if (ag == NULLAGNUMBER)
3312 ag = 0;
3313
3314 error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3315 if (error)
3316 return error;
3317
3318 if (*blen < args->maxlen) {
3319 error = xfs_filestream_new_ag(ap, &ag);
3320 if (error)
3321 return error;
3322
3323 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3324 &notinit);
3325 if (error)
3326 return error;
3327
3328 }
3329
3330 xfs_bmap_select_minlen(ap, args, blen, notinit);
3331
3332 /*
3333 * Set the failure fallback case to look in the selected AG as the
3334 * stream may have moved.
3335 */
3336 ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3337 return 0;
3338 }
3339
3340 STATIC int
3341 xfs_bmap_btalloc(
3342 struct xfs_bmalloca *ap) /* bmap alloc argument struct */
3343 {
3344 xfs_mount_t *mp; /* mount point structure */
3345 xfs_alloctype_t atype = 0; /* type for allocation routines */
3346 xfs_extlen_t align = 0; /* minimum allocation alignment */
3347 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
3348 xfs_agnumber_t ag;
3349 xfs_alloc_arg_t args;
3350 xfs_extlen_t blen;
3351 xfs_extlen_t nextminlen = 0;
3352 int nullfb; /* true if ap->firstblock isn't set */
3353 int isaligned;
3354 int tryagain;
3355 int error;
3356 int stripe_align;
3357
3358 ASSERT(ap->length);
3359
3360 mp = ap->ip->i_mount;
3361
3362 /* stripe alignment for allocation is determined by mount parameters */
3363 stripe_align = 0;
3364 if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
3365 stripe_align = mp->m_swidth;
3366 else if (mp->m_dalign)
3367 stripe_align = mp->m_dalign;
3368
3369 if (ap->flags & XFS_BMAPI_COWFORK)
3370 align = xfs_get_cowextsz_hint(ap->ip);
3371 else if (xfs_alloc_is_userdata(ap->datatype))
3372 align = xfs_get_extsz_hint(ap->ip);
3373 if (align) {
3374 error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
3375 align, 0, ap->eof, 0, ap->conv,
3376 &ap->offset, &ap->length);
3377 ASSERT(!error);
3378 ASSERT(ap->length);
3379 }
3380
3381
3382 nullfb = *ap->firstblock == NULLFSBLOCK;
3383 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
3384 if (nullfb) {
3385 if (xfs_alloc_is_userdata(ap->datatype) &&
3386 xfs_inode_is_filestream(ap->ip)) {
3387 ag = xfs_filestream_lookup_ag(ap->ip);
3388 ag = (ag != NULLAGNUMBER) ? ag : 0;
3389 ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
3390 } else {
3391 ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
3392 }
3393 } else
3394 ap->blkno = *ap->firstblock;
3395
3396 xfs_bmap_adjacent(ap);
3397
3398 /*
3399 * If allowed, use ap->blkno; otherwise must use firstblock since
3400 * it's in the right allocation group.
3401 */
3402 if (!nullfb &&
3403     XFS_FSB_TO_AGNO(mp, ap->blkno) != fb_agno)
3404 ap->blkno = *ap->firstblock;
3405
3406 /*
3407 * Normal allocation, done through xfs_alloc_vextent.
3408 */
3409 tryagain = isaligned = 0;
3410 memset(&args, 0, sizeof(args));
3411 args.tp = ap->tp;
3412 args.mp = mp;
3413 args.fsbno = ap->blkno;
3414 xfs_rmap_skip_owner_update(&args.oinfo);
3415
3416 /* Trim the allocation back to the maximum an AG can fit. */
3417 args.maxlen = MIN(ap->length, mp->m_ag_max_usable);
3418 args.firstblock = *ap->firstblock;
3419 blen = 0;
3420 if (nullfb) {
3421 /*
3422 * Search for an allocation group with a single extent large
3423 * enough for the request. If one isn't found, then adjust
3424 * the minimum allocation size to the largest space found.
3425 */
3426 if (xfs_alloc_is_userdata(ap->datatype) &&
3427 xfs_inode_is_filestream(ap->ip))
3428 error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
3429 else
3430 error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
3431 if (error)
3432 return error;
3433 } else if (ap->dfops->dop_low) {
3434 if (xfs_inode_is_filestream(ap->ip))
3435 args.type = XFS_ALLOCTYPE_FIRST_AG;
3436 else
3437 args.type = XFS_ALLOCTYPE_START_BNO;
3438 args.total = args.minlen = ap->minlen;
3439 } else {
3440 args.type = XFS_ALLOCTYPE_NEAR_BNO;
3441 args.total = ap->total;
3442 args.minlen = ap->minlen;
3443 }
3444 /* apply extent size hints if obtained earlier */
3445 if (align) {
3446 args.prod = align;
3447 if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
3448 args.mod = (xfs_extlen_t)(args.prod - args.mod);
3449 } else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
3450 args.prod = 1;
3451 args.mod = 0;
3452 } else {
3453 args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
3454 if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod))))
3455 args.mod = (xfs_extlen_t)(args.prod - args.mod);
3456 }
3457 /*
3458 * If we are not low on available data blocks, and the
3459 * underlying logical volume manager is a stripe, and
3460 * the file offset is zero then try to allocate data
3461 * blocks on stripe unit boundary.
3462 * NOTE: ap->aeof is only set if the allocation length
3463 * is >= the stripe unit and the allocation offset is
3464 * at the end of file.
3465 */
3466 if (!ap->dfops->dop_low && ap->aeof) {
3467 if (!ap->offset) {
3468 args.alignment = stripe_align;
3469 atype = args.type;
3470 isaligned = 1;
3471 /*
3472 * Adjust for alignment
3473 */
3474 if (blen > args.alignment && blen <= args.maxlen)
3475 args.minlen = blen - args.alignment;
3476 args.minalignslop = 0;
3477 } else {
3478 /*
3479 * First try an exact bno allocation.
3480 * If it fails then do a near or start bno
3481 * allocation with alignment turned on.
3482 */
3483 atype = args.type;
3484 tryagain = 1;
3485 args.type = XFS_ALLOCTYPE_THIS_BNO;
3486 args.alignment = 1;
3487 /*
3488 * Compute the minlen+alignment for the
3489 * next case. Set slop so that the value
3490 * of minlen+alignment+slop doesn't go up
3491 * between the calls.
3492 */
3493 if (blen > stripe_align && blen <= args.maxlen)
3494 nextminlen = blen - stripe_align;
3495 else
3496 nextminlen = args.minlen;
3497 if (nextminlen + stripe_align > args.minlen + 1)
3498 args.minalignslop =
3499 nextminlen + stripe_align -
3500 args.minlen - 1;
3501 else
3502 args.minalignslop = 0;
3503 }
3504 } else {
3505 args.alignment = 1;
3506 args.minalignslop = 0;
3507 }
3508 args.minleft = ap->minleft;
3509 args.wasdel = ap->wasdel;
3510 args.resv = XFS_AG_RESV_NONE;
3511 args.datatype = ap->datatype;
3512 if (ap->datatype & XFS_ALLOC_USERDATA_ZERO)
3513 args.ip = ap->ip;
3514
3515 error = xfs_alloc_vextent(&args);
3516 if (error)
3517 return error;
3518
3519 if (tryagain && args.fsbno == NULLFSBLOCK) {
3520 /*
3521 * Exact allocation failed. Now try with alignment
3522 * turned on.
3523 */
3524 args.type = atype;
3525 args.fsbno = ap->blkno;
3526 args.alignment = stripe_align;
3527 args.minlen = nextminlen;
3528 args.minalignslop = 0;
3529 isaligned = 1;
3530 if ((error = xfs_alloc_vextent(&args)))
3531 return error;
3532 }
3533 if (isaligned && args.fsbno == NULLFSBLOCK) {
3534 /*
3535 * allocation failed, so turn off alignment and
3536 * try again.
3537 */
3538 args.type = atype;
3539 args.fsbno = ap->blkno;
3540 args.alignment = 0;
3541 if ((error = xfs_alloc_vextent(&args)))
3542 return error;
3543 }
3544 if (args.fsbno == NULLFSBLOCK && nullfb &&
3545 args.minlen > ap->minlen) {
3546 args.minlen = ap->minlen;
3547 args.type = XFS_ALLOCTYPE_START_BNO;
3548 args.fsbno = ap->blkno;
3549 if ((error = xfs_alloc_vextent(&args)))
3550 return error;
3551 }
3552 if (args.fsbno == NULLFSBLOCK && nullfb) {
3553 args.fsbno = 0;
3554 args.type = XFS_ALLOCTYPE_FIRST_AG;
3555 args.total = ap->minlen;
3556 if ((error = xfs_alloc_vextent(&args)))
3557 return error;
3558 ap->dfops->dop_low = true;
3559 }
3560 if (args.fsbno != NULLFSBLOCK) {
3561 /*
3562 * Check that the allocation happened in the same or a higher
3563 * AG than the first block that was allocated.
3564 */
3565 ASSERT(*ap->firstblock == NULLFSBLOCK ||
3566 XFS_FSB_TO_AGNO(mp, *ap->firstblock) <=
3567 XFS_FSB_TO_AGNO(mp, args.fsbno));
3568
3569 ap->blkno = args.fsbno;
3570 if (*ap->firstblock == NULLFSBLOCK)
3571 *ap->firstblock = args.fsbno;
3572 ASSERT(nullfb || fb_agno <= args.agno);
3573 ap->length = args.len;
3574 if (!(ap->flags & XFS_BMAPI_COWFORK))
3575 ap->ip->i_d.di_nblocks += args.len;
3576 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3577 if (ap->wasdel)
3578 ap->ip->i_delayed_blks -= args.len;
3579 /*
3580 * Adjust the disk quota also. This was reserved
3581 * earlier.
3582 */
3583 xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
3584 ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
3585 XFS_TRANS_DQ_BCOUNT,
3586 (long) args.len);
3587 } else {
3588 ap->blkno = NULLFSBLOCK;
3589 ap->length = 0;
3590 }
3591 return 0;
3592 }
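/*
 * Illustrative summary, not part of the original source: the calls to
 * xfs_alloc_vextent() above form a fallback ladder that relaxes one
 * constraint at a time instead of failing outright:
 *
 *	1. exact-bno attempt, unaligned (the tryagain case), with
 *	   minalignslop reserved so step 2 cannot need more free space
 *	   than step 1 had available;
 *	2. near/start-bno attempt with stripe alignment;
 *	3. the same with alignment dropped;
 *	4. the same with minlen relaxed back to ap->minlen;
 *	5. an XFS_ALLOCTYPE_FIRST_AG scan, which also sets dop_low so
 *	   later allocations in this transaction stay conservative.
 */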
3593
3594 /*
3595 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
3596 * It figures out where to ask the underlying allocator to put the new extent.
3597 */
3598 STATIC int
3599 xfs_bmap_alloc(
3600 struct xfs_bmalloca *ap) /* bmap alloc argument struct */
3601 {
3602 if (XFS_IS_REALTIME_INODE(ap->ip) &&
3603 xfs_alloc_is_userdata(ap->datatype))
3604 return xfs_bmap_rtalloc(ap);
3605 return xfs_bmap_btalloc(ap);
3606 }
3607
3608 /* Trim extent to fit a logical block range. */
3609 void
3610 xfs_trim_extent(
3611 struct xfs_bmbt_irec *irec,
3612 xfs_fileoff_t bno,
3613 xfs_filblks_t len)
3614 {
3615 xfs_fileoff_t distance;
3616 xfs_fileoff_t end = bno + len;
3617
3618 if (irec->br_startoff + irec->br_blockcount <= bno ||
3619 irec->br_startoff >= end) {
3620 irec->br_blockcount = 0;
3621 return;
3622 }
3623
3624 if (irec->br_startoff < bno) {
3625 distance = bno - irec->br_startoff;
3626 if (isnullstartblock(irec->br_startblock))
3627 irec->br_startblock = DELAYSTARTBLOCK;
3628 if (irec->br_startblock != DELAYSTARTBLOCK &&
3629 irec->br_startblock != HOLESTARTBLOCK)
3630 irec->br_startblock += distance;
3631 irec->br_startoff += distance;
3632 irec->br_blockcount -= distance;
3633 }
3634
3635 if (end < irec->br_startoff + irec->br_blockcount) {
3636 distance = irec->br_startoff + irec->br_blockcount - end;
3637 irec->br_blockcount -= distance;
3638 }
3639 }
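/*
 * Illustrative worked example, not part of the original source:
 * trimming a mapping with br_startoff = 10, br_blockcount = 20,
 * br_startblock = 100 to the range bno = 15, len = 10 (end = 25):
 *
 *	front: distance = 15 - 10 = 5
 *	       -> startoff 15, startblock 105, blockcount 15
 *	back:  10 + 20 = 30 > 25, distance = 30 - 25 = 5
 *	       -> blockcount 10
 *
 * leaving file offsets [15, 25) mapped to blocks [105, 115).
 */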
3640
3641 /* trim extent to within eof */
3642 void
3643 xfs_trim_extent_eof(
3644 struct xfs_bmbt_irec *irec,
3645 struct xfs_inode *ip)
3646 {
3647
3648 xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount,
3649 i_size_read(VFS_I(ip))));
3650 }
3651
3652 /*
3653 * Trim the returned map to the required bounds
3654 */
3655 STATIC void
3656 xfs_bmapi_trim_map(
3657 struct xfs_bmbt_irec *mval,
3658 struct xfs_bmbt_irec *got,
3659 xfs_fileoff_t *bno,
3660 xfs_filblks_t len,
3661 xfs_fileoff_t obno,
3662 xfs_fileoff_t end,
3663 int n,
3664 int flags)
3665 {
3666 if ((flags & XFS_BMAPI_ENTIRE) ||
3667 got->br_startoff + got->br_blockcount <= obno) {
3668 *mval = *got;
3669 if (isnullstartblock(got->br_startblock))
3670 mval->br_startblock = DELAYSTARTBLOCK;
3671 return;
3672 }
3673
3674 if (obno > *bno)
3675 *bno = obno;
3676 ASSERT((*bno >= obno) || (n == 0));
3677 ASSERT(*bno < end);
3678 mval->br_startoff = *bno;
3679 if (isnullstartblock(got->br_startblock))
3680 mval->br_startblock = DELAYSTARTBLOCK;
3681 else
3682 mval->br_startblock = got->br_startblock +
3683 (*bno - got->br_startoff);
3684 /*
3685 	 * Return the minimum of what we got and what we asked for as the
3686 	 * length.  We can use the len variable here because it is
3687 	 * modified below and we could have been through this code before
3688 	 * if the first part of the allocation didn't overlap what was
3689 	 * asked for.
3690 */
3691 mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3692 got->br_blockcount - (*bno - got->br_startoff));
3693 mval->br_state = got->br_state;
3694 ASSERT(mval->br_blockcount <= len);
3695 return;
3696 }
3697
3698 /*
3699 * Update and validate the extent map to return
3700 */
3701 STATIC void
3702 xfs_bmapi_update_map(
3703 struct xfs_bmbt_irec **map,
3704 xfs_fileoff_t *bno,
3705 xfs_filblks_t *len,
3706 xfs_fileoff_t obno,
3707 xfs_fileoff_t end,
3708 int *n,
3709 int flags)
3710 {
3711 xfs_bmbt_irec_t *mval = *map;
3712
3713 ASSERT((flags & XFS_BMAPI_ENTIRE) ||
3714 ((mval->br_startoff + mval->br_blockcount) <= end));
3715 ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
3716 (mval->br_startoff < obno));
3717
3718 *bno = mval->br_startoff + mval->br_blockcount;
3719 *len = end - *bno;
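	/*
	 * Try to fold this mapping into the previous one: either it refines
	 * the same extent we returned last time, extends a contiguous real
	 * extent of the same state, or extends an adjacent delalloc extent.
	 * Otherwise move on to a new slot in the map array.
	 */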
3720 if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
3721 /* update previous map with new information */
3722 ASSERT(mval->br_startblock == mval[-1].br_startblock);
3723 ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
3724 ASSERT(mval->br_state == mval[-1].br_state);
3725 mval[-1].br_blockcount = mval->br_blockcount;
3726 mval[-1].br_state = mval->br_state;
3727 } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
3728 mval[-1].br_startblock != DELAYSTARTBLOCK &&
3729 mval[-1].br_startblock != HOLESTARTBLOCK &&
3730 mval->br_startblock == mval[-1].br_startblock +
3731 mval[-1].br_blockcount &&
3732 ((flags & XFS_BMAPI_IGSTATE) ||
3733 mval[-1].br_state == mval->br_state)) {
3734 ASSERT(mval->br_startoff ==
3735 mval[-1].br_startoff + mval[-1].br_blockcount);
3736 mval[-1].br_blockcount += mval->br_blockcount;
3737 } else if (*n > 0 &&
3738 mval->br_startblock == DELAYSTARTBLOCK &&
3739 mval[-1].br_startblock == DELAYSTARTBLOCK &&
3740 mval->br_startoff ==
3741 mval[-1].br_startoff + mval[-1].br_blockcount) {
3742 mval[-1].br_blockcount += mval->br_blockcount;
3743 mval[-1].br_state = mval->br_state;
3744 } else if (!((*n == 0) &&
3745 ((mval->br_startoff + mval->br_blockcount) <=
3746 obno))) {
3747 mval++;
3748 (*n)++;
3749 }
3750 *map = mval;
3751 }
3752
3753 /*
3754 * Map file blocks to filesystem blocks without allocation.
3755 */
3756 int
3757 xfs_bmapi_read(
3758 struct xfs_inode *ip,
3759 xfs_fileoff_t bno,
3760 xfs_filblks_t len,
3761 struct xfs_bmbt_irec *mval,
3762 int *nmap,
3763 int flags)
3764 {
3765 struct xfs_mount *mp = ip->i_mount;
3766 struct xfs_ifork *ifp;
3767 struct xfs_bmbt_irec got;
3768 xfs_fileoff_t obno;
3769 xfs_fileoff_t end;
3770 xfs_extnum_t idx;
3771 int error;
3772 bool eof = false;
3773 int n = 0;
3774 int whichfork = xfs_bmapi_whichfork(flags);
3775
3776 ASSERT(*nmap >= 1);
3777 ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
3778 XFS_BMAPI_IGSTATE|XFS_BMAPI_COWFORK)));
3779 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
3780
3781 if (unlikely(XFS_TEST_ERROR(
3782 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
3783 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
3784 mp, XFS_ERRTAG_BMAPIFORMAT))) {
3785 XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
3786 return -EFSCORRUPTED;
3787 }
3788
3789 if (XFS_FORCED_SHUTDOWN(mp))
3790 return -EIO;
3791
3792 XFS_STATS_INC(mp, xs_blk_mapr);
3793
3794 ifp = XFS_IFORK_PTR(ip, whichfork);
3795
3796 /* No CoW fork? Return a hole. */
3797 if (whichfork == XFS_COW_FORK && !ifp) {
3798 mval->br_startoff = bno;
3799 mval->br_startblock = HOLESTARTBLOCK;
3800 mval->br_blockcount = len;
3801 mval->br_state = XFS_EXT_NORM;
3802 *nmap = 1;
3803 return 0;
3804 }
3805
3806 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
3807 error = xfs_iread_extents(NULL, ip, whichfork);
3808 if (error)
3809 return error;
3810 }
3811
3812 if (!xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got))
3813 eof = true;
3814 end = bno + len;
3815 obno = bno;
3816
3817 while (bno < end && n < *nmap) {
3818 /* Reading past eof, act as though there's a hole up to end. */
3819 if (eof)
3820 got.br_startoff = end;
3821 if (got.br_startoff > bno) {
3822 /* Reading in a hole. */
3823 mval->br_startoff = bno;
3824 mval->br_startblock = HOLESTARTBLOCK;
3825 mval->br_blockcount =
3826 XFS_FILBLKS_MIN(len, got.br_startoff - bno);
3827 mval->br_state = XFS_EXT_NORM;
3828 bno += mval->br_blockcount;
3829 len -= mval->br_blockcount;
3830 mval++;
3831 n++;
3832 continue;
3833 }
3834
3835 /* set up the extent map to return. */
3836 xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
3837 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
3838
3839 /* If we're done, stop now. */
3840 if (bno >= end || n >= *nmap)
3841 break;
3842
3843 /* Else go on to the next record. */
3844 if (!xfs_iext_get_extent(ifp, ++idx, &got))
3845 eof = true;
3846 }
3847 *nmap = n;
3848 return 0;
3849 }
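/*
 * Illustrative call pattern for xfs_bmapi_read() (a sketch, not code
 * from this file):
 *
 *	struct xfs_bmbt_irec	map;
 *	int			nmap = 1;
 *
 *	error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &map, &nmap, 0);
 *
 * On return nmap holds the number of mappings filled in; holes come back
 * with br_startblock == HOLESTARTBLOCK and delalloc extents with
 * br_startblock == DELAYSTARTBLOCK.  The caller must hold the ilock in
 * at least shared mode, per the ASSERT above.
 */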
3850
3851 /*
3852 * Add a delayed allocation extent to an inode. Blocks are reserved from the
3853 * global pool and the extent inserted into the inode in-core extent tree.
3854 *
3855 * On entry, got refers to the first extent beyond the offset of the extent to
3856 * allocate or eof is specified if no such extent exists. On return, got refers
3857 * to the extent record that was inserted to the inode fork.
3858 *
3859 * Note that the allocated extent may have been merged with contiguous extents
3860 * during insertion into the inode fork. Thus, got does not reflect the current
3861 * state of the inode fork on return. If necessary, the caller can use lastx to
3862 * look up the updated record in the inode fork.
3863 */
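/*
 * Reservations below are taken in three steps: a transaction-less quota
 * reservation first, then the free space counters for the data blocks
 * (rt extents or regular blocks), then the free block count for the
 * worst-case indirect btree blocks.  The error paths unwind in reverse
 * order.
 */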
3864 int
3865 xfs_bmapi_reserve_delalloc(
3866 struct xfs_inode *ip,
3867 int whichfork,
3868 xfs_fileoff_t off,
3869 xfs_filblks_t len,
3870 xfs_filblks_t prealloc,
3871 struct xfs_bmbt_irec *got,
3872 xfs_extnum_t *lastx,
3873 int eof)
3874 {
3875 struct xfs_mount *mp = ip->i_mount;
3876 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
3877 xfs_extlen_t alen;
3878 xfs_extlen_t indlen;
3879 char rt = XFS_IS_REALTIME_INODE(ip);
3880 xfs_extlen_t extsz;
3881 int error;
3882 xfs_fileoff_t aoff = off;
3883
3884 /*
3885 * Cap the alloc length. Keep track of prealloc so we know whether to
3886 * tag the inode before we return.
3887 */
3888 alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN);
3889 if (!eof)
3890 alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
3891 if (prealloc && alen >= len)
3892 prealloc = alen - len;
3893
3894 /* Figure out the extent size, adjust alen */
3895 if (whichfork == XFS_COW_FORK)
3896 extsz = xfs_get_cowextsz_hint(ip);
3897 else
3898 extsz = xfs_get_extsz_hint(ip);
3899 if (extsz) {
3900 struct xfs_bmbt_irec prev;
3901
3902 if (!xfs_iext_get_extent(ifp, *lastx - 1, &prev))
3903 prev.br_startoff = NULLFILEOFF;
3904
3905 error = xfs_bmap_extsize_align(mp, got, &prev, extsz, rt, eof,
3906 1, 0, &aoff, &alen);
3907 ASSERT(!error);
3908 }
3909
3910 if (rt)
3911 extsz = alen / mp->m_sb.sb_rextsize;
3912
3913 /*
3914 * Make a transaction-less quota reservation for delayed allocation
3915 	 * blocks.  This number gets adjusted later.  If it fails we can
3916 	 * just return, since no blocks have been allocated yet at this point.
3917 */
3918 error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
3919 rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
3920 if (error)
3921 return error;
3922
3923 /*
3924 	 * Update the superblock counters separately for alen and indlen,
3925 	 * since the two could be satisfied from different places.
3926 */
3927 indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
3928 ASSERT(indlen > 0);
3929
3930 if (rt) {
3931 error = xfs_mod_frextents(mp, -((int64_t)extsz));
3932 } else {
3933 error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
3934 }
3935
3936 if (error)
3937 goto out_unreserve_quota;
3938
3939 error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
3940 if (error)
3941 goto out_unreserve_blocks;
3942
3944 ip->i_delayed_blks += alen;
3945
3946 got->br_startoff = aoff;
3947 got->br_startblock = nullstartblock(indlen);
3948 got->br_blockcount = alen;
3949 got->br_state = XFS_EXT_NORM;
3950
3951 xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
3952
3953 /*
3954 * Tag the inode if blocks were preallocated. Note that COW fork
3955 * preallocation can occur at the start or end of the extent, even when
3956 * prealloc == 0, so we must also check the aligned offset and length.
3957 */
3958 if (whichfork == XFS_DATA_FORK && prealloc)
3959 xfs_inode_set_eofblocks_tag(ip);
3960 if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
3961 xfs_inode_set_cowblocks_tag(ip);
3962
3963 return 0;
3964
3965 out_unreserve_blocks:
3966 if (rt)
3967 xfs_mod_frextents(mp, extsz);
3968 else
3969 xfs_mod_fdblocks(mp, alen, false);
3970 out_unreserve_quota:
3971 if (XFS_IS_QUOTA_ON(mp))
3972 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
3973 XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
3974 return error;
3975 }
3976
3977 static int
3978 xfs_bmapi_allocate(
3979 struct xfs_bmalloca *bma)
3980 {
3981 struct xfs_mount *mp = bma->ip->i_mount;
3982 int whichfork = xfs_bmapi_whichfork(bma->flags);
3983 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
3984 int tmp_logflags = 0;
3985 int error;
3986
3987 ASSERT(bma->length > 0);
3988
3989 /*
3990 	 * For the wasdelay case, we could just allocate the blocks asked for
3991 	 * in this bmap call, but converting the whole delayed extent is better.
3992 */
3993 if (bma->wasdel) {
3994 bma->length = (xfs_extlen_t)bma->got.br_blockcount;
3995 bma->offset = bma->got.br_startoff;
3996 if (bma->idx)
3997 xfs_iext_get_extent(ifp, bma->idx - 1, &bma->prev);
3998 } else {
3999 bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
4000 if (!bma->eof)
4001 bma->length = XFS_FILBLKS_MIN(bma->length,
4002 bma->got.br_startoff - bma->offset);
4003 }
4004
4005 /*
4006 * Set the data type being allocated. For the data fork, the first data
4007 * in the file is treated differently to all other allocations. For the
4008 * attribute fork, we only need to ensure the allocated range is not on
4009 * the busy list.
4010 */
4011 if (!(bma->flags & XFS_BMAPI_METADATA)) {
4012 bma->datatype = XFS_ALLOC_NOBUSY;
4013 if (whichfork == XFS_DATA_FORK) {
4014 if (bma->offset == 0)
4015 bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4016 else
4017 bma->datatype |= XFS_ALLOC_USERDATA;
4018 }
4019 if (bma->flags & XFS_BMAPI_ZERO)
4020 bma->datatype |= XFS_ALLOC_USERDATA_ZERO;
4021 }
4022
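	/*
	 * XFS_BMAPI_CONTIG callers need the entire request as a single
	 * extent, so anything shorter than the full length is unacceptable.
	 */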
4023 bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
4024
4025 /*
4026 	 * We only want to do the alignment at EOF if it is userdata and the
4027 	 * allocation length is larger than a stripe unit.
4028 */
4029 if (mp->m_dalign && bma->length >= mp->m_dalign &&
4030 !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
4031 error = xfs_bmap_isaeof(bma, whichfork);
4032 if (error)
4033 return error;
4034 }
4035
4036 error = xfs_bmap_alloc(bma);
4037 if (error)
4038 return error;
4039
4040 if (bma->cur)
4041 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4042 if (bma->blkno == NULLFSBLOCK)
4043 return 0;
4044 if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4045 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4046 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4047 bma->cur->bc_private.b.dfops = bma->dfops;
4048 }
4049 /*
4050 * Bump the number of extents we've allocated
4051 * in this call.
4052 */
4053 bma->nallocs++;
4054
4055 if (bma->cur)
4056 bma->cur->bc_private.b.flags =
4057 bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
4058
4059 bma->got.br_startoff = bma->offset;
4060 bma->got.br_startblock = bma->blkno;
4061 bma->got.br_blockcount = bma->length;
4062 bma->got.br_state = XFS_EXT_NORM;
4063
4064 /*
4065 	 * In the data fork, a wasdelay extent has been initialized, so it
4066 	 * shouldn't be flagged as unwritten.
4067 *
4068 * For the cow fork, however, we convert delalloc reservations
4069 * (extents allocated for speculative preallocation) to
4070 * allocated unwritten extents, and only convert the unwritten
4071 * extents to real extents when we're about to write the data.
4072 */
4073 if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
4074 (bma->flags & XFS_BMAPI_PREALLOC) &&
4075 xfs_sb_version_hasextflgbit(&mp->m_sb))
4076 bma->got.br_state = XFS_EXT_UNWRITTEN;
4077
4078 if (bma->wasdel)
4079 error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4080 else
4081 error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4082 whichfork, &bma->idx, &bma->cur, &bma->got,
4083 bma->firstblock, bma->dfops, &bma->logflags);
4084
4085 bma->logflags |= tmp_logflags;
4086 if (error)
4087 return error;
4088
4089 /*
4090 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4091 * or xfs_bmap_add_extent_hole_real might have merged it into one of
4092 * the neighbouring ones.
4093 */
4094 xfs_iext_get_extent(ifp, bma->idx, &bma->got);
4095
4096 ASSERT(bma->got.br_startoff <= bma->offset);
4097 ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4098 bma->offset + bma->length);
4099 ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4100 bma->got.br_state == XFS_EXT_UNWRITTEN);
4101 return 0;
4102 }
4103
4104 STATIC int
4105 xfs_bmapi_convert_unwritten(
4106 struct xfs_bmalloca *bma,
4107 struct xfs_bmbt_irec *mval,
4108 xfs_filblks_t len,
4109 int flags)
4110 {
4111 int whichfork = xfs_bmapi_whichfork(flags);
4112 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4113 int tmp_logflags = 0;
4114 int error;
4115
4116 /* check if we need to do unwritten->real conversion */
4117 if (mval->br_state == XFS_EXT_UNWRITTEN &&
4118 (flags & XFS_BMAPI_PREALLOC))
4119 return 0;
4120
4121 /* check if we need to do real->unwritten conversion */
4122 if (mval->br_state == XFS_EXT_NORM &&
4123 (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
4124 (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4125 return 0;
4126
4127 /*
4128 * Modify (by adding) the state flag, if writing.
4129 */
4130 ASSERT(mval->br_blockcount <= len);
4131 if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4132 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
4133 bma->ip, whichfork);
4134 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4135 bma->cur->bc_private.b.dfops = bma->dfops;
4136 }
4137 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4138 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4139
4140 /*
4141 * Before insertion into the bmbt, zero the range being converted
4142 * if required.
4143 */
4144 if (flags & XFS_BMAPI_ZERO) {
4145 error = xfs_zero_extent(bma->ip, mval->br_startblock,
4146 mval->br_blockcount);
4147 if (error)
4148 return error;
4149 }
4150
4151 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4152 &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops,
4153 &tmp_logflags);
4154 /*
4155 * Log the inode core unconditionally in the unwritten extent conversion
4156 * path because the conversion might not have done so (e.g., if the
4157 * extent count hasn't changed). We need to make sure the inode is dirty
4158 * in the transaction for the sake of fsync(), even if nothing has
4159 * changed, because fsync() will not force the log for this transaction
4160 * unless it sees the inode pinned.
4161 *
4162 * Note: If we're only converting cow fork extents, there aren't
4163 * any on-disk updates to make, so we don't need to log anything.
4164 */
4165 if (whichfork != XFS_COW_FORK)
4166 bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4167 if (error)
4168 return error;
4169
4170 /*
4171 * Update our extent pointer, given that
4172 * xfs_bmap_add_extent_unwritten_real might have merged it into one
4173 * of the neighbouring ones.
4174 */
4175 xfs_iext_get_extent(ifp, bma->idx, &bma->got);
4176
4177 /*
4178 * We may have combined previously unwritten space with written space,
4179 * so generate another request.
4180 */
4181 if (mval->br_blockcount < len)
4182 return -EAGAIN;
4183 return 0;
4184 }
4185
4186 /*
4187 * Map file blocks to filesystem blocks, and allocate blocks or convert the
4188  * extent state if necessary.  Detailed behaviour is controlled by the flags
4189 * parameter. Only allocates blocks from a single allocation group, to avoid
4190 * locking problems.
4191 *
4192 * The returned value in "firstblock" from the first call in a transaction
4193 * must be remembered and presented to subsequent calls in "firstblock".
4194 * An upper bound for the number of blocks to be allocated is supplied to
4195 * the first call in "total"; if no allocation group has that many free
4196 * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
4197 */
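/*
 * Illustrative call pattern (a sketch of the deferred-ops setup used by
 * callers elsewhere in XFS at this vintage; names are illustrative, not
 * code from this file):
 *
 *	xfs_fsblock_t		firstfsb = NULLFSBLOCK;
 *	struct xfs_defer_ops	dfops;
 *	struct xfs_bmbt_irec	map;
 *	int			nmap = 1;
 *
 *	xfs_defer_init(&dfops, &firstfsb);
 *	error = xfs_bmapi_write(tp, ip, bno, len, 0, &firstfsb,
 *			total, &map, &nmap, &dfops);
 *	if (!error)
 *		error = xfs_defer_finish(&tp, &dfops);
 *
 * The same firstfsb must be passed back unchanged on any further
 * xfs_bmapi_write() calls in the same transaction.
 */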
4198 int
4199 xfs_bmapi_write(
4200 struct xfs_trans *tp, /* transaction pointer */
4201 struct xfs_inode *ip, /* incore inode */
4202 xfs_fileoff_t bno, /* starting file offs. mapped */
4203 xfs_filblks_t len, /* length to map in file */
4204 int flags, /* XFS_BMAPI_... */
4205 xfs_fsblock_t *firstblock, /* first allocated block
4206 controls a.g. for allocs */
4207 xfs_extlen_t total, /* total blocks needed */
4208 struct xfs_bmbt_irec *mval, /* output: map values */
4209 int *nmap, /* i/o: mval size/count */
4210 struct xfs_defer_ops *dfops) /* i/o: list extents to free */
4211 {
4212 struct xfs_mount *mp = ip->i_mount;
4213 struct xfs_ifork *ifp;
4214 struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */
4215 xfs_fileoff_t end; /* end of mapped file region */
4216 bool eof = false; /* after the end of extents */
4217 int error; /* error return */
4218 int n; /* current extent index */
4219 xfs_fileoff_t obno; /* old block number (offset) */
4220 int whichfork; /* data or attr fork */
4221
4222 #ifdef DEBUG
4223 xfs_fileoff_t orig_bno; /* original block number value */
4224 int orig_flags; /* original flags arg value */
4225 xfs_filblks_t orig_len; /* original value of len arg */
4226 struct xfs_bmbt_irec *orig_mval; /* original value of mval */
4227 int orig_nmap; /* original value of *nmap */
4228
4229 orig_bno = bno;
4230 orig_len = len;
4231 orig_flags = flags;
4232 orig_mval = mval;
4233 orig_nmap = *nmap;
4234 #endif
4235 whichfork = xfs_bmapi_whichfork(flags);
4236
4237 ASSERT(*nmap >= 1);
4238 ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4239 ASSERT(!(flags & XFS_BMAPI_IGSTATE));
4240 ASSERT(tp != NULL ||
4241 (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) ==
4242 (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK));
4243 ASSERT(len > 0);
4244 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
4245 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4246 ASSERT(!(flags & XFS_BMAPI_REMAP));
4247
4248 	/* zeroing is currently only for data extents, not metadata */
4249 ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4250 (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4251 /*
4252 	 * We can allocate unwritten extents or pre-zero allocated blocks,
4253 	 * but it makes no sense to do both at once: this would zero the
4254 	 * extent on allocation while it still remained an unwritten
4255 	 * extent afterwards.
4256 */
4257 ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4258 (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4259
4260 if (unlikely(XFS_TEST_ERROR(
4261 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4262 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
4263 mp, XFS_ERRTAG_BMAPIFORMAT))) {
4264 XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
4265 return -EFSCORRUPTED;
4266 }
4267
4268 if (XFS_FORCED_SHUTDOWN(mp))
4269 return -EIO;
4270
4271 ifp = XFS_IFORK_PTR(ip, whichfork);
4272
4273 XFS_STATS_INC(mp, xs_blk_mapw);
4274
4275 if (*firstblock == NULLFSBLOCK) {
4276 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
4277 bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
4278 else
4279 bma.minleft = 1;
4280 } else {
4281 bma.minleft = 0;
4282 }
4283
4284 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4285 error = xfs_iread_extents(tp, ip, whichfork);
4286 if (error)
4287 goto error0;
4288 }
4289
4290 n = 0;
4291 end = bno + len;
4292 obno = bno;
4293
4294 if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.idx, &bma.got))
4295 eof = true;
4296 if (!xfs_iext_get_extent(ifp, bma.idx - 1, &bma.prev))
4297 bma.prev.br_startoff = NULLFILEOFF;
4298 bma.tp = tp;
4299 bma.ip = ip;
4300 bma.total = total;
4301 bma.datatype = 0;
4302 bma.dfops = dfops;
4303 bma.firstblock = firstblock;
4304
4305 while (bno < end && n < *nmap) {
4306 bool need_alloc = false, wasdelay = false;
4307
4308 		/* in hole or beyond EOF? */
4309 if (eof || bma.got.br_startoff > bno) {
4310 if (flags & XFS_BMAPI_DELALLOC) {
4311 /*
4312 * For the COW fork we can reasonably get a
4313 * request for converting an extent that races
4314 * with other threads already having converted
4315 				 * part of it, because converting COW to
4316 				 * regular blocks is not protected by the
4317 * IOLOCK.
4318 */
4319 ASSERT(flags & XFS_BMAPI_COWFORK);
4320 if (!(flags & XFS_BMAPI_COWFORK)) {
4321 error = -EIO;
4322 goto error0;
4323 }
4324
4325 if (eof || bno >= end)
4326 break;
4327 } else {
4328 need_alloc = true;
4329 }
4330 } else if (isnullstartblock(bma.got.br_startblock)) {
4331 wasdelay = true;
4332 }
4333
4334 /*
4335 * First, deal with the hole before the allocated space
4336 * that we found, if any.
4337 */
4338 if (need_alloc || wasdelay) {
4339 bma.eof = eof;
4340 bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4341 bma.wasdel = wasdelay;
4342 bma.offset = bno;
4343 bma.flags = flags;
4344
4345 /*
4346 * There's a 32/64 bit type mismatch between the
4347 * allocation length request (which can be 64 bits in
4348 * length) and the bma length request, which is
4349 * xfs_extlen_t and therefore 32 bits. Hence we have to
4350 * check for 32-bit overflows and handle them here.
4351 */
4352 if (len > (xfs_filblks_t)MAXEXTLEN)
4353 bma.length = MAXEXTLEN;
4354 else
4355 bma.length = len;
4356
4357 ASSERT(len > 0);
4358 ASSERT(bma.length > 0);
4359 error = xfs_bmapi_allocate(&bma);
4360 if (error)
4361 goto error0;
4362 if (bma.blkno == NULLFSBLOCK)
4363 break;
4364
4365 /*
4366 * If this is a CoW allocation, record the data in
4367 * the refcount btree for orphan recovery.
4368 */
4369 if (whichfork == XFS_COW_FORK) {
4370 error = xfs_refcount_alloc_cow_extent(mp, dfops,
4371 bma.blkno, bma.length);
4372 if (error)
4373 goto error0;
4374 }
4375 }
4376
4377 /* Deal with the allocated space we found. */
4378 xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4379 end, n, flags);
4380
4381 /* Execute unwritten extent conversion if necessary */
4382 error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4383 if (error == -EAGAIN)
4384 continue;
4385 if (error)
4386 goto error0;
4387
4388 /* update the extent map to return */
4389 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4390
4391 /*
4392 * If we're done, stop now. Stop when we've allocated
4393 * XFS_BMAP_MAX_NMAP extents no matter what. Otherwise
4394 * the transaction may get too big.
4395 */
4396 if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4397 break;
4398
4399 /* Else go on to the next record. */
4400 bma.prev = bma.got;
4401 if (!xfs_iext_get_extent(ifp, ++bma.idx, &bma.got))
4402 eof = true;
4403 }
4404 *nmap = n;
4405
4406 /*
4407 * Transform from btree to extents, give it cur.
4408 */
4409 if (xfs_bmap_wants_extents(ip, whichfork)) {
4410 int tmp_logflags = 0;
4411
4412 ASSERT(bma.cur);
4413 error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
4414 &tmp_logflags, whichfork);
4415 bma.logflags |= tmp_logflags;
4416 if (error)
4417 goto error0;
4418 }
4419
4420 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
4421 XFS_IFORK_NEXTENTS(ip, whichfork) >
4422 XFS_IFORK_MAXEXT(ip, whichfork));
4423 error = 0;
4424 error0:
4425 /*
4426 	 * Log everything.  Do this after conversion; there's no point in
4427 * logging the extent records if we've converted to btree format.
4428 */
4429 if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
4430 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
4431 bma.logflags &= ~xfs_ilog_fext(whichfork);
4432 else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
4433 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
4434 bma.logflags &= ~xfs_ilog_fbroot(whichfork);
4435 /*
4436 * Log whatever the flags say, even if error. Otherwise we might miss
4437 * detecting a case where the data is changed, there's an error,
4438 	 * and it's not logged so we don't shut down when we should.
4439 */
4440 if (bma.logflags)
4441 xfs_trans_log_inode(tp, ip, bma.logflags);
4442
4443 if (bma.cur) {
4444 if (!error) {
4445 ASSERT(*firstblock == NULLFSBLOCK ||
4446 XFS_FSB_TO_AGNO(mp, *firstblock) <=
4447 XFS_FSB_TO_AGNO(mp,
4448 bma.cur->bc_private.b.firstblock));
4449 *firstblock = bma.cur->bc_private.b.firstblock;
4450 }
4451 xfs_btree_del_cursor(bma.cur,
4452 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
4453 }
4454 if (!error)
4455 xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4456 orig_nmap, *nmap);
4457 return error;
4458 }
4459
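/*
 * Map a given range of an existing physical extent into a hole in the
 * data fork.  This is used by the reflink remap path; note the asserts
 * below that verify the target range really is a hole.
 */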
4460 static int
4461 xfs_bmapi_remap(
4462 struct xfs_trans *tp,
4463 struct xfs_inode *ip,
4464 xfs_fileoff_t bno,
4465 xfs_filblks_t len,
4466 xfs_fsblock_t startblock,
4467 struct xfs_defer_ops *dfops)
4468 {
4469 struct xfs_mount *mp = ip->i_mount;
4470 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
4471 struct xfs_btree_cur *cur = NULL;
4472 xfs_fsblock_t firstblock = NULLFSBLOCK;
4473 struct xfs_bmbt_irec got;
4474 xfs_extnum_t idx;
4475 int logflags = 0, error;
4476
4477 ASSERT(len > 0);
4478 ASSERT(len <= (xfs_filblks_t)MAXEXTLEN);
4479 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4480
4481 if (unlikely(XFS_TEST_ERROR(
4482 (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
4483 XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
4484 mp, XFS_ERRTAG_BMAPIFORMAT))) {
4485 XFS_ERROR_REPORT("xfs_bmapi_remap", XFS_ERRLEVEL_LOW, mp);
4486 return -EFSCORRUPTED;
4487 }
4488
4489 if (XFS_FORCED_SHUTDOWN(mp))
4490 return -EIO;
4491
4492 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4493 error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
4494 if (error)
4495 return error;
4496 }
4497
4498 if (xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) {
4499 /* make sure we only reflink into a hole. */
4500 ASSERT(got.br_startoff > bno);
4501 ASSERT(got.br_startoff - bno >= len);
4502 }
4503
4504 ip->i_d.di_nblocks += len;
4505 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4506
4507 if (ifp->if_flags & XFS_IFBROOT) {
4508 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
4509 cur->bc_private.b.firstblock = firstblock;
4510 cur->bc_private.b.dfops = dfops;
4511 cur->bc_private.b.flags = 0;
4512 }
4513
4514 got.br_startoff = bno;
4515 got.br_startblock = startblock;
4516 got.br_blockcount = len;
4517 got.br_state = XFS_EXT_NORM;
4518
4519 error = xfs_bmap_add_extent_hole_real(tp, ip, XFS_DATA_FORK, &idx, &cur,
4520 &got, &firstblock, dfops, &logflags);
4521 if (error)
4522 goto error0;
4523
4524 if (xfs_bmap_wants_extents(ip, XFS_DATA_FORK)) {
4525 int tmp_logflags = 0;
4526
4527 error = xfs_bmap_btree_to_extents(tp, ip, cur,
4528 &tmp_logflags, XFS_DATA_FORK);
4529 logflags |= tmp_logflags;
4530 }
4531
4532 error0:
4533 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS)
4534 logflags &= ~XFS_ILOG_DEXT;
4535 else if (ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
4536 logflags &= ~XFS_ILOG_DBROOT;
4537
4538 if (logflags)
4539 xfs_trans_log_inode(tp, ip, logflags);
4540 if (cur) {
4541 xfs_btree_del_cursor(cur,
4542 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
4543 }
4544 return error;
4545 }
4546
4547 /*
4548 * When a delalloc extent is split (e.g., due to a hole punch), the original
4549 * indlen reservation must be shared across the two new extents that are left
4550 * behind.
4551 *
4552 * Given the original reservation and the worst case indlen for the two new
4553 * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4554 * reservation fairly across the two new extents. If necessary, steal available
4555 * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4556 * ores == 1). The number of stolen blocks is returned. The availability and
4557 * subsequent accounting of stolen blocks is the responsibility of the caller.
4558 */
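/*
 * Worked example (illustrative numbers): with ores = 10, avail = 0 and
 * worst-case indlens of 8 and 8 (so nres = 16), resfactor computes to
 * 62, the proportional shares come out as 4 and 4, and the remaining
 * 2 blocks are then handed out one at a time, leaving 5 and 5.
 */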
4559 static xfs_filblks_t
4560 xfs_bmap_split_indlen(
4561 xfs_filblks_t ores, /* original res. */
4562 xfs_filblks_t *indlen1, /* ext1 worst indlen */
4563 xfs_filblks_t *indlen2, /* ext2 worst indlen */
4564 xfs_filblks_t avail) /* stealable blocks */
4565 {
4566 xfs_filblks_t len1 = *indlen1;
4567 xfs_filblks_t len2 = *indlen2;
4568 xfs_filblks_t nres = len1 + len2; /* new total res. */
4569 xfs_filblks_t stolen = 0;
4570 xfs_filblks_t resfactor;
4571
4572 /*
4573 * Steal as many blocks as we can to try and satisfy the worst case
4574 * indlen for both new extents.
4575 */
4576 if (ores < nres && avail)
4577 stolen = XFS_FILBLKS_MIN(nres - ores, avail);
4578 ores += stolen;
4579
4580 /* nothing else to do if we've satisfied the new reservation */
4581 if (ores >= nres)
4582 return stolen;
4583
4584 /*
4585 * We can't meet the total required reservation for the two extents.
4586 	 * Calculate the percentage of the request we can actually satisfy
4587 	 * and apply that proportionally to each of the requested indlen values.
4588 * This distributes the shortage fairly and reduces the chances that one
4589 * of the two extents is left with nothing when extents are repeatedly
4590 * split.
4591 */
4592 resfactor = (ores * 100);
4593 do_div(resfactor, nres);
4594 len1 *= resfactor;
4595 do_div(len1, 100);
4596 len2 *= resfactor;
4597 do_div(len2, 100);
4598 ASSERT(len1 + len2 <= ores);
4599 ASSERT(len1 < *indlen1 && len2 < *indlen2);
4600
4601 /*
4602 * Hand out the remainder to each extent. If one of the two reservations
4603 * is zero, we want to make sure that one gets a block first. The loop
4604 * below starts with len1, so hand len2 a block right off the bat if it
4605 * is zero.
4606 */
4607 ores -= (len1 + len2);
4608 ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
4609 if (ores && !len2 && *indlen2) {
4610 len2++;
4611 ores--;
4612 }
4613 while (ores) {
4614 if (len1 < *indlen1) {
4615 len1++;
4616 ores--;
4617 }
4618 if (!ores)
4619 break;
4620 if (len2 < *indlen2) {
4621 len2++;
4622 ores--;
4623 }
4624 }
4625
4626 *indlen1 = len1;
4627 *indlen2 = len2;
4628
4629 return stolen;
4630 }
4631
4632 int
4633 xfs_bmap_del_extent_delay(
4634 struct xfs_inode *ip,
4635 int whichfork,
4636 xfs_extnum_t *idx,
4637 struct xfs_bmbt_irec *got,
4638 struct xfs_bmbt_irec *del)
4639 {
4640 struct xfs_mount *mp = ip->i_mount;
4641 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
4642 struct xfs_bmbt_irec new;
4643 int64_t da_old, da_new, da_diff = 0;
4644 xfs_fileoff_t del_endoff, got_endoff;
4645 xfs_filblks_t got_indlen, new_indlen, stolen;
4646 int state = xfs_bmap_fork_to_state(whichfork);
4647 int error = 0;
4648 bool isrt;
4649
4650 XFS_STATS_INC(mp, xs_del_exlist);
4651
4652 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
4653 del_endoff = del->br_startoff + del->br_blockcount;
4654 got_endoff = got->br_startoff + got->br_blockcount;
4655 da_old = startblockval(got->br_startblock);
4656 da_new = 0;
4657
4658 ASSERT(*idx >= 0);
4659 ASSERT(*idx <= xfs_iext_count(ifp));
4660 ASSERT(del->br_blockcount > 0);
4661 ASSERT(got->br_startoff <= del->br_startoff);
4662 ASSERT(got_endoff >= del_endoff);
4663
4664 if (isrt) {
4665 uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
4666
4667 do_div(rtexts, mp->m_sb.sb_rextsize);
4668 xfs_mod_frextents(mp, rtexts);
4669 }
4670
4671 /*
4672 * Update the inode delalloc counter now and wait to update the
4673 * sb counters as we might have to borrow some blocks for the
4674 * indirect block accounting.
4675 */
4676 error = xfs_trans_reserve_quota_nblks(NULL, ip,
4677 -((long)del->br_blockcount), 0,
4678 isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4679 if (error)
4680 return error;
4681 ip->i_delayed_blks -= del->br_blockcount;
4682
4683 if (got->br_startoff == del->br_startoff)
4684 state |= BMAP_LEFT_FILLING;
4685 if (got_endoff == del_endoff)
4686 state |= BMAP_RIGHT_FILLING;
4687
4688 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4689 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4690 /*
4691 * Matches the whole extent. Delete the entry.
4692 */
4693 xfs_iext_remove(ip, *idx, 1, state);
4694 --*idx;
4695 break;
4696 case BMAP_LEFT_FILLING:
4697 /*
4698 * Deleting the first part of the extent.
4699 */
4700 got->br_startoff = del_endoff;
4701 got->br_blockcount -= del->br_blockcount;
4702 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4703 got->br_blockcount), da_old);
4704 got->br_startblock = nullstartblock((int)da_new);
4705 xfs_iext_update_extent(ip, state, *idx, got);
4706 break;
4707 case BMAP_RIGHT_FILLING:
4708 /*
4709 * Deleting the last part of the extent.
4710 */
4711 got->br_blockcount = got->br_blockcount - del->br_blockcount;
4712 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
4713 got->br_blockcount), da_old);
4714 got->br_startblock = nullstartblock((int)da_new);
4715 xfs_iext_update_extent(ip, state, *idx, got);
4716 break;
4717 case 0:
4718 /*
4719 * Deleting the middle of the extent.
4720 *
4721 * Distribute the original indlen reservation across the two new
4722 * extents. Steal blocks from the deleted extent if necessary.
4723 * Stealing blocks simply fudges the fdblocks accounting below.
4724 * Warn if either of the new indlen reservations is zero as this
4725 * can lead to delalloc problems.
4726 */
4727 got->br_blockcount = del->br_startoff - got->br_startoff;
4728 got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
4729
4730 new.br_blockcount = got_endoff - del_endoff;
4731 new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
4732
4733 WARN_ON_ONCE(!got_indlen || !new_indlen);
4734 stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
4735 del->br_blockcount);
4736
4737 got->br_startblock = nullstartblock((int)got_indlen);
4738 xfs_iext_update_extent(ip, state, *idx, got);
4739
4740 new.br_startoff = del_endoff;
4741 new.br_state = got->br_state;
4742 new.br_startblock = nullstartblock((int)new_indlen);
4743
4744 ++*idx;
4745 xfs_iext_insert(ip, *idx, 1, &new, state);
4746
4747 da_new = got_indlen + new_indlen - stolen;
4748 del->br_blockcount -= stolen;
4749 break;
4750 }
4751
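	/*
	 * Give back the unused part of the old indirect reservation, plus
	 * (for non-realtime files) the freed data blocks themselves; the
	 * realtime data blocks were already returned to frextents above.
	 */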
4752 ASSERT(da_old >= da_new);
4753 da_diff = da_old - da_new;
4754 if (!isrt)
4755 da_diff += del->br_blockcount;
4756 if (da_diff)
4757 xfs_mod_fdblocks(mp, da_diff, false);
4758 return error;
4759 }
4760
4761 void
4762 xfs_bmap_del_extent_cow(
4763 struct xfs_inode *ip,
4764 xfs_extnum_t *idx,
4765 struct xfs_bmbt_irec *got,
4766 struct xfs_bmbt_irec *del)
4767 {
4768 struct xfs_mount *mp = ip->i_mount;
4769 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
4770 struct xfs_bmbt_irec new;
4771 xfs_fileoff_t del_endoff, got_endoff;
4772 int state = BMAP_COWFORK;
4773
4774 XFS_STATS_INC(mp, xs_del_exlist);
4775
4776 del_endoff = del->br_startoff + del->br_blockcount;
4777 got_endoff = got->br_startoff + got->br_blockcount;
4778
4779 ASSERT(*idx >= 0);
4780 ASSERT(*idx <= xfs_iext_count(ifp));
4781 ASSERT(del->br_blockcount > 0);
4782 ASSERT(got->br_startoff <= del->br_startoff);
4783 ASSERT(got_endoff >= del_endoff);
4784 ASSERT(!isnullstartblock(got->br_startblock));
4785
4786 if (got->br_startoff == del->br_startoff)
4787 state |= BMAP_LEFT_FILLING;
4788 if (got_endoff == del_endoff)
4789 state |= BMAP_RIGHT_FILLING;
4790
4791 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4792 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4793 /*
4794 * Matches the whole extent. Delete the entry.
4795 */
4796 xfs_iext_remove(ip, *idx, 1, state);
4797 --*idx;
4798 break;
4799 case BMAP_LEFT_FILLING:
4800 /*
4801 * Deleting the first part of the extent.
4802 */
4803 got->br_startoff = del_endoff;
4804 got->br_blockcount -= del->br_blockcount;
4805 got->br_startblock = del->br_startblock + del->br_blockcount;
4806 xfs_iext_update_extent(ip, state, *idx, got);
4807 break;
4808 case BMAP_RIGHT_FILLING:
4809 /*
4810 * Deleting the last part of the extent.
4811 */
4812 got->br_blockcount -= del->br_blockcount;
4813 xfs_iext_update_extent(ip, state, *idx, got);
4814 break;
4815 case 0:
4816 /*
4817 * Deleting the middle of the extent.
4818 */
4819 got->br_blockcount = del->br_startoff - got->br_startoff;
4820 xfs_iext_update_extent(ip, state, *idx, got);
4821
4822 new.br_startoff = del_endoff;
4823 new.br_blockcount = got_endoff - del_endoff;
4824 new.br_state = got->br_state;
4825 new.br_startblock = del->br_startblock + del->br_blockcount;
4826
4827 ++*idx;
4828 xfs_iext_insert(ip, *idx, 1, &new, state);
4829 break;
4830 }
4831 }
4832
4833 /*
4834 * Called by xfs_bmapi to update file extent records and the btree
4835 * after removing space.
4836 */
4837 STATIC int /* error */
4838 xfs_bmap_del_extent_real(
4839 xfs_inode_t *ip, /* incore inode pointer */
4840 xfs_trans_t *tp, /* current transaction pointer */
4841 xfs_extnum_t *idx, /* extent number to update/delete */
4842 struct xfs_defer_ops *dfops, /* list of extents to be freed */
4843 xfs_btree_cur_t *cur, /* if null, not a btree */
4844 xfs_bmbt_irec_t *del, /* data to remove from extents */
4845 int *logflagsp, /* inode logging flags */
4846 int whichfork, /* data or attr fork */
4847 int bflags) /* bmapi flags */
4848 {
4849 xfs_fsblock_t del_endblock=0; /* first block past del */
4850 xfs_fileoff_t del_endoff; /* first offset past del */
4851 int do_fx; /* free extent at end of routine */
4852 int error; /* error return value */
4853 int flags = 0;/* inode logging flags */
4854 struct xfs_bmbt_irec got; /* current extent entry */
4855 xfs_fileoff_t got_endoff; /* first offset past got */
4856 int i; /* temp state */
4857 xfs_ifork_t *ifp; /* inode fork pointer */
4858 xfs_mount_t *mp; /* mount structure */
4859 xfs_filblks_t nblks; /* quota/sb block count */
4860 xfs_bmbt_irec_t new; /* new record to be inserted */
4861 /* REFERENCED */
4862 uint qfield; /* quota field to update */
4863 int state = xfs_bmap_fork_to_state(whichfork);
4864 struct xfs_bmbt_irec old;
4865
4866 mp = ip->i_mount;
4867 XFS_STATS_INC(mp, xs_del_exlist);
4868
4869 ifp = XFS_IFORK_PTR(ip, whichfork);
4870 ASSERT((*idx >= 0) && (*idx < xfs_iext_count(ifp)));
4871 ASSERT(del->br_blockcount > 0);
4872 xfs_iext_get_extent(ifp, *idx, &got);
4873 ASSERT(got.br_startoff <= del->br_startoff);
4874 del_endoff = del->br_startoff + del->br_blockcount;
4875 got_endoff = got.br_startoff + got.br_blockcount;
4876 ASSERT(got_endoff >= del_endoff);
4877 ASSERT(!isnullstartblock(got.br_startblock));
4878 qfield = 0;
4879 error = 0;
4880
4881 /*
4882 	 * If the directory code is running with no block reservation, the
4883 	 * deleted block is in the middle of its extent, and the resulting
4884 	 * insert of an extent would cause transformation to btree format,
4885 	 * then reject it.  The calling code will then swap blocks around
4886 	 * instead.  We have to do this now, rather than waiting for the
4887 * conversion to btree format, since the transaction will be dirty then.
4888 */
4889 if (tp->t_blk_res == 0 &&
4890 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
4891 XFS_IFORK_NEXTENTS(ip, whichfork) >=
4892 XFS_IFORK_MAXEXT(ip, whichfork) &&
4893 del->br_startoff > got.br_startoff && del_endoff < got_endoff)
4894 return -ENOSPC;
4895
4896 flags = XFS_ILOG_CORE;
4897 if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
4898 xfs_fsblock_t bno;
4899 xfs_filblks_t len;
4900
4901 ASSERT(do_mod(del->br_blockcount, mp->m_sb.sb_rextsize) == 0);
4902 ASSERT(do_mod(del->br_startblock, mp->m_sb.sb_rextsize) == 0);
4903 bno = del->br_startblock;
4904 len = del->br_blockcount;
4905 do_div(bno, mp->m_sb.sb_rextsize);
4906 do_div(len, mp->m_sb.sb_rextsize);
4907 error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
4908 if (error)
4909 goto done;
4910 do_fx = 0;
4911 nblks = len * mp->m_sb.sb_rextsize;
4912 qfield = XFS_TRANS_DQ_RTBCOUNT;
4913 } else {
4914 do_fx = 1;
4915 nblks = del->br_blockcount;
4916 qfield = XFS_TRANS_DQ_BCOUNT;
4917 }
4918
4919 del_endblock = del->br_startblock + del->br_blockcount;
4920 if (cur) {
4921 error = xfs_bmbt_lookup_eq(cur, &got, &i);
4922 if (error)
4923 goto done;
4924 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4925 }
4926
4927 if (got.br_startoff == del->br_startoff)
4928 state |= BMAP_LEFT_FILLING;
4929 if (got_endoff == del_endoff)
4930 state |= BMAP_RIGHT_FILLING;
4931
4932 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
4933 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
4934 /*
4935 * Matches the whole extent. Delete the entry.
4936 */
4937 xfs_iext_remove(ip, *idx, 1, state);
4938 --*idx;
4939
4940 XFS_IFORK_NEXT_SET(ip, whichfork,
4941 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
4942 flags |= XFS_ILOG_CORE;
4943 if (!cur) {
4944 flags |= xfs_ilog_fext(whichfork);
4945 break;
4946 }
4947 if ((error = xfs_btree_delete(cur, &i)))
4948 goto done;
4949 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4950 break;
4951 case BMAP_LEFT_FILLING:
4952 /*
4953 * Deleting the first part of the extent.
4954 */
4955 got.br_startoff = del_endoff;
4956 got.br_startblock = del_endblock;
4957 got.br_blockcount -= del->br_blockcount;
4958 xfs_iext_update_extent(ip, state, *idx, &got);
4959 if (!cur) {
4960 flags |= xfs_ilog_fext(whichfork);
4961 break;
4962 }
4963 error = xfs_bmbt_update(cur, &got);
4964 if (error)
4965 goto done;
4966 break;
4967 case BMAP_RIGHT_FILLING:
4968 /*
4969 * Deleting the last part of the extent.
4970 */
4971 got.br_blockcount -= del->br_blockcount;
4972 xfs_iext_update_extent(ip, state, *idx, &got);
4973 if (!cur) {
4974 flags |= xfs_ilog_fext(whichfork);
4975 break;
4976 }
4977 error = xfs_bmbt_update(cur, &got);
4978 if (error)
4979 goto done;
4980 break;
4981 case 0:
4982 /*
4983 * Deleting the middle of the extent.
4984 */
4985 old = got;
4986
4987 got.br_blockcount = del->br_startoff - got.br_startoff;
4988 xfs_iext_update_extent(ip, state, *idx, &got);
4989
4990 new.br_startoff = del_endoff;
4991 new.br_blockcount = got_endoff - del_endoff;
4992 new.br_state = got.br_state;
4993 new.br_startblock = del_endblock;
4994
4995 flags |= XFS_ILOG_CORE;
4996 if (cur) {
4997 error = xfs_bmbt_update(cur, &got);
4998 if (error)
4999 goto done;
5000 error = xfs_btree_increment(cur, 0, &i);
5001 if (error)
5002 goto done;
5003 cur->bc_rec.b = new;
5004 error = xfs_btree_insert(cur, &i);
5005 if (error && error != -ENOSPC)
5006 goto done;
5007 /*
5008 			 * If we get ENOSPC back from the btree insert, it tried
5009 			 * a split and we have a zero block reservation.  Fix up
5010 			 * our state and return the error.
5011 */
5012 if (error == -ENOSPC) {
5013 /*
5014 * Reset the cursor, don't trust it after any
5015 * insert operation.
5016 */
5017 error = xfs_bmbt_lookup_eq(cur, &got, &i);
5018 if (error)
5019 goto done;
5020 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
5021 /*
5022 * Update the btree record back
5023 * to the original value.
5024 */
5025 error = xfs_bmbt_update(cur, &old);
5026 if (error)
5027 goto done;
5028 /*
5029 * Reset the extent record back
5030 * to the original value.
5031 */
5032 xfs_iext_update_extent(ip, state, *idx, &old);
5033 flags = 0;
5034 error = -ENOSPC;
5035 goto done;
5036 }
5037 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
5038 } else
5039 flags |= xfs_ilog_fext(whichfork);
5040 XFS_IFORK_NEXT_SET(ip, whichfork,
5041 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
5042 xfs_iext_insert(ip, *idx + 1, 1, &new, state);
5043 ++*idx;
5044 break;
5045 }
5046
5047 /* remove reverse mapping */
5048 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del);
5049 if (error)
5050 goto done;
5051
5052 /*
5053 * If we need to, add to list of extents to delete.
5054 */
5055 if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
5056 if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
5057 error = xfs_refcount_decrease_extent(mp, dfops, del);
5058 if (error)
5059 goto done;
5060 } else
5061 xfs_bmap_add_free(mp, dfops, del->br_startblock,
5062 del->br_blockcount, NULL);
5063 }
5064
5065 /*
5066 * Adjust inode # blocks in the file.
5067 */
5068 if (nblks)
5069 ip->i_d.di_nblocks -= nblks;
5070 /*
5071 * Adjust quota data.
5072 */
5073 if (qfield && !(bflags & XFS_BMAPI_REMAP))
5074 xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5075
5076 done:
5077 *logflagsp = flags;
5078 return error;
5079 }
5080
5081 /*
5082 * Unmap (remove) blocks from a file.
5083 * If nexts is nonzero then the number of extents to remove is limited to
5084 * that value. If not all extents in the block range can be removed then
5085  * *rlen returns the remaining length (xfs_bunmapi() maps this to *done).
5086 */
5087 int /* error */
5088 __xfs_bunmapi(
5089 xfs_trans_t *tp, /* transaction pointer */
5090 struct xfs_inode *ip, /* incore inode */
5091 xfs_fileoff_t start, /* first file offset deleted */
5092 xfs_filblks_t *rlen, /* i/o: amount remaining */
5093 int flags, /* misc flags */
5094 xfs_extnum_t nexts, /* number of extents max */
5095 xfs_fsblock_t *firstblock, /* first allocated block
5096 controls a.g. for allocs */
5097 struct xfs_defer_ops *dfops) /* i/o: deferred updates */
5098 {
5099 xfs_btree_cur_t *cur; /* bmap btree cursor */
5100 xfs_bmbt_irec_t del; /* extent being deleted */
5101 int error; /* error return value */
5102 xfs_extnum_t extno; /* extent number in list */
5103 xfs_bmbt_irec_t got; /* current extent record */
5104 xfs_ifork_t *ifp; /* inode fork pointer */
5105 int isrt; /* freeing in rt area */
5106 xfs_extnum_t lastx; /* last extent index used */
5107 int logflags; /* transaction logging flags */
5108 xfs_extlen_t mod; /* rt extent offset */
5109 xfs_mount_t *mp; /* mount structure */
5110 int tmp_logflags; /* partial logging flags */
5111 int wasdel; /* was a delayed alloc extent */
5112 int whichfork; /* data or attribute fork */
5113 xfs_fsblock_t sum;
5114 xfs_filblks_t len = *rlen; /* length to unmap in file */
5115 xfs_fileoff_t max_len;
5116 xfs_agnumber_t prev_agno = NULLAGNUMBER, agno;
5117 xfs_fileoff_t end;
5118
5119 trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);
5120
5121 whichfork = xfs_bmapi_whichfork(flags);
5122 ASSERT(whichfork != XFS_COW_FORK);
5123 ifp = XFS_IFORK_PTR(ip, whichfork);
5124 if (unlikely(
5125 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5126 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
5127 XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
5128 ip->i_mount);
5129 return -EFSCORRUPTED;
5130 }
5131 mp = ip->i_mount;
5132 if (XFS_FORCED_SHUTDOWN(mp))
5133 return -EIO;
5134
5135 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5136 ASSERT(len > 0);
5137 ASSERT(nexts >= 0);
5138
5139 /*
5140 * Guesstimate how many blocks we can unmap without running the risk of
5141 * blowing out the transaction with a mix of EFIs and reflink
5142 * adjustments.
5143 */
5144 if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
5145 max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
5146 else
5147 max_len = len;
5148
5149 if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5150 (error = xfs_iread_extents(tp, ip, whichfork)))
5151 return error;
5152 if (xfs_iext_count(ifp) == 0) {
5153 *rlen = 0;
5154 return 0;
5155 }
5156 XFS_STATS_INC(mp, xs_blk_unmap);
5157 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5158 end = start + len - 1;
5159
5160 /*
5161 * Check to see if the given block number is past the end of the
5162 	 * file, and back up to the last block if so.
5163 */
5164 if (!xfs_iext_lookup_extent(ip, ifp, end, &lastx, &got)) {
5165 ASSERT(lastx > 0);
5166 xfs_iext_get_extent(ifp, --lastx, &got);
5167 end = got.br_startoff + got.br_blockcount - 1;
5168 }
5169
5170 logflags = 0;
5171 if (ifp->if_flags & XFS_IFBROOT) {
5172 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
5173 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5174 cur->bc_private.b.firstblock = *firstblock;
5175 cur->bc_private.b.dfops = dfops;
5176 cur->bc_private.b.flags = 0;
5177 } else
5178 cur = NULL;
5179
5180 if (isrt) {
5181 /*
5182 * Synchronize by locking the bitmap inode.
5183 */
5184 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5185 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5186 xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5187 xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5188 }
5189
5190 extno = 0;
5191 while (end != (xfs_fileoff_t)-1 && end >= start && lastx >= 0 &&
5192 (nexts == 0 || extno < nexts) && max_len > 0) {
5193 /*
5194 * Is the found extent after a hole in which end lives?
5195 		 * If so, just back up to the previous extent.
5196 */
5197 if (got.br_startoff > end) {
5198 if (--lastx < 0)
5199 break;
5200 xfs_iext_get_extent(ifp, lastx, &got);
5201 }
5202 /*
5203 * Is the last block of this extent before the range
5204 * we're supposed to delete? If so, we're done.
5205 */
5206 end = XFS_FILEOFF_MIN(end,
5207 got.br_startoff + got.br_blockcount - 1);
5208 if (end < start)
5209 break;
5210 /*
5211 * Then deal with the (possibly delayed) allocated space
5212 * we found.
5213 */
5214 del = got;
5215 wasdel = isnullstartblock(del.br_startblock);
5216
5217 /*
5218 * Make sure we don't touch multiple AGF headers out of order
5219 * in a single transaction, as that could cause AB-BA deadlocks.
5220 */
5221 if (!wasdel) {
5222 agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
5223 if (prev_agno != NULLAGNUMBER && prev_agno > agno)
5224 break;
5225 prev_agno = agno;
5226 }
5227 if (got.br_startoff < start) {
5228 del.br_startoff = start;
5229 del.br_blockcount -= start - got.br_startoff;
5230 if (!wasdel)
5231 del.br_startblock += start - got.br_startoff;
5232 }
5233 if (del.br_startoff + del.br_blockcount > end + 1)
5234 del.br_blockcount = end + 1 - del.br_startoff;
5235
5236 /* How much can we safely unmap? */
5237 if (max_len < del.br_blockcount) {
5238 del.br_startoff += del.br_blockcount - max_len;
5239 if (!wasdel)
5240 del.br_startblock += del.br_blockcount - max_len;
5241 del.br_blockcount = max_len;
5242 }
5243
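		/*
		 * mod is the offset of the end of the deleted range within
		 * its realtime extent; nonzero means the unmap does not end
		 * on an rt extent boundary.
		 */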
5244 sum = del.br_startblock + del.br_blockcount;
5245 if (isrt &&
5246 (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
5247 /*
5248 * Realtime extent not lined up at the end.
5249 * The extent could have been split into written
5250 * and unwritten pieces, or we could just be
5251 * unmapping part of it. But we can't really
5252 * get rid of part of a realtime extent.
5253 */
5254 if (del.br_state == XFS_EXT_UNWRITTEN ||
5255 !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5256 /*
5257 * This piece is unwritten, or we're not
5258 * using unwritten extents. Skip over it.
5259 */
5260 ASSERT(end >= mod);
5261 end -= mod > del.br_blockcount ?
5262 del.br_blockcount : mod;
5263 if (end < got.br_startoff) {
5264 if (--lastx >= 0)
5265 xfs_iext_get_extent(ifp, lastx,
5266 &got);
5267 }
5268 continue;
5269 }
5270 /*
5271 * It's written, turn it unwritten.
5272 * This is better than zeroing it.
5273 */
5274 ASSERT(del.br_state == XFS_EXT_NORM);
5275 ASSERT(tp->t_blk_res > 0);
5276 /*
5277 * If this spans a realtime extent boundary,
5278 * chop it back to the start of the one we end at.
5279 */
5280 if (del.br_blockcount > mod) {
5281 del.br_startoff += del.br_blockcount - mod;
5282 del.br_startblock += del.br_blockcount - mod;
5283 del.br_blockcount = mod;
5284 }
5285 del.br_state = XFS_EXT_UNWRITTEN;
5286 error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5287 whichfork, &lastx, &cur, &del,
5288 firstblock, dfops, &logflags);
5289 if (error)
5290 goto error0;
5291 goto nodelete;
5292 }
5293 if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) {
5294 /*
5295 * Realtime extent is lined up at the end but not
5296 * at the front. We'll get rid of full extents if
5297 * we can.
5298 */
5299 mod = mp->m_sb.sb_rextsize - mod;
5300 if (del.br_blockcount > mod) {
5301 del.br_blockcount -= mod;
5302 del.br_startoff += mod;
5303 del.br_startblock += mod;
5304 } else if ((del.br_startoff == start &&
5305 (del.br_state == XFS_EXT_UNWRITTEN ||
5306 tp->t_blk_res == 0)) ||
5307 !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5308 /*
5309 * Can't make it unwritten. There isn't
5310 * a full extent here so just skip it.
5311 */
5312 ASSERT(end >= del.br_blockcount);
5313 end -= del.br_blockcount;
5314 if (got.br_startoff > end && --lastx >= 0)
5315 xfs_iext_get_extent(ifp, lastx, &got);
5316 continue;
5317 } else if (del.br_state == XFS_EXT_UNWRITTEN) {
5318 struct xfs_bmbt_irec prev;
5319
5320 /*
5321 * This one is already unwritten.
5322 * It must have a written left neighbor.
5323 * Unwrite the killed part of that one and
5324 * try again.
5325 */
5326 ASSERT(lastx > 0);
5327 xfs_iext_get_extent(ifp, lastx - 1, &prev);
5328 ASSERT(prev.br_state == XFS_EXT_NORM);
5329 ASSERT(!isnullstartblock(prev.br_startblock));
5330 ASSERT(del.br_startblock ==
5331 prev.br_startblock + prev.br_blockcount);
5332 if (prev.br_startoff < start) {
5333 mod = start - prev.br_startoff;
5334 prev.br_blockcount -= mod;
5335 prev.br_startblock += mod;
5336 prev.br_startoff = start;
5337 }
5338 prev.br_state = XFS_EXT_UNWRITTEN;
5339 lastx--;
5340 error = xfs_bmap_add_extent_unwritten_real(tp,
5341 ip, whichfork, &lastx, &cur,
5342 &prev, firstblock, dfops,
5343 &logflags);
5344 if (error)
5345 goto error0;
5346 goto nodelete;
5347 } else {
5348 ASSERT(del.br_state == XFS_EXT_NORM);
5349 del.br_state = XFS_EXT_UNWRITTEN;
5350 error = xfs_bmap_add_extent_unwritten_real(tp,
5351 ip, whichfork, &lastx, &cur,
5352 &del, firstblock, dfops,
5353 &logflags);
5354 if (error)
5355 goto error0;
5356 goto nodelete;
5357 }
5358 }
5359
5360 if (wasdel) {
5361 error = xfs_bmap_del_extent_delay(ip, whichfork, &lastx,
5362 &got, &del);
5363 } else {
5364 error = xfs_bmap_del_extent_real(ip, tp, &lastx, dfops,
5365 cur, &del, &tmp_logflags, whichfork,
5366 flags);
5367 logflags |= tmp_logflags;
5368 }
5369
5370 if (error)
5371 goto error0;
5372
5373 max_len -= del.br_blockcount;
5374 end = del.br_startoff - 1;
5375 nodelete:
5376 /*
5377 * If not done go on to the next (previous) record.
5378 */
5379 if (end != (xfs_fileoff_t)-1 && end >= start) {
5380 if (lastx >= 0) {
5381 xfs_iext_get_extent(ifp, lastx, &got);
5382 if (got.br_startoff > end && --lastx >= 0)
5383 xfs_iext_get_extent(ifp, lastx, &got);
5384 }
5385 extno++;
5386 }
5387 }
5388 if (end == (xfs_fileoff_t)-1 || end < start || lastx < 0)
5389 *rlen = 0;
5390 else
5391 *rlen = end - start + 1;
5392
5393 /*
5394 * Convert to a btree if necessary.
5395 */
5396 if (xfs_bmap_needs_btree(ip, whichfork)) {
5397 ASSERT(cur == NULL);
5398 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops,
5399 &cur, 0, &tmp_logflags, whichfork);
5400 logflags |= tmp_logflags;
5401 if (error)
5402 goto error0;
5403 }
5404 /*
5405 * transform from btree to extents, give it cur
5406 */
5407 else if (xfs_bmap_wants_extents(ip, whichfork)) {
5408 ASSERT(cur != NULL);
5409 error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
5410 whichfork);
5411 logflags |= tmp_logflags;
5412 if (error)
5413 goto error0;
5414 }
5415 /*
5416 * transform from extents to local?
5417 */
5418 error = 0;
5419 error0:
5420 /*
5421 	 * Log everything.  Do this after conversion; there's no point in
5422 * logging the extent records if we've converted to btree format.
5423 */
5424 if ((logflags & xfs_ilog_fext(whichfork)) &&
5425 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
5426 logflags &= ~xfs_ilog_fext(whichfork);
5427 else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5428 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
5429 logflags &= ~xfs_ilog_fbroot(whichfork);
5430 /*
5431 	 * Log the inode even in the error case; if the transaction
5432 * is dirty we'll need to shut down the filesystem.
5433 */
5434 if (logflags)
5435 xfs_trans_log_inode(tp, ip, logflags);
5436 if (cur) {
5437 if (!error) {
5438 *firstblock = cur->bc_private.b.firstblock;
5439 cur->bc_private.b.allocated = 0;
5440 }
5441 xfs_btree_del_cursor(cur,
5442 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5443 }
5444 return error;
5445 }
5446
5447 /* Unmap a range of a file. */
5448 int
5449 xfs_bunmapi(
5450 xfs_trans_t *tp,
5451 struct xfs_inode *ip,
5452 xfs_fileoff_t bno,
5453 xfs_filblks_t len,
5454 int flags,
5455 xfs_extnum_t nexts,
5456 xfs_fsblock_t *firstblock,
5457 struct xfs_defer_ops *dfops,
5458 int *done)
5459 {
5460 int error;
5461
5462 error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts, firstblock,
5463 dfops);
5464 *done = (len == 0);
5465 return error;
5466 }
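/*
 * Illustrative caller sketch (placeholders, not part of this file):
 * callers of the simple wrapper typically loop until the range is fully
 * unmapped, finishing deferred ops between calls:
 *
 *	int	done = 0;
 *
 *	while (!done) {
 *		error = xfs_bunmapi(tp, ip, start_fsb, len_fsb, 0, nexts,
 *				&firstblock, &dfops, &done);
 *		if (error)
 *			break;
 *		error = xfs_defer_finish(&tp, &dfops);
 *		if (error)
 *			break;
 *	}
 *
 * start_fsb, len_fsb and nexts stand in for the caller's own values.
 */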
5467
5468 /*
5469 * Determine whether an extent shift can be accomplished by a merge with the
5470 * extent that precedes the target hole of the shift.
5471 */
5472 STATIC bool
5473 xfs_bmse_can_merge(
5474 struct xfs_bmbt_irec *left, /* preceding extent */
5475 struct xfs_bmbt_irec *got, /* current extent to shift */
5476 xfs_fileoff_t shift) /* shift fsb */
5477 {
5478 xfs_fileoff_t startoff;
5479
5480 startoff = got->br_startoff - shift;
5481
5482 /*
5483 * The extent, once shifted, must be adjacent in-file and on-disk with
5484 * the preceding extent.
5485 */
5486 if ((left->br_startoff + left->br_blockcount != startoff) ||
5487 (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5488 (left->br_state != got->br_state) ||
5489 (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
5490 return false;
5491
5492 return true;
5493 }
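/*
 * Worked example with illustrative numbers: given
 * left = { br_startoff 0, br_startblock 100, br_blockcount 10 } and
 * got = { br_startoff 15, br_startblock 110, br_blockcount 5 }, a shift
 * of 5 gives startoff = 10.  The shifted extent then abuts left both
 * in-file (0 + 10 == 10) and on-disk (100 + 10 == 110), so if the
 * states match and 10 + 5 <= MAXEXTLEN, the merge is allowed.
 */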
5494
5495 /*
5496 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5497 * hole in the file. If an extent shift would result in the extent being fully
5498 * adjacent to the extent that currently precedes the hole, we can merge with
5499 * the preceding extent rather than do the shift.
5500 *
5501 * This function assumes the caller has verified a shift-by-merge is possible
5502 * with the provided extents via xfs_bmse_can_merge().
5503 */
5504 STATIC int
5505 xfs_bmse_merge(
5506 struct xfs_inode *ip,
5507 int whichfork,
5508 xfs_fileoff_t shift, /* shift fsb */
5509 int current_ext, /* idx of gotp */
5510 struct xfs_bmbt_irec *got, /* extent to shift */
5511 struct xfs_bmbt_irec *left, /* preceding extent */
5512 struct xfs_btree_cur *cur,
5513 int *logflags, /* output */
5514 struct xfs_defer_ops *dfops)
5515 {
5516 struct xfs_bmbt_irec new;
5517 xfs_filblks_t blockcount;
5518 int error, i;
5519 struct xfs_mount *mp = ip->i_mount;
5520
5521 blockcount = left->br_blockcount + got->br_blockcount;
5522
5523 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5524 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5525 ASSERT(xfs_bmse_can_merge(left, got, shift));
5526
5527 new = *left;
5528 new.br_blockcount = blockcount;
5529
5530 /*
5531 * Update the on-disk extent count, the btree if necessary and log the
5532 * inode.
5533 */
5534 XFS_IFORK_NEXT_SET(ip, whichfork,
5535 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
5536 *logflags |= XFS_ILOG_CORE;
5537 if (!cur) {
5538 *logflags |= XFS_ILOG_DEXT;
5539 goto done;
5540 }
5541
5542 /* lookup and remove the extent to merge */
5543 error = xfs_bmbt_lookup_eq(cur, got, &i);
5544 if (error)
5545 return error;
5546 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5547
5548 error = xfs_btree_delete(cur, &i);
5549 if (error)
5550 return error;
5551 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5552
5553 /* lookup and update size of the previous extent */
5554 error = xfs_bmbt_lookup_eq(cur, left, &i);
5555 if (error)
5556 return error;
5557 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5558
5559 error = xfs_bmbt_update(cur, &new);
5560 if (error)
5561 return error;
5562
5563 done:
5564 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork),
5565 current_ext - 1, &new);
5566 xfs_iext_remove(ip, current_ext, 1, 0);
5567
5568 /* update reverse mapping. rmap functions merge the rmaps for us */
5569 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got);
5570 if (error)
5571 return error;
5572 memcpy(&new, got, sizeof(new));
5573 new.br_startoff = left->br_startoff + left->br_blockcount;
5574 return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new);
5575 }
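/*
 * Before/after sketch (same illustrative numbers as above): merging
 * got = { 15, 110, 5 } shifted left by 5 into left = { 0, 100, 10 }
 * yields a single record { 0, 100, 15 }.  The incore record for got is
 * removed, left is rewritten with the combined length, and the rmap
 * updates above re-map got's reverse mapping at its new file offset so
 * the rmap code can merge it with left's mapping.
 */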
5576
5577 /*
5578 * Shift a single extent.
5579 */
5580 STATIC int
5581 xfs_bmse_shift_one(
5582 struct xfs_inode *ip,
5583 int whichfork,
5584 xfs_fileoff_t offset_shift_fsb,
5585 int *current_ext,
5586 struct xfs_bmbt_irec *got,
5587 struct xfs_btree_cur *cur,
5588 int *logflags,
5589 enum shift_direction direction,
5590 struct xfs_defer_ops *dfops)
5591 {
5592 struct xfs_ifork *ifp;
5593 struct xfs_mount *mp;
5594 xfs_fileoff_t startoff;
5595 struct xfs_bmbt_irec adj_irec, new;
5596 int error;
5597 int i;
5598 int total_extents;
5599
5600 mp = ip->i_mount;
5601 ifp = XFS_IFORK_PTR(ip, whichfork);
5602 total_extents = xfs_iext_count(ifp);
5603
5604 /* delalloc extents should be prevented by caller */
5605 XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got->br_startblock));
5606
5607 if (direction == SHIFT_LEFT) {
5608 startoff = got->br_startoff - offset_shift_fsb;
5609
5610 /*
5611 * Check for merge if we've got an extent to the left,
5612 * otherwise make sure there's enough room at the start
5613 * of the file for the shift.
5614 */
5615 if (!*current_ext) {
5616 if (got->br_startoff < offset_shift_fsb)
5617 return -EINVAL;
5618 goto update_current_ext;
5619 }
5620
5621 /*
5622		 * Grab the left extent and check for a large enough hole.
5623 */
5624 xfs_iext_get_extent(ifp, *current_ext - 1, &adj_irec);
5625 if (startoff < adj_irec.br_startoff + adj_irec.br_blockcount)
5626 return -EINVAL;
5627
5628 /* check whether to merge the extent or shift it down */
5629 if (xfs_bmse_can_merge(&adj_irec, got, offset_shift_fsb)) {
5630 return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
5631 *current_ext, got, &adj_irec,
5632 cur, logflags, dfops);
5633 }
5634 } else {
5635 startoff = got->br_startoff + offset_shift_fsb;
5636 /* nothing to move if this is the last extent */
5637 if (*current_ext >= (total_extents - 1))
5638 goto update_current_ext;
5639
5640 /*
5641		 * If this is not the last extent in the file, make sure there
5642		 * is enough room between the current extent and the next extent
5643		 * to accommodate the shift.
5644 */
5645 xfs_iext_get_extent(ifp, *current_ext + 1, &adj_irec);
5646 if (startoff + got->br_blockcount > adj_irec.br_startoff)
5647 return -EINVAL;
5648
5649 /*
5650 * Unlike a left shift (which involves a hole punch),
5651 * a right shift does not modify extent neighbors
5652 * in any way. We should never find mergeable extents
5653		 * in this scenario. Check anyway and warn if we
5654 * encounter two extents that could be one.
5655 */
5656 if (xfs_bmse_can_merge(got, &adj_irec, offset_shift_fsb))
5657 WARN_ON_ONCE(1);
5658 }
5659
5660 /*
5661	 * Adjust the extent index for the next iteration, update the start
5662	 * offset of the in-core extent and update the btree if applicable.
5663 */
5664 update_current_ext:
5665 *logflags |= XFS_ILOG_CORE;
5666
5667 new = *got;
5668 new.br_startoff = startoff;
5669
5670 if (cur) {
5671 error = xfs_bmbt_lookup_eq(cur, got, &i);
5672 if (error)
5673 return error;
5674 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5675
5676 error = xfs_bmbt_update(cur, &new);
5677 if (error)
5678 return error;
5679 } else {
5680 *logflags |= XFS_ILOG_DEXT;
5681 }
5682
5683 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork),
5684 *current_ext, &new);
5685
5686 if (direction == SHIFT_LEFT)
5687 (*current_ext)++;
5688 else
5689 (*current_ext)--;
5690
5691 /* update reverse mapping */
5692 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got);
5693 if (error)
5694 return error;
5695 return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new);
5696 }
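/*
 * Note on the index movement above (descriptive): a left shift walks
 * the extent list in ascending order, so *current_ext is incremented to
 * reach the next record, while a right shift walks in descending order
 * and decrements it.  The merge case returns early via xfs_bmse_merge()
 * without touching the index, because removing the merged record shifts
 * the remaining records down by one slot.
 */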
5697
5698 /*
5699 * Shift extent records to the left/right to cover/create a hole.
5700 *
5701 * The maximum number of extents to be shifted in a single operation is
5702 * @num_exts. @stop_fsb specifies the file offset at which to stop the shift,
5703 * and the file offset where we've left off is returned in @next_fsb.
5704 * @offset_shift_fsb is the length by which each extent is shifted. If there
5705 * is no hole to shift the extents into, this is considered an invalid
5706 * operation and we abort immediately.
5707 */
5708 int
5709 xfs_bmap_shift_extents(
5710 struct xfs_trans *tp,
5711 struct xfs_inode *ip,
5712 xfs_fileoff_t *next_fsb,
5713 xfs_fileoff_t offset_shift_fsb,
5714 int *done,
5715 xfs_fileoff_t stop_fsb,
5716 xfs_fsblock_t *firstblock,
5717 struct xfs_defer_ops *dfops,
5718 enum shift_direction direction,
5719 int num_exts)
5720 {
5721 struct xfs_btree_cur *cur = NULL;
5722 struct xfs_bmbt_irec got;
5723 struct xfs_mount *mp = ip->i_mount;
5724 struct xfs_ifork *ifp;
5725 xfs_extnum_t nexts = 0;
5726 xfs_extnum_t current_ext;
5727 xfs_extnum_t total_extents;
5728 xfs_extnum_t stop_extent;
5729 int error = 0;
5730 int whichfork = XFS_DATA_FORK;
5731 int logflags = 0;
5732
5733 if (unlikely(XFS_TEST_ERROR(
5734 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5735 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5736 mp, XFS_ERRTAG_BMAPIFORMAT))) {
5737 XFS_ERROR_REPORT("xfs_bmap_shift_extents",
5738 XFS_ERRLEVEL_LOW, mp);
5739 return -EFSCORRUPTED;
5740 }
5741
5742 if (XFS_FORCED_SHUTDOWN(mp))
5743 return -EIO;
5744
5745 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5746 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5747 ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
5748
5749 ifp = XFS_IFORK_PTR(ip, whichfork);
5750 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5751 /* Read in all the extents */
5752 error = xfs_iread_extents(tp, ip, whichfork);
5753 if (error)
5754 return error;
5755 }
5756
5757 if (ifp->if_flags & XFS_IFBROOT) {
5758 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5759 cur->bc_private.b.firstblock = *firstblock;
5760 cur->bc_private.b.dfops = dfops;
5761 cur->bc_private.b.flags = 0;
5762 }
5763
5764 /*
5765 * There may be delalloc extents in the data fork before the range we
5766 * are collapsing out, so we cannot use the count of real extents here.
5767 * Instead we have to calculate it from the incore fork.
5768 */
5769 total_extents = xfs_iext_count(ifp);
5770 if (total_extents == 0) {
5771 *done = 1;
5772 goto del_cursor;
5773 }
5774
5775 /*
5776	 * For the first right shift, we need to initialize next_fsb.
5777 */
5778 if (*next_fsb == NULLFSBLOCK) {
5779 ASSERT(direction == SHIFT_RIGHT);
5780
5781 current_ext = total_extents - 1;
5782 xfs_iext_get_extent(ifp, current_ext, &got);
5783 if (stop_fsb > got.br_startoff) {
5784 *done = 1;
5785 goto del_cursor;
5786 }
5787 *next_fsb = got.br_startoff;
5788 } else {
5789 /*
5790 * Look up the extent index for the fsb where we start shifting. We can
5791 * henceforth iterate with current_ext as extent list changes are locked
5792 * out via ilock.
5793 *
5794		 * If next_fsb lies in a hole beyond which there are no extents,
5795		 * we are done.
5796 */
5797 if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &current_ext,
5798 &got)) {
5799 *done = 1;
5800 goto del_cursor;
5801 }
5802 }
5803
5804	/* Look up the extent index at which we have to stop */
5805 if (direction == SHIFT_RIGHT) {
5806 struct xfs_bmbt_irec s;
5807
5808 xfs_iext_lookup_extent(ip, ifp, stop_fsb, &stop_extent, &s);
5809 /* Make stop_extent exclusive of shift range */
5810 stop_extent--;
5811 if (current_ext <= stop_extent) {
5812 error = -EIO;
5813 goto del_cursor;
5814 }
5815 } else {
5816 stop_extent = total_extents;
5817 if (current_ext >= stop_extent) {
5818 error = -EIO;
5819 goto del_cursor;
5820 }
5821 }
5822
5823 while (nexts++ < num_exts) {
5824 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
5825 &current_ext, &got, cur, &logflags,
5826 direction, dfops);
5827 if (error)
5828 goto del_cursor;
5829 /*
5830 * If there was an extent merge during the shift, the extent
5831		 * count can change. Update the total and grab the next record.
5832 */
5833 if (direction == SHIFT_LEFT) {
5834 total_extents = xfs_iext_count(ifp);
5835 stop_extent = total_extents;
5836 }
5837
5838 if (current_ext == stop_extent) {
5839 *done = 1;
5840 *next_fsb = NULLFSBLOCK;
5841 break;
5842 }
5843 xfs_iext_get_extent(ifp, current_ext, &got);
5844 }
5845
5846 if (!*done)
5847 *next_fsb = got.br_startoff;
5848
5849 del_cursor:
5850 if (cur)
5851 xfs_btree_del_cursor(cur,
5852 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5853
5854 if (logflags)
5855 xfs_trans_log_inode(tp, ip, logflags);
5856
5857 return error;
5858 }
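/*
 * Illustrative use (placeholders, not defined in this file): a
 * collapse-range style caller shifts everything beyond the punched-out
 * region left by the hole's length, looping until *done is set:
 *
 *	while (!done) {
 *		error = xfs_bmap_shift_extents(tp, ip, &next_fsb,
 *				shift_fsb, &done, stop_fsb, &firstblock,
 *				&dfops, SHIFT_LEFT, num_exts);
 *		if (error)
 *			break;
 *		... finish deferred ops and roll the transaction ...
 *	}
 *
 * next_fsb, shift_fsb, stop_fsb and num_exts stand in for the caller's
 * own values.
 */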
5859
5860 /*
5861 * Splits an extent into two at split_fsb, which becomes the first block
5862 * of the new extent. @current_ext is the target extent to be split and
5863 * @split_fsb is the block at which it is split. If split_fsb lies in a
5864 * hole or at the first block of an extent, just return 0.
5865 */
5866 STATIC int
5867 xfs_bmap_split_extent_at(
5868 struct xfs_trans *tp,
5869 struct xfs_inode *ip,
5870 xfs_fileoff_t split_fsb,
5871 xfs_fsblock_t *firstfsb,
5872 struct xfs_defer_ops *dfops)
5873 {
5874 int whichfork = XFS_DATA_FORK;
5875 struct xfs_btree_cur *cur = NULL;
5876 struct xfs_bmbt_irec got;
5877 struct xfs_bmbt_irec new; /* split extent */
5878 struct xfs_mount *mp = ip->i_mount;
5879 struct xfs_ifork *ifp;
5880 xfs_fsblock_t gotblkcnt; /* new block count for got */
5881 xfs_extnum_t current_ext;
5882 int error = 0;
5883 int logflags = 0;
5884 int i = 0;
5885
5886 if (unlikely(XFS_TEST_ERROR(
5887 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5888 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5889 mp, XFS_ERRTAG_BMAPIFORMAT))) {
5890 XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
5891 XFS_ERRLEVEL_LOW, mp);
5892 return -EFSCORRUPTED;
5893 }
5894
5895 if (XFS_FORCED_SHUTDOWN(mp))
5896 return -EIO;
5897
5898 ifp = XFS_IFORK_PTR(ip, whichfork);
5899 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5900 /* Read in all the extents */
5901 error = xfs_iread_extents(tp, ip, whichfork);
5902 if (error)
5903 return error;
5904 }
5905
5906 /*
5907	 * If there are no extents, or split_fsb lies in a hole, we are done.
5908 */
5909 if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &current_ext, &got) ||
5910 got.br_startoff >= split_fsb)
5911 return 0;
5912
5913 gotblkcnt = split_fsb - got.br_startoff;
5914 new.br_startoff = split_fsb;
5915 new.br_startblock = got.br_startblock + gotblkcnt;
5916 new.br_blockcount = got.br_blockcount - gotblkcnt;
5917 new.br_state = got.br_state;
5918
5919 if (ifp->if_flags & XFS_IFBROOT) {
5920 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5921 cur->bc_private.b.firstblock = *firstfsb;
5922 cur->bc_private.b.dfops = dfops;
5923 cur->bc_private.b.flags = 0;
5924 error = xfs_bmbt_lookup_eq(cur, &got, &i);
5925 if (error)
5926 goto del_cursor;
5927 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5928 }
5929
5930 got.br_blockcount = gotblkcnt;
5931 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork),
5932 current_ext, &got);
5933
5934 logflags = XFS_ILOG_CORE;
5935 if (cur) {
5936 error = xfs_bmbt_update(cur, &got);
5937 if (error)
5938 goto del_cursor;
5939 } else
5940 logflags |= XFS_ILOG_DEXT;
5941
5942 /* Add new extent */
5943 current_ext++;
5944 xfs_iext_insert(ip, current_ext, 1, &new, 0);
5945 XFS_IFORK_NEXT_SET(ip, whichfork,
5946 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
5947
5948 if (cur) {
5949 error = xfs_bmbt_lookup_eq(cur, &new, &i);
5950 if (error)
5951 goto del_cursor;
5952 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
5953 error = xfs_btree_insert(cur, &i);
5954 if (error)
5955 goto del_cursor;
5956 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5957 }
5958
5959 /*
5960 * Convert to a btree if necessary.
5961 */
5962 if (xfs_bmap_needs_btree(ip, whichfork)) {
5963 int tmp_logflags; /* partial log flag return val */
5964
5965 ASSERT(cur == NULL);
5966 error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, dfops,
5967 &cur, 0, &tmp_logflags, whichfork);
5968 logflags |= tmp_logflags;
5969 }
5970
5971 del_cursor:
5972 if (cur) {
5973 cur->bc_private.b.allocated = 0;
5974 xfs_btree_del_cursor(cur,
5975 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5976 }
5977
5978 if (logflags)
5979 xfs_trans_log_inode(tp, ip, logflags);
5980 return error;
5981 }
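/*
 * Worked example of the split arithmetic (illustrative numbers):
 * splitting got = { br_startoff 10, br_startblock 100, br_blockcount 8 }
 * at split_fsb = 13 gives gotblkcnt = 3, so got is trimmed to
 * { 10, 100, 3 } and the record inserted after it is { 13, 103, 5 }.
 * The on-disk extent count grows by one, and the fork is converted to
 * btree format if the extra record no longer fits inline.
 */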
5982
5983 int
5984 xfs_bmap_split_extent(
5985 struct xfs_inode *ip,
5986 xfs_fileoff_t split_fsb)
5987 {
5988 struct xfs_mount *mp = ip->i_mount;
5989 struct xfs_trans *tp;
5990 struct xfs_defer_ops dfops;
5991 xfs_fsblock_t firstfsb;
5992 int error;
5993
5994 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
5995 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
5996 if (error)
5997 return error;
5998
5999 xfs_ilock(ip, XFS_ILOCK_EXCL);
6000 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
6001
6002 xfs_defer_init(&dfops, &firstfsb);
6003
6004 error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
6005 &firstfsb, &dfops);
6006 if (error)
6007 goto out;
6008
6009 error = xfs_defer_finish(&tp, &dfops);
6010 if (error)
6011 goto out;
6012
6013 return xfs_trans_commit(tp);
6014
6015 out:
6016 xfs_defer_cancel(&dfops);
6017 xfs_trans_cancel(tp);
6018 return error;
6019 }
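/*
 * The wrapper above follows the usual transaction pattern in this code:
 * allocate a transaction with a block reservation, take the ilock and
 * join the inode, initialize the deferred-ops list, do the work, finish
 * the deferred ops, then commit.  Any failure cancels both the deferred
 * ops and the transaction.
 */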
6020
6021 /* Deferred mapping is only for real extents in the data fork. */
6022 static bool
6023 xfs_bmap_is_update_needed(
6024 struct xfs_bmbt_irec *bmap)
6025 {
6026 return bmap->br_startblock != HOLESTARTBLOCK &&
6027 bmap->br_startblock != DELAYSTARTBLOCK;
6028 }
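/*
 * For example (descriptive): a hole (br_startblock == HOLESTARTBLOCK)
 * or a delalloc reservation (br_startblock == DELAYSTARTBLOCK) has no
 * physical mapping to defer, so no intent item is logged for it.
 */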
6029
6030 /* Record a bmap intent. */
6031 static int
6032 __xfs_bmap_add(
6033 struct xfs_mount *mp,
6034 struct xfs_defer_ops *dfops,
6035 enum xfs_bmap_intent_type type,
6036 struct xfs_inode *ip,
6037 int whichfork,
6038 struct xfs_bmbt_irec *bmap)
6039 {
6040 int error;
6041 struct xfs_bmap_intent *bi;
6042
6043 trace_xfs_bmap_defer(mp,
6044 XFS_FSB_TO_AGNO(mp, bmap->br_startblock),
6045 type,
6046 XFS_FSB_TO_AGBNO(mp, bmap->br_startblock),
6047 ip->i_ino, whichfork,
6048 bmap->br_startoff,
6049 bmap->br_blockcount,
6050 bmap->br_state);
6051
6052 bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_SLEEP | KM_NOFS);
6053 INIT_LIST_HEAD(&bi->bi_list);
6054 bi->bi_type = type;
6055 bi->bi_owner = ip;
6056 bi->bi_whichfork = whichfork;
6057 bi->bi_bmap = *bmap;
6058
6059 error = xfs_defer_ijoin(dfops, bi->bi_owner);
6060 if (error) {
6061 kmem_free(bi);
6062 return error;
6063 }
6064
6065 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
6066 return 0;
6067 }
6068
6069 /* Map an extent into a file. */
6070 int
6071 xfs_bmap_map_extent(
6072 struct xfs_mount *mp,
6073 struct xfs_defer_ops *dfops,
6074 struct xfs_inode *ip,
6075 struct xfs_bmbt_irec *PREV)
6076 {
6077 if (!xfs_bmap_is_update_needed(PREV))
6078 return 0;
6079
6080 return __xfs_bmap_add(mp, dfops, XFS_BMAP_MAP, ip,
6081 XFS_DATA_FORK, PREV);
6082 }
6083
6084 /* Unmap an extent out of a file. */
6085 int
6086 xfs_bmap_unmap_extent(
6087 struct xfs_mount *mp,
6088 struct xfs_defer_ops *dfops,
6089 struct xfs_inode *ip,
6090 struct xfs_bmbt_irec *PREV)
6091 {
6092 if (!xfs_bmap_is_update_needed(PREV))
6093 return 0;
6094
6095 return __xfs_bmap_add(mp, dfops, XFS_BMAP_UNMAP, ip,
6096 XFS_DATA_FORK, PREV);
6097 }
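/*
 * Illustrative flow (assumed caller, not part of this file): a remap
 * operation records its intents under the ilock and lets the
 * deferred-ops machinery replay them later in separate transactions:
 *
 *	error = xfs_bmap_unmap_extent(mp, &dfops, ip, &irec);
 *	...
 *	error = xfs_bmap_map_extent(mp, &dfops, ip, &irec);
 *	...
 *	error = xfs_defer_finish(&tp, &dfops);
 *
 * xfs_defer_finish() eventually invokes xfs_bmap_finish_one() below for
 * each intent that was logged.
 */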
6098
6099 /*
6100 * Process one of the deferred bmap operations, mapping or unmapping an
6101 * extent that was recorded earlier via __xfs_bmap_add().
6102 */
6103 int
6104 xfs_bmap_finish_one(
6105 struct xfs_trans *tp,
6106 struct xfs_defer_ops *dfops,
6107 struct xfs_inode *ip,
6108 enum xfs_bmap_intent_type type,
6109 int whichfork,
6110 xfs_fileoff_t startoff,
6111 xfs_fsblock_t startblock,
6112 xfs_filblks_t *blockcount,
6113 xfs_exntst_t state)
6114 {
6115 xfs_fsblock_t firstfsb;
6116 int error = 0;
6117
6118 /*
6119 * firstfsb is tied to the transaction lifetime and is used to
6120 * ensure correct AG locking order and schedule work item
6121 * continuations. XFS_BUI_MAX_FAST_EXTENTS (== 1) restricts us
6122 * to only making one bmap call per transaction, so it should
6123 * be safe to have it as a local variable here.
6124 */
6125 firstfsb = NULLFSBLOCK;
6126
6127 trace_xfs_bmap_deferred(tp->t_mountp,
6128 XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
6129 XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
6130 ip->i_ino, whichfork, startoff, *blockcount, state);
6131
6132 if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
6133 return -EFSCORRUPTED;
6134
6135 if (XFS_TEST_ERROR(false, tp->t_mountp,
6136 XFS_ERRTAG_BMAP_FINISH_ONE))
6137 return -EIO;
6138
6139 switch (type) {
6140 case XFS_BMAP_MAP:
6141 error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
6142 startblock, dfops);
6143 *blockcount = 0;
6144 break;
6145 case XFS_BMAP_UNMAP:
6146 error = __xfs_bunmapi(tp, ip, startoff, blockcount,
6147 XFS_BMAPI_REMAP, 1, &firstfsb, dfops);
6148 break;
6149 default:
6150 ASSERT(0);
6151 error = -EFSCORRUPTED;
6152 }
6153
6154 return error;
6155 }
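/*
 * Note on partial completion (descriptive, based on the code above):
 * for XFS_BMAP_UNMAP, __xfs_bunmapi() writes the remaining length back
 * through *blockcount; a nonzero remainder tells the deferred-ops code
 * that the intent is incomplete and must be requeued in a new
 * transaction.  XFS_BMAP_MAP consumes the whole mapping in one call, so
 * it zeroes *blockcount unconditionally.
 */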