]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - libxfs/xfs_attr_remote.c
xfs_repair: fix libxfs api violations in quota repair code
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_attr_remote.c
1 /*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
4 * All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it would be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 #include "libxfs_priv.h"
20 #include "xfs_fs.h"
21 #include "xfs_shared.h"
22 #include "xfs_format.h"
23 #include "xfs_log_format.h"
24 #include "xfs_trans_resv.h"
25 #include "xfs_bit.h"
26 #include "xfs_mount.h"
27 #include "xfs_defer.h"
28 #include "xfs_da_format.h"
29 #include "xfs_da_btree.h"
30 #include "xfs_inode.h"
31 #include "xfs_alloc.h"
32 #include "xfs_trans.h"
33 #include "xfs_bmap.h"
34 #include "xfs_attr_leaf.h"
35 #include "xfs_attr_remote.h"
36 #include "xfs_trans_space.h"
37 #include "xfs_trace.h"
38 #include "xfs_cksum.h"
39
#define ATTR_RMTVALUE_MAPSIZE	1	/* # of extent map entries requested per xfs_bmapi_read() call */
41
42 /*
43 * Each contiguous block has a header, so it is not just a simple attribute
44 * length to FSB conversion.
45 */
46 int
47 xfs_attr3_rmt_blocks(
48 struct xfs_mount *mp,
49 int attrlen)
50 {
51 if (xfs_sb_version_hascrc(&mp->m_sb)) {
52 int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
53 return (attrlen + buflen - 1) / buflen;
54 }
55 return XFS_B_TO_FSB(mp, attrlen);
56 }
57
58 /*
59 * Checking of the remote attribute header is split into two parts. The verifier
60 * does CRC, location and bounds checking, the unpacking function checks the
61 * attribute parameters and owner.
62 */
63 static xfs_failaddr_t
64 xfs_attr3_rmt_hdr_ok(
65 void *ptr,
66 xfs_ino_t ino,
67 uint32_t offset,
68 uint32_t size,
69 xfs_daddr_t bno)
70 {
71 struct xfs_attr3_rmt_hdr *rmt = ptr;
72
73 if (bno != be64_to_cpu(rmt->rm_blkno))
74 return __this_address;
75 if (offset != be32_to_cpu(rmt->rm_offset))
76 return __this_address;
77 if (size != be32_to_cpu(rmt->rm_bytes))
78 return __this_address;
79 if (ino != be64_to_cpu(rmt->rm_owner))
80 return __this_address;
81
82 /* ok */
83 return NULL;
84 }
85
86 static xfs_failaddr_t
87 xfs_attr3_rmt_verify(
88 struct xfs_mount *mp,
89 void *ptr,
90 int fsbsize,
91 xfs_daddr_t bno)
92 {
93 struct xfs_attr3_rmt_hdr *rmt = ptr;
94
95 if (!xfs_sb_version_hascrc(&mp->m_sb))
96 return __this_address;
97 if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC))
98 return __this_address;
99 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid))
100 return __this_address;
101 if (be64_to_cpu(rmt->rm_blkno) != bno)
102 return __this_address;
103 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
104 return __this_address;
105 if (be32_to_cpu(rmt->rm_offset) +
106 be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX)
107 return __this_address;
108 if (rmt->rm_owner == 0)
109 return __this_address;
110
111 return NULL;
112 }
113
114 static int
115 __xfs_attr3_rmt_read_verify(
116 struct xfs_buf *bp,
117 bool check_crc,
118 xfs_failaddr_t *failaddr)
119 {
120 struct xfs_mount *mp = bp->b_target->bt_mount;
121 char *ptr;
122 int len;
123 xfs_daddr_t bno;
124 int blksize = mp->m_attr_geo->blksize;
125
126 /* no verification of non-crc buffers */
127 if (!xfs_sb_version_hascrc(&mp->m_sb))
128 return 0;
129
130 ptr = bp->b_addr;
131 bno = bp->b_bn;
132 len = BBTOB(bp->b_length);
133 ASSERT(len >= blksize);
134
135 while (len > 0) {
136 if (check_crc &&
137 !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
138 *failaddr = __this_address;
139 return -EFSBADCRC;
140 }
141 *failaddr = xfs_attr3_rmt_verify(mp, ptr, blksize, bno);
142 if (*failaddr)
143 return -EFSCORRUPTED;
144 len -= blksize;
145 ptr += blksize;
146 bno += BTOBB(blksize);
147 }
148
149 if (len != 0) {
150 *failaddr = __this_address;
151 return -EFSCORRUPTED;
152 }
153
154 return 0;
155 }
156
157 static void
158 xfs_attr3_rmt_read_verify(
159 struct xfs_buf *bp)
160 {
161 xfs_failaddr_t fa;
162 int error;
163
164 error = __xfs_attr3_rmt_read_verify(bp, true, &fa);
165 if (error)
166 xfs_verifier_error(bp, error, fa);
167 }
168
169 static xfs_failaddr_t
170 xfs_attr3_rmt_verify_struct(
171 struct xfs_buf *bp)
172 {
173 xfs_failaddr_t fa;
174 int error;
175
176 error = __xfs_attr3_rmt_read_verify(bp, false, &fa);
177 return error ? fa : NULL;
178 }
179
180 static void
181 xfs_attr3_rmt_write_verify(
182 struct xfs_buf *bp)
183 {
184 struct xfs_mount *mp = bp->b_target->bt_mount;
185 xfs_failaddr_t fa;
186 int blksize = mp->m_attr_geo->blksize;
187 char *ptr;
188 int len;
189 xfs_daddr_t bno;
190
191 /* no verification of non-crc buffers */
192 if (!xfs_sb_version_hascrc(&mp->m_sb))
193 return;
194
195 ptr = bp->b_addr;
196 bno = bp->b_bn;
197 len = BBTOB(bp->b_length);
198 ASSERT(len >= blksize);
199
200 while (len > 0) {
201 struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;
202
203 fa = xfs_attr3_rmt_verify(mp, ptr, blksize, bno);
204 if (fa) {
205 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
206 return;
207 }
208
209 /*
210 * Ensure we aren't writing bogus LSNs to disk. See
211 * xfs_attr3_rmt_hdr_set() for the explanation.
212 */
213 if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) {
214 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
215 return;
216 }
217 xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
218
219 len -= blksize;
220 ptr += blksize;
221 bno += BTOBB(blksize);
222 }
223
224 if (len != 0)
225 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
226 }
227
228 const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
229 .name = "xfs_attr3_rmt",
230 .verify_read = xfs_attr3_rmt_read_verify,
231 .verify_write = xfs_attr3_rmt_write_verify,
232 .verify_struct = xfs_attr3_rmt_verify_struct,
233 };
234
235 STATIC int
236 xfs_attr3_rmt_hdr_set(
237 struct xfs_mount *mp,
238 void *ptr,
239 xfs_ino_t ino,
240 uint32_t offset,
241 uint32_t size,
242 xfs_daddr_t bno)
243 {
244 struct xfs_attr3_rmt_hdr *rmt = ptr;
245
246 if (!xfs_sb_version_hascrc(&mp->m_sb))
247 return 0;
248
249 rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
250 rmt->rm_offset = cpu_to_be32(offset);
251 rmt->rm_bytes = cpu_to_be32(size);
252 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid);
253 rmt->rm_owner = cpu_to_be64(ino);
254 rmt->rm_blkno = cpu_to_be64(bno);
255
256 /*
257 * Remote attribute blocks are written synchronously, so we don't
258 * have an LSN that we can stamp in them that makes any sense to log
259 * recovery. To ensure that log recovery handles overwrites of these
260 * blocks sanely (i.e. once they've been freed and reallocated as some
261 * other type of metadata) we need to ensure that the LSN has a value
262 * that tells log recovery to ignore the LSN and overwrite the buffer
263 * with whatever is in it's log. To do this, we use the magic
264 * NULLCOMMITLSN to indicate that the LSN is invalid.
265 */
266 rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN);
267
268 return sizeof(struct xfs_attr3_rmt_hdr);
269 }
270
/*
 * Helper functions to copy attribute data in and out of the on-disk extents.
 */
274 STATIC int
275 xfs_attr_rmtval_copyout(
276 struct xfs_mount *mp,
277 struct xfs_buf *bp,
278 xfs_ino_t ino,
279 int *offset,
280 int *valuelen,
281 uint8_t **dst)
282 {
283 char *src = bp->b_addr;
284 xfs_daddr_t bno = bp->b_bn;
285 int len = BBTOB(bp->b_length);
286 int blksize = mp->m_attr_geo->blksize;
287
288 ASSERT(len >= blksize);
289
290 while (len > 0 && *valuelen > 0) {
291 int hdr_size = 0;
292 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
293
294 byte_cnt = min(*valuelen, byte_cnt);
295
296 if (xfs_sb_version_hascrc(&mp->m_sb)) {
297 if (xfs_attr3_rmt_hdr_ok(src, ino, *offset,
298 byte_cnt, bno)) {
299 xfs_alert(mp,
300 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
301 bno, *offset, byte_cnt, ino);
302 return -EFSCORRUPTED;
303 }
304 hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
305 }
306
307 memcpy(*dst, src + hdr_size, byte_cnt);
308
309 /* roll buffer forwards */
310 len -= blksize;
311 src += blksize;
312 bno += BTOBB(blksize);
313
314 /* roll attribute data forwards */
315 *valuelen -= byte_cnt;
316 *dst += byte_cnt;
317 *offset += byte_cnt;
318 }
319 return 0;
320 }
321
322 STATIC void
323 xfs_attr_rmtval_copyin(
324 struct xfs_mount *mp,
325 struct xfs_buf *bp,
326 xfs_ino_t ino,
327 int *offset,
328 int *valuelen,
329 uint8_t **src)
330 {
331 char *dst = bp->b_addr;
332 xfs_daddr_t bno = bp->b_bn;
333 int len = BBTOB(bp->b_length);
334 int blksize = mp->m_attr_geo->blksize;
335
336 ASSERT(len >= blksize);
337
338 while (len > 0 && *valuelen > 0) {
339 int hdr_size;
340 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
341
342 byte_cnt = min(*valuelen, byte_cnt);
343 hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
344 byte_cnt, bno);
345
346 memcpy(dst + hdr_size, *src, byte_cnt);
347
348 /*
349 * If this is the last block, zero the remainder of it.
350 * Check that we are actually the last block, too.
351 */
352 if (byte_cnt + hdr_size < blksize) {
353 ASSERT(*valuelen - byte_cnt == 0);
354 ASSERT(len == blksize);
355 memset(dst + hdr_size + byte_cnt, 0,
356 blksize - hdr_size - byte_cnt);
357 }
358
359 /* roll buffer forwards */
360 len -= blksize;
361 dst += blksize;
362 bno += BTOBB(blksize);
363
364 /* roll attribute data forwards */
365 *valuelen -= byte_cnt;
366 *src += byte_cnt;
367 *offset += byte_cnt;
368 }
369 }
370
371 /*
372 * Read the value associated with an attribute from the out-of-line buffer
373 * that we stored it in.
374 */
375 int
376 xfs_attr_rmtval_get(
377 struct xfs_da_args *args)
378 {
379 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE];
380 struct xfs_mount *mp = args->dp->i_mount;
381 struct xfs_buf *bp;
382 xfs_dablk_t lblkno = args->rmtblkno;
383 uint8_t *dst = args->value;
384 int valuelen;
385 int nmap;
386 int error;
387 int blkcnt = args->rmtblkcnt;
388 int i;
389 int offset = 0;
390
391 trace_xfs_attr_rmtval_get(args);
392
393 ASSERT(!(args->flags & ATTR_KERNOVAL));
394 ASSERT(args->rmtvaluelen == args->valuelen);
395
396 valuelen = args->rmtvaluelen;
397 while (valuelen > 0) {
398 nmap = ATTR_RMTVALUE_MAPSIZE;
399 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
400 blkcnt, map, &nmap,
401 XFS_BMAPI_ATTRFORK);
402 if (error)
403 return error;
404 ASSERT(nmap >= 1);
405
406 for (i = 0; (i < nmap) && (valuelen > 0); i++) {
407 xfs_daddr_t dblkno;
408 int dblkcnt;
409
410 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
411 (map[i].br_startblock != HOLESTARTBLOCK));
412 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
413 dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
414 error = xfs_trans_read_buf(mp, args->trans,
415 mp->m_ddev_targp,
416 dblkno, dblkcnt, 0, &bp,
417 &xfs_attr3_rmt_buf_ops);
418 if (error)
419 return error;
420
421 error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
422 &offset, &valuelen,
423 &dst);
424 xfs_trans_brelse(args->trans, bp);
425 if (error)
426 return error;
427
428 /* roll attribute extent map forwards */
429 lblkno += map[i].br_blockcount;
430 blkcnt -= map[i].br_blockcount;
431 }
432 }
433 ASSERT(valuelen == 0);
434 return 0;
435 }
436
437 /*
438 * Write the value associated with an attribute into the out-of-line buffer
439 * that we have defined for it.
440 */
441 int
442 xfs_attr_rmtval_set(
443 struct xfs_da_args *args)
444 {
445 struct xfs_inode *dp = args->dp;
446 struct xfs_mount *mp = dp->i_mount;
447 struct xfs_bmbt_irec map;
448 xfs_dablk_t lblkno;
449 xfs_fileoff_t lfileoff = 0;
450 uint8_t *src = args->value;
451 int blkcnt;
452 int valuelen;
453 int nmap;
454 int error;
455 int offset = 0;
456
457 trace_xfs_attr_rmtval_set(args);
458
459 /*
460 * Find a "hole" in the attribute address space large enough for
461 * us to drop the new attribute's value into. Because CRC enable
462 * attributes have headers, we can't just do a straight byte to FSB
463 * conversion and have to take the header space into account.
464 */
465 blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);
466 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
467 XFS_ATTR_FORK);
468 if (error)
469 return error;
470
471 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
472 args->rmtblkcnt = blkcnt;
473
474 /*
475 * Roll through the "value", allocating blocks on disk as required.
476 */
477 while (blkcnt > 0) {
478 /*
479 * Allocate a single extent, up to the size of the value.
480 *
481 * Note that we have to consider this a data allocation as we
482 * write the remote attribute without logging the contents.
483 * Hence we must ensure that we aren't using blocks that are on
484 * the busy list so that we don't overwrite blocks which have
485 * recently been freed but their transactions are not yet
486 * committed to disk. If we overwrite the contents of a busy
487 * extent and then crash then the block may not contain the
488 * correct metadata after log recovery occurs.
489 */
490 xfs_defer_init(args->dfops, args->firstblock);
491 nmap = 1;
492 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
493 blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
494 args->total, &map, &nmap, args->dfops);
495 if (error)
496 goto out_defer_cancel;
497 xfs_defer_ijoin(args->dfops, dp);
498 error = xfs_defer_finish(&args->trans, args->dfops);
499 if (error)
500 goto out_defer_cancel;
501
502 ASSERT(nmap == 1);
503 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
504 (map.br_startblock != HOLESTARTBLOCK));
505 lblkno += map.br_blockcount;
506 blkcnt -= map.br_blockcount;
507
508 /*
509 * Start the next trans in the chain.
510 */
511 error = xfs_trans_roll_inode(&args->trans, dp);
512 if (error)
513 return error;
514 }
515
516 /*
517 * Roll through the "value", copying the attribute value to the
518 * already-allocated blocks. Blocks are written synchronously
519 * so that we can know they are all on disk before we turn off
520 * the INCOMPLETE flag.
521 */
522 lblkno = args->rmtblkno;
523 blkcnt = args->rmtblkcnt;
524 valuelen = args->rmtvaluelen;
525 while (valuelen > 0) {
526 struct xfs_buf *bp;
527 xfs_daddr_t dblkno;
528 int dblkcnt;
529
530 ASSERT(blkcnt > 0);
531
532 xfs_defer_init(args->dfops, args->firstblock);
533 nmap = 1;
534 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
535 blkcnt, &map, &nmap,
536 XFS_BMAPI_ATTRFORK);
537 if (error)
538 return error;
539 ASSERT(nmap == 1);
540 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
541 (map.br_startblock != HOLESTARTBLOCK));
542
543 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
544 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
545
546 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
547 if (!bp)
548 return -ENOMEM;
549 bp->b_ops = &xfs_attr3_rmt_buf_ops;
550
551 xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
552 &valuelen, &src);
553
554 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
555 xfs_buf_relse(bp);
556 if (error)
557 return error;
558
559
560 /* roll attribute extent map forwards */
561 lblkno += map.br_blockcount;
562 blkcnt -= map.br_blockcount;
563 }
564 ASSERT(valuelen == 0);
565 return 0;
566 out_defer_cancel:
567 xfs_defer_cancel(args->dfops);
568 args->trans = NULL;
569 return error;
570 }
571
572 /*
573 * Remove the value associated with an attribute by deleting the
574 * out-of-line buffer that it is stored on.
575 */
576 int
577 xfs_attr_rmtval_remove(
578 struct xfs_da_args *args)
579 {
580 struct xfs_mount *mp = args->dp->i_mount;
581 xfs_dablk_t lblkno;
582 int blkcnt;
583 int error;
584 int done;
585
586 trace_xfs_attr_rmtval_remove(args);
587
588 /*
589 * Roll through the "value", invalidating the attribute value's blocks.
590 */
591 lblkno = args->rmtblkno;
592 blkcnt = args->rmtblkcnt;
593 while (blkcnt > 0) {
594 struct xfs_bmbt_irec map;
595 struct xfs_buf *bp;
596 xfs_daddr_t dblkno;
597 int dblkcnt;
598 int nmap;
599
600 /*
601 * Try to remember where we decided to put the value.
602 */
603 nmap = 1;
604 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
605 blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
606 if (error)
607 return error;
608 ASSERT(nmap == 1);
609 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
610 (map.br_startblock != HOLESTARTBLOCK));
611
612 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
613 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
614
615 /*
616 * If the "remote" value is in the cache, remove it.
617 */
618 bp = xfs_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
619 if (bp) {
620 xfs_buf_stale(bp);
621 xfs_buf_relse(bp);
622 bp = NULL;
623 }
624
625 lblkno += map.br_blockcount;
626 blkcnt -= map.br_blockcount;
627 }
628
629 /*
630 * Keep de-allocating extents until the remote-value region is gone.
631 */
632 lblkno = args->rmtblkno;
633 blkcnt = args->rmtblkcnt;
634 done = 0;
635 while (!done) {
636 xfs_defer_init(args->dfops, args->firstblock);
637 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
638 XFS_BMAPI_ATTRFORK, 1, args->firstblock,
639 args->dfops, &done);
640 if (error)
641 goto out_defer_cancel;
642 xfs_defer_ijoin(args->dfops, args->dp);
643 error = xfs_defer_finish(&args->trans, args->dfops);
644 if (error)
645 goto out_defer_cancel;
646
647 /*
648 * Close out trans and start the next one in the chain.
649 */
650 error = xfs_trans_roll_inode(&args->trans, args->dp);
651 if (error)
652 return error;
653 }
654 return 0;
655 out_defer_cancel:
656 xfs_defer_cancel(args->dfops);
657 args->trans = NULL;
658 return error;
659 }