]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - libxfs/xfs_attr_remote.c
libxfs: refactor manage_zones()
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_attr_remote.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 * Copyright (c) 2013 Red Hat, Inc.
5 * All Rights Reserved.
6 */
7 #include "libxfs_priv.h"
8 #include "xfs_fs.h"
9 #include "xfs_shared.h"
10 #include "xfs_format.h"
11 #include "xfs_log_format.h"
12 #include "xfs_trans_resv.h"
13 #include "xfs_bit.h"
14 #include "xfs_mount.h"
15 #include "xfs_defer.h"
16 #include "xfs_da_format.h"
17 #include "xfs_da_btree.h"
18 #include "xfs_inode.h"
19 #include "xfs_alloc.h"
20 #include "xfs_trans.h"
21 #include "xfs_bmap.h"
22 #include "xfs_attr_leaf.h"
23 #include "xfs_attr_remote.h"
24 #include "xfs_trans_space.h"
25 #include "xfs_trace.h"
26 #include "xfs_cksum.h"
27
/*
 * Number of extent map entries requested from xfs_bmapi_read() in a single
 * call when reading a remote attribute value; also sizes the on-stack map
 * array in xfs_attr_rmtval_get().
 */
#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
29
30 /*
31 * Each contiguous block has a header, so it is not just a simple attribute
32 * length to FSB conversion.
33 */
34 int
35 xfs_attr3_rmt_blocks(
36 struct xfs_mount *mp,
37 int attrlen)
38 {
39 if (xfs_sb_version_hascrc(&mp->m_sb)) {
40 int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
41 return (attrlen + buflen - 1) / buflen;
42 }
43 return XFS_B_TO_FSB(mp, attrlen);
44 }
45
46 /*
47 * Checking of the remote attribute header is split into two parts. The verifier
48 * does CRC, location and bounds checking, the unpacking function checks the
49 * attribute parameters and owner.
50 */
51 static xfs_failaddr_t
52 xfs_attr3_rmt_hdr_ok(
53 void *ptr,
54 xfs_ino_t ino,
55 uint32_t offset,
56 uint32_t size,
57 xfs_daddr_t bno)
58 {
59 struct xfs_attr3_rmt_hdr *rmt = ptr;
60
61 if (bno != be64_to_cpu(rmt->rm_blkno))
62 return __this_address;
63 if (offset != be32_to_cpu(rmt->rm_offset))
64 return __this_address;
65 if (size != be32_to_cpu(rmt->rm_bytes))
66 return __this_address;
67 if (ino != be64_to_cpu(rmt->rm_owner))
68 return __this_address;
69
70 /* ok */
71 return NULL;
72 }
73
74 static xfs_failaddr_t
75 xfs_attr3_rmt_verify(
76 struct xfs_mount *mp,
77 void *ptr,
78 int fsbsize,
79 xfs_daddr_t bno)
80 {
81 struct xfs_attr3_rmt_hdr *rmt = ptr;
82
83 if (!xfs_sb_version_hascrc(&mp->m_sb))
84 return __this_address;
85 if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC))
86 return __this_address;
87 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid))
88 return __this_address;
89 if (be64_to_cpu(rmt->rm_blkno) != bno)
90 return __this_address;
91 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
92 return __this_address;
93 if (be32_to_cpu(rmt->rm_offset) +
94 be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX)
95 return __this_address;
96 if (rmt->rm_owner == 0)
97 return __this_address;
98
99 return NULL;
100 }
101
102 static int
103 __xfs_attr3_rmt_read_verify(
104 struct xfs_buf *bp,
105 bool check_crc,
106 xfs_failaddr_t *failaddr)
107 {
108 struct xfs_mount *mp = bp->b_target->bt_mount;
109 char *ptr;
110 int len;
111 xfs_daddr_t bno;
112 int blksize = mp->m_attr_geo->blksize;
113
114 /* no verification of non-crc buffers */
115 if (!xfs_sb_version_hascrc(&mp->m_sb))
116 return 0;
117
118 ptr = bp->b_addr;
119 bno = bp->b_bn;
120 len = BBTOB(bp->b_length);
121 ASSERT(len >= blksize);
122
123 while (len > 0) {
124 if (check_crc &&
125 !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
126 *failaddr = __this_address;
127 return -EFSBADCRC;
128 }
129 *failaddr = xfs_attr3_rmt_verify(mp, ptr, blksize, bno);
130 if (*failaddr)
131 return -EFSCORRUPTED;
132 len -= blksize;
133 ptr += blksize;
134 bno += BTOBB(blksize);
135 }
136
137 if (len != 0) {
138 *failaddr = __this_address;
139 return -EFSCORRUPTED;
140 }
141
142 return 0;
143 }
144
145 static void
146 xfs_attr3_rmt_read_verify(
147 struct xfs_buf *bp)
148 {
149 xfs_failaddr_t fa;
150 int error;
151
152 error = __xfs_attr3_rmt_read_verify(bp, true, &fa);
153 if (error)
154 xfs_verifier_error(bp, error, fa);
155 }
156
157 static xfs_failaddr_t
158 xfs_attr3_rmt_verify_struct(
159 struct xfs_buf *bp)
160 {
161 xfs_failaddr_t fa;
162 int error;
163
164 error = __xfs_attr3_rmt_read_verify(bp, false, &fa);
165 return error ? fa : NULL;
166 }
167
168 static void
169 xfs_attr3_rmt_write_verify(
170 struct xfs_buf *bp)
171 {
172 struct xfs_mount *mp = bp->b_target->bt_mount;
173 xfs_failaddr_t fa;
174 int blksize = mp->m_attr_geo->blksize;
175 char *ptr;
176 int len;
177 xfs_daddr_t bno;
178
179 /* no verification of non-crc buffers */
180 if (!xfs_sb_version_hascrc(&mp->m_sb))
181 return;
182
183 ptr = bp->b_addr;
184 bno = bp->b_bn;
185 len = BBTOB(bp->b_length);
186 ASSERT(len >= blksize);
187
188 while (len > 0) {
189 struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;
190
191 fa = xfs_attr3_rmt_verify(mp, ptr, blksize, bno);
192 if (fa) {
193 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
194 return;
195 }
196
197 /*
198 * Ensure we aren't writing bogus LSNs to disk. See
199 * xfs_attr3_rmt_hdr_set() for the explanation.
200 */
201 if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) {
202 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
203 return;
204 }
205 xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
206
207 len -= blksize;
208 ptr += blksize;
209 bno += BTOBB(blksize);
210 }
211
212 if (len != 0)
213 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
214 }
215
216 const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
217 .name = "xfs_attr3_rmt",
218 .verify_read = xfs_attr3_rmt_read_verify,
219 .verify_write = xfs_attr3_rmt_write_verify,
220 .verify_struct = xfs_attr3_rmt_verify_struct,
221 };
222
223 STATIC int
224 xfs_attr3_rmt_hdr_set(
225 struct xfs_mount *mp,
226 void *ptr,
227 xfs_ino_t ino,
228 uint32_t offset,
229 uint32_t size,
230 xfs_daddr_t bno)
231 {
232 struct xfs_attr3_rmt_hdr *rmt = ptr;
233
234 if (!xfs_sb_version_hascrc(&mp->m_sb))
235 return 0;
236
237 rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
238 rmt->rm_offset = cpu_to_be32(offset);
239 rmt->rm_bytes = cpu_to_be32(size);
240 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid);
241 rmt->rm_owner = cpu_to_be64(ino);
242 rmt->rm_blkno = cpu_to_be64(bno);
243
244 /*
245 * Remote attribute blocks are written synchronously, so we don't
246 * have an LSN that we can stamp in them that makes any sense to log
247 * recovery. To ensure that log recovery handles overwrites of these
248 * blocks sanely (i.e. once they've been freed and reallocated as some
249 * other type of metadata) we need to ensure that the LSN has a value
250 * that tells log recovery to ignore the LSN and overwrite the buffer
251 * with whatever is in it's log. To do this, we use the magic
252 * NULLCOMMITLSN to indicate that the LSN is invalid.
253 */
254 rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN);
255
256 return sizeof(struct xfs_attr3_rmt_hdr);
257 }
258
259 /*
260 * Helper functions to copy attribute data in and out of the one disk extents
261 */
262 STATIC int
263 xfs_attr_rmtval_copyout(
264 struct xfs_mount *mp,
265 struct xfs_buf *bp,
266 xfs_ino_t ino,
267 int *offset,
268 int *valuelen,
269 uint8_t **dst)
270 {
271 char *src = bp->b_addr;
272 xfs_daddr_t bno = bp->b_bn;
273 int len = BBTOB(bp->b_length);
274 int blksize = mp->m_attr_geo->blksize;
275
276 ASSERT(len >= blksize);
277
278 while (len > 0 && *valuelen > 0) {
279 int hdr_size = 0;
280 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
281
282 byte_cnt = min(*valuelen, byte_cnt);
283
284 if (xfs_sb_version_hascrc(&mp->m_sb)) {
285 if (xfs_attr3_rmt_hdr_ok(src, ino, *offset,
286 byte_cnt, bno)) {
287 xfs_alert(mp,
288 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
289 bno, *offset, byte_cnt, ino);
290 return -EFSCORRUPTED;
291 }
292 hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
293 }
294
295 memcpy(*dst, src + hdr_size, byte_cnt);
296
297 /* roll buffer forwards */
298 len -= blksize;
299 src += blksize;
300 bno += BTOBB(blksize);
301
302 /* roll attribute data forwards */
303 *valuelen -= byte_cnt;
304 *dst += byte_cnt;
305 *offset += byte_cnt;
306 }
307 return 0;
308 }
309
310 STATIC void
311 xfs_attr_rmtval_copyin(
312 struct xfs_mount *mp,
313 struct xfs_buf *bp,
314 xfs_ino_t ino,
315 int *offset,
316 int *valuelen,
317 uint8_t **src)
318 {
319 char *dst = bp->b_addr;
320 xfs_daddr_t bno = bp->b_bn;
321 int len = BBTOB(bp->b_length);
322 int blksize = mp->m_attr_geo->blksize;
323
324 ASSERT(len >= blksize);
325
326 while (len > 0 && *valuelen > 0) {
327 int hdr_size;
328 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
329
330 byte_cnt = min(*valuelen, byte_cnt);
331 hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
332 byte_cnt, bno);
333
334 memcpy(dst + hdr_size, *src, byte_cnt);
335
336 /*
337 * If this is the last block, zero the remainder of it.
338 * Check that we are actually the last block, too.
339 */
340 if (byte_cnt + hdr_size < blksize) {
341 ASSERT(*valuelen - byte_cnt == 0);
342 ASSERT(len == blksize);
343 memset(dst + hdr_size + byte_cnt, 0,
344 blksize - hdr_size - byte_cnt);
345 }
346
347 /* roll buffer forwards */
348 len -= blksize;
349 dst += blksize;
350 bno += BTOBB(blksize);
351
352 /* roll attribute data forwards */
353 *valuelen -= byte_cnt;
354 *src += byte_cnt;
355 *offset += byte_cnt;
356 }
357 }
358
359 /*
360 * Read the value associated with an attribute from the out-of-line buffer
361 * that we stored it in.
362 */
363 int
364 xfs_attr_rmtval_get(
365 struct xfs_da_args *args)
366 {
367 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE];
368 struct xfs_mount *mp = args->dp->i_mount;
369 struct xfs_buf *bp;
370 xfs_dablk_t lblkno = args->rmtblkno;
371 uint8_t *dst = args->value;
372 int valuelen;
373 int nmap;
374 int error;
375 int blkcnt = args->rmtblkcnt;
376 int i;
377 int offset = 0;
378
379 trace_xfs_attr_rmtval_get(args);
380
381 ASSERT(!(args->flags & ATTR_KERNOVAL));
382 ASSERT(args->rmtvaluelen == args->valuelen);
383
384 valuelen = args->rmtvaluelen;
385 while (valuelen > 0) {
386 nmap = ATTR_RMTVALUE_MAPSIZE;
387 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
388 blkcnt, map, &nmap,
389 XFS_BMAPI_ATTRFORK);
390 if (error)
391 return error;
392 ASSERT(nmap >= 1);
393
394 for (i = 0; (i < nmap) && (valuelen > 0); i++) {
395 xfs_daddr_t dblkno;
396 int dblkcnt;
397
398 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
399 (map[i].br_startblock != HOLESTARTBLOCK));
400 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
401 dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
402 error = xfs_trans_read_buf(mp, args->trans,
403 mp->m_ddev_targp,
404 dblkno, dblkcnt, 0, &bp,
405 &xfs_attr3_rmt_buf_ops);
406 if (error)
407 return error;
408
409 error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
410 &offset, &valuelen,
411 &dst);
412 xfs_trans_brelse(args->trans, bp);
413 if (error)
414 return error;
415
416 /* roll attribute extent map forwards */
417 lblkno += map[i].br_blockcount;
418 blkcnt -= map[i].br_blockcount;
419 }
420 }
421 ASSERT(valuelen == 0);
422 return 0;
423 }
424
425 /*
426 * Write the value associated with an attribute into the out-of-line buffer
427 * that we have defined for it.
428 */
429 int
430 xfs_attr_rmtval_set(
431 struct xfs_da_args *args)
432 {
433 struct xfs_inode *dp = args->dp;
434 struct xfs_mount *mp = dp->i_mount;
435 struct xfs_bmbt_irec map;
436 xfs_dablk_t lblkno;
437 xfs_fileoff_t lfileoff = 0;
438 uint8_t *src = args->value;
439 int blkcnt;
440 int valuelen;
441 int nmap;
442 int error;
443 int offset = 0;
444
445 trace_xfs_attr_rmtval_set(args);
446
447 /*
448 * Find a "hole" in the attribute address space large enough for
449 * us to drop the new attribute's value into. Because CRC enable
450 * attributes have headers, we can't just do a straight byte to FSB
451 * conversion and have to take the header space into account.
452 */
453 blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);
454 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
455 XFS_ATTR_FORK);
456 if (error)
457 return error;
458
459 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
460 args->rmtblkcnt = blkcnt;
461
462 /*
463 * Roll through the "value", allocating blocks on disk as required.
464 */
465 while (blkcnt > 0) {
466 /*
467 * Allocate a single extent, up to the size of the value.
468 *
469 * Note that we have to consider this a data allocation as we
470 * write the remote attribute without logging the contents.
471 * Hence we must ensure that we aren't using blocks that are on
472 * the busy list so that we don't overwrite blocks which have
473 * recently been freed but their transactions are not yet
474 * committed to disk. If we overwrite the contents of a busy
475 * extent and then crash then the block may not contain the
476 * correct metadata after log recovery occurs.
477 */
478 nmap = 1;
479 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
480 blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map,
481 &nmap);
482 if (error)
483 return error;
484 error = xfs_defer_finish(&args->trans);
485 if (error)
486 return error;
487
488 ASSERT(nmap == 1);
489 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
490 (map.br_startblock != HOLESTARTBLOCK));
491 lblkno += map.br_blockcount;
492 blkcnt -= map.br_blockcount;
493
494 /*
495 * Start the next trans in the chain.
496 */
497 error = xfs_trans_roll_inode(&args->trans, dp);
498 if (error)
499 return error;
500 }
501
502 /*
503 * Roll through the "value", copying the attribute value to the
504 * already-allocated blocks. Blocks are written synchronously
505 * so that we can know they are all on disk before we turn off
506 * the INCOMPLETE flag.
507 */
508 lblkno = args->rmtblkno;
509 blkcnt = args->rmtblkcnt;
510 valuelen = args->rmtvaluelen;
511 while (valuelen > 0) {
512 struct xfs_buf *bp;
513 xfs_daddr_t dblkno;
514 int dblkcnt;
515
516 ASSERT(blkcnt > 0);
517
518 nmap = 1;
519 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
520 blkcnt, &map, &nmap,
521 XFS_BMAPI_ATTRFORK);
522 if (error)
523 return error;
524 ASSERT(nmap == 1);
525 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
526 (map.br_startblock != HOLESTARTBLOCK));
527
528 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
529 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
530
531 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
532 if (!bp)
533 return -ENOMEM;
534 bp->b_ops = &xfs_attr3_rmt_buf_ops;
535
536 xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
537 &valuelen, &src);
538
539 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
540 xfs_buf_relse(bp);
541 if (error)
542 return error;
543
544
545 /* roll attribute extent map forwards */
546 lblkno += map.br_blockcount;
547 blkcnt -= map.br_blockcount;
548 }
549 ASSERT(valuelen == 0);
550 return 0;
551 }
552
553 /*
554 * Remove the value associated with an attribute by deleting the
555 * out-of-line buffer that it is stored on.
556 */
557 int
558 xfs_attr_rmtval_remove(
559 struct xfs_da_args *args)
560 {
561 struct xfs_mount *mp = args->dp->i_mount;
562 xfs_dablk_t lblkno;
563 int blkcnt;
564 int error;
565 int done;
566
567 trace_xfs_attr_rmtval_remove(args);
568
569 /*
570 * Roll through the "value", invalidating the attribute value's blocks.
571 */
572 lblkno = args->rmtblkno;
573 blkcnt = args->rmtblkcnt;
574 while (blkcnt > 0) {
575 struct xfs_bmbt_irec map;
576 struct xfs_buf *bp;
577 xfs_daddr_t dblkno;
578 int dblkcnt;
579 int nmap;
580
581 /*
582 * Try to remember where we decided to put the value.
583 */
584 nmap = 1;
585 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
586 blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
587 if (error)
588 return error;
589 ASSERT(nmap == 1);
590 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
591 (map.br_startblock != HOLESTARTBLOCK));
592
593 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
594 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
595
596 /*
597 * If the "remote" value is in the cache, remove it.
598 */
599 bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
600 if (bp) {
601 xfs_buf_stale(bp);
602 xfs_buf_relse(bp);
603 bp = NULL;
604 }
605
606 lblkno += map.br_blockcount;
607 blkcnt -= map.br_blockcount;
608 }
609
610 /*
611 * Keep de-allocating extents until the remote-value region is gone.
612 */
613 lblkno = args->rmtblkno;
614 blkcnt = args->rmtblkcnt;
615 done = 0;
616 while (!done) {
617 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
618 XFS_BMAPI_ATTRFORK, 1, &done);
619 if (error)
620 return error;
621 error = xfs_defer_finish(&args->trans);
622 if (error)
623 return error;
624
625 /*
626 * Close out trans and start the next one in the chain.
627 */
628 error = xfs_trans_roll_inode(&args->trans, args->dp);
629 if (error)
630 return error;
631 }
632 return 0;
633 }