3 * ZFS filesystem ported to u-boot by
4 * Jorgen Lundman <lundman at lundman.net>
6 * GRUB -- GRand Unified Bootloader
7 * Copyright (C) 1999,2000,2001,2002,2003,2004
8 * Free Software Foundation, Inc.
9 * Copyright 2004 Sun Microsystems, Inc.
11 * SPDX-License-Identifier: GPL-2.0+
16 #include <linux/stat.h>
17 #include <linux/time.h>
18 #include <linux/ctype.h>
19 #include <asm/byteorder.h>
20 #include "zfs_common.h"
23 block_dev_desc_t
*zfs_dev_desc
;
26 * The zfs plug-in routines for GRUB are:
28 * zfs_mount() - locates a valid uberblock of the root pool and reads
29 * in its MOS at the memory address MOS.
31 * zfs_open() - locates a plain file object by following the MOS
32 * and places its dnode at the memory address DNODE.
34 * zfs_read() - read in the data blocks pointed by the DNODE.
40 #include <zfs/dnode.h>
41 #include <zfs/uberblock_impl.h>
42 #include <zfs/vdev_impl.h>
43 #include <zfs/zio_checksum.h>
44 #include <zfs/zap_impl.h>
45 #include <zfs/zap_leaf.h>
46 #include <zfs/zfs_znode.h>
48 #include <zfs/dmu_objset.h>
49 #include <zfs/sa_impl.h>
50 #include <zfs/dsl_dir.h>
51 #include <zfs/dsl_dataset.h>
54 #define ZPOOL_PROP_BOOTFS "bootfs"
58 * For nvlist manipulation. (from nvpair.h)
60 #define NV_ENCODE_NATIVE 0
61 #define NV_ENCODE_XDR 1
62 #define NV_BIG_ENDIAN 0
63 #define NV_LITTLE_ENDIAN 1
64 #define DATA_TYPE_UINT64 8
65 #define DATA_TYPE_STRING 9
66 #define DATA_TYPE_NVLIST 19
67 #define DATA_TYPE_NVLIST_ARRAY 20
/*
 * Macros to get fields in a bp or DVA.
 */

/*
 * Offset of x within its align-sized unit.  The mask arithmetic presumes
 * that align is a power of two.
 */
#define P2PHASE(x, align)		((x) & ((align) - 1))

/*
 * Turn a DVA byte offset into a sector number: add VDEV_LABEL_START_SIZE
 * to the offset, then shift right by SPA_MINBLOCKSHIFT.  The argument is
 * parenthesized so that expressions such as (a & b) or (c ? x : y) expand
 * with the intended precedence.
 */
#define DVA_OFFSET_TO_PHYS_SECTOR(offset) \
	(((offset) + VDEV_LABEL_START_SIZE) >> SPA_MINBLOCKSHIFT)

/*
 * return x rounded down to an align boundary
 * eg, P2ALIGN(1200, 1024) == 1024 (1*align)
 * eg, P2ALIGN(1024, 1024) == 1024 (1*align)
 * eg, P2ALIGN(0x1234, 0x100) == 0x1200 (0x12*align)
 * eg, P2ALIGN(0x5600, 0x100) == 0x5600 (0x56*align)
 */
#define P2ALIGN(x, align)		((x) & -(align))
/*
 * FAT ZAP data structures
 */
#define ZFS_CRC64_POLY	0xC96C5795D7870F42ULL	/* ECMA-182, reflected form */
/* Top n bits of a 64-bit hash; n == 0 yields 0 instead of shifting by 64. */
#define ZAP_HASH_IDX(hash, n)	(((n) == 0) ? 0 : ((hash) >> (64 - (n))))
#define CHAIN_END	0xffff	/* end of the chunk chain */

/*
 * The amount of space within the chunk available for the array is:
 * chunk size - space for type (1) - space for next pointer (2)
 */
#define ZAP_LEAF_ARRAY_BYTES	(ZAP_LEAF_CHUNKSIZE - 3)

/* bs is the leaf block-size shift; arguments are parenthesized on use. */
#define ZAP_LEAF_HASH_SHIFT(bs)		((bs) - 5)
#define ZAP_LEAF_HASH_NUMENTRIES(bs)	(1 << ZAP_LEAF_HASH_SHIFT(bs))
/*
 * NOTE: LEAF_HASH is not self-contained.  It reads l->l_hdr.lh_prefix_len
 * from a variable named `l` that must be in scope at the point of use.
 */
#define LEAF_HASH(bs, h) \
	((ZAP_LEAF_HASH_NUMENTRIES(bs) - 1) & \
	 ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(bs) - l->l_hdr.lh_prefix_len)))

/*
 * The amount of space available for chunks is:
 * block size (1 << bs) - hash entry size (2) * number of hash
 * entries - header space (2*chunksize)
 */
#define ZAP_LEAF_NUMCHUNKS(bs) \
	(((1 << (bs)) - 2 * ZAP_LEAF_HASH_NUMENTRIES(bs)) / \
	 ZAP_LEAF_CHUNKSIZE - 2)

/*
 * The chunks start immediately after the hash table.  The end of the
 * hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a
 * zap_leaf_chunk_t pointer and index.
 */
#define ZAP_LEAF_CHUNK(l, bs, idx) \
	((zap_leaf_chunk_t *)((l)->l_hash + ZAP_LEAF_HASH_NUMENTRIES(bs)))[idx]
#define ZAP_LEAF_ENTRY(l, bs, idx)	(&ZAP_LEAF_CHUNK(l, bs, idx).l_entry)
125 * Decompression Entry - lzjb
133 typedef int zfs_decomp_func_t(void *s_start
, void *d_start
,
134 uint32_t s_len
, uint32_t d_len
);
135 typedef struct decomp_entry
{
137 zfs_decomp_func_t
*decomp_func
;
140 typedef struct dnode_end
{
146 /* cache for a file block of the currently zfs_open()-ed file */
151 /* XXX: ashift is per vdev, not per pool. We currently only ever touch
152 * a single vdev, but when/if raid-z or stripes are supported, this
155 uint64_t vdev_ashift
;
159 /* cache for a dnode block */
160 dnode_phys_t
*dnode_buf
;
161 dnode_phys_t
*dnode_mdn
;
162 uint64_t dnode_start
;
164 zfs_endian_t dnode_endian
;
166 uberblock_t current_uberblock
;
172 uint64_t vdev_phys_sector
;
174 int (*userhook
)(const char *, const struct zfs_dirhook_info
*);
175 struct zfs_dirhook_info
*dirinfo
;
183 zlib_decompress(void *s
, void *d
,
184 uint32_t slen
, uint32_t dlen
)
186 if (zlib_decompress(s
, d
, slen
, dlen
) < 0)
187 return ZFS_ERR_BAD_FS
;
191 static decomp_entry_t decomp_table
[ZIO_COMPRESS_FUNCTIONS
] = {
192 {"inherit", NULL
}, /* ZIO_COMPRESS_INHERIT */
193 {"on", lzjb_decompress
}, /* ZIO_COMPRESS_ON */
194 {"off", NULL
}, /* ZIO_COMPRESS_OFF */
195 {"lzjb", lzjb_decompress
}, /* ZIO_COMPRESS_LZJB */
196 {"empty", NULL
}, /* ZIO_COMPRESS_EMPTY */
197 {"gzip-1", zlib_decompress
}, /* ZIO_COMPRESS_GZIP1 */
198 {"gzip-2", zlib_decompress
}, /* ZIO_COMPRESS_GZIP2 */
199 {"gzip-3", zlib_decompress
}, /* ZIO_COMPRESS_GZIP3 */
200 {"gzip-4", zlib_decompress
}, /* ZIO_COMPRESS_GZIP4 */
201 {"gzip-5", zlib_decompress
}, /* ZIO_COMPRESS_GZIP5 */
202 {"gzip-6", zlib_decompress
}, /* ZIO_COMPRESS_GZIP6 */
203 {"gzip-7", zlib_decompress
}, /* ZIO_COMPRESS_GZIP7 */
204 {"gzip-8", zlib_decompress
}, /* ZIO_COMPRESS_GZIP8 */
205 {"gzip-9", zlib_decompress
}, /* ZIO_COMPRESS_GZIP9 */
210 static int zio_read_data(blkptr_t
*bp
, zfs_endian_t endian
,
211 void *buf
, struct zfs_data
*data
);
214 zio_read(blkptr_t
*bp
, zfs_endian_t endian
, void **buf
,
215 size_t *size
, struct zfs_data
*data
);
218 * Our own version of log2(). Same thing as highbit()-1.
221 zfs_log2(uint64_t num
)
234 /* Checksum Functions */
236 zio_checksum_off(const void *buf
__attribute__ ((unused
)),
237 uint64_t size
__attribute__ ((unused
)),
238 zfs_endian_t endian
__attribute__ ((unused
)),
241 ZIO_SET_CHECKSUM(zcp
, 0, 0, 0, 0);
244 /* Checksum Table and Values */
245 static zio_checksum_info_t zio_checksum_table
[ZIO_CHECKSUM_FUNCTIONS
] = {
246 {NULL
, 0, 0, "inherit"},
248 {zio_checksum_off
, 0, 0, "off"},
249 {zio_checksum_SHA256
, 1, 1, "label"},
250 {zio_checksum_SHA256
, 1, 1, "gang_header"},
251 {NULL
, 0, 0, "zilog"},
252 {fletcher_2_endian
, 0, 0, "fletcher2"},
253 {fletcher_4_endian
, 1, 0, "fletcher4"},
254 {zio_checksum_SHA256
, 1, 0, "SHA256"},
255 {NULL
, 0, 0, "zilog2"},
259 * zio_checksum_verify: Provides support for checksum verification.
261 * Fletcher2, Fletcher4, and SHA256 are supported.
265 zio_checksum_verify(zio_cksum_t zc
, uint32_t checksum
,
266 zfs_endian_t endian
, char *buf
, int size
)
268 zio_eck_t
*zec
= (zio_eck_t
*) (buf
+ size
) - 1;
269 zio_checksum_info_t
*ci
= &zio_checksum_table
[checksum
];
270 zio_cksum_t actual_cksum
, expected_cksum
;
272 if (checksum
>= ZIO_CHECKSUM_FUNCTIONS
|| ci
->ci_func
== NULL
) {
273 printf("zfs unknown checksum function %d\n", checksum
);
274 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
278 expected_cksum
= zec
->zec_cksum
;
280 ci
->ci_func(buf
, size
, endian
, &actual_cksum
);
281 zec
->zec_cksum
= expected_cksum
;
284 ci
->ci_func(buf
, size
, endian
, &actual_cksum
);
287 if ((actual_cksum
.zc_word
[0] != zc
.zc_word
[0])
288 || (actual_cksum
.zc_word
[1] != zc
.zc_word
[1])
289 || (actual_cksum
.zc_word
[2] != zc
.zc_word
[2])
290 || (actual_cksum
.zc_word
[3] != zc
.zc_word
[3])) {
291 return ZFS_ERR_BAD_FS
;
298 * vdev_uberblock_compare takes two uberblock structures and returns an integer
299 * indicating the more recent of the two.
300 * Return Value = 1 if ub1 is more recent
301 * Return Value = -1 if ub2 is more recent
302 * The most recent uberblock is determined using its transaction number and
303 * timestamp. The uberblock with the highest transaction number is
304 * considered "newer". If the transaction numbers of the two blocks match, the
305 * timestamps are compared to determine the "newer" of the two.
308 vdev_uberblock_compare(uberblock_t
*ub1
, uberblock_t
*ub2
)
310 zfs_endian_t ub1_endian
, ub2_endian
;
311 if (zfs_to_cpu64(ub1
->ub_magic
, LITTLE_ENDIAN
) == UBERBLOCK_MAGIC
)
312 ub1_endian
= LITTLE_ENDIAN
;
314 ub1_endian
= BIG_ENDIAN
;
315 if (zfs_to_cpu64(ub2
->ub_magic
, LITTLE_ENDIAN
) == UBERBLOCK_MAGIC
)
316 ub2_endian
= LITTLE_ENDIAN
;
318 ub2_endian
= BIG_ENDIAN
;
320 if (zfs_to_cpu64(ub1
->ub_txg
, ub1_endian
)
321 < zfs_to_cpu64(ub2
->ub_txg
, ub2_endian
))
323 if (zfs_to_cpu64(ub1
->ub_txg
, ub1_endian
)
324 > zfs_to_cpu64(ub2
->ub_txg
, ub2_endian
))
327 if (zfs_to_cpu64(ub1
->ub_timestamp
, ub1_endian
)
328 < zfs_to_cpu64(ub2
->ub_timestamp
, ub2_endian
))
330 if (zfs_to_cpu64(ub1
->ub_timestamp
, ub1_endian
)
331 > zfs_to_cpu64(ub2
->ub_timestamp
, ub2_endian
))
338 * Three pieces of information are needed to verify an uberblock: the magic
339 * number, the version number, and the checksum.
341 * Currently Implemented: version number, magic number, label txg,
342 * checksum (via zio_checksum_verify() with ZIO_CHECKSUM_LABEL)
346 uberblock_verify(uberblock_t
*uber
, int offset
, struct zfs_data
*data
)
349 zfs_endian_t endian
= UNKNOWN_ENDIAN
;
352 if (uber
->ub_txg
< data
->label_txg
) {
353 debug("ignoring partially written label: uber_txg < label_txg %llu %llu\n",
354 uber
->ub_txg
, data
->label_txg
);
355 return ZFS_ERR_BAD_FS
;
358 if (zfs_to_cpu64(uber
->ub_magic
, LITTLE_ENDIAN
) == UBERBLOCK_MAGIC
359 && zfs_to_cpu64(uber
->ub_version
, LITTLE_ENDIAN
) > 0
360 && zfs_to_cpu64(uber
->ub_version
, LITTLE_ENDIAN
) <= SPA_VERSION
)
361 endian
= LITTLE_ENDIAN
;
363 if (zfs_to_cpu64(uber
->ub_magic
, BIG_ENDIAN
) == UBERBLOCK_MAGIC
364 && zfs_to_cpu64(uber
->ub_version
, BIG_ENDIAN
) > 0
365 && zfs_to_cpu64(uber
->ub_version
, BIG_ENDIAN
) <= SPA_VERSION
)
368 if (endian
== UNKNOWN_ENDIAN
) {
369 printf("invalid uberblock magic\n");
370 return ZFS_ERR_BAD_FS
;
373 memset(&zc
, 0, sizeof(zc
));
374 zc
.zc_word
[0] = cpu_to_zfs64(offset
, endian
);
375 err
= zio_checksum_verify(zc
, ZIO_CHECKSUM_LABEL
, endian
,
376 (char *) uber
, UBERBLOCK_SIZE(data
->vdev_ashift
));
379 /* Check that the data pointed by the rootbp is usable. */
382 err
= zio_read(&uber
->ub_rootbp
, endian
, &osp
, &ospsize
, data
);
385 if (!err
&& ospsize
< OBJSET_PHYS_SIZE_V14
) {
386 printf("uberblock rootbp points to invalid data\n");
387 return ZFS_ERR_BAD_FS
;
395 * Find the best uberblock.
397 * Success - Pointer to the best uberblock.
400 static uberblock_t
*find_bestub(char *ub_array
, struct zfs_data
*data
)
402 const uint64_t sector
= data
->vdev_phys_sector
;
403 uberblock_t
*ubbest
= NULL
;
405 unsigned int i
, offset
, pickedub
= 0;
406 int err
= ZFS_ERR_NONE
;
408 const unsigned int UBCOUNT
= UBERBLOCK_COUNT(data
->vdev_ashift
);
409 const uint64_t UBBYTES
= UBERBLOCK_SIZE(data
->vdev_ashift
);
411 for (i
= 0; i
< UBCOUNT
; i
++) {
412 ubnext
= (uberblock_t
*) (i
* UBBYTES
+ ub_array
);
413 offset
= (sector
<< SPA_MINBLOCKSHIFT
) + VDEV_PHYS_SIZE
+ (i
* UBBYTES
);
415 err
= uberblock_verify(ubnext
, offset
, data
);
419 if (ubbest
== NULL
|| vdev_uberblock_compare(ubnext
, ubbest
) > 0) {
426 debug("zfs Found best uberblock at idx %d, txg %llu\n",
427 pickedub
, (unsigned long long) ubbest
->ub_txg
);
433 get_psize(blkptr_t
*bp
, zfs_endian_t endian
)
435 return (((zfs_to_cpu64((bp
)->blk_prop
, endian
) >> 16) & 0xffff) + 1)
436 << SPA_MINBLOCKSHIFT
;
440 dva_get_offset(dva_t
*dva
, zfs_endian_t endian
)
442 return zfs_to_cpu64((dva
)->dva_word
[1],
443 endian
) << SPA_MINBLOCKSHIFT
;
447 * Read a block of data based on the gang block address dva,
448 * and put its data in buf.
452 zio_read_gang(blkptr_t
*bp
, zfs_endian_t endian
, dva_t
*dva
, void *buf
,
453 struct zfs_data
*data
)
455 zio_gbh_phys_t
*zio_gb
;
456 uint64_t offset
, sector
;
461 memset(&zc
, 0, sizeof(zc
));
463 zio_gb
= malloc(SPA_GANGBLOCKSIZE
);
465 return ZFS_ERR_OUT_OF_MEMORY
;
467 offset
= dva_get_offset(dva
, endian
);
468 sector
= DVA_OFFSET_TO_PHYS_SECTOR(offset
);
470 /* read in the gang block header */
471 err
= zfs_devread(sector
, 0, SPA_GANGBLOCKSIZE
, (char *) zio_gb
);
479 /* self-checksumming the gang block header */
480 ZIO_SET_CHECKSUM(&zc
, DVA_GET_VDEV(dva
),
481 dva_get_offset(dva
, endian
), bp
->blk_birth
, 0);
482 err
= zio_checksum_verify(zc
, ZIO_CHECKSUM_GANG_HEADER
, endian
,
483 (char *) zio_gb
, SPA_GANGBLOCKSIZE
);
489 endian
= (zfs_to_cpu64(bp
->blk_prop
, endian
) >> 63) & 1;
491 for (i
= 0; i
< SPA_GBH_NBLKPTRS
; i
++) {
492 if (zio_gb
->zg_blkptr
[i
].blk_birth
== 0)
495 err
= zio_read_data(&zio_gb
->zg_blkptr
[i
], endian
, buf
, data
);
500 buf
= (char *) buf
+ get_psize(&zio_gb
->zg_blkptr
[i
], endian
);
507 * Read in a block of raw data to buf.
510 zio_read_data(blkptr_t
*bp
, zfs_endian_t endian
, void *buf
,
511 struct zfs_data
*data
)
514 int err
= ZFS_ERR_NONE
;
516 psize
= get_psize(bp
, endian
);
518 /* pick a good dva from the block pointer */
519 for (i
= 0; i
< SPA_DVAS_PER_BP
; i
++) {
520 uint64_t offset
, sector
;
522 if (bp
->blk_dva
[i
].dva_word
[0] == 0 && bp
->blk_dva
[i
].dva_word
[1] == 0)
525 if ((zfs_to_cpu64(bp
->blk_dva
[i
].dva_word
[1], endian
)>>63) & 1) {
526 err
= zio_read_gang(bp
, endian
, &bp
->blk_dva
[i
], buf
, data
);
528 /* read in a data block */
529 offset
= dva_get_offset(&bp
->blk_dva
[i
], endian
);
530 sector
= DVA_OFFSET_TO_PHYS_SECTOR(offset
);
532 err
= zfs_devread(sector
, 0, psize
, buf
);
536 /*Check the underlying checksum before we rule this DVA as "good"*/
537 uint32_t checkalgo
= (zfs_to_cpu64((bp
)->blk_prop
, endian
) >> 40) & 0xff;
539 err
= zio_checksum_verify(bp
->blk_cksum
, checkalgo
, endian
, buf
, psize
);
544 /* If read failed or checksum bad, reset the error. Hopefully we've got some more DVAs to try. */
548 printf("couldn't find a valid DVA\n");
549 err
= ZFS_ERR_BAD_FS
;
556 * Read in a block of data, verify its checksum, decompress if needed,
557 * and put the uncompressed data in buf.
560 zio_read(blkptr_t
*bp
, zfs_endian_t endian
, void **buf
,
561 size_t *size
, struct zfs_data
*data
)
565 char *compbuf
= NULL
;
570 comp
= (zfs_to_cpu64((bp
)->blk_prop
, endian
)>>32) & 0xff;
571 lsize
= (BP_IS_HOLE(bp
) ? 0 :
572 (((zfs_to_cpu64((bp
)->blk_prop
, endian
) & 0xffff) + 1)
573 << SPA_MINBLOCKSHIFT
));
574 psize
= get_psize(bp
, endian
);
579 if (comp
>= ZIO_COMPRESS_FUNCTIONS
) {
580 printf("compression algorithm %u not supported\n", (unsigned int) comp
);
581 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
584 if (comp
!= ZIO_COMPRESS_OFF
&& decomp_table
[comp
].decomp_func
== NULL
) {
585 printf("compression algorithm %s not supported\n", decomp_table
[comp
].name
);
586 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
589 if (comp
!= ZIO_COMPRESS_OFF
) {
590 compbuf
= malloc(psize
);
592 return ZFS_ERR_OUT_OF_MEMORY
;
594 compbuf
= *buf
= malloc(lsize
);
597 err
= zio_read_data(bp
, endian
, compbuf
, data
);
604 if (comp
!= ZIO_COMPRESS_OFF
) {
605 *buf
= malloc(lsize
);
608 return ZFS_ERR_OUT_OF_MEMORY
;
611 err
= decomp_table
[comp
].decomp_func(compbuf
, *buf
, psize
, lsize
);
624 * Get the block from a block id.
625 * push the block onto the stack.
629 dmu_read(dnode_end_t
*dn
, uint64_t blkid
, void **buf
,
630 zfs_endian_t
*endian_out
, struct zfs_data
*data
)
633 blkptr_t
*bp_array
= dn
->dn
.dn_blkptr
;
634 int epbs
= dn
->dn
.dn_indblkshift
- SPA_BLKPTRSHIFT
;
638 int err
= ZFS_ERR_NONE
;
640 bp
= malloc(sizeof(blkptr_t
));
642 return ZFS_ERR_OUT_OF_MEMORY
;
645 for (level
= dn
->dn
.dn_nlevels
- 1; level
>= 0; level
--) {
646 idx
= (blkid
>> (epbs
* level
)) & ((1 << epbs
) - 1);
648 if (bp_array
!= dn
->dn
.dn_blkptr
) {
653 if (BP_IS_HOLE(bp
)) {
654 size_t size
= zfs_to_cpu16(dn
->dn
.dn_datablkszsec
,
656 << SPA_MINBLOCKSHIFT
;
659 err
= ZFS_ERR_OUT_OF_MEMORY
;
662 memset(*buf
, 0, size
);
663 endian
= (zfs_to_cpu64(bp
->blk_prop
, endian
) >> 63) & 1;
667 err
= zio_read(bp
, endian
, buf
, 0, data
);
668 endian
= (zfs_to_cpu64(bp
->blk_prop
, endian
) >> 63) & 1;
671 err
= zio_read(bp
, endian
, &tmpbuf
, 0, data
);
672 endian
= (zfs_to_cpu64(bp
->blk_prop
, endian
) >> 63) & 1;
677 if (bp_array
!= dn
->dn
.dn_blkptr
)
680 *endian_out
= endian
;
687 * mzap_lookup: Looks up property described by "name" and returns the value
691 mzap_lookup(mzap_phys_t
*zapobj
, zfs_endian_t endian
,
692 int objsize
, char *name
, uint64_t * value
)
695 mzap_ent_phys_t
*mzap_ent
= zapobj
->mz_chunk
;
697 chunks
= objsize
/ MZAP_ENT_LEN
- 1;
698 for (i
= 0; i
< chunks
; i
++) {
699 if (strcmp(mzap_ent
[i
].mze_name
, name
) == 0) {
700 *value
= zfs_to_cpu64(mzap_ent
[i
].mze_value
, endian
);
705 printf("couldn't find '%s'\n", name
);
706 return ZFS_ERR_FILE_NOT_FOUND
;
710 mzap_iterate(mzap_phys_t
*zapobj
, zfs_endian_t endian
, int objsize
,
711 int (*hook
)(const char *name
,
713 struct zfs_data
*data
),
714 struct zfs_data
*data
)
717 mzap_ent_phys_t
*mzap_ent
= zapobj
->mz_chunk
;
719 chunks
= objsize
/ MZAP_ENT_LEN
- 1;
720 for (i
= 0; i
< chunks
; i
++) {
721 if (hook(mzap_ent
[i
].mze_name
,
722 zfs_to_cpu64(mzap_ent
[i
].mze_value
, endian
),
731 zap_hash(uint64_t salt
, const char *name
)
733 static uint64_t table
[256];
738 if (table
[128] == 0) {
741 for (i
= 0; i
< 256; i
++) {
742 for (ct
= table
+ i
, *ct
= i
, j
= 8; j
> 0; j
--)
743 *ct
= (*ct
>> 1) ^ (-(*ct
& 1) & ZFS_CRC64_POLY
);
747 for (cp
= (const uint8_t *) name
; (c
= *cp
) != '\0'; cp
++)
748 crc
= (crc
>> 8) ^ table
[(crc
^ c
) & 0xFF];
751 * Only use 28 bits, since we need 4 bits in the cookie for the
752 * collision differentiator. We MUST use the high bits, since
753 * those are the ones that we first pay attention to when
754 * choosing the bucket.
756 crc
&= ~((1ULL << (64 - ZAP_HASHBITS
)) - 1);
762 * Only to be used on 8-bit arrays.
763 * array_len is actual len in bytes (not encoded le_value_length).
764 * buf is null-terminated.
768 zap_leaf_array_equal(zap_leaf_phys_t
*l
, zfs_endian_t endian
,
769 int blksft
, int chunk
, int array_len
, const char *buf
)
773 while (bseen
< array_len
) {
774 struct zap_leaf_array
*la
= &ZAP_LEAF_CHUNK(l
, blksft
, chunk
).l_array
;
775 int toread
= min(array_len
- bseen
, ZAP_LEAF_ARRAY_BYTES
);
777 if (chunk
>= ZAP_LEAF_NUMCHUNKS(blksft
))
780 if (memcmp(la
->la_array
, buf
+ bseen
, toread
) != 0)
782 chunk
= zfs_to_cpu16(la
->la_next
, endian
);
785 return (bseen
== array_len
);
790 zap_leaf_array_get(zap_leaf_phys_t
*l
, zfs_endian_t endian
, int blksft
,
791 int chunk
, int array_len
, char *buf
)
795 while (bseen
< array_len
) {
796 struct zap_leaf_array
*la
= &ZAP_LEAF_CHUNK(l
, blksft
, chunk
).l_array
;
797 int toread
= min(array_len
- bseen
, ZAP_LEAF_ARRAY_BYTES
);
799 if (chunk
>= ZAP_LEAF_NUMCHUNKS(blksft
))
800 /* Don't use errno because this error is to be ignored. */
801 return ZFS_ERR_BAD_FS
;
803 memcpy(buf
+ bseen
, la
->la_array
, toread
);
804 chunk
= zfs_to_cpu16(la
->la_next
, endian
);
812 * Given a zap_leaf_phys_t, walk through the zap leaf chunks to get the
813 * value for the property "name".
818 zap_leaf_lookup(zap_leaf_phys_t
*l
, zfs_endian_t endian
,
819 int blksft
, uint64_t h
,
820 const char *name
, uint64_t *value
)
823 struct zap_leaf_entry
*le
;
825 /* Verify if this is a valid leaf block */
826 if (zfs_to_cpu64(l
->l_hdr
.lh_block_type
, endian
) != ZBT_LEAF
) {
827 printf("invalid leaf type\n");
828 return ZFS_ERR_BAD_FS
;
830 if (zfs_to_cpu32(l
->l_hdr
.lh_magic
, endian
) != ZAP_LEAF_MAGIC
) {
831 printf("invalid leaf magic\n");
832 return ZFS_ERR_BAD_FS
;
835 for (chunk
= zfs_to_cpu16(l
->l_hash
[LEAF_HASH(blksft
, h
)], endian
);
836 chunk
!= CHAIN_END
; chunk
= le
->le_next
) {
838 if (chunk
>= ZAP_LEAF_NUMCHUNKS(blksft
)) {
839 printf("invalid chunk number\n");
840 return ZFS_ERR_BAD_FS
;
843 le
= ZAP_LEAF_ENTRY(l
, blksft
, chunk
);
845 /* Verify the chunk entry */
846 if (le
->le_type
!= ZAP_CHUNK_ENTRY
) {
847 printf("invalid chunk entry\n");
848 return ZFS_ERR_BAD_FS
;
851 if (zfs_to_cpu64(le
->le_hash
, endian
) != h
)
854 if (zap_leaf_array_equal(l
, endian
, blksft
,
855 zfs_to_cpu16(le
->le_name_chunk
, endian
),
856 zfs_to_cpu16(le
->le_name_length
, endian
),
858 struct zap_leaf_array
*la
;
860 if (le
->le_int_size
!= 8 || le
->le_value_length
!= 1) {
861 printf("invalid leaf chunk entry\n");
862 return ZFS_ERR_BAD_FS
;
864 /* get the uint64_t property value */
865 la
= &ZAP_LEAF_CHUNK(l
, blksft
, le
->le_value_chunk
).l_array
;
867 *value
= be64_to_cpu(la
->la_array64
);
873 printf("couldn't find '%s'\n", name
);
874 return ZFS_ERR_FILE_NOT_FOUND
;
878 /* Verify if this is a fat zap header block */
880 zap_verify(zap_phys_t
*zap
)
882 if (zap
->zap_magic
!= (uint64_t) ZAP_MAGIC
) {
883 printf("bad ZAP magic\n");
884 return ZFS_ERR_BAD_FS
;
887 if (zap
->zap_flags
!= 0) {
888 printf("bad ZAP flags\n");
889 return ZFS_ERR_BAD_FS
;
892 if (zap
->zap_salt
== 0) {
893 printf("bad ZAP salt\n");
894 return ZFS_ERR_BAD_FS
;
906 fzap_lookup(dnode_end_t
*zap_dnode
, zap_phys_t
*zap
,
907 char *name
, uint64_t *value
, struct zfs_data
*data
)
910 uint64_t hash
, idx
, blkid
;
911 int blksft
= zfs_log2(zfs_to_cpu16(zap_dnode
->dn
.dn_datablkszsec
,
912 zap_dnode
->endian
) << DNODE_SHIFT
);
914 zfs_endian_t leafendian
;
916 err
= zap_verify(zap
);
920 hash
= zap_hash(zap
->zap_salt
, name
);
922 /* get block id from index */
923 if (zap
->zap_ptrtbl
.zt_numblks
!= 0) {
924 printf("external pointer tables not supported\n");
925 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
927 idx
= ZAP_HASH_IDX(hash
, zap
->zap_ptrtbl
.zt_shift
);
928 blkid
= ((uint64_t *) zap
)[idx
+ (1 << (blksft
- 3 - 1))];
930 /* Get the leaf block */
931 if ((1U << blksft
) < sizeof(zap_leaf_phys_t
)) {
932 printf("ZAP leaf is too small\n");
933 return ZFS_ERR_BAD_FS
;
935 err
= dmu_read(zap_dnode
, blkid
, &l
, &leafendian
, data
);
939 err
= zap_leaf_lookup(l
, leafendian
, blksft
, hash
, name
, value
);
946 fzap_iterate(dnode_end_t
*zap_dnode
, zap_phys_t
*zap
,
947 int (*hook
)(const char *name
,
949 struct zfs_data
*data
),
950 struct zfs_data
*data
)
956 int blksft
= zfs_log2(zfs_to_cpu16(zap_dnode
->dn
.dn_datablkszsec
,
957 zap_dnode
->endian
) << DNODE_SHIFT
);
964 /* get block id from index */
965 if (zap
->zap_ptrtbl
.zt_numblks
!= 0) {
966 printf("external pointer tables not supported\n");
969 /* Get the leaf block */
970 if ((1U << blksft
) < sizeof(zap_leaf_phys_t
)) {
971 printf("ZAP leaf is too small\n");
974 for (idx
= 0; idx
< zap
->zap_ptrtbl
.zt_numblks
; idx
++) {
975 blkid
= ((uint64_t *) zap
)[idx
+ (1 << (blksft
- 3 - 1))];
977 err
= dmu_read(zap_dnode
, blkid
, &l_in
, &endian
, data
);
982 /* Verify if this is a valid leaf block */
983 if (zfs_to_cpu64(l
->l_hdr
.lh_block_type
, endian
) != ZBT_LEAF
) {
987 if (zfs_to_cpu32(l
->l_hdr
.lh_magic
, endian
) != ZAP_LEAF_MAGIC
) {
992 for (chunk
= 0; chunk
< ZAP_LEAF_NUMCHUNKS(blksft
); chunk
++) {
994 struct zap_leaf_array
*la
;
995 struct zap_leaf_entry
*le
;
997 le
= ZAP_LEAF_ENTRY(l
, blksft
, chunk
);
999 /* Verify the chunk entry */
1000 if (le
->le_type
!= ZAP_CHUNK_ENTRY
)
1003 buf
= malloc(zfs_to_cpu16(le
->le_name_length
, endian
)
1005 if (zap_leaf_array_get(l
, endian
, blksft
, le
->le_name_chunk
,
1006 le
->le_name_length
, buf
)) {
1010 buf
[le
->le_name_length
] = 0;
1012 if (le
->le_int_size
!= 8
1013 || zfs_to_cpu16(le
->le_value_length
, endian
) != 1)
1016 /* get the uint64_t property value */
1017 la
= &ZAP_LEAF_CHUNK(l
, blksft
, le
->le_value_chunk
).l_array
;
1018 val
= be64_to_cpu(la
->la_array64
);
1019 if (hook(buf
, val
, data
))
1029 * Read in the data of a zap object and find the value for a matching
1034 zap_lookup(dnode_end_t
*zap_dnode
, char *name
, uint64_t *val
,
1035 struct zfs_data
*data
)
1037 uint64_t block_type
;
1041 zfs_endian_t endian
;
1043 /* Read in the first block of the zap object data. */
1044 size
= zfs_to_cpu16(zap_dnode
->dn
.dn_datablkszsec
,
1045 zap_dnode
->endian
) << SPA_MINBLOCKSHIFT
;
1046 err
= dmu_read(zap_dnode
, 0, &zapbuf
, &endian
, data
);
1049 block_type
= zfs_to_cpu64(*((uint64_t *) zapbuf
), endian
);
1051 if (block_type
== ZBT_MICRO
) {
1052 err
= (mzap_lookup(zapbuf
, endian
, size
, name
, val
));
1055 } else if (block_type
== ZBT_HEADER
) {
1056 /* this is a fat zap */
1057 err
= (fzap_lookup(zap_dnode
, zapbuf
, name
, val
, data
));
1062 printf("unknown ZAP type\n");
1064 return ZFS_ERR_BAD_FS
;
1068 zap_iterate(dnode_end_t
*zap_dnode
,
1069 int (*hook
)(const char *name
, uint64_t val
,
1070 struct zfs_data
*data
),
1071 struct zfs_data
*data
)
1073 uint64_t block_type
;
1078 zfs_endian_t endian
;
1080 /* Read in the first block of the zap object data. */
1081 size
= zfs_to_cpu16(zap_dnode
->dn
.dn_datablkszsec
, zap_dnode
->endian
) << SPA_MINBLOCKSHIFT
;
1082 err
= dmu_read(zap_dnode
, 0, &zapbuf
, &endian
, data
);
1085 block_type
= zfs_to_cpu64(*((uint64_t *) zapbuf
), endian
);
1087 if (block_type
== ZBT_MICRO
) {
1088 ret
= mzap_iterate(zapbuf
, endian
, size
, hook
, data
);
1091 } else if (block_type
== ZBT_HEADER
) {
1092 /* this is a fat zap */
1093 ret
= fzap_iterate(zap_dnode
, zapbuf
, hook
, data
);
1097 printf("unknown ZAP type\n");
1104 * Get the dnode of an object number from the metadnode of an object set.
1107 * mdn - metadnode to get the object dnode
1108 * objnum - object number for the object dnode
1109 * buf - data buffer that holds the returning dnode
1112 dnode_get(dnode_end_t
*mdn
, uint64_t objnum
, uint8_t type
,
1113 dnode_end_t
*buf
, struct zfs_data
*data
)
1115 uint64_t blkid
, blksz
; /* the block id this object dnode is in */
1116 int epbs
; /* shift of number of dnodes in a block */
1117 int idx
; /* index within a block */
1120 zfs_endian_t endian
;
1122 blksz
= zfs_to_cpu16(mdn
->dn
.dn_datablkszsec
,
1123 mdn
->endian
) << SPA_MINBLOCKSHIFT
;
1125 epbs
= zfs_log2(blksz
) - DNODE_SHIFT
;
1126 blkid
= objnum
>> epbs
;
1127 idx
= objnum
& ((1 << epbs
) - 1);
1129 if (data
->dnode_buf
!= NULL
&& memcmp(data
->dnode_mdn
, mdn
,
1131 && objnum
>= data
->dnode_start
&& objnum
< data
->dnode_end
) {
1132 memmove(&(buf
->dn
), &(data
->dnode_buf
)[idx
], DNODE_SIZE
);
1133 buf
->endian
= data
->dnode_endian
;
1134 if (type
&& buf
->dn
.dn_type
!= type
) {
1135 printf("incorrect dnode type: %02X != %02x\n", buf
->dn
.dn_type
, type
);
1136 return ZFS_ERR_BAD_FS
;
1138 return ZFS_ERR_NONE
;
1141 err
= dmu_read(mdn
, blkid
, &dnbuf
, &endian
, data
);
1145 free(data
->dnode_buf
);
1146 free(data
->dnode_mdn
);
1147 data
->dnode_mdn
= malloc(sizeof(*mdn
));
1148 if (!data
->dnode_mdn
) {
1149 data
->dnode_buf
= 0;
1151 memcpy(data
->dnode_mdn
, mdn
, sizeof(*mdn
));
1152 data
->dnode_buf
= dnbuf
;
1153 data
->dnode_start
= blkid
<< epbs
;
1154 data
->dnode_end
= (blkid
+ 1) << epbs
;
1155 data
->dnode_endian
= endian
;
1158 memmove(&(buf
->dn
), (dnode_phys_t
*) dnbuf
+ idx
, DNODE_SIZE
);
1159 buf
->endian
= endian
;
1160 if (type
&& buf
->dn
.dn_type
!= type
) {
1161 printf("incorrect dnode type\n");
1162 return ZFS_ERR_BAD_FS
;
1165 return ZFS_ERR_NONE
;
1169 * Get the file dnode for a given file name where mdn is the meta dnode
1170 * for this ZFS object set. When found, place the file dnode in dn.
1171 * The 'path' argument will be mangled.
1175 dnode_get_path(dnode_end_t
*mdn
, const char *path_in
, dnode_end_t
*dn
,
1176 struct zfs_data
*data
)
1178 uint64_t objnum
, version
;
1180 int err
= ZFS_ERR_NONE
;
1181 char *path
, *path_buf
;
1182 struct dnode_chain
{
1183 struct dnode_chain
*next
;
1186 struct dnode_chain
*dnode_path
= 0, *dn_new
, *root
;
1188 dn_new
= malloc(sizeof(*dn_new
));
1190 return ZFS_ERR_OUT_OF_MEMORY
;
1192 dnode_path
= root
= dn_new
;
1194 err
= dnode_get(mdn
, MASTER_NODE_OBJ
, DMU_OT_MASTER_NODE
,
1195 &(dnode_path
->dn
), data
);
1201 err
= zap_lookup(&(dnode_path
->dn
), ZPL_VERSION_STR
, &version
, data
);
1206 if (version
> ZPL_VERSION
) {
1208 printf("too new ZPL version\n");
1209 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
1212 err
= zap_lookup(&(dnode_path
->dn
), ZFS_ROOT_OBJ
, &objnum
, data
);
1218 err
= dnode_get(mdn
, objnum
, 0, &(dnode_path
->dn
), data
);
1224 path
= path_buf
= strdup(path_in
);
1227 return ZFS_ERR_OUT_OF_MEMORY
;
1231 /* skip leading slashes */
1232 while (*path
== '/')
1236 /* get the next component name */
1238 while (*path
&& *path
!= '/')
1241 if (cname
+ 1 == path
&& cname
[0] == '.')
1243 /* Handle double dot. */
1244 if (cname
+ 2 == path
&& cname
[0] == '.' && cname
[1] == '.') {
1246 dn_new
= dnode_path
;
1247 dnode_path
= dn_new
->next
;
1250 printf("can't resolve ..\n");
1251 err
= ZFS_ERR_FILE_NOT_FOUND
;
1258 *path
= 0; /* ensure null termination */
1260 if (dnode_path
->dn
.dn
.dn_type
!= DMU_OT_DIRECTORY_CONTENTS
) {
1262 printf("not a directory\n");
1263 return ZFS_ERR_BAD_FILE_TYPE
;
1265 err
= zap_lookup(&(dnode_path
->dn
), cname
, &objnum
, data
);
1269 dn_new
= malloc(sizeof(*dn_new
));
1271 err
= ZFS_ERR_OUT_OF_MEMORY
;
1274 dn_new
->next
= dnode_path
;
1275 dnode_path
= dn_new
;
1277 objnum
= ZFS_DIRENT_OBJ(objnum
);
1278 err
= dnode_get(mdn
, objnum
, 0, &(dnode_path
->dn
), data
);
1286 memcpy(dn
, &(dnode_path
->dn
), sizeof(*dn
));
1288 while (dnode_path
) {
1289 dn_new
= dnode_path
->next
;
1291 dnode_path
= dn_new
;
1299 * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname),
1300 * e.g. pool/rootfs, or a given object number (obj), e.g. the object number
1303 * If no fsname and no obj are given, return the DSL_DIR metadnode.
1304 * If fsname is given, return its metadnode and its matching object number.
1305 * If only obj is given, return the metadnode for this object number.
1309 get_filesystem_dnode(dnode_end_t
*mosmdn
, char *fsname
,
1310 dnode_end_t
*mdn
, struct zfs_data
*data
)
1315 err
= dnode_get(mosmdn
, DMU_POOL_DIRECTORY_OBJECT
,
1316 DMU_OT_OBJECT_DIRECTORY
, mdn
, data
);
1320 err
= zap_lookup(mdn
, DMU_POOL_ROOT_DATASET
, &objnum
, data
);
1324 err
= dnode_get(mosmdn
, objnum
, DMU_OT_DSL_DIR
, mdn
, data
);
1332 while (*fsname
== '/')
1335 if (!*fsname
|| *fsname
== '@')
1339 while (*fsname
&& !isspace(*fsname
) && *fsname
!= '/')
1344 childobj
= zfs_to_cpu64((((dsl_dir_phys_t
*) DN_BONUS(&mdn
->dn
)))->dd_child_dir_zapobj
, mdn
->endian
);
1345 err
= dnode_get(mosmdn
, childobj
,
1346 DMU_OT_DSL_DIR_CHILD_MAP
, mdn
, data
);
1350 err
= zap_lookup(mdn
, cname
, &objnum
, data
);
1354 err
= dnode_get(mosmdn
, objnum
, DMU_OT_DSL_DIR
, mdn
, data
);
1360 return ZFS_ERR_NONE
;
1364 make_mdn(dnode_end_t
*mdn
, struct zfs_data
*data
)
1371 bp
= &(((dsl_dataset_phys_t
*) DN_BONUS(&mdn
->dn
))->ds_bp
);
1372 err
= zio_read(bp
, mdn
->endian
, &osp
, &ospsize
, data
);
1375 if (ospsize
< OBJSET_PHYS_SIZE_V14
) {
1377 printf("too small osp\n");
1378 return ZFS_ERR_BAD_FS
;
1381 mdn
->endian
= (zfs_to_cpu64(bp
->blk_prop
, mdn
->endian
)>>63) & 1;
1382 memmove((char *) &(mdn
->dn
),
1383 (char *) &((objset_phys_t
*) osp
)->os_meta_dnode
, DNODE_SIZE
);
1385 return ZFS_ERR_NONE
;
1389 dnode_get_fullpath(const char *fullpath
, dnode_end_t
*mdn
,
1390 uint64_t *mdnobj
, dnode_end_t
*dn
, int *isfs
,
1391 struct zfs_data
*data
)
1393 char *fsname
, *snapname
;
1394 const char *ptr_at
, *filename
;
1398 ptr_at
= strchr(fullpath
, '@');
1403 fsname
= strdup(fullpath
);
1405 const char *ptr_slash
= strchr(ptr_at
, '/');
1408 fsname
= malloc(ptr_at
- fullpath
+ 1);
1410 return ZFS_ERR_OUT_OF_MEMORY
;
1411 memcpy(fsname
, fullpath
, ptr_at
- fullpath
);
1412 fsname
[ptr_at
- fullpath
] = 0;
1413 if (ptr_at
[1] && ptr_at
[1] != '/') {
1414 snapname
= malloc(ptr_slash
- ptr_at
);
1417 return ZFS_ERR_OUT_OF_MEMORY
;
1419 memcpy(snapname
, ptr_at
+ 1, ptr_slash
- ptr_at
- 1);
1420 snapname
[ptr_slash
- ptr_at
- 1] = 0;
1425 filename
= ptr_slash
;
1428 printf("zfs fsname = '%s' snapname='%s' filename = '%s'\n",
1429 fsname
, snapname
, filename
);
1433 err
= get_filesystem_dnode(&(data
->mos
), fsname
, dn
, data
);
1441 headobj
= zfs_to_cpu64(((dsl_dir_phys_t
*) DN_BONUS(&dn
->dn
))->dd_head_dataset_obj
, dn
->endian
);
1443 err
= dnode_get(&(data
->mos
), headobj
, DMU_OT_DSL_DATASET
, mdn
, data
);
1453 snapobj
= zfs_to_cpu64(((dsl_dataset_phys_t
*) DN_BONUS(&mdn
->dn
))->ds_snapnames_zapobj
, mdn
->endian
);
1455 err
= dnode_get(&(data
->mos
), snapobj
,
1456 DMU_OT_DSL_DS_SNAP_MAP
, mdn
, data
);
1458 err
= zap_lookup(mdn
, snapname
, &headobj
, data
);
1460 err
= dnode_get(&(data
->mos
), headobj
, DMU_OT_DSL_DATASET
, mdn
, data
);
1471 make_mdn(mdn
, data
);
1476 return ZFS_ERR_NONE
;
1478 err
= dnode_get_path(mdn
, filename
, dn
, data
);
1485 * For a given XDR packed nvlist, verify the first 4 bytes and move on.
1487 * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) :
1489 * encoding method/host endian (4 bytes)
1490 * nvl_version (4 bytes)
1491 * nvl_nvflag (4 bytes)
1493 * encoded size of the nvpair (4 bytes)
1494 * decoded size of the nvpair (4 bytes)
1495 * name string size (4 bytes)
1496 * name string data (sizeof(NV_ALIGN4(string))
1497 * data type (4 bytes)
1498 * # of elements in the nvpair (4 bytes)
1500 * 2 zero's for the last nvpair
1501 * (end of the entire list) (8 bytes)
1506 nvlist_find_value(char *nvlist
, char *name
, int valtype
, char **val
,
1507 size_t *size_out
, size_t *nelm_out
)
1509 int name_len
, type
, encode_size
;
1510 char *nvpair
, *nvp_name
;
1512 /* Verify if the 1st and 2nd byte in the nvlist are valid. */
1513 /* NOTE: independently of what endianness header announces all
1514 subsequent values are big-endian. */
1515 if (nvlist
[0] != NV_ENCODE_XDR
|| (nvlist
[1] != NV_LITTLE_ENDIAN
1516 && nvlist
[1] != NV_BIG_ENDIAN
)) {
1517 printf("zfs incorrect nvlist header\n");
1518 return ZFS_ERR_BAD_FS
;
1521 /* skip the header, nvl_version, and nvl_nvflag */
1522 nvlist
= nvlist
+ 4 * 3;
1524 * Loop thru the nvpair list
1525 * The XDR representation of an integer is in big-endian byte order.
1527 while ((encode_size
= be32_to_cpu(*(uint32_t *) nvlist
))) {
1530 nvpair
= nvlist
+ 4 * 2; /* skip the encode/decode size */
1532 name_len
= be32_to_cpu(*(uint32_t *) nvpair
);
1536 nvpair
= nvpair
+ ((name_len
+ 3) & ~3); /* align */
1538 type
= be32_to_cpu(*(uint32_t *) nvpair
);
1541 nelm
= be32_to_cpu(*(uint32_t *) nvpair
);
1543 printf("empty nvpair\n");
1544 return ZFS_ERR_BAD_FS
;
1549 if ((strncmp(nvp_name
, name
, name_len
) == 0) && type
== valtype
) {
1551 *size_out
= encode_size
;
1557 nvlist
+= encode_size
; /* goto the next nvpair */
1563 zfs_nvlist_lookup_uint64(char *nvlist
, char *name
, uint64_t *out
)
1569 found
= nvlist_find_value(nvlist
, name
, DATA_TYPE_UINT64
, &nvpair
, &size
, 0);
1572 if (size
< sizeof(uint64_t)) {
1573 printf("invalid uint64\n");
1574 return ZFS_ERR_BAD_FS
;
1577 *out
= be64_to_cpu(*(uint64_t *) nvpair
);
1582 zfs_nvlist_lookup_string(char *nvlist
, char *name
)
1590 found
= nvlist_find_value(nvlist
, name
, DATA_TYPE_STRING
, &nvpair
, &size
, 0);
1594 printf("invalid string\n");
1597 slen
= be32_to_cpu(*(uint32_t *) nvpair
);
1598 if (slen
> size
- 4)
1600 ret
= malloc(slen
+ 1);
1603 memcpy(ret
, nvpair
+ 4, slen
);
1609 zfs_nvlist_lookup_nvlist(char *nvlist
, char *name
)
1616 found
= nvlist_find_value(nvlist
, name
, DATA_TYPE_NVLIST
, &nvpair
,
1620 ret
= calloc(1, size
+ 3 * sizeof(uint32_t));
1623 memcpy(ret
, nvlist
, sizeof(uint32_t));
1625 memcpy(ret
+ sizeof(uint32_t), nvpair
, size
);
1630 zfs_nvlist_lookup_nvlist_array_get_nelm(char *nvlist
, char *name
)
1636 found
= nvlist_find_value(nvlist
, name
, DATA_TYPE_NVLIST
, &nvpair
,
1644 zfs_nvlist_lookup_nvlist_array(char *nvlist
, char *name
,
1647 char *nvpair
, *nvpairptr
;
1654 found
= nvlist_find_value(nvlist
, name
, DATA_TYPE_NVLIST
, &nvpair
,
1658 if (index
>= nelm
) {
1659 printf("trying to lookup past nvlist array\n");
1665 for (i
= 0; i
< index
; i
++) {
1666 uint32_t encode_size
;
1668 /* skip the header, nvl_version, and nvl_nvflag */
1669 nvpairptr
= nvpairptr
+ 4 * 2;
1671 while (nvpairptr
< nvpair
+ size
1672 && (encode_size
= be32_to_cpu(*(uint32_t *) nvpairptr
)))
1673 nvlist
+= encode_size
; /* goto the next nvpair */
1675 nvlist
= nvlist
+ 4 * 2; /* skip the ending 2 zeros - 8 bytes */
1678 if (nvpairptr
>= nvpair
+ size
1679 || nvpairptr
+ be32_to_cpu(*(uint32_t *) (nvpairptr
+ 4 * 2))
1681 printf("incorrect nvlist array\n");
1685 ret
= calloc(1, be32_to_cpu(*(uint32_t *) (nvpairptr
+ 4 * 2))
1686 + 3 * sizeof(uint32_t));
1689 memcpy(ret
, nvlist
, sizeof(uint32_t));
1691 memcpy(ret
+ sizeof(uint32_t), nvpairptr
, size
);
1696 int_zfs_fetch_nvlist(struct zfs_data
*data
, char **nvlist
)
1700 *nvlist
= malloc(VDEV_PHYS_SIZE
);
1701 /* Read in the vdev name-value pair list (112K). */
1702 err
= zfs_devread(data
->vdev_phys_sector
, 0, VDEV_PHYS_SIZE
, *nvlist
);
1708 return ZFS_ERR_NONE
;
1712 * Check the disk label information and retrieve needed vdev name-value pairs.
1716 check_pool_label(struct zfs_data
*data
)
1718 uint64_t pool_state
;
1719 char *nvlist
; /* for the pool */
1720 char *vdevnvlist
; /* for the vdev */
1726 err
= int_zfs_fetch_nvlist(data
, &nvlist
);
1730 found
= zfs_nvlist_lookup_uint64(nvlist
, ZPOOL_CONFIG_POOL_STATE
,
1734 printf("zfs pool state not found\n");
1735 return ZFS_ERR_BAD_FS
;
1738 if (pool_state
== POOL_STATE_DESTROYED
) {
1740 printf("zpool is marked as destroyed\n");
1741 return ZFS_ERR_BAD_FS
;
1744 data
->label_txg
= 0;
1745 found
= zfs_nvlist_lookup_uint64(nvlist
, ZPOOL_CONFIG_POOL_TXG
,
1749 printf("zfs pool txg not found\n");
1750 return ZFS_ERR_BAD_FS
;
1753 /* not an active device */
1754 if (data
->label_txg
== 0) {
1756 printf("zpool is not active\n");
1757 return ZFS_ERR_BAD_FS
;
1760 found
= zfs_nvlist_lookup_uint64(nvlist
, ZPOOL_CONFIG_VERSION
,
1764 printf("zpool config version not found\n");
1765 return ZFS_ERR_BAD_FS
;
1768 if (version
> SPA_VERSION
) {
1770 printf("SPA version too new %llu > %llu\n",
1771 (unsigned long long) version
,
1772 (unsigned long long) SPA_VERSION
);
1773 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
1776 vdevnvlist
= zfs_nvlist_lookup_nvlist(nvlist
, ZPOOL_CONFIG_VDEV_TREE
);
1779 printf("ZFS config vdev tree not found\n");
1780 return ZFS_ERR_BAD_FS
;
1783 found
= zfs_nvlist_lookup_uint64(vdevnvlist
, ZPOOL_CONFIG_ASHIFT
,
1784 &data
->vdev_ashift
);
1788 printf("ZPOOL config ashift not found\n");
1789 return ZFS_ERR_BAD_FS
;
1792 found
= zfs_nvlist_lookup_uint64(nvlist
, ZPOOL_CONFIG_GUID
, &diskguid
);
1795 printf("ZPOOL config guid not found\n");
1796 return ZFS_ERR_BAD_FS
;
1799 found
= zfs_nvlist_lookup_uint64(nvlist
, ZPOOL_CONFIG_POOL_GUID
, &data
->pool_guid
);
1802 printf("ZPOOL config pool guid not found\n");
1803 return ZFS_ERR_BAD_FS
;
1808 printf("ZFS Pool GUID: %llu (%016llx) Label: GUID: %llu (%016llx), txg: %llu, SPA v%llu, ashift: %llu\n",
1809 (unsigned long long) data
->pool_guid
,
1810 (unsigned long long) data
->pool_guid
,
1811 (unsigned long long) diskguid
,
1812 (unsigned long long) diskguid
,
1813 (unsigned long long) data
->label_txg
,
1814 (unsigned long long) version
,
1815 (unsigned long long) data
->vdev_ashift
);
1817 return ZFS_ERR_NONE
;
1821 * vdev_label_start returns the physical disk offset (in bytes) of
1824 static uint64_t vdev_label_start(uint64_t psize
, int l
)
1826 return (l
* sizeof(vdev_label_t
) + (l
< VDEV_LABELS
/ 2 ?
1828 VDEV_LABELS
* sizeof(vdev_label_t
)));
1832 zfs_unmount(struct zfs_data
*data
)
1834 free(data
->dnode_buf
);
1835 free(data
->dnode_mdn
);
1836 free(data
->file_buf
);
1841 * zfs_mount() locates a valid uberblock of the root pool and read in its MOS
1842 * to the memory address MOS.
1846 zfs_mount(device_t dev
)
1848 struct zfs_data
*data
= 0;
1849 int label
= 0, bestlabel
= -1;
1851 uberblock_t
*ubbest
;
1852 uberblock_t
*ubcur
= NULL
;
1857 data
= malloc(sizeof(*data
));
1860 memset(data
, 0, sizeof(*data
));
1862 ub_array
= malloc(VDEV_UBERBLOCK_RING
);
1868 ubbest
= malloc(sizeof(*ubbest
));
1874 memset(ubbest
, 0, sizeof(*ubbest
));
1877 * some eltorito stacks don't give us a size and
1878 * we end up setting the size to MAXUINT, further
1879 * some of these devices stop working once a single
1880 * read past the end has been issued. Checking
1881 * for a maximum part_length and skipping the backup
1882 * labels at the end of the slice/partition/device
1883 * avoids breaking down on such devices.
1886 dev
->part_length
== 0 ?
1887 VDEV_LABELS
/ 2 : VDEV_LABELS
;
1889 /* Size in bytes of the device (disk or partition) aligned to label size*/
1890 uint64_t device_size
=
1891 dev
->part_length
<< SECTOR_BITS
;
1893 const uint64_t alignedbytes
=
1894 P2ALIGN(device_size
, (uint64_t) sizeof(vdev_label_t
));
1896 for (label
= 0; label
< vdevnum
; label
++) {
1897 uint64_t labelstartbytes
= vdev_label_start(alignedbytes
, label
);
1898 uint64_t labelstart
= labelstartbytes
>> SECTOR_BITS
;
1900 debug("zfs reading label %d at sector %llu (byte %llu)\n",
1901 label
, (unsigned long long) labelstart
,
1902 (unsigned long long) labelstartbytes
);
1904 data
->vdev_phys_sector
= labelstart
+
1905 ((VDEV_SKIP_SIZE
+ VDEV_BOOT_HEADER_SIZE
) >> SECTOR_BITS
);
1907 err
= check_pool_label(data
);
1909 printf("zfs error checking label %d\n", label
);
1913 /* Read in the uberblock ring (128K). */
1914 err
= zfs_devread(data
->vdev_phys_sector
+
1915 (VDEV_PHYS_SIZE
>> SECTOR_BITS
),
1916 0, VDEV_UBERBLOCK_RING
, ub_array
);
1918 printf("zfs error reading uberblock ring for label %d\n", label
);
1922 ubcur
= find_bestub(ub_array
, data
);
1924 printf("zfs No good uberblocks found in label %d\n", label
);
1928 if (vdev_uberblock_compare(ubcur
, ubbest
) > 0) {
1929 /* Looks like the block is good, so use it.*/
1930 memcpy(ubbest
, ubcur
, sizeof(*ubbest
));
1932 debug("zfs Current best uberblock found in label %d\n", label
);
1937 /* We zero'd the structure to begin with. If we never assigned to it,
1938 magic will still be zero. */
1939 if (!ubbest
->ub_magic
) {
1940 printf("couldn't find a valid ZFS label\n");
1946 debug("zfs ubbest %p in label %d\n", ubbest
, bestlabel
);
1948 zfs_endian_t ub_endian
=
1949 zfs_to_cpu64(ubbest
->ub_magic
, LITTLE_ENDIAN
) == UBERBLOCK_MAGIC
1950 ? LITTLE_ENDIAN
: BIG_ENDIAN
;
1952 debug("zfs endian set to %s\n", !ub_endian
? "big" : "little");
1954 err
= zio_read(&ubbest
->ub_rootbp
, ub_endian
, &osp
, &ospsize
, data
);
1957 printf("couldn't zio_read object directory\n");
1964 if (ospsize
< OBJSET_PHYS_SIZE_V14
) {
1965 printf("osp too small\n");
1972 /* Got the MOS. Save it at the memory addr MOS. */
1973 memmove(&(data
->mos
.dn
), &((objset_phys_t
*) osp
)->os_meta_dnode
, DNODE_SIZE
);
1975 (zfs_to_cpu64(ubbest
->ub_rootbp
.blk_prop
, ub_endian
) >> 63) & 1;
1976 memmove(&(data
->current_uberblock
), ubbest
, sizeof(uberblock_t
));
1985 zfs_fetch_nvlist(device_t dev
, char **nvlist
)
1987 struct zfs_data
*zfs
;
1990 zfs
= zfs_mount(dev
);
1992 return ZFS_ERR_BAD_FS
;
1993 err
= int_zfs_fetch_nvlist(zfs
, nvlist
);
1999 * zfs_open() locates a file in the rootpool by following the
2000 * MOS and places the dnode of the file in the memory address DNODE.
2003 zfs_open(struct zfs_file
*file
, const char *fsfilename
)
2005 struct zfs_data
*data
;
2009 data
= zfs_mount(file
->device
);
2011 return ZFS_ERR_BAD_FS
;
2013 err
= dnode_get_fullpath(fsfilename
, &(data
->mdn
), 0,
2014 &(data
->dnode
), &isfs
, data
);
2022 printf("Missing @ or / separator\n");
2023 return ZFS_ERR_FILE_NOT_FOUND
;
2026 /* We found the dnode for this file. Verify if it is a plain file. */
2027 if (data
->dnode
.dn
.dn_type
!= DMU_OT_PLAIN_FILE_CONTENTS
) {
2029 printf("not a file\n");
2030 return ZFS_ERR_BAD_FILE_TYPE
;
2033 /* get the file size and set the file position to 0 */
2036 * For DMU_OT_SA we will need to locate the SIZE attribute
2037 * attribute, which could be either in the bonus buffer
2038 * or the "spill" block.
2040 if (data
->dnode
.dn
.dn_bonustype
== DMU_OT_SA
) {
2044 if (data
->dnode
.dn
.dn_bonuslen
!= 0) {
2045 sahdrp
= (sa_hdr_phys_t
*) DN_BONUS(&data
->dnode
.dn
);
2046 } else if (data
->dnode
.dn
.dn_flags
& DNODE_FLAG_SPILL_BLKPTR
) {
2047 blkptr_t
*bp
= &data
->dnode
.dn
.dn_spill
;
2049 err
= zio_read(bp
, data
->dnode
.endian
, &sahdrp
, NULL
, data
);
2053 printf("filesystem is corrupt :(\n");
2054 return ZFS_ERR_BAD_FS
;
2057 hdrsize
= SA_HDR_SIZE(((sa_hdr_phys_t
*) sahdrp
));
2058 file
->size
= *(uint64_t *) ((char *) sahdrp
+ hdrsize
+ SA_SIZE_OFFSET
);
2059 if ((data
->dnode
.dn
.dn_bonuslen
== 0) &&
2060 (data
->dnode
.dn
.dn_flags
& DNODE_FLAG_SPILL_BLKPTR
))
2063 file
->size
= zfs_to_cpu64(((znode_phys_t
*) DN_BONUS(&data
->dnode
.dn
))->zp_size
, data
->dnode
.endian
);
2069 return ZFS_ERR_NONE
;
2073 zfs_read(zfs_file_t file
, char *buf
, uint64_t len
)
2075 struct zfs_data
*data
= (struct zfs_data
*) file
->data
;
2076 int blksz
, movesize
;
2081 if (data
->file_buf
== NULL
) {
2082 data
->file_buf
= malloc(SPA_MAXBLOCKSIZE
);
2083 if (!data
->file_buf
)
2085 data
->file_start
= data
->file_end
= 0;
2089 * If offset is in memory, move it into the buffer provided and return.
2091 if (file
->offset
>= data
->file_start
2092 && file
->offset
+ len
<= data
->file_end
) {
2093 memmove(buf
, data
->file_buf
+ file
->offset
- data
->file_start
,
2098 blksz
= zfs_to_cpu16(data
->dnode
.dn
.dn_datablkszsec
,
2099 data
->dnode
.endian
) << SPA_MINBLOCKSHIFT
;
2102 * Entire Dnode is too big to fit into the space available. We
2103 * will need to read it in chunks. This could be optimized to
2104 * read in as large a chunk as there is space available, but for
2105 * now, this only reads in one data block at a time.
2112 * Find requested blkid and the offset within that block.
2114 uint64_t blkid
= file
->offset
+ red
;
2115 blkid
= do_div(blkid
, blksz
);
2116 free(data
->file_buf
);
2119 err
= dmu_read(&(data
->dnode
), blkid
, &t
,
2125 data
->file_start
= blkid
* blksz
;
2126 data
->file_end
= data
->file_start
+ blksz
;
2128 movesize
= min(length
, data
->file_end
- (int)file
->offset
- red
);
2130 memmove(buf
, data
->file_buf
+ file
->offset
+ red
2131 - data
->file_start
, movesize
);
2141 zfs_close(zfs_file_t file
)
2143 zfs_unmount((struct zfs_data
*) file
->data
);
2144 return ZFS_ERR_NONE
;
2148 zfs_getmdnobj(device_t dev
, const char *fsfilename
,
2151 struct zfs_data
*data
;
2155 data
= zfs_mount(dev
);
2157 return ZFS_ERR_BAD_FS
;
2159 err
= dnode_get_fullpath(fsfilename
, &(data
->mdn
), mdnobj
,
2160 &(data
->dnode
), &isfs
, data
);
2166 fill_fs_info(struct zfs_dirhook_info
*info
,
2167 dnode_end_t mdn
, struct zfs_data
*data
)
2174 memset(info
, 0, sizeof(*info
));
2178 if (mdn
.dn
.dn_type
== DMU_OT_DSL_DIR
) {
2179 headobj
= zfs_to_cpu64(((dsl_dir_phys_t
*) DN_BONUS(&mdn
.dn
))->dd_head_dataset_obj
, mdn
.endian
);
2181 err
= dnode_get(&(data
->mos
), headobj
, DMU_OT_DSL_DATASET
, &mdn
, data
);
2183 printf("zfs failed here 1\n");
2187 make_mdn(&mdn
, data
);
2188 err
= dnode_get(&mdn
, MASTER_NODE_OBJ
, DMU_OT_MASTER_NODE
,
2191 printf("zfs failed here 2\n");
2195 err
= zap_lookup(&dn
, ZFS_ROOT_OBJ
, &objnum
, data
);
2197 printf("zfs failed here 3\n");
2201 err
= dnode_get(&mdn
, objnum
, 0, &dn
, data
);
2203 printf("zfs failed here 4\n");
2208 info
->mtime
= zfs_to_cpu64(((znode_phys_t
*) DN_BONUS(&dn
.dn
))->zp_mtime
[0], dn
.endian
);
2213 static int iterate_zap(const char *name
, uint64_t val
, struct zfs_data
*data
)
2215 struct zfs_dirhook_info info
;
2218 memset(&info
, 0, sizeof(info
));
2220 dnode_get(&(data
->mdn
), val
, 0, &dn
, data
);
2222 info
.mtime
= zfs_to_cpu64(((znode_phys_t
*) DN_BONUS(&dn
.dn
))->zp_mtime
[0], dn
.endian
);
2223 info
.dir
= (dn
.dn
.dn_type
== DMU_OT_DIRECTORY_CONTENTS
);
2224 debug("zfs type=%d, name=%s\n",
2225 (int)dn
.dn
.dn_type
, (char *)name
);
2226 if (!data
->userhook
)
2228 return data
->userhook(name
, &info
);
2231 static int iterate_zap_fs(const char *name
, uint64_t val
, struct zfs_data
*data
)
2233 struct zfs_dirhook_info info
;
2236 err
= dnode_get(&(data
->mos
), val
, 0, &mdn
, data
);
2239 if (mdn
.dn
.dn_type
!= DMU_OT_DSL_DIR
)
2242 fill_fs_info(&info
, mdn
, data
);
2244 if (!data
->userhook
)
2246 return data
->userhook(name
, &info
);
2249 static int iterate_zap_snap(const char *name
, uint64_t val
, struct zfs_data
*data
)
2251 struct zfs_dirhook_info info
;
2257 err
= dnode_get(&(data
->mos
), val
, 0, &mdn
, data
);
2261 if (mdn
.dn
.dn_type
!= DMU_OT_DSL_DATASET
)
2264 fill_fs_info(&info
, mdn
, data
);
2266 name2
= malloc(strlen(name
) + 2);
2268 memcpy(name2
+ 1, name
, strlen(name
) + 1);
2270 ret
= data
->userhook(name2
, &info
);
2276 zfs_ls(device_t device
, const char *path
,
2277 int (*hook
)(const char *, const struct zfs_dirhook_info
*))
2279 struct zfs_data
*data
;
2283 data
= zfs_mount(device
);
2285 return ZFS_ERR_BAD_FS
;
2287 data
->userhook
= hook
;
2289 err
= dnode_get_fullpath(path
, &(data
->mdn
), 0, &(data
->dnode
), &isfs
, data
);
2295 uint64_t childobj
, headobj
;
2298 struct zfs_dirhook_info info
;
2300 fill_fs_info(&info
, data
->dnode
, data
);
2303 childobj
= zfs_to_cpu64(((dsl_dir_phys_t
*) DN_BONUS(&data
->dnode
.dn
))->dd_child_dir_zapobj
, data
->dnode
.endian
);
2304 headobj
= zfs_to_cpu64(((dsl_dir_phys_t
*) DN_BONUS(&data
->dnode
.dn
))->dd_head_dataset_obj
, data
->dnode
.endian
);
2305 err
= dnode_get(&(data
->mos
), childobj
,
2306 DMU_OT_DSL_DIR_CHILD_MAP
, &dn
, data
);
2313 zap_iterate(&dn
, iterate_zap_fs
, data
);
2315 err
= dnode_get(&(data
->mos
), headobj
, DMU_OT_DSL_DATASET
, &dn
, data
);
2321 snapobj
= zfs_to_cpu64(((dsl_dataset_phys_t
*) DN_BONUS(&dn
.dn
))->ds_snapnames_zapobj
, dn
.endian
);
2323 err
= dnode_get(&(data
->mos
), snapobj
,
2324 DMU_OT_DSL_DS_SNAP_MAP
, &dn
, data
);
2330 zap_iterate(&dn
, iterate_zap_snap
, data
);
2332 if (data
->dnode
.dn
.dn_type
!= DMU_OT_DIRECTORY_CONTENTS
) {
2334 printf("not a directory\n");
2335 return ZFS_ERR_BAD_FILE_TYPE
;
2337 zap_iterate(&(data
->dnode
), iterate_zap
, data
);
2340 return ZFS_ERR_NONE
;