3 * ZFS filesystem ported to u-boot by
4 * Jorgen Lundman <lundman at lundman.net>
6 * GRUB -- GRand Unified Bootloader
7 * Copyright (C) 1999,2000,2001,2002,2003,2004
8 * Free Software Foundation, Inc.
9 * Copyright 2004 Sun Microsystems, Inc.
11 * GRUB is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * GRUB is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with GRUB. If not, see <http://www.gnu.org/licenses/>.
28 #include <linux/stat.h>
29 #include <linux/time.h>
30 #include <linux/ctype.h>
31 #include <asm/byteorder.h>
32 #include "zfs_common.h"
34 block_dev_desc_t
*zfs_dev_desc
;
37 * The zfs plug-in routines for GRUB are:
39 * zfs_mount() - locates a valid uberblock of the root pool and reads
40 * in its MOS at the memory address MOS.
42 * zfs_open() - locates a plain file object by following the MOS
43 * and places its dnode at the memory address DNODE.
45 * zfs_read() - read in the data blocks pointed by the DNODE.
51 #include <zfs/dnode.h>
52 #include <zfs/uberblock_impl.h>
53 #include <zfs/vdev_impl.h>
54 #include <zfs/zio_checksum.h>
55 #include <zfs/zap_impl.h>
56 #include <zfs/zap_leaf.h>
57 #include <zfs/zfs_znode.h>
59 #include <zfs/dmu_objset.h>
60 #include <zfs/sa_impl.h>
61 #include <zfs/dsl_dir.h>
62 #include <zfs/dsl_dataset.h>
65 #define ZPOOL_PROP_BOOTFS "bootfs"
69 * For nvlist manipulation. (from nvpair.h)
71 #define NV_ENCODE_NATIVE 0
72 #define NV_ENCODE_XDR 1
73 #define NV_BIG_ENDIAN 0
74 #define NV_LITTLE_ENDIAN 1
75 #define DATA_TYPE_UINT64 8
76 #define DATA_TYPE_STRING 9
77 #define DATA_TYPE_NVLIST 19
78 #define DATA_TYPE_NVLIST_ARRAY 20
/*
 * Macros to get fields in a bp or DVA.
 */

/* x modulo align; only meaningful when align is a power of two. */
#define P2PHASE(x, align)		((x) & ((align) - 1))

/*
 * Turn a DVA offset into a sector number: add VDEV_LABEL_START_SIZE to the
 * offset and shift the sum right by SPA_MINBLOCKSHIFT.
 *
 * The argument is parenthesized so that callers may pass any expression
 * (e.g. one built with '&', '|' or '?:') without the addition binding to
 * only part of it.
 */
#define DVA_OFFSET_TO_PHYS_SECTOR(offset)	\
	(((offset) + VDEV_LABEL_START_SIZE) >> SPA_MINBLOCKSHIFT)

/*
 * return x rounded down to an align boundary
 * eg, P2ALIGN(1200, 1024) == 1024 (1*align)
 * eg, P2ALIGN(1024, 1024) == 1024 (1*align)
 * eg, P2ALIGN(0x1234, 0x100) == 0x1200 (0x12*align)
 * eg, P2ALIGN(0x5600, 0x100) == 0x5600 (0x56*align)
 */
#define P2ALIGN(x, align)		((x) & -(align))
/*
 * FAT ZAP data structures
 */
#define ZFS_CRC64_POLY	0xC96C5795D7870F42ULL	/* ECMA-182, reflected form */
/* Top n bits of a 64-bit hash; n == 0 is special-cased to avoid a 64-bit shift. */
#define ZAP_HASH_IDX(hash, n)	(((n) == 0) ? 0 : ((hash) >> (64 - (n))))
#define CHAIN_END	0xffff	/* end of the chunk chain */

/*
 * The amount of space within the chunk available for the array is:
 * chunk size - space for type (1) - space for next pointer (2)
 */
#define ZAP_LEAF_ARRAY_BYTES	(ZAP_LEAF_CHUNKSIZE - 3)

/* Macro arguments are parenthesized so expression arguments expand safely. */
#define ZAP_LEAF_HASH_SHIFT(bs)		((bs) - 5)
#define ZAP_LEAF_HASH_NUMENTRIES(bs)	(1 << ZAP_LEAF_HASH_SHIFT(bs))
/* NOTE: LEAF_HASH reads `l` (the leaf pointer) from the caller's scope. */
#define LEAF_HASH(bs, h)	\
	((ZAP_LEAF_HASH_NUMENTRIES(bs) - 1) &	\
	 ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(bs) - l->l_hdr.lh_prefix_len)))

/*
 * The amount of space available for chunks is:
 * block size shift - hash entry size (2) * number of hash
 * entries - header space (2*chunksize)
 */
#define ZAP_LEAF_NUMCHUNKS(bs)	\
	((((1 << (bs)) - 2 * ZAP_LEAF_HASH_NUMENTRIES(bs)) /	\
	  ZAP_LEAF_CHUNKSIZE) - 2)

/*
 * The chunks start immediately after the hash table. The end of the
 * hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a
 * zap_leaf_chunk_t pointer and index.
 */
#define ZAP_LEAF_CHUNK(l, bs, idx)	\
	(((zap_leaf_chunk_t *)((l)->l_hash + ZAP_LEAF_HASH_NUMENTRIES(bs)))[(idx)])
#define ZAP_LEAF_ENTRY(l, bs, idx)	(&ZAP_LEAF_CHUNK(l, bs, idx).l_entry)
136 * Decompression Entry - lzjb
144 typedef int zfs_decomp_func_t(void *s_start
, void *d_start
,
145 uint32_t s_len
, uint32_t d_len
);
146 typedef struct decomp_entry
{
148 zfs_decomp_func_t
*decomp_func
;
151 typedef struct dnode_end
{
157 /* cache for a file block of the currently zfs_open()-ed file */
162 /* XXX: ashift is per vdev, not per pool. We currently only ever touch
163 * a single vdev, but when/if raid-z or stripes are supported, this
166 uint64_t vdev_ashift
;
170 /* cache for a dnode block */
171 dnode_phys_t
*dnode_buf
;
172 dnode_phys_t
*dnode_mdn
;
173 uint64_t dnode_start
;
175 zfs_endian_t dnode_endian
;
177 uberblock_t current_uberblock
;
183 uint64_t vdev_phys_sector
;
185 int (*userhook
)(const char *, const struct zfs_dirhook_info
*);
186 struct zfs_dirhook_info
*dirinfo
;
/*
 * Decompression entry used for the "gzip-1" .. "gzip-9" rows of
 * decomp_table.  Presumably s/slen describe the compressed input and
 * d/dlen the output buffer (matching zfs_decomp_func_t) -- confirm.
 * A negative result from the inner call is reported as ZFS_ERR_BAD_FS.
 *
 * NOTE(review): in the lines visible here the inner call is to
 * zlib_decompress() itself, i.e. this very function, which would recurse
 * without bound.  Presumably a differently named zlib/inflate helper was
 * intended -- confirm against the complete file and the decompression
 * library in use before relying on gzip-compressed blocks.
 */
194 zlib_decompress(void *s
, void *d
,
195 uint32_t slen
, uint32_t dlen
)
197 if (zlib_decompress(s
, d
, slen
, dlen
) < 0)
198 return ZFS_ERR_BAD_FS
;
202 static decomp_entry_t decomp_table
[ZIO_COMPRESS_FUNCTIONS
] = {
203 {"inherit", NULL
}, /* ZIO_COMPRESS_INHERIT */
204 {"on", lzjb_decompress
}, /* ZIO_COMPRESS_ON */
205 {"off", NULL
}, /* ZIO_COMPRESS_OFF */
206 {"lzjb", lzjb_decompress
}, /* ZIO_COMPRESS_LZJB */
207 {"empty", NULL
}, /* ZIO_COMPRESS_EMPTY */
208 {"gzip-1", zlib_decompress
}, /* ZIO_COMPRESS_GZIP1 */
209 {"gzip-2", zlib_decompress
}, /* ZIO_COMPRESS_GZIP2 */
210 {"gzip-3", zlib_decompress
}, /* ZIO_COMPRESS_GZIP3 */
211 {"gzip-4", zlib_decompress
}, /* ZIO_COMPRESS_GZIP4 */
212 {"gzip-5", zlib_decompress
}, /* ZIO_COMPRESS_GZIP5 */
213 {"gzip-6", zlib_decompress
}, /* ZIO_COMPRESS_GZIP6 */
214 {"gzip-7", zlib_decompress
}, /* ZIO_COMPRESS_GZIP7 */
215 {"gzip-8", zlib_decompress
}, /* ZIO_COMPRESS_GZIP8 */
216 {"gzip-9", zlib_decompress
}, /* ZIO_COMPRESS_GZIP9 */
221 static int zio_read_data(blkptr_t
*bp
, zfs_endian_t endian
,
222 void *buf
, struct zfs_data
*data
);
225 zio_read(blkptr_t
*bp
, zfs_endian_t endian
, void **buf
,
226 size_t *size
, struct zfs_data
*data
);
229 * Our own version of log2(). Same thing as highbit()-1.
232 zfs_log2(uint64_t num
)
245 /* Checksum Functions */
247 zio_checksum_off(const void *buf
__attribute__ ((unused
)),
248 uint64_t size
__attribute__ ((unused
)),
249 zfs_endian_t endian
__attribute__ ((unused
)),
252 ZIO_SET_CHECKSUM(zcp
, 0, 0, 0, 0);
255 /* Checksum Table and Values */
256 static zio_checksum_info_t zio_checksum_table
[ZIO_CHECKSUM_FUNCTIONS
] = {
257 {NULL
, 0, 0, "inherit"},
259 {zio_checksum_off
, 0, 0, "off"},
260 {zio_checksum_SHA256
, 1, 1, "label"},
261 {zio_checksum_SHA256
, 1, 1, "gang_header"},
262 {NULL
, 0, 0, "zilog"},
263 {fletcher_2_endian
, 0, 0, "fletcher2"},
264 {fletcher_4_endian
, 1, 0, "fletcher4"},
265 {zio_checksum_SHA256
, 1, 0, "SHA256"},
266 {NULL
, 0, 0, "zilog2"},
270 * zio_checksum_verify: Provides support for checksum verification.
272 * Fletcher2, Fletcher4, and SHA256 are supported.
276 zio_checksum_verify(zio_cksum_t zc
, uint32_t checksum
,
277 zfs_endian_t endian
, char *buf
, int size
)
279 zio_eck_t
*zec
= (zio_eck_t
*) (buf
+ size
) - 1;
280 zio_checksum_info_t
*ci
= &zio_checksum_table
[checksum
];
281 zio_cksum_t actual_cksum
, expected_cksum
;
283 if (checksum
>= ZIO_CHECKSUM_FUNCTIONS
|| ci
->ci_func
== NULL
) {
284 printf("zfs unknown checksum function %d\n", checksum
);
285 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
289 expected_cksum
= zec
->zec_cksum
;
291 ci
->ci_func(buf
, size
, endian
, &actual_cksum
);
292 zec
->zec_cksum
= expected_cksum
;
295 ci
->ci_func(buf
, size
, endian
, &actual_cksum
);
298 if ((actual_cksum
.zc_word
[0] != zc
.zc_word
[0])
299 || (actual_cksum
.zc_word
[1] != zc
.zc_word
[1])
300 || (actual_cksum
.zc_word
[2] != zc
.zc_word
[2])
301 || (actual_cksum
.zc_word
[3] != zc
.zc_word
[3])) {
302 return ZFS_ERR_BAD_FS
;
309 * vdev_uberblock_compare takes two uberblock structures and returns an integer
310 * indicating the more recent of the two.
311 * Return Value = 1 if ub1 is more recent
312 * Return Value = -1 if ub2 is more recent
313 * The most recent uberblock is determined using its transaction number and
314 * timestamp. The uberblock with the highest transaction number is
315 * considered "newer". If the transaction numbers of the two blocks match, the
316 * timestamps are compared to determine the "newer" of the two.
319 vdev_uberblock_compare(uberblock_t
*ub1
, uberblock_t
*ub2
)
321 zfs_endian_t ub1_endian
, ub2_endian
;
322 if (zfs_to_cpu64(ub1
->ub_magic
, LITTLE_ENDIAN
) == UBERBLOCK_MAGIC
)
323 ub1_endian
= LITTLE_ENDIAN
;
325 ub1_endian
= BIG_ENDIAN
;
326 if (zfs_to_cpu64(ub2
->ub_magic
, LITTLE_ENDIAN
) == UBERBLOCK_MAGIC
)
327 ub2_endian
= LITTLE_ENDIAN
;
329 ub2_endian
= BIG_ENDIAN
;
331 if (zfs_to_cpu64(ub1
->ub_txg
, ub1_endian
)
332 < zfs_to_cpu64(ub2
->ub_txg
, ub2_endian
))
334 if (zfs_to_cpu64(ub1
->ub_txg
, ub1_endian
)
335 > zfs_to_cpu64(ub2
->ub_txg
, ub2_endian
))
338 if (zfs_to_cpu64(ub1
->ub_timestamp
, ub1_endian
)
339 < zfs_to_cpu64(ub2
->ub_timestamp
, ub2_endian
))
341 if (zfs_to_cpu64(ub1
->ub_timestamp
, ub1_endian
)
342 > zfs_to_cpu64(ub2
->ub_timestamp
, ub2_endian
))
349 * Three pieces of information are needed to verify an uberblock: the magic
350 * number, the version number, and the checksum.
352 * Currently Implemented: version number, magic number, label txg,
353 * and the label checksum (checked with zio_checksum_verify())
357 uberblock_verify(uberblock_t
*uber
, int offset
, struct zfs_data
*data
)
360 zfs_endian_t endian
= UNKNOWN_ENDIAN
;
363 if (uber
->ub_txg
< data
->label_txg
) {
364 debug("ignoring partially written label: uber_txg < label_txg %llu %llu\n",
365 uber
->ub_txg
, data
->label_txg
);
366 return ZFS_ERR_BAD_FS
;
369 if (zfs_to_cpu64(uber
->ub_magic
, LITTLE_ENDIAN
) == UBERBLOCK_MAGIC
370 && zfs_to_cpu64(uber
->ub_version
, LITTLE_ENDIAN
) > 0
371 && zfs_to_cpu64(uber
->ub_version
, LITTLE_ENDIAN
) <= SPA_VERSION
)
372 endian
= LITTLE_ENDIAN
;
374 if (zfs_to_cpu64(uber
->ub_magic
, BIG_ENDIAN
) == UBERBLOCK_MAGIC
375 && zfs_to_cpu64(uber
->ub_version
, BIG_ENDIAN
) > 0
376 && zfs_to_cpu64(uber
->ub_version
, BIG_ENDIAN
) <= SPA_VERSION
)
379 if (endian
== UNKNOWN_ENDIAN
) {
380 printf("invalid uberblock magic\n");
381 return ZFS_ERR_BAD_FS
;
384 memset(&zc
, 0, sizeof(zc
));
385 zc
.zc_word
[0] = cpu_to_zfs64(offset
, endian
);
386 err
= zio_checksum_verify(zc
, ZIO_CHECKSUM_LABEL
, endian
,
387 (char *) uber
, UBERBLOCK_SIZE(data
->vdev_ashift
));
390 /* Check that the data pointed by the rootbp is usable. */
393 err
= zio_read(&uber
->ub_rootbp
, endian
, &osp
, &ospsize
, data
);
396 if (!err
&& ospsize
< OBJSET_PHYS_SIZE_V14
) {
397 printf("uberblock rootbp points to invalid data\n");
398 return ZFS_ERR_BAD_FS
;
406 * Find the best uberblock.
408 * Success - Pointer to the best uberblock.
411 static uberblock_t
*find_bestub(char *ub_array
, struct zfs_data
*data
)
413 const uint64_t sector
= data
->vdev_phys_sector
;
414 uberblock_t
*ubbest
= NULL
;
416 unsigned int i
, offset
, pickedub
= 0;
417 int err
= ZFS_ERR_NONE
;
419 const unsigned int UBCOUNT
= UBERBLOCK_COUNT(data
->vdev_ashift
);
420 const uint64_t UBBYTES
= UBERBLOCK_SIZE(data
->vdev_ashift
);
422 for (i
= 0; i
< UBCOUNT
; i
++) {
423 ubnext
= (uberblock_t
*) (i
* UBBYTES
+ ub_array
);
424 offset
= (sector
<< SPA_MINBLOCKSHIFT
) + VDEV_PHYS_SIZE
+ (i
* UBBYTES
);
426 err
= uberblock_verify(ubnext
, offset
, data
);
430 if (ubbest
== NULL
|| vdev_uberblock_compare(ubnext
, ubbest
) > 0) {
437 debug("zfs Found best uberblock at idx %d, txg %llu\n",
438 pickedub
, (unsigned long long) ubbest
->ub_txg
);
/*
 * Size in bytes encoded in a block pointer's blk_prop word (presumably the
 * physical, on-disk size, going by the function name -- confirm).
 * blk_prop is converted from the given endianness, the 16-bit field at
 * bits 16..31 is extracted, one is added, and the result is shifted left
 * by SPA_MINBLOCKSHIFT.
 */
444 get_psize(blkptr_t
*bp
, zfs_endian_t endian
)
446 return (((zfs_to_cpu64((bp
)->blk_prop
, endian
) >> 16) & 0xffff) + 1)
447 << SPA_MINBLOCKSHIFT
;
/*
 * Offset stored in a DVA: the second word, dva_word[1], converted from the
 * given endianness and shifted left by SPA_MINBLOCKSHIFT.  Callers in this
 * file feed the result to DVA_OFFSET_TO_PHYS_SECTOR(), so the value is
 * presumably a byte offset -- confirm against the on-disk format.
 */
451 dva_get_offset(dva_t
*dva
, zfs_endian_t endian
)
453 return zfs_to_cpu64((dva
)->dva_word
[1],
454 endian
) << SPA_MINBLOCKSHIFT
;
458 * Read a block of data based on the gang block address dva,
459 * and put its data in buf.
463 zio_read_gang(blkptr_t
*bp
, zfs_endian_t endian
, dva_t
*dva
, void *buf
,
464 struct zfs_data
*data
)
466 zio_gbh_phys_t
*zio_gb
;
467 uint64_t offset
, sector
;
472 memset(&zc
, 0, sizeof(zc
));
474 zio_gb
= malloc(SPA_GANGBLOCKSIZE
);
476 return ZFS_ERR_OUT_OF_MEMORY
;
478 offset
= dva_get_offset(dva
, endian
);
479 sector
= DVA_OFFSET_TO_PHYS_SECTOR(offset
);
481 /* read in the gang block header */
482 err
= zfs_devread(sector
, 0, SPA_GANGBLOCKSIZE
, (char *) zio_gb
);
490 /* self-checksumming the gang block header */
491 ZIO_SET_CHECKSUM(&zc
, DVA_GET_VDEV(dva
),
492 dva_get_offset(dva
, endian
), bp
->blk_birth
, 0);
493 err
= zio_checksum_verify(zc
, ZIO_CHECKSUM_GANG_HEADER
, endian
,
494 (char *) zio_gb
, SPA_GANGBLOCKSIZE
);
500 endian
= (zfs_to_cpu64(bp
->blk_prop
, endian
) >> 63) & 1;
502 for (i
= 0; i
< SPA_GBH_NBLKPTRS
; i
++) {
503 if (zio_gb
->zg_blkptr
[i
].blk_birth
== 0)
506 err
= zio_read_data(&zio_gb
->zg_blkptr
[i
], endian
, buf
, data
);
511 buf
= (char *) buf
+ get_psize(&zio_gb
->zg_blkptr
[i
], endian
);
518 * Read in a block of raw data to buf.
521 zio_read_data(blkptr_t
*bp
, zfs_endian_t endian
, void *buf
,
522 struct zfs_data
*data
)
525 int err
= ZFS_ERR_NONE
;
527 psize
= get_psize(bp
, endian
);
529 /* pick a good dva from the block pointer */
530 for (i
= 0; i
< SPA_DVAS_PER_BP
; i
++) {
531 uint64_t offset
, sector
;
533 if (bp
->blk_dva
[i
].dva_word
[0] == 0 && bp
->blk_dva
[i
].dva_word
[1] == 0)
536 if ((zfs_to_cpu64(bp
->blk_dva
[i
].dva_word
[1], endian
)>>63) & 1) {
537 err
= zio_read_gang(bp
, endian
, &bp
->blk_dva
[i
], buf
, data
);
539 /* read in a data block */
540 offset
= dva_get_offset(&bp
->blk_dva
[i
], endian
);
541 sector
= DVA_OFFSET_TO_PHYS_SECTOR(offset
);
543 err
= zfs_devread(sector
, 0, psize
, buf
);
547 /*Check the underlying checksum before we rule this DVA as "good"*/
548 uint32_t checkalgo
= (zfs_to_cpu64((bp
)->blk_prop
, endian
) >> 40) & 0xff;
550 err
= zio_checksum_verify(bp
->blk_cksum
, checkalgo
, endian
, buf
, psize
);
555 /* If the read failed or the checksum is bad, reset the error. Hopefully we've got some more DVAs to try. */
559 printf("couldn't find a valid DVA\n");
560 err
= ZFS_ERR_BAD_FS
;
567 * Read in a block of data, verify its checksum, decompress if needed,
568 * and put the uncompressed data in buf.
571 zio_read(blkptr_t
*bp
, zfs_endian_t endian
, void **buf
,
572 size_t *size
, struct zfs_data
*data
)
576 char *compbuf
= NULL
;
581 comp
= (zfs_to_cpu64((bp
)->blk_prop
, endian
)>>32) & 0xff;
582 lsize
= (BP_IS_HOLE(bp
) ? 0 :
583 (((zfs_to_cpu64((bp
)->blk_prop
, endian
) & 0xffff) + 1)
584 << SPA_MINBLOCKSHIFT
));
585 psize
= get_psize(bp
, endian
);
590 if (comp
>= ZIO_COMPRESS_FUNCTIONS
) {
591 printf("compression algorithm %u not supported\n", (unsigned int) comp
);
592 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
595 if (comp
!= ZIO_COMPRESS_OFF
&& decomp_table
[comp
].decomp_func
== NULL
) {
596 printf("compression algorithm %s not supported\n", decomp_table
[comp
].name
);
597 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
600 if (comp
!= ZIO_COMPRESS_OFF
) {
601 compbuf
= malloc(psize
);
603 return ZFS_ERR_OUT_OF_MEMORY
;
605 compbuf
= *buf
= malloc(lsize
);
608 err
= zio_read_data(bp
, endian
, compbuf
, data
);
615 if (comp
!= ZIO_COMPRESS_OFF
) {
616 *buf
= malloc(lsize
);
619 return ZFS_ERR_OUT_OF_MEMORY
;
622 err
= decomp_table
[comp
].decomp_func(compbuf
, *buf
, psize
, lsize
);
635 * Get the block from a block id.
636 * push the block onto the stack.
640 dmu_read(dnode_end_t
*dn
, uint64_t blkid
, void **buf
,
641 zfs_endian_t
*endian_out
, struct zfs_data
*data
)
644 blkptr_t
*bp_array
= dn
->dn
.dn_blkptr
;
645 int epbs
= dn
->dn
.dn_indblkshift
- SPA_BLKPTRSHIFT
;
649 int err
= ZFS_ERR_NONE
;
651 bp
= malloc(sizeof(blkptr_t
));
653 return ZFS_ERR_OUT_OF_MEMORY
;
656 for (level
= dn
->dn
.dn_nlevels
- 1; level
>= 0; level
--) {
657 idx
= (blkid
>> (epbs
* level
)) & ((1 << epbs
) - 1);
659 if (bp_array
!= dn
->dn
.dn_blkptr
) {
664 if (BP_IS_HOLE(bp
)) {
665 size_t size
= zfs_to_cpu16(dn
->dn
.dn_datablkszsec
,
667 << SPA_MINBLOCKSHIFT
;
670 err
= ZFS_ERR_OUT_OF_MEMORY
;
673 memset(*buf
, 0, size
);
674 endian
= (zfs_to_cpu64(bp
->blk_prop
, endian
) >> 63) & 1;
678 err
= zio_read(bp
, endian
, buf
, 0, data
);
679 endian
= (zfs_to_cpu64(bp
->blk_prop
, endian
) >> 63) & 1;
682 err
= zio_read(bp
, endian
, &tmpbuf
, 0, data
);
683 endian
= (zfs_to_cpu64(bp
->blk_prop
, endian
) >> 63) & 1;
688 if (bp_array
!= dn
->dn
.dn_blkptr
)
691 *endian_out
= endian
;
698 * mzap_lookup: Looks up property described by "name" and returns the value
702 mzap_lookup(mzap_phys_t
*zapobj
, zfs_endian_t endian
,
703 int objsize
, char *name
, uint64_t * value
)
706 mzap_ent_phys_t
*mzap_ent
= zapobj
->mz_chunk
;
708 chunks
= objsize
/ MZAP_ENT_LEN
- 1;
709 for (i
= 0; i
< chunks
; i
++) {
710 if (strcmp(mzap_ent
[i
].mze_name
, name
) == 0) {
711 *value
= zfs_to_cpu64(mzap_ent
[i
].mze_value
, endian
);
716 printf("couldn't find '%s'\n", name
);
717 return ZFS_ERR_FILE_NOT_FOUND
;
721 mzap_iterate(mzap_phys_t
*zapobj
, zfs_endian_t endian
, int objsize
,
722 int (*hook
)(const char *name
,
724 struct zfs_data
*data
),
725 struct zfs_data
*data
)
728 mzap_ent_phys_t
*mzap_ent
= zapobj
->mz_chunk
;
730 chunks
= objsize
/ MZAP_ENT_LEN
- 1;
731 for (i
= 0; i
< chunks
; i
++) {
732 if (hook(mzap_ent
[i
].mze_name
,
733 zfs_to_cpu64(mzap_ent
[i
].mze_value
, endian
),
742 zap_hash(uint64_t salt
, const char *name
)
744 static uint64_t table
[256];
749 if (table
[128] == 0) {
752 for (i
= 0; i
< 256; i
++) {
753 for (ct
= table
+ i
, *ct
= i
, j
= 8; j
> 0; j
--)
754 *ct
= (*ct
>> 1) ^ (-(*ct
& 1) & ZFS_CRC64_POLY
);
758 for (cp
= (const uint8_t *) name
; (c
= *cp
) != '\0'; cp
++)
759 crc
= (crc
>> 8) ^ table
[(crc
^ c
) & 0xFF];
762 * Only use 28 bits, since we need 4 bits in the cookie for the
763 * collision differentiator. We MUST use the high bits, since
764 * those are the ones that we first pay attention to when
765 * choosing the bucket.
767 crc
&= ~((1ULL << (64 - ZAP_HASHBITS
)) - 1);
773 * Only to be used on 8-bit arrays.
774 * array_len is actual len in bytes (not encoded le_value_length).
775 * buf is null-terminated.
779 zap_leaf_array_equal(zap_leaf_phys_t
*l
, zfs_endian_t endian
,
780 int blksft
, int chunk
, int array_len
, const char *buf
)
784 while (bseen
< array_len
) {
785 struct zap_leaf_array
*la
= &ZAP_LEAF_CHUNK(l
, blksft
, chunk
).l_array
;
786 int toread
= MIN(array_len
- bseen
, ZAP_LEAF_ARRAY_BYTES
);
788 if (chunk
>= ZAP_LEAF_NUMCHUNKS(blksft
))
791 if (memcmp(la
->la_array
, buf
+ bseen
, toread
) != 0)
793 chunk
= zfs_to_cpu16(la
->la_next
, endian
);
796 return (bseen
== array_len
);
801 zap_leaf_array_get(zap_leaf_phys_t
*l
, zfs_endian_t endian
, int blksft
,
802 int chunk
, int array_len
, char *buf
)
806 while (bseen
< array_len
) {
807 struct zap_leaf_array
*la
= &ZAP_LEAF_CHUNK(l
, blksft
, chunk
).l_array
;
808 int toread
= MIN(array_len
- bseen
, ZAP_LEAF_ARRAY_BYTES
);
810 if (chunk
>= ZAP_LEAF_NUMCHUNKS(blksft
))
811 /* Don't use errno because this error is to be ignored. */
812 return ZFS_ERR_BAD_FS
;
814 memcpy(buf
+ bseen
, la
->la_array
, toread
);
815 chunk
= zfs_to_cpu16(la
->la_next
, endian
);
823 * Given a zap_leaf_phys_t, walk thru the zap leaf chunks to get the
824 * value for the property "name".
829 zap_leaf_lookup(zap_leaf_phys_t
*l
, zfs_endian_t endian
,
830 int blksft
, uint64_t h
,
831 const char *name
, uint64_t *value
)
834 struct zap_leaf_entry
*le
;
836 /* Verify if this is a valid leaf block */
837 if (zfs_to_cpu64(l
->l_hdr
.lh_block_type
, endian
) != ZBT_LEAF
) {
838 printf("invalid leaf type\n");
839 return ZFS_ERR_BAD_FS
;
841 if (zfs_to_cpu32(l
->l_hdr
.lh_magic
, endian
) != ZAP_LEAF_MAGIC
) {
842 printf("invalid leaf magic\n");
843 return ZFS_ERR_BAD_FS
;
846 for (chunk
= zfs_to_cpu16(l
->l_hash
[LEAF_HASH(blksft
, h
)], endian
);
847 chunk
!= CHAIN_END
; chunk
= le
->le_next
) {
849 if (chunk
>= ZAP_LEAF_NUMCHUNKS(blksft
)) {
850 printf("invalid chunk number\n");
851 return ZFS_ERR_BAD_FS
;
854 le
= ZAP_LEAF_ENTRY(l
, blksft
, chunk
);
856 /* Verify the chunk entry */
857 if (le
->le_type
!= ZAP_CHUNK_ENTRY
) {
858 printf("invalid chunk entry\n");
859 return ZFS_ERR_BAD_FS
;
862 if (zfs_to_cpu64(le
->le_hash
, endian
) != h
)
865 if (zap_leaf_array_equal(l
, endian
, blksft
,
866 zfs_to_cpu16(le
->le_name_chunk
, endian
),
867 zfs_to_cpu16(le
->le_name_length
, endian
),
869 struct zap_leaf_array
*la
;
871 if (le
->le_int_size
!= 8 || le
->le_value_length
!= 1) {
872 printf("invalid leaf chunk entry\n");
873 return ZFS_ERR_BAD_FS
;
875 /* get the uint64_t property value */
876 la
= &ZAP_LEAF_CHUNK(l
, blksft
, le
->le_value_chunk
).l_array
;
878 *value
= be64_to_cpu(la
->la_array64
);
884 printf("couldn't find '%s'\n", name
);
885 return ZFS_ERR_FILE_NOT_FOUND
;
889 /* Verify if this is a fat zap header block */
/*
 * Sanity-check a fat ZAP header: the magic must equal ZAP_MAGIC, the flags
 * must be zero and the salt must be non-zero.  Each failed check prints a
 * message and returns ZFS_ERR_BAD_FS.
 *
 * NOTE(review): the fields are compared as stored, with no zfs_to_cpu64()
 * conversion, unlike most on-disk reads in this file -- confirm this is
 * intended for pools written with the opposite byte order.
 */
891 zap_verify(zap_phys_t
*zap
)
893 if (zap
->zap_magic
!= (uint64_t) ZAP_MAGIC
) {
894 printf("bad ZAP magic\n");
895 return ZFS_ERR_BAD_FS
;
898 if (zap
->zap_flags
!= 0) {
899 printf("bad ZAP flags\n");
900 return ZFS_ERR_BAD_FS
;
903 if (zap
->zap_salt
== 0) {
904 printf("bad ZAP salt\n");
905 return ZFS_ERR_BAD_FS
;
917 fzap_lookup(dnode_end_t
*zap_dnode
, zap_phys_t
*zap
,
918 char *name
, uint64_t *value
, struct zfs_data
*data
)
921 uint64_t hash
, idx
, blkid
;
922 int blksft
= zfs_log2(zfs_to_cpu16(zap_dnode
->dn
.dn_datablkszsec
,
923 zap_dnode
->endian
) << DNODE_SHIFT
);
925 zfs_endian_t leafendian
;
927 err
= zap_verify(zap
);
931 hash
= zap_hash(zap
->zap_salt
, name
);
933 /* get block id from index */
934 if (zap
->zap_ptrtbl
.zt_numblks
!= 0) {
935 printf("external pointer tables not supported\n");
936 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
938 idx
= ZAP_HASH_IDX(hash
, zap
->zap_ptrtbl
.zt_shift
);
939 blkid
= ((uint64_t *) zap
)[idx
+ (1 << (blksft
- 3 - 1))];
941 /* Get the leaf block */
942 if ((1U << blksft
) < sizeof(zap_leaf_phys_t
)) {
943 printf("ZAP leaf is too small\n");
944 return ZFS_ERR_BAD_FS
;
946 err
= dmu_read(zap_dnode
, blkid
, &l
, &leafendian
, data
);
950 err
= zap_leaf_lookup(l
, leafendian
, blksft
, hash
, name
, value
);
957 fzap_iterate(dnode_end_t
*zap_dnode
, zap_phys_t
*zap
,
958 int (*hook
)(const char *name
,
960 struct zfs_data
*data
),
961 struct zfs_data
*data
)
967 int blksft
= zfs_log2(zfs_to_cpu16(zap_dnode
->dn
.dn_datablkszsec
,
968 zap_dnode
->endian
) << DNODE_SHIFT
);
975 /* get block id from index */
976 if (zap
->zap_ptrtbl
.zt_numblks
!= 0) {
977 printf("external pointer tables not supported\n");
980 /* Get the leaf block */
981 if ((1U << blksft
) < sizeof(zap_leaf_phys_t
)) {
982 printf("ZAP leaf is too small\n");
985 for (idx
= 0; idx
< zap
->zap_ptrtbl
.zt_numblks
; idx
++) {
986 blkid
= ((uint64_t *) zap
)[idx
+ (1 << (blksft
- 3 - 1))];
988 err
= dmu_read(zap_dnode
, blkid
, &l_in
, &endian
, data
);
993 /* Verify if this is a valid leaf block */
994 if (zfs_to_cpu64(l
->l_hdr
.lh_block_type
, endian
) != ZBT_LEAF
) {
998 if (zfs_to_cpu32(l
->l_hdr
.lh_magic
, endian
) != ZAP_LEAF_MAGIC
) {
1003 for (chunk
= 0; chunk
< ZAP_LEAF_NUMCHUNKS(blksft
); chunk
++) {
1005 struct zap_leaf_array
*la
;
1006 struct zap_leaf_entry
*le
;
1008 le
= ZAP_LEAF_ENTRY(l
, blksft
, chunk
);
1010 /* Verify the chunk entry */
1011 if (le
->le_type
!= ZAP_CHUNK_ENTRY
)
1014 buf
= malloc(zfs_to_cpu16(le
->le_name_length
, endian
)
1016 if (zap_leaf_array_get(l
, endian
, blksft
, le
->le_name_chunk
,
1017 le
->le_name_length
, buf
)) {
1021 buf
[le
->le_name_length
] = 0;
1023 if (le
->le_int_size
!= 8
1024 || zfs_to_cpu16(le
->le_value_length
, endian
) != 1)
1027 /* get the uint64_t property value */
1028 la
= &ZAP_LEAF_CHUNK(l
, blksft
, le
->le_value_chunk
).l_array
;
1029 val
= be64_to_cpu(la
->la_array64
);
1030 if (hook(buf
, val
, data
))
1040 * Read in the data of a zap object and find the value for a matching
1045 zap_lookup(dnode_end_t
*zap_dnode
, char *name
, uint64_t *val
,
1046 struct zfs_data
*data
)
1048 uint64_t block_type
;
1052 zfs_endian_t endian
;
1054 /* Read in the first block of the zap object data. */
1055 size
= zfs_to_cpu16(zap_dnode
->dn
.dn_datablkszsec
,
1056 zap_dnode
->endian
) << SPA_MINBLOCKSHIFT
;
1057 err
= dmu_read(zap_dnode
, 0, &zapbuf
, &endian
, data
);
1060 block_type
= zfs_to_cpu64(*((uint64_t *) zapbuf
), endian
);
1062 if (block_type
== ZBT_MICRO
) {
1063 err
= (mzap_lookup(zapbuf
, endian
, size
, name
, val
));
1066 } else if (block_type
== ZBT_HEADER
) {
1067 /* this is a fat zap */
1068 err
= (fzap_lookup(zap_dnode
, zapbuf
, name
, val
, data
));
1073 printf("unknown ZAP type\n");
1074 return ZFS_ERR_BAD_FS
;
1078 zap_iterate(dnode_end_t
*zap_dnode
,
1079 int (*hook
)(const char *name
, uint64_t val
,
1080 struct zfs_data
*data
),
1081 struct zfs_data
*data
)
1083 uint64_t block_type
;
1088 zfs_endian_t endian
;
1090 /* Read in the first block of the zap object data. */
1091 size
= zfs_to_cpu16(zap_dnode
->dn
.dn_datablkszsec
, zap_dnode
->endian
) << SPA_MINBLOCKSHIFT
;
1092 err
= dmu_read(zap_dnode
, 0, &zapbuf
, &endian
, data
);
1095 block_type
= zfs_to_cpu64(*((uint64_t *) zapbuf
), endian
);
1097 if (block_type
== ZBT_MICRO
) {
1098 ret
= mzap_iterate(zapbuf
, endian
, size
, hook
, data
);
1101 } else if (block_type
== ZBT_HEADER
) {
1102 /* this is a fat zap */
1103 ret
= fzap_iterate(zap_dnode
, zapbuf
, hook
, data
);
1107 printf("unknown ZAP type\n");
1113 * Get the dnode of an object number from the metadnode of an object set.
1116 * mdn - metadnode to get the object dnode
1117 * objnum - object number for the object dnode
1118 * buf - data buffer that holds the returning dnode
1121 dnode_get(dnode_end_t
*mdn
, uint64_t objnum
, uint8_t type
,
1122 dnode_end_t
*buf
, struct zfs_data
*data
)
1124 uint64_t blkid
, blksz
; /* the block id this object dnode is in */
1125 int epbs
; /* shift of number of dnodes in a block */
1126 int idx
; /* index within a block */
1129 zfs_endian_t endian
;
1131 blksz
= zfs_to_cpu16(mdn
->dn
.dn_datablkszsec
,
1132 mdn
->endian
) << SPA_MINBLOCKSHIFT
;
1134 epbs
= zfs_log2(blksz
) - DNODE_SHIFT
;
1135 blkid
= objnum
>> epbs
;
1136 idx
= objnum
& ((1 << epbs
) - 1);
1138 if (data
->dnode_buf
!= NULL
&& memcmp(data
->dnode_mdn
, mdn
,
1140 && objnum
>= data
->dnode_start
&& objnum
< data
->dnode_end
) {
1141 memmove(&(buf
->dn
), &(data
->dnode_buf
)[idx
], DNODE_SIZE
);
1142 buf
->endian
= data
->dnode_endian
;
1143 if (type
&& buf
->dn
.dn_type
!= type
) {
1144 printf("incorrect dnode type: %02X != %02x\n", buf
->dn
.dn_type
, type
);
1145 return ZFS_ERR_BAD_FS
;
1147 return ZFS_ERR_NONE
;
1150 err
= dmu_read(mdn
, blkid
, &dnbuf
, &endian
, data
);
1154 free(data
->dnode_buf
);
1155 free(data
->dnode_mdn
);
1156 data
->dnode_mdn
= malloc(sizeof(*mdn
));
1157 if (!data
->dnode_mdn
) {
1158 data
->dnode_buf
= 0;
1160 memcpy(data
->dnode_mdn
, mdn
, sizeof(*mdn
));
1161 data
->dnode_buf
= dnbuf
;
1162 data
->dnode_start
= blkid
<< epbs
;
1163 data
->dnode_end
= (blkid
+ 1) << epbs
;
1164 data
->dnode_endian
= endian
;
1167 memmove(&(buf
->dn
), (dnode_phys_t
*) dnbuf
+ idx
, DNODE_SIZE
);
1168 buf
->endian
= endian
;
1169 if (type
&& buf
->dn
.dn_type
!= type
) {
1170 printf("incorrect dnode type\n");
1171 return ZFS_ERR_BAD_FS
;
1174 return ZFS_ERR_NONE
;
1178 * Get the file dnode for a given file name where mdn is the meta dnode
1179 * for this ZFS object set. When found, place the file dnode in dn.
1180 * A private strdup() copy of 'path_in' is mangled; the caller's string is not.
1184 dnode_get_path(dnode_end_t
*mdn
, const char *path_in
, dnode_end_t
*dn
,
1185 struct zfs_data
*data
)
1187 uint64_t objnum
, version
;
1189 int err
= ZFS_ERR_NONE
;
1190 char *path
, *path_buf
;
1191 struct dnode_chain
{
1192 struct dnode_chain
*next
;
1195 struct dnode_chain
*dnode_path
= 0, *dn_new
, *root
;
1197 dn_new
= malloc(sizeof(*dn_new
));
1199 return ZFS_ERR_OUT_OF_MEMORY
;
1201 dnode_path
= root
= dn_new
;
1203 err
= dnode_get(mdn
, MASTER_NODE_OBJ
, DMU_OT_MASTER_NODE
,
1204 &(dnode_path
->dn
), data
);
1210 err
= zap_lookup(&(dnode_path
->dn
), ZPL_VERSION_STR
, &version
, data
);
1215 if (version
> ZPL_VERSION
) {
1217 printf("too new ZPL version\n");
1218 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
1221 err
= zap_lookup(&(dnode_path
->dn
), ZFS_ROOT_OBJ
, &objnum
, data
);
1227 err
= dnode_get(mdn
, objnum
, 0, &(dnode_path
->dn
), data
);
1233 path
= path_buf
= strdup(path_in
);
1236 return ZFS_ERR_OUT_OF_MEMORY
;
1240 /* skip leading slashes */
1241 while (*path
== '/')
1245 /* get the next component name */
1247 while (*path
&& *path
!= '/')
1250 if (cname
+ 1 == path
&& cname
[0] == '.')
1252 /* Handle double dot. */
1253 if (cname
+ 2 == path
&& cname
[0] == '.' && cname
[1] == '.') {
1255 dn_new
= dnode_path
;
1256 dnode_path
= dn_new
->next
;
1259 printf("can't resolve ..\n");
1260 err
= ZFS_ERR_FILE_NOT_FOUND
;
1267 *path
= 0; /* ensure null termination */
1269 if (dnode_path
->dn
.dn
.dn_type
!= DMU_OT_DIRECTORY_CONTENTS
) {
1271 printf("not a directory\n");
1272 return ZFS_ERR_BAD_FILE_TYPE
;
1274 err
= zap_lookup(&(dnode_path
->dn
), cname
, &objnum
, data
);
1278 dn_new
= malloc(sizeof(*dn_new
));
1280 err
= ZFS_ERR_OUT_OF_MEMORY
;
1283 dn_new
->next
= dnode_path
;
1284 dnode_path
= dn_new
;
1286 objnum
= ZFS_DIRENT_OBJ(objnum
);
1287 err
= dnode_get(mdn
, objnum
, 0, &(dnode_path
->dn
), data
);
1295 memcpy(dn
, &(dnode_path
->dn
), sizeof(*dn
));
1297 while (dnode_path
) {
1298 dn_new
= dnode_path
->next
;
1300 dnode_path
= dn_new
;
1308 * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname),
1309 * e.g. pool/rootfs, or a given object number (obj), e.g. the object number
1312 * If no fsname and no obj are given, return the DSL_DIR metadnode.
1313 * If fsname is given, return its metadnode and its matching object number.
1314 * If only obj is given, return the metadnode for this object number.
1318 get_filesystem_dnode(dnode_end_t
*mosmdn
, char *fsname
,
1319 dnode_end_t
*mdn
, struct zfs_data
*data
)
1324 err
= dnode_get(mosmdn
, DMU_POOL_DIRECTORY_OBJECT
,
1325 DMU_OT_OBJECT_DIRECTORY
, mdn
, data
);
1329 err
= zap_lookup(mdn
, DMU_POOL_ROOT_DATASET
, &objnum
, data
);
1333 err
= dnode_get(mosmdn
, objnum
, DMU_OT_DSL_DIR
, mdn
, data
);
1341 while (*fsname
== '/')
1344 if (!*fsname
|| *fsname
== '@')
1348 while (*fsname
&& !isspace(*fsname
) && *fsname
!= '/')
1353 childobj
= zfs_to_cpu64((((dsl_dir_phys_t
*) DN_BONUS(&mdn
->dn
)))->dd_child_dir_zapobj
, mdn
->endian
);
1354 err
= dnode_get(mosmdn
, childobj
,
1355 DMU_OT_DSL_DIR_CHILD_MAP
, mdn
, data
);
1359 err
= zap_lookup(mdn
, cname
, &objnum
, data
);
1363 err
= dnode_get(mosmdn
, objnum
, DMU_OT_DSL_DIR
, mdn
, data
);
1369 return ZFS_ERR_NONE
;
1373 make_mdn(dnode_end_t
*mdn
, struct zfs_data
*data
)
1380 bp
= &(((dsl_dataset_phys_t
*) DN_BONUS(&mdn
->dn
))->ds_bp
);
1381 err
= zio_read(bp
, mdn
->endian
, &osp
, &ospsize
, data
);
1384 if (ospsize
< OBJSET_PHYS_SIZE_V14
) {
1386 printf("too small osp\n");
1387 return ZFS_ERR_BAD_FS
;
1390 mdn
->endian
= (zfs_to_cpu64(bp
->blk_prop
, mdn
->endian
)>>63) & 1;
1391 memmove((char *) &(mdn
->dn
),
1392 (char *) &((objset_phys_t
*) osp
)->os_meta_dnode
, DNODE_SIZE
);
1394 return ZFS_ERR_NONE
;
/*
 * dnode_get_fullpath() - resolve a full path into a dataset meta-dnode and,
 * for file paths, the dnode of the named object.
 *
 * @fullpath: path to resolve. It is searched for the first @ and for the
 *            first / after that @ in order to separate a filesystem name,
 *            a snapshot name and a file name.
 * @mdn:      receives the DSL dataset dnode (the head dataset, or the
 *            snapshot found through ds_snapnames_zapobj) and is then
 *            handed to make_mdn().
 * @mdnobj:   out-parameter; its assignment is not visible in this listing.
 * @dn:       first holds the DSL directory found by get_filesystem_dnode(),
 *            finally the result of dnode_get_path().
 * @isfs:     out-parameter; its assignment is not visible in this listing.
 * @data:     pool context; data->mos is the starting point of every lookup.
 *
 * Returns ZFS_ERR_OUT_OF_MEMORY after the fsname and snapname allocations
 * (presumably when they fail) and ZFS_ERR_NONE on one success path; the
 * last visible step stores the dnode_get_path() result in err.
 */
1398 dnode_get_fullpath(const char *fullpath
, dnode_end_t
*mdn
,
1399 uint64_t *mdnobj
, dnode_end_t
*dn
, int *isfs
,
1400 struct zfs_data
*data
)
1402 char *fsname
, *snapname
;
1403 const char *ptr_at
, *filename
;
1407 ptr_at
= strchr(fullpath
, '@');
/* NOTE(review): no check of the strdup() result is visible - confirm. */
1412 fsname
= strdup(fullpath
);
1414 const char *ptr_slash
= strchr(ptr_at
, '/');
1417 fsname
= malloc(ptr_at
- fullpath
+ 1);
1419 return ZFS_ERR_OUT_OF_MEMORY
;
1420 memcpy(fsname
, fullpath
, ptr_at
- fullpath
);
1421 fsname
[ptr_at
- fullpath
] = 0;
/*
 * NOTE(review): ptr_slash comes from strchr() and is used in pointer
 * arithmetic below; no NULL test is visible before this point, so a path
 * with a snapshot name but no following slash needs checking.
 */
1422 if (ptr_at
[1] && ptr_at
[1] != '/') {
1423 snapname
= malloc(ptr_slash
- ptr_at
);
1426 return ZFS_ERR_OUT_OF_MEMORY
;
1428 memcpy(snapname
, ptr_at
+ 1, ptr_slash
- ptr_at
- 1);
1429 snapname
[ptr_slash
- ptr_at
- 1] = 0;
1434 filename
= ptr_slash
;
1437 printf("zfs fsname = '%s' snapname='%s' filename = '%s'\n",
1438 fsname
, snapname
, filename
);
/* Walk the DSL directory tree from the MOS down to fsname. */
1442 err
= get_filesystem_dnode(&(data
->mos
), fsname
, dn
, data
);
/* The DSL directory names its head dataset object. */
1450 headobj
= zfs_to_cpu64(((dsl_dir_phys_t
*) DN_BONUS(&dn
->dn
))->dd_head_dataset_obj
, dn
->endian
);
1452 err
= dnode_get(&(data
->mos
), headobj
, DMU_OT_DSL_DATASET
, mdn
, data
);
/* Snapshot lookup: the snapshot-name ZAP maps snapname to a dataset. */
1462 snapobj
= zfs_to_cpu64(((dsl_dataset_phys_t
*) DN_BONUS(&mdn
->dn
))->ds_snapnames_zapobj
, mdn
->endian
);
1464 err
= dnode_get(&(data
->mos
), snapobj
,
1465 DMU_OT_DSL_DS_SNAP_MAP
, mdn
, data
);
1467 err
= zap_lookup(mdn
, snapname
, &headobj
, data
);
1469 err
= dnode_get(&(data
->mos
), headobj
, DMU_OT_DSL_DATASET
, mdn
, data
);
/* NOTE(review): the make_mdn() return value is discarded here. */
1480 make_mdn(mdn
, data
);
1485 return ZFS_ERR_NONE
;
1487 err
= dnode_get_path(mdn
, filename
, dn
, data
);
1494 * For a given XDR packed nvlist, verify the first 4 bytes and move on.
1496 * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) :
1498 * encoding method/host endian (4 bytes)
1499 * nvl_version (4 bytes)
1500 * nvl_nvflag (4 bytes)
1502 * encoded size of the nvpair (4 bytes)
1503 * decoded size of the nvpair (4 bytes)
1504 * name string size (4 bytes)
1505 * name string data (sizeof(NV_ALIGN4(string)))
1506 * data type (4 bytes)
1507 * # of elements in the nvpair (4 bytes)
1509 * 2 zeros for the last nvpair
1510 * (end of the entire list) (8 bytes)
/*
 * nvlist_find_value() - locate one nvpair inside an XDR-encoded nvlist.
 *
 * @nvlist:   start of the packed nvlist. Byte 0 must be NV_ENCODE_XDR and
 *            byte 1 must be NV_LITTLE_ENDIAN or NV_BIG_ENDIAN.
 * @name:     pair name to look for.
 * @valtype:  DATA_TYPE_* value the pair must carry.
 * @val:      out-parameter; its assignment is not visible in this listing.
 * @size_out: receives encode_size of the matching pair.
 * @nelm_out: out-parameter; some callers in this file pass 0 for it, so it
 *            is presumably optional - confirm.
 *
 * After the 12-byte header the pairs are walked by their big-endian
 * encode_size until a zero size ends the list. Inside a pair the two size
 * words are skipped, then the name length, the name (padded to 4 bytes),
 * the data type and the element count are read in that order.
 *
 * Returns ZFS_ERR_BAD_FS after the bad-header and empty-nvpair messages;
 * the found and not-found return statements are not visible here.
 *
 * NOTE(review): callers store the result in a variable named found, so an
 * error code returned from here may be read as success - confirm.
 */
1515 nvlist_find_value(char *nvlist
, char *name
, int valtype
, char **val
,
1516 size_t *size_out
, size_t *nelm_out
)
1518 int name_len
, type
, encode_size
;
1519 char *nvpair
, *nvp_name
;
1521 /* Verify if the 1st and 2nd byte in the nvlist are valid. */
1522 /* NOTE: independently of what endianness header announces all
1523 subsequent values are big-endian. */
1524 if (nvlist
[0] != NV_ENCODE_XDR
|| (nvlist
[1] != NV_LITTLE_ENDIAN
1525 && nvlist
[1] != NV_BIG_ENDIAN
)) {
1526 printf("zfs incorrect nvlist header\n");
1527 return ZFS_ERR_BAD_FS
;
1530 /* skip the header, nvl_version, and nvl_nvflag */
1531 nvlist
= nvlist
+ 4 * 3;
1533 * Loop thru the nvpair list
1534 * The XDR representation of an integer is in big-endian byte order.
1536 while ((encode_size
= be32_to_cpu(*(uint32_t *) nvlist
))) {
1539 nvpair
= nvlist
+ 4 * 2; /* skip the encode/decode size */
1541 name_len
= be32_to_cpu(*(uint32_t *) nvpair
);
1545 nvpair
= nvpair
+ ((name_len
+ 3) & ~3); /* align */
1547 type
= be32_to_cpu(*(uint32_t *) nvpair
);
1550 nelm
= be32_to_cpu(*(uint32_t *) nvpair
);
1552 printf("empty nvpair\n");
1553 return ZFS_ERR_BAD_FS
;
/*
 * NOTE(review): the comparison is bounded by the length of the stored
 * pair name, so a stored name that is a prefix of the requested name also
 * compares equal - confirm whether a length check on name is needed.
 */
1558 if ((strncmp(nvp_name
, name
, name_len
) == 0) && type
== valtype
) {
1560 *size_out
= encode_size
;
1566 nvlist
+= encode_size
; /* goto the next nvpair */
/*
 * zfs_nvlist_lookup_uint64() - fetch a DATA_TYPE_UINT64 pair from an nvlist.
 *
 * @nvlist: packed XDR nvlist to search.
 * @name:   pair name.
 * @out:    receives the value, converted from big-endian with be64_to_cpu().
 *
 * The pair is located with nvlist_find_value(); a pair smaller than
 * sizeof(uint64_t) is rejected with the invalid-uint64 message.
 *
 * NOTE(review): the size error returns ZFS_ERR_BAD_FS, yet callers in this
 * file store the result in a variable named found. If that error code is
 * nonzero the failure would look like a hit - confirm.
 */
1572 zfs_nvlist_lookup_uint64(char *nvlist
, char *name
, uint64_t *out
)
1578 found
= nvlist_find_value(nvlist
, name
, DATA_TYPE_UINT64
, &nvpair
, &size
, 0);
1581 if (size
< sizeof(uint64_t)) {
1582 printf("invalid uint64\n");
1583 return ZFS_ERR_BAD_FS
;
1586 *out
= be64_to_cpu(*(uint64_t *) nvpair
);
/*
 * zfs_nvlist_lookup_string() - copy a DATA_TYPE_STRING pair out of an nvlist.
 *
 * @nvlist: packed XDR nvlist to search.
 * @name:   pair name.
 *
 * The pair payload starts with a big-endian 32-bit string length (slen),
 * which is compared against size - 4, followed by the characters. A buffer
 * of slen + 1 bytes is allocated with malloc() and slen bytes are copied
 * into it.
 *
 * NOTE(review): the return statements, the terminating NUL store and the
 * condition guarding the invalid-string message are not visible in this
 * listing; presumably the malloc()ed copy is returned and owned by the
 * caller - confirm.
 */
1591 zfs_nvlist_lookup_string(char *nvlist
, char *name
)
1599 found
= nvlist_find_value(nvlist
, name
, DATA_TYPE_STRING
, &nvpair
, &size
, 0);
1603 printf("invalid string\n");
1606 slen
= be32_to_cpu(*(uint32_t *) nvpair
);
1607 if (slen
> size
- 4)
1609 ret
= malloc(slen
+ 1);
1612 memcpy(ret
, nvpair
+ 4, slen
);
/*
 * zfs_nvlist_lookup_nvlist() - extract an embedded DATA_TYPE_NVLIST pair as
 * a stand-alone packed nvlist.
 *
 * @nvlist: packed XDR nvlist to search.
 * @name:   pair name.
 *
 * A zeroed buffer of size + 3 * sizeof(uint32_t) bytes is allocated with
 * calloc(). Its first 4 bytes are copied from the start of the parent
 * nvlist (the encoding header that nvlist_find_value() validates), and the
 * pair payload follows at offset sizeof(uint32_t).
 *
 * NOTE(review): the return statements and the calloc() failure path are
 * not visible in this listing; presumably the buffer is returned and owned
 * by the caller - confirm.
 */
1618 zfs_nvlist_lookup_nvlist(char *nvlist
, char *name
)
1625 found
= nvlist_find_value(nvlist
, name
, DATA_TYPE_NVLIST
, &nvpair
,
1629 ret
= calloc(1, size
+ 3 * sizeof(uint32_t));
1632 memcpy(ret
, nvlist
, sizeof(uint32_t));
1634 memcpy(ret
+ sizeof(uint32_t), nvpair
, size
);
/*
 * zfs_nvlist_lookup_nvlist_array_get_nelm() - look up the pair called name
 * with type DATA_TYPE_NVLIST through nvlist_find_value().
 *
 * @nvlist: packed XDR nvlist to search.
 * @name:   pair name.
 *
 * NOTE(review): only the lookup call is visible in this listing. Going by
 * the function name the element count of the pair is presumably returned -
 * confirm, including what is returned when the pair is absent.
 */
1639 zfs_nvlist_lookup_nvlist_array_get_nelm(char *nvlist
, char *name
)
1645 found
= nvlist_find_value(nvlist
, name
, DATA_TYPE_NVLIST
, &nvpair
,
/*
 * zfs_nvlist_lookup_nvlist_array() - extract element index of an nvlist
 * array pair as a stand-alone packed nvlist.
 *
 * @nvlist: packed XDR nvlist to search.
 * @name:   pair name (looked up with type DATA_TYPE_NVLIST).
 *
 * A request with index >= nelm is rejected with the lookup-past-array
 * message. Otherwise the loop below is meant to step nvpairptr over the
 * index preceding elements, the result is bounds-checked against
 * nvpair + size, and a calloc()ed buffer receives the 4-byte header of the
 * parent nvlist followed by data copied from nvpairptr.
 *
 * NOTE(review): the index parameter, the initialisation of nvpairptr and
 * all return statements are not visible in this listing - confirm.
 */
1653 zfs_nvlist_lookup_nvlist_array(char *nvlist
, char *name
,
1656 char *nvpair
, *nvpairptr
;
1663 found
= nvlist_find_value(nvlist
, name
, DATA_TYPE_NVLIST
, &nvpair
,
1667 if (index
>= nelm
) {
1668 printf("trying to lookup past nvlist array\n");
1674 for (i
= 0; i
< index
; i
++) {
1675 uint32_t encode_size
;
1677 /* skip the header, nvl_version, and nvl_nvflag */
1678 nvpairptr
= nvpairptr
+ 4 * 2;
/*
 * NOTE(review): the loop condition reads nvpairptr but the body and the
 * statement after it advance nvlist. As listed, nvpairptr never moves
 * inside this loop, so a nonzero encode_size would spin forever -
 * presumably nvpairptr was intended; confirm.
 */
1680 while (nvpairptr
< nvpair
+ size
1681 && (encode_size
= be32_to_cpu(*(uint32_t *) nvpairptr
)))
1682 nvlist
+= encode_size
; /* goto the next nvpair */
1684 nvlist
= nvlist
+ 4 * 2; /* skip the ending 2 zeros - 8 bytes */
1687 if (nvpairptr
>= nvpair
+ size
1688 || nvpairptr
+ be32_to_cpu(*(uint32_t *) (nvpairptr
+ 4 * 2))
1690 printf("incorrect nvlist array\n");
/*
 * NOTE(review): the buffer is sized from the 32-bit word at nvpairptr + 8
 * but the second memcpy() below copies size bytes. If size is larger than
 * that word the copy overruns the buffer - confirm.
 */
1694 ret
= calloc(1, be32_to_cpu(*(uint32_t *) (nvpairptr
+ 4 * 2))
1695 + 3 * sizeof(uint32_t));
1698 memcpy(ret
, nvlist
, sizeof(uint32_t));
1700 memcpy(ret
+ sizeof(uint32_t), nvpairptr
, size
);
/*
 * int_zfs_fetch_nvlist() - read the packed vdev nvlist of the current label.
 *
 * @data:   pool context; data->vdev_phys_sector selects the sector to read.
 * @nvlist: receives a malloc()ed buffer of VDEV_PHYS_SIZE bytes filled by
 *          zfs_devread().
 *
 * Returns ZFS_ERR_NONE at the end; the handling of a zfs_devread() error is
 * not visible in this listing.
 *
 * NOTE(review): the malloc() result is passed to zfs_devread() by the very
 * next statement without a NULL check.
 */
1705 int_zfs_fetch_nvlist(struct zfs_data
*data
, char **nvlist
)
1709 *nvlist
= malloc(VDEV_PHYS_SIZE
);
1710 /* Read in the vdev name-value pair list (112K). */
1711 err
= zfs_devread(data
->vdev_phys_sector
, 0, VDEV_PHYS_SIZE
, *nvlist
);
1717 return ZFS_ERR_NONE
;
1721 * Check the disk label information and retrieve needed vdev name-value pairs.
/*
 * check_pool_label() - validate the vdev label selected by
 * data->vdev_phys_sector and record the pool parameters found in it.
 *
 * @data: pool context. label_txg is cleared before the txg lookup;
 *        vdev_ashift and pool_guid are filled from the label.
 *
 * The label nvlist is read with int_zfs_fetch_nvlist() and these pairs are
 * looked up in order: pool state, pool txg, version, vdev tree (an embedded
 * nvlist, searched for ashift), vdev guid and pool guid. A one-line summary
 * is printed before the successful return.
 *
 * Returns ZFS_ERR_BAD_FS after each not-found message, for a pool in state
 * POOL_STATE_DESTROYED and for label_txg == 0; returns
 * ZFS_ERR_NOT_IMPLEMENTED_YET when version > SPA_VERSION; returns
 * ZFS_ERR_NONE at the end.
 *
 * NOTE(review): the conditions guarding the not-found messages and the
 * release of nvlist and vdevnvlist are not visible in this listing -
 * confirm that both buffers are freed on every path.
 */
1725 check_pool_label(struct zfs_data
*data
)
1727 uint64_t pool_state
;
1728 char *nvlist
; /* for the pool */
1729 char *vdevnvlist
; /* for the vdev */
1735 err
= int_zfs_fetch_nvlist(data
, &nvlist
);
1739 found
= zfs_nvlist_lookup_uint64(nvlist
, ZPOOL_CONFIG_POOL_STATE
,
1743 printf("zfs pool state not found\n");
1744 return ZFS_ERR_BAD_FS
;
1747 if (pool_state
== POOL_STATE_DESTROYED
) {
1749 printf("zpool is marked as destroyed\n");
1750 return ZFS_ERR_BAD_FS
;
1753 data
->label_txg
= 0;
1754 found
= zfs_nvlist_lookup_uint64(nvlist
, ZPOOL_CONFIG_POOL_TXG
,
1758 printf("zfs pool txg not found\n");
1759 return ZFS_ERR_BAD_FS
;
1762 /* not an active device */
1763 if (data
->label_txg
== 0) {
1765 printf("zpool is not active\n");
1766 return ZFS_ERR_BAD_FS
;
1769 found
= zfs_nvlist_lookup_uint64(nvlist
, ZPOOL_CONFIG_VERSION
,
1773 printf("zpool config version not found\n");
1774 return ZFS_ERR_BAD_FS
;
1777 if (version
> SPA_VERSION
) {
1779 printf("SPA version too new %llu > %llu\n",
1780 (unsigned long long) version
,
1781 (unsigned long long) SPA_VERSION
);
1782 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
1785 vdevnvlist
= zfs_nvlist_lookup_nvlist(nvlist
, ZPOOL_CONFIG_VDEV_TREE
);
1788 printf("ZFS config vdev tree not found\n");
1789 return ZFS_ERR_BAD_FS
;
1792 found
= zfs_nvlist_lookup_uint64(vdevnvlist
, ZPOOL_CONFIG_ASHIFT
,
1793 &data
->vdev_ashift
);
1797 printf("ZPOOL config ashift not found\n");
1798 return ZFS_ERR_BAD_FS
;
1801 found
= zfs_nvlist_lookup_uint64(nvlist
, ZPOOL_CONFIG_GUID
, &diskguid
);
1804 printf("ZPOOL config guid not found\n");
1805 return ZFS_ERR_BAD_FS
;
1808 found
= zfs_nvlist_lookup_uint64(nvlist
, ZPOOL_CONFIG_POOL_GUID
, &data
->pool_guid
);
1811 printf("ZPOOL config pool guid not found\n");
1812 return ZFS_ERR_BAD_FS
;
1817 printf("ZFS Pool GUID: %llu (%016llx) Label: GUID: %llu (%016llx), txg: %llu, SPA v%llu, ashift: %llu\n",
1818 (unsigned long long) data
->pool_guid
,
1819 (unsigned long long) data
->pool_guid
,
1820 (unsigned long long) diskguid
,
1821 (unsigned long long) diskguid
,
1822 (unsigned long long) data
->label_txg
,
1823 (unsigned long long) version
,
1824 (unsigned long long) data
->vdev_ashift
);
1826 return ZFS_ERR_NONE
;
1830 * vdev_label_start returns the physical disk offset (in bytes) of
/*
 * vdev_label_start() - byte offset of vdev label number l.
 *
 * @psize: size of the device in bytes; its use is not visible in this
 *         listing.
 * @l:     label index.
 *
 * The result is l * sizeof(vdev_label_t) plus a term chosen by whether l
 * lies in the first half of VDEV_LABELS.
 *
 * NOTE(review): the arms of the conditional are only partly visible here
 * (the listing shows VDEV_LABELS * sizeof(vdev_label_t) as the tail of the
 * expression). Presumably labels in the second half are placed relative to
 * psize - confirm.
 */
1833 static uint64_t vdev_label_start(uint64_t psize
, int l
)
1835 return (l
* sizeof(vdev_label_t
) + (l
< VDEV_LABELS
/ 2 ?
1837 VDEV_LABELS
* sizeof(vdev_label_t
)));
/*
 * zfs_unmount() - release the buffers owned by a pool context.
 *
 * @data: context created by zfs_mount(); its dnode_buf, dnode_mdn and
 *        file_buf members are passed to free().
 *
 * NOTE(review): whether the context structure itself is freed is not
 * visible in this listing - confirm before reusing data after this call.
 */
1841 zfs_unmount(struct zfs_data
*data
)
1843 free(data
->dnode_buf
);
1844 free(data
->dnode_mdn
);
1845 free(data
->file_buf
);
1850 * zfs_mount() locates a valid uberblock of the root pool and read in its MOS
1851 * to the memory address MOS.
/*
 * zfs_mount() - find the best uberblock on a device and load the MOS
 * meta-dnode into a freshly allocated pool context.
 *
 * @dev: device or partition to probe; dev->part_length is its size in
 *       sectors as far as the shift by SECTOR_BITS below shows.
 *
 * Steps visible in this listing:
 *  - allocate and zero the context (data), allocate an uberblock ring
 *    buffer of VDEV_UBERBLOCK_RING bytes and a zeroed best-uberblock copy;
 *  - for each label index below vdevnum: compute the label start with
 *    vdev_label_start(), set data->vdev_phys_sector past the skip area and
 *    boot header, validate the label with check_pool_label(), read the
 *    uberblock ring that follows the VDEV_PHYS_SIZE nvlist area, pick its
 *    best entry with find_bestub() and keep it when
 *    vdev_uberblock_compare() ranks it above the current best;
 *  - fail with a message when ub_magic of the best copy is still zero;
 *  - derive the uberblock endianness from ub_magic, read the root block
 *    pointer with zio_read(), require at least OBJSET_PHYS_SIZE_V14 bytes,
 *    then copy os_meta_dnode into data->mos.dn and the uberblock into
 *    data->current_uberblock.
 *
 * NOTE(review): the return statements, the allocation-failure paths and
 * the release of ub_array, ubbest and osp are not visible here - confirm.
 * NOTE(review): dev->part_length is shifted before it is stored in a
 * uint64_t; if part_length is narrower than 64 bits the shift can overflow
 * on large devices - confirm its type.
 */
1855 zfs_mount(device_t dev
)
1857 struct zfs_data
*data
= 0;
1858 int label
= 0, bestlabel
= -1;
1860 uberblock_t
*ubbest
;
1861 uberblock_t
*ubcur
= NULL
;
1866 data
= malloc(sizeof(*data
));
1869 memset(data
, 0, sizeof(*data
));
1871 ub_array
= malloc(VDEV_UBERBLOCK_RING
);
1877 ubbest
= malloc(sizeof(*ubbest
));
1882 memset(ubbest
, 0, sizeof(*ubbest
));
1885 * some eltorito stacks don't give us a size and
1886 * we end up setting the size to MAXUINT, further
1887 * some of these devices stop working once a single
1888 * read past the end has been issued. Checking
1889 * for a maximum part_length and skipping the backup
1890 * labels at the end of the slice/partition/device
1891 * avoids breaking down on such devices.
1894 dev
->part_length
== 0 ?
1895 VDEV_LABELS
/ 2 : VDEV_LABELS
;
1897 /* Size in bytes of the device (disk or partition) aligned to label size*/
1898 uint64_t device_size
=
1899 dev
->part_length
<< SECTOR_BITS
;
1901 const uint64_t alignedbytes
=
1902 P2ALIGN(device_size
, (uint64_t) sizeof(vdev_label_t
));
1904 for (label
= 0; label
< vdevnum
; label
++) {
1905 uint64_t labelstartbytes
= vdev_label_start(alignedbytes
, label
);
1906 uint64_t labelstart
= labelstartbytes
>> SECTOR_BITS
;
1908 debug("zfs reading label %d at sector %llu (byte %llu)\n",
1909 label
, (unsigned long long) labelstart
,
1910 (unsigned long long) labelstartbytes
);
1912 data
->vdev_phys_sector
= labelstart
+
1913 ((VDEV_SKIP_SIZE
+ VDEV_BOOT_HEADER_SIZE
) >> SECTOR_BITS
);
1915 err
= check_pool_label(data
);
1917 printf("zfs error checking label %d\n", label
);
1921 /* Read in the uberblock ring (128K). */
1922 err
= zfs_devread(data
->vdev_phys_sector
+
1923 (VDEV_PHYS_SIZE
>> SECTOR_BITS
),
1924 0, VDEV_UBERBLOCK_RING
, ub_array
);
1926 printf("zfs error reading uberblock ring for label %d\n", label
);
1930 ubcur
= find_bestub(ub_array
, data
);
1932 printf("zfs No good uberblocks found in label %d\n", label
);
1936 if (vdev_uberblock_compare(ubcur
, ubbest
) > 0) {
1937 /* Looks like the block is good, so use it.*/
1938 memcpy(ubbest
, ubcur
, sizeof(*ubbest
));
1940 debug("zfs Current best uberblock found in label %d\n", label
);
1945 /* We zero'd the structure to begin with. If we never assigned to it,
1946 magic will still be zero. */
1947 if (!ubbest
->ub_magic
) {
1948 printf("couldn't find a valid ZFS label\n");
1954 debug("zfs ubbest %p in label %d\n", ubbest
, bestlabel
);
1956 zfs_endian_t ub_endian
=
1957 zfs_to_cpu64(ubbest
->ub_magic
, LITTLE_ENDIAN
) == UBERBLOCK_MAGIC
1958 ? LITTLE_ENDIAN
: BIG_ENDIAN
;
1960 debug("zfs endian set to %s\n", !ub_endian
? "big" : "little");
1962 err
= zio_read(&ubbest
->ub_rootbp
, ub_endian
, &osp
, &ospsize
, data
);
1965 printf("couldn't zio_read object directory\n");
1971 if (ospsize
< OBJSET_PHYS_SIZE_V14
) {
1972 printf("osp too small\n");
1979 /* Got the MOS. Save it at the memory addr MOS. */
1980 memmove(&(data
->mos
.dn
), &((objset_phys_t
*) osp
)->os_meta_dnode
, DNODE_SIZE
);
1982 (zfs_to_cpu64(ubbest
->ub_rootbp
.blk_prop
, ub_endian
) >> 63) & 1;
1983 memmove(&(data
->current_uberblock
), ubbest
, sizeof(uberblock_t
));
/*
 * zfs_fetch_nvlist() - mount a device and read its label nvlist.
 *
 * @dev:    device to mount with zfs_mount().
 * @nvlist: receives the buffer produced by int_zfs_fetch_nvlist().
 *
 * Returns ZFS_ERR_BAD_FS right after the mount attempt (presumably when
 * zfs_mount() fails); the final return value and any zfs_unmount() call
 * are not visible in this listing.
 */
1992 zfs_fetch_nvlist(device_t dev
, char **nvlist
)
1994 struct zfs_data
*zfs
;
1997 zfs
= zfs_mount(dev
);
1999 return ZFS_ERR_BAD_FS
;
2000 err
= int_zfs_fetch_nvlist(zfs
, nvlist
);
2006 * zfs_open() locates a file in the rootpool by following the
2007 * MOS and places the dnode of the file in the memory address DNODE.
/*
 * zfs_open() - mount the pool behind a file handle and resolve a path to a
 * plain-file dnode.
 *
 * @file:       handle to fill in; file->device selects the device and
 *              file->size receives the file length.
 * @fsfilename: full path handed to dnode_get_fullpath().
 *
 * The resolved dnode (data->dnode) must have type
 * DMU_OT_PLAIN_FILE_CONTENTS. The size is taken from the SA header when the
 * bonus type is DMU_OT_SA - located in the bonus buffer when dn_bonuslen is
 * nonzero, otherwise read from the spill block when
 * DNODE_FLAG_SPILL_BLKPTR is set - and from znode_phys_t zp_size for any
 * other bonus type.
 *
 * Returns ZFS_ERR_BAD_FS after the mount attempt and after the
 * filesystem-is-corrupt message, ZFS_ERR_FILE_NOT_FOUND after the
 * missing-separator message, ZFS_ERR_BAD_FILE_TYPE for a non-file dnode and
 * ZFS_ERR_NONE at the end.
 *
 * NOTE(review): the conditions guarding the early returns, the unmount on
 * failure and the release of a spill buffer read by zio_read() are not
 * visible in this listing - confirm.
 */
2010 zfs_open(struct zfs_file
*file
, const char *fsfilename
)
2012 struct zfs_data
*data
;
2016 data
= zfs_mount(file
->device
);
2018 return ZFS_ERR_BAD_FS
;
2020 err
= dnode_get_fullpath(fsfilename
, &(data
->mdn
), 0,
2021 &(data
->dnode
), &isfs
, data
);
2029 printf("Missing @ or / separator\n");
2030 return ZFS_ERR_FILE_NOT_FOUND
;
2033 /* We found the dnode for this file. Verify if it is a plain file. */
2034 if (data
->dnode
.dn
.dn_type
!= DMU_OT_PLAIN_FILE_CONTENTS
) {
2036 printf("not a file\n");
2037 return ZFS_ERR_BAD_FILE_TYPE
;
2040 /* get the file size and set the file position to 0 */
2043 * For DMU_OT_SA we will need to locate the SIZE attribute
2044 * attribute, which could be either in the bonus buffer
2045 * or the "spill" block.
2047 if (data
->dnode
.dn
.dn_bonustype
== DMU_OT_SA
) {
2051 if (data
->dnode
.dn
.dn_bonuslen
!= 0) {
2052 sahdrp
= (sa_hdr_phys_t
*) DN_BONUS(&data
->dnode
.dn
);
2053 } else if (data
->dnode
.dn
.dn_flags
& DNODE_FLAG_SPILL_BLKPTR
) {
2054 blkptr_t
*bp
= &data
->dnode
.dn
.dn_spill
;
2056 err
= zio_read(bp
, data
->dnode
.endian
, &sahdrp
, NULL
, data
);
2060 printf("filesystem is corrupt :(\n");
2061 return ZFS_ERR_BAD_FS
;
/*
 * NOTE(review): the SA size is read as a raw uint64_t, while the znode
 * path below converts zp_size with zfs_to_cpu64(). On a pool whose
 * endianness differs from the CPU the SA value would be byte-swapped -
 * confirm whether a conversion is needed here.
 */
2064 hdrsize
= SA_HDR_SIZE(((sa_hdr_phys_t
*) sahdrp
));
2065 file
->size
= *(uint64_t *) ((char *) sahdrp
+ hdrsize
+ SA_SIZE_OFFSET
);
2067 file
->size
= zfs_to_cpu64(((znode_phys_t
*) DN_BONUS(&data
->dnode
.dn
))->zp_size
, data
->dnode
.endian
);
2073 return ZFS_ERR_NONE
;
/*
 * zfs_read() - copy len bytes of file data starting at file->offset into buf.
 *
 * @file: open file handle; file->data is the pool context and file->offset
 *        the read position.
 * @buf:  destination buffer.
 * @len:  number of bytes requested.
 *
 * A block cache (data->file_buf covering [file_start, file_end)) is used:
 * when the whole request lies inside it the bytes are copied straight from
 * the cache. Otherwise data blocks are fetched one at a time with
 * dmu_read(); blksz is dn_datablkszsec shifted by SPA_MINBLOCKSHIFT and the
 * block id is (offset + red) / blksz.
 *
 * NOTE(review): return values, the loop header and the updates of buf,
 * length and red are not visible in this listing - confirm.
 * NOTE(review): the buffer allocated with SPA_MAXBLOCKSIZE on first use is
 * freed at the start of the first block fetch, which makes that allocation
 * look redundant - confirm.
 */
2077 zfs_read(zfs_file_t file
, char *buf
, uint64_t len
)
2079 struct zfs_data
*data
= (struct zfs_data
*) file
->data
;
2080 int blksz
, movesize
;
2085 if (data
->file_buf
== NULL
) {
2086 data
->file_buf
= malloc(SPA_MAXBLOCKSIZE
);
2087 if (!data
->file_buf
)
2089 data
->file_start
= data
->file_end
= 0;
2093 * If offset is in memory, move it into the buffer provided and return.
2095 if (file
->offset
>= data
->file_start
2096 && file
->offset
+ len
<= data
->file_end
) {
2097 memmove(buf
, data
->file_buf
+ file
->offset
- data
->file_start
,
2102 blksz
= zfs_to_cpu16(data
->dnode
.dn
.dn_datablkszsec
,
2103 data
->dnode
.endian
) << SPA_MINBLOCKSHIFT
;
2106 * Entire Dnode is too big to fit into the space available. We
2107 * will need to read it in chunks. This could be optimized to
2108 * read in as large a chunk as there is space available, but for
2109 * now, this only reads in one data block at a time.
2116 * Find requested blkid and the offset within that block.
2118 uint64_t blkid
= (file
->offset
+ red
) / blksz
;
2119 free(data
->file_buf
);
2122 err
= dmu_read(&(data
->dnode
), blkid
, &t
,
2128 data
->file_start
= blkid
* blksz
;
2129 data
->file_end
= data
->file_start
+ blksz
;
/*
 * NOTE(review): file->offset is cast to int here although it is compared
 * with 64-bit quantities above; offsets beyond the int range would be
 * truncated in this computation - confirm the type of file->offset.
 */
2131 movesize
= MIN(length
, data
->file_end
- (int) file
->offset
- red
);
2133 memmove(buf
, data
->file_buf
+ file
->offset
+ red
2134 - data
->file_start
, movesize
);
/*
 * zfs_close() - release the pool context attached to an open file.
 *
 * @file: handle whose data member is passed to zfs_unmount().
 *
 * Always returns ZFS_ERR_NONE.
 */
2144 zfs_close(zfs_file_t file
)
2146 zfs_unmount((struct zfs_data
*) file
->data
);
2147 return ZFS_ERR_NONE
;
/*
 * zfs_getmdnobj() - mount a device and resolve a path, passing mdnobj to
 * dnode_get_fullpath() as its object-number out-parameter.
 *
 * @dev:        device to mount with zfs_mount().
 * @fsfilename: full path to resolve.
 *
 * Returns ZFS_ERR_BAD_FS right after the mount attempt (presumably when
 * zfs_mount() fails).
 *
 * NOTE(review): the remaining parameter declaration, the final return and
 * any zfs_unmount() call are not visible in this listing - confirm.
 */
2151 zfs_getmdnobj(device_t dev
, const char *fsfilename
,
2154 struct zfs_data
*data
;
2158 data
= zfs_mount(dev
);
2160 return ZFS_ERR_BAD_FS
;
2162 err
= dnode_get_fullpath(fsfilename
, &(data
->mdn
), mdnobj
,
2163 &(data
->dnode
), &isfs
, data
);
/*
 * fill_fs_info() - fill a directory-hook record for a filesystem or
 * snapshot from the modification time of its root directory.
 *
 * @info: record to fill; it is zeroed first and info->mtime is set last.
 * @mdn:  DSL directory or dataset dnode, passed by value so the changes
 *        made here stay local.
 * @data: pool context; data->mos is used to fetch the head dataset.
 *
 * When mdn is a DMU_OT_DSL_DIR its head dataset is loaded first. make_mdn()
 * then turns the dataset into an object-set meta-dnode, the master node
 * (MASTER_NODE_OBJ) is fetched, ZFS_ROOT_OBJ is looked up in it and the
 * resulting object is read; its znode_phys_t zp_mtime[0] becomes the mtime.
 * Each failing step has its own numbered diagnostic message.
 *
 * NOTE(review): return values and the conditions guarding the diagnostics
 * are not visible in this listing, and the make_mdn() result is discarded -
 * confirm.
 */
2169 fill_fs_info(struct zfs_dirhook_info
*info
,
2170 dnode_end_t mdn
, struct zfs_data
*data
)
2177 memset(info
, 0, sizeof(*info
));
2181 if (mdn
.dn
.dn_type
== DMU_OT_DSL_DIR
) {
2182 headobj
= zfs_to_cpu64(((dsl_dir_phys_t
*) DN_BONUS(&mdn
.dn
))->dd_head_dataset_obj
, mdn
.endian
);
2184 err
= dnode_get(&(data
->mos
), headobj
, DMU_OT_DSL_DATASET
, &mdn
, data
);
2186 printf("zfs failed here 1\n");
2190 make_mdn(&mdn
, data
);
2191 err
= dnode_get(&mdn
, MASTER_NODE_OBJ
, DMU_OT_MASTER_NODE
,
2194 printf("zfs failed here 2\n");
2198 err
= zap_lookup(&dn
, ZFS_ROOT_OBJ
, &objnum
, data
);
2200 printf("zfs failed here 3\n");
2204 err
= dnode_get(&mdn
, objnum
, 0, &dn
, data
);
2206 printf("zfs failed here 4\n");
2211 info
->mtime
= zfs_to_cpu64(((znode_phys_t
*) DN_BONUS(&dn
.dn
))->zp_mtime
[0], dn
.endian
);
/*
 * iterate_zap() - ZAP iteration callback used for ordinary directories.
 *
 * @name: entry name taken from the ZAP.
 * @val:  object number stored for that entry.
 * @data: pool context; data->mdn is the object set to read from and
 *        data->userhook the user callback.
 *
 * The entry dnode is read, its zp_mtime[0] and a directory flag (type
 * DMU_OT_DIRECTORY_CONTENTS) are stored in a zeroed zfs_dirhook_info, and
 * the result of data->userhook(name, &info) is returned.
 *
 * NOTE(review): the dnode_get() result is ignored, so dn would be read
 * uninitialised if the lookup failed - confirm whether that can happen.
 * The value returned when no userhook is set is not visible here.
 */
2216 static int iterate_zap(const char *name
, uint64_t val
, struct zfs_data
*data
)
2218 struct zfs_dirhook_info info
;
2221 memset(&info
, 0, sizeof(info
));
2223 dnode_get(&(data
->mdn
), val
, 0, &dn
, data
);
2225 info
.mtime
= zfs_to_cpu64(((znode_phys_t
*) DN_BONUS(&dn
.dn
))->zp_mtime
[0], dn
.endian
);
2226 info
.dir
= (dn
.dn
.dn_type
== DMU_OT_DIRECTORY_CONTENTS
);
2227 debug("zfs type=%d, name=%s\n",
2228 (int)dn
.dn
.dn_type
, (char *)name
);
2229 if (!data
->userhook
)
2231 return data
->userhook(name
, &info
);
/*
 * iterate_zap_fs() - ZAP iteration callback used for child filesystems.
 *
 * @name: child name taken from the child-directory ZAP.
 * @val:  MOS object number of the child.
 * @data: pool context; data->mos is read from and data->userhook is the
 *        user callback.
 *
 * The child dnode is read from the MOS and tested for type DMU_OT_DSL_DIR;
 * fill_fs_info() then builds the record passed to data->userhook(), whose
 * result is returned.
 *
 * NOTE(review): the values returned on a read error, on a type mismatch
 * and when no userhook is set are not visible in this listing - confirm.
 */
2234 static int iterate_zap_fs(const char *name
, uint64_t val
, struct zfs_data
*data
)
2236 struct zfs_dirhook_info info
;
2239 err
= dnode_get(&(data
->mos
), val
, 0, &mdn
, data
);
2242 if (mdn
.dn
.dn_type
!= DMU_OT_DSL_DIR
)
2245 fill_fs_info(&info
, mdn
, data
);
2247 if (!data
->userhook
)
2249 return data
->userhook(name
, &info
);
/*
 * iterate_zap_snap() - ZAP iteration callback used for snapshots.
 *
 * @name: snapshot name taken from the snapshot-name ZAP.
 * @val:  MOS object number of the snapshot dataset.
 * @data: pool context; data->mos is read from and data->userhook is the
 *        user callback.
 *
 * The snapshot dnode is read from the MOS and tested for type
 * DMU_OT_DSL_DATASET, then fill_fs_info() builds the record. The name
 * handed to the hook is a malloc()ed copy of name (terminator included)
 * placed at offset 1 of a buffer of strlen(name) + 2 bytes.
 *
 * NOTE(review): the store to the first byte of name2, the release of name2
 * and the return statements are not visible in this listing. No NULL check
 * of name2 or of data->userhook is visible either - confirm.
 */
2252 static int iterate_zap_snap(const char *name
, uint64_t val
, struct zfs_data
*data
)
2254 struct zfs_dirhook_info info
;
2260 err
= dnode_get(&(data
->mos
), val
, 0, &mdn
, data
);
2264 if (mdn
.dn
.dn_type
!= DMU_OT_DSL_DATASET
)
2267 fill_fs_info(&info
, mdn
, data
);
2269 name2
= malloc(strlen(name
) + 2);
2271 memcpy(name2
+ 1, name
, strlen(name
) + 1);
2273 ret
= data
->userhook(name2
, &info
);
/*
 * zfs_ls() - list a filesystem (children and snapshots) or a directory.
 *
 * @device: device to mount with zfs_mount().
 * @path:   full path handed to dnode_get_fullpath().
 * @hook:   user callback, stored in data->userhook and called once per
 *          entry by the iterate_zap* helpers.
 *
 * Two listings are visible:
 *  - filesystem listing: fill_fs_info() is called for data->dnode, the
 *    child-directory ZAP (dd_child_dir_zapobj) is iterated with
 *    iterate_zap_fs, then the head dataset (dd_head_dataset_obj) is loaded
 *    and its snapshot-name ZAP (ds_snapnames_zapobj) is iterated with
 *    iterate_zap_snap;
 *  - directory listing: data->dnode must have type
 *    DMU_OT_DIRECTORY_CONTENTS and is iterated with iterate_zap.
 *
 * Returns ZFS_ERR_BAD_FS right after the mount attempt (presumably when
 * zfs_mount() fails), ZFS_ERR_BAD_FILE_TYPE after the not-a-directory
 * message and ZFS_ERR_NONE at the end.
 *
 * NOTE(review): the branch that chooses between the two listings
 * (presumably on isfs), the error checks after each dnode_get() and the
 * zfs_unmount() calls are not visible in this listing - confirm.
 */
2279 zfs_ls(device_t device
, const char *path
,
2280 int (*hook
)(const char *, const struct zfs_dirhook_info
*))
2282 struct zfs_data
*data
;
2286 data
= zfs_mount(device
);
2288 return ZFS_ERR_BAD_FS
;
2290 data
->userhook
= hook
;
2292 err
= dnode_get_fullpath(path
, &(data
->mdn
), 0, &(data
->dnode
), &isfs
, data
);
2298 uint64_t childobj
, headobj
;
2301 struct zfs_dirhook_info info
;
2303 fill_fs_info(&info
, data
->dnode
, data
);
2306 childobj
= zfs_to_cpu64(((dsl_dir_phys_t
*) DN_BONUS(&data
->dnode
.dn
))->dd_child_dir_zapobj
, data
->dnode
.endian
);
2307 headobj
= zfs_to_cpu64(((dsl_dir_phys_t
*) DN_BONUS(&data
->dnode
.dn
))->dd_head_dataset_obj
, data
->dnode
.endian
);
2308 err
= dnode_get(&(data
->mos
), childobj
,
2309 DMU_OT_DSL_DIR_CHILD_MAP
, &dn
, data
);
2316 zap_iterate(&dn
, iterate_zap_fs
, data
);
2318 err
= dnode_get(&(data
->mos
), headobj
, DMU_OT_DSL_DATASET
, &dn
, data
);
2324 snapobj
= zfs_to_cpu64(((dsl_dataset_phys_t
*) DN_BONUS(&dn
.dn
))->ds_snapnames_zapobj
, dn
.endian
);
2326 err
= dnode_get(&(data
->mos
), snapobj
,
2327 DMU_OT_DSL_DS_SNAP_MAP
, &dn
, data
);
2333 zap_iterate(&dn
, iterate_zap_snap
, data
);
2335 if (data
->dnode
.dn
.dn_type
!= DMU_OT_DIRECTORY_CONTENTS
) {
2337 printf("not a directory\n");
2338 return ZFS_ERR_BAD_FILE_TYPE
;
2340 zap_iterate(&(data
->dnode
), iterate_zap
, data
);
2343 return ZFS_ERR_NONE
;