3 * ZFS filesystem ported to u-boot by
4 * Jorgen Lundman <lundman at lundman.net>
6 * GRUB -- GRand Unified Bootloader
7 * Copyright (C) 1999,2000,2001,2002,2003,2004
8 * Free Software Foundation, Inc.
9 * Copyright 2004 Sun Microsystems, Inc.
11 * GRUB is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * GRUB is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with GRUB. If not, see <http://www.gnu.org/licenses/>.
28 #include <linux/stat.h>
29 #include <linux/time.h>
30 #include <linux/ctype.h>
31 #include <asm/byteorder.h>
32 #include "zfs_common.h"
35 block_dev_desc_t
*zfs_dev_desc
;
38 * The zfs plug-in routines for GRUB are:
40 * zfs_mount() - locates a valid uberblock of the root pool and reads
41 * in its MOS at the memory address MOS.
43 * zfs_open() - locates a plain file object by following the MOS
44 * and places its dnode at the memory address DNODE.
46 * zfs_read() - read in the data blocks pointed by the DNODE.
52 #include <zfs/dnode.h>
53 #include <zfs/uberblock_impl.h>
54 #include <zfs/vdev_impl.h>
55 #include <zfs/zio_checksum.h>
56 #include <zfs/zap_impl.h>
57 #include <zfs/zap_leaf.h>
58 #include <zfs/zfs_znode.h>
60 #include <zfs/dmu_objset.h>
61 #include <zfs/sa_impl.h>
62 #include <zfs/dsl_dir.h>
63 #include <zfs/dsl_dataset.h>
/* Name string of the "bootfs" pool property. */
#define ZPOOL_PROP_BOOTFS		"bootfs"
/*
 * For nvlist manipulation. (from nvpair.h)
 */
#define NV_ENCODE_NATIVE	0
#define NV_ENCODE_XDR		1
#define NV_BIG_ENDIAN		0
#define NV_LITTLE_ENDIAN	1
#define DATA_TYPE_UINT64	8
#define DATA_TYPE_STRING	9
#define DATA_TYPE_NVLIST	19
#define DATA_TYPE_NVLIST_ARRAY	20
/*
 * Macros to get fields in a bp or DVA.
 */

/* x masked with (align - 1); this is x modulo align when align is a power of two. */
#define P2PHASE(x, align)		((x) & ((align) - 1))

/*
 * Add VDEV_LABEL_START_SIZE to a byte offset and shift the sum right by
 * SPA_MINBLOCKSHIFT.  The argument is parenthesized so that an expression
 * argument such as "a & b" is added as a whole instead of being re-associated
 * by operator precedence.
 */
#define DVA_OFFSET_TO_PHYS_SECTOR(offset) \
	(((offset) + VDEV_LABEL_START_SIZE) >> SPA_MINBLOCKSHIFT)
/*
 * return x rounded down to an align boundary
 * eg, P2ALIGN(1200, 1024) == 1024 (1*align)
 * eg, P2ALIGN(1024, 1024) == 1024 (1*align)
 * eg, P2ALIGN(0x1234, 0x100) == 0x1200 (0x12*align)
 * eg, P2ALIGN(0x5600, 0x100) == 0x5600 (0x56*align)
 *
 * The mask -(align) only rounds down correctly when align is a power of two.
 */
#define P2ALIGN(x, align)		((x) & -(align))
/*
 * FAT ZAP data structures
 */
#define ZFS_CRC64_POLY	0xC96C5795D7870F42ULL	/* ECMA-182, reflected form */

/*
 * The n most significant bits of a hash treated as a 64-bit value.  n == 0 is
 * special-cased to 0 because shifting a 64-bit value by 64 is undefined in C.
 */
#define ZAP_HASH_IDX(hash, n)	(((n) == 0) ? 0 : ((hash) >> (64 - (n))))

#define CHAIN_END	0xffff	/* end of the chunk chain */
/*
 * The amount of space within the chunk available for the array is:
 * chunk size - space for type (1) - space for next pointer (2)
 */
#define ZAP_LEAF_ARRAY_BYTES	(ZAP_LEAF_CHUNKSIZE - 3)

/*
 * Shift and count of the hash-table entries in a leaf, where bs is the
 * block-size shift of the leaf (the block is (1 << bs) bytes, see
 * ZAP_LEAF_NUMCHUNKS below).
 */
#define ZAP_LEAF_HASH_SHIFT(bs)		((bs) - 5)
#define ZAP_LEAF_HASH_NUMENTRIES(bs)	(1 << ZAP_LEAF_HASH_SHIFT(bs))

/*
 * Hash-table slot for the 64-bit hash h.
 * NOTE: this macro reads l->l_hdr.lh_prefix_len from a variable named "l"
 * that must exist in the scope where the macro is expanded.
 */
#define LEAF_HASH(bs, h) \
	((ZAP_LEAF_HASH_NUMENTRIES(bs) - 1) & \
	 ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(bs) - l->l_hdr.lh_prefix_len)))

/*
 * The amount of space available for chunks is:
 * block size (1 << bs) - hash entry size (2) * number of hash
 * entries - header space (2*chunksize)
 */
#define ZAP_LEAF_NUMCHUNKS(bs) \
	(((1 << (bs)) - 2 * ZAP_LEAF_HASH_NUMENTRIES(bs)) / \
	 ZAP_LEAF_CHUNKSIZE - 2)

/*
 * The chunks start immediately after the hash table.  The end of the
 * hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a
 * zap_leaf_chunk_t pointer and index.  The expansion is an lvalue, so
 * callers may take its address or select a member from it.
 */
#define ZAP_LEAF_CHUNK(l, bs, idx) \
	(((zap_leaf_chunk_t *)((l)->l_hash + ZAP_LEAF_HASH_NUMENTRIES(bs)))[idx])
#define ZAP_LEAF_ENTRY(l, bs, idx)	(&ZAP_LEAF_CHUNK(l, bs, idx).l_entry)
137 * Decompression Entry - lzjb
/*
 * Function type shared by the decompression routines.  As called from
 * zio_read() in this file, s_start/s_len are the compressed buffer and its
 * physical size, d_start/d_len the output buffer and the logical size; the
 * int result is stored by the caller as an error code.
 */
typedef int zfs_decomp_func_t(void *s_start, void *d_start,
			      uint32_t s_len, uint32_t d_len);
147 typedef struct decomp_entry
{
149 zfs_decomp_func_t
*decomp_func
;
152 typedef struct dnode_end
{
158 /* cache for a file block of the currently zfs_open()-ed file */
163 /* XXX: ashift is per vdev, not per pool. We currently only ever touch
164 * a single vdev, but when/if raid-z or stripes are supported, this
167 uint64_t vdev_ashift
;
171 /* cache for a dnode block */
172 dnode_phys_t
*dnode_buf
;
173 dnode_phys_t
*dnode_mdn
;
174 uint64_t dnode_start
;
176 zfs_endian_t dnode_endian
;
178 uberblock_t current_uberblock
;
184 uint64_t vdev_phys_sector
;
186 int (*userhook
)(const char *, const struct zfs_dirhook_info
*);
187 struct zfs_dirhook_info
*dirinfo
;
195 zlib_decompress(void *s
, void *d
,
196 uint32_t slen
, uint32_t dlen
)
198 if (zlib_decompress(s
, d
, slen
, dlen
) < 0)
199 return ZFS_ERR_BAD_FS
;
203 static decomp_entry_t decomp_table
[ZIO_COMPRESS_FUNCTIONS
] = {
204 {"inherit", NULL
}, /* ZIO_COMPRESS_INHERIT */
205 {"on", lzjb_decompress
}, /* ZIO_COMPRESS_ON */
206 {"off", NULL
}, /* ZIO_COMPRESS_OFF */
207 {"lzjb", lzjb_decompress
}, /* ZIO_COMPRESS_LZJB */
208 {"empty", NULL
}, /* ZIO_COMPRESS_EMPTY */
209 {"gzip-1", zlib_decompress
}, /* ZIO_COMPRESS_GZIP1 */
210 {"gzip-2", zlib_decompress
}, /* ZIO_COMPRESS_GZIP2 */
211 {"gzip-3", zlib_decompress
}, /* ZIO_COMPRESS_GZIP3 */
212 {"gzip-4", zlib_decompress
}, /* ZIO_COMPRESS_GZIP4 */
213 {"gzip-5", zlib_decompress
}, /* ZIO_COMPRESS_GZIP5 */
214 {"gzip-6", zlib_decompress
}, /* ZIO_COMPRESS_GZIP6 */
215 {"gzip-7", zlib_decompress
}, /* ZIO_COMPRESS_GZIP7 */
216 {"gzip-8", zlib_decompress
}, /* ZIO_COMPRESS_GZIP8 */
217 {"gzip-9", zlib_decompress
}, /* ZIO_COMPRESS_GZIP9 */
222 static int zio_read_data(blkptr_t
*bp
, zfs_endian_t endian
,
223 void *buf
, struct zfs_data
*data
);
226 zio_read(blkptr_t
*bp
, zfs_endian_t endian
, void **buf
,
227 size_t *size
, struct zfs_data
*data
);
230 * Our own version of log2(). Same thing as highbit()-1.
233 zfs_log2(uint64_t num
)
246 /* Checksum Functions */
248 zio_checksum_off(const void *buf
__attribute__ ((unused
)),
249 uint64_t size
__attribute__ ((unused
)),
250 zfs_endian_t endian
__attribute__ ((unused
)),
253 ZIO_SET_CHECKSUM(zcp
, 0, 0, 0, 0);
256 /* Checksum Table and Values */
257 static zio_checksum_info_t zio_checksum_table
[ZIO_CHECKSUM_FUNCTIONS
] = {
258 {NULL
, 0, 0, "inherit"},
260 {zio_checksum_off
, 0, 0, "off"},
261 {zio_checksum_SHA256
, 1, 1, "label"},
262 {zio_checksum_SHA256
, 1, 1, "gang_header"},
263 {NULL
, 0, 0, "zilog"},
264 {fletcher_2_endian
, 0, 0, "fletcher2"},
265 {fletcher_4_endian
, 1, 0, "fletcher4"},
266 {zio_checksum_SHA256
, 1, 0, "SHA256"},
267 {NULL
, 0, 0, "zilog2"},
271 * zio_checksum_verify: Provides support for checksum verification.
273 * Fletcher2, Fletcher4, and SHA256 are supported.
277 zio_checksum_verify(zio_cksum_t zc
, uint32_t checksum
,
278 zfs_endian_t endian
, char *buf
, int size
)
280 zio_eck_t
*zec
= (zio_eck_t
*) (buf
+ size
) - 1;
281 zio_checksum_info_t
*ci
= &zio_checksum_table
[checksum
];
282 zio_cksum_t actual_cksum
, expected_cksum
;
284 if (checksum
>= ZIO_CHECKSUM_FUNCTIONS
|| ci
->ci_func
== NULL
) {
285 printf("zfs unknown checksum function %d\n", checksum
);
286 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
290 expected_cksum
= zec
->zec_cksum
;
292 ci
->ci_func(buf
, size
, endian
, &actual_cksum
);
293 zec
->zec_cksum
= expected_cksum
;
296 ci
->ci_func(buf
, size
, endian
, &actual_cksum
);
299 if ((actual_cksum
.zc_word
[0] != zc
.zc_word
[0])
300 || (actual_cksum
.zc_word
[1] != zc
.zc_word
[1])
301 || (actual_cksum
.zc_word
[2] != zc
.zc_word
[2])
302 || (actual_cksum
.zc_word
[3] != zc
.zc_word
[3])) {
303 return ZFS_ERR_BAD_FS
;
310 * vdev_uberblock_compare takes two uberblock structures and returns an integer
311 * indicating the more recent of the two.
312 * Return Value = 1 if ub2 is more recent
313 * Return Value = -1 if ub1 is more recent
314 * The most recent uberblock is determined using its transaction number and
315 * timestamp. The uberblock with the highest transaction number is
316 * considered "newer". If the transaction numbers of the two blocks match, the
317 * timestamps are compared to determine the "newer" of the two.
320 vdev_uberblock_compare(uberblock_t
*ub1
, uberblock_t
*ub2
)
322 zfs_endian_t ub1_endian
, ub2_endian
;
323 if (zfs_to_cpu64(ub1
->ub_magic
, LITTLE_ENDIAN
) == UBERBLOCK_MAGIC
)
324 ub1_endian
= LITTLE_ENDIAN
;
326 ub1_endian
= BIG_ENDIAN
;
327 if (zfs_to_cpu64(ub2
->ub_magic
, LITTLE_ENDIAN
) == UBERBLOCK_MAGIC
)
328 ub2_endian
= LITTLE_ENDIAN
;
330 ub2_endian
= BIG_ENDIAN
;
332 if (zfs_to_cpu64(ub1
->ub_txg
, ub1_endian
)
333 < zfs_to_cpu64(ub2
->ub_txg
, ub2_endian
))
335 if (zfs_to_cpu64(ub1
->ub_txg
, ub1_endian
)
336 > zfs_to_cpu64(ub2
->ub_txg
, ub2_endian
))
339 if (zfs_to_cpu64(ub1
->ub_timestamp
, ub1_endian
)
340 < zfs_to_cpu64(ub2
->ub_timestamp
, ub2_endian
))
342 if (zfs_to_cpu64(ub1
->ub_timestamp
, ub1_endian
)
343 > zfs_to_cpu64(ub2
->ub_timestamp
, ub2_endian
))
350 * Three pieces of information are needed to verify an uberblock: the magic
351 * number, the version number, and the checksum.
353 * Currently Implemented: version number, magic number, label txg
354 * Need to Implement: checksum
358 uberblock_verify(uberblock_t
*uber
, int offset
, struct zfs_data
*data
)
361 zfs_endian_t endian
= UNKNOWN_ENDIAN
;
364 if (uber
->ub_txg
< data
->label_txg
) {
365 debug("ignoring partially written label: uber_txg < label_txg %llu %llu\n",
366 uber
->ub_txg
, data
->label_txg
);
367 return ZFS_ERR_BAD_FS
;
370 if (zfs_to_cpu64(uber
->ub_magic
, LITTLE_ENDIAN
) == UBERBLOCK_MAGIC
371 && zfs_to_cpu64(uber
->ub_version
, LITTLE_ENDIAN
) > 0
372 && zfs_to_cpu64(uber
->ub_version
, LITTLE_ENDIAN
) <= SPA_VERSION
)
373 endian
= LITTLE_ENDIAN
;
375 if (zfs_to_cpu64(uber
->ub_magic
, BIG_ENDIAN
) == UBERBLOCK_MAGIC
376 && zfs_to_cpu64(uber
->ub_version
, BIG_ENDIAN
) > 0
377 && zfs_to_cpu64(uber
->ub_version
, BIG_ENDIAN
) <= SPA_VERSION
)
380 if (endian
== UNKNOWN_ENDIAN
) {
381 printf("invalid uberblock magic\n");
382 return ZFS_ERR_BAD_FS
;
385 memset(&zc
, 0, sizeof(zc
));
386 zc
.zc_word
[0] = cpu_to_zfs64(offset
, endian
);
387 err
= zio_checksum_verify(zc
, ZIO_CHECKSUM_LABEL
, endian
,
388 (char *) uber
, UBERBLOCK_SIZE(data
->vdev_ashift
));
391 /* Check that the data pointed by the rootbp is usable. */
394 err
= zio_read(&uber
->ub_rootbp
, endian
, &osp
, &ospsize
, data
);
397 if (!err
&& ospsize
< OBJSET_PHYS_SIZE_V14
) {
398 printf("uberblock rootbp points to invalid data\n");
399 return ZFS_ERR_BAD_FS
;
407 * Find the best uberblock.
409 * Success - Pointer to the best uberblock.
412 static uberblock_t
*find_bestub(char *ub_array
, struct zfs_data
*data
)
414 const uint64_t sector
= data
->vdev_phys_sector
;
415 uberblock_t
*ubbest
= NULL
;
417 unsigned int i
, offset
, pickedub
= 0;
418 int err
= ZFS_ERR_NONE
;
420 const unsigned int UBCOUNT
= UBERBLOCK_COUNT(data
->vdev_ashift
);
421 const uint64_t UBBYTES
= UBERBLOCK_SIZE(data
->vdev_ashift
);
423 for (i
= 0; i
< UBCOUNT
; i
++) {
424 ubnext
= (uberblock_t
*) (i
* UBBYTES
+ ub_array
);
425 offset
= (sector
<< SPA_MINBLOCKSHIFT
) + VDEV_PHYS_SIZE
+ (i
* UBBYTES
);
427 err
= uberblock_verify(ubnext
, offset
, data
);
431 if (ubbest
== NULL
|| vdev_uberblock_compare(ubnext
, ubbest
) > 0) {
438 debug("zfs Found best uberblock at idx %d, txg %llu\n",
439 pickedub
, (unsigned long long) ubbest
->ub_txg
);
445 get_psize(blkptr_t
*bp
, zfs_endian_t endian
)
447 return (((zfs_to_cpu64((bp
)->blk_prop
, endian
) >> 16) & 0xffff) + 1)
448 << SPA_MINBLOCKSHIFT
;
452 dva_get_offset(dva_t
*dva
, zfs_endian_t endian
)
454 return zfs_to_cpu64((dva
)->dva_word
[1],
455 endian
) << SPA_MINBLOCKSHIFT
;
459 * Read a block of data based on the gang block address dva,
460 * and put its data in buf.
464 zio_read_gang(blkptr_t
*bp
, zfs_endian_t endian
, dva_t
*dva
, void *buf
,
465 struct zfs_data
*data
)
467 zio_gbh_phys_t
*zio_gb
;
468 uint64_t offset
, sector
;
473 memset(&zc
, 0, sizeof(zc
));
475 zio_gb
= malloc(SPA_GANGBLOCKSIZE
);
477 return ZFS_ERR_OUT_OF_MEMORY
;
479 offset
= dva_get_offset(dva
, endian
);
480 sector
= DVA_OFFSET_TO_PHYS_SECTOR(offset
);
482 /* read in the gang block header */
483 err
= zfs_devread(sector
, 0, SPA_GANGBLOCKSIZE
, (char *) zio_gb
);
491 /* self-checksumming the gang block header */
492 ZIO_SET_CHECKSUM(&zc
, DVA_GET_VDEV(dva
),
493 dva_get_offset(dva
, endian
), bp
->blk_birth
, 0);
494 err
= zio_checksum_verify(zc
, ZIO_CHECKSUM_GANG_HEADER
, endian
,
495 (char *) zio_gb
, SPA_GANGBLOCKSIZE
);
501 endian
= (zfs_to_cpu64(bp
->blk_prop
, endian
) >> 63) & 1;
503 for (i
= 0; i
< SPA_GBH_NBLKPTRS
; i
++) {
504 if (zio_gb
->zg_blkptr
[i
].blk_birth
== 0)
507 err
= zio_read_data(&zio_gb
->zg_blkptr
[i
], endian
, buf
, data
);
512 buf
= (char *) buf
+ get_psize(&zio_gb
->zg_blkptr
[i
], endian
);
519 * Read in a block of raw data to buf.
522 zio_read_data(blkptr_t
*bp
, zfs_endian_t endian
, void *buf
,
523 struct zfs_data
*data
)
526 int err
= ZFS_ERR_NONE
;
528 psize
= get_psize(bp
, endian
);
530 /* pick a good dva from the block pointer */
531 for (i
= 0; i
< SPA_DVAS_PER_BP
; i
++) {
532 uint64_t offset
, sector
;
534 if (bp
->blk_dva
[i
].dva_word
[0] == 0 && bp
->blk_dva
[i
].dva_word
[1] == 0)
537 if ((zfs_to_cpu64(bp
->blk_dva
[i
].dva_word
[1], endian
)>>63) & 1) {
538 err
= zio_read_gang(bp
, endian
, &bp
->blk_dva
[i
], buf
, data
);
540 /* read in a data block */
541 offset
= dva_get_offset(&bp
->blk_dva
[i
], endian
);
542 sector
= DVA_OFFSET_TO_PHYS_SECTOR(offset
);
544 err
= zfs_devread(sector
, 0, psize
, buf
);
548 /*Check the underlying checksum before we rule this DVA as "good"*/
549 uint32_t checkalgo
= (zfs_to_cpu64((bp
)->blk_prop
, endian
) >> 40) & 0xff;
551 err
= zio_checksum_verify(bp
->blk_cksum
, checkalgo
, endian
, buf
, psize
);
556 /* If the read failed or the checksum is bad, reset the error. Hopefully we've got some more DVAs to try. */
560 printf("couldn't find a valid DVA\n");
561 err
= ZFS_ERR_BAD_FS
;
568 * Read in a block of data, verify its checksum, decompress if needed,
569 * and put the uncompressed data in buf.
572 zio_read(blkptr_t
*bp
, zfs_endian_t endian
, void **buf
,
573 size_t *size
, struct zfs_data
*data
)
577 char *compbuf
= NULL
;
582 comp
= (zfs_to_cpu64((bp
)->blk_prop
, endian
)>>32) & 0xff;
583 lsize
= (BP_IS_HOLE(bp
) ? 0 :
584 (((zfs_to_cpu64((bp
)->blk_prop
, endian
) & 0xffff) + 1)
585 << SPA_MINBLOCKSHIFT
));
586 psize
= get_psize(bp
, endian
);
591 if (comp
>= ZIO_COMPRESS_FUNCTIONS
) {
592 printf("compression algorithm %u not supported\n", (unsigned int) comp
);
593 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
596 if (comp
!= ZIO_COMPRESS_OFF
&& decomp_table
[comp
].decomp_func
== NULL
) {
597 printf("compression algorithm %s not supported\n", decomp_table
[comp
].name
);
598 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
601 if (comp
!= ZIO_COMPRESS_OFF
) {
602 compbuf
= malloc(psize
);
604 return ZFS_ERR_OUT_OF_MEMORY
;
606 compbuf
= *buf
= malloc(lsize
);
609 err
= zio_read_data(bp
, endian
, compbuf
, data
);
616 if (comp
!= ZIO_COMPRESS_OFF
) {
617 *buf
= malloc(lsize
);
620 return ZFS_ERR_OUT_OF_MEMORY
;
623 err
= decomp_table
[comp
].decomp_func(compbuf
, *buf
, psize
, lsize
);
636 * Get the block from a block id.
637 * push the block onto the stack.
641 dmu_read(dnode_end_t
*dn
, uint64_t blkid
, void **buf
,
642 zfs_endian_t
*endian_out
, struct zfs_data
*data
)
645 blkptr_t
*bp_array
= dn
->dn
.dn_blkptr
;
646 int epbs
= dn
->dn
.dn_indblkshift
- SPA_BLKPTRSHIFT
;
650 int err
= ZFS_ERR_NONE
;
652 bp
= malloc(sizeof(blkptr_t
));
654 return ZFS_ERR_OUT_OF_MEMORY
;
657 for (level
= dn
->dn
.dn_nlevels
- 1; level
>= 0; level
--) {
658 idx
= (blkid
>> (epbs
* level
)) & ((1 << epbs
) - 1);
660 if (bp_array
!= dn
->dn
.dn_blkptr
) {
665 if (BP_IS_HOLE(bp
)) {
666 size_t size
= zfs_to_cpu16(dn
->dn
.dn_datablkszsec
,
668 << SPA_MINBLOCKSHIFT
;
671 err
= ZFS_ERR_OUT_OF_MEMORY
;
674 memset(*buf
, 0, size
);
675 endian
= (zfs_to_cpu64(bp
->blk_prop
, endian
) >> 63) & 1;
679 err
= zio_read(bp
, endian
, buf
, 0, data
);
680 endian
= (zfs_to_cpu64(bp
->blk_prop
, endian
) >> 63) & 1;
683 err
= zio_read(bp
, endian
, &tmpbuf
, 0, data
);
684 endian
= (zfs_to_cpu64(bp
->blk_prop
, endian
) >> 63) & 1;
689 if (bp_array
!= dn
->dn
.dn_blkptr
)
692 *endian_out
= endian
;
699 * mzap_lookup: Looks up property described by "name" and returns the value
703 mzap_lookup(mzap_phys_t
*zapobj
, zfs_endian_t endian
,
704 int objsize
, char *name
, uint64_t * value
)
707 mzap_ent_phys_t
*mzap_ent
= zapobj
->mz_chunk
;
709 chunks
= objsize
/ MZAP_ENT_LEN
- 1;
710 for (i
= 0; i
< chunks
; i
++) {
711 if (strcmp(mzap_ent
[i
].mze_name
, name
) == 0) {
712 *value
= zfs_to_cpu64(mzap_ent
[i
].mze_value
, endian
);
717 printf("couldn't find '%s'\n", name
);
718 return ZFS_ERR_FILE_NOT_FOUND
;
722 mzap_iterate(mzap_phys_t
*zapobj
, zfs_endian_t endian
, int objsize
,
723 int (*hook
)(const char *name
,
725 struct zfs_data
*data
),
726 struct zfs_data
*data
)
729 mzap_ent_phys_t
*mzap_ent
= zapobj
->mz_chunk
;
731 chunks
= objsize
/ MZAP_ENT_LEN
- 1;
732 for (i
= 0; i
< chunks
; i
++) {
733 if (hook(mzap_ent
[i
].mze_name
,
734 zfs_to_cpu64(mzap_ent
[i
].mze_value
, endian
),
743 zap_hash(uint64_t salt
, const char *name
)
745 static uint64_t table
[256];
750 if (table
[128] == 0) {
753 for (i
= 0; i
< 256; i
++) {
754 for (ct
= table
+ i
, *ct
= i
, j
= 8; j
> 0; j
--)
755 *ct
= (*ct
>> 1) ^ (-(*ct
& 1) & ZFS_CRC64_POLY
);
759 for (cp
= (const uint8_t *) name
; (c
= *cp
) != '\0'; cp
++)
760 crc
= (crc
>> 8) ^ table
[(crc
^ c
) & 0xFF];
763 * Only use 28 bits, since we need 4 bits in the cookie for the
764 * collision differentiator. We MUST use the high bits, since
765 * those are the ones that we first pay attention to when
766 * choosing the bucket.
768 crc
&= ~((1ULL << (64 - ZAP_HASHBITS
)) - 1);
774 * Only to be used on 8-bit arrays.
775 * array_len is actual len in bytes (not encoded le_value_length).
776 * buf is null-terminated.
780 zap_leaf_array_equal(zap_leaf_phys_t
*l
, zfs_endian_t endian
,
781 int blksft
, int chunk
, int array_len
, const char *buf
)
785 while (bseen
< array_len
) {
786 struct zap_leaf_array
*la
= &ZAP_LEAF_CHUNK(l
, blksft
, chunk
).l_array
;
787 int toread
= MIN(array_len
- bseen
, ZAP_LEAF_ARRAY_BYTES
);
789 if (chunk
>= ZAP_LEAF_NUMCHUNKS(blksft
))
792 if (memcmp(la
->la_array
, buf
+ bseen
, toread
) != 0)
794 chunk
= zfs_to_cpu16(la
->la_next
, endian
);
797 return (bseen
== array_len
);
802 zap_leaf_array_get(zap_leaf_phys_t
*l
, zfs_endian_t endian
, int blksft
,
803 int chunk
, int array_len
, char *buf
)
807 while (bseen
< array_len
) {
808 struct zap_leaf_array
*la
= &ZAP_LEAF_CHUNK(l
, blksft
, chunk
).l_array
;
809 int toread
= MIN(array_len
- bseen
, ZAP_LEAF_ARRAY_BYTES
);
811 if (chunk
>= ZAP_LEAF_NUMCHUNKS(blksft
))
812 /* Don't use errno because this error is to be ignored. */
813 return ZFS_ERR_BAD_FS
;
815 memcpy(buf
+ bseen
, la
->la_array
, toread
);
816 chunk
= zfs_to_cpu16(la
->la_next
, endian
);
824 * Given a zap_leaf_phys_t, walk thru the zap leaf chunks to get the
825 * value for the property "name".
830 zap_leaf_lookup(zap_leaf_phys_t
*l
, zfs_endian_t endian
,
831 int blksft
, uint64_t h
,
832 const char *name
, uint64_t *value
)
835 struct zap_leaf_entry
*le
;
837 /* Verify if this is a valid leaf block */
838 if (zfs_to_cpu64(l
->l_hdr
.lh_block_type
, endian
) != ZBT_LEAF
) {
839 printf("invalid leaf type\n");
840 return ZFS_ERR_BAD_FS
;
842 if (zfs_to_cpu32(l
->l_hdr
.lh_magic
, endian
) != ZAP_LEAF_MAGIC
) {
843 printf("invalid leaf magic\n");
844 return ZFS_ERR_BAD_FS
;
847 for (chunk
= zfs_to_cpu16(l
->l_hash
[LEAF_HASH(blksft
, h
)], endian
);
848 chunk
!= CHAIN_END
; chunk
= le
->le_next
) {
850 if (chunk
>= ZAP_LEAF_NUMCHUNKS(blksft
)) {
851 printf("invalid chunk number\n");
852 return ZFS_ERR_BAD_FS
;
855 le
= ZAP_LEAF_ENTRY(l
, blksft
, chunk
);
857 /* Verify the chunk entry */
858 if (le
->le_type
!= ZAP_CHUNK_ENTRY
) {
859 printf("invalid chunk entry\n");
860 return ZFS_ERR_BAD_FS
;
863 if (zfs_to_cpu64(le
->le_hash
, endian
) != h
)
866 if (zap_leaf_array_equal(l
, endian
, blksft
,
867 zfs_to_cpu16(le
->le_name_chunk
, endian
),
868 zfs_to_cpu16(le
->le_name_length
, endian
),
870 struct zap_leaf_array
*la
;
872 if (le
->le_int_size
!= 8 || le
->le_value_length
!= 1) {
873 printf("invalid leaf chunk entry\n");
874 return ZFS_ERR_BAD_FS
;
876 /* get the uint64_t property value */
877 la
= &ZAP_LEAF_CHUNK(l
, blksft
, le
->le_value_chunk
).l_array
;
879 *value
= be64_to_cpu(la
->la_array64
);
885 printf("couldn't find '%s'\n", name
);
886 return ZFS_ERR_FILE_NOT_FOUND
;
890 /* Verify if this is a fat zap header block */
892 zap_verify(zap_phys_t
*zap
)
894 if (zap
->zap_magic
!= (uint64_t) ZAP_MAGIC
) {
895 printf("bad ZAP magic\n");
896 return ZFS_ERR_BAD_FS
;
899 if (zap
->zap_flags
!= 0) {
900 printf("bad ZAP flags\n");
901 return ZFS_ERR_BAD_FS
;
904 if (zap
->zap_salt
== 0) {
905 printf("bad ZAP salt\n");
906 return ZFS_ERR_BAD_FS
;
918 fzap_lookup(dnode_end_t
*zap_dnode
, zap_phys_t
*zap
,
919 char *name
, uint64_t *value
, struct zfs_data
*data
)
922 uint64_t hash
, idx
, blkid
;
923 int blksft
= zfs_log2(zfs_to_cpu16(zap_dnode
->dn
.dn_datablkszsec
,
924 zap_dnode
->endian
) << DNODE_SHIFT
);
926 zfs_endian_t leafendian
;
928 err
= zap_verify(zap
);
932 hash
= zap_hash(zap
->zap_salt
, name
);
934 /* get block id from index */
935 if (zap
->zap_ptrtbl
.zt_numblks
!= 0) {
936 printf("external pointer tables not supported\n");
937 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
939 idx
= ZAP_HASH_IDX(hash
, zap
->zap_ptrtbl
.zt_shift
);
940 blkid
= ((uint64_t *) zap
)[idx
+ (1 << (blksft
- 3 - 1))];
942 /* Get the leaf block */
943 if ((1U << blksft
) < sizeof(zap_leaf_phys_t
)) {
944 printf("ZAP leaf is too small\n");
945 return ZFS_ERR_BAD_FS
;
947 err
= dmu_read(zap_dnode
, blkid
, &l
, &leafendian
, data
);
951 err
= zap_leaf_lookup(l
, leafendian
, blksft
, hash
, name
, value
);
958 fzap_iterate(dnode_end_t
*zap_dnode
, zap_phys_t
*zap
,
959 int (*hook
)(const char *name
,
961 struct zfs_data
*data
),
962 struct zfs_data
*data
)
968 int blksft
= zfs_log2(zfs_to_cpu16(zap_dnode
->dn
.dn_datablkszsec
,
969 zap_dnode
->endian
) << DNODE_SHIFT
);
976 /* get block id from index */
977 if (zap
->zap_ptrtbl
.zt_numblks
!= 0) {
978 printf("external pointer tables not supported\n");
981 /* Get the leaf block */
982 if ((1U << blksft
) < sizeof(zap_leaf_phys_t
)) {
983 printf("ZAP leaf is too small\n");
986 for (idx
= 0; idx
< zap
->zap_ptrtbl
.zt_numblks
; idx
++) {
987 blkid
= ((uint64_t *) zap
)[idx
+ (1 << (blksft
- 3 - 1))];
989 err
= dmu_read(zap_dnode
, blkid
, &l_in
, &endian
, data
);
994 /* Verify if this is a valid leaf block */
995 if (zfs_to_cpu64(l
->l_hdr
.lh_block_type
, endian
) != ZBT_LEAF
) {
999 if (zfs_to_cpu32(l
->l_hdr
.lh_magic
, endian
) != ZAP_LEAF_MAGIC
) {
1004 for (chunk
= 0; chunk
< ZAP_LEAF_NUMCHUNKS(blksft
); chunk
++) {
1006 struct zap_leaf_array
*la
;
1007 struct zap_leaf_entry
*le
;
1009 le
= ZAP_LEAF_ENTRY(l
, blksft
, chunk
);
1011 /* Verify the chunk entry */
1012 if (le
->le_type
!= ZAP_CHUNK_ENTRY
)
1015 buf
= malloc(zfs_to_cpu16(le
->le_name_length
, endian
)
1017 if (zap_leaf_array_get(l
, endian
, blksft
, le
->le_name_chunk
,
1018 le
->le_name_length
, buf
)) {
1022 buf
[le
->le_name_length
] = 0;
1024 if (le
->le_int_size
!= 8
1025 || zfs_to_cpu16(le
->le_value_length
, endian
) != 1)
1028 /* get the uint64_t property value */
1029 la
= &ZAP_LEAF_CHUNK(l
, blksft
, le
->le_value_chunk
).l_array
;
1030 val
= be64_to_cpu(la
->la_array64
);
1031 if (hook(buf
, val
, data
))
1041 * Read in the data of a zap object and find the value for a matching
1046 zap_lookup(dnode_end_t
*zap_dnode
, char *name
, uint64_t *val
,
1047 struct zfs_data
*data
)
1049 uint64_t block_type
;
1053 zfs_endian_t endian
;
1055 /* Read in the first block of the zap object data. */
1056 size
= zfs_to_cpu16(zap_dnode
->dn
.dn_datablkszsec
,
1057 zap_dnode
->endian
) << SPA_MINBLOCKSHIFT
;
1058 err
= dmu_read(zap_dnode
, 0, &zapbuf
, &endian
, data
);
1061 block_type
= zfs_to_cpu64(*((uint64_t *) zapbuf
), endian
);
1063 if (block_type
== ZBT_MICRO
) {
1064 err
= (mzap_lookup(zapbuf
, endian
, size
, name
, val
));
1067 } else if (block_type
== ZBT_HEADER
) {
1068 /* this is a fat zap */
1069 err
= (fzap_lookup(zap_dnode
, zapbuf
, name
, val
, data
));
1074 printf("unknown ZAP type\n");
1075 return ZFS_ERR_BAD_FS
;
1079 zap_iterate(dnode_end_t
*zap_dnode
,
1080 int (*hook
)(const char *name
, uint64_t val
,
1081 struct zfs_data
*data
),
1082 struct zfs_data
*data
)
1084 uint64_t block_type
;
1089 zfs_endian_t endian
;
1091 /* Read in the first block of the zap object data. */
1092 size
= zfs_to_cpu16(zap_dnode
->dn
.dn_datablkszsec
, zap_dnode
->endian
) << SPA_MINBLOCKSHIFT
;
1093 err
= dmu_read(zap_dnode
, 0, &zapbuf
, &endian
, data
);
1096 block_type
= zfs_to_cpu64(*((uint64_t *) zapbuf
), endian
);
1098 if (block_type
== ZBT_MICRO
) {
1099 ret
= mzap_iterate(zapbuf
, endian
, size
, hook
, data
);
1102 } else if (block_type
== ZBT_HEADER
) {
1103 /* this is a fat zap */
1104 ret
= fzap_iterate(zap_dnode
, zapbuf
, hook
, data
);
1108 printf("unknown ZAP type\n");
1114 * Get the dnode of an object number from the metadnode of an object set.
1117 * mdn - metadnode to get the object dnode
1118 * objnum - object number for the object dnode
1119 * buf - data buffer that holds the returning dnode
1122 dnode_get(dnode_end_t
*mdn
, uint64_t objnum
, uint8_t type
,
1123 dnode_end_t
*buf
, struct zfs_data
*data
)
1125 uint64_t blkid
, blksz
; /* the block id this object dnode is in */
1126 int epbs
; /* shift of number of dnodes in a block */
1127 int idx
; /* index within a block */
1130 zfs_endian_t endian
;
1132 blksz
= zfs_to_cpu16(mdn
->dn
.dn_datablkszsec
,
1133 mdn
->endian
) << SPA_MINBLOCKSHIFT
;
1135 epbs
= zfs_log2(blksz
) - DNODE_SHIFT
;
1136 blkid
= objnum
>> epbs
;
1137 idx
= objnum
& ((1 << epbs
) - 1);
1139 if (data
->dnode_buf
!= NULL
&& memcmp(data
->dnode_mdn
, mdn
,
1141 && objnum
>= data
->dnode_start
&& objnum
< data
->dnode_end
) {
1142 memmove(&(buf
->dn
), &(data
->dnode_buf
)[idx
], DNODE_SIZE
);
1143 buf
->endian
= data
->dnode_endian
;
1144 if (type
&& buf
->dn
.dn_type
!= type
) {
1145 printf("incorrect dnode type: %02X != %02x\n", buf
->dn
.dn_type
, type
);
1146 return ZFS_ERR_BAD_FS
;
1148 return ZFS_ERR_NONE
;
1151 err
= dmu_read(mdn
, blkid
, &dnbuf
, &endian
, data
);
1155 free(data
->dnode_buf
);
1156 free(data
->dnode_mdn
);
1157 data
->dnode_mdn
= malloc(sizeof(*mdn
));
1158 if (!data
->dnode_mdn
) {
1159 data
->dnode_buf
= 0;
1161 memcpy(data
->dnode_mdn
, mdn
, sizeof(*mdn
));
1162 data
->dnode_buf
= dnbuf
;
1163 data
->dnode_start
= blkid
<< epbs
;
1164 data
->dnode_end
= (blkid
+ 1) << epbs
;
1165 data
->dnode_endian
= endian
;
1168 memmove(&(buf
->dn
), (dnode_phys_t
*) dnbuf
+ idx
, DNODE_SIZE
);
1169 buf
->endian
= endian
;
1170 if (type
&& buf
->dn
.dn_type
!= type
) {
1171 printf("incorrect dnode type\n");
1172 return ZFS_ERR_BAD_FS
;
1175 return ZFS_ERR_NONE
;
1179 * Get the file dnode for a given file name where mdn is the meta dnode
1180 * for this ZFS object set. When found, place the file dnode in dn.
1181 * The 'path_in' argument is duplicated with strdup() before parsing, so the caller's string is not modified.
1185 dnode_get_path(dnode_end_t
*mdn
, const char *path_in
, dnode_end_t
*dn
,
1186 struct zfs_data
*data
)
1188 uint64_t objnum
, version
;
1190 int err
= ZFS_ERR_NONE
;
1191 char *path
, *path_buf
;
1192 struct dnode_chain
{
1193 struct dnode_chain
*next
;
1196 struct dnode_chain
*dnode_path
= 0, *dn_new
, *root
;
1198 dn_new
= malloc(sizeof(*dn_new
));
1200 return ZFS_ERR_OUT_OF_MEMORY
;
1202 dnode_path
= root
= dn_new
;
1204 err
= dnode_get(mdn
, MASTER_NODE_OBJ
, DMU_OT_MASTER_NODE
,
1205 &(dnode_path
->dn
), data
);
1211 err
= zap_lookup(&(dnode_path
->dn
), ZPL_VERSION_STR
, &version
, data
);
1216 if (version
> ZPL_VERSION
) {
1218 printf("too new ZPL version\n");
1219 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
1222 err
= zap_lookup(&(dnode_path
->dn
), ZFS_ROOT_OBJ
, &objnum
, data
);
1228 err
= dnode_get(mdn
, objnum
, 0, &(dnode_path
->dn
), data
);
1234 path
= path_buf
= strdup(path_in
);
1237 return ZFS_ERR_OUT_OF_MEMORY
;
1241 /* skip leading slashes */
1242 while (*path
== '/')
1246 /* get the next component name */
1248 while (*path
&& *path
!= '/')
1251 if (cname
+ 1 == path
&& cname
[0] == '.')
1253 /* Handle double dot. */
1254 if (cname
+ 2 == path
&& cname
[0] == '.' && cname
[1] == '.') {
1256 dn_new
= dnode_path
;
1257 dnode_path
= dn_new
->next
;
1260 printf("can't resolve ..\n");
1261 err
= ZFS_ERR_FILE_NOT_FOUND
;
1268 *path
= 0; /* ensure null termination */
1270 if (dnode_path
->dn
.dn
.dn_type
!= DMU_OT_DIRECTORY_CONTENTS
) {
1272 printf("not a directory\n");
1273 return ZFS_ERR_BAD_FILE_TYPE
;
1275 err
= zap_lookup(&(dnode_path
->dn
), cname
, &objnum
, data
);
1279 dn_new
= malloc(sizeof(*dn_new
));
1281 err
= ZFS_ERR_OUT_OF_MEMORY
;
1284 dn_new
->next
= dnode_path
;
1285 dnode_path
= dn_new
;
1287 objnum
= ZFS_DIRENT_OBJ(objnum
);
1288 err
= dnode_get(mdn
, objnum
, 0, &(dnode_path
->dn
), data
);
1296 memcpy(dn
, &(dnode_path
->dn
), sizeof(*dn
));
1298 while (dnode_path
) {
1299 dn_new
= dnode_path
->next
;
1301 dnode_path
= dn_new
;
1309 * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname),
1310 * e.g. pool/rootfs, or a given object number (obj), e.g. the object number
1313 * If no fsname and no obj are given, return the DSL_DIR metadnode.
1314 * If fsname is given, return its metadnode and its matching object number.
1315 * If only obj is given, return the metadnode for this object number.
1319 get_filesystem_dnode(dnode_end_t
*mosmdn
, char *fsname
,
1320 dnode_end_t
*mdn
, struct zfs_data
*data
)
1325 err
= dnode_get(mosmdn
, DMU_POOL_DIRECTORY_OBJECT
,
1326 DMU_OT_OBJECT_DIRECTORY
, mdn
, data
);
1330 err
= zap_lookup(mdn
, DMU_POOL_ROOT_DATASET
, &objnum
, data
);
1334 err
= dnode_get(mosmdn
, objnum
, DMU_OT_DSL_DIR
, mdn
, data
);
1342 while (*fsname
== '/')
1345 if (!*fsname
|| *fsname
== '@')
1349 while (*fsname
&& !isspace(*fsname
) && *fsname
!= '/')
1354 childobj
= zfs_to_cpu64((((dsl_dir_phys_t
*) DN_BONUS(&mdn
->dn
)))->dd_child_dir_zapobj
, mdn
->endian
);
1355 err
= dnode_get(mosmdn
, childobj
,
1356 DMU_OT_DSL_DIR_CHILD_MAP
, mdn
, data
);
1360 err
= zap_lookup(mdn
, cname
, &objnum
, data
);
1364 err
= dnode_get(mosmdn
, objnum
, DMU_OT_DSL_DIR
, mdn
, data
);
1370 return ZFS_ERR_NONE
;
1374 make_mdn(dnode_end_t
*mdn
, struct zfs_data
*data
)
1381 bp
= &(((dsl_dataset_phys_t
*) DN_BONUS(&mdn
->dn
))->ds_bp
);
1382 err
= zio_read(bp
, mdn
->endian
, &osp
, &ospsize
, data
);
1385 if (ospsize
< OBJSET_PHYS_SIZE_V14
) {
1387 printf("too small osp\n");
1388 return ZFS_ERR_BAD_FS
;
1391 mdn
->endian
= (zfs_to_cpu64(bp
->blk_prop
, mdn
->endian
)>>63) & 1;
1392 memmove((char *) &(mdn
->dn
),
1393 (char *) &((objset_phys_t
*) osp
)->os_meta_dnode
, DNODE_SIZE
);
1395 return ZFS_ERR_NONE
;
1399 dnode_get_fullpath(const char *fullpath
, dnode_end_t
*mdn
,
1400 uint64_t *mdnobj
, dnode_end_t
*dn
, int *isfs
,
1401 struct zfs_data
*data
)
1403 char *fsname
, *snapname
;
1404 const char *ptr_at
, *filename
;
1408 ptr_at
= strchr(fullpath
, '@');
1413 fsname
= strdup(fullpath
);
1415 const char *ptr_slash
= strchr(ptr_at
, '/');
1418 fsname
= malloc(ptr_at
- fullpath
+ 1);
1420 return ZFS_ERR_OUT_OF_MEMORY
;
1421 memcpy(fsname
, fullpath
, ptr_at
- fullpath
);
1422 fsname
[ptr_at
- fullpath
] = 0;
1423 if (ptr_at
[1] && ptr_at
[1] != '/') {
1424 snapname
= malloc(ptr_slash
- ptr_at
);
1427 return ZFS_ERR_OUT_OF_MEMORY
;
1429 memcpy(snapname
, ptr_at
+ 1, ptr_slash
- ptr_at
- 1);
1430 snapname
[ptr_slash
- ptr_at
- 1] = 0;
1435 filename
= ptr_slash
;
1438 printf("zfs fsname = '%s' snapname='%s' filename = '%s'\n",
1439 fsname
, snapname
, filename
);
1443 err
= get_filesystem_dnode(&(data
->mos
), fsname
, dn
, data
);
1451 headobj
= zfs_to_cpu64(((dsl_dir_phys_t
*) DN_BONUS(&dn
->dn
))->dd_head_dataset_obj
, dn
->endian
);
1453 err
= dnode_get(&(data
->mos
), headobj
, DMU_OT_DSL_DATASET
, mdn
, data
);
1463 snapobj
= zfs_to_cpu64(((dsl_dataset_phys_t
*) DN_BONUS(&mdn
->dn
))->ds_snapnames_zapobj
, mdn
->endian
);
1465 err
= dnode_get(&(data
->mos
), snapobj
,
1466 DMU_OT_DSL_DS_SNAP_MAP
, mdn
, data
);
1468 err
= zap_lookup(mdn
, snapname
, &headobj
, data
);
1470 err
= dnode_get(&(data
->mos
), headobj
, DMU_OT_DSL_DATASET
, mdn
, data
);
1481 make_mdn(mdn
, data
);
1486 return ZFS_ERR_NONE
;
1488 err
= dnode_get_path(mdn
, filename
, dn
, data
);
1495 * For a given XDR packed nvlist, verify the first 4 bytes and move on.
1497 * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) :
1499 * encoding method/host endian (4 bytes)
1500 * nvl_version (4 bytes)
1501 * nvl_nvflag (4 bytes)
1503 * encoded size of the nvpair (4 bytes)
1504 * decoded size of the nvpair (4 bytes)
1505 * name string size (4 bytes)
1506 * name string data (sizeof(NV_ALIGN4(string))
1507 * data type (4 bytes)
1508 * # of elements in the nvpair (4 bytes)
1510 * 2 zeros for the last nvpair
1511 * (end of the entire list) (8 bytes)
1516 nvlist_find_value(char *nvlist
, char *name
, int valtype
, char **val
,
1517 size_t *size_out
, size_t *nelm_out
)
1519 int name_len
, type
, encode_size
;
1520 char *nvpair
, *nvp_name
;
1522 /* Verify that the 1st and 2nd bytes of the nvlist are valid. */
1523 /* NOTE: regardless of the endianness the header announces, all
1524 subsequent values are big-endian. */
1525 if (nvlist
[0] != NV_ENCODE_XDR
|| (nvlist
[1] != NV_LITTLE_ENDIAN
1526 && nvlist
[1] != NV_BIG_ENDIAN
)) {
1527 printf("zfs incorrect nvlist header\n");
1528 return ZFS_ERR_BAD_FS
;
1531 /* skip the header, nvl_version, and nvl_nvflag */
1532 nvlist
= nvlist
+ 4 * 3;
1534 * Loop thru the nvpair list
1535 * The XDR representation of an integer is in big-endian byte order.
1537 while ((encode_size
= be32_to_cpu(*(uint32_t *) nvlist
))) {
1540 nvpair
= nvlist
+ 4 * 2; /* skip the encode/decode size */
1542 name_len
= be32_to_cpu(*(uint32_t *) nvpair
);
1546 nvpair
= nvpair
+ ((name_len
+ 3) & ~3); /* align */
1548 type
= be32_to_cpu(*(uint32_t *) nvpair
);
1551 nelm
= be32_to_cpu(*(uint32_t *) nvpair
);
1553 printf("empty nvpair\n");
1554 return ZFS_ERR_BAD_FS
;
1559 if ((strncmp(nvp_name
, name
, name_len
) == 0) && type
== valtype
) {
1561 *size_out
= encode_size
;
1567 nvlist
+= encode_size
; /* goto the next nvpair */
1573 zfs_nvlist_lookup_uint64(char *nvlist
, char *name
, uint64_t *out
)
1579 found
= nvlist_find_value(nvlist
, name
, DATA_TYPE_UINT64
, &nvpair
, &size
, 0);
1582 if (size
< sizeof(uint64_t)) {
1583 printf("invalid uint64\n");
1584 return ZFS_ERR_BAD_FS
;
1587 *out
= be64_to_cpu(*(uint64_t *) nvpair
);
1592 zfs_nvlist_lookup_string(char *nvlist
, char *name
)
1600 found
= nvlist_find_value(nvlist
, name
, DATA_TYPE_STRING
, &nvpair
, &size
, 0);
1604 printf("invalid string\n");
1607 slen
= be32_to_cpu(*(uint32_t *) nvpair
);
1608 if (slen
> size
- 4)
1610 ret
= malloc(slen
+ 1);
1613 memcpy(ret
, nvpair
+ 4, slen
);
1619 zfs_nvlist_lookup_nvlist(char *nvlist
, char *name
)
1626 found
= nvlist_find_value(nvlist
, name
, DATA_TYPE_NVLIST
, &nvpair
,
1630 ret
= calloc(1, size
+ 3 * sizeof(uint32_t));
1633 memcpy(ret
, nvlist
, sizeof(uint32_t));
1635 memcpy(ret
+ sizeof(uint32_t), nvpair
, size
);
1640 zfs_nvlist_lookup_nvlist_array_get_nelm(char *nvlist
, char *name
)
1646 found
= nvlist_find_value(nvlist
, name
, DATA_TYPE_NVLIST
, &nvpair
,
/*
 * zfs_nvlist_lookup_nvlist_array() - build a stand-alone copy of element
 * `index` of the nvlist array stored under `name` in `nvlist`.
 *
 * The pair is located with nvlist_find_value(); an index at or beyond the
 * element count prints "trying to lookup past nvlist array". The result
 * buffer comes from calloc(), starts with the first 4 bytes read through
 * `nvlist`, and is followed by the bytes starting at the selected element.
 */
1654 zfs_nvlist_lookup_nvlist_array(char *nvlist
, char *name
,
1657 char *nvpair
, *nvpairptr
;
1664 found
= nvlist_find_value(nvlist
, name
, DATA_TYPE_NVLIST
, &nvpair
,
1668 if (index
>= nelm
) {
1669 printf("trying to lookup past nvlist array\n");
/* Step over the `index` elements that precede the requested one. */
1675 for (i
= 0; i
< index
; i
++) {
1676 uint32_t encode_size
;
/* The statement below advances by 2 x 4 bytes. */
1678 /* skip the header, nvl_version, and nvl_nvflag */
1679 nvpairptr
= nvpairptr
+ 4 * 2;
/*
 * NOTE(review): the loop condition reads through nvpairptr, but the loop
 * body and the statement after the loop advance `nvlist` instead;
 * nvpairptr is not modified inside this while. With a non-zero
 * encode_size the condition cannot change, and `nvlist` (used further
 * down as the source of the 4-byte header copy) is moved away from the
 * list start. Looks like nvpairptr was intended in both places - confirm.
 */
1681 while (nvpairptr
< nvpair
+ size
1682 && (encode_size
= be32_to_cpu(*(uint32_t *) nvpairptr
)))
1683 nvlist
+= encode_size
; /* goto the next nvpair */
1685 nvlist
= nvlist
+ 4 * 2; /* skip the ending 2 zeros - 8 bytes */
/* Reject an element whose start, or start plus the length word, is out of range. */
1688 if (nvpairptr
>= nvpair
+ size
1689 || nvpairptr
+ be32_to_cpu(*(uint32_t *) (nvpairptr
+ 4 * 2))
1691 printf("incorrect nvlist array\n");
/*
 * NOTE(review): the allocation is sized from the 32-bit big-endian word at
 * nvpairptr + 8 plus 12 bytes, while the second memcpy below copies `size`
 * bytes (the same `size` that bounds the whole pair in the checks above).
 * Confirm the copy length cannot exceed the allocation.
 */
1695 ret
= calloc(1, be32_to_cpu(*(uint32_t *) (nvpairptr
+ 4 * 2))
1696 + 3 * sizeof(uint32_t));
/* Reuse the first 4 bytes of the parent list as the new list's header. */
1699 memcpy(ret
, nvlist
, sizeof(uint32_t));
1701 memcpy(ret
+ sizeof(uint32_t), nvpairptr
, size
);
/*
 * int_zfs_fetch_nvlist() - read the vdev name-value pair list of the label
 * currently selected in `data` into a freshly allocated buffer.
 *
 * data:   supplies vdev_phys_sector, the sector the read starts at.
 * nvlist: receives the malloc()ed buffer of VDEV_PHYS_SIZE bytes.
 *         Presumably the caller frees it after a ZFS_ERR_NONE return -
 *         TODO confirm against the callers.
 *
 * NOTE(review): the malloc() result is handed to zfs_devread() without a
 * NULL check in between - confirm whether an out-of-memory return is
 * wanted here.
 */
1706 int_zfs_fetch_nvlist(struct zfs_data
*data
, char **nvlist
)
1710 *nvlist
= malloc(VDEV_PHYS_SIZE
);
1711 /* Read in the vdev name-value pair list (112K). */
1712 err
= zfs_devread(data
->vdev_phys_sector
, 0, VDEV_PHYS_SIZE
, *nvlist
);
1718 return ZFS_ERR_NONE
;
1722 * Check the disk label information and retrieve needed vdev name-value pairs.
1726 check_pool_label(struct zfs_data
*data
)
1728 uint64_t pool_state
;
1729 char *nvlist
; /* for the pool */
1730 char *vdevnvlist
; /* for the vdev */
1736 err
= int_zfs_fetch_nvlist(data
, &nvlist
);
1740 found
= zfs_nvlist_lookup_uint64(nvlist
, ZPOOL_CONFIG_POOL_STATE
,
1744 printf("zfs pool state not found\n");
1745 return ZFS_ERR_BAD_FS
;
1748 if (pool_state
== POOL_STATE_DESTROYED
) {
1750 printf("zpool is marked as destroyed\n");
1751 return ZFS_ERR_BAD_FS
;
1754 data
->label_txg
= 0;
1755 found
= zfs_nvlist_lookup_uint64(nvlist
, ZPOOL_CONFIG_POOL_TXG
,
1759 printf("zfs pool txg not found\n");
1760 return ZFS_ERR_BAD_FS
;
1763 /* not an active device */
1764 if (data
->label_txg
== 0) {
1766 printf("zpool is not active\n");
1767 return ZFS_ERR_BAD_FS
;
1770 found
= zfs_nvlist_lookup_uint64(nvlist
, ZPOOL_CONFIG_VERSION
,
1774 printf("zpool config version not found\n");
1775 return ZFS_ERR_BAD_FS
;
1778 if (version
> SPA_VERSION
) {
1780 printf("SPA version too new %llu > %llu\n",
1781 (unsigned long long) version
,
1782 (unsigned long long) SPA_VERSION
);
1783 return ZFS_ERR_NOT_IMPLEMENTED_YET
;
1786 vdevnvlist
= zfs_nvlist_lookup_nvlist(nvlist
, ZPOOL_CONFIG_VDEV_TREE
);
1789 printf("ZFS config vdev tree not found\n");
1790 return ZFS_ERR_BAD_FS
;
1793 found
= zfs_nvlist_lookup_uint64(vdevnvlist
, ZPOOL_CONFIG_ASHIFT
,
1794 &data
->vdev_ashift
);
1798 printf("ZPOOL config ashift not found\n");
1799 return ZFS_ERR_BAD_FS
;
1802 found
= zfs_nvlist_lookup_uint64(nvlist
, ZPOOL_CONFIG_GUID
, &diskguid
);
1805 printf("ZPOOL config guid not found\n");
1806 return ZFS_ERR_BAD_FS
;
1809 found
= zfs_nvlist_lookup_uint64(nvlist
, ZPOOL_CONFIG_POOL_GUID
, &data
->pool_guid
);
1812 printf("ZPOOL config pool guid not found\n");
1813 return ZFS_ERR_BAD_FS
;
1818 printf("ZFS Pool GUID: %llu (%016llx) Label: GUID: %llu (%016llx), txg: %llu, SPA v%llu, ashift: %llu\n",
1819 (unsigned long long) data
->pool_guid
,
1820 (unsigned long long) data
->pool_guid
,
1821 (unsigned long long) diskguid
,
1822 (unsigned long long) diskguid
,
1823 (unsigned long long) data
->label_txg
,
1824 (unsigned long long) version
,
1825 (unsigned long long) data
->vdev_ashift
);
1827 return ZFS_ERR_NONE
;
1831 * vdev_label_start returns the physical disk offset (in bytes) of
1834 static uint64_t vdev_label_start(uint64_t psize
, int l
)
1836 return (l
* sizeof(vdev_label_t
) + (l
< VDEV_LABELS
/ 2 ?
1838 VDEV_LABELS
* sizeof(vdev_label_t
)));
/*
 * zfs_unmount() - release the buffers hanging off a mounted zfs_data:
 * dnode_buf, dnode_mdn and file_buf are each passed to free().
 */
1842 zfs_unmount(struct zfs_data
*data
)
1844 free(data
->dnode_buf
);
1845 free(data
->dnode_mdn
);
1846 free(data
->file_buf
);
1851 * zfs_mount() locates a valid uberblock of the root pool and reads in its MOS
1852 * to the memory address MOS.
1856 zfs_mount(device_t dev
)
1858 struct zfs_data
*data
= 0;
1859 int label
= 0, bestlabel
= -1;
1861 uberblock_t
*ubbest
;
1862 uberblock_t
*ubcur
= NULL
;
1867 data
= malloc(sizeof(*data
));
1870 memset(data
, 0, sizeof(*data
));
1872 ub_array
= malloc(VDEV_UBERBLOCK_RING
);
1878 ubbest
= malloc(sizeof(*ubbest
));
1883 memset(ubbest
, 0, sizeof(*ubbest
));
1886 * Some El Torito stacks don't give us a size, so we end up
1887 * setting the size to MAXUINT. Furthermore, some of these
1888 * devices stop working once a single read past the end has
1889 * been issued. Checking for a maximum part_length and
1890 * skipping the backup labels at the end of the
1891 * slice/partition/device avoids breaking down on such
1892 * devices.
1895 dev
->part_length
== 0 ?
1896 VDEV_LABELS
/ 2 : VDEV_LABELS
;
1898 /* Size in bytes of the device (disk or partition); aligned to the label size below. */
1899 uint64_t device_size
=
1900 dev
->part_length
<< SECTOR_BITS
;
1902 const uint64_t alignedbytes
=
1903 P2ALIGN(device_size
, (uint64_t) sizeof(vdev_label_t
));
1905 for (label
= 0; label
< vdevnum
; label
++) {
1906 uint64_t labelstartbytes
= vdev_label_start(alignedbytes
, label
);
1907 uint64_t labelstart
= labelstartbytes
>> SECTOR_BITS
;
1909 debug("zfs reading label %d at sector %llu (byte %llu)\n",
1910 label
, (unsigned long long) labelstart
,
1911 (unsigned long long) labelstartbytes
);
1913 data
->vdev_phys_sector
= labelstart
+
1914 ((VDEV_SKIP_SIZE
+ VDEV_BOOT_HEADER_SIZE
) >> SECTOR_BITS
);
1916 err
= check_pool_label(data
);
1918 printf("zfs error checking label %d\n", label
);
1922 /* Read in the uberblock ring (128K). */
1923 err
= zfs_devread(data
->vdev_phys_sector
+
1924 (VDEV_PHYS_SIZE
>> SECTOR_BITS
),
1925 0, VDEV_UBERBLOCK_RING
, ub_array
);
1927 printf("zfs error reading uberblock ring for label %d\n", label
);
1931 ubcur
= find_bestub(ub_array
, data
);
1933 printf("zfs No good uberblocks found in label %d\n", label
);
1937 if (vdev_uberblock_compare(ubcur
, ubbest
) > 0) {
1938 /* Looks like the block is good, so use it.*/
1939 memcpy(ubbest
, ubcur
, sizeof(*ubbest
));
1941 debug("zfs Current best uberblock found in label %d\n", label
);
1946 /* We zeroed the structure to begin with. If we never assigned to it,
1947 magic will still be zero. */
1948 if (!ubbest
->ub_magic
) {
1949 printf("couldn't find a valid ZFS label\n");
1955 debug("zfs ubbest %p in label %d\n", ubbest
, bestlabel
);
1957 zfs_endian_t ub_endian
=
1958 zfs_to_cpu64(ubbest
->ub_magic
, LITTLE_ENDIAN
) == UBERBLOCK_MAGIC
1959 ? LITTLE_ENDIAN
: BIG_ENDIAN
;
1961 debug("zfs endian set to %s\n", !ub_endian
? "big" : "little");
1963 err
= zio_read(&ubbest
->ub_rootbp
, ub_endian
, &osp
, &ospsize
, data
);
1966 printf("couldn't zio_read object directory\n");
1972 if (ospsize
< OBJSET_PHYS_SIZE_V14
) {
1973 printf("osp too small\n");
1980 /* Got the MOS. Save it at the memory addr MOS. */
1981 memmove(&(data
->mos
.dn
), &((objset_phys_t
*) osp
)->os_meta_dnode
, DNODE_SIZE
);
1983 (zfs_to_cpu64(ubbest
->ub_rootbp
.blk_prop
, ub_endian
) >> 63) & 1;
1984 memmove(&(data
->current_uberblock
), ubbest
, sizeof(uberblock_t
));
1993 zfs_fetch_nvlist(device_t dev
, char **nvlist
)
1995 struct zfs_data
*zfs
;
1998 zfs
= zfs_mount(dev
);
2000 return ZFS_ERR_BAD_FS
;
2001 err
= int_zfs_fetch_nvlist(zfs
, nvlist
);
2007 * zfs_open() locates a file in the rootpool by following the
2008 * MOS and places the dnode of the file in the memory address DNODE.
/*
 * zfs_open() - mount the pool on file->device, resolve fsfilename to a
 * dnode and record the file's size in file->size.
 *
 * file:       supplies the device; file->size is filled in.
 * fsfilename: full path, resolved by dnode_get_fullpath() into data->mdn
 *             and data->dnode.
 *
 * Visible return values: ZFS_ERR_BAD_FS after the zfs_mount() call and on
 * the "filesystem is corrupt" path, ZFS_ERR_FILE_NOT_FOUND together with
 * "Missing @ or / separator", ZFS_ERR_BAD_FILE_TYPE when the dnode is not
 * DMU_OT_PLAIN_FILE_CONTENTS, and ZFS_ERR_NONE at the end.
 */
2011 zfs_open(struct zfs_file
*file
, const char *fsfilename
)
2013 struct zfs_data
*data
;
2017 data
= zfs_mount(file
->device
);
2019 return ZFS_ERR_BAD_FS
;
2021 err
= dnode_get_fullpath(fsfilename
, &(data
->mdn
), 0,
2022 &(data
->dnode
), &isfs
, data
);
2030 printf("Missing @ or / separator\n");
2031 return ZFS_ERR_FILE_NOT_FOUND
;
2034 /* We found the dnode for this file. Verify if it is a plain file. */
2035 if (data
->dnode
.dn
.dn_type
!= DMU_OT_PLAIN_FILE_CONTENTS
) {
2037 printf("not a file\n");
2038 return ZFS_ERR_BAD_FILE_TYPE
;
2041 /* get the file size and set the file position to 0 */
2044 * For DMU_OT_SA we will need to locate the SIZE attribute
2045 * attribute, which could be either in the bonus buffer
2046 * or the "spill" block.
2048 if (data
->dnode
.dn
.dn_bonustype
== DMU_OT_SA
) {
2052 if (data
->dnode
.dn
.dn_bonuslen
!= 0) {
2053 sahdrp
= (sa_hdr_phys_t
*) DN_BONUS(&data
->dnode
.dn
);
2054 } else if (data
->dnode
.dn
.dn_flags
& DNODE_FLAG_SPILL_BLKPTR
) {
2055 blkptr_t
*bp
= &data
->dnode
.dn
.dn_spill
;
2057 err
= zio_read(bp
, data
->dnode
.endian
, &sahdrp
, NULL
, data
);
2061 printf("filesystem is corrupt :(\n");
2062 return ZFS_ERR_BAD_FS
;
/*
 * The size sits SA_SIZE_OFFSET bytes past the SA header.
 * NOTE(review): this path stores the raw 64-bit word, whereas the znode
 * path below converts with zfs_to_cpu64(..., data->dnode.endian). Confirm
 * whether the SA value also needs the endian conversion (and whether the
 * pointer is suitably aligned for a direct uint64_t load).
 */
2065 hdrsize
= SA_HDR_SIZE(((sa_hdr_phys_t
*) sahdrp
));
2066 file
->size
= *(uint64_t *) ((char *) sahdrp
+ hdrsize
+ SA_SIZE_OFFSET
);
2068 file
->size
= zfs_to_cpu64(((znode_phys_t
*) DN_BONUS(&data
->dnode
.dn
))->zp_size
, data
->dnode
.endian
);
2074 return ZFS_ERR_NONE
;
/*
 * zfs_read() - copy file contents, starting at file->offset, into buf.
 *
 * file: file->data is the struct zfs_data of the open file; file->offset
 *       is the position to read from.
 * buf:  destination buffer.
 * len:  number of bytes requested.
 *
 * One data block is cached in data->file_buf, with data->file_start and
 * data->file_end describing the byte range it covers. A request that lies
 * entirely inside that range is served straight from the cache; otherwise
 * blocks are fetched through dmu_read() one at a time.
 */
2078 zfs_read(zfs_file_t file
, char *buf
, uint64_t len
)
2080 struct zfs_data
*data
= (struct zfs_data
*) file
->data
;
2081 int blksz
, movesize
;
/* First use: allocate the block cache and mark it as covering nothing. */
2086 if (data
->file_buf
== NULL
) {
2087 data
->file_buf
= malloc(SPA_MAXBLOCKSIZE
);
2088 if (!data
->file_buf
)
2090 data
->file_start
= data
->file_end
= 0;
2094 * If offset is in memory, move it into the buffer provided and return.
2096 if (file
->offset
>= data
->file_start
2097 && file
->offset
+ len
<= data
->file_end
) {
2098 memmove(buf
, data
->file_buf
+ file
->offset
- data
->file_start
,
/* Data block size in bytes: the dnode stores it in units of 1 << SPA_MINBLOCKSHIFT. */
2103 blksz
= zfs_to_cpu16(data
->dnode
.dn
.dn_datablkszsec
,
2104 data
->dnode
.endian
) << SPA_MINBLOCKSHIFT
;
2107 * Entire Dnode is too big to fit into the space available. We
2108 * will need to read it in chunks. This could be optimized to
2109 * read in as large a chunk as there is space available, but for
2110 * now, this only reads in one data block at a time.
2117 * Find requested blkid and the offset within that block.
2119 uint64_t blkid
= file
->offset
+ red
;
/*
 * NOTE(review): if do_div() follows the Linux convention (quotient is
 * stored into its first argument, remainder is returned), assigning the
 * return value back to blkid replaces the block number with the offset
 * inside the block. Confirm which value is intended here.
 */
2120 blkid
= do_div(blkid
, blksz
);
2121 free(data
->file_buf
);
2124 err
= dmu_read(&(data
->dnode
), blkid
, &t
,
/* The cache now covers exactly the block that was just read. */
2130 data
->file_start
= blkid
* blksz
;
2131 data
->file_end
= data
->file_start
+ blksz
;
/*
 * Copy no more than what is left of the request and of the cached block.
 * NOTE(review): the (int) cast narrows file->offset - confirm offsets
 * beyond INT_MAX are not expected.
 */
2133 movesize
= MIN(length
, data
->file_end
- (int) file
->offset
- red
);
2135 memmove(buf
, data
->file_buf
+ file
->offset
+ red
2136 - data
->file_start
, movesize
);
/*
 * zfs_close() - hand the zfs_data stored in file->data to zfs_unmount()
 * and report ZFS_ERR_NONE.
 */
2146 zfs_close(zfs_file_t file
)
2148 zfs_unmount((struct zfs_data
*) file
->data
);
2149 return ZFS_ERR_NONE
;
2153 zfs_getmdnobj(device_t dev
, const char *fsfilename
,
2156 struct zfs_data
*data
;
2160 data
= zfs_mount(dev
);
2162 return ZFS_ERR_BAD_FS
;
2164 err
= dnode_get_fullpath(fsfilename
, &(data
->mdn
), mdnobj
,
2165 &(data
->dnode
), &isfs
, data
);
2171 fill_fs_info(struct zfs_dirhook_info
*info
,
2172 dnode_end_t mdn
, struct zfs_data
*data
)
2179 memset(info
, 0, sizeof(*info
));
2183 if (mdn
.dn
.dn_type
== DMU_OT_DSL_DIR
) {
2184 headobj
= zfs_to_cpu64(((dsl_dir_phys_t
*) DN_BONUS(&mdn
.dn
))->dd_head_dataset_obj
, mdn
.endian
);
2186 err
= dnode_get(&(data
->mos
), headobj
, DMU_OT_DSL_DATASET
, &mdn
, data
);
2188 printf("zfs failed here 1\n");
2192 make_mdn(&mdn
, data
);
2193 err
= dnode_get(&mdn
, MASTER_NODE_OBJ
, DMU_OT_MASTER_NODE
,
2196 printf("zfs failed here 2\n");
2200 err
= zap_lookup(&dn
, ZFS_ROOT_OBJ
, &objnum
, data
);
2202 printf("zfs failed here 3\n");
2206 err
= dnode_get(&mdn
, objnum
, 0, &dn
, data
);
2208 printf("zfs failed here 4\n");
2213 info
->mtime
= zfs_to_cpu64(((znode_phys_t
*) DN_BONUS(&dn
.dn
))->zp_mtime
[0], dn
.endian
);
2218 static int iterate_zap(const char *name
, uint64_t val
, struct zfs_data
*data
)
2220 struct zfs_dirhook_info info
;
2223 memset(&info
, 0, sizeof(info
));
2225 dnode_get(&(data
->mdn
), val
, 0, &dn
, data
);
2227 info
.mtime
= zfs_to_cpu64(((znode_phys_t
*) DN_BONUS(&dn
.dn
))->zp_mtime
[0], dn
.endian
);
2228 info
.dir
= (dn
.dn
.dn_type
== DMU_OT_DIRECTORY_CONTENTS
);
2229 debug("zfs type=%d, name=%s\n",
2230 (int)dn
.dn
.dn_type
, (char *)name
);
2231 if (!data
->userhook
)
2233 return data
->userhook(name
, &info
);
2236 static int iterate_zap_fs(const char *name
, uint64_t val
, struct zfs_data
*data
)
2238 struct zfs_dirhook_info info
;
2241 err
= dnode_get(&(data
->mos
), val
, 0, &mdn
, data
);
2244 if (mdn
.dn
.dn_type
!= DMU_OT_DSL_DIR
)
2247 fill_fs_info(&info
, mdn
, data
);
2249 if (!data
->userhook
)
2251 return data
->userhook(name
, &info
);
2254 static int iterate_zap_snap(const char *name
, uint64_t val
, struct zfs_data
*data
)
2256 struct zfs_dirhook_info info
;
2262 err
= dnode_get(&(data
->mos
), val
, 0, &mdn
, data
);
2266 if (mdn
.dn
.dn_type
!= DMU_OT_DSL_DATASET
)
2269 fill_fs_info(&info
, mdn
, data
);
2271 name2
= malloc(strlen(name
) + 2);
2273 memcpy(name2
+ 1, name
, strlen(name
) + 1);
2275 ret
= data
->userhook(name2
, &info
);
2281 zfs_ls(device_t device
, const char *path
,
2282 int (*hook
)(const char *, const struct zfs_dirhook_info
*))
2284 struct zfs_data
*data
;
2288 data
= zfs_mount(device
);
2290 return ZFS_ERR_BAD_FS
;
2292 data
->userhook
= hook
;
2294 err
= dnode_get_fullpath(path
, &(data
->mdn
), 0, &(data
->dnode
), &isfs
, data
);
2300 uint64_t childobj
, headobj
;
2303 struct zfs_dirhook_info info
;
2305 fill_fs_info(&info
, data
->dnode
, data
);
2308 childobj
= zfs_to_cpu64(((dsl_dir_phys_t
*) DN_BONUS(&data
->dnode
.dn
))->dd_child_dir_zapobj
, data
->dnode
.endian
);
2309 headobj
= zfs_to_cpu64(((dsl_dir_phys_t
*) DN_BONUS(&data
->dnode
.dn
))->dd_head_dataset_obj
, data
->dnode
.endian
);
2310 err
= dnode_get(&(data
->mos
), childobj
,
2311 DMU_OT_DSL_DIR_CHILD_MAP
, &dn
, data
);
2318 zap_iterate(&dn
, iterate_zap_fs
, data
);
2320 err
= dnode_get(&(data
->mos
), headobj
, DMU_OT_DSL_DATASET
, &dn
, data
);
2326 snapobj
= zfs_to_cpu64(((dsl_dataset_phys_t
*) DN_BONUS(&dn
.dn
))->ds_snapnames_zapobj
, dn
.endian
);
2328 err
= dnode_get(&(data
->mos
), snapobj
,
2329 DMU_OT_DSL_DS_SNAP_MAP
, &dn
, data
);
2335 zap_iterate(&dn
, iterate_zap_snap
, data
);
2337 if (data
->dnode
.dn
.dn_type
!= DMU_OT_DIRECTORY_CONTENTS
) {
2339 printf("not a directory\n");
2340 return ZFS_ERR_BAD_FILE_TYPE
;
2342 zap_iterate(&(data
->dnode
), iterate_zap
, data
);
2345 return ZFS_ERR_NONE
;