From 61983f67786db6770ff2ce5e3086ce2561c4cc8c Mon Sep 17 00:00:00 2001 From: Barry Naujok Date: Tue, 5 Jun 2007 04:03:18 +0000 Subject: [PATCH] XFS metadata dump tool Merge of master-melb:xfs-cmds:28782a by kenmcd. Add xfs_mdrestore directory to makefile --- Makefile | 2 +- VERSION | 4 +- db/Makefile | 5 +- db/command.c | 2 + db/init.c | 10 +- db/metadump.c | 1554 +++++++++++++++++++++++++++++++++++++ db/metadump.h | 19 + db/xfs_metadump.sh | 38 + doc/CHANGES | 3 +- include/xfs_metadump.h | 32 + man/man8/xfs_db.8 | 5 + man/man8/xfs_mdrestore.8 | 48 ++ man/man8/xfs_metadump.8 | 128 +++ man/man8/xfs_repair.8 | 36 +- mdrestore/Makefile | 22 + mdrestore/xfs_mdrestore.c | 263 +++++++ 16 files changed, 2146 insertions(+), 25 deletions(-) create mode 100644 db/metadump.c create mode 100644 db/metadump.h create mode 100755 db/xfs_metadump.sh create mode 100644 include/xfs_metadump.h create mode 100644 man/man8/xfs_mdrestore.8 create mode 100644 man/man8/xfs_metadump.8 create mode 100644 mdrestore/Makefile create mode 100644 mdrestore/xfs_mdrestore.c diff --git a/Makefile b/Makefile index fcfd4e6e2..7882ee2c7 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ LDIRT = config.log .dep config.status config.cache confdefs.h conftest* \ Logs/* built .census install.* install-dev.* *.gz SUBDIRS = include libxfs libxlog libxcmd libhandle libdisk \ - copy db fsck growfs io logprint mkfs quota repair rtcp \ + copy db fsck growfs io logprint mkfs quota mdrestore repair rtcp \ m4 man doc po debian build default: $(CONFIGURE) diff --git a/VERSION b/VERSION index 65662ac16..fc566825b 100644 --- a/VERSION +++ b/VERSION @@ -2,6 +2,6 @@ # This file is used by configure to get version information # PKG_MAJOR=2 -PKG_MINOR=8 -PKG_REVISION=21 +PKG_MINOR=9 +PKG_REVISION=0 PKG_BUILD=1 diff --git a/db/Makefile b/db/Makefile index 9c8fe75d7..11d65bcdc 100644 --- a/db/Makefile +++ b/db/Makefile @@ -11,11 +11,11 @@ HFILES = addr.h agf.h agfl.h agi.h attr.h attrshort.h bit.h block.h bmap.h \ 
bmapbt.h bmroot.h bnobt.h check.h cntbt.h command.h convert.h \ dbread.h debug.h dir.h dir2.h dir2sf.h dirshort.h dquot.h echo.h \ faddr.h field.h flist.h fprint.h frag.h freesp.h hash.h help.h \ - init.h inobt.h inode.h input.h io.h malloc.h output.h \ + init.h inobt.h inode.h input.h io.h malloc.h metadump.h output.h \ print.h quit.h sb.h sig.h strvec.h text.h type.h write.h \ attrset.h CFILES = $(HFILES:.h=.c) -LSRCFILES = xfs_admin.sh xfs_check.sh xfs_ncheck.sh +LSRCFILES = xfs_admin.sh xfs_check.sh xfs_ncheck.sh xfs_metadump.sh LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBUUID) $(LIBRT) LTDEPENDENCIES = $(LIBXFS) $(LIBXLOG) LLDFLAGS += -static @@ -40,4 +40,5 @@ install: default $(INSTALL) -m 755 xfs_admin.sh $(PKG_BIN_DIR)/xfs_admin $(INSTALL) -m 755 xfs_check.sh $(PKG_BIN_DIR)/xfs_check $(INSTALL) -m 755 xfs_ncheck.sh $(PKG_BIN_DIR)/xfs_ncheck + $(INSTALL) -m 755 xfs_metadump.sh $(PKG_BIN_DIR)/xfs_metadump install-dev: diff --git a/db/command.c b/db/command.c index 564895373..442fe2545 100644 --- a/db/command.c +++ b/db/command.c @@ -40,6 +40,7 @@ #include "inode.h" #include "input.h" #include "io.h" +#include "metadump.h" #include "output.h" #include "print.h" #include "quit.h" @@ -131,6 +132,7 @@ init_commands(void) inode_init(); input_init(); io_init(); + metadump_init(); output_init(); print_init(); quit_init(); diff --git a/db/init.c b/db/init.c index 8901fc74b..27be16818 100644 --- a/db/init.c +++ b/db/init.c @@ -107,8 +107,8 @@ init( } if (read_bbs(XFS_SB_DADDR, 1, &bufp, NULL)) { - dbprintf(_("%s: %s is invalid (cannot read first 512 bytes)\n"), - progname, fsdevice); + fprintf(stderr, _("%s: %s is invalid (cannot read first 512 " + "bytes)\n"), progname, fsdevice); exit(1); } @@ -118,7 +118,7 @@ init( sbp = &xmount.m_sb; if (sbp->sb_magicnum != XFS_SB_MAGIC) { - dbprintf(_("%s: unexpected XFS SB magic number 0x%08x\n"), + fprintf(stderr, _("%s: unexpected XFS SB magic number 0x%08x\n"), progname, sbp->sb_magicnum); } @@ -128,8 +128,8 @@ init( mp = 
libxfs_mount(&xmount, sbp, x.ddev, x.logdev, x.rtdev, LIBXFS_MOUNT_DEBUGGER); if (!mp) { - dbprintf(_("%s: device %s unusable (not an XFS filesystem?)\n"), - progname, fsdevice); + fprintf(stderr, _("%s: device %s unusable (not an XFS " + "filesystem?)\n"), progname, fsdevice); exit(1); } } diff --git a/db/metadump.c b/db/metadump.c new file mode 100644 index 000000000..f3778b227 --- /dev/null +++ b/db/metadump.c @@ -0,0 +1,1554 @@ +/* + * Copyright (c) 2007 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <libxfs.h> +#include "bmap.h" +#include "command.h" +#include "metadump.h" +#include "io.h" +#include "output.h" +#include "type.h" +#include "init.h" +#include "sig.h" +#include "xfs_metadump.h" + +/* copy all metadata structures to/from a file */ + +static int metadump_f(int argc, char **argv); +static void metadump_help(void); + +/* + * metadump commands issue info/warnings/errors to standard error as + * metadump supports stdout as a destination. + * + * All static functions return zero on failure, while the public functions + * return zero on success. 
+ */ + +static const cmdinfo_t metadump_cmd = + { "metadump", NULL, metadump_f, 0, -1, 0, + "[-e] [-g] [-w] [-o] filename", + "dump metadata to a file", metadump_help }; + +static FILE *outf; /* metadump file */ + +static xfs_metablock_t *metablock; /* header + index + buffers */ +static __be64 *block_index; +static char *block_buffer; + +static int num_indicies; +static int cur_index; + +static xfs_ino_t cur_ino; + +static int show_progress = 0; +static int stop_on_read_error = 0; +static int dont_obfuscate = 0; +static int show_warnings = 0; +static int progress_since_warning = 0; + +/* register the 'metadump' command with the command table */ +void +metadump_init(void) +{ + add_command(&metadump_cmd); +} + +/* print the usage text for the 'metadump' command (four options are listed) */ +static void +metadump_help(void) +{ + dbprintf( +"\n" +" The 'metadump' command dumps the known metadata to a compact file suitable\n" +" for compressing and sending to an XFS maintainer for corruption analysis \n" +" or xfs_repair failures.\n\n" +" There are 4 options:\n" +" -e -- Ignore read errors and keep going\n" +" -g -- Display dump progress\n" +" -o -- Don't obfuscate names and extended attributes\n" +" -w -- Show warnings of bad metadata information\n" +"\n"); +} + +/* + * Format a warning (truncated to the local buffer) and print it to stderr, + * prefixed with the program name. If a progress line was printed since the + * last warning, a newline is emitted first so the two do not share a line. + * Nothing is printed once seenint() reports true. + */ +static void +print_warning(const char *fmt, ...) +{ + char buf[200]; + va_list ap; + + if (seenint()) + return; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + buf[sizeof(buf)-1] = '\0'; + + fprintf(stderr, "%s%s: %s\n", progress_since_warning ? "\n" : "", + progname, buf); + progress_since_warning = 0; +} + +/* + * Print a single-line progress message that overwrites the previous one + * (leading '\r', padded to 59 columns). It goes to stderr when the dump + * itself is being written to stdout, otherwise to stdout. + */ +static void +print_progress(const char *fmt, ...) +{ + char buf[60]; + va_list ap; + FILE *f; + + if (seenint()) + return; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + buf[sizeof(buf)-1] = '\0'; + + f = (outf == stdout) ? stderr : stdout; + fprintf(f, "\r%-59s", buf); + fflush(f); + progress_since_warning = 1; +} + +/* + * A complete dump file will have a "zero" entry in the last index block, + * even if the dump is exactly aligned, the last index will be full of + * zeros. 
If the last index entry is non-zero, the dump is incomplete. + * Correspondingly, the last chunk will have a count < num_indicies. + */ + +static int +write_index(void) +{ + /* + * write index block and following data blocks (streaming) + */ + metablock->mb_count = cpu_to_be16(cur_index); + if (fwrite(metablock, (cur_index + 1) << BBSHIFT, 1, outf) != 1) { + print_warning("error writing to file: %s", strerror(errno)); + return 0; + } + + memset(block_index, 0, num_indicies * sizeof(__be64)); + cur_index = 0; + return 1; +} + +static int +write_buf( + iocur_t *buf) +{ + char *data; + __int64_t off; + int i; + + for (i = 0, off = buf->bb, data = buf->data; + i < buf->blen; + i++, off++, data += BBSIZE) { + block_index[cur_index] = cpu_to_be64(off); + memcpy(&block_buffer[cur_index << BBSHIFT], data, BBSIZE); + if (++cur_index == num_indicies) { + if (!write_index()) + return 0; + } + } + return !seenint(); +} + + +static int +scan_btree( + xfs_agnumber_t agno, + xfs_agblock_t agbno, + int level, + typnm_t btype, + void *arg, + int (*func)(xfs_btree_hdr_t *bthdr, + xfs_agnumber_t agno, + xfs_agblock_t agbno, + int level, + typnm_t btype, + void *arg)) +{ + push_cur(); + set_cur(&typtab[btype], XFS_AGB_TO_DADDR(mp, agno, agbno), blkbb, + DB_RING_IGN, NULL); + if (iocur_top->data == NULL) { + print_warning("cannot read %s block %u/%u", typtab[btype].name, + agno, agbno); + return !stop_on_read_error; + } + if (!write_buf(iocur_top)) + return 0; + + if (!(*func)(iocur_top->data, agno, agbno, level - 1, btype, arg)) + return 0; + + pop_cur(); + return 1; +} + +/* free space tree copy routines */ + +static int +valid_bno( + xfs_agblock_t bno, + xfs_agnumber_t agno, + xfs_agblock_t agbno, + typnm_t btype) +{ + if (bno > 0 && bno <= mp->m_sb.sb_agblocks) + return 1; + + if (show_warnings) + print_warning("invalid block number (%u) in %s block %u/%u", + bno, typtab[btype].name, agno, agbno); + return 0; +} + +static int +scanfunc_freesp( + xfs_btree_hdr_t *bthdr, + 
xfs_agnumber_t agno, + xfs_agblock_t agbno, + int level, + typnm_t btype, + void *arg) +{ + xfs_alloc_ptr_t *pp; + int i; + int nrecs; + + if (level == 0) + return 1; + + nrecs = be16_to_cpu(bthdr->bb_numrecs); + if (nrecs > mp->m_alloc_mxr[1]) { + if (show_warnings) + print_warning("invalid nrecs (%u) in %s block %u/%u", + nrecs, typtab[btype].name, agno, agbno); + return 1; + } + + pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, bthdr, 1, + mp->m_alloc_mxr[1]); + for (i = 0; i < nrecs; i++) { + if (!valid_bno(be32_to_cpu(pp[i]), agno, agbno, btype)) + continue; + if (!scan_btree(agno, be32_to_cpu(pp[i]), level, btype, arg, + scanfunc_freesp)) + return 0; + } + return 1; +} + +static int +copy_free_bno_btree( + xfs_agnumber_t agno, + xfs_agf_t *agf) +{ + xfs_agblock_t root; + int levels; + + root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]); + levels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); + + /* validate root and levels before processing the tree */ + if (root == 0 || root > mp->m_sb.sb_agblocks) { + if (show_warnings) + print_warning("invalid block number (%u) in bnobt " + "root in agf %u", root, agno); + return 1; + } + if (levels >= XFS_BTREE_MAXLEVELS) { + if (show_warnings) + print_warning("invalid level (%u) in bnobt root " + "in agf %u", levels, agno); + return 1; + } + + return scan_btree(agno, root, levels, TYP_BNOBT, agf, scanfunc_freesp); +} + +static int +copy_free_cnt_btree( + xfs_agnumber_t agno, + xfs_agf_t *agf) +{ + xfs_agblock_t root; + int levels; + + root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]); + levels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); + + /* validate root and levels before processing the tree */ + if (root == 0 || root > mp->m_sb.sb_agblocks) { + if (show_warnings) + print_warning("invalid block number (%u) in cntbt " + "root in agf %u", root, agno); + return 1; + } + if (levels >= XFS_BTREE_MAXLEVELS) { + if (show_warnings) + print_warning("invalid level (%u) in cntbt root " + "in agf %u", levels, agno); 
+ return 1; + } + + return scan_btree(agno, root, levels, TYP_CNTBT, agf, scanfunc_freesp); +} + +/* filename and extended attribute obfuscation routines */ + +typedef struct name_ent { + struct name_ent *next; + xfs_dahash_t hash; + int namelen; + uchar_t name[1]; +} name_ent_t; + +#define NAME_TABLE_SIZE 4096 + +static name_ent_t **nametable; + +/* + * Allocate the zeroed table of hash bucket heads. Each slot is a pointer + * to a chain of name_ent_t, so it is sized as a pointer rather than as a + * whole entry. Returns non-zero on success. + */ +static int +create_nametable(void) +{ + nametable = calloc(NAME_TABLE_SIZE, sizeof(*nametable)); + return nametable != NULL; +} + +/* free every entry chained off the name table, leaving all buckets empty */ +static void +clear_nametable(void) +{ + int i; + name_ent_t *p; + + for (i = 0; i < NAME_TABLE_SIZE; i++) { + while (nametable[i]) { + p = nametable[i]; + nametable[i] = p->next; + free(p); + } + } +} + + +#define is_invalid_char(c) ((c) == '/' || (c) == '\0') +#define rol32(x,y) (((x) << (y)) | ((x) >> (32 - (y)))) + +/* return a random byte in the range 1..127 that is not '/' */ +static inline uchar_t +random_filename_char(void) +{ + uchar_t c; + + do { + c = random() % 127 + 1; + } while (c == '/'); + return c; +} + +static int +is_special_dirent( + xfs_ino_t ino, + int namelen, + uchar_t *name) +{ + static xfs_ino_t orphanage_ino = 0; + char s[32]; + int slen; + + /* + * due to the XFS name hashing algorithm, we cannot obfuscate + * names with 4 chars or less. 
+ */ + if (namelen <= 4) + return 1; + + if (ino == 0) + return 0; + + /* + * don't obfuscate lost+found nor any inodes within lost+found with + * the inode number + */ + if (cur_ino == mp->m_sb.sb_rootino && namelen == 10 && + memcmp(name, "lost+found", 10) == 0) { + orphanage_ino = ino; + return 1; + } + if (cur_ino != orphanage_ino) + return 0; + + slen = sprintf(s, "%lld", (long long)ino); + return (slen == namelen && memcmp(name, s, namelen) == 0); +} + +static void +generate_obfuscated_name( + xfs_ino_t ino, + int namelen, + uchar_t *name) +{ + xfs_dahash_t hash; + name_ent_t *p; + int i; + int dup; + xfs_dahash_t newhash; + uchar_t newname[namelen]; + + if (is_special_dirent(ino, namelen, name)) + return; + + hash = libxfs_da_hashname(name, namelen); + + /* create a random name with the same hash value */ + + do { + dup = 0; + newname[0] = '/'; + + for (;;) { + /* if the first char is a "/", preserve it */ + i = (name[0] == '/'); + + for (newhash = 0; i < namelen - 5; i++) { + newname[i] = random_filename_char(); + newhash = newname[i] ^ rol32(newhash, 7); + } + newhash = rol32(newhash, 3) ^ hash; + if (name[0] != '/' || namelen > 5) { + newname[namelen - 5] = (newhash >> 28) | + (random_filename_char() & 0xf0); + if (is_invalid_char(newname[namelen - 5])) + continue; + } + newname[namelen - 4] = (newhash >> 21) & 0x7f; + if (is_invalid_char(newname[namelen - 4])) + continue; + newname[namelen - 3] = (newhash >> 14) & 0x7f; + if (is_invalid_char(newname[namelen - 3])) + continue; + newname[namelen - 2] = (newhash >> 7) & 0x7f; + if (is_invalid_char(newname[namelen - 2])) + continue; + newname[namelen - 1] = ((newhash >> 0) ^ + (newname[namelen - 5] >> 4)) & 0x7f; + if (is_invalid_char(newname[namelen - 1])) + continue; + break; + } + + ASSERT(libxfs_da_hashname(newname, namelen) == hash); + + for (p = nametable[hash % NAME_TABLE_SIZE]; p; p = p->next) { + if (p->hash == hash && p->namelen == namelen && + memcmp(p->name, newname, namelen) == 0){ + dup = 1; + 
break; + } + } + } while (dup); + + memcpy(name, newname, namelen); + + p = malloc(sizeof(name_ent_t) + namelen); + if (p == NULL) + return; + + p->next = nametable[hash % NAME_TABLE_SIZE]; + p->hash = hash; + p->namelen = namelen; + memcpy(p->name, name, namelen); + + nametable[hash % NAME_TABLE_SIZE] = p; +} + +/* + * Obfuscate the entry names of a shortform (inode resident) directory in + * place. The directory size is clamped to the data fork and the walk + * stops at the first entry that cannot be trusted. + */ +static void +obfuscate_sf_dir( + xfs_dinode_t *dip) +{ + xfs_dir2_sf_t *sfp; + xfs_dir2_sf_entry_t *sfep; + int ino_dir_size; + int i; + + sfp = &dip->di_u.di_dir2sf; + ino_dir_size = dip->di_core.di_size; + if (ino_dir_size > XFS_DFORK_DSIZE(dip, mp)) { + ino_dir_size = XFS_DFORK_DSIZE(dip, mp); + if (show_warnings) + print_warning("invalid size for dir inode %llu", + (long long)cur_ino); + } + + sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + for (i = 0; (i < sfp->hdr.count) && + ((char *)sfep - (char *)sfp < ino_dir_size); i++) { + + /* + * first check for bad name lengths. If they are bad, we + * have limitations to how much can be obfuscated. + */ + int namelen = sfep->namelen; + + if (namelen == 0) { + if (show_warnings) + print_warning("zero length entry in dir inode " + "%llu", (long long)cur_ino); + if (i != sfp->hdr.count - 1) + break; + namelen = ino_dir_size - ((char *)&sfep->name[0] - + (char *)sfp); + } else if ((char *)sfep - (char *)sfp + + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep) > + ino_dir_size) { + if (show_warnings) + print_warning("entry length in dir inode %llu " + "overflows space", (long long)cur_ino); + if (i != sfp->hdr.count - 1) + break; + namelen = ino_dir_size - ((char *)&sfep->name[0] - + (char *)sfp); + } + + generate_obfuscated_name(XFS_DIR2_SF_GET_INUMBER(sfp, + XFS_DIR2_SF_INUMBERP(sfep)), namelen, + &sfep->name[0]); + + sfep = (xfs_dir2_sf_entry_t *)((char *)sfep + + XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, namelen)); + } +} + +/* + * Overwrite the target of a shortform (inode resident) symlink with random + * non-NUL bytes. + * + * di_size comes straight from disk, so it is clamped to the size of the + * data fork first; otherwise a corrupt inode would make us write past the + * end of the inode buffer. + */ +static void +obfuscate_sf_symlink( + xfs_dinode_t *dip) +{ + __uint64_t len; + int i; + + len = dip->di_core.di_size; + if (len > XFS_DFORK_DSIZE(dip, mp)) { + if (show_warnings) + print_warning("invalid size for symlink inode %llu", + (long long)cur_ino); + len = XFS_DFORK_DSIZE(dip, mp); + } + + for (i = 0; i < len; i++) + dip->di_u.di_symlink[i] = random() % 127 + 1; +} + +static void +obfuscate_sf_attr( + xfs_dinode_t 
*dip) +{ + /* + * with extended attributes, obfuscate the names and zero the actual + * values. + */ + + xfs_attr_shortform_t *asfp; + xfs_attr_sf_entry_t *asfep; + int ino_attr_size; + int i; + + asfp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); + if (asfp->hdr.count == 0) + return; + + ino_attr_size = be16_to_cpu(asfp->hdr.totsize); + if (ino_attr_size > XFS_DFORK_ASIZE(dip, mp)) { + ino_attr_size = XFS_DFORK_ASIZE(dip, mp); + if (show_warnings) + print_warning("invalid attr size in inode %llu", + (long long)cur_ino); + } + + asfep = &asfp->list[0]; + for (i = 0; (i < asfp->hdr.count) && + ((char *)asfep - (char *)asfp < ino_attr_size); i++) { + + int namelen = asfep->namelen; + + if (namelen == 0) { + if (show_warnings) + print_warning("zero length attr entry in inode " + "%llu", (long long)cur_ino); + break; + } else if ((char *)asfep - (char *)asfp + + XFS_ATTR_SF_ENTSIZE(asfep) > ino_attr_size) { + if (show_warnings) + print_warning("attr entry length in inode %llu " + "overflows space", (long long)cur_ino); + break; + } + + generate_obfuscated_name(0, asfep->namelen, &asfep->nameval[0]); + memset(&asfep->nameval[asfep->namelen], 0, asfep->valuelen); + + asfep = (xfs_attr_sf_entry_t *)((char *)asfep + + XFS_ATTR_SF_ENTSIZE(asfep)); + } +} + +/* + * dir_data structure is used to track multi-fsblock dir2 blocks between extent + * processing calls. + */ + +static struct dir_data_s { + int end_of_data; + int block_index; + int offset_to_entry; + int bad_block; +} dir_data; + +static void +obfuscate_dir_data_blocks( + char *block, + xfs_dfiloff_t offset, + xfs_dfilblks_t count, + int is_block_format) +{ + /* + * we have to rely on the fileoffset and signature of the block to + * handle it's contents. If it's invalid, leave it alone. + * for multi-fsblock dir blocks, if a name crosses an extent boundary, + * ignore it and continue. 
+ */ + int c; + int dir_offset; + char *ptr; + char *endptr; + + if (is_block_format && count != mp->m_dirblkfsbs) + return; /* too complex to handle this rare case */ + + for (c = 0, endptr = block; c < count; c++) { + + if (dir_data.block_index == 0) { + int wantmagic; + + if (offset % mp->m_dirblkfsbs != 0) + return; /* corrupted, leave it alone */ + + dir_data.bad_block = 0; + + if (is_block_format) { + xfs_dir2_leaf_entry_t *blp; + xfs_dir2_block_tail_t *btp; + + btp = XFS_DIR2_BLOCK_TAIL_P(mp, + (xfs_dir2_block_t *)block); + blp = XFS_DIR2_BLOCK_LEAF_P(btp); + if ((char *)blp > (char *)btp) + blp = (xfs_dir2_leaf_entry_t *)btp; + + dir_data.end_of_data = (char *)blp - block; + wantmagic = XFS_DIR2_BLOCK_MAGIC; + } else { /* leaf/node format */ + dir_data.end_of_data = mp->m_dirblkfsbs << + mp->m_sb.sb_blocklog; + wantmagic = XFS_DIR2_DATA_MAGIC; + } + dir_data.offset_to_entry = offsetof(xfs_dir2_data_t, u); + + if (be32_to_cpu(((xfs_dir2_data_hdr_t*)block)->magic) != + wantmagic) { + if (show_warnings) + print_warning("invalid magic in dir " + "inode %llu block %ld", + (long long)cur_ino, + (long)offset); + dir_data.bad_block = 1; + } + } + dir_data.block_index++; + if (dir_data.block_index == mp->m_dirblkfsbs) + dir_data.block_index = 0; + + if (dir_data.bad_block) + continue; + + dir_offset = (dir_data.block_index << mp->m_sb.sb_blocklog) + + dir_data.offset_to_entry; + + ptr = endptr + dir_data.offset_to_entry; + endptr += mp->m_sb.sb_blocksize; + + while (ptr < endptr && dir_offset < dir_data.end_of_data) { + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_unused_t *dup; + int length; + + dup = (xfs_dir2_data_unused_t *)ptr; + + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + int length = be16_to_cpu(dup->length); + if (dir_offset + length > dir_data.end_of_data || + length == 0 || (length & + (XFS_DIR2_DATA_ALIGN - 1))) { + if (show_warnings) + print_warning("invalid length " + "for dir free space in " + "inode %llu", + (long long)cur_ino); + 
dir_data.bad_block = 1; + break; + } + if (be16_to_cpu(*XFS_DIR2_DATA_UNUSED_TAG_P(dup)) != + dir_offset) { + dir_data.bad_block = 1; + break; + } + dir_offset += length; + ptr += length; + if (dir_offset >= dir_data.end_of_data || + ptr >= endptr) + break; + } + + dep = (xfs_dir2_data_entry_t *)ptr; + length = XFS_DIR2_DATA_ENTSIZE(dep->namelen); + + if (dir_offset + length > dir_data.end_of_data || + ptr + length > endptr) { + if (show_warnings) + print_warning("invalid length for " + "dir entry name in inode %llu", + (long long)cur_ino); + break; + } + if (be16_to_cpu(*XFS_DIR2_DATA_ENTRY_TAG_P(dep)) != + dir_offset) { + dir_data.bad_block = 1; + break; + } + generate_obfuscated_name(be64_to_cpu(dep->inumber), + dep->namelen, &dep->name[0]); + dir_offset += length; + ptr += length; + } + dir_data.offset_to_entry = dir_offset & + (mp->m_sb.sb_blocksize - 1); + } +} + +static void +obfuscate_symlink_blocks( + char *block, + xfs_dfilblks_t count) +{ + int i; + + count <<= mp->m_sb.sb_blocklog; + for (i = 0; i < count; i++) + block[i] = random() % 127 + 1; +} + +#define MAX_REMOTE_VALS 4095 + +static struct attr_data_s { + int remote_val_count; + xfs_dablk_t remote_vals[MAX_REMOTE_VALS]; +} attr_data; + +static inline void +add_remote_vals( + xfs_dablk_t blockidx, + int length) +{ + while (length > 0 && attr_data.remote_val_count < MAX_REMOTE_VALS) { + attr_data.remote_vals[attr_data.remote_val_count] = blockidx; + attr_data.remote_val_count++; + blockidx++; + length -= XFS_LBSIZE(mp); + } +} + +static void +obfuscate_attr_blocks( + char *block, + xfs_dfiloff_t offset, + xfs_dfilblks_t count) +{ + xfs_attr_leafblock_t *leaf; + int c; + int i; + int nentries; + xfs_attr_leaf_entry_t *entry; + xfs_attr_leaf_name_local_t *local; + xfs_attr_leaf_name_remote_t *remote; + + for (c = 0; c < count; c++, offset++, block += XFS_LBSIZE(mp)) { + + leaf = (xfs_attr_leafblock_t *)block; + + if (be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC) { + for (i = 0; i < 
attr_data.remote_val_count; i++) { + if (attr_data.remote_vals[i] == offset) + memset(block, 0, XFS_LBSIZE(mp)); + } + continue; + } + + nentries = be16_to_cpu(leaf->hdr.count); + if (nentries * sizeof(xfs_attr_leaf_entry_t) + + sizeof(xfs_attr_leaf_hdr_t) > XFS_LBSIZE(mp)) { + if (show_warnings) + print_warning("invalid attr count in inode %llu", + (long long)cur_ino); + continue; + } + + for (i = 0, entry = &leaf->entries[0]; i < nentries; + i++, entry++) { + if (be16_to_cpu(entry->nameidx) > XFS_LBSIZE(mp)) { + if (show_warnings) + print_warning("invalid attr nameidx " + "in inode %llu", + (long long)cur_ino); + break; + } + if (entry->flags & XFS_ATTR_LOCAL) { + local = XFS_ATTR_LEAF_NAME_LOCAL(leaf, i); + if (local->namelen == 0) { + if (show_warnings) + print_warning("zero length for " + "attr name in inode %llu", + (long long)cur_ino); + break; + } + generate_obfuscated_name(0, local->namelen, + &local->nameval[0]); + memset(&local->nameval[local->namelen], 0, + be16_to_cpu(local->valuelen)); + } else { + remote = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i); + if (remote->namelen == 0 || + remote->valueblk == 0) { + if (show_warnings) + print_warning("invalid attr " + "entry in inode %llu", + (long long)cur_ino); + break; + } + generate_obfuscated_name(0, remote->namelen, + &remote->name[0]); + add_remote_vals(be32_to_cpu(remote->valueblk), + be32_to_cpu(remote->valuelen)); + } + } + } +} + +/* inode copy routines */ + +/* + * Copy (and, unless disabled, obfuscate) the blocks mapped by an array of + * numrecs extent records. Plain file data (TYP_DATA) is never copied. + * Returns zero on failure. + */ +static int +process_bmbt_reclist( + xfs_bmbt_rec_t *rp, + int numrecs, + typnm_t btype) +{ + int i; + xfs_dfiloff_t o; + xfs_dfsbno_t s; + xfs_dfilblks_t c; + int f; + xfs_dfiloff_t last; + + /* + * nothing to do for file data or an empty list; bail out before + * rp[numrecs - 1] is read below, which would be out of bounds. + */ + if (btype == TYP_DATA || numrecs <= 0) + return 1; + + convert_extent(&rp[numrecs - 1], &o, &s, &c, &f); + last = o + c; + + for (i = 0; i < numrecs; i++, rp++) { + convert_extent(rp, &o, &s, &c, &f); + + push_cur(); + set_cur(&typtab[btype], XFS_FSB_TO_DADDR(mp, s), c * blkbb, + DB_RING_IGN, NULL); + if (iocur_top->data == NULL) { + print_warning("cannot read %s block %u/%u", + 
typtab[btype].name, + XFS_FSB_TO_AGNO(mp, s), + XFS_FSB_TO_AGBNO(mp, s)); + if (stop_on_read_error) + return 0; + } else { + if (!dont_obfuscate) + switch (btype) { + case TYP_DIR2: + if (o < mp->m_dirleafblk) + obfuscate_dir_data_blocks( + iocur_top->data, o, c, + last == mp->m_dirblkfsbs); + break; + + case TYP_SYMLINK: + obfuscate_symlink_blocks( + iocur_top->data, c); + break; + + case TYP_ATTR: + obfuscate_attr_blocks(iocur_top->data, + o, c); + break; + + default: ; + } + if (!write_buf(iocur_top)) + return 0; + } + pop_cur(); + } + + return 1; +} + +/* + * scan_btree() callback for bmap btree blocks. Leaf blocks (level 0) have + * their extent records processed; for interior blocks each child pointer is + * range checked and then scanned recursively. A block with an impossible + * record count is skipped. Returns zero on failure. + */ +static int +scanfunc_bmap( + xfs_btree_hdr_t *bthdr, + xfs_agnumber_t agno, + xfs_agblock_t agbno, + int level, + typnm_t btype, + void *arg) /* ptr to itype */ +{ + int i; + xfs_bmbt_ptr_t *pp; + xfs_bmbt_rec_t *rp; + int nrecs; + + nrecs = be16_to_cpu(bthdr->bb_numrecs); + + if (level == 0) { + if (nrecs > mp->m_bmap_dmxr[0]) { + if (show_warnings) + print_warning("invalid numrecs (%u) in %s " + "block %u/%u", nrecs, + typtab[btype].name, agno, agbno); + return 1; + } + rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, bthdr, + 1, mp->m_bmap_dmxr[0]); + + return process_bmbt_reclist(rp, nrecs, *(typnm_t*)arg); + } + + if (nrecs > mp->m_bmap_dmxr[1]) { + if (show_warnings) + print_warning("invalid numrecs (%u) in %s block %u/%u", + nrecs, typtab[btype].name, agno, agbno); + return 1; + } + pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, bthdr, 1, + mp->m_bmap_dmxr[1]); + for (i = 0; i < nrecs; i++) { + xfs_agnumber_t ag; + xfs_agblock_t bno; + + ag = XFS_FSB_TO_AGNO(mp, be64_to_cpu(pp[i])); + bno = XFS_FSB_TO_AGBNO(mp, be64_to_cpu(pp[i])); + + /* AG and AG block numbers are zero based: the counts are out of range */ + if (bno == 0 || bno >= mp->m_sb.sb_agblocks || + ag >= mp->m_sb.sb_agcount) { + if (show_warnings) + print_warning("invalid block number (%u/%u) " + "in %s block %u/%u", ag, bno, + typtab[btype].name, agno, agbno); + continue; + } + + if (!scan_btree(ag, bno, level, btype, arg, scanfunc_bmap)) + return 0; + } + return 1; +} + +static int +process_btinode( + 
xfs_dinode_t *dip, + typnm_t itype) +{ + xfs_bmdr_block_t *dib; + int i; + xfs_bmbt_ptr_t *pp; + xfs_bmbt_rec_t *rp; + int level; + int nrecs; + int maxrecs; + int whichfork; + typnm_t btype; + + /* the attr fork is walked for TYP_ATTR, the data fork for everything else */ + whichfork = (itype == TYP_ATTR) ? XFS_ATTR_FORK : XFS_DATA_FORK; + btype = (itype == TYP_ATTR) ? TYP_BMAPBTA : TYP_BMAPBTD; + + dib = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); + level = be16_to_cpu(dib->bb_level); + nrecs = be16_to_cpu(dib->bb_numrecs); + + if (level > XFS_BM_MAXLEVELS(mp, whichfork)) { + if (show_warnings) + print_warning("invalid level (%u) in inode %lld %s " + "root", level, (long long)cur_ino, + typtab[btype].name); + return 1; + } + + if (level == 0) { + rp = XFS_BTREE_REC_ADDR(XFS_DFORK_SIZE(dip, mp, whichfork), + xfs_bmdr, dib, 1, XFS_BTREE_BLOCK_MAXRECS( + XFS_DFORK_SIZE(dip, mp, whichfork), + xfs_bmdr, 1)); + + return process_bmbt_reclist(rp, nrecs, itype); + } + + maxrecs = XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE(dip, mp, whichfork), + xfs_bmdr, 0); + if (nrecs > maxrecs) { + if (show_warnings) + print_warning("invalid numrecs (%u) in inode %lld %s " + "root", nrecs, (long long)cur_ino, + typtab[btype].name); + return 1; + } + + pp = XFS_BTREE_PTR_ADDR(XFS_DFORK_SIZE(dip, mp, whichfork), xfs_bmdr, + dib, 1, maxrecs); + for (i = 0; i < nrecs; i++) { + xfs_agnumber_t ag; + xfs_agblock_t bno; + + ag = XFS_FSB_TO_AGNO(mp, be64_to_cpu(pp[i])); + bno = XFS_FSB_TO_AGBNO(mp, be64_to_cpu(pp[i])); + + /* AG and AG block numbers are zero based: the counts are out of range */ + if (bno == 0 || bno >= mp->m_sb.sb_agblocks || + ag >= mp->m_sb.sb_agcount) { + if (show_warnings) + print_warning("invalid block number (%u/%u) " + "in inode %llu %s root", ag, + bno, (long long)cur_ino, + typtab[btype].name); + continue; + } + + if (!scan_btree(ag, bno, level, btype, &itype, scanfunc_bmap)) + return 0; + } + return 1; +} + +static int +process_exinode( + xfs_dinode_t *dip, + typnm_t itype) +{ + int whichfork; + + whichfork = (itype == TYP_ATTR) ? 
XFS_ATTR_FORK : XFS_DATA_FORK; + + return process_bmbt_reclist( + (xfs_bmbt_rec_t *)XFS_DFORK_PTR(dip, whichfork), + XFS_DFORK_NEXTENTS_HOST(dip, whichfork), itype); +} + +static int +process_inode_data( + xfs_dinode_t *dip, + typnm_t itype) +{ + switch (dip->di_core.di_format) { + case XFS_DINODE_FMT_LOCAL: + if (!dont_obfuscate) + switch (itype) { + case TYP_DIR2: + obfuscate_sf_dir(dip); + break; + + case TYP_SYMLINK: + obfuscate_sf_symlink(dip); + break; + + default: ; + } + break; + + case XFS_DINODE_FMT_EXTENTS: + return process_exinode(dip, itype); + + case XFS_DINODE_FMT_BTREE: + return process_btinode(dip, itype); + } + return 1; +} + +static int +process_inode( + xfs_agnumber_t agno, + xfs_agino_t agino, + xfs_dinode_t *dip) +{ + xfs_dinode_core_t odic; + int success; + + /* convert the core */ + memcpy(&odic, &dip->di_core, sizeof(xfs_dinode_core_t)); + libxfs_xlate_dinode_core((xfs_caddr_t)&odic, &dip->di_core, 1); + + success = 1; + cur_ino = XFS_AGINO_TO_INO(mp, agno, agino); + + + /* copy appropriate data fork metadata */ + switch (dip->di_core.di_mode & S_IFMT) { + case S_IFDIR: + memset(&dir_data, 0, sizeof(dir_data)); + success = process_inode_data(dip, TYP_DIR2); + break; + case S_IFLNK: + success = process_inode_data(dip, TYP_SYMLINK); + break; + default: + success = process_inode_data(dip, TYP_DATA); + } + clear_nametable(); + + /* copy extended attributes if they exist */ + if (success && dip->di_core.di_forkoff) { + attr_data.remote_val_count = 0; + switch (dip->di_core.di_aformat) { + case XFS_DINODE_FMT_LOCAL: + if (!dont_obfuscate) + obfuscate_sf_attr(dip); + break; + + case XFS_DINODE_FMT_EXTENTS: + success = process_exinode(dip, TYP_ATTR); + break; + + case XFS_DINODE_FMT_BTREE: + success = process_btinode(dip, TYP_ATTR); + break; + } + clear_nametable(); + } + + /* restore the core back to it's original endianess */ + memcpy(&dip->di_core, &odic, sizeof(xfs_dinode_core_t)); + + return success; +} + +static __uint32_t inodes_copied = 0; 
+
+/*
+ * Copy the inode chunk described by inobt record rp, together with the
+ * fork metadata of every allocated inode in it.
+ *
+ * The chunk is read onto the I/O cursor stack, each in-use inode is run
+ * through process_inode(), and the chunk buffer is then written to the
+ * dump.  The cursor pushed here is popped on every exit path.
+ *
+ * Returns 1 to continue the dump, 0 to abort it.  A read failure only
+ * aborts when stop_on_read_error is set.
+ */
+static int
+copy_inode_chunk(
+	xfs_agnumber_t 		agno,
+	xfs_inobt_rec_t 	*rp)
+{
+	xfs_agino_t 		agino;
+	int			off;
+	xfs_agblock_t		agbno;
+	int			i;
+	int			rval = 0;
+
+	agino = be32_to_cpu(rp->ir_startino);
+	agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+	off = XFS_INO_TO_OFFSET(mp, agino);
+
+	push_cur();
+	set_cur(&typtab[TYP_INODE], XFS_AGB_TO_DADDR(mp, agno, agbno),
+			XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)),
+			DB_RING_IGN, NULL);
+	if (iocur_top->data == NULL) {
+		print_warning("cannot read inode block %u/%u", agno, agbno);
+		rval = !stop_on_read_error;
+		goto pop_out;
+	}
+
+	/*
+	 * scan through inodes and copy any btree extent lists, directory
+	 * contents and extended attributes.
+	 */
+
+	for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
+		xfs_dinode_t            *dip;
+
+		if (XFS_INOBT_IS_FREE_DISK(rp, i))
+			continue;
+
+		dip = (xfs_dinode_t *)((char *)iocur_top->data +
+				((off + i) << mp->m_sb.sb_inodelog));
+
+		if (!process_inode(agno, agino + i, dip))
+			goto pop_out;
+	}
+
+	if (!write_buf(iocur_top))
+		goto pop_out;
+
+	inodes_copied += XFS_INODES_PER_CHUNK;
+
+	if (show_progress)
+		print_progress("Copied %u of %u inodes (%u of %u AGs)",
+				inodes_copied, mp->m_sb.sb_icount, agno,
+				mp->m_sb.sb_agcount);
+
+	rval = 1;
+pop_out:
+	/* always drop the cursor pushed above, success or failure */
+	pop_cur();
+
+	return rval;
+}
+
+/*
+ * scan_btree() callback for inode btree blocks.
+ *
+ * Leaf blocks (level 0): copy the inode chunk of every record.
+ * Node blocks: descend into every child pointer that passes valid_bno().
+ *
+ * The on-disk record count is clamped to the maximum a block can hold
+ * so that a corrupt bb_numrecs cannot make us walk past the block.
+ *
+ * Returns 1 to continue the dump, 0 to abort it.
+ */
+static int
+scanfunc_ino(
+	xfs_btree_hdr_t		*bthdr,
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		agbno,
+	int			level,
+	typnm_t			btype,
+	void			*arg)
+{
+	xfs_inobt_rec_t		*rp;
+	xfs_inobt_ptr_t		*pp;
+	int			i;
+	int			numrecs;
+
+	numrecs = be16_to_cpu(bthdr->bb_numrecs);
+
+	if (level == 0) {
+		if (numrecs > mp->m_inobt_mxr[0]) {
+			if (show_warnings)
+				print_warning("invalid numrecs %d in %s "
+					"block %u/%u", numrecs,
+					typtab[btype].name, agno, agbno);
+			numrecs = mp->m_inobt_mxr[0];
+		}
+		rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_inobt,
+				bthdr, 1, mp->m_inobt_mxr[0]);
+		for (i = 0; i < numrecs; i++, rp++) {
+			if (!copy_inode_chunk(agno, rp))
+				return 0;
+		}
+	} else {
+		if (numrecs > mp->m_inobt_mxr[1]) {
+			if (show_warnings)
+				print_warning("invalid numrecs %d in %s "
+					"block %u/%u", numrecs,
+					typtab[btype].name, agno, agbno);
+			numrecs = mp->m_inobt_mxr[1];
+		}
+		pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_inobt,
+				bthdr, 1, mp->m_inobt_mxr[1]);
+		for (i = 0; i < numrecs; i++) {
+			if (!valid_bno(be32_to_cpu(pp[i]), agno, agbno, btype))
+				continue;
+			if (!scan_btree(agno, be32_to_cpu(pp[i]), level,
+					btype, arg, scanfunc_ino))
+				return 0;
+		}
+	}
+	return 1;
+}
+
+/*
+ * Copy the inode btree of an AG and, through scanfunc_ino(), all the
+ * inodes it references.
+ *
+ * A root block number or level count in the AGI that is out of range is
+ * reported (with -w) and the AG's inodes are skipped; this is not
+ * treated as a fatal error.
+ */
+static int
+copy_inodes(
+	xfs_agnumber_t		agno,
+	xfs_agi_t		*agi)
+{
+	xfs_agblock_t		root;
+	int			levels;
+
+	root = be32_to_cpu(agi->agi_root);
+	levels = be32_to_cpu(agi->agi_level);
+
+	/* validate root and levels before processing the tree */
+	/* AG block numbers run from 0 to sb_agblocks - 1 */
+	if (root == 0 || root >= mp->m_sb.sb_agblocks) {
+		if (show_warnings)
+			print_warning("invalid block number (%u) in inobt "
+					"root in agi %u", root, agno);
+		return 1;
+	}
+	if (levels >= XFS_BTREE_MAXLEVELS) {
+		if (show_warnings)
+			print_warning("invalid level (%u) in inobt root "
+					"in agi %u", levels, agno);
+		return 1;
+	}
+
+	return scan_btree(agno, root, levels, TYP_INOBT, agi, scanfunc_ino);
+}
+
+/*
+ * Copy all the metadata of one allocation group: the four AG header
+ * sectors (superblock, AGF, AGI, AGFL), the free space btrees and the
+ * inode btree with its inodes.
+ *
+ * The superblock, AGF and AGI stay on the I/O cursor stack while the
+ * btrees are walked because agf and agi point into those buffers.  The
+ * early "return 0" paths leave their cursors pushed; metadump_f()
+ * unwinds the stack to its starting depth when the dump finishes.
+ *
+ * Returns 1 to continue the dump, 0 to abort it.
+ */
+static int
+scan_ag(
+	xfs_agnumber_t	agno)
+{
+	xfs_agf_t	*agf;
+	xfs_agi_t	*agi;
+
+	/* copy the superblock of the AG */
+	push_cur();
+	set_cur(&typtab[TYP_SB], XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
+			XFS_FSS_TO_BB(mp, 1), DB_RING_IGN, NULL);
+	if (!iocur_top->data) {
+		print_warning("cannot read superblock for ag %u", agno);
+		if (stop_on_read_error)
+			return 0;
+	} else {
+		if (!write_buf(iocur_top))
+			return 0;
+	}
+
+	/* copy the AG free space btree root */
+	push_cur();
+	set_cur(&typtab[TYP_AGF], XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
+			XFS_FSS_TO_BB(mp, 1), DB_RING_IGN, NULL);
+	agf = iocur_top->data;
+	if (iocur_top->data == NULL) {
+		print_warning("cannot read agf block for ag %u", agno);
+		if (stop_on_read_error)
+			return 0;
+	} else {
+		if (!write_buf(iocur_top))
+			return 0;
+	}
+
+	/* copy the AG inode btree root */
+	push_cur();
+	set_cur(&typtab[TYP_AGI], XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
+			XFS_FSS_TO_BB(mp, 1), DB_RING_IGN, NULL);
+	agi = iocur_top->data;
+	if (iocur_top->data == NULL) {
+		print_warning("cannot read agi block for ag %u", agno);
+		if (stop_on_read_error)
+			return 0;
+	} else {
+		if (!write_buf(iocur_top))
+			return 0;
+	}
+
+	/* copy the AG free list header */
+	push_cur();
+	set_cur(&typtab[TYP_AGFL], XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
+			XFS_FSS_TO_BB(mp, 1), DB_RING_IGN, NULL);
+	if (iocur_top->data == NULL) {
+		print_warning("cannot read agfl block for ag %u", agno);
+		if (stop_on_read_error)
+			return 0;
+	} else {
+		if (!write_buf(iocur_top))
+			return 0;
+	}
+	pop_cur();
+
+	/* copy AG free space btrees */
+	if (agf) {
+		if (show_progress)
+			print_progress("Copying free space trees of AG %u",
+					agno);
+		if (!copy_free_bno_btree(agno, agf))
+			return 0;
+		if (!copy_free_cnt_btree(agno, agf))
+			return 0;
+	}
+
+	/* copy inode btrees and the inodes and their associated metadata */
+	if (agi) {
+		if (!copy_inodes(agno, agi))
+			return 0;
+	}
+
+	/* drop the AGI, AGF and superblock cursors */
+	pop_cur();
+	pop_cur();
+	pop_cur();
+
+	return 1;
+}
+
+/*
+ * Copy the fork metadata of a single inode given by number; used for
+ * the inodes referenced directly from the superblock.
+ *
+ * An inode number of 0 is treated as "not present".  A number whose AG,
+ * block or offset is out of range is reported (with -w) and skipped.
+ * The cursor pushed here is not popped; metadump_f() unwinds the stack
+ * when the dump finishes.
+ *
+ * Returns 1 to continue the dump, 0 to abort it.
+ */
+static int
+copy_ino(
+	xfs_ino_t		ino,
+	typnm_t			itype)
+{
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+	xfs_agino_t		agino;
+	xfs_dinode_t		*dip;
+	xfs_dinode_core_t	tdic;
+	int			offset;
+
+	if (ino == 0)
+		return 1;
+
+	agno = XFS_INO_TO_AGNO(mp, ino);
+	agino = XFS_INO_TO_AGINO(mp, ino);
+	agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+	offset = XFS_AGINO_TO_OFFSET(mp, agino);
+
+	if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
+			offset >= mp->m_sb.sb_inopblock) {
+		if (show_warnings)
+			print_warning("invalid %s inode number (%lld)",
+					typtab[itype].name, (long long)ino);
+		return 1;
+	}
+
+	push_cur();
+	set_cur(&typtab[TYP_INODE], XFS_AGB_TO_DADDR(mp, agno, agbno),
+			blkbb, DB_RING_IGN, NULL);
+	if (iocur_top->data == NULL) {
+		print_warning("cannot read %s inode %lld",
+				typtab[itype].name, (long long)ino);
+		return !stop_on_read_error;
+	}
+	off_cur(offset << mp->m_sb.sb_inodelog, mp->m_sb.sb_inodesize);
+
+	/* convert the inode core in place before looking at its forks */
+	dip = iocur_top->data;
+	libxfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core, &tdic, 1);
+	memcpy(&dip->di_core, &tdic, sizeof(xfs_dinode_core_t));
+
+	cur_ino = ino;
+	return process_inode_data(dip, itype);
+}
+
+
+/*
+ * Copy the metadata of the realtime bitmap, realtime summary and user
+ * and group quota inodes named in the superblock.
+ */
+static int
+copy_sb_inodes(void)
+{
+	if (!copy_ino(mp->m_sb.sb_rbmino, TYP_RTBITMAP))
+		return 0;
+
+	if (!copy_ino(mp->m_sb.sb_rsumino, TYP_RTSUMMARY))
+		return 0;
+
+	if (!copy_ino(mp->m_sb.sb_uquotino, TYP_DQBLK))
+		return 0;
+
+	return copy_ino(mp->m_sb.sb_gquotino, TYP_DQBLK);
+}
+
+/*
+ * Copy the log (sb_logblocks blocks starting at sb_logstart) to the
+ * dump.  The cursor pushed here is not popped; metadump_f() unwinds
+ * the stack when the dump finishes.
+ */
+static int
+copy_log(void)
+{
+	if (show_progress)
+		print_progress("Copying log");
+
+	push_cur();
+	set_cur(&typtab[TYP_LOG], XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart),
+			mp->m_sb.sb_logblocks * blkbb, DB_RING_IGN, NULL);
+	if (iocur_top->data == NULL) {
+		print_warning("cannot read log");
+		return !stop_on_read_error;
+	}
+	return write_buf(iocur_top);
+}
+
+/*
+ * Implementation of the xfs_db "metadump" command:
+ *
+ *	metadump [-egow] filename
+ *
+ * -e stops on read errors, -g shows progress, -o disables obfuscation,
+ * -w prints warnings.  A filename of "-" writes the dump to stdout.
+ *
+ * The result of the dump is reported through the global exitcode
+ * (0 on success, 1 on failure); the function itself always returns 0.
+ */
+static int
+metadump_f(
+	int 		argc,
+	char 		**argv)
+{
+	xfs_agnumber_t	agno;
+	int		c;
+	int		start_iocur_sp;
+
+	/*
+	 * Reset every option flag so that a previous invocation in the
+	 * same xfs_db session cannot leak its settings into this one.
+	 */
+	exitcode = 1;
+	show_progress = 0;
+	show_warnings = 0;
+	stop_on_read_error = 0;
+	dont_obfuscate = 0;
+
+	if (mp->m_sb.sb_magicnum != XFS_SB_MAGIC) {
+		print_warning("bad superblock magic number %x, giving up",
+				mp->m_sb.sb_magicnum);
+		return 0;
+	}
+
+	while ((c = getopt(argc, argv, "egow")) != EOF) {
+		switch (c) {
+			case 'e':
+				stop_on_read_error = 1;
+				break;
+			case 'g':
+				show_progress = 1;
+				break;
+			case 'o':
+				dont_obfuscate = 1;
+				break;
+			case 'w':
+				show_warnings = 1;
+				break;
+			default:
+				print_warning("bad option for metadump command");
+				return 0;
+		}
+	}
+
+	if (optind != argc - 1) {
+		print_warning("too few options for metadump (no filename given)");
+		return 0;
+	}
+
+	/* one buffer holds the header, the block index and the blocks */
+	metablock = (xfs_metablock_t *)calloc(BBSIZE + 1, BBSIZE);
+	if (metablock == NULL) {
+		print_warning("memory allocation failure");
+		return 0;
+	}
+	metablock->mb_blocklog = BBSHIFT;
+	metablock->mb_magic = cpu_to_be32(XFS_MD_MAGIC);
+
+	if (!create_nametable()) {
+		print_warning("memory allocation failure");
+		free(metablock);
+		return 0;
+	}
+
+	block_index = (__be64 *)((char *)metablock + sizeof(xfs_metablock_t));
+	block_buffer = (char *)metablock + BBSIZE;
+	num_indicies = (BBSIZE - sizeof(xfs_metablock_t)) / sizeof(__be64);
+	cur_index = 0;
+	start_iocur_sp = iocur_sp;
+
+	if (strcmp(argv[optind], "-") == 0) {
+		if (isatty(fileno(stdout))) {
+			print_warning("cannot write to a terminal");
+			free(nametable);
+			free(metablock);
+			return 0;
+		}
+		outf = stdout;
+	} else {
+		outf = fopen(argv[optind], "wb");
+		if (outf == NULL) {
+			print_warning("cannot create dump file");
+			free(nametable);
+			free(metablock);
+			return 0;
+		}
+	}
+
+	exitcode = 0;
+
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+		if (!scan_ag(agno)) {
+			exitcode = 1;
+			break;
+		}
+	}
+
+	/* copy realtime and quota inode contents */
+	if (!exitcode)
+		exitcode = !copy_sb_inodes();
+
+	/* copy log if it's internal */
+	if ((mp->m_sb.sb_logstart != 0) && !exitcode)
+		exitcode = !copy_log();
+
+	/* write the remaining index */
+	if (!exitcode)
+		exitcode = !write_index();
+
+	if (progress_since_warning)
+		fputc('\n', (outf == stdout) ? stderr : stdout);
+
+	/* buffered data is flushed here, so a failure is a write error */
+	if (outf != stdout && fclose(outf) != 0) {
+		print_warning("error closing dump file");
+		exitcode = 1;
+	}
+
+	/* cleanup iocur stack */
+	while (iocur_sp > start_iocur_sp)
+		pop_cur();
+
+	free(nametable);
+	free(metablock);
+
+	return 0;
+}
diff --git a/db/metadump.h b/db/metadump.h
new file mode 100644
index 000000000..32e7fc47e
--- /dev/null
+++ b/db/metadump.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2007 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+extern void	metadump_init(void);
diff --git a/db/xfs_metadump.sh b/db/xfs_metadump.sh
new file mode 100755
index 000000000..5341098ae
--- /dev/null
+++ b/db/xfs_metadump.sh
@@ -0,0 +1,38 @@
+#!/bin/sh -f
+#
+# Copyright (c) 2007 Silicon Graphics, Inc.  All Rights Reserved.
+#
+
+OPTS=" "		# options passed on to the xfs_db "metadump" command
+DBOPTS=" "		# options passed to xfs_db itself
+USAGE="Usage: xfs_metadump [-efogwV] [-l logdev] source target"
+
+while getopts "efgl:owV" c
+do
+	case $c in
+	e)	OPTS=$OPTS"-e ";;
+	g)	OPTS=$OPTS"-g ";;
+	o)	OPTS=$OPTS"-o ";;
+	w)	OPTS=$OPTS"-w ";;
+	f)	DBOPTS=$DBOPTS" -f";;
+	l)	DBOPTS=$DBOPTS" -l "$OPTARG" ";;
+	V)	xfs_db -p xfs_metadump -V
+		status=$?
+		exit $status
+		;;
+	\?)	echo "$USAGE" 1>&2
+		exit 2
+		;;
+	esac
+done
+set -- extra "$@"	# dummy first word so "shift $OPTIND" drops exactly the options
+shift $OPTIND
+case $# in
+	2)	xfs_db$DBOPTS -i -p xfs_metadump -c "metadump$OPTS $2" "$1"	# quote source so it is passed as one word
+		status=$?
+		;;
+	*)	echo "$USAGE" 1>&2
+		exit 2
+		;;
+esac
+exit $status
diff --git a/doc/CHANGES b/doc/CHANGES
index 9e97aed06..948e06d8a 100644
--- a/doc/CHANGES
+++ b/doc/CHANGES
@@ -1,4 +1,5 @@
-xfsprogs-2.8.22
+xfsprogs-2.9.0 (5 June 2007)
+	- Added new tools: xfs_metadump and xfs_mdrestore.
 	- Fix up the HAVE___U32 fix from 2.8.20
 	  Thanks to Eric Sandeen for pointing this out.
 
diff --git a/include/xfs_metadump.h b/include/xfs_metadump.h
new file mode 100644
index 000000000..f4be51b42
--- /dev/null
+++ b/include/xfs_metadump.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2007 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _XFS_METADUMP_H_ +#define _XFS_METADUMP_H_ + +#define XFS_MD_MAGIC 0x5846534d /* 'XFSM' */ + +typedef struct xfs_metablock { + __be32 mb_magic; /* XFS_MD_MAGIC, stored big-endian */ + __be16 mb_count; /* big-endian count of the blocks that follow this index */ + __uint8_t mb_blocklog; /* log2 of the size in bytes of each dumped block */ + __uint8_t mb_reserved; + /* followed by an array of big-endian 64-bit (__be64) disk addresses, one per dumped block, in 512-byte units */ +} xfs_metablock_t; + +#endif /* _XFS_METADUMP_H_ */ diff --git a/man/man8/xfs_db.8 b/man/man8/xfs_db.8 index 2b28a1b55..352a7510a 100644 --- a/man/man8/xfs_db.8 +++ b/man/man8/xfs_db.8 @@ -367,6 +367,10 @@ If no \f2label\f1 is given, the current filesystem label is printed. Start logging output to \f2filename\f1, stop logging, or print the current logging status. .TP +\f3metadump\f1 [ \f3-egow\f1 ] \f2filename\f1 +Dumps metadata to a file. See +.BR xfs_metadump "(8) for more information." +.TP \f3ncheck\f1 [ \f3\-s\f1 ] [ \f3\-i\f1 \f2ino\f1 ] ... Print name-inode pairs. A \f3blockget \-n\f1 command must be run first to gather the information. 
@@ -1239,6 +1243,7 @@ xfs_admin(8), xfs_check(8), xfs_copy(8), xfs_logprint(8), +xfs_metadump(8), xfs_ncheck(8), xfs_repair(8), mount(8), diff --git a/man/man8/xfs_mdrestore.8 b/man/man8/xfs_mdrestore.8 new file mode 100644 index 000000000..d662cc9b0 --- /dev/null +++ b/man/man8/xfs_mdrestore.8 @@ -0,0 +1,48 @@ +.TH xfs_mdrestore 8 +.SH NAME +xfs_mdrestore \- restores an XFS metadump image to a filesystem image +.SH SYNOPSIS +.B xfs_mdrestore +.RB [ \-g ] +.I source +.I target +.SH DESCRIPTION +.B xfs_mdrestore +is a debugging tool that restores a metadata image generated by +.BR xfs_metadump (8) +to a filesystem. The +.I source +argument specifies the location of the metadump image and the +.I target +argument specifies the destination for the filesystem image. +If the +.I source +is \-, then the metadata image is read from stdin. This allows the output of +another program such as a compression application to be redirected to +.BR xfs_mdrestore . +The +.I target +can be either a file or a device. +.PP +.B xfs_mdrestore +should not be used to restore metadata onto an existing filesystem unless +you are completely certain the +.I target +can be destroyed. +.PP +.SH OPTIONS +.TP +.B \-g +Shows restore progress on stdout. +.SH DIAGNOSTICS +.B xfs_mdrestore +returns an exit code of 0 if all the metadata is successfully restored or +1 if an error occurs. +.SH SEE ALSO +.BR xfs_metadump (8), +.BR xfs_repair (8), +.BR xfs_check (8), +.BR xfs (5) +.SH BUGS +Email bug reports to +.BR xfs@oss.sgi.com . 
\ No newline at end of file diff --git a/man/man8/xfs_metadump.8 b/man/man8/xfs_metadump.8 new file mode 100644 index 000000000..506a961b3 --- /dev/null +++ b/man/man8/xfs_metadump.8 @@ -0,0 +1,128 @@ +.TH xfs_metadump 8 +.SH NAME +xfs_metadump \- copy XFS filesystem metadata to a file +.SH SYNOPSIS +.B xfs_metadump +.RB [ \-efgow ] +.RB [ \-l +.IR logdev ] +.I source +.I target +.SH DESCRIPTION +.B xfs_metadump +is a debugging tool that copies the metadata from an XFS filesystem to a file. +The +.I source +argument must be the pathname of the device or file +containing the XFS filesystem and the +.I target +argument specifies the destination file name. +If +.I target +is \-, then the output is sent to stdout. This allows the output to be +redirected to another program such as a compression application. +.PP +.B xfs_metadump +should only be used to copy unmounted filesystems, read-only mounted +filesystems, or frozen filesystems (see +.BR xfs_freeze (8)). +Otherwise, the generated dump could be inconsistent or corrupt. +.PP +.B xfs_metadump +does not alter the source filesystem in any way. The +.I target +image is a contiguous (non-sparse) file containing all the +filesystem's metadata and indexes to where the blocks were copied from. +.PP +By default, +.B xfs_metadump +obfuscates most directory names and extended attribute names to allow the dumps +to be sent without revealing confidential information. Extended attribute +values are zeroed and no data is copied. The only exceptions are directory +or attribute names that are 4 or less characters in length. Also directory +names that span extents (this can only occur with the +.BR mkfs.xfs (8) +options where +.B \-n +.I size +> +.B \-b +.IR size ) +are not obfuscated. Names between 5 and 8 characters in length inclusively +are partially obfuscated. +.PP +.B xfs_metadump +should not be used for any purposes other than for debugging and reporting +filesystem problems. 
The most common usage scenario for this tool is when +.BR xfs_repair (8) +fails to repair a filesystem and a metadump image can be sent for +analysis. +.PP +The file generated by +.B xfs_metadump +can be restored to a filesystem image (minus the data) using the +.BR xfs_mdrestore (8) +tool. +.PP +.SH OPTIONS +.TP +.B \-e +Stops the dump on a read error. Normally, it will ignore read errors and copy +all the metadata that is accessible. +.TP +.B \-f +Specifies that the filesystem image to be processed is stored in a regular file +(see the +.B mkfs.xfs -d +file option). This can also happen if an image copy of a filesystem has +been made into an ordinary file with +.BR xfs_copy (8). +.TP +.B \-g +Shows dump progress. This is sent to stdout if the +.I target +is a file or to stderr if the +.I target +is stdout. +.TP +.BI \-l " logdev" +For filesystems which use an external log, this specifies the device where the +external log resides. The external log is not copied, only internal logs are +copied. +.TP +.B \-o +Disables obfuscation of file names and extended attributes. +.TP +.B \-w +Prints warnings of inconsistent metadata encountered to stderr. Bad metadata +is still copied. +.SH DIAGNOSTICS +.B xfs_metadump +returns an exit code of 0 if all readable metadata is successfully copied or +1 if a write error occurs or a read error occurs and the +.B \-e +option is used. +.SH NOTES +As +.B xfs_metadump +copies metadata only, it does not matter if the +.I source +filesystem has a realtime section or not. If the filesystem has an external +log, it is not copied. Internal logs are copied and any outstanding log +transactions are not obfuscated if they contain names. +.PP +.B xfs_metadump +is a shell wrapper around the +.BR xfs_db (8) +.B metadump +command. +.SH SEE ALSO +.BR xfs_repair (8), +.BR xfs_mdrestore (8), +.BR xfs_freeze (8), +.BR xfs_db (8), +.BR xfs_copy (8), +.BR xfs (5) +.SH BUGS +Email bug reports to +.BR xfs@oss.sgi.com . 
\ No newline at end of file diff --git a/man/man8/xfs_repair.8 b/man/man8/xfs_repair.8 index e03015319..f0d7a5b32 100644 --- a/man/man8/xfs_repair.8 +++ b/man/man8/xfs_repair.8 @@ -96,49 +96,49 @@ will ignore an EFS superblock if one is found. .IP The .BI ihash= ihashsize -suboption modifies the default xfs_repair inode cache hash size. -The total number of inode cache entries are limited to 8 times this +suboption modifies the default xfs_repair inode cache hash size. +The total number of inode cache entries are limited to 8 times this amount. .IP The .BI bhash= bhashsize -suboption modifies the default xfs_repair buffer cache hash size. -The total number of buffer cache entries are limited to 8 times this +suboption modifies the default xfs_repair buffer cache hash size. +The total number of buffer cache entries are limited to 8 times this amount. .IP The .BI pfino= inode_blocks -suboption modifies the default size of read ahead xfs_repair inode +suboption modifies the default size of read ahead xfs_repair inode blocks. .IP -The +The .BI pfdir= dir_blocks -suboption modifies the default size of read ahead xfs_repair dir +suboption modifies the default size of read ahead xfs_repair dir blocks. .IP The .BI thread= thread_count -suboption modifies the number of xfs_repair parallel threads. +suboption modifies the number of xfs_repair parallel threads. .TP \f3-t\f1 \f2interval\f1 -Modify reporting interval. During long runs xfs_repair outputs -its progress every 15 minutes. Reporting is only activated when +Modify reporting interval. During long runs xfs_repair outputs +its progress every 15 minutes. Reporting is only activated when xfs_repair is multi-threaded. .TP .B \-M -Disable multi-threaded mode. Normally, xfs_repair runs with twice +Disable multi-threaded mode. Normally, xfs_repair runs with twice the number of threads as processors. .TP .B \-P -Disable read ahead of inode and directory blocks. If applicable, +Disable read ahead of inode and directory blocks. 
If applicable, a read ahead of up to 16 additional blocks is done. .TP .B \-v Verbose output. .TP .B \-d -Repair dangerously. Allow xfs_repair to repair an XFS filesystem -mounted read only. This is typically done on a root fileystem from +Repair dangerously. Allow xfs_repair to repair an XFS filesystem +mounted read only. This is typically done on a root fileystem from single user mode, immediately followed by a reboot. .SS Checks Performed Inconsistencies corrected include the following: @@ -434,9 +434,17 @@ maps, particularly lost blocks or subtly corrupted maps (trees). The no-modify mode can generate repeated warnings about the same problems because it cannot fix the problems as they are encountered. +.PP +If a filesystem fails to be repaired, a metadump image can be generated +with +.BR xfs_metadump (8) +and be sent to an XFS maintainer to be analysed and +.B xfs_repair +fixed and/or improved. .SH SEE ALSO dd(1), mkfs.xfs(8), umount(8), xfs_check(8), +xfs_metadump(8), xfs(5). diff --git a/mdrestore/Makefile b/mdrestore/Makefile new file mode 100644 index 000000000..b4bea1d9e --- /dev/null +++ b/mdrestore/Makefile @@ -0,0 +1,22 @@ +# +# Copyright (c) 2007 Silicon Graphics, Inc. All Rights Reserved. +# + +TOPDIR = .. +include $(TOPDIR)/include/builddefs + +LTCOMMAND = xfs_mdrestore +CFILES = xfs_mdrestore.c + +LLDLIBS = $(LIBXFS) $(LIBRT) +LTDEPENDENCIES = $(LIBXFS) +LLDFLAGS = -static + +default: $(LTCOMMAND) + +include $(BUILDRULES) + +install: + $(INSTALL) -m 755 -d $(PKG_BIN_DIR) + $(LTINSTALL) -m 755 $(LTCOMMAND) $(PKG_BIN_DIR) +install-dev: diff --git a/mdrestore/xfs_mdrestore.c b/mdrestore/xfs_mdrestore.c new file mode 100644 index 000000000..983657eb7 --- /dev/null +++ b/mdrestore/xfs_mdrestore.c @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2007 Silicon Graphics, Inc. + * All Rights Reserved. 
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <libxfs.h>
+#include "xfs_metadump.h"
+
+char 		*progname;
+int		show_progress = 0;
+int		progress_since_warning = 0;
+
+/*
+ * Print a printf-style error message prefixed with the program name to
+ * stderr and exit with status 1.  Does not return.
+ */
+static void
+fatal(const char *msg, ...)
+{
+	va_list		args;
+
+	va_start(args, msg);
+	fprintf(stderr, "%s: ", progname);
+	vfprintf(stderr, msg, args);
+	va_end(args);
+	exit(1);
+}
+
+/*
+ * Show a printf-style progress message on stdout.  The message is
+ * truncated to fit the local buffer and printed after a carriage
+ * return, padded to 59 columns, so that successive calls overwrite
+ * each other on the same line.
+ */
+static void
+print_progress(const char *fmt, ...)
+{
+	char		buf[60];
+	va_list 	ap;
+
+	va_start(ap, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, ap);
+	va_end(ap);
+	buf[sizeof(buf)-1] = '\0';
+
+	printf("\r%-59s", buf);
+	fflush(stdout);
+	progress_since_warning = 1;
+}
+
+/*
+ * Restore the metadump read from src_f onto dst_fd.
+ *
+ * The dump is a sequence of groups, each made of one index block (an
+ * xfs_metablock_t header followed by big-endian disk addresses) and the
+ * mb_count data blocks the index describes.  Every data block is
+ * written at its recorded address.
+ *
+ * is_target_file selects how the target is sized: a regular file is
+ * truncated to the filesystem size, while for anything else the last
+ * sector is written to prove the target is large enough.
+ *
+ * Any error is fatal and terminates the program.
+ */
+static void
+perform_restore(
+	FILE			*src_f,
+	int			dst_fd,
+	int			is_target_file)
+{
+	xfs_metablock_t		*metablock;	/* header + index + blocks */
+	__be64			*block_index;
+	char			*block_buffer;
+	int			block_size;
+	int			max_indicies;
+	int			cur_index;
+	xfs_metablock_t		tmb;
+	xfs_sb_t		sb;
+	__int64_t		bytes_read;
+
+	/*
+	 * read in first blocks (superblock 0), set "inprogress" flag for it,
+	 * read in the rest of the file, and if complete, clear SB 0's
+	 * "inprogress flag"
+	 */
+
+	if (fread(&tmb, sizeof(tmb), 1, src_f) != 1)
+		fatal("error reading from file: %s\n", strerror(errno));
+
+	if (be32_to_cpu(tmb.mb_magic) != XFS_MD_MAGIC)
+		fatal("specified file is not a metadata dump\n");
+
+	/* mb_blocklog comes from the file: reject shifts that overflow int */
+	if (tmb.mb_blocklog > 30)
+		fatal("bad block log: %u\n", tmb.mb_blocklog);
+
+	block_size = 1 << tmb.mb_blocklog;
+	max_indicies = (block_size - sizeof(xfs_metablock_t)) / sizeof(__be64);
+
+	metablock = (xfs_metablock_t *)calloc(max_indicies + 1, block_size);
+	if (metablock == NULL)
+		fatal("memory allocation failure\n");
+
+	metablock->mb_count = be16_to_cpu(tmb.mb_count);
+	metablock->mb_blocklog = tmb.mb_blocklog;
+
+	if (metablock->mb_count == 0 || metablock->mb_count > max_indicies)
+		fatal("bad block count: %u\n", metablock->mb_count);
+
+	block_index = (__be64 *)((char *)metablock + sizeof(xfs_metablock_t));
+	block_buffer = (char *)metablock + block_size;
+
+	if (fread(block_index, block_size - sizeof(tmb), 1, src_f) != 1)
+		fatal("error reading from file: %s\n", strerror(errno));
+
+	if (block_index[0] != 0)
+		fatal("first block is not the primary superblock\n");
+
+
+	if (fread(block_buffer, metablock->mb_count << tmb.mb_blocklog,
+			1, src_f) != 1)
+		fatal("error reading from file: %s\n", strerror(errno));
+
+	libxfs_xlate_sb(block_buffer, &sb, 1, XFS_SB_ALL_BITS);
+
+	if (sb.sb_magicnum != XFS_SB_MAGIC)
+		fatal("bad magic number for primary superblock\n");
+
+	((xfs_sb_t*)block_buffer)->sb_inprogress = 1;
+
+	if (is_target_file)  {
+		/* ensure regular files are correctly sized */
+
+		if (ftruncate64(dst_fd, sb.sb_dblocks * sb.sb_blocksize))
+			fatal("cannot set filesystem image size: %s\n",
+				strerror(errno));
+	} else  {
+		/* ensure device is large enough */
+
+		char		lb[XFS_MAX_SECTORSIZE] = { 0 };
+		off64_t		off;
+
+		off = sb.sb_dblocks * sb.sb_blocksize - sizeof(lb);
+		if (pwrite64(dst_fd, lb, sizeof(lb), off) < 0)
+			fatal("failed to write last block, is target too "
+				"small? (error: %s)\n", strerror(errno));
+	}
+
+	bytes_read = 0;
+
+	for (;;) {
+		if (show_progress && (bytes_read & ((1 << 20) - 1)) == 0)
+			print_progress("%lld MB read",
+					(long long)(bytes_read >> 20));
+
+		for (cur_index = 0; cur_index < metablock->mb_count; cur_index++) {
+			if (pwrite64(dst_fd, &block_buffer[cur_index <<
+					tmb.mb_blocklog],
+					block_size,
+					be64_to_cpu(block_index[cur_index]) <<
+						BBSHIFT) < 0)
+				fatal("error writing block %llu: %s\n",
+					(unsigned long long)be64_to_cpu(
+						block_index[cur_index]) <<
+						BBSHIFT,
+					strerror(errno));
+		}
+		/* a partially filled index is the last one in the dump */
+		if (metablock->mb_count < max_indicies)
+			break;
+
+		if (fread(metablock, block_size, 1, src_f) != 1)
+			fatal("error reading from file: %s\n", strerror(errno));
+
+		if (metablock->mb_count == 0)
+			break;
+
+		metablock->mb_count = be16_to_cpu(metablock->mb_count);
+		if (metablock->mb_count > max_indicies)
+			fatal("bad block count: %u\n", metablock->mb_count);
+
+		/*
+		 * The buffer was sized from the first header, so keep using
+		 * its block log rather than the one just read from the file.
+		 */
+		if (fread(block_buffer, metablock->mb_count <<
+				tmb.mb_blocklog, 1, src_f) != 1)
+			fatal("error reading from file: %s\n", strerror(errno));
+
+		/* account for the index block and the data blocks */
+		bytes_read += block_size +
+				(metablock->mb_count << tmb.mb_blocklog);
+	}
+
+	if (progress_since_warning)
+		putchar('\n');
+
+	/* everything is restored: rewrite SB 0 with "inprogress" cleared */
+	memset(block_buffer, 0, sb.sb_sectsize);
+	sb.sb_inprogress = 0;
+	libxfs_xlate_sb(block_buffer, &sb, 0, XFS_SB_ALL_BITS);
+	if (pwrite(dst_fd, block_buffer, sb.sb_sectsize, 0) < 0)
+		fatal("error writing primary superblock: %s\n", strerror(errno));
+
+	free(metablock);
+}
+
+/* Print the command synopsis to stderr and exit with status 1. */
+static void
+usage(void)
+{
+	fprintf(stderr, "Usage: %s [-gV] source target\n", progname);
+	exit(1);
+}
+
+extern int	platform_check_ismounted(char *, char *, struct stat64 *, int);
+
+/*
+ * xfs_mdrestore [-gV] source target
+ *
+ * Opens the dump ("-" means stdin) and the target, refusing a target
+ * device with a mounted filesystem, and restores the dump onto it.
+ */
+int
+main(
+	int 		argc,
+	char 		**argv)
+{
+	FILE		*src_f;
+	int		dst_fd;
+	int		c;
+	int		open_flags;
+	struct stat64	statbuf;
+	int		is_target_file;
+
+	progname = basename(argv[0]);
+
+	while ((c = getopt(argc, argv, "gV")) != EOF) {
+		switch (c) {
+			case 'g':
+				show_progress = 1;
+				break;
+			case 'V':
+				printf("%s version %s\n", progname, VERSION);
+				exit(0);
+			default:
+				usage();
+		}
+	}
+
+	if (argc - optind != 2)
+		usage();
+
+	/* open source */
+	if (strcmp(argv[optind], "-") == 0) {
+		src_f = stdin;
+		if (isatty(fileno(stdin)))
+			fatal("cannot read from a terminal\n");
+	} else {
+		src_f = fopen(argv[optind], "rb");
+		if (src_f == NULL)
+			fatal("cannot open source dump file\n");
+	}
+	optind++;
+
+	/* check and open target */
+	open_flags = O_RDWR;
+	is_target_file = 0;
+	if (stat64(argv[optind], &statbuf) < 0)  {
+		/* ok, assume it's a file and create it */
+		open_flags |= O_CREAT;
+		is_target_file = 1;
+	} else if (S_ISREG(statbuf.st_mode))  {
+		open_flags |= O_TRUNC;
+		is_target_file = 1;
+	} else  {
+		/*
+		 * check to make sure a filesystem isn't mounted on the device
+		 */
+		if (platform_check_ismounted(argv[optind], NULL, &statbuf, 0))
+			fatal("a filesystem is mounted on target device \"%s\","
+				" cannot restore to a mounted filesystem.\n",
+				argv[optind]);
+	}
+
+	dst_fd = open(argv[optind], open_flags, 0644);
+	if (dst_fd < 0)
+		fatal("couldn't open target \"%s\"\n", argv[optind]);
+
+	perform_restore(src_f, dst_fd, is_target_file);
+
+	close(dst_fd);
+	if (src_f != stdin)
+		fclose(src_f);
+
+	return 0;
+}
-- 
2.47.2