From 2bd0ea187d570cb004585bd58c4ad4dfea6606ca Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Mon, 15 Jan 2001 05:36:03 +0000 Subject: [PATCH] cmd/xfs/bmap/Makefile 1.8 Renamed to cmd/xfsprogs/bmap/Makefile --- bmap/Makefile | 45 + bmap/xfs_bmap.c | 405 ++++ build/Makefile | 78 + build/rpm/Makefile | 78 + build/rpm/macros.template | 30 + build/rpm/rpm-2.rc.template | 25 + build/tar/Makefile | 50 + db/Makefile | 58 + db/addr.c | 139 ++ db/addr.h | 33 + db/agf.c | 143 ++ db/agf.h | 39 + db/agfl.c | 122 + db/agfl.h | 39 + db/agi.c | 130 + db/agi.h | 39 + db/attr.c | 465 ++++ db/attr.h | 44 + db/attrshort.c | 182 ++ db/attrshort.h | 39 + db/bit.c | 202 ++ db/bit.h | 43 + db/block.c | 303 +++ db/block.h | 36 + db/bmap.c | 355 +++ db/bmap.h | 48 + db/bmapbt.c | 331 +++ db/bmapbt.h | 45 + db/bmroot.c | 274 +++ db/bmroot.h | 41 + db/bnobt.c | 187 ++ db/bnobt.h | 40 + db/check.c | 4468 ++++++++++++++++++++++++++++++++++ db/check.h | 33 + db/cntbt.c | 193 ++ db/cntbt.h | 40 + db/command.c | 158 ++ db/command.h | 55 + db/convert.c | 340 +++ db/convert.h | 33 + db/data.c | 41 + db/data.h | 39 + db/dbread.c | 84 + db/dbread.h | 34 + db/debug.c | 68 + db/debug.h | 36 + db/dir.c | 270 +++ db/dir.h | 44 + db/dir2.c | 727 ++++++ db/dir2.h | 45 + db/dir2sf.c | 235 ++ db/dir2sf.h | 41 + db/dirshort.c | 146 ++ db/dirshort.h | 39 + db/dquot.c | 176 ++ db/dquot.h | 39 + db/echo.c | 62 + db/echo.h | 33 + db/faddr.c | 404 ++++ db/faddr.h | 50 + db/field.c | 394 +++ db/field.h | 224 ++ db/flist.c | 437 ++++ db/flist.h | 66 + db/fprint.c | 201 ++ db/fprint.h | 45 + db/frag.c | 534 +++++ db/frag.h | 33 + db/freesp.c | 427 ++++ db/freesp.h | 33 + db/hash.c | 78 + db/hash.h | 34 + db/help.c | 109 + db/help.h | 33 + db/init.c | 148 ++ db/init.h | 34 + db/inobt.c | 193 ++ db/inobt.h | 40 + db/inode.c | 594 +++++ db/inode.h | 47 + db/input.c | 272 +++ db/input.h | 37 + db/io.c | 627 +++++ db/io.h | 76 + db/main.c | 61 + db/malloc.c | 106 + db/malloc.h | 37 + db/mount.c | 153 ++ db/mount.h | 34 
+ db/output.c | 124 + db/output.h | 37 + db/print.c | 310 +++ db/print.h | 41 + db/quit.c | 55 + db/quit.h | 33 + db/sb.c | 162 ++ db/sb.h | 39 + db/sig.c | 80 + db/sig.h | 37 + db/strvec.c | 113 + db/strvec.h | 37 + db/type.c | 197 ++ db/type.h | 68 + db/uuid.c | 364 +++ db/uuid.h | 33 + db/write.c | 708 ++++++ db/write.h | 38 + db/xfs_admin.sh | 60 + db/xfs_check.sh | 63 + db/xfs_check64.sh | 63 + db/xfs_ncheck.sh | 61 + db/xfs_ncheck64.sh | 61 + doc/Makefile | 45 + doc/README.LVM | 77 + fsck/Makefile | 46 + fsck/xfs_fsck.c | 42 + growfs/Makefile | 50 + growfs/xfs_growfs.c | 458 ++++ growfs/xfs_info.sh | 56 + include/Makefile | 52 + include/arch.h | 236 ++ include/builddefs.in | 173 ++ include/buildrules | 76 + include/handle.h | 53 + include/jdm.h | 61 + include/libxfs.h | 474 ++++ include/platform_defs.h.in | 107 + include/xfs_ag.h | 343 +++ include/xfs_alloc.h | 200 ++ include/xfs_alloc_btree.h | 251 ++ include/xfs_arch.h | 79 + include/xfs_attr_leaf.h | 305 +++ include/xfs_attr_sf.h | 156 ++ include/xfs_bit.h | 102 + include/xfs_bmap.h | 397 +++ include/xfs_bmap_btree.h | 661 +++++ include/xfs_btree.h | 573 +++++ include/xfs_buf_item.h | 180 ++ include/xfs_cred.h | 152 ++ include/xfs_da_btree.h | 340 +++ include/xfs_dfrag.h | 67 + include/xfs_dinode.h | 476 ++++ include/xfs_dir.h | 162 ++ include/xfs_dir2.h | 111 + include/xfs_dir2_block.h | 128 + include/xfs_dir2_data.h | 232 ++ include/xfs_dir2_leaf.h | 361 +++ include/xfs_dir2_node.h | 160 ++ include/xfs_dir2_sf.h | 256 ++ include/xfs_dir_leaf.h | 257 ++ include/xfs_dir_sf.h | 188 ++ include/xfs_dqblk.h | 99 + include/xfs_dquot_item.h | 104 + include/xfs_extfree_item.h | 123 + include/xfs_fs.h | 476 ++++ include/xfs_ialloc.h | 181 ++ include/xfs_ialloc_btree.h | 318 +++ include/xfs_imap.h | 54 + include/xfs_inode.h | 615 +++++ include/xfs_inode_item.h | 193 ++ include/xfs_inum.h | 173 ++ include/xfs_log.h | 183 ++ include/xfs_log_priv.h | 540 +++++ include/xfs_log_recover.h | 81 + include/xfs_mount.h | 490 
++++ include/xfs_quota.h | 320 +++ include/xfs_rtalloc.h | 164 ++ include/xfs_sb.h | 490 ++++ include/xfs_trans.h | 1000 ++++++++ include/xfs_trans_space.h | 105 + include/xfs_types.h | 303 +++ include/xqm.h | 166 ++ libxfs/Makefile | 62 + libxfs/init.c | 764 ++++++ libxfs/logitem.c | 496 ++++ libxfs/rdwr.c | 468 ++++ libxfs/trans.c | 754 ++++++ libxfs/util.c | 735 ++++++ libxfs/xfs.h | 548 +++++ libxfs/xfs_alloc.c | 2355 ++++++++++++++++++ libxfs/xfs_alloc_btree.c | 2136 +++++++++++++++++ libxfs/xfs_attr_leaf.c | 1169 +++++++++ libxfs/xfs_bit.c | 307 +++ libxfs/xfs_bmap.c | 4511 +++++++++++++++++++++++++++++++++++ libxfs/xfs_bmap_btree.c | 2528 ++++++++++++++++++++ libxfs/xfs_btree.c | 889 +++++++ libxfs/xfs_da_btree.c | 2524 ++++++++++++++++++++ libxfs/xfs_dir.c | 622 +++++ libxfs/xfs_dir2.c | 594 +++++ libxfs/xfs_dir2_block.c | 1094 +++++++++ libxfs/xfs_dir2_data.c | 832 +++++++ libxfs/xfs_dir2_leaf.c | 1496 ++++++++++++ libxfs/xfs_dir2_node.c | 1988 +++++++++++++++ libxfs/xfs_dir2_sf.c | 1119 +++++++++ libxfs/xfs_dir_leaf.c | 1695 +++++++++++++ libxfs/xfs_ialloc.c | 1113 +++++++++ libxfs/xfs_ialloc_btree.c | 1552 ++++++++++++ libxfs/xfs_inode.c | 1371 +++++++++++ libxfs/xfs_mount.c | 214 ++ libxfs/xfs_rtalloc.c | 835 +++++++ libxfs/xfs_rtbit.c | 61 + libxfs/xfs_trans.c | 79 + logprint/Makefile | 50 + logprint/log_misc.c | 1184 +++++++++ logprint/log_print_all.c | 593 +++++ logprint/log_print_trans.c | 146 ++ logprint/logprint.c | 247 ++ logprint/logprint.h | 162 ++ man/Makefile | 41 + man/man5/Makefile | 49 + man/man5/xfs.5 | 114 + man/man8/Makefile | 49 + man/man8/fsck.xfs.8 | 23 + man/man8/mkfs.xfs.8 | 485 ++++ man/man8/xfs_admin.8 | 68 + man/man8/xfs_bmap.8 | 54 + man/man8/xfs_check.8 | 177 ++ man/man8/xfs_db.8 | 1187 +++++++++ man/man8/xfs_growfs.8 | 135 ++ man/man8/xfs_logprint.8 | 86 + man/man8/xfs_mkfile.8 | 27 + man/man8/xfs_ncheck.8 | 53 + man/man8/xfs_repair.8 | 353 +++ mkfile/Makefile | 45 + mkfile/xfs_mkfile.c | 284 +++ mkfs/Makefile | 59 + 
mkfs/maxtrres.c | 193 ++ mkfs/proto.c | 769 ++++++ mkfs/proto.h | 35 + mkfs/xfs_mkfs.c | 1944 +++++++++++++++ mkfs/xfs_mkfs.h | 50 + repair/Makefile | 72 + repair/README | 718 ++++++ repair/agheader.c | 432 ++++ repair/agheader.h | 113 + repair/attr_repair.c | 1067 +++++++++ repair/attr_repair.h | 47 + repair/avl.c | 1465 ++++++++++++ repair/avl.h | 143 ++ repair/avl64.c | 1458 +++++++++++ repair/avl64.h | 151 ++ repair/bmap.c | 409 ++++ repair/bmap.h | 87 + repair/dino_chunks.c | 1178 +++++++++ repair/dinode.c | 2914 ++++++++++++++++++++++ repair/dinode.h | 155 ++ repair/dir.c | 3033 +++++++++++++++++++++++ repair/dir.h | 160 ++ repair/dir2.c | 2070 ++++++++++++++++ repair/dir2.h | 124 + repair/dir_stack.c | 136 ++ repair/dir_stack.h | 47 + repair/err_protos.h | 36 + repair/globals.c | 37 + repair/globals.h | 205 ++ repair/incore.c | 308 +++ repair/incore.h | 564 +++++ repair/incore_bmc.c | 57 + repair/incore_ext.c | 1000 ++++++++ repair/incore_ino.c | 834 +++++++ repair/init.c | 69 + repair/io.c | 76 + repair/phase1.c | 128 + repair/phase2.c | 173 ++ repair/phase3.c | 215 ++ repair/phase4.c | 1337 +++++++++++ repair/phase5.c | 1633 +++++++++++++ repair/phase6.c | 3971 ++++++++++++++++++++++++++++++ repair/phase7.c | 186 ++ repair/protos.h | 59 + repair/rt.c | 297 +++ repair/rt.h | 56 + repair/sb.c | 824 +++++++ repair/scan.c | 1279 ++++++++++ repair/scan.h | 116 + repair/versions.c | 307 +++ repair/versions.h | 95 + repair/xfs_repair.c | 582 +++++ 278 files changed, 108229 insertions(+) create mode 100644 bmap/Makefile create mode 100644 bmap/xfs_bmap.c create mode 100644 build/Makefile create mode 100644 build/rpm/Makefile create mode 100644 build/rpm/macros.template create mode 100644 build/rpm/rpm-2.rc.template create mode 100644 build/tar/Makefile create mode 100644 db/Makefile create mode 100644 db/addr.c create mode 100644 db/addr.h create mode 100644 db/agf.c create mode 100644 db/agf.h create mode 100644 db/agfl.c create mode 100644 db/agfl.h create mode 
100644 db/agi.c create mode 100644 db/agi.h create mode 100644 db/attr.c create mode 100644 db/attr.h create mode 100644 db/attrshort.c create mode 100644 db/attrshort.h create mode 100644 db/bit.c create mode 100644 db/bit.h create mode 100644 db/block.c create mode 100644 db/block.h create mode 100644 db/bmap.c create mode 100644 db/bmap.h create mode 100644 db/bmapbt.c create mode 100644 db/bmapbt.h create mode 100644 db/bmroot.c create mode 100644 db/bmroot.h create mode 100644 db/bnobt.c create mode 100644 db/bnobt.h create mode 100644 db/check.c create mode 100644 db/check.h create mode 100644 db/cntbt.c create mode 100644 db/cntbt.h create mode 100644 db/command.c create mode 100644 db/command.h create mode 100644 db/convert.c create mode 100644 db/convert.h create mode 100644 db/data.c create mode 100644 db/data.h create mode 100644 db/dbread.c create mode 100644 db/dbread.h create mode 100644 db/debug.c create mode 100644 db/debug.h create mode 100644 db/dir.c create mode 100644 db/dir.h create mode 100644 db/dir2.c create mode 100644 db/dir2.h create mode 100644 db/dir2sf.c create mode 100644 db/dir2sf.h create mode 100644 db/dirshort.c create mode 100644 db/dirshort.h create mode 100644 db/dquot.c create mode 100644 db/dquot.h create mode 100644 db/echo.c create mode 100644 db/echo.h create mode 100644 db/faddr.c create mode 100644 db/faddr.h create mode 100644 db/field.c create mode 100644 db/field.h create mode 100644 db/flist.c create mode 100644 db/flist.h create mode 100644 db/fprint.c create mode 100644 db/fprint.h create mode 100644 db/frag.c create mode 100644 db/frag.h create mode 100644 db/freesp.c create mode 100644 db/freesp.h create mode 100644 db/hash.c create mode 100644 db/hash.h create mode 100644 db/help.c create mode 100644 db/help.h create mode 100644 db/init.c create mode 100644 db/init.h create mode 100644 db/inobt.c create mode 100644 db/inobt.h create mode 100644 db/inode.c create mode 100644 db/inode.h create mode 100644 
db/input.c create mode 100644 db/input.h create mode 100644 db/io.c create mode 100644 db/io.h create mode 100644 db/main.c create mode 100644 db/malloc.c create mode 100644 db/malloc.h create mode 100644 db/mount.c create mode 100644 db/mount.h create mode 100644 db/output.c create mode 100644 db/output.h create mode 100644 db/print.c create mode 100644 db/print.h create mode 100644 db/quit.c create mode 100644 db/quit.h create mode 100644 db/sb.c create mode 100644 db/sb.h create mode 100644 db/sig.c create mode 100644 db/sig.h create mode 100644 db/strvec.c create mode 100644 db/strvec.h create mode 100644 db/type.c create mode 100644 db/type.h create mode 100644 db/uuid.c create mode 100644 db/uuid.h create mode 100644 db/write.c create mode 100644 db/write.h create mode 100755 db/xfs_admin.sh create mode 100755 db/xfs_check.sh create mode 100755 db/xfs_check64.sh create mode 100755 db/xfs_ncheck.sh create mode 100755 db/xfs_ncheck64.sh create mode 100644 doc/Makefile create mode 100644 doc/README.LVM create mode 100644 fsck/Makefile create mode 100644 fsck/xfs_fsck.c create mode 100644 growfs/Makefile create mode 100644 growfs/xfs_growfs.c create mode 100755 growfs/xfs_info.sh create mode 100644 include/Makefile create mode 100644 include/arch.h create mode 100644 include/builddefs.in create mode 100644 include/buildrules create mode 100644 include/handle.h create mode 100644 include/jdm.h create mode 100644 include/libxfs.h create mode 100644 include/platform_defs.h.in create mode 100644 include/xfs_ag.h create mode 100644 include/xfs_alloc.h create mode 100644 include/xfs_alloc_btree.h create mode 100644 include/xfs_arch.h create mode 100644 include/xfs_attr_leaf.h create mode 100644 include/xfs_attr_sf.h create mode 100644 include/xfs_bit.h create mode 100644 include/xfs_bmap.h create mode 100644 include/xfs_bmap_btree.h create mode 100644 include/xfs_btree.h create mode 100644 include/xfs_buf_item.h create mode 100644 include/xfs_cred.h create mode 100644 
include/xfs_da_btree.h create mode 100644 include/xfs_dfrag.h create mode 100644 include/xfs_dinode.h create mode 100644 include/xfs_dir.h create mode 100644 include/xfs_dir2.h create mode 100644 include/xfs_dir2_block.h create mode 100644 include/xfs_dir2_data.h create mode 100644 include/xfs_dir2_leaf.h create mode 100644 include/xfs_dir2_node.h create mode 100644 include/xfs_dir2_sf.h create mode 100644 include/xfs_dir_leaf.h create mode 100644 include/xfs_dir_sf.h create mode 100644 include/xfs_dqblk.h create mode 100644 include/xfs_dquot_item.h create mode 100644 include/xfs_extfree_item.h create mode 100644 include/xfs_fs.h create mode 100644 include/xfs_ialloc.h create mode 100644 include/xfs_ialloc_btree.h create mode 100644 include/xfs_imap.h create mode 100644 include/xfs_inode.h create mode 100644 include/xfs_inode_item.h create mode 100644 include/xfs_inum.h create mode 100644 include/xfs_log.h create mode 100644 include/xfs_log_priv.h create mode 100644 include/xfs_log_recover.h create mode 100644 include/xfs_mount.h create mode 100644 include/xfs_quota.h create mode 100644 include/xfs_rtalloc.h create mode 100644 include/xfs_sb.h create mode 100644 include/xfs_trans.h create mode 100644 include/xfs_trans_space.h create mode 100644 include/xfs_types.h create mode 100644 include/xqm.h create mode 100644 libxfs/Makefile create mode 100644 libxfs/init.c create mode 100644 libxfs/logitem.c create mode 100644 libxfs/rdwr.c create mode 100644 libxfs/trans.c create mode 100644 libxfs/util.c create mode 100644 libxfs/xfs.h create mode 100644 libxfs/xfs_alloc.c create mode 100644 libxfs/xfs_alloc_btree.c create mode 100644 libxfs/xfs_attr_leaf.c create mode 100644 libxfs/xfs_bit.c create mode 100644 libxfs/xfs_bmap.c create mode 100644 libxfs/xfs_bmap_btree.c create mode 100644 libxfs/xfs_btree.c create mode 100644 libxfs/xfs_da_btree.c create mode 100644 libxfs/xfs_dir.c create mode 100644 libxfs/xfs_dir2.c create mode 100644 libxfs/xfs_dir2_block.c create 
mode 100644 libxfs/xfs_dir2_data.c create mode 100644 libxfs/xfs_dir2_leaf.c create mode 100644 libxfs/xfs_dir2_node.c create mode 100644 libxfs/xfs_dir2_sf.c create mode 100644 libxfs/xfs_dir_leaf.c create mode 100644 libxfs/xfs_ialloc.c create mode 100644 libxfs/xfs_ialloc_btree.c create mode 100644 libxfs/xfs_inode.c create mode 100644 libxfs/xfs_mount.c create mode 100644 libxfs/xfs_rtalloc.c create mode 100644 libxfs/xfs_rtbit.c create mode 100644 libxfs/xfs_trans.c create mode 100644 logprint/Makefile create mode 100644 logprint/log_misc.c create mode 100644 logprint/log_print_all.c create mode 100644 logprint/log_print_trans.c create mode 100644 logprint/logprint.c create mode 100644 logprint/logprint.h create mode 100644 man/Makefile create mode 100644 man/man5/Makefile create mode 100644 man/man5/xfs.5 create mode 100644 man/man8/Makefile create mode 100644 man/man8/fsck.xfs.8 create mode 100644 man/man8/mkfs.xfs.8 create mode 100644 man/man8/xfs_admin.8 create mode 100644 man/man8/xfs_bmap.8 create mode 100644 man/man8/xfs_check.8 create mode 100644 man/man8/xfs_db.8 create mode 100644 man/man8/xfs_growfs.8 create mode 100644 man/man8/xfs_logprint.8 create mode 100644 man/man8/xfs_mkfile.8 create mode 100644 man/man8/xfs_ncheck.8 create mode 100644 man/man8/xfs_repair.8 create mode 100644 mkfile/Makefile create mode 100644 mkfile/xfs_mkfile.c create mode 100644 mkfs/Makefile create mode 100644 mkfs/maxtrres.c create mode 100644 mkfs/proto.c create mode 100644 mkfs/proto.h create mode 100644 mkfs/xfs_mkfs.c create mode 100644 mkfs/xfs_mkfs.h create mode 100644 repair/Makefile create mode 100644 repair/README create mode 100644 repair/agheader.c create mode 100644 repair/agheader.h create mode 100644 repair/attr_repair.c create mode 100644 repair/attr_repair.h create mode 100644 repair/avl.c create mode 100644 repair/avl.h create mode 100644 repair/avl64.c create mode 100644 repair/avl64.h create mode 100644 repair/bmap.c create mode 100644 repair/bmap.h 
create mode 100644 repair/dino_chunks.c create mode 100644 repair/dinode.c create mode 100644 repair/dinode.h create mode 100644 repair/dir.c create mode 100644 repair/dir.h create mode 100644 repair/dir2.c create mode 100644 repair/dir2.h create mode 100644 repair/dir_stack.c create mode 100644 repair/dir_stack.h create mode 100644 repair/err_protos.h create mode 100644 repair/globals.c create mode 100644 repair/globals.h create mode 100644 repair/incore.c create mode 100644 repair/incore.h create mode 100644 repair/incore_bmc.c create mode 100644 repair/incore_ext.c create mode 100644 repair/incore_ino.c create mode 100644 repair/init.c create mode 100644 repair/io.c create mode 100644 repair/phase1.c create mode 100644 repair/phase2.c create mode 100644 repair/phase3.c create mode 100644 repair/phase4.c create mode 100644 repair/phase5.c create mode 100644 repair/phase6.c create mode 100644 repair/phase7.c create mode 100644 repair/protos.h create mode 100644 repair/rt.c create mode 100644 repair/rt.h create mode 100644 repair/sb.c create mode 100644 repair/scan.c create mode 100644 repair/scan.h create mode 100644 repair/versions.c create mode 100644 repair/versions.h create mode 100644 repair/xfs_repair.c diff --git a/bmap/Makefile b/bmap/Makefile new file mode 100644 index 000000000..741ae5682 --- /dev/null +++ b/bmap/Makefile @@ -0,0 +1,45 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. 
Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = .. +include $(TOPDIR)/include/builddefs + +CMDTARGET = xfs_bmap +CFILES = xfs_bmap.c + +default: $(CMDTARGET) + +include $(BUILDRULES) + +install: default + $(INSTALL) -m 755 -d $(XFS_CMDS_BIN_DIR) + $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_BIN_DIR) diff --git a/bmap/xfs_bmap.c b/bmap/xfs_bmap.c new file mode 100644 index 000000000..bd594a7bc --- /dev/null +++ b/bmap/xfs_bmap.c @@ -0,0 +1,405 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * Bmap display utility for xfs. + */ + +#include +#include +#include +#include +#include +#include +#include + +int aflag = 0; /* Attribute fork. */ +int lflag = 0; /* list number of blocks with each extent */ +int nflag = 0; /* number of extents specified */ +int vflag = 0; /* Verbose output */ +int bmv_iflags = 0; /* Input flags for XFS_IOC_GETBMAPX */ + +int dofile(char *); +__off64_t file_size(int fd, char * fname); +int numlen(__off64_t); + +int +main(int argc, char **argv) +{ + char *fname; + int i = 0; + int option; + + fname = basename(argv[0]); + while ((option = getopt(argc, argv, "adln:pvV")) != EOF) { + switch (option) { + case 'a': + bmv_iflags |= BMV_IF_ATTRFORK; + aflag = 1; + break; + case 'l': + lflag = 1; + break; + case 'n': + nflag = atoi(optarg); + break; + case 'd': + /* do not recall possibly offline DMAPI files */ + bmv_iflags |= BMV_IF_NO_DMAPI_READ; + break; + case 'p': + /* report unwritten preallocated blocks */ + bmv_iflags |= BMV_IF_PREALLOC; + break; + case 'v': + vflag++; + break; + case 'V': + printf("%s version %s\n", fname, VERSION); + break; + default: + fprintf(stderr, "Usage: %s [-adlpV] [-n nx] file...\n", + fname); + exit(1); + } + } + if (aflag) + bmv_iflags &= ~(BMV_IF_PREALLOC|BMV_IF_NO_DMAPI_READ); + while (optind < argc) { + fname = argv[optind]; + i += dofile(fname); + optind++; + } + return(i ? 
1 : 0); +} + +__off64_t +file_size(int fd, char *fname) +{ + struct stat64 st; + int i; + int errno_save; + + errno_save = errno; /* in case fstat64 fails */ + i = fstat64(fd, &st); + if (i < 0) { + fprintf(stderr,"fstat64 failed for %s", fname); + perror("fstat64"); + errno = errno_save; + return -1; + } + return st.st_size; +} + + +int +dofile(char *fname) +{ + int fd; + struct fsxattr fsx; + int i; + struct getbmapx *map; + char mbuf[1024]; + int map_size; + int loop = 0; + xfs_fsop_geom_t fsgeo; + + fd = open(fname, O_RDONLY); + if (fd < 0) { + sprintf(mbuf, "open %s", fname); + perror(mbuf); + return 1; + } + + if (vflag) { + if (ioctl(fd, XFS_IOC_FSGEOMETRY, &fsgeo) < 0) { + sprintf(mbuf, "Can't get XFS geom, %s", fname); + perror(mbuf); + close(fd); + return 1; + } + + if (vflag > 1) + printf( + "xfs_bmap: fsgeo.agblocks=%u, fsgeo.blocksize=%u, fsgeo.agcount=%u\n", + fsgeo.agblocks, fsgeo.blocksize, + fsgeo.agcount); + + if ((ioctl(fd, XFS_IOC_FSGETXATTR, &fsx)) < 0) { + sprintf(mbuf, "Can't read attrs %s", fname); + perror(mbuf); + close(fd); + return 1; + } + + if (vflag > 1) + printf( + "xfs_bmap: fsx.dsx_xflags=%u, fsx.fsx_extsize=%u, fsx.fsx_nextents=%u\n", + fsx.fsx_xflags, fsx.fsx_extsize, + fsx.fsx_nextents); + + if (fsx.fsx_xflags == XFS_XFLAG_REALTIME) { + /* + * ag info not applicable to rt, continue + * without ag output. + */ + vflag = 0; + } + } + + map_size = nflag ? nflag+1 : 32; /* initial guess - 256 for checkin KCM */ + map = malloc(map_size*sizeof(*map)); + if (map == NULL) { + fprintf(stderr, "malloc of %d bytes failed.\n", + map_size*sizeof(*map)); + close(fd); + return 1; + } + + +/* Try the ioctl(XFS_IOC_GETBMAPX) for the number of extents specified by + * nflag, or the initial guess number of extents (256). + * + * If there are more extents than we guessed, use ioctl + * (XFS_IOC_FSGETXATTR[A]) to get the extent count, realloc some more + * space based on this count, and try again. 
+ * + * If the initial FGETBMAPX attempt returns EINVAL, this may mean + * that we tried the FGETBMAPX on a zero length file. If we get + * EINVAL, check the length with fstat() and return "no extents" + * if the length == 0. + * + * Why not do the ioctl(XFS_IOC_FSGETXATTR[A]) first? Two reasons: + * (1) The extent count may be wrong for a file with delayed + * allocation blocks. The XFS_IOC_GETBMAPX forces the real + * allocation and fixes up the extent count. + * (2) For XFS_IOC_GETBMAP[X] on a DMAPI file that has been moved + * offline by a DMAPI application (e.g., DMF) the + * XFS_IOC_FSGETXATTR only reflects the extents actually online. + * Doing XFS_IOC_GETBMAPX call first forces the data blocks online + * and then everything proceeds normally (see PV #545725). + * + * If you don't want this behavior on a DMAPI offline file, + * try the "-d" option which sets the BMV_IF_NO_DMAPI_READ + * iflag for XFS_IOC_GETBMAPX. + */ + + do { /* loop a maximum of two times */ + + bzero(map, sizeof(*map)); /* zero header */ + + map->bmv_length = -1; + map->bmv_count = map_size; + map->bmv_iflags = bmv_iflags; + + i = ioctl(fd, XFS_IOC_GETBMAPX, map); + + if (vflag > 1) + printf( + "xfs_bmap: i=%d map.bmv_offset=%lld, map.bmv_block=%lld, " + "map.bmv_length=%lld, map.bmv_count=%d, map.bmv_entries=%d\n", + i, map->bmv_offset, map->bmv_block, + map->bmv_length, map->bmv_count, + map->bmv_entries); + if (i < 0) { + if ( errno == EINVAL + && !aflag && file_size(fd, fname) == 0) { + break; + } else { + sprintf(mbuf, "ioctl(XFS_IOC_GETBMAPX (iflags 0x%x) %s", + map->bmv_iflags, fname); + perror(mbuf); + close(fd); + free(map); + return 1; + } + } + if (nflag) + break; + if (map->bmv_entries < map->bmv_count-1) + break; + /* Get number of extents from ioctl XFS_IOC_FSGETXATTR[A] + * syscall. + */ + i = ioctl(fd, aflag ? XFS_IOC_FSGETXATTRA : XFS_IOC_FSGETXATTR, &fsx); + if (i < 0) { + sprintf(mbuf, "ioctl(XFS_IOC_FSGETXATTR%s) %s", + aflag ? 
"A" : "", fname); + perror(mbuf); + close(fd); + free(map); + return 1; + } + if (fsx.fsx_nextents >= map_size-1) { + map_size = 2*(fsx.fsx_nextents+1); + map = realloc(map, map_size*sizeof(*map)); + if (map == NULL) { + fprintf(stderr,"cannot realloc %d bytes.\n", + map_size*sizeof(*map)); + close(fd); + return 1; + } + } + } while (++loop < 2); + if (!nflag) { + if (map->bmv_entries <= 0) { + printf("%s: no extents\n", fname); + close(fd); + free(map); + return 0; + } + } + close(fd); + printf("%s:\n", fname); + if (!vflag) { + for (i = 0; i < map->bmv_entries; i++) { + printf("\t%d: [%lld..%lld]: ", i, + map[i + 1].bmv_offset, + map[i + 1].bmv_offset + + map[i + 1].bmv_length - 1LL); + if (map[i + 1].bmv_block == -1) + printf("hole"); + else { + printf("%lld..%lld", map[i + 1].bmv_block, + map[i + 1].bmv_block + + map[i + 1].bmv_length - 1LL); + + } + if (lflag) + printf(" %lld blocks\n", map[i+1].bmv_length); + else + printf("\n"); + } + } else { + /* + * Verbose mode displays: + * extent: [startoffset..endoffset]: startblock..endblock \ + * ag# (agoffset..agendoffset) totalbbs + */ +#define MINRANGE_WIDTH 16 +#define MINAG_WIDTH 2 +#define MINTOT_WIDTH 5 +#define max(a,b) (a > b ? a : b) + int agno; + __off64_t agoff, bbperag; + int foff_w, boff_w, aoff_w, tot_w, agno_w; + char rbuf[32], bbuf[32], abuf[32]; + + foff_w = boff_w = aoff_w = MINRANGE_WIDTH; + tot_w = MINTOT_WIDTH; + bbperag = (__off64_t)fsgeo.agblocks * + (__off64_t)fsgeo.blocksize / BBSIZE; + + /* + * Go through the extents and figure out the width + * needed for all columns. 
+ */ + for (i = 0; i < map->bmv_entries; i++) { + sprintf(rbuf, "[%lld..%lld]:", + map[i + 1].bmv_offset, + map[i + 1].bmv_offset + + map[i + 1].bmv_length - 1LL); + if (map[i + 1].bmv_block == -1) { + foff_w = max(foff_w, strlen(rbuf)); + tot_w = max(tot_w, + numlen(map[i+1].bmv_length)); + } else { + sprintf(bbuf, "%lld..%lld", + map[i + 1].bmv_block, + map[i + 1].bmv_block + + map[i + 1].bmv_length - 1LL); + agno = map[i + 1].bmv_block / bbperag; + agoff = map[i + 1].bmv_block - (agno * bbperag); + sprintf(abuf, "(%lld..%lld)", + agoff, + (agoff + map[i + 1].bmv_length - 1LL)); + foff_w = max(foff_w, strlen(rbuf)); + boff_w = max(boff_w, strlen(bbuf)); + aoff_w = max(aoff_w, strlen(abuf)); + tot_w = max(tot_w, + numlen(map[i+1].bmv_length)); + } + } + agno_w = max(MINAG_WIDTH, numlen(fsgeo.agcount)); + printf("%4s: %-*s %-*s %*s %-*s %*s\n", + "EXT", + foff_w, "FILE-OFFSET", + boff_w, "BLOCK-RANGE", + agno_w, "AG", + aoff_w, "AG-OFFSET", + tot_w, "TOTAL"); + for (i = 0; i < map->bmv_entries; i++) { + sprintf(rbuf, "[%lld..%lld]:", + map[i + 1].bmv_offset, + map[i + 1].bmv_offset + + map[i + 1].bmv_length - 1LL); + if (map[i + 1].bmv_block == -1) { + printf("%4d: %-*s %-*s %*s %-*s %*lld\n", + i, + foff_w, rbuf, + boff_w, "hole", + agno_w, "", + aoff_w, "", + tot_w, map[i+1].bmv_length); + } else { + sprintf(bbuf, "%lld..%lld", + map[i + 1].bmv_block, + map[i + 1].bmv_block + + map[i + 1].bmv_length - 1LL); + agno = map[i + 1].bmv_block / bbperag; + agoff = map[i + 1].bmv_block - (agno * bbperag); + sprintf(abuf, "(%lld..%lld)", + agoff, + (agoff + map[i + 1].bmv_length - 1LL)); + printf("%4d: %-*s %-*s %*d %-*s %*lld\n", + i, + foff_w, rbuf, + boff_w, bbuf, + agno_w, agno, + aoff_w, abuf, + tot_w, map[i+1].bmv_length); + } + } + } + free(map); + return 0; +} + +int +numlen( __off64_t val) +{ + __off64_t tmp; + int len; + + for (len=0, tmp=val; tmp > 0; tmp=tmp/10) len++; + return(len == 0 ? 
1 : len); +} diff --git a/build/Makefile b/build/Makefile new file mode 100644 index 000000000..7e336de44 --- /dev/null +++ b/build/Makefile @@ -0,0 +1,78 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = .. 
+include $(TOPDIR)/include/builddefs + +MANIFEST=src-manifest +SRCTAR=$(PACKAGE_NAME)-$(PACKAGE_VERSION).src.tar.gz +SRCTAR2=$(PACKAGE_NAME)_$(PACKAGE_VERSION).orig.tar.gz + +LDIRT = $(MANIFEST) $(SRCTAR) $(SRCTAR2) bin-manifest $(TOPDIR)/$(PACKAGE_NAME)-$(PACKAGE_VERSION) + +# for clean and clobber +SUBDIRS = tar rpm deb + +# nothing to build here (it's all packaging) +default install : + +include $(BUILDRULES) + +# Symlink in the TOPDIR is used to pack files relative to +# product-version directory. +$(MANIFEST) : $(_FORCE) + @if [ ! -L $(TOPDIR)/$(PACKAGE_NAME)-$(PACKAGE_VERSION) ] ; then \ + $(LN_S) . $(TOPDIR)/$(PACKAGE_NAME)-$(PACKAGE_VERSION) ; \ + fi + @CDIR=`pwd`; cd $(TOPDIR); \ + $(MAKE) --no-print-directory source | \ + sed -e 's/^\./$(PACKAGE_NAME)-$(PACKAGE_VERSION)/' > $$CDIR/$@ ;\ + if [ $$? -ne 0 ] ; then \ + exit 1; \ + else \ + unset TAPE; \ + $(TAR) -T $$CDIR/$@ -cf - | $(ZIP) --best > $$CDIR/$(SRCTAR); \ + $(LN_S) $$CDIR/$(SRCTAR) $$CDIR/$(SRCTAR2); \ + fi + +dist : default $(MANIFEST) + @DIST_MANIFEST=`pwd`/bin-manifest; DIST_ROOT=/tmp/$$$$; \ + export DIST_MANIFEST DIST_ROOT; \ + rm -f $$DIST_MANIFEST; \ + echo === install === && $(MAKE) -C $(TOPDIR) install || exit $$?; \ + if [ -x $(TAR) ]; then \ + ( echo "=== tar ===" && $(MAKEF) -C tar $@ || exit $$? ); \ + fi; \ + if [ -x $(RPM) ]; then \ + ( echo "=== rpm ===" && $(MAKEF) -C rpm $@ || exit $$? ); \ + fi; \ + test -z "$$KEEP_DIST_ROOT" || rm -rf $$DIST_ROOT; echo Done diff --git a/build/rpm/Makefile b/build/rpm/Makefile new file mode 100644 index 000000000..26253705d --- /dev/null +++ b/build/rpm/Makefile @@ -0,0 +1,78 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. 
+# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = ../.. 
+TREEROOT = $(shell cd ${TOPDIR}; pwd) +include $(TOPDIR)/include/builddefs + +SPECF = xfs-cmds.spec + +LDIRT = $(PACKAGE_NAME)-$(PACKAGE_VERSION)-[0-9]*.*.rpm $(SPECF) \ + rpmmacros rpm-$(RPM_VERSION).rc + +LSRCFILES = macros.template $(SPECF).in rpm-2.rc.template + +default install : + +include $(BUILDRULES) + +# generate a binary rpm file +dist : default $(SPECF) rpm-$(RPM_VERSION).rc + $(RPM) -ba --rcfile ./rpm-$(RPM_VERSION).rc $(SPECF) + +# Because rpm prior to v.2.90 does not support macros and old style config +# is not supported by rpm v.3, we have to resort to such ugly hacks +# Note: the reference must be spelled $(RPM_VERSION); a bare $RPM_VERSION +# expands as $(R) followed by the literal text "PM_VERSION", which never +# equals 2, so the rpm-2 branch below would be unreachable. +ifneq ($(RPM_VERSION),2) +rpm-$(RPM_VERSION).rc : rpmmacros + sed -e '/^macrofiles:/s|~/.rpmmacros|./rpmmacros|' $@ + +rpmmacros : macros.template + @sed -e 's|%topdir%|$(TREEROOT)|g' < $< > $@ +else +rpm-2.rc: rpm-2.rc.template + @sed -e 's|%topdir%|$(TOPDIR)|g' < $< > $@ +endif + +.PHONY: $(SPECF) +${SPECF} : ${SPECF}.in + sed -e's|@package_name@|$(PACKAGE_NAME)|g' \ + -e's|@package_version@|$(PACKAGE_VERSION)|g' \ + -e's|@package_release@|$(PACKAGE_RELEASE)|g' \ + -e's|@package_distribution@|$(PACKAGE_DISTRIBUTION)|g' \ + -e's|@package_builder@|$(PACKAGE_BUILDER)|g' \ + -e's|@build_root@|$(DIST_ROOT)|g' \ + -e'/^BuildRoot: *$$/d' \ + -e's|@xfs_cmds_var_dir@|$(XFS_CMDS_VAR_DIR)|g' \ + -e's|@xfs_cmds_share_dir@|$(XFS_CMDS_SHARE_DIR)|g' \ + -e's|@xfs_cmds_log_dir@|$(XFS_CMDS_LOG_DIR)|g' \ + -e's|@xfs_cmds_tmp_dir@|$(XFS_CMDS_TMP_DIR)|g' \ + -e's|@make@|$(MAKE)|g' < $< > $@ diff --git a/build/rpm/macros.template b/build/rpm/macros.template new file mode 100644 index 000000000..6ab46e1e1 --- /dev/null +++ b/build/rpm/macros.template @@ -0,0 +1,30 @@ +# +# rpmrc.template +# +# Template to fudge rpm directory structure inside IRIX-like build +# environment + +# Force 386 build on all platforms +%_target i386-pc-linux +%_target_cpu i386 +%_target_os linux + +# topdir == $(WORKAREA) +%_topdir %topdir% + +# Following directories are specific to the topdir +# This is where build is done. 
In our case it's the same as $WORKAREA +%_builddir %topdir% + +# This is where foo.1.99.tar.gz is living in the real world. +# Be careful not to run full rpm build as it will override the sources +%_sourcedir %topdir%/build + +# This is where binary RPM and source RPM would end up +%_rpmdir %topdir%/build/rpm +%_srcrpmdir %topdir%/build/rpm +%_specdir %topdir%/build/rpm + +# Leave RPM files in the same directory - we're not building for +# multiple architectures +%_rpmfilename %%{NAME}-%%{VERSION}-%%{RELEASE}.%%{ARCH}.rpm diff --git a/build/rpm/rpm-2.rc.template b/build/rpm/rpm-2.rc.template new file mode 100644 index 000000000..f3b3eba3a --- /dev/null +++ b/build/rpm/rpm-2.rc.template @@ -0,0 +1,25 @@ +# +# rpmrc.template +# +# Template to fudge rpm directory structure inside IRIX-like build +# environment + +# topdir == $(WORKAREA) +topdir: %topdir% + +# Following directories are specific to the topdir +# This is where build is done. In our case it's the same as $WORKAREA +# Be careful not to run full rpm build as it will override the sources +builddir: %topdir% + +# This is where foo.1.99.tar.gz is living in the real world. +sourcedir: %topdir%/build + +# This is where binary RPM and source RPM would end up +rpmdir: %topdir%/build/rpm +srcrpmdir: %topdir%/build/rpm +specdir: %topdir%/build/rpm + +# Leave RPM files in the same directory - we're not building for +# multiple architectures +rpmfilename: %{NAME}-%{VERSION}-%{RELEASE}.%{ARCH}.rpm diff --git a/build/tar/Makefile b/build/tar/Makefile new file mode 100644 index 000000000..e010d9736 --- /dev/null +++ b/build/tar/Makefile @@ -0,0 +1,50 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. 
+# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = ../.. +include $(TOPDIR)/include/builddefs + +BINTAR=$(PACKAGE_NAME)-$(PACKAGE_VERSION).tar.gz +LDIRT = $(BINTAR) + +default install : + +include $(BUILDRULES) + +dist : default + @HERE=`pwd`; cd $${DIST_ROOT:-/}; \ + sort $$HERE/../bin-manifest | uniq | $(AWK) ' \ + $$1 == "f" { printf (".%s\n", $$6); } \ + $$1 == "d" { next; } \ + $$1 == "l" { printf (".%s\n", $$3); }' \ + | $(TAR) -T - -cf - | $(ZIP) --best > $$HERE/$(BINTAR) + @echo Wrote: `pwd`/$(BINTAR) diff --git a/db/Makefile b/db/Makefile new file mode 100644 index 000000000..74cacbae3 --- /dev/null +++ b/db/Makefile @@ -0,0 +1,58 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. 
+# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = .. 
+include $(TOPDIR)/include/builddefs + +CMDTARGET = xfs_db +CMDDEPS = $(LIBXFS) + +HFILES = addr.h agf.h agfl.h agi.h attr.h attrshort.h bit.h block.h bmap.h \ + bmapbt.h bmroot.h bnobt.h check.h cntbt.h command.h convert.h data.h \ + dbread.h debug.h dir.h dir2.h dir2sf.h dirshort.h dquot.h echo.h \ + faddr.h field.h flist.h fprint.h frag.h freesp.h hash.h help.h \ + init.h inobt.h inode.h input.h io.h malloc.h mount.h output.h \ + print.h quit.h sb.h uuid.h sig.h strvec.h type.h write.h +CFILES = $(HFILES:.h=.c) main.c +LSRCFILES = xfs_admin.sh xfs_check.sh xfs_ncheck.sh +LLDLIBS = $(LIBXFS) $(LIBUUID) + +default: $(CMDTARGET) + +include $(BUILDRULES) + +install: default + $(INSTALL) -m 755 -d $(XFS_CMDS_BIN_DIR) + $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_BIN_DIR) + $(INSTALL) -m 755 xfs_admin.sh $(XFS_CMDS_BIN_DIR)/xfs_admin + $(INSTALL) -m 755 xfs_check.sh $(XFS_CMDS_BIN_DIR)/xfs_check + $(INSTALL) -m 755 xfs_ncheck.sh $(XFS_CMDS_BIN_DIR)/xfs_ncheck diff --git a/db/addr.c b/db/addr.c new file mode 100644 index 000000000..cb69ecf61 --- /dev/null +++ b/db/addr.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "addr.h" +#include "command.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "io.h" +#include "flist.h" +#include "inode.h" +#include "output.h" + +static int addr_f(int argc, char **argv); +static void addr_help(void); + +static const cmdinfo_t addr_cmd = + { "addr", "a", addr_f, 0, 1, 1, "[field-expression]", + "set current address", addr_help }; + +static void +addr_help(void) +{ + dbprintf( +"\n" +" 'addr' uses the given field to set the filesystem address and type\n" +"\n" +" Examples:\n" +"\n" +" sb\n" +" a rootino - set the type to inode and set position to the root inode\n" +" a u.bmx[0].startblock (for inode with blockmap)\n" +"\n" +); + +} + +/* + * Handler for the "addr" command. + * + * With no argument, print the current I/O cursor. Otherwise scan and + * parse argv[1] as a field expression against the fields of the current + * type, reject array ranges, and invoke the address function (adfunc) + * of the selected field's type on the current buffer. + * + * Always returns 0; problems are reported through dbprintf(). The field + * list obtained from flist_scan() is released on every path that follows + * a successful scan. + */ +static int +addr_f( + int argc, + char **argv) +{ + adfnc_t adf; + const ftattr_t *fa; + flist_t *fl; + const field_t *fld; + typnm_t next; + flist_t *tfl; + + if (argc == 1) { + print_iocur("current", iocur_top); + return 0; + } + if (cur_typ == NULL) { + dbprintf("no current type\n"); + return 0; + } + fld = cur_typ->fields; + if (fld != NULL && fld->name[0] == '\0') { + fa = &ftattrtab[fld->ftyp]; + ASSERT(fa->ftyp == fld->ftyp); + fld = fa->subfld; + } + if (fld == NULL) { + dbprintf("no fields for type %s\n", cur_typ->name); + return 0; + } + fl = flist_scan(argv[1]); + if (fl == NULL) + return 0; + if (!flist_parse(fld, fl, iocur_top->data, 0)) { + flist_free(fl); + return 0; + } + flist_print(fl); + for (tfl = fl; tfl->child != NULL; tfl = tfl->child) { + if ((tfl->flags & 
FL_OKLOW) && tfl->low < tfl->high) { + dbprintf("array not allowed for addr command\n"); + flist_free(fl); + return 0; + } + } + fld = tfl->fld; + next = fld->next; + if (next == TYP_INODATA) + next = inode_next_type(); + if (next == TYP_NONE) { + dbprintf("no next type for field %s\n", fld->name); + /* fl was allocated by flist_scan(); do not leak it */ + flist_free(fl); + return 0; + } + fa = &ftattrtab[fld->ftyp]; + ASSERT(fa->ftyp == fld->ftyp); + adf = fa->adfunc; + if (adf == NULL) { + dbprintf("no addr function for field %s (type %s)\n", + fld->name, fa->name); + /* fl was allocated by flist_scan(); do not leak it */ + flist_free(fl); + return 0; + } + (*adf)(iocur_top->data, tfl->offset, next); + flist_free(fl); + return 0; +} + +void +addr_init(void) +{ + add_command(&addr_cmd); +} diff --git a/db/addr.h b/db/addr.h new file mode 100644 index 000000000..4b61e4d4e --- /dev/null +++ b/db/addr.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern void addr_init(void); diff --git a/db/agf.c b/db/agf.c new file mode 100644 index 000000000..a9a5c4425 --- /dev/null +++ b/db/agf.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "agf.h" +#include "command.h" +#include "data.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "io.h" +#include "bit.h" +#include "output.h" +#include "mount.h" + +static int agf_f(int argc, char **argv); +static void agf_help(void); + +static const cmdinfo_t agf_cmd = + { "agf", NULL, agf_f, 0, 1, 1, "[agno]", + "set address to agf header", agf_help }; + +const field_t agf_hfld[] = { + { "", FLDT_AGF, OI(0), C1, 0, TYP_NONE }, + { NULL } +}; + +#define OFF(f) bitize(offsetof(xfs_agf_t, agf_ ## f)) +#define SZ(f) bitszof(xfs_agf_t, agf_ ## f) +const field_t agf_flds[] = { + { "magicnum", FLDT_UINT32X, OI(OFF(magicnum)), C1, 0, TYP_NONE }, + { "versionnum", FLDT_UINT32D, OI(OFF(versionnum)), C1, 0, TYP_NONE }, + { "seqno", FLDT_AGNUMBER, OI(OFF(seqno)), C1, 0, TYP_NONE }, + { "length", FLDT_AGBLOCK, OI(OFF(length)), C1, 0, TYP_NONE }, + { "roots", FLDT_AGBLOCK, OI(OFF(roots)), CI(XFS_BTNUM_AGF), + FLD_ARRAY|FLD_SKIPALL, TYP_NONE }, + { "bnoroot", FLDT_AGBLOCK, + OI(OFF(roots) + XFS_BTNUM_BNO * SZ(roots[XFS_BTNUM_BNO])), C1, 0, + TYP_BNOBT }, + { "cntroot", FLDT_AGBLOCK, + OI(OFF(roots) + XFS_BTNUM_CNT * SZ(roots[XFS_BTNUM_CNT])), C1, 0, + TYP_CNTBT }, + { "levels", FLDT_UINT32D, OI(OFF(levels)), CI(XFS_BTNUM_AGF), + FLD_ARRAY|FLD_SKIPALL, TYP_NONE }, + { "bnolevel", FLDT_UINT32D, + OI(OFF(levels) + XFS_BTNUM_BNO * SZ(levels[XFS_BTNUM_BNO])), C1, 0, + TYP_NONE }, + { "cntlevel", FLDT_UINT32D, + OI(OFF(levels) + XFS_BTNUM_CNT * SZ(levels[XFS_BTNUM_CNT])), C1, 0, + TYP_NONE }, + { "flfirst", FLDT_UINT32D, OI(OFF(flfirst)), C1, 0, TYP_NONE }, + { "fllast", FLDT_UINT32D, OI(OFF(fllast)), C1, 0, TYP_NONE }, + { "flcount", FLDT_UINT32D, 
OI(OFF(flcount)), C1, 0, TYP_NONE }, + { "freeblks", FLDT_EXTLEN, OI(OFF(freeblks)), C1, 0, TYP_NONE }, + { "longest", FLDT_EXTLEN, OI(OFF(longest)), C1, 0, TYP_NONE }, + { NULL } +}; + +static void +agf_help(void) +{ + dbprintf( +"\n" +" set allocation group free block list\n" +"\n" +" Example:\n" +"\n" +" agf 2 - move location to AGF in 2nd filesystem allocation group\n" +"\n" +" Located in the 2nd 512 byte block of each allocation group,\n" +" the AGF contains the root of two different freespace btrees:\n" +" The 'cnt' btree keeps track freespace indexed on section size.\n" +" The 'bno' btree tracks sections of freespace indexed on block number.\n" +); +} + +/* + * Handler for the "agf" command. + * + * With an argument, parse it as an allocation group number and make it + * the current AG; input that is empty, has trailing characters, or is + * not below sb_agcount is rejected with a message. With no argument the + * current AG is kept, defaulting to 0 when it is NULLAGNUMBER. The + * cursor is then set to the AGF type at that AG's XFS_AGF_DADDR. + * + * Always returns 0. + */ +static int +agf_f( + int argc, + char **argv) +{ + unsigned long agno; + char *p; + + if (argc > 1) { + /* + * Range-check at full strtoul() width: narrowing to + * xfs_agnumber_t first would let oversized values wrap + * around to a valid AG number. + */ + agno = strtoul(argv[1], &p, 0); + if (p == argv[1] || *p != '\0' || + agno >= mp->m_sb.sb_agcount) { + dbprintf("bad allocation group number %s\n", argv[1]); + return 0; + } + cur_agno = (xfs_agnumber_t)agno; + } else if (cur_agno == NULLAGNUMBER) + cur_agno = 0; + ASSERT(typtab[TYP_AGF].typnm == TYP_AGF); + set_cur(&typtab[TYP_AGF], XFS_AG_DADDR(mp, cur_agno, XFS_AGF_DADDR), 1, + DB_RING_ADD, NULL); + return 0; +} + +void +agf_init(void) +{ + add_command(&agf_cmd); +} + +/*ARGSUSED*/ +int +agf_size( + void *obj, + int startoff, + int idx) +{ + return bitize(mp->m_sb.sb_sectsize); +} diff --git a/db/agf.h b/db/agf.h new file mode 100644 index 000000000..26ce84987 --- /dev/null +++ b/db/agf.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct field; + +extern const struct field agf_flds[]; +extern const struct field agf_hfld[]; + +extern void agf_init(void); +extern int agf_size(void *obj, int startoff, int idx); diff --git a/db/agfl.c b/db/agfl.c new file mode 100644 index 000000000..3d7f0a06d --- /dev/null +++ b/db/agfl.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "agfl.h" +#include "command.h" +#include "data.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "io.h" +#include "bit.h" +#include "output.h" +#include "mount.h" + +static int agfl_f(int argc, char **argv); +static void agfl_help(void); + +static const cmdinfo_t agfl_cmd = + { "agfl", NULL, agfl_f, 0, 1, 1, "[agno]", + "set address to agfl block", agfl_help }; + +const field_t agfl_hfld[] = { + { "", FLDT_AGFL, OI(0), C1, 0, TYP_NONE }, + { NULL } +}; + +#define OFF(f) bitize(offsetof(xfs_agfl_t, agfl_ ## f)) +const field_t agfl_flds[] = { + { "bno", FLDT_AGBLOCKNZ, OI(OFF(bno)), CI(XFS_AGFL_SIZE), FLD_ARRAY, + TYP_DATA }, + { NULL } +}; + +/* + * Print the long help text for the "agfl" command. + */ +static void +agfl_help(void) +{ + dbprintf( +"\n" +" set allocation group freelist\n" +"\n" +" Example:\n" +"\n" +" agfl 5\n" +"\n" +" Located in the 4th 512 byte block of each allocation group,\n" +" the agfl freelist for internal btree space allocation is maintained\n" +" for each allocation group. 
This acts as a reserved pool of space\n" +" separate from the general filesystem freespace (not used for user data).\n" +"\n" +); + +} + +/* + * Handler for the "agfl" command. + * + * With an argument, parse it as an allocation group number and make it + * the current AG; input that is empty, has trailing characters, or is + * not below sb_agcount is rejected with a message. With no argument the + * current AG is kept, defaulting to 0 when it is NULLAGNUMBER. The + * cursor is then set to the AGFL type at that AG's XFS_AGFL_DADDR. + * + * Always returns 0. + */ +static int +agfl_f( + int argc, + char **argv) +{ + unsigned long agno; + char *p; + + if (argc > 1) { + /* + * Range-check at full strtoul() width: narrowing to + * xfs_agnumber_t first would let oversized values wrap + * around to a valid AG number. + */ + agno = strtoul(argv[1], &p, 0); + if (p == argv[1] || *p != '\0' || + agno >= mp->m_sb.sb_agcount) { + dbprintf("bad allocation group number %s\n", argv[1]); + return 0; + } + cur_agno = (xfs_agnumber_t)agno; + } else if (cur_agno == NULLAGNUMBER) + cur_agno = 0; + ASSERT(typtab[TYP_AGFL].typnm == TYP_AGFL); + set_cur(&typtab[TYP_AGFL], XFS_AG_DADDR(mp, cur_agno, XFS_AGFL_DADDR), + 1, DB_RING_ADD, NULL); + return 0; +} + +void +agfl_init(void) +{ + add_command(&agfl_cmd); +} + +/*ARGSUSED*/ +int +agfl_size( + void *obj, + int startoff, + int idx) +{ + return bitize(mp->m_sb.sb_sectsize); +} diff --git a/db/agfl.h b/db/agfl.h new file mode 100644 index 000000000..37670c381 --- /dev/null +++ b/db/agfl.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct field; + +extern const struct field agfl_flds[]; +extern const struct field agfl_hfld[]; + +extern void agfl_init(void); +extern int agfl_size(void *obj, int startoff, int idx); diff --git a/db/agi.c b/db/agi.c new file mode 100644 index 000000000..f1c56516e --- /dev/null +++ b/db/agi.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "agi.h" +#include "command.h" +#include "data.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "io.h" +#include "bit.h" +#include "output.h" +#include "mount.h" + +static int agi_f(int argc, char **argv); +static void agi_help(void); + +static const cmdinfo_t agi_cmd = + { "agi", NULL, agi_f, 0, 1, 1, "[agno]", + "set address to agi header", agi_help }; + +const field_t agi_hfld[] = { + { "", FLDT_AGI, OI(0), C1, 0, TYP_NONE }, + { NULL } +}; + +#define OFF(f) bitize(offsetof(xfs_agi_t, agi_ ## f)) +const field_t agi_flds[] = { + { "magicnum", FLDT_UINT32X, OI(OFF(magicnum)), C1, 0, TYP_NONE }, + { "versionnum", FLDT_UINT32D, OI(OFF(versionnum)), C1, 0, TYP_NONE }, + { "seqno", FLDT_AGNUMBER, OI(OFF(seqno)), C1, 0, TYP_NONE }, + { "length", FLDT_AGBLOCK, OI(OFF(length)), C1, 0, TYP_NONE }, + { "count", FLDT_AGINO, OI(OFF(count)), C1, 0, TYP_NONE }, + { "root", FLDT_AGBLOCK, OI(OFF(root)), C1, 0, TYP_INOBT }, + { "level", FLDT_UINT32D, OI(OFF(level)), C1, 0, TYP_NONE }, + { "freecount", FLDT_AGINO, OI(OFF(freecount)), C1, 0, TYP_NONE }, + { "newino", FLDT_AGINO, OI(OFF(newino)), C1, 0, TYP_INODE }, + { "dirino", FLDT_AGINO, OI(OFF(dirino)), C1, 0, TYP_INODE }, + { "unlinked", FLDT_AGINONN, OI(OFF(unlinked)), + CI(XFS_AGI_UNLINKED_BUCKETS), FLD_ARRAY, TYP_NONE }, + { NULL } +}; + +static void +agi_help(void) +{ + dbprintf( +"\n" +" set allocation group inode btree\n" +"\n" +" Example:\n" +"\n" +" agi 3 (set location to 3rd allocation group inode btree and type to 'agi')\n" +"\n" +" Located in the 3rd 512 byte block of each allocation group,\n" +" the agi inode btree tracks all used/free inodes in the allocation group.\n" +" Inodes are 
allocated in 16k 'chunks', each btree entry tracks a 'chunk'.\n" +"\n" +); +} + +/* + * Handler for the "agi" command. + * + * With an argument, parse it as an allocation group number and make it + * the current AG; input that is empty, has trailing characters, or is + * not below sb_agcount is rejected with a message. With no argument the + * current AG is kept, defaulting to 0 when it is NULLAGNUMBER. The + * cursor is then set to the AGI type at that AG's XFS_AGI_DADDR. + * + * Always returns 0. + */ +static int +agi_f( + int argc, + char **argv) +{ + unsigned long agno; + char *p; + + if (argc > 1) { + /* + * Range-check at full strtoul() width: narrowing to + * xfs_agnumber_t first would let oversized values wrap + * around to a valid AG number. + */ + agno = strtoul(argv[1], &p, 0); + if (p == argv[1] || *p != '\0' || + agno >= mp->m_sb.sb_agcount) { + dbprintf("bad allocation group number %s\n", argv[1]); + return 0; + } + cur_agno = (xfs_agnumber_t)agno; + } else if (cur_agno == NULLAGNUMBER) + cur_agno = 0; + ASSERT(typtab[TYP_AGI].typnm == TYP_AGI); + set_cur(&typtab[TYP_AGI], XFS_AG_DADDR(mp, cur_agno, XFS_AGI_DADDR), 1, + DB_RING_ADD, NULL); + return 0; +} + +void +agi_init(void) +{ + add_command(&agi_cmd); +} + +/*ARGSUSED*/ +int +agi_size( + void *obj, + int startoff, + int idx) +{ + return bitize(mp->m_sb.sb_sectsize); +} diff --git a/db/agi.h b/db/agi.h new file mode 100644 index 000000000..6d3881d24 --- /dev/null +++ b/db/agi.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct field; + +extern const struct field agi_flds[]; +extern const struct field agi_hfld[]; + +extern void agi_init(void); +extern int agi_size(void *obj, int startoff, int idx); diff --git a/db/attr.c b/db/attr.c new file mode 100644 index 000000000..5828d0e2b --- /dev/null +++ b/db/attr.c @@ -0,0 +1,465 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "bit.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "attr.h" +#include "io.h" +#include "data.h" +#include "mount.h" + +static int attr_leaf_entries_count(void *obj, int startoff); +static int attr_leaf_hdr_count(void *obj, int startoff); +static int attr_leaf_name_local_count(void *obj, int startoff); +static int attr_leaf_name_local_name_count(void *obj, int startoff); +static int attr_leaf_name_local_value_count(void *obj, int startoff); +static int attr_leaf_name_local_value_offset(void *obj, int startoff, + int idx); +static int attr_leaf_name_remote_count(void *obj, int startoff); +static int attr_leaf_name_remote_name_count(void *obj, int startoff); +static int attr_leaf_nvlist_count(void *obj, int startoff); +static int attr_leaf_nvlist_offset(void *obj, int startoff, int idx); +static int attr_node_btree_count(void *obj, int startoff); +static int attr_node_hdr_count(void *obj, int startoff); + +const field_t attr_hfld[] = { + { "", FLDT_ATTR, OI(0), C1, 0, TYP_NONE }, + { NULL } +}; + +#define LOFF(f) bitize(offsetof(xfs_attr_leafblock_t, f)) +#define NOFF(f) bitize(offsetof(xfs_da_intnode_t, f)) +const field_t attr_flds[] = { + { "hdr", FLDT_ATTR_LEAF_HDR, OI(LOFF(hdr)), attr_leaf_hdr_count, + FLD_COUNT, TYP_NONE }, + { "hdr", FLDT_ATTR_NODE_HDR, OI(NOFF(hdr)), attr_node_hdr_count, + FLD_COUNT, TYP_NONE }, + { "entries", FLDT_ATTR_LEAF_ENTRY, OI(LOFF(entries)), + attr_leaf_entries_count, FLD_ARRAY|FLD_COUNT, TYP_NONE }, + { "btree", FLDT_ATTR_NODE_ENTRY, OI(NOFF(btree)), attr_node_btree_count, + FLD_ARRAY|FLD_COUNT, TYP_NONE }, + { "nvlist", FLDT_ATTR_LEAF_NAME, attr_leaf_nvlist_offset, + attr_leaf_nvlist_count, 
FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE }, + { NULL } +}; + +#define BOFF(f) bitize(offsetof(xfs_da_blkinfo_t, f)) +const field_t attr_blkinfo_flds[] = { + { "forw", FLDT_ATTRBLOCK, OI(BOFF(forw)), C1, 0, TYP_ATTR }, + { "back", FLDT_ATTRBLOCK, OI(BOFF(back)), C1, 0, TYP_ATTR }, + { "magic", FLDT_UINT16X, OI(BOFF(magic)), C1, 0, TYP_NONE }, + { "pad", FLDT_UINT16X, OI(BOFF(pad)), C1, FLD_SKIPALL, TYP_NONE }, + { NULL } +}; + +#define LEOFF(f) bitize(offsetof(xfs_attr_leaf_entry_t, f)) +const field_t attr_leaf_entry_flds[] = { + { "hashval", FLDT_UINT32X, OI(LEOFF(hashval)), C1, 0, TYP_NONE }, + { "nameidx", FLDT_UINT16D, OI(LEOFF(nameidx)), C1, 0, TYP_NONE }, + { "flags", FLDT_UINT8X, OI(LEOFF(flags)), C1, FLD_SKIPALL, TYP_NONE }, + { "incomplete", FLDT_UINT1, + OI(LEOFF(flags) + bitsz(__uint8_t) - XFS_ATTR_INCOMPLETE_BIT - 1), C1, + 0, TYP_NONE }, + { "root", FLDT_UINT1, + OI(LEOFF(flags) + bitsz(__uint8_t) - XFS_ATTR_ROOT_BIT - 1), C1, 0, + TYP_NONE }, + { "local", FLDT_UINT1, + OI(LEOFF(flags) + bitsz(__uint8_t) - XFS_ATTR_LOCAL_BIT - 1), C1, 0, + TYP_NONE }, + { "pad2", FLDT_UINT8X, OI(LEOFF(pad2)), C1, FLD_SKIPALL, TYP_NONE }, + { NULL } +}; + +#define LHOFF(f) bitize(offsetof(xfs_attr_leaf_hdr_t, f)) +const field_t attr_leaf_hdr_flds[] = { + { "info", FLDT_ATTR_BLKINFO, OI(LHOFF(info)), C1, 0, TYP_NONE }, + { "count", FLDT_UINT16D, OI(LHOFF(count)), C1, 0, TYP_NONE }, + { "usedbytes", FLDT_UINT16D, OI(LHOFF(usedbytes)), C1, 0, TYP_NONE }, + { "firstused", FLDT_UINT16D, OI(LHOFF(firstused)), C1, 0, TYP_NONE }, + { "holes", FLDT_UINT8D, OI(LHOFF(holes)), C1, 0, TYP_NONE }, + { "pad1", FLDT_UINT8X, OI(LHOFF(pad1)), C1, FLD_SKIPALL, TYP_NONE }, + { "freemap", FLDT_ATTR_LEAF_MAP, OI(LHOFF(freemap)), + CI(XFS_ATTR_LEAF_MAPSIZE), FLD_ARRAY, TYP_NONE }, + { NULL } +}; + +#define LMOFF(f) bitize(offsetof(xfs_attr_leaf_map_t, f)) +const field_t attr_leaf_map_flds[] = { + { "base", FLDT_UINT16D, OI(LMOFF(base)), C1, 0, TYP_NONE }, + { "size", FLDT_UINT16D, 
OI(LMOFF(size)), C1, 0, TYP_NONE }, + { NULL } +}; + +#define LNOFF(f) bitize(offsetof(xfs_attr_leaf_name_local_t, f)) +#define LVOFF(f) bitize(offsetof(xfs_attr_leaf_name_remote_t, f)) +const field_t attr_leaf_name_flds[] = { + { "valuelen", FLDT_UINT16D, OI(LNOFF(valuelen)), + attr_leaf_name_local_count, FLD_COUNT, TYP_NONE }, + { "namelen", FLDT_UINT8D, OI(LNOFF(namelen)), + attr_leaf_name_local_count, FLD_COUNT, TYP_NONE }, + { "name", FLDT_CHARNS, OI(LNOFF(nameval)), + attr_leaf_name_local_name_count, FLD_COUNT, TYP_NONE }, + { "value", FLDT_CHARNS, attr_leaf_name_local_value_offset, + attr_leaf_name_local_value_count, FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { "valueblk", FLDT_UINT32X, OI(LVOFF(valueblk)), + attr_leaf_name_remote_count, FLD_COUNT, TYP_NONE }, + { "valuelen", FLDT_UINT32D, OI(LVOFF(valuelen)), + attr_leaf_name_remote_count, FLD_COUNT, TYP_NONE }, + { "namelen", FLDT_UINT8D, OI(LVOFF(namelen)), + attr_leaf_name_remote_count, FLD_COUNT, TYP_NONE }, + { "name", FLDT_CHARNS, OI(LVOFF(name)), + attr_leaf_name_remote_name_count, FLD_COUNT, TYP_NONE }, + { NULL } +}; + +#define EOFF(f) bitize(offsetof(xfs_da_node_entry_t, f)) +const field_t attr_node_entry_flds[] = { + { "hashval", FLDT_UINT32X, OI(EOFF(hashval)), C1, 0, TYP_NONE }, + { "before", FLDT_ATTRBLOCK, OI(EOFF(before)), C1, 0, TYP_ATTR }, + { NULL } +}; + +#define HOFF(f) bitize(offsetof(xfs_da_node_hdr_t, f)) +const field_t attr_node_hdr_flds[] = { + { "info", FLDT_ATTR_BLKINFO, OI(HOFF(info)), C1, 0, TYP_NONE }, + { "count", FLDT_UINT16D, OI(HOFF(count)), C1, 0, TYP_NONE }, + { "level", FLDT_UINT16D, OI(HOFF(level)), C1, 0, TYP_NONE }, + { NULL } +}; + +/*ARGSUSED*/ +static int +attr_leaf_entries_count( + void *obj, + int startoff) +{ + xfs_attr_leafblock_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->hdr.info.magic, ARCH_CONVERT) + != XFS_ATTR_LEAF_MAGIC) { + return 0; + } + + return INT_GET(block->hdr.count, ARCH_CONVERT); +} + +/*ARGSUSED*/ +static int 
+attr_leaf_hdr_count( + void *obj, + int startoff) +{ + xfs_attr_leafblock_t *block; + + ASSERT(startoff == 0); + block = obj; + return INT_GET(block->hdr.info.magic, ARCH_CONVERT) + == XFS_ATTR_LEAF_MAGIC; +} + +static int +attr_leaf_name_local_count( + void *obj, + int startoff) +{ + xfs_attr_leafblock_t *block; + xfs_attr_leaf_entry_t *e; + int i; + int off; + + ASSERT(bitoffs(startoff) == 0); + off = byteize(startoff); + block = obj; + if (INT_GET(block->hdr.info.magic, ARCH_CONVERT) + != XFS_ATTR_LEAF_MAGIC) + return 0; + for (i = 0; i < INT_GET(block->hdr.count, ARCH_CONVERT); i++) { + e = &block->entries[i]; + if (INT_GET(e->nameidx, ARCH_CONVERT) == off) + return (INT_GET(e->flags, ARCH_CONVERT) + & XFS_ATTR_LOCAL) != 0; + } + return 0; +} + +static int +attr_leaf_name_local_name_count( + void *obj, + int startoff) +{ + xfs_attr_leafblock_t *block; + xfs_attr_leaf_entry_t *e; + int i; + xfs_attr_leaf_name_local_t *l; + int off; + + ASSERT(bitoffs(startoff) == 0); + off = byteize(startoff); + block = obj; + if (INT_GET(block->hdr.info.magic, ARCH_CONVERT) + != XFS_ATTR_LEAF_MAGIC) + return 0; + for (i = 0; i < INT_GET(block->hdr.count, ARCH_CONVERT); i++) { + e = &block->entries[i]; + if (INT_GET(e->nameidx, ARCH_CONVERT) == off) { + if (INT_GET(e->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL) { + l = XFS_ATTR_LEAF_NAME_LOCAL(block, i); + return INT_GET(l->namelen, ARCH_CONVERT); + } else + return 0; + } + } + return 0; +} + +static int +attr_leaf_name_local_value_count( + void *obj, + int startoff) +{ + xfs_attr_leafblock_t *block; + xfs_attr_leaf_entry_t *e; + int i; + xfs_attr_leaf_name_local_t *l; + int off; + + ASSERT(bitoffs(startoff) == 0); + off = byteize(startoff); + block = obj; + if (INT_GET(block->hdr.info.magic, ARCH_CONVERT) + != XFS_ATTR_LEAF_MAGIC) + return 0; + for (i = 0; i < INT_GET(block->hdr.count, ARCH_CONVERT); i++) { + e = &block->entries[i]; + if (INT_GET(e->nameidx, ARCH_CONVERT) == off) { + if (INT_GET(e->flags, ARCH_CONVERT) & 
XFS_ATTR_LOCAL) { + l = XFS_ATTR_LEAF_NAME_LOCAL(block, i); + return INT_GET(l->valuelen, ARCH_CONVERT); + } else + return 0; + } + } + return 0; +} + +/*ARGSUSED*/ +static int +attr_leaf_name_local_value_offset( + void *obj, + int startoff, + int idx) +{ + xfs_attr_leafblock_t *block; + xfs_attr_leaf_name_local_t *l; + char *vp; + int off; + xfs_attr_leaf_entry_t *e; + int i; + + ASSERT(bitoffs(startoff) == 0); + off = byteize(startoff); + block = obj; + if (INT_GET(block->hdr.info.magic, ARCH_CONVERT) + != XFS_ATTR_LEAF_MAGIC) + return 0; + + for (i = 0; i < INT_GET(block->hdr.count, ARCH_CONVERT); i++) { + e = &block->entries[i]; + if (INT_GET(e->nameidx, ARCH_CONVERT) == off) + break; + } + if (i>=INT_GET(block->hdr.count, ARCH_CONVERT)) return 0; + + l = XFS_ATTR_LEAF_NAME_LOCAL(block, i); + vp = (char *)&l->nameval[l->namelen]; + return (int)bitize(vp - (char *)l); +} + +static int +attr_leaf_name_remote_count( + void *obj, + int startoff) +{ + xfs_attr_leafblock_t *block; + xfs_attr_leaf_entry_t *e; + int i; + int off; + + ASSERT(bitoffs(startoff) == 0); + off = byteize(startoff); + block = obj; + if (INT_GET(block->hdr.info.magic, ARCH_CONVERT) + != XFS_ATTR_LEAF_MAGIC) + return 0; + for (i = 0; i < INT_GET(block->hdr.count, ARCH_CONVERT); i++) { + e = &block->entries[i]; + if (INT_GET(e->nameidx, ARCH_CONVERT) == off) + return (INT_GET(e->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL) == 0; + } + return 0; +} + +static int +attr_leaf_name_remote_name_count( + void *obj, + int startoff) +{ + xfs_attr_leafblock_t *block; + xfs_attr_leaf_entry_t *e; + int i; + int off; + xfs_attr_leaf_name_remote_t *r; + + ASSERT(bitoffs(startoff) == 0); + off = byteize(startoff); + block = obj; + if (INT_GET(block->hdr.info.magic, ARCH_CONVERT) + != XFS_ATTR_LEAF_MAGIC) + return 0; + for (i = 0; i < INT_GET(block->hdr.count, ARCH_CONVERT); i++) { + e = &block->entries[i]; + if (INT_GET(e->nameidx, ARCH_CONVERT) == off) { + if (!(INT_GET(e->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL)) { + 
r = XFS_ATTR_LEAF_NAME_REMOTE(block, i); + return INT_GET(r->namelen, ARCH_CONVERT); + } else + return 0; + } + } + return 0; +} + +/*ARGSUSED*/ +int +attr_leaf_name_size( + void *obj, + int startoff, + int idx) +{ + xfs_attr_leafblock_t *block; + xfs_attr_leaf_entry_t *e; + xfs_attr_leaf_name_local_t *l; + xfs_attr_leaf_name_remote_t *r; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->hdr.info.magic, ARCH_CONVERT) + != XFS_ATTR_LEAF_MAGIC) + return 0; + e = &block->entries[idx]; + if (INT_GET(e->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL) { + l = XFS_ATTR_LEAF_NAME_LOCAL(block, idx); + return (int)bitize(XFS_ATTR_LEAF_ENTSIZE_LOCAL(INT_GET(l->namelen, ARCH_CONVERT), + INT_GET(l->valuelen, ARCH_CONVERT))); + } else { + r = XFS_ATTR_LEAF_NAME_REMOTE(block, idx); + return (int)bitize(XFS_ATTR_LEAF_ENTSIZE_REMOTE(INT_GET(r->namelen, ARCH_CONVERT))); + } +} + +/*ARGSUSED*/ +static int +attr_leaf_nvlist_count( + void *obj, + int startoff) +{ + xfs_attr_leafblock_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->hdr.info.magic, ARCH_CONVERT) + != XFS_ATTR_LEAF_MAGIC) + return 0; + return INT_GET(block->hdr.count, ARCH_CONVERT); +} + +/*ARGSUSED*/ +static int +attr_leaf_nvlist_offset( + void *obj, + int startoff, + int idx) +{ + xfs_attr_leafblock_t *block; + xfs_attr_leaf_entry_t *e; + + ASSERT(startoff == 0); + block = obj; + e = &block->entries[idx]; + return bitize(INT_GET(e->nameidx, ARCH_CONVERT)); +} + +/*ARGSUSED*/ +static int +attr_node_btree_count( + void *obj, + int startoff) +{ + xfs_da_intnode_t *block; + + ASSERT(startoff == 0); /* this is a base structure */ + block = obj; + if (INT_GET(block->hdr.info.magic, ARCH_CONVERT) + != XFS_DA_NODE_MAGIC) + return 0; + return INT_GET(block->hdr.count, ARCH_CONVERT); +} + +/*ARGSUSED*/ +static int +attr_node_hdr_count( + void *obj, + int startoff) +{ + xfs_da_intnode_t *block; + + ASSERT(startoff == 0); + block = obj; + return INT_GET(block->hdr.info.magic, ARCH_CONVERT) + == 
XFS_DA_NODE_MAGIC; +} + +/*ARGSUSED*/ +int +attr_size( + void *obj, + int startoff, + int idx) +{ + return bitize(mp->m_sb.sb_blocksize); +} diff --git a/db/attr.h b/db/attr.h new file mode 100644 index 000000000..00ae7e98a --- /dev/null +++ b/db/attr.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern const field_t attr_flds[]; +extern const field_t attr_hfld[]; +extern const field_t attr_blkinfo_flds[]; +extern const field_t attr_leaf_entry_flds[]; +extern const field_t attr_leaf_hdr_flds[]; +extern const field_t attr_leaf_map_flds[]; +extern const field_t attr_leaf_name_flds[]; +extern const field_t attr_node_entry_flds[]; +extern const field_t attr_node_hdr_flds[]; + +extern int attr_leaf_name_size(void *obj, int startoff, int idx); +extern int attr_size(void *obj, int startoff, int idx); diff --git a/db/attrshort.c b/db/attrshort.c new file mode 100644 index 000000000..04477fe15 --- /dev/null +++ b/db/attrshort.c @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "bit.h" +#include "attrshort.h" + +static int attr_sf_entry_name_count(void *obj, int startoff); +static int attr_sf_entry_value_count(void *obj, int startoff); +static int attr_sf_entry_value_offset(void *obj, int startoff, int idx); +static int attr_shortform_list_count(void *obj, int startoff); +static int attr_shortform_list_offset(void *obj, int startoff, int idx); + +#define OFF(f) bitize(offsetof(xfs_attr_shortform_t, f)) +const field_t attr_shortform_flds[] = { + { "hdr", FLDT_ATTR_SF_HDR, OI(OFF(hdr)), C1, 0, TYP_NONE }, + { "list", FLDT_ATTR_SF_ENTRY, attr_shortform_list_offset, + attr_shortform_list_count, FLD_ARRAY|FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { NULL } +}; + +#define HOFF(f) bitize(offsetof(xfs_attr_sf_hdr_t, f)) +const field_t attr_sf_hdr_flds[] = { + { "totsize", FLDT_UINT16D, OI(HOFF(totsize)), C1, 0, TYP_NONE }, + { "count", FLDT_UINT8D, OI(HOFF(count)), C1, 0, TYP_NONE }, + { NULL } +}; + +#define EOFF(f) bitize(offsetof(xfs_attr_sf_entry_t, f)) +const field_t attr_sf_entry_flds[] = { + { "namelen", FLDT_UINT8D, OI(EOFF(namelen)), C1, 0, TYP_NONE }, + { "valuelen", FLDT_UINT8D, OI(EOFF(valuelen)), C1, 0, TYP_NONE }, + { "flags", FLDT_UINT8X, OI(EOFF(flags)), C1, FLD_SKIPALL, TYP_NONE }, + { "root", FLDT_UINT1, + OI(EOFF(flags) + bitsz(__uint8_t) - XFS_ATTR_ROOT_BIT - 1), C1, 0, + TYP_NONE }, + { "name", FLDT_CHARNS, OI(EOFF(nameval)), attr_sf_entry_name_count, + 
FLD_COUNT, TYP_NONE }, + { "value", FLDT_CHARNS, attr_sf_entry_value_offset, + attr_sf_entry_value_count, FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { NULL } +}; + +static int +attr_sf_entry_name_count( + void *obj, + int startoff) +{ + xfs_attr_sf_entry_t *e; + + ASSERT(bitoffs(startoff) == 0); + e = (xfs_attr_sf_entry_t *)((char *)obj + byteize(startoff)); + return e->namelen; +} + +int +attr_sf_entry_size( + void *obj, + int startoff, + int idx) +{ + xfs_attr_sf_entry_t *e; + int i; + xfs_attr_shortform_t *sf; + + ASSERT(bitoffs(startoff) == 0); + sf = (xfs_attr_shortform_t *)((char *)obj + byteize(startoff)); + e = &sf->list[0]; + for (i = 0; i < idx; i++) + e = XFS_ATTR_SF_NEXTENTRY(e); + return bitize((int)XFS_ATTR_SF_ENTSIZE(e)); +} + +static int +attr_sf_entry_value_count( + void *obj, + int startoff) +{ + xfs_attr_sf_entry_t *e; + + ASSERT(bitoffs(startoff) == 0); + e = (xfs_attr_sf_entry_t *)((char *)obj + byteize(startoff)); + return e->valuelen; +} + +/*ARGSUSED*/ +static int +attr_sf_entry_value_offset( + void *obj, + int startoff, + int idx) +{ + xfs_attr_sf_entry_t *e; + + ASSERT(bitoffs(startoff) == 0); + ASSERT(idx == 0); + e = (xfs_attr_sf_entry_t *)((char *)obj + byteize(startoff)); + return bitize((int)((char *)&e->nameval[e->namelen] - (char *)e)); +} + +static int +attr_shortform_list_count( + void *obj, + int startoff) +{ + xfs_attr_shortform_t *sf; + + ASSERT(bitoffs(startoff) == 0); + sf = (xfs_attr_shortform_t *)((char *)obj + byteize(startoff)); + return sf->hdr.count; +} + +static int +attr_shortform_list_offset( + void *obj, + int startoff, + int idx) +{ + xfs_attr_sf_entry_t *e; + int i; + xfs_attr_shortform_t *sf; + + ASSERT(bitoffs(startoff) == 0); + sf = (xfs_attr_shortform_t *)((char *)obj + byteize(startoff)); + e = &sf->list[0]; + for (i = 0; i < idx; i++) + e = XFS_ATTR_SF_NEXTENTRY(e); + return bitize((int)((char *)e - (char *)sf)); +} + +/*ARGSUSED*/ +int +attrshort_size( + void *obj, + int startoff, + int idx) +{ + 
xfs_attr_sf_entry_t *e; + int i; + xfs_attr_shortform_t *sf; + + ASSERT(bitoffs(startoff) == 0); + ASSERT(idx == 0); + sf = (xfs_attr_shortform_t *)((char *)obj + byteize(startoff)); + e = &sf->list[0]; + for (i = 0; i < sf->hdr.count; i++) + e = XFS_ATTR_SF_NEXTENTRY(e); + return bitize((int)((char *)e - (char *)sf)); +} diff --git a/db/attrshort.h b/db/attrshort.h new file mode 100644 index 000000000..95c25b64f --- /dev/null +++ b/db/attrshort.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern const field_t attr_sf_entry_flds[]; +extern const field_t attr_sf_hdr_flds[]; +extern const field_t attr_shortform_flds[]; +extern const field_t attrshort_hfld[]; + +extern int attr_sf_entry_size(void *obj, int startoff, int idx); +extern int attrshort_size(void *obj, int startoff, int idx); diff --git a/db/bit.c b/db/bit.c new file mode 100644 index 000000000..de3337877 --- /dev/null +++ b/db/bit.c @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "bit.h" + +#undef setbit /* defined in param.h on Linux */ + +static int getbit(char *ptr, int bit); +static void setbit(char *ptr, int bit, int val); + +static int +getbit( + char *ptr, + int bit) +{ + int mask; + int shift; + + ptr += byteize(bit); + bit = bitoffs(bit); + shift = 7 - bit; + mask = 1 << shift; + return (*ptr & mask) >> shift; +} + +static void +setbit( + char *ptr, + int bit, + int val) +{ + int mask; + int shift; + + ptr += byteize(bit); + bit = bitoffs(bit); + shift = 7 - bit; + mask = (1 << shift); + if (val) { + *ptr |= mask; + } else { + mask = ~mask; + *ptr &= mask; + } +} + +__int64_t +getbitval( + void *obj, + int bitoff, + int nbits, + int flags) +{ + int bit; + int i; + char *p; + __int64_t rval; + int signext; + int z1, z2, z3, z4; + + ASSERT(nbits<=64); + + p = (char *)obj + byteize(bitoff); + bit = bitoffs(bitoff); + signext = (flags & BVSIGNED) != 0; + z4 = ((__psint_t)p & 0xf) == 0 && bit == 0; + if (nbits == 64 && z4) { + if (signext) + return (__int64_t)INT_GET(*(__int64_t *)p, ARCH_CONVERT); + else + return (__int64_t)INT_GET(*(__uint64_t *)p, ARCH_CONVERT); + } + z3 = ((__psint_t)p & 0x7) == 0 && bit == 0; + if (nbits == 32 && z3) { + if (signext) + return (__int64_t)INT_GET(*(__int32_t *)p, ARCH_CONVERT); + else + return (__int64_t)INT_GET(*(__uint32_t *)p, ARCH_CONVERT); + } + z2 = ((__psint_t)p & 0x3) == 0 && bit == 0; + if (nbits == 16 && z2) { + if (signext) + return (__int64_t)INT_GET(*(__int16_t *)p, ARCH_CONVERT); + else + return (__int64_t)INT_GET(*(__uint16_t *)p, ARCH_CONVERT); + } + z1 = ((__psint_t)p & 0x1) == 0 && bit == 0; + if (nbits == 8 && z1) { + if (signext) + return (__int64_t)INT_GET(*(__int8_t *)p, ARCH_CONVERT); + else + 
return (__int64_t)INT_GET(*(__uint8_t *)p, ARCH_CONVERT); + } + + + for (i = 0, rval = 0LL; i < nbits; i++) { + if (getbit(p, bit + i)) { + /* If the last bit is on and we care about sign + * bits and we don't have a full 64 bit + * container, turn all bits on between the + * sign bit and the most sig bit. + */ + + /* handle endian swap here */ +#if __BYTE_ORDER == LITTLE_ENDIAN + if (i == 0 && signext && nbits < 64) + rval = -1LL << nbits; + rval |= 1LL << (nbits - i - 1); +#else + if ((i == (nbits - 1)) && signext && nbits < 64) + rval |= (-1LL << nbits); + rval |= 1LL << i; +#endif + } + } + return rval; +} + +void +setbitval( + void *obuf, /* buffer to write into */ + int bitoff, /* bit offset of where to write */ + int nbits, /* number of bits to write */ + void *ibuf) /* source bits */ +{ + char *in = (char *)ibuf; + char *out = (char *)obuf; + + int bit; + +#if BYTE_ORDER == LITTLE_ENDIAN + int big = 0; +#else + int big = 1; +#endif + + /* only need to swap LE integers */ + if (big || (nbits!=2 && nbits!=4 && nbits!=8) ) { + /* We don't have type info, so we can only assume + * that 2,4 & 8 byte values are integers. sigh. + */ + + /* byte aligned ? 
*/ + if (bitoff%NBBY) { + /* no - bit copy */ + for (bit=0; bit +#include "block.h" +#include "bmap.h" +#include "command.h" +#include "data.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "inode.h" +#include "io.h" +#include "output.h" +#include "mount.h" + +static int ablock_f(int argc, char **argv); +static void ablock_help(void); +static int daddr_f(int argc, char **argv); +static void daddr_help(void); +static int dblock_f(int argc, char **argv); +static void dblock_help(void); +static int fsblock_f(int argc, char **argv); +static void fsblock_help(void); +static void print_rawdata(void *data, int len); + +static const cmdinfo_t ablock_cmd = + { "ablock", NULL, ablock_f, 1, 1, 1, "filoff", + "set address to file offset (attr fork)", ablock_help }; +static const cmdinfo_t daddr_cmd = + { "daddr", NULL, daddr_f, 0, 1, 1, "[d]", + "set address to daddr value", daddr_help }; +static const cmdinfo_t dblock_cmd = + { "dblock", NULL, dblock_f, 1, 1, 1, "filoff", + "set address to file offset (data fork)", dblock_help }; +static const cmdinfo_t fsblock_cmd = + { "fsblock", "fsb", fsblock_f, 0, 1, 1, "[fsb]", + "set address to fsblock value", fsblock_help }; + +static void +ablock_help(void) +{ + dbprintf( +"\n Example:\n" +"\n" +" 'ablock 23' - sets the file position to the 23rd filesystem block in\n" +" the inode's attribute fork. 
The filesystem block size is specified in\n" +" the superblock.\n\n" +); +} + +/*ARGSUSED*/ +static int +ablock_f( + int argc, + char **argv) +{ + bmap_ext_t bm; + xfs_dfiloff_t bno; + xfs_dfsbno_t dfsbno; + int haveattr; + int nex; + char *p; + + bno = (xfs_dfiloff_t)strtoull(argv[1], &p, 0); + if (*p != '\0') { + dbprintf("bad block number %s\n", argv[1]); + return 0; + } + push_cur(); + set_cur_inode(iocur_top->ino); + haveattr = XFS_DFORK_Q((xfs_dinode_t *)iocur_top->data); + pop_cur(); + if (!haveattr) { + dbprintf("no attribute data for file\n"); + return 0; + } + nex = 1; + bmap(bno, 1, XFS_ATTR_FORK, &nex, &bm); + if (nex == 0) { + dbprintf("file attr block is unmapped\n"); + return 0; + } + dfsbno = bm.startblock + (bno - bm.startoff); + ASSERT(typtab[TYP_ATTR].typnm == TYP_ATTR); + set_cur(&typtab[TYP_ATTR], (__int64_t)XFS_FSB_TO_DADDR(mp, dfsbno), + blkbb, DB_RING_ADD, NULL); + return 0; +} + +void +block_init(void) +{ + add_command(&ablock_cmd); + add_command(&daddr_cmd); + add_command(&dblock_cmd); + add_command(&fsblock_cmd); +} + +static void +daddr_help(void) +{ + dbprintf( +"\n Example:\n" +"\n" +" 'daddr 102' - sets position to the 102nd absolute disk block\n" +" (512 byte block).\n" +); +} + +static int +daddr_f( + int argc, + char **argv) +{ + __int64_t d; + char *p; + + if (argc == 1) { + dbprintf("current daddr is %lld\n", iocur_top->off >> BBSHIFT); + return 0; + } + d = (__int64_t)strtoull(argv[1], &p, 0); + if (*p != '\0' || + d >= mp->m_sb.sb_dblocks << (mp->m_sb.sb_blocklog - BBSHIFT)) { + dbprintf("bad daddr %s\n", argv[1]); + return 0; + } + ASSERT(typtab[TYP_DATA].typnm == TYP_DATA); + set_cur(&typtab[TYP_DATA], d, 1, DB_RING_ADD, NULL); + return 0; +} + +static void +dblock_help(void) +{ + dbprintf( +"\n Example:\n" +"\n" +" 'dblock 23' - sets the file position to the 23rd filesystem block in\n" +" the inode's data fork. 
The filesystem block size is specified in the\n" +" superblock.\n\n" +); +} + +static int +dblock_f( + int argc, + char **argv) +{ + bbmap_t bbmap; + bmap_ext_t *bmp; + xfs_dfiloff_t bno; + xfs_dfsbno_t dfsbno; + int nb; + int nex; + char *p; + typnm_t type; + + bno = (xfs_dfiloff_t)strtoull(argv[1], &p, 0); + if (*p != '\0') { + dbprintf("bad block number %s\n", argv[1]); + return 0; + } + push_cur(); + set_cur_inode(iocur_top->ino); + type = inode_next_type(); + pop_cur(); + if (type == TYP_NONE) { + dbprintf("no type for file data\n"); + return 0; + } + nex = nb = type == TYP_DIR2 ? mp->m_dirblkfsbs : 1; + bmp = malloc(nb * sizeof(*bmp)); + bmap(bno, nb, XFS_DATA_FORK, &nex, bmp); + if (nex == 0) { + dbprintf("file data block is unmapped\n"); + free(bmp); + return 0; + } + dfsbno = bmp->startblock + (bno - bmp->startoff); + ASSERT(typtab[type].typnm == type); + if (nex > 1) + make_bbmap(&bbmap, nex, bmp); + set_cur(&typtab[type], (__int64_t)XFS_FSB_TO_DADDR(mp, dfsbno), + nb * blkbb, DB_RING_ADD, nex > 1 ? &bbmap : NULL); + free(bmp); + return 0; +} + +static void +fsblock_help(void) +{ + dbprintf( +"\n Example:\n" +"\n" +" 'fsblock 1023' - sets the file position to the 1023rd filesystem block.\n" +" The filesystem block size is specified in the superblock and set during\n" +" mkfs time. 
Offset is absolute (not AG relative).\n\n" +); +} + +static int +fsblock_f( + int argc, + char **argv) +{ + xfs_agblock_t agbno; + xfs_agnumber_t agno; + xfs_dfsbno_t d; + char *p; + + if (argc == 1) { + dbprintf("current fsblock is %lld\n", + XFS_DADDR_TO_FSB(mp, iocur_top->off >> BBSHIFT)); + return 0; + } + d = strtoull(argv[1], &p, 0); + if (*p != '\0') { + dbprintf("bad fsblock %s\n", argv[1]); + return 0; + } + agno = XFS_FSB_TO_AGNO(mp, d); + agbno = XFS_FSB_TO_AGBNO(mp, d); + if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks) { + dbprintf("bad fsblock %s\n", argv[1]); + return 0; + } + ASSERT(typtab[TYP_DATA].typnm == TYP_DATA); + set_cur(&typtab[TYP_DATA], XFS_AGB_TO_DADDR(mp, agno, agbno), + blkbb, DB_RING_ADD, NULL); + return 0; +} + +void +print_block( + const field_t *fields, + int argc, + char **argv) +{ + print_rawdata(iocur_top->data, iocur_top->len); +} + +static void +print_rawdata( + void *data, + int len) +{ + int i; + int j; + int lastaddr; + int offchars; + unsigned char *p; + + lastaddr = (len - 1) & ~(32 - 1); + if (lastaddr < 0x10) + offchars = 1; + else if (lastaddr < 0x100) + offchars = 2; + else if (lastaddr < 0x1000) + offchars = 3; + else + offchars = 4; + for (i = 0, p = data; i < len; i += 32) { + dbprintf("%-0*.*x:", offchars, offchars, i); + for (j = 0; j < 32 && i + j < len; j++, p++) { + if ((j & 3) == 0) + dbprintf(" "); + dbprintf("%02x", *p); + } + dbprintf("\n"); + } +} diff --git a/db/block.h b/db/block.h new file mode 100644 index 000000000..cf17f38c2 --- /dev/null +++ b/db/block.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct field; + +extern void block_init(void); +extern void print_block(const struct field *fields, int argc, char **argv); diff --git a/db/bmap.c b/db/bmap.c new file mode 100644 index 000000000..69e2d3dd9 --- /dev/null +++ b/db/bmap.c @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. 
Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include "command.h" +#include "data.h" +#include "type.h" +#include "bmap.h" +#include "io.h" +#include "inode.h" +#include "output.h" +#include "mount.h" + +static int bmap_f(int argc, char **argv); +static int bmap_one_extent(xfs_bmbt_rec_64_t *ep, + xfs_dfiloff_t *offp, xfs_dfiloff_t eoff, + int *idxp, bmap_ext_t *bep); +static xfs_fsblock_t select_child(xfs_dfiloff_t off, xfs_bmbt_key_t *kp, + xfs_bmbt_ptr_t *pp, int nrecs); + +static const cmdinfo_t bmap_cmd = + { "bmap", NULL, bmap_f, 0, 3, 0, "[-ad] [block [len]]", + "show block map for current file", NULL }; + +void +bmap( + xfs_dfiloff_t offset, + xfs_dfilblks_t len, + int whichfork, + int *nexp, + bmap_ext_t *bep) +{ + xfs_bmbt_block_t *block; + xfs_fsblock_t bno; + xfs_dfiloff_t curoffset; + xfs_dinode_t *dip; + xfs_dfiloff_t eoffset; + xfs_bmbt_rec_64_t *ep; + xfs_dinode_fmt_t fmt; + int fsize; + xfs_bmbt_key_t *kp; + int n; + int nex; + xfs_fsblock_t nextbno; + int nextents; + xfs_bmbt_ptr_t *pp; + xfs_bmdr_block_t *rblock; + typnm_t typ; + xfs_bmbt_rec_64_t *xp; + + push_cur(); + set_cur_inode(iocur_top->ino); + nex = *nexp; + *nexp = 0; + ASSERT(nex > 0); + dip = iocur_top->data; + n = 0; + eoffset = offset + len - 1; + curoffset = offset; + fmt = 
(xfs_dinode_fmt_t)XFS_DFORK_FORMAT_ARCH(dip, whichfork, ARCH_CONVERT); + typ = whichfork == XFS_DATA_FORK ? TYP_BMAPBTD : TYP_BMAPBTA; + ASSERT(typtab[typ].typnm == typ); + ASSERT(fmt == XFS_DINODE_FMT_EXTENTS || fmt == XFS_DINODE_FMT_BTREE); + if (fmt == XFS_DINODE_FMT_EXTENTS) { + nextents = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_CONVERT); + xp = (xfs_bmbt_rec_64_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT); + for (ep = xp; ep < &xp[nextents] && n < nex; ep++) { + if (!bmap_one_extent(ep, &curoffset, eoffset, &n, bep)) + break; + } + } else { + push_cur(); + bno = NULLFSBLOCK; + rblock = (xfs_bmdr_block_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT); + fsize = XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT); + pp = XFS_BTREE_PTR_ADDR(fsize, xfs_bmdr, rblock, 1, + XFS_BTREE_BLOCK_MAXRECS(fsize, xfs_bmdr, 0)); + kp = XFS_BTREE_KEY_ADDR(fsize, xfs_bmdr, rblock, 1, + XFS_BTREE_BLOCK_MAXRECS(fsize, xfs_bmdr, 0)); + bno = select_child(curoffset, kp, pp, INT_GET(rblock->bb_numrecs, ARCH_CONVERT)); + for (;;) { + set_cur(&typtab[typ], XFS_FSB_TO_DADDR(mp, bno), + blkbb, DB_RING_IGN, NULL); + block = (xfs_bmbt_block_t *)iocur_top->data; + if (INT_GET(block->bb_level, ARCH_CONVERT) == 0) + break; + pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, + block, 1, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, + xfs_bmbt, 0)); + kp = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, + block, 1, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, + xfs_bmbt, 0)); + bno = select_child(curoffset, kp, pp, + INT_GET(block->bb_numrecs, ARCH_CONVERT)); + } + for (;;) { + nextbno = INT_GET(block->bb_rightsib, ARCH_CONVERT); + nextents = INT_GET(block->bb_numrecs, ARCH_CONVERT); + xp = (xfs_bmbt_rec_64_t *)XFS_BTREE_REC_ADDR( + mp->m_sb.sb_blocksize, xfs_bmbt, block, 1, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, + xfs_bmbt, 1)); + for (ep = xp; ep < &xp[nextents] && n < nex; ep++) { + if (!bmap_one_extent(ep, &curoffset, eoffset, + &n, bep)) { + 
nextbno = NULLFSBLOCK; + break; + } + } + bno = nextbno; + if (bno == NULLFSBLOCK) + break; + set_cur(&typtab[typ], XFS_FSB_TO_DADDR(mp, bno), + blkbb, DB_RING_IGN, NULL); + block = (xfs_bmbt_block_t *)iocur_top->data; + } + pop_cur(); + } + pop_cur(); + *nexp = n; +} + +static int +bmap_f( + int argc, + char **argv) +{ + int afork = 0; + bmap_ext_t be; + int c; + xfs_dfiloff_t co; + int dfork = 0; + xfs_dinode_t *dip; + xfs_dfiloff_t eo; + xfs_dfilblks_t len; + int nex; + char *p; + int whichfork; + + if (iocur_top->ino == NULLFSINO) { + dbprintf("no current inode\n"); + return 0; + } + optind = 0; + if (argc) while ((c = getopt(argc, argv, "ad")) != EOF) { + switch (c) { + case 'a': + afork = 1; + break; + case 'd': + dfork = 1; + break; + default: + dbprintf("bad option for bmap command\n"); + return 0; + } + } + if (afork + dfork == 0) { + push_cur(); + set_cur_inode(iocur_top->ino); + dip = iocur_top->data; + if (INT_GET(dip->di_core.di_nextents, ARCH_CONVERT)) + dfork = 1; + if (INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)) + afork = 1; + pop_cur(); + } + if (optind < argc) { + co = (xfs_dfiloff_t)strtoull(argv[optind], &p, 0); + if (*p != '\0') { + dbprintf("bad block number for bmap %s\n", + argv[optind]); + return 0; + } + optind++; + if (optind < argc) { + len = (xfs_dfilblks_t)strtoull(argv[optind], &p, 0); + if (*p != '\0') { + dbprintf("bad len for bmap %s\n", argv[optind]); + return 0; + } + eo = co + len - 1; + } else + eo = co; + } else { + co = 0; + eo = -1; + } + for (whichfork = XFS_DATA_FORK; + whichfork <= XFS_ATTR_FORK; + whichfork++) { + if (whichfork == XFS_DATA_FORK && !dfork) + continue; + if (whichfork == XFS_ATTR_FORK && !afork) + continue; + for (;;) { + nex = 1; + bmap(co, eo - co + 1, whichfork, &nex, &be); + if (nex == 0) + break; + dbprintf("%s offset %lld startblock %llu (%u/%u) count " + "%llu flag %u\n", + whichfork == XFS_DATA_FORK ? 
"data" : "attr", + be.startoff, be.startblock, + XFS_FSB_TO_AGNO(mp, be.startblock), + XFS_FSB_TO_AGBNO(mp, be.startblock), + be.blockcount, be.flag); + co = be.startoff + be.blockcount; + } + } + return 0; +} + +void +bmap_init(void) +{ + add_command(&bmap_cmd); +} + +static int +bmap_one_extent( + xfs_bmbt_rec_64_t *ep, + xfs_dfiloff_t *offp, + xfs_dfiloff_t eoff, + int *idxp, + bmap_ext_t *bep) +{ + xfs_dfilblks_t c; + xfs_dfiloff_t curoffset; + int f; + int idx; + xfs_dfiloff_t o; + xfs_dfsbno_t s; + + convert_extent(ep, &o, &s, &c, &f); + curoffset = *offp; + idx = *idxp; + if (o + c <= curoffset) + return 1; + if (o > eoff) + return 0; + if (o < curoffset) { + c -= curoffset - o; + s += curoffset - o; + o = curoffset; + } + if (o + c - 1 > eoff) + c -= (o + c - 1) - eoff; + bep[idx].startoff = o; + bep[idx].startblock = s; + bep[idx].blockcount = c; + bep[idx].flag = f; + *idxp = idx + 1; + *offp = o + c; + return 1; +} + +void +convert_extent( + xfs_bmbt_rec_64_t *rp, + xfs_dfiloff_t *op, + xfs_dfsbno_t *sp, + xfs_dfilblks_t *cp, + int *fp) +{ + xfs_bmbt_irec_t irec, *s = &irec; + + libxfs_bmbt_get_all((xfs_bmbt_rec_t *)rp, s); + + if (s->br_state == XFS_EXT_UNWRITTEN) { + *fp = 1; + } else { + *fp = 0; + } + + *op = s->br_startoff; + *sp = s->br_startblock; + *cp = s->br_blockcount; +} + +void +make_bbmap( + bbmap_t *bbmap, + int nex, + bmap_ext_t *bmp) +{ + int d; + xfs_dfsbno_t dfsbno; + int i; + int j; + int k; + + for (i = 0, d = 0; i < nex; i++) { + dfsbno = bmp[i].startblock; + for (j = 0; j < bmp[i].blockcount; j++, dfsbno++) { + for (k = 0; k < blkbb; k++) + bbmap->b[d++] = + XFS_FSB_TO_DADDR(mp, dfsbno) + k; + } + } +} + +static xfs_fsblock_t +select_child( + xfs_dfiloff_t off, + xfs_bmbt_key_t *kp, + xfs_bmbt_ptr_t *pp, + int nrecs) +{ + int i; + + for (i = 0; i < nrecs; i++) { + if (INT_GET(kp[i].br_startoff, ARCH_CONVERT) == off) + return INT_GET(pp[i], ARCH_CONVERT); + if (INT_GET(kp[i].br_startoff, ARCH_CONVERT) > off) { + if (i == 0) + return 
INT_GET(pp[i], ARCH_CONVERT); + else + return INT_GET(pp[i - 1], ARCH_CONVERT); + } + } + return INT_GET(pp[nrecs - 1], ARCH_CONVERT); +} diff --git a/db/bmap.h b/db/bmap.h new file mode 100644 index 000000000..2420601e8 --- /dev/null +++ b/db/bmap.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct bbmap;
+struct xfs_bmbt_rec_64;
+
+/*
+ * One extent as reported by bmap(): a run of blockcount blocks that starts
+ * at file offset startoff and at filesystem block startblock.  flag is the
+ * value produced by convert_extent(): 1 for an unwritten extent, else 0.
+ */
+typedef struct bmap_ext {
+	xfs_dfiloff_t	startoff;
+	xfs_dfsbno_t	startblock;
+	xfs_dfilblks_t	blockcount;
+	int		flag;
+} bmap_ext_t;
+
+/*
+ * bmap: map file blocks [offset, offset + len - 1] of the current inode's
+ * fork; *nexp is the capacity of bep[] on entry and the number of entries
+ * filled on return.
+ */
+extern void	bmap(xfs_dfiloff_t offset, xfs_dfilblks_t len, int whichfork,
+		     int *nexp, bmap_ext_t *bep);
+/* bmap_init: register the "bmap" command. */
+extern void	bmap_init(void);
+/* convert_extent: unpack an on-disk extent record into its four parts. */
+extern void	convert_extent(struct xfs_bmbt_rec_64 *rp, xfs_dfiloff_t *op,
+			       xfs_dfsbno_t *sp, xfs_dfilblks_t *cp, int *fp);
+/* make_bbmap: expand nex extents into per-basic-block disk addresses. */
+extern void	make_bbmap(struct bbmap *bbmap, int nex, bmap_ext_t *bmp);
diff --git a/db/bmapbt.c b/db/bmapbt.c
new file mode 100644
index 000000000..3ecfb374f
--- /dev/null
+++ b/db/bmapbt.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "data.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "bmapbt.h" +#include "print.h" +#include "bit.h" +#include "mount.h" + +static int bmapbta_key_count(void *obj, int startoff); +static int bmapbta_key_offset(void *obj, int startoff, int idx); +static int bmapbta_ptr_count(void *obj, int startoff); +static int bmapbta_ptr_offset(void *obj, int startoff, int idx); +static int bmapbta_rec_count(void *obj, int startoff); +static int bmapbta_rec_offset(void *obj, int startoff, int idx); +static int bmapbtd_key_count(void *obj, int startoff); +static int bmapbtd_key_offset(void *obj, int startoff, int idx); +static int bmapbtd_ptr_count(void *obj, int startoff); +static int bmapbtd_ptr_offset(void *obj, int startoff, int idx); +static int bmapbtd_rec_count(void *obj, int startoff); +static int bmapbtd_rec_offset(void *obj, int startoff, int idx); + +const field_t bmapbta_hfld[] = { + { "", FLDT_BMAPBTA, OI(0), C1, 0, TYP_NONE }, + { NULL } +}; +const field_t bmapbtd_hfld[] = { + { "", FLDT_BMAPBTD, OI(0), C1, 0, TYP_NONE }, + { NULL } +}; + +#define OFF(f) bitize(offsetof(xfs_bmbt_block_t, bb_ ## f)) +const field_t bmapbta_flds[] = { + { "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE }, + { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE }, + { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE }, + { "leftsib", FLDT_DFSBNO, OI(OFF(leftsib)), C1, 0, TYP_BMAPBTA }, + { "rightsib", FLDT_DFSBNO, 
OI(OFF(rightsib)), C1, 0, TYP_BMAPBTA }, + { "recs", FLDT_BMAPBTAREC, bmapbta_rec_offset, bmapbta_rec_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { "keys", FLDT_BMAPBTAKEY, bmapbta_key_offset, bmapbta_key_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { "ptrs", FLDT_BMAPBTAPTR, bmapbta_ptr_offset, bmapbta_ptr_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_BMAPBTA }, + { NULL } +}; +const field_t bmapbtd_flds[] = { + { "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE }, + { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE }, + { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE }, + { "leftsib", FLDT_DFSBNO, OI(OFF(leftsib)), C1, 0, TYP_BMAPBTD }, + { "rightsib", FLDT_DFSBNO, OI(OFF(rightsib)), C1, 0, TYP_BMAPBTD }, + { "recs", FLDT_BMAPBTDREC, bmapbtd_rec_offset, bmapbtd_rec_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { "keys", FLDT_BMAPBTDKEY, bmapbtd_key_offset, bmapbtd_key_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { "ptrs", FLDT_BMAPBTDPTR, bmapbtd_ptr_offset, bmapbtd_ptr_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_BMAPBTD }, + { NULL } +}; + +#define KOFF(f) bitize(offsetof(xfs_bmbt_key_t, br_ ## f)) +const field_t bmapbta_key_flds[] = { + { "startoff", FLDT_DFILOFFA, OI(KOFF(startoff)), C1, 0, TYP_ATTR }, + { NULL } +}; +const field_t bmapbtd_key_flds[] = { + { "startoff", FLDT_DFILOFFD, OI(KOFF(startoff)), C1, 0, TYP_INODATA }, + { NULL } +}; + +const field_t bmapbta_rec_flds[] = { + { "startoff", FLDT_CFILEOFFA, OI(BMBT_STARTOFF_BITOFF), C1, 0, + TYP_ATTR }, + { "startblock", FLDT_CFSBLOCK, OI(BMBT_STARTBLOCK_BITOFF), C1, 0, + TYP_ATTR }, + { "blockcount", FLDT_CEXTLEN, OI(BMBT_BLOCKCOUNT_BITOFF), C1, 0, + TYP_NONE }, + { "extentflag", FLDT_CEXTFLG, OI(BMBT_EXNTFLAG_BITOFF), C1, 0, + TYP_NONE }, + { NULL } +}; +const field_t bmapbtd_rec_flds[] = { + { "startoff", FLDT_CFILEOFFD, OI(BMBT_STARTOFF_BITOFF), C1, 0, + TYP_INODATA }, + { 
"startblock", FLDT_CFSBLOCK, OI(BMBT_STARTBLOCK_BITOFF), C1, 0, + TYP_INODATA }, + { "blockcount", FLDT_CEXTLEN, OI(BMBT_BLOCKCOUNT_BITOFF), C1, 0, + TYP_NONE }, + { "extentflag", FLDT_CEXTFLG, OI(BMBT_EXNTFLAG_BITOFF), C1, 0, + TYP_NONE }, + { NULL } +}; + +static int +bmapbta_key_count( + void *obj, + int startoff) +{ + xfs_bmbt_block_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->bb_level, ARCH_CONVERT) == 0) + return 0; + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +static int +bmapbta_key_offset( + void *obj, + int startoff, + int idx) +{ + xfs_bmbt_block_t *block; + xfs_bmbt_key_t *kp; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + kp = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, idx, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 0)); + return bitize((int)((char *)kp - (char *)block)); +} + +static int +bmapbta_ptr_count( + void *obj, + int startoff) +{ + xfs_bmbt_block_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->bb_level, ARCH_CONVERT) == 0) + return 0; + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +static int +bmapbta_ptr_offset( + void *obj, + int startoff, + int idx) +{ + xfs_bmbt_block_t *block; + xfs_bmbt_ptr_t *pp; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, idx, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 0)); + return bitize((int)((char *)pp - (char *)block)); +} + +static int +bmapbta_rec_count( + void *obj, + int startoff) +{ + xfs_bmbt_block_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->bb_level, ARCH_CONVERT) > 0) + return 0; + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +static int +bmapbta_rec_offset( + void *obj, + int startoff, + int idx) +{ + xfs_bmbt_block_t *block; + xfs_bmbt_rec_t *rp; + + ASSERT(startoff == 0); + block 
= obj; + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) == 0); + rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, idx, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 1)); + return bitize((int)((char *)rp - (char *)block)); +} + +int +bmapbta_size( + void *obj, + int startoff, + int idx) +{ + return bitize(mp->m_sb.sb_blocksize); +} + +static int +bmapbtd_key_count( + void *obj, + int startoff) +{ + xfs_bmbt_block_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->bb_level, ARCH_CONVERT) == 0) + return 0; + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +static int +bmapbtd_key_offset( + void *obj, + int startoff, + int idx) +{ + xfs_bmbt_block_t *block; + xfs_bmbt_key_t *kp; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + kp = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, idx, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 0)); + return bitize((int)((char *)kp - (char *)block)); +} + +static int +bmapbtd_ptr_count( + void *obj, + int startoff) +{ + xfs_bmbt_block_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->bb_level, ARCH_CONVERT) == 0) + return 0; + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +static int +bmapbtd_ptr_offset( + void *obj, + int startoff, + int idx) +{ + xfs_bmbt_block_t *block; + xfs_bmbt_ptr_t *pp; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, idx, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 0)); + return bitize((int)((char *)pp - (char *)block)); +} + +static int +bmapbtd_rec_count( + void *obj, + int startoff) +{ + xfs_bmbt_block_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->bb_level, ARCH_CONVERT) > 0) + return 0; + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +static int +bmapbtd_rec_offset( + void *obj, + int 
startoff, + int idx) +{ + xfs_bmbt_block_t *block; + xfs_bmbt_rec_t *rp; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) == 0); + rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, idx, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_bmbt, 1)); + return bitize((int)((char *)rp - (char *)block)); +} + +int +bmapbtd_size( + void *obj, + int startoff, + int idx) +{ + return bitize(mp->m_sb.sb_blocksize); +} diff --git a/db/bmapbt.h b/db/bmapbt.h new file mode 100644 index 000000000..8f39c98f3 --- /dev/null +++ b/db/bmapbt.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct field; + +extern const struct field bmapbta_flds[]; +extern const struct field bmapbta_hfld[]; +extern const struct field bmapbta_key_flds[]; +extern const struct field bmapbta_rec_flds[]; +extern const struct field bmapbtd_flds[]; +extern const struct field bmapbtd_hfld[]; +extern const struct field bmapbtd_key_flds[]; +extern const struct field bmapbtd_rec_flds[]; + +extern int bmapbta_size(void *obj, int startoff, int idx); +extern int bmapbtd_size(void *obj, int startoff, int idx); diff --git a/db/bmroot.c b/db/bmroot.c new file mode 100644 index 000000000..a96c6d29f --- /dev/null +++ b/db/bmroot.c @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "data.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "bmroot.h" +#include "io.h" +#include "print.h" +#include "bit.h" +#include "mount.h" + +static int bmroota_key_count(void *obj, int startoff); +static int bmroota_key_offset(void *obj, int startoff, int idx); +static int bmroota_ptr_count(void *obj, int startoff); +static int bmroota_ptr_offset(void *obj, int startoff, int idx); +static int bmrootd_key_count(void *obj, int startoff); +static int bmrootd_key_offset(void *obj, int startoff, int idx); +static int bmrootd_ptr_count(void *obj, int startoff); +static int bmrootd_ptr_offset(void *obj, int startoff, int idx); + +#define OFF(f) bitize(offsetof(xfs_bmdr_block_t, bb_ ## f)) +const field_t bmroota_flds[] = { + { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE }, + { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE }, + { "keys", FLDT_BMROOTAKEY, bmroota_key_offset, bmroota_key_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { "ptrs", FLDT_BMROOTAPTR, bmroota_ptr_offset, bmroota_ptr_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_BMAPBTA }, + { NULL } +}; +const field_t bmrootd_flds[] = { + { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE }, + { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE }, + { "keys", FLDT_BMROOTDKEY, bmrootd_key_offset, bmrootd_key_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { "ptrs", FLDT_BMROOTDPTR, bmrootd_ptr_offset, bmrootd_ptr_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_BMAPBTD }, + { NULL } +}; + +#define KOFF(f) bitize(offsetof(xfs_bmdr_key_t, br_ ## f)) +const field_t bmroota_key_flds[] = { + { 
"startoff", FLDT_DFILOFFA, OI(KOFF(startoff)), C1, 0, TYP_NONE }, + { NULL } +}; +const field_t bmrootd_key_flds[] = { + { "startoff", FLDT_DFILOFFD, OI(KOFF(startoff)), C1, 0, TYP_NONE }, + { NULL } +}; + +static int +bmroota_key_count( + void *obj, + int startoff) +{ + xfs_bmdr_block_t *block; +#ifdef DEBUG + xfs_dinode_t *dip = obj; +#endif + + ASSERT(bitoffs(startoff) == 0); + ASSERT(obj == iocur_top->data); + block = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff)); + ASSERT(XFS_DFORK_Q(dip) && (char *)block == XFS_DFORK_APTR(dip)); + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +static int +bmroota_key_offset( + void *obj, + int startoff, + int idx) +{ + xfs_bmdr_block_t *block; + /* REFERENCED */ + xfs_dinode_t *dip; + xfs_bmdr_key_t *kp; + + ASSERT(bitoffs(startoff) == 0); + ASSERT(obj == iocur_top->data); + dip = obj; + block = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff)); + ASSERT(XFS_DFORK_Q(dip) && (char *)block == XFS_DFORK_APTR(dip)); + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + kp = XFS_BTREE_KEY_ADDR(iocur_top->len, xfs_bmdr, block, idx, + XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_ASIZE(dip, mp), xfs_bmdr, 0)); + return bitize((int)((char *)kp - (char *)block)); +} + +static int +bmroota_ptr_count( + void *obj, + int startoff) +{ + xfs_bmdr_block_t *block; +#ifdef DEBUG + xfs_dinode_t *dip = obj; +#endif + + ASSERT(bitoffs(startoff) == 0); + ASSERT(obj == iocur_top->data); + block = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff)); + ASSERT(XFS_DFORK_Q(dip) && (char *)block == XFS_DFORK_APTR(dip)); + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +static int +bmroota_ptr_offset( + void *obj, + int startoff, + int idx) +{ + xfs_bmdr_block_t *block; + xfs_dinode_t *dip; + xfs_bmdr_ptr_t *pp; + + ASSERT(bitoffs(startoff) == 0); + ASSERT(obj == iocur_top->data); + dip = obj; + block = (xfs_bmdr_block_t 
*)((char *)obj + byteize(startoff)); + ASSERT(XFS_DFORK_Q(dip) && (char *)block == XFS_DFORK_APTR(dip)); + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + pp = XFS_BTREE_PTR_ADDR(iocur_top->len, xfs_bmdr, block, idx, + XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_ASIZE(dip, mp), xfs_bmdr, 0)); + return bitize((int)((char *)pp - (char *)block)); +} + +int +bmroota_size( + void *obj, + int startoff, + int idx) +{ + xfs_dinode_t *dip; +#ifdef DEBUG + xfs_bmdr_block_t *block; +#endif + + ASSERT(bitoffs(startoff) == 0); + ASSERT(obj == iocur_top->data); + ASSERT(idx == 0); + dip = obj; +#ifdef DEBUG + block = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff)); + ASSERT(XFS_DFORK_Q(dip) && (char *)block == XFS_DFORK_APTR(dip)); +#endif + return bitize((int)XFS_DFORK_ASIZE(dip, mp)); +} + +static int +bmrootd_key_count( + void *obj, + int startoff) +{ + xfs_bmdr_block_t *block; +#ifdef DEBUG + xfs_dinode_t *dip = obj; +#endif + + ASSERT(bitoffs(startoff) == 0); + ASSERT(obj == iocur_top->data); + block = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff)); + ASSERT((char *)block == XFS_DFORK_DPTR(dip)); + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +static int +bmrootd_key_offset( + void *obj, + int startoff, + int idx) +{ + xfs_bmdr_block_t *block; + xfs_bmdr_key_t *kp; + xfs_dinode_t *dip; + + ASSERT(bitoffs(startoff) == 0); + ASSERT(obj == iocur_top->data); + dip = obj; + block = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff)); + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + kp = XFS_BTREE_KEY_ADDR(iocur_top->len, xfs_bmdr, block, idx, + XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_DSIZE(dip, mp), xfs_bmdr, 0)); + return bitize((int)((char *)kp - (char *)block)); +} + +static int +bmrootd_ptr_count( + void *obj, + int startoff) +{ + xfs_bmdr_block_t *block; +#ifdef DEBUG + xfs_dinode_t *dip = obj; +#endif + + ASSERT(bitoffs(startoff) == 0); + ASSERT(obj == iocur_top->data); + block = (xfs_bmdr_block_t 
*)((char *)obj + byteize(startoff)); + ASSERT((char *)block == XFS_DFORK_DPTR(dip)); + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +static int +bmrootd_ptr_offset( + void *obj, + int startoff, + int idx) +{ + xfs_bmdr_block_t *block; + xfs_bmdr_ptr_t *pp; + xfs_dinode_t *dip; + + ASSERT(bitoffs(startoff) == 0); + ASSERT(obj == iocur_top->data); + dip = obj; + block = (xfs_bmdr_block_t *)((char *)obj + byteize(startoff)); + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + pp = XFS_BTREE_PTR_ADDR(iocur_top->len, xfs_bmdr, block, idx, + XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_DSIZE(dip, mp), xfs_bmdr, 0)); + return bitize((int)((char *)pp - (char *)block)); +} + +int +bmrootd_size( + void *obj, + int startoff, + int idx) +{ + xfs_dinode_t *dip; + + ASSERT(bitoffs(startoff) == 0); + ASSERT(obj == iocur_top->data); + ASSERT(idx == 0); + dip = obj; + return bitize((int)XFS_DFORK_DSIZE(dip, mp)); +} diff --git a/db/bmroot.h b/db/bmroot.h new file mode 100644 index 000000000..3f8ef0cc5 --- /dev/null +++ b/db/bmroot.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct field; + +extern const struct field bmroota_flds[]; +extern const struct field bmroota_key_flds[]; +extern const struct field bmrootd_flds[]; +extern const struct field bmrootd_key_flds[]; + +extern int bmroota_size(void *obj, int startoff, int idx); +extern int bmrootd_size(void *obj, int startoff, int idx); diff --git a/db/bnobt.c b/db/bnobt.c new file mode 100644 index 000000000..3b0960562 --- /dev/null +++ b/db/bnobt.c @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "data.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "bnobt.h" +#include "io.h" +#include "print.h" +#include "bit.h" +#include "mount.h" + +static int bnobt_key_count(void *obj, int startoff); +static int bnobt_key_offset(void *obj, int startoff, int idx); +static int bnobt_ptr_count(void *obj, int startoff); +static int bnobt_ptr_offset(void *obj, int startoff, int idx); +static int bnobt_rec_count(void *obj, int startoff); +static int bnobt_rec_offset(void *obj, int startoff, int idx); + +const field_t bnobt_hfld[] = { + { "", FLDT_BNOBT, OI(0), C1, 0, TYP_NONE }, + { NULL } +}; + +#define OFF(f) bitize(offsetof(xfs_alloc_block_t, bb_ ## f)) +const field_t bnobt_flds[] = { + { "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE }, + { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE }, + { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE }, + { "leftsib", FLDT_AGBLOCK, OI(OFF(leftsib)), C1, 0, TYP_BNOBT }, + { "rightsib", FLDT_AGBLOCK, OI(OFF(rightsib)), C1, 0, TYP_BNOBT }, + { "recs", FLDT_BNOBTREC, bnobt_rec_offset, bnobt_rec_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { "keys", FLDT_BNOBTKEY, bnobt_key_offset, bnobt_key_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { "ptrs", FLDT_BNOBTPTR, bnobt_ptr_offset, bnobt_ptr_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_BNOBT }, + { NULL } +}; + +#define KOFF(f) bitize(offsetof(xfs_alloc_key_t, ar_ ## f)) 
+const field_t bnobt_key_flds[] = { + { "startblock", FLDT_AGBLOCK, OI(KOFF(startblock)), C1, 0, TYP_DATA }, + { "blockcount", FLDT_EXTLEN, OI(KOFF(blockcount)), C1, 0, TYP_NONE }, + { NULL } +}; + +#define ROFF(f) bitize(offsetof(xfs_alloc_rec_t, ar_ ## f)) +const field_t bnobt_rec_flds[] = { + { "startblock", FLDT_AGBLOCK, OI(ROFF(startblock)), C1, 0, TYP_DATA }, + { "blockcount", FLDT_EXTLEN, OI(ROFF(blockcount)), C1, 0, TYP_NONE }, + { NULL } +}; + +static int +bnobt_key_count( + void *obj, + int startoff) +{ + xfs_alloc_block_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->bb_level, ARCH_CONVERT) == 0) + return 0; + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +static int +bnobt_key_offset( + void *obj, + int startoff, + int idx) +{ + xfs_alloc_block_t *block; + xfs_alloc_key_t *kp; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + kp = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, idx, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0)); + return bitize((int)((char *)kp - (char *)block)); +} + +static int +bnobt_ptr_count( + void *obj, + int startoff) +{ + xfs_alloc_block_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->bb_level, ARCH_CONVERT) == 0) + return 0; + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +static int +bnobt_ptr_offset( + void *obj, + int startoff, + int idx) +{ + xfs_alloc_block_t *block; + xfs_alloc_ptr_t *pp; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, idx, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0)); + return bitize((int)((char *)pp - (char *)block)); +} + +static int +bnobt_rec_count( + void *obj, + int startoff) +{ + xfs_alloc_block_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->bb_level, ARCH_CONVERT) > 0) + return 0; + return 
INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +static int +bnobt_rec_offset( + void *obj, + int startoff, + int idx) +{ + xfs_alloc_block_t *block; + xfs_alloc_rec_t *rp; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) == 0); + rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, idx, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1)); + return bitize((int)((char *)rp - (char *)block)); +} + +int +bnobt_size( + void *obj, + int startoff, + int idx) +{ + return bitize(mp->m_sb.sb_blocksize); +} diff --git a/db/bnobt.h b/db/bnobt.h new file mode 100644 index 000000000..07e8b2637 --- /dev/null +++ b/db/bnobt.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct field; + +extern const struct field bnobt_flds[]; +extern const struct field bnobt_hfld[]; +extern const struct field bnobt_key_flds[]; +extern const struct field bnobt_rec_flds[]; + +extern int bnobt_size(void *obj, int startoff, int idx); diff --git a/db/check.c b/db/check.c new file mode 100644 index 000000000..b40442206 --- /dev/null +++ b/db/check.c @@ -0,0 +1,4468 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include +#include +#include "bmap.h" +#include "check.h" +#include "command.h" +#include "data.h" +#include "io.h" +#include "output.h" +#include "type.h" +#include "mount.h" +#include "malloc.h" + +typedef enum { + DBM_UNKNOWN, DBM_AGF, DBM_AGFL, DBM_AGI, + DBM_ATTR, DBM_BTBMAPA, DBM_BTBMAPD, DBM_BTBNO, + DBM_BTCNT, DBM_BTINO, DBM_DATA, DBM_DIR, + DBM_FREE1, DBM_FREE2, DBM_FREELIST, DBM_INODE, + DBM_LOG, DBM_MISSING, DBM_QUOTA, DBM_RTBITMAP, + DBM_RTDATA, DBM_RTFREE, DBM_RTSUM, DBM_SB, + DBM_SYMLINK, + DBM_NDBM +} dbm_t; + +typedef struct inodata { + struct inodata *next; + nlink_t link_set; + nlink_t link_add; + char isdir; + char security; + char ilist; + xfs_ino_t ino; + struct inodata *parent; + char *name; +} inodata_t; +#define MIN_INODATA_HASH_SIZE 256 +#define MAX_INODATA_HASH_SIZE 65536 +#define INODATA_AVG_HASH_LENGTH 8 + +typedef struct qinfo { + xfs_qcnt_t bc; + xfs_qcnt_t ic; + xfs_qcnt_t rc; +} qinfo_t; + +#define QDATA_HASH_SIZE 256 +typedef struct qdata { + struct qdata *next; + xfs_dqid_t id; + qinfo_t count; + qinfo_t dq; +} qdata_t; + +typedef struct blkent { + xfs_fileoff_t startoff; + int nblks; + xfs_fsblock_t blks[1]; +} blkent_t; +#define BLKENT_SIZE(n) \ + (offsetof(blkent_t, blks) + (sizeof(xfs_fsblock_t) * (n))) + +typedef struct blkmap { + int naents; + int nents; + blkent_t *ents[1]; +} blkmap_t; +#define BLKMAP_SIZE(n) \ + (offsetof(blkmap_t, ents) + (sizeof(blkent_t *) * (n))) + +typedef struct freetab { + int naents; + int nents; + xfs_dir2_data_off_t ents[1]; +} freetab_t; +#define FREETAB_SIZE(n) \ + (offsetof(freetab_t, ents) + (sizeof(xfs_dir2_data_off_t) * (n))) + +typedef struct dirhash { + struct dirhash *next; + xfs_dir2_leaf_entry_t entry; 
+ int seen; +} dirhash_t; +#define DIR_HASH_SIZE 1024 +#define DIR_HASH_FUNC(h,a) (((h) ^ (a)) % DIR_HASH_SIZE) + +static xfs_extlen_t agffreeblks; +static xfs_extlen_t agflongest; +static xfs_agino_t agicount; +static xfs_agino_t agifreecount; +static xfs_fsblock_t *blist; +static int blist_size; +static char **dbmap; /* really dbm_t:8 */ +static dirhash_t **dirhash; +static int error; +static __uint64_t fdblocks; +static __uint64_t frextents; +static __uint64_t icount; +static __uint64_t ifree; +static inodata_t ***inodata; +static int inodata_hash_size; +static inodata_t ***inomap; +static int nflag; +static int pflag; +static qdata_t **qpdata; +static int qpdo; +static qdata_t **qudata; +static int qudo; +static unsigned sbversion; +static int sbver_err; +static int serious_error; +static int sflag; +static xfs_suminfo_t *sumcompute; +static xfs_suminfo_t *sumfile; +static const char *typename[] = { + "unknown", + "agf", + "agfl", + "agi", + "attr", + "btbmapa", + "btbmapd", + "btbno", + "btcnt", + "btino", + "data", + "dir", + "free1", + "free2", + "freelist", + "inode", + "log", + "missing", + "quota", + "rtbitmap", + "rtdata", + "rtfree", + "rtsum", + "sb", + "symlink", + NULL +}; +static int verbose; + +#define CHECK_BLIST(b) (blist_size && check_blist(b)) +#define CHECK_BLISTA(a,b) \ + (blist_size && check_blist(XFS_AGB_TO_FSB(mp, a, b))) + +typedef void (*scan_lbtree_f_t)(xfs_btree_lblock_t *block, + int level, + dbm_t type, + xfs_fsblock_t bno, + inodata_t *id, + xfs_drfsbno_t *totd, + xfs_drfsbno_t *toti, + xfs_extnum_t *nex, + blkmap_t **blkmapp, + int isroot, + typnm_t btype); + +typedef void (*scan_sbtree_f_t)(xfs_btree_sblock_t *block, + int level, + xfs_agf_t *agf, + xfs_agblock_t bno, + int isroot); + +static void add_blist(xfs_fsblock_t bno); +static void add_ilist(xfs_ino_t ino); +static void addlink_inode(inodata_t *id); +static void addname_inode(inodata_t *id, char *name, int namelen); +static void addparent_inode(inodata_t *id, xfs_ino_t 
parent); +static void blkent_append(blkent_t **entp, xfs_fsblock_t b, + xfs_extlen_t c); +static blkent_t *blkent_new(xfs_fileoff_t o, xfs_fsblock_t b, + xfs_extlen_t c); +static void blkent_prepend(blkent_t **entp, xfs_fsblock_t b, + xfs_extlen_t c); +static blkmap_t *blkmap_alloc(xfs_extnum_t); +static void blkmap_free(blkmap_t *blkmap); +static xfs_fsblock_t blkmap_get(blkmap_t *blkmap, xfs_fileoff_t o); +static int blkmap_getn(blkmap_t *blkmap, xfs_fileoff_t o, int nb, + bmap_ext_t **bmpp); +static void blkmap_grow(blkmap_t **blkmapp, blkent_t **entp, + blkent_t *newent); +static xfs_fileoff_t blkmap_next_off(blkmap_t *blkmap, xfs_fileoff_t o, + int *t); +static void blkmap_set_blk(blkmap_t **blkmapp, xfs_fileoff_t o, + xfs_fsblock_t b); +static void blkmap_set_ext(blkmap_t **blkmapp, xfs_fileoff_t o, + xfs_fsblock_t b, xfs_extlen_t c); +static void blkmap_shrink(blkmap_t *blkmap, blkent_t **entp); +static int blockfree_f(int argc, char **argv); +static int blockget_f(int argc, char **argv); +#ifdef DEBUG +static int blocktrash_f(int argc, char **argv); +#endif +static int blockuse_f(int argc, char **argv); +static int check_blist(xfs_fsblock_t bno); +static void check_dbmap(xfs_agnumber_t agno, xfs_agblock_t agbno, + xfs_extlen_t len, dbm_t type); +static int check_inomap(xfs_agnumber_t agno, xfs_agblock_t agbno, + xfs_extlen_t len, xfs_ino_t c_ino); +static void check_linkcounts(xfs_agnumber_t agno); +static int check_range(xfs_agnumber_t agno, xfs_agblock_t agbno, + xfs_extlen_t len); +static void check_rdbmap(xfs_drfsbno_t bno, xfs_extlen_t len, + dbm_t type); +static int check_rinomap(xfs_drfsbno_t bno, xfs_extlen_t len, + xfs_ino_t c_ino); +static void check_rootdir(void); +static int check_rrange(xfs_drfsbno_t bno, xfs_extlen_t len); +static void check_set_dbmap(xfs_agnumber_t agno, + xfs_agblock_t agbno, xfs_extlen_t len, + dbm_t type1, dbm_t type2, + xfs_agnumber_t c_agno, + xfs_agblock_t c_agbno); +static void check_set_rdbmap(xfs_drfsbno_t bno, 
xfs_extlen_t len, + dbm_t type1, dbm_t type2); +static void check_summary(void); +static void checknot_dbmap(xfs_agnumber_t agno, xfs_agblock_t agbno, + xfs_extlen_t len, int typemask); +static void checknot_rdbmap(xfs_drfsbno_t bno, xfs_extlen_t len, + int typemask); +static void dir_hash_add(xfs_dahash_t hash, + xfs_dir2_dataptr_t addr); +static void dir_hash_check(inodata_t *id, int v); +static void dir_hash_done(void); +static void dir_hash_init(void); +static int dir_hash_see(xfs_dahash_t hash, + xfs_dir2_dataptr_t addr); +static inodata_t *find_inode(xfs_ino_t ino, int add); +static void free_inodata(xfs_agnumber_t agno); +static int init(int argc, char **argv); +static char *inode_name(xfs_ino_t ino, inodata_t **ipp); +static int ncheck_f(int argc, char **argv); +static char *prepend_path(char *oldpath, char *parent); +static xfs_ino_t process_block_dir_v2(blkmap_t *blkmap, int *dot, + int *dotdot, inodata_t *id); +static void process_bmbt_reclist(xfs_bmbt_rec_32_t *rp, int numrecs, + dbm_t type, inodata_t *id, + xfs_drfsbno_t *tot, + blkmap_t **blkmapp); +static void process_btinode(inodata_t *id, xfs_dinode_t *dip, + dbm_t type, xfs_drfsbno_t *totd, + xfs_drfsbno_t *toti, xfs_extnum_t *nex, + blkmap_t **blkmapp, int whichfork); +static xfs_ino_t process_data_dir_v2(int *dot, int *dotdot, + inodata_t *id, int v, + xfs_dablk_t dabno, + freetab_t **freetabp); +static xfs_dir2_data_free_t + *process_data_dir_v2_freefind(xfs_dir2_data_t *data, + xfs_dir2_data_unused_t *dup); +static void process_dir(xfs_dinode_t *dip, blkmap_t *blkmap, + inodata_t *id); +static int process_dir_v1(xfs_dinode_t *dip, blkmap_t *blkmap, + int *dot, int *dotdot, inodata_t *id, + xfs_ino_t *parent); +static int process_dir_v2(xfs_dinode_t *dip, blkmap_t *blkmap, + int *dot, int *dotdot, inodata_t *id, + xfs_ino_t *parent); +static void process_exinode(inodata_t *id, xfs_dinode_t *dip, + dbm_t type, xfs_drfsbno_t *totd, + xfs_drfsbno_t *toti, xfs_extnum_t *nex, + blkmap_t **blkmapp, 
int whichfork); +static void process_inode(xfs_agf_t *agf, xfs_agino_t agino, + xfs_dinode_t *dip, int isfree); +static void process_lclinode(inodata_t *id, xfs_dinode_t *dip, + dbm_t type, xfs_drfsbno_t *totd, + xfs_drfsbno_t *toti, xfs_extnum_t *nex, + blkmap_t **blkmapp, int whichfork); +static xfs_ino_t process_leaf_dir_v1(blkmap_t *blkmap, int *dot, + int *dotdot, inodata_t *id); +static xfs_ino_t process_leaf_dir_v1_int(int *dot, int *dotdot, + inodata_t *id); +static xfs_ino_t process_leaf_node_dir_v2(blkmap_t *blkmap, int *dot, + int *dotdot, inodata_t *id, + xfs_fsize_t dirsize); +static void process_leaf_node_dir_v2_free(inodata_t *id, int v, + xfs_dablk_t dbno, + freetab_t *freetab); +static void process_leaf_node_dir_v2_int(inodata_t *id, int v, + xfs_dablk_t dbno, + freetab_t *freetab); +static xfs_ino_t process_node_dir_v1(blkmap_t *blkmap, int *dot, + int *dotdot, inodata_t *id); +static void process_quota(int isproj, inodata_t *id, + blkmap_t *blkmap); +static void process_rtbitmap(blkmap_t *blkmap); +static void process_rtsummary(blkmap_t *blkmap); +static xfs_ino_t process_sf_dir_v2(xfs_dinode_t *dip, int *dot, + int *dotdot, inodata_t *id); +static xfs_ino_t process_shortform_dir_v1(xfs_dinode_t *dip, int *dot, + int *dotdot, inodata_t *id); +static void quota_add(xfs_dqid_t projid, xfs_dqid_t userid, + int dq, xfs_qcnt_t bc, xfs_qcnt_t ic, + xfs_qcnt_t rc); +static void quota_add1(qdata_t **qt, xfs_dqid_t id, int dq, + xfs_qcnt_t bc, xfs_qcnt_t ic, + xfs_qcnt_t rc); +static void quota_check(char *s, qdata_t **qt); +static void quota_init(void); +static void scan_ag(xfs_agnumber_t agno); +static void scan_freelist(xfs_agf_t *agf); +static void scan_lbtree(xfs_fsblock_t root, int nlevels, + scan_lbtree_f_t func, dbm_t type, + inodata_t *id, xfs_drfsbno_t *totd, + xfs_drfsbno_t *toti, xfs_extnum_t *nex, + blkmap_t **blkmapp, int isroot, + typnm_t btype); +static void scan_sbtree(xfs_agf_t *agf, xfs_agblock_t root, + int nlevels, int isroot, + 
scan_sbtree_f_t func, typnm_t btype); +static void scanfunc_bmap(xfs_btree_lblock_t *ablock, int level, + dbm_t type, xfs_fsblock_t bno, + inodata_t *id, xfs_drfsbno_t *totd, + xfs_drfsbno_t *toti, xfs_extnum_t *nex, + blkmap_t **blkmapp, int isroot, + typnm_t btype); +static void scanfunc_bno(xfs_btree_sblock_t *ablock, int level, + xfs_agf_t *agf, xfs_agblock_t bno, + int isroot); +static void scanfunc_cnt(xfs_btree_sblock_t *ablock, int level, + xfs_agf_t *agf, xfs_agblock_t bno, + int isroot); +static void scanfunc_ino(xfs_btree_sblock_t *ablock, int level, + xfs_agf_t *agf, xfs_agblock_t bno, + int isroot); +static void set_dbmap(xfs_agnumber_t agno, xfs_agblock_t agbno, + xfs_extlen_t len, dbm_t type, + xfs_agnumber_t c_agno, xfs_agblock_t c_agbno); +static void set_inomap(xfs_agnumber_t agno, xfs_agblock_t agbno, + xfs_extlen_t len, inodata_t *id); +static void set_rdbmap(xfs_drfsbno_t bno, xfs_extlen_t len, + dbm_t type); +static void set_rinomap(xfs_drfsbno_t bno, xfs_extlen_t len, + inodata_t *id); +static void setlink_inode(inodata_t *id, nlink_t nlink, int isdir, + int security); + +static const cmdinfo_t blockfree_cmd = + { "blockfree", NULL, blockfree_f, 0, 0, 0, + NULL, "free block usage information", NULL }; +static const cmdinfo_t blockget_cmd = + { "blockget", "check", blockget_f, 0, -1, 0, + "[-s|-v] [-n] [-b bno]... 
[-i ino] ...", + "get block usage and check consistency", NULL }; +#ifdef DEBUG +static const cmdinfo_t blocktrash_cmd = + { "blocktrash", NULL, blocktrash_f, 0, -1, 0, + "[-n count] [-x minlen] [-y maxlen] [-s seed] [-0123] [-t type] ...", + "trash randomly selected block(s)", NULL }; +#endif +static const cmdinfo_t blockuse_cmd = + { "blockuse", NULL, blockuse_f, 0, 3, 0, + "[-n] [-c blockcount]", + "print usage for current block(s)", NULL }; +static const cmdinfo_t ncheck_cmd = + { "ncheck", NULL, ncheck_f, 0, -1, 0, + "[-s] [-i ino] ...", + "print inode-name pairs", NULL }; + + +static void +add_blist( + xfs_fsblock_t bno) +{ + blist_size++; + blist = xrealloc(blist, blist_size * sizeof(bno)); + blist[blist_size - 1] = bno; +} + +static void +add_ilist( + xfs_ino_t ino) +{ + inodata_t *id; + + id = find_inode(ino, 1); + if (id == NULL) { + dbprintf("-i %lld bad inode number\n", ino); + return; + } + id->ilist = 1; +} + +static void +addlink_inode( + inodata_t *id) +{ + id->link_add++; + if (verbose || id->ilist) + dbprintf("inode %lld add link, now %u\n", id->ino, + id->link_add); +} + +static void +addname_inode( + inodata_t *id, + char *name, + int namelen) +{ + if (!nflag || id->name) + return; + id->name = xmalloc(namelen + 1); + memcpy(id->name, name, namelen); + id->name[namelen] = '\0'; +} + +static void +addparent_inode( + inodata_t *id, + xfs_ino_t parent) +{ + inodata_t *pid; + + pid = find_inode(parent, 1); + id->parent = pid; + if (verbose || id->ilist || (pid && pid->ilist)) + dbprintf("inode %lld parent %lld\n", id->ino, parent); +} + +static void +blkent_append( + blkent_t **entp, + xfs_fsblock_t b, + xfs_extlen_t c) +{ + blkent_t *ent; + int i; + + ent = *entp; + *entp = ent = xrealloc(ent, BLKENT_SIZE(c + ent->nblks)); + for (i = 0; i < c; i++) + ent->blks[ent->nblks + i] = b + i; + ent->nblks += c; +} + +static blkent_t * +blkent_new( + xfs_fileoff_t o, + xfs_fsblock_t b, + xfs_extlen_t c) +{ + blkent_t *ent; + int i; + + ent = 
xmalloc(BLKENT_SIZE(c));
+	ent->nblks = c;
+	ent->startoff = o;
+	for (i = 0; i < c; i++)
+		ent->blks[i] = b + i;
+	return ent;
+}
+
+/*
+ * Replace *entp with a new entry that covers the c file offsets
+ * immediately before the old entry, followed by the old entry's blocks.
+ * The c new leading slots map to filesystem blocks b, b + 1, ...,
+ * b + c - 1 (the same numbering blkent_new() and blkent_append() use).
+ * The old entry is freed and *entp is updated to the new allocation.
+ */
+static void
+blkent_prepend(
+	blkent_t	**entp,
+	xfs_fsblock_t	b,
+	xfs_extlen_t	c)
+{
+	int		i;
+	blkent_t	*newent;
+	blkent_t	*oldent;
+
+	oldent = *entp;
+	newent = xmalloc(BLKENT_SIZE(oldent->nblks + c));
+	newent->nblks = oldent->nblks + c;
+	newent->startoff = oldent->startoff - c;
+	/* Prepended slots hold consecutive blocks starting at b. */
+	for (i = 0; i < c; i++)
+		newent->blks[i] = b + i;
+	/* The old entry's blocks follow, shifted up by c slots. */
+	for (; i < oldent->nblks + c; i++)
+		newent->blks[i] = oldent->blks[i - c];
+	xfree(oldent);
+	*entp = newent;
+}
+
+/*
+ * Allocate an empty block map with room for nex entry pointers.
+ * A request for fewer than one slot is rounded up to one.
+ */
+static blkmap_t *
+blkmap_alloc(
+	xfs_extnum_t	nex)
+{
+	blkmap_t	*blkmap;
+
+	if (nex < 1)
+		nex = 1;
+	blkmap = xmalloc(BLKMAP_SIZE(nex));
+	blkmap->naents = nex;
+	blkmap->nents = 0;
+	return blkmap;
+}
+
+/*
+ * Free every entry currently held in the map, then the map itself.
+ */
+static void
+blkmap_free(
+	blkmap_t	*blkmap)
+{
+	blkent_t	**entp;
+	xfs_extnum_t	i;
+
+	for (i = 0, entp = blkmap->ents; i < blkmap->nents; i++, entp++)
+		xfree(*entp);
+	xfree(blkmap);
+}
+
+/*
+ * Return the filesystem block recorded for file offset o, or
+ * NULLFSBLOCK when no entry in the map covers that offset.
+ */
+static xfs_fsblock_t
+blkmap_get(
+	blkmap_t	*blkmap,
+	xfs_fileoff_t	o)
+{
+	blkent_t	*ent;
+	blkent_t	**entp;
+	int		i;
+
+	for (i = 0, entp = blkmap->ents; i < blkmap->nents; i++, entp++) {
+		ent = *entp;
+		if (o >= ent->startoff && o < ent->startoff + ent->nblks)
+			return ent->blks[o - ent->startoff];
+	}
+	return NULLFSBLOCK;
+}
+
+/*
+ * Collect the mappings for file offsets [o, o + nb) as an array of
+ * bmap_ext_t extents.  Offsets that are adjacent both in the file and
+ * on disk are coalesced into one extent.  The array is handed back
+ * through *bmpp (NULL when nothing in the range is mapped) and the
+ * number of extents in it is the return value.
+ */
+static int
+blkmap_getn(
+	blkmap_t	*blkmap,
+	xfs_fileoff_t	o,
+	int		nb,
+	bmap_ext_t	**bmpp)
+{
+	bmap_ext_t	*bmp;
+	blkent_t	*ent;
+	xfs_fileoff_t	ento;
+	blkent_t	**entp;
+	int		i;
+	int		nex;
+
+	for (i = nex = 0, bmp = NULL, entp = blkmap->ents;
+	     i < blkmap->nents;
+	     i++, entp++) {
+		ent = *entp;
+		if (ent->startoff >= o + nb)
+			break;
+		if (ent->startoff + ent->nblks <= o)
+			continue;
+		for (ento = ent->startoff;
+		     ento < ent->startoff + ent->nblks && ento < o + nb;
+		     ento++) {
+			if (ento < o)
+				continue;
+			if (bmp &&
+			    bmp[nex - 1].startoff + bmp[nex - 1].blockcount ==
+				    ento &&
+			    bmp[nex - 1].startblock + bmp[nex - 1].blockcount ==
+
ent->blks[ento - ent->startoff])
+				bmp[nex - 1].blockcount++;
+			else {
+				/*
+				 * Use the file's xrealloc wrapper (as
+				 * add_blist and blkmap_grow do) rather than
+				 * raw realloc: the result is dereferenced
+				 * immediately, so a NULL return must not get
+				 * this far.  NOTE(review): assumes xrealloc
+				 * does not return NULL on failure - confirm
+				 * against db/malloc.c.
+				 */
+				bmp = xrealloc(bmp, ++nex * sizeof(*bmp));
+				bmp[nex - 1].startoff = ento;
+				bmp[nex - 1].startblock =
+					ent->blks[ento - ent->startoff];
+				bmp[nex - 1].blockcount = 1;
+				bmp[nex - 1].flag = 0;
+			}
+		}
+	}
+	*bmpp = bmp;
+	return nex;
+}
+
+/*
+ * Insert newent into the map at the slot entp points to, moving the
+ * entries from that slot onward up by one.  entp may point one past the
+ * last used slot, which appends.  When the map is full it is reallocated
+ * with one more slot and *blkmapp is updated; the slot index is taken
+ * before the reallocation because entp points into the old allocation.
+ */
+static void
+blkmap_grow(
+	blkmap_t	**blkmapp,
+	blkent_t	**entp,
+	blkent_t	*newent)
+{
+	blkmap_t	*blkmap;
+	int		i;
+	int		idx;
+
+	blkmap = *blkmapp;
+	idx = (int)(entp - blkmap->ents);
+	if (blkmap->naents == blkmap->nents) {
+		blkmap = xrealloc(blkmap, BLKMAP_SIZE(blkmap->nents + 1));
+		*blkmapp = blkmap;
+		blkmap->naents++;
+	}
+	for (i = blkmap->nents; i > idx; i--)
+		blkmap->ents[i] = blkmap->ents[i - 1];
+	blkmap->ents[idx] = newent;
+	blkmap->nents++;
+}
+
+/*
+ * Return the file offset one past the end of the last entry in the map,
+ * or NULLFILEOFF when the map has no entries.
+ */
+static xfs_fileoff_t
+blkmap_last_off(
+	blkmap_t	*blkmap)
+{
+	blkent_t	*ent;
+
+	if (!blkmap->nents)
+		return NULLFILEOFF;
+	ent = blkmap->ents[blkmap->nents - 1];
+	return ent->startoff + ent->nblks;
+}
+
+/*
+ * Step through the mapped file offsets in order.
+ *
+ * Pass o == NULLFILEOFF to start: *t is reset to 0 and the first entry's
+ * start offset is returned.  On later calls *t is the index of the entry
+ * that contains o; the result is o + 1 while that stays inside the
+ * entry, otherwise the start offset of the next entry (and *t is
+ * advanced).  Returns NULLFILEOFF for an empty map or when the last
+ * entry has been exhausted.
+ */
+static xfs_fileoff_t
+blkmap_next_off(
+	blkmap_t	*blkmap,
+	xfs_fileoff_t	o,
+	int		*t)
+{
+	blkent_t	*ent;
+	blkent_t	**entp;
+
+	if (!blkmap->nents)
+		return NULLFILEOFF;
+	if (o == NULLFILEOFF) {
+		*t = 0;
+		ent = blkmap->ents[0];
+		return ent->startoff;
+	}
+	entp = &blkmap->ents[*t];
+	ent = *entp;
+	if (o < ent->startoff + ent->nblks - 1)
+		return o + 1;
+	entp++;
+	if (entp >= &blkmap->ents[blkmap->nents])
+		return NULLFILEOFF;
+	(*t)++;
+	ent = *entp;
+	return ent->startoff;
+}
+
+/*
+ * Record that file offset o maps to filesystem block b.  Depending on
+ * where o falls relative to the existing entries this inserts a new
+ * one-block entry, extends a neighbouring entry, or overwrites the slot
+ * of an entry that already covers o.
+ */
+static void
+blkmap_set_blk(
+	blkmap_t	**blkmapp,
+	xfs_fileoff_t	o,
+	xfs_fsblock_t	b)
+{
+	blkmap_t	*blkmap;
+	blkent_t	*ent;
+	blkent_t	**entp;
+	blkent_t	*nextent;
+
+	blkmap = *blkmapp;
+	for (entp = blkmap->ents; entp < &blkmap->ents[blkmap->nents]; entp++) {
+		ent = *entp;
+		/*
+		 * Compare o + 1 with startoff instead of o with
+		 * startoff - 1.  xfs_fileoff_t is an unsigned type in the
+		 * XFS headers, so startoff - 1 wraps when an entry starts
+		 * at offset 0 and an offset inside that entry would be
+		 * treated as lying before it.
+		 */
+		if (o + 1 < ent->startoff) {
+			ent = blkent_new(o, b, 1);
+			blkmap_grow(blkmapp, entp, ent);
+			return;
+		}
+		if (o + 1 == ent->startoff) {
+			blkent_prepend(entp, b, 1);
+			return;
+		}
+		if (o >= ent->startoff && o <
ent->startoff + ent->nblks) {
+			ent->blks[o - ent->startoff] = b;
+			return;
+		}
+		if (o > ent->startoff + ent->nblks)
+			continue;
+		blkent_append(entp, b, 1);
+		if (entp == &blkmap->ents[blkmap->nents - 1])
+			return;
+		ent = *entp;
+		nextent = entp[1];
+		if (ent->startoff + ent->nblks < nextent->startoff)
+			return;
+		/*
+		 * This entry now runs up to the next one: fold the next
+		 * entry into it.  Copy the next entry's blocks slot by slot;
+		 * they need not be consecutive block numbers, so they cannot
+		 * be regenerated from blks[0] and a count.
+		 */
+		*entp = ent = xrealloc(ent,
+			BLKENT_SIZE(ent->nblks + nextent->nblks));
+		memcpy(&ent->blks[ent->nblks], nextent->blks,
+			nextent->nblks * sizeof(nextent->blks[0]));
+		ent->nblks += nextent->nblks;
+		blkmap_shrink(blkmap, &entp[1]);
+		return;
+	}
+	ent = blkent_new(o, b, 1);
+	blkmap_grow(blkmapp, entp, ent);
+}
+
+/*
+ * Record that the c file offsets starting at o map to the c filesystem
+ * blocks starting at b.  An empty map gets its first entry; an extent
+ * that starts exactly at the end of the last entry is appended to it;
+ * one that starts beyond the last entry becomes a new last entry.  Any
+ * other placement is handled one block at a time by blkmap_set_blk().
+ */
+static void
+blkmap_set_ext(
+	blkmap_t	**blkmapp,
+	xfs_fileoff_t	o,
+	xfs_fsblock_t	b,
+	xfs_extlen_t	c)
+{
+	blkmap_t	*blkmap;
+	blkent_t	*ent;
+	blkent_t	**entp;
+	xfs_extnum_t	i;
+
+	blkmap = *blkmapp;
+	if (!blkmap->nents) {
+		blkmap->ents[0] = blkent_new(o, b, c);
+		blkmap->nents = 1;
+		return;
+	}
+	entp = &blkmap->ents[blkmap->nents - 1];
+	ent = *entp;
+	if (ent->startoff + ent->nblks == o) {
+		blkent_append(entp, b, c);
+		return;
+	}
+	if (ent->startoff + ent->nblks < o) {
+		ent = blkent_new(o, b, c);
+		blkmap_grow(blkmapp, &blkmap->ents[blkmap->nents], ent);
+		return;
+	}
+	for (i = 0; i < c; i++)
+		blkmap_set_blk(blkmapp, o + i, b + i);
+}
+
+/*
+ * Free the entry entp points to and remove its slot from the map by
+ * moving every later entry down one position.
+ */
+static void
+blkmap_shrink(
+	blkmap_t	*blkmap,
+	blkent_t	**entp)
+{
+	int		i;
+	int		idx;
+
+	xfree(*entp);
+	idx = (int)(entp - blkmap->ents);
+	/* Close the gap: each later entry moves into the slot below it. */
+	for (i = idx + 1; i < blkmap->nents; i++)
+		blkmap->ents[i - 1] = blkmap->ents[i];
+	blkmap->nents--;
+}
+
+/*
+ * "blockfree" command: release the block usage data (dbmap, inomap and
+ * inodata, plus the sumcompute/sumfile buffers when sb_rextents is
+ * non-zero) and reset the pointers to NULL.  If dbmap is not allocated
+ * a message is printed and nothing is freed.  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+blockfree_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_agnumber_t	c;
+	int		rt;
+
+	if (!dbmap) {
+		dbprintf("block usage information not allocated\n");
+		return 0;
+	}
+	rt = mp->m_sb.sb_rextents != 0;
+	for (c = 0; c < mp->m_sb.sb_agcount; c++) {
+		xfree(dbmap[c]);
+		xfree(inomap[c]);
+		free_inodata(c);
+	}
+	/* c == sb_agcount here: the slot after the per-AG maps. */
+	if (rt) {
+		xfree(dbmap[c]);
+		xfree(inomap[c]);
+		xfree(sumcompute);
+		xfree(sumfile);
+		sumcompute = sumfile = NULL;
+	}
+	xfree(dbmap);
+	xfree(inomap);
+	xfree(inodata);
+	dbmap = NULL;
+	inomap = NULL;
+	inodata = NULL;
+	return 0;
+}
+
+/*
+ * 
Check consistency of xfs filesystem contents. + */ +static int +blockget_f( + int argc, + char **argv) +{ + xfs_agnumber_t agno; + int oldprefix; + int sbyell; + + if (dbmap) { + dbprintf("already have block usage information\n"); + return 0; + } + if (!init(argc, argv)) + return 0; + oldprefix = dbprefix; + dbprefix |= pflag; + for (agno = 0, sbyell = 0; agno < mp->m_sb.sb_agcount; agno++) { + scan_ag(agno); + if (sbver_err > 4 && !sbyell && sbver_err >= agno) { + sbyell = 1; + dbprintf("WARNING: this may be a newer XFS " + "filesystem.\n"); + } + } + if (blist_size) { + xfree(blist); + blist = NULL; + blist_size = 0; + } + if (serious_error) { + exitcode = 2; + dbprefix = oldprefix; + return 0; + } + check_rootdir(); + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + /* + * Check that there are no blocks either + * a) unaccounted for or + * b) bno-free but not cnt-free + */ + checknot_dbmap(agno, 0, mp->m_sb.sb_agblocks, + (1 << DBM_UNKNOWN) | (1 << DBM_FREE1)); + check_linkcounts(agno); + } + if (mp->m_sb.sb_rblocks) { + checknot_rdbmap(0, + (xfs_extlen_t)(mp->m_sb.sb_rextents * + mp->m_sb.sb_rextsize), + 1 << DBM_UNKNOWN); + check_summary(); + } + if (mp->m_sb.sb_icount != icount) { + if (!sflag) + dbprintf("sb_icount %lld, counted %lld\n", + mp->m_sb.sb_icount, icount); + error++; + } + if (mp->m_sb.sb_ifree != ifree) { + if (!sflag) + dbprintf("sb_ifree %lld, counted %lld\n", + mp->m_sb.sb_ifree, ifree); + error++; + } + if (mp->m_sb.sb_fdblocks != fdblocks) { + if (!sflag) + dbprintf("sb_fdblocks %lld, counted %lld\n", + mp->m_sb.sb_fdblocks, fdblocks); + error++; + } + if (mp->m_sb.sb_frextents != frextents) { + if (!sflag) + dbprintf("sb_frextents %lld, counted %lld\n", + mp->m_sb.sb_frextents, frextents); + error++; + } + if ((sbversion & XFS_SB_VERSION_ATTRBIT) && + !XFS_SB_VERSION_HASATTR(&mp->m_sb)) { + if (!sflag) + dbprintf("sb versionnum missing attr bit %x\n", + XFS_SB_VERSION_ATTRBIT); + error++; + } + if ((sbversion & 
XFS_SB_VERSION_NLINKBIT) && + !XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { + if (!sflag) + dbprintf("sb versionnum missing nlink bit %x\n", + XFS_SB_VERSION_NLINKBIT); + error++; + } + if ((sbversion & XFS_SB_VERSION_QUOTABIT) && + !XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) { + if (!sflag) + dbprintf("sb versionnum missing quota bit %x\n", + XFS_SB_VERSION_QUOTABIT); + error++; + } + if (!(sbversion & XFS_SB_VERSION_ALIGNBIT) && + XFS_SB_VERSION_HASALIGN(&mp->m_sb)) { + if (!sflag) + dbprintf("sb versionnum extra align bit %x\n", + XFS_SB_VERSION_ALIGNBIT); + error++; + } + if (qudo) + quota_check("user", qudata); + if (qpdo) + quota_check("project", qpdata); + if (sbver_err > mp->m_sb.sb_agcount / 2) + dbprintf("WARNING: this may be a newer XFS filesystem.\n"); + if (error) + exitcode = 3; + dbprefix = oldprefix; + return 0; +} + +#ifdef DEBUG +typedef struct ltab { + int min; + int max; +} ltab_t; + +static void +blocktrash_b( + xfs_agnumber_t agno, + xfs_agblock_t agbno, + dbm_t type, + ltab_t *ltabp, + int mode) +{ + int bit; + int bitno; + char *buf; + int byte; + int len; + int mask; + int newbit; + int offset; + static char *modestr[] = { + "zeroed", "set", "flipped", "randomized" + }; + + len = (int)((random() % (ltabp->max - ltabp->min + 1)) + ltabp->min); + offset = (int)(random() % (int)(mp->m_sb.sb_blocksize * NBBY)); + newbit = 0; + push_cur(); + set_cur(&typtab[DBM_UNKNOWN], + XFS_AGB_TO_DADDR(mp, agno, agbno), blkbb, DB_RING_IGN, NULL); + if ((buf = iocur_top->data) == NULL) { + dbprintf("can't read block %u/%u for trashing\n", agno, agbno); + pop_cur(); + return; + } + for (bitno = 0; bitno < len; bitno++) { + bit = (offset + bitno) % (mp->m_sb.sb_blocksize * NBBY); + byte = bit / NBBY; + bit %= NBBY; + mask = 1 << bit; + switch (mode) { + case 0: + newbit = 0; + break; + case 1: + newbit = 1; + break; + case 2: + newbit = (buf[byte] & mask) == 0; + break; + case 3: + newbit = (int)random() & 1; + break; + } + if (newbit) + buf[byte] |= mask; + else + buf[byte] 
&= ~mask;
+	}
+	write_cur();
+	pop_cur();
+	/* report what was done: byte:bit of the run start and the mode name */
+	printf("blocktrash: %u/%u %s block %d bit%s starting %d:%d %s\n",
+		agno, agbno, typename[type], len, len == 1 ? "" : "s",
+		offset / NBBY, offset % NBBY, modestr[mode]);
+}
+/*
+ * blocktrash command: damage randomly chosen blocks recorded in dbmap.
+ * Options parsed here:
+ *   -0 -1 -2 -3  bit mode handed to blocktrash_b (default 2)
+ *   -n count     number of blocks to trash (default 1, must be > 0)
+ *   -s seed      random seed (default derived from gettimeofday)
+ *   -t type      block type to select; may be repeated, must be in goodmask
+ *   -x min       minimum run length in bits (default 1)
+ *   -y max       maximum run length in bits (default 128 * NBBY)
+ * dbmap must already exist ("must run blockget first").  Every return
+ * path returns 0.
+ */
+int
+blocktrash_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_agblock_t	agbno;
+	xfs_agnumber_t	agno;
+	xfs_drfsbno_t	bi;
+	xfs_drfsbno_t	blocks;
+	int		c;
+	int		count;
+	int		done;
+	int		goodmask;
+	int		i;
+	ltab_t		*lentab;
+	int		lentablen;
+	int		max;
+	int		min;
+	int		mode;
+	struct timeval	now;
+	char		*p;
+	xfs_drfsbno_t	randb;
+	uint		seed;
+	int		sopt;
+	int		tmask;
+	/* the block map is required before anything can be selected */
+	if (!dbmap) {
+		dbprintf("must run blockget first\n");
+		return 0;
+	}
+	optind = 0;
+	count = 1;
+	min = 1;
+	max = 128 * NBBY;
+	mode = 2;
+	gettimeofday(&now, NULL);
+	seed = (unsigned int)(now.tv_sec ^ now.tv_usec);
+	sopt = 0;
+	tmask = 0;
+	/* block types accepted by -t; also the selection when -t is absent */
+	goodmask = (1 << DBM_AGF) |
+		   (1 << DBM_AGFL) |
+		   (1 << DBM_AGI) |
+		   (1 << DBM_ATTR) |
+		   (1 << DBM_BTBMAPA) |
+		   (1 << DBM_BTBMAPD) |
+		   (1 << DBM_BTBNO) |
+		   (1 << DBM_BTCNT) |
+		   (1 << DBM_BTINO) |
+		   (1 << DBM_DIR) |
+		   (1 << DBM_INODE) |
+		   (1 << DBM_QUOTA) |
+		   (1 << DBM_RTBITMAP) |
+		   (1 << DBM_RTSUM) |
+		   (1 << DBM_SB);
+	while ((c = getopt(argc, argv, "0123n:s:t:x:y:")) != EOF) {
+		switch (c) {
+		case '0':
+			mode = 0;
+			break;
+		case '1':
+			mode = 1;
+			break;
+		case '2':
+			mode = 2;
+			break;
+		case '3':
+			mode = 3;
+			break;
+		case 'n':
+			count = (int)strtol(optarg, &p, 0);
+			if (*p != '\0' || count <= 0) {
+				dbprintf("bad blocktrash count %s\n", optarg);
+				return 0;
+			}
+			break;
+		case 's':
+			/* NOTE(review): trailing junk after the number is not rejected here */
+			seed = (uint)strtoul(optarg, &p, 0);
+			sopt = 1;
+			break;
+		case 't':
+			/* look the name up in typename[]; i is then the type's bit number */
+			for (i = 0; typename[i]; i++) {
+				if (strcmp(typename[i], optarg) == 0)
+					break;
+			}
+			if (!typename[i] || (((1 << i) & goodmask) == 0)) {
+				dbprintf("bad blocktrash type %s\n", optarg);
+				return 0;
+			}
+			tmask |= 1 << i;
+			break;
+		case 'x':
+			min = (int)strtol(optarg, &p, 0);
+			if (*p != '\0' || min <= 0 ||
+			    min > mp->m_sb.sb_blocksize * NBBY) {
+				dbprintf("bad blocktrash min %s\n", 
optarg); + return 0; + } + break; + case 'y': + max = (int)strtol(optarg, &p, 0); + if (*p != '\0' || max <= 0 || + max > mp->m_sb.sb_blocksize * NBBY) { + dbprintf("bad blocktrash max %s\n", optarg); + return 0; + } + break; + default: + dbprintf("bad option for blocktrash command\n"); + return 0; + } + } + if (min > max) { + dbprintf("bad min/max for blocktrash command\n"); + return 0; + } + if (tmask == 0) + tmask = goodmask; + lentab = xmalloc(sizeof(ltab_t)); + lentab->min = lentab->max = min; + lentablen = 1; + for (i = min + 1; i <= max; i++) { + if ((i & (i - 1)) == 0) { + lentab = xrealloc(lentab, + sizeof(ltab_t) * (lentablen + 1)); + lentab[lentablen].min = lentab[lentablen].max = i; + lentablen++; + } else + lentab[lentablen - 1].max = i; + } + for (blocks = 0, agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + for (agbno = 0, p = dbmap[agno]; + agbno < mp->m_sb.sb_agblocks; + agbno++, p++) { + if ((1 << *p) & tmask) + blocks++; + } + } + if (blocks == 0) { + dbprintf("blocktrash: no matching blocks\n"); + return 0; + } + if (!sopt) + dbprintf("blocktrash: seed %u\n", seed); + srandom(seed); + for (i = 0; i < count; i++) { + randb = (xfs_drfsbno_t)((((__int64_t)random() << 32) | + random()) % blocks); + for (bi = 0, agno = 0, done = 0; + !done && agno < mp->m_sb.sb_agcount; + agno++) { + for (agbno = 0, p = dbmap[agno]; + agbno < mp->m_sb.sb_agblocks; + agbno++, p++) { + if (!((1 << *p) & tmask)) + continue; + if (bi++ < randb) + continue; + blocktrash_b(agno, agbno, (dbm_t)*p, + &lentab[random() % lentablen], mode); + done = 1; + break; + } + } + } + xfree(lentab); + return 0; +} +#endif + +int +blockuse_f( + int argc, + char **argv) +{ + xfs_agblock_t agbno; + xfs_agnumber_t agno; + int c; + int count; + xfs_agblock_t end; + xfs_fsblock_t fsb; + inodata_t *i; + char *p; + int shownames; + + if (!dbmap) { + dbprintf("must run blockget first\n"); + return 0; + } + optind = 0; + count = 1; + shownames = 0; + fsb = XFS_DADDR_TO_FSB(mp, iocur_top->off >> 
BBSHIFT);
+	agno = XFS_FSB_TO_AGNO(mp, fsb);
+	end = agbno = XFS_FSB_TO_AGBNO(mp, fsb);
+	while ((c = getopt(argc, argv, "c:n")) != EOF) {
+		switch (c) {
+		case 'c':
+			count = (int)strtol(optarg, &p, 0);
+			end = agbno + count - 1;	/* last block to report, inclusive */
+			if (*p != '\0' || count <= 0 ||
+			    end >= mp->m_sb.sb_agblocks) {
+				dbprintf("bad blockuse count %s\n", optarg);
+				return 0;
+			}
+			break;
+		case 'n':
+			if (!nflag) {
+				dbprintf("must run blockget -n first\n");
+				return 0;
+			}
+			shownames = 1;
+			break;
+		default:
+			dbprintf("bad option for blockuse command\n");
+			return 0;
+		}
+	}
+	while (agbno <= end) {
+		p = &dbmap[agno][agbno];
+		i = inomap[agno][agbno];
+		dbprintf("block %llu (%u/%u) type %s",
+			(xfs_dfsbno_t)XFS_AGB_TO_FSB(mp, agno, agbno),
+			agno, agbno, typename[(dbm_t)*p]);
+		if (i) {
+			dbprintf(" inode %lld", i->ino);
+			/* p is reused for the name; it is reloaded at the loop top */
+			if (shownames && (p = inode_name(i->ino, NULL))) {
+				dbprintf(" %s", p);
+				xfree(p);
+			}
+		}
+		dbprintf("\n");
+		agbno++;
+	}
+	return 0;
+}
+/* Return 1 if bno is one of the blist_size entries of blist[], else 0. */
+static int
+check_blist(
+	xfs_fsblock_t	bno)
+{
+	int		i;
+
+	for (i = 0; i < blist_size; i++) {
+		if (blist[i] == bno)
+			return 1;
+	}
+	return 0;
+}
+/*
+ * Verify that each of the len blocks starting at agno/agbno is recorded
+ * in dbmap with the given type.  Every mismatch increments error; the
+ * message is printed unless sflag is set and CHECK_BLISTA() is false for
+ * that block.  The range itself is not validated here.
+ */
+static void
+check_dbmap(
+	xfs_agnumber_t	agno,
+	xfs_agblock_t	agbno,
+	xfs_extlen_t	len,
+	dbm_t		type)
+{
+	xfs_extlen_t	i;
+	char		*p;
+
+	for (i = 0, p = &dbmap[agno][agbno]; i < len; i++, p++) {
+		if ((dbm_t)*p != type) {
+			if (!sflag || CHECK_BLISTA(agno, agbno + i))
+				dbprintf("block %u/%u expected type %s got "
+					 "%s\n",
+					agno, agbno + i, typename[type],
+					typename[(dbm_t)*p]);
+			error++;
+		}
+	}
+}
+/* Register the commands of this file; blocktrash only in DEBUG builds. */
+void
+check_init(void)
+{
+	add_command(&blockfree_cmd);
+	add_command(&blockget_cmd);
+#ifdef DEBUG
+	add_command(&blocktrash_cmd);
+#endif
+	add_command(&blockuse_cmd);
+	add_command(&ncheck_cmd);
+}
+/*
+ * Check that the len blocks at agno/agbno have no owner recorded in
+ * inomap yet; c_ino is the inode that now claims them and is used only
+ * in messages.  Returns 1 when the range is valid and entirely unowned,
+ * 0 otherwise.
+ */
+static int
+check_inomap(
+	xfs_agnumber_t	agno,
+	xfs_agblock_t	agbno,
+	xfs_extlen_t	len,
+	xfs_ino_t	c_ino)
+{
+	xfs_extlen_t	i;
+	inodata_t	**idp;
+	int		rval;
+
+	if (!check_range(agno, agbno, len))  {
+		dbprintf("blocks %u/%u..%u claimed by inode %lld\n",
+			
agno, agbno, agbno + len - 1, c_ino);
+		return 0;
+	}
+	for (i = 0, rval = 1, idp = &inomap[agno][agbno]; i < len; i++, idp++) {
+		if (*idp) {	/* block already has an owner: duplicate claim */
+			if (!sflag || (*idp)->ilist ||
+			    CHECK_BLISTA(agno, agbno + i))
+				dbprintf("block %u/%u claimed by inode %lld, "
+					 "previous inum %lld\n",
+					agno, agbno + i, c_ino, (*idp)->ino);
+			error++;
+			rval = 0;
+		}
+	}
+	return rval;
+}
+/*
+ * Walk every inode recorded for allocation group agno and compare its
+ * link_set value (printed as "nlink") with link_add (printed as
+ * "counted").  A mismatch, or a link_set of 0, increments error.  Inodes
+ * that pass are listed with their name when verbose or ilist is set.
+ */
+static void
+check_linkcounts(
+	xfs_agnumber_t	agno)
+{
+	inodata_t	*ep;
+	inodata_t	**ht;
+	int		idx;
+	char		*path;
+
+	ht = inodata[agno];
+	for (idx = 0; idx < inodata_hash_size; ht++, idx++) {
+		ep = *ht;
+		while (ep) {
+			if (ep->link_set != ep->link_add || ep->link_set == 0) {
+				path = inode_name(ep->ino, NULL);
+				/* the mismatch message prints a name, so supply a placeholder */
+				if (!path && ep->link_add)
+					path = xstrdup("?");
+				if (!sflag || ep->ilist) {
+					if (ep->link_add)
+						dbprintf("link count mismatch "
+							 "for inode %lld (name "
+							 "%s), nlink %d, "
+							 "counted %d\n",
+							ep->ino, path,
+							ep->link_set,
+							ep->link_add);
+					else if (ep->link_set)
+						dbprintf("disconnected inode "
+							 "%lld, nlink %d\n",
+							ep->ino, ep->link_set);
+					else
+						dbprintf("allocated inode %lld "
+							 "has 0 link count\n",
+							ep->ino);
+				}
+				if (path)
+					xfree(path);
+				error++;
+			} else if (verbose || ep->ilist) {
+				path = inode_name(ep->ino, NULL);
+				if (path) {
+					dbprintf("inode %lld name %s\n",
+						ep->ino, path);
+					xfree(path);
+				}
+			}
+			ep = ep->next;
+		}
+	}
+
+}
+/*
+ * Return 1 if agno is a valid allocation group and the last block of the
+ * extent lies below sb_agblocks.  Otherwise report the blocks (subject to
+ * sflag / CHECK_BLISTA), count a single error and return 0.
+ */
+static int
+check_range(
+	xfs_agnumber_t  agno,
+	xfs_agblock_t   agbno,
+	xfs_extlen_t    len)
+{
+	xfs_extlen_t    i;
+
+	if (agno >= mp->m_sb.sb_agcount ||
+	    agbno + len - 1 >= mp->m_sb.sb_agblocks) {
+		for (i = 0; i < len; i++) {
+			if (!sflag || CHECK_BLISTA(agno, agbno + i))
+				dbprintf("block %u/%u out of range\n",
+					agno, agbno + i);
+		}
+		error++;
+		return 0;
+	}
+	return 1;
+}
+/*
+ * Like check_dbmap, but for realtime blocks, which are kept in the extra
+ * dbmap row at index sb_agcount.
+ */
+static void
+check_rdbmap(
+	xfs_drfsbno_t	bno,
+	xfs_extlen_t	len,
+	dbm_t		type)
+{
+	xfs_extlen_t	i;
+	char		*p;
+
+	for (i = 0, p = &dbmap[mp->m_sb.sb_agcount][bno]; i < len; i++, p++) {
+		if ((dbm_t)*p != type) {
+			if (!sflag || CHECK_BLIST(bno + 
i))
+				dbprintf("rtblock %llu expected type %s got "
+					 "%s\n",
+					bno + i, typename[type],
+					typename[(dbm_t)*p]);
+			error++;
+		}
+	}
+}
+/*
+ * Realtime counterpart of check_inomap: returns 1 when the len realtime
+ * blocks at bno are in range and none has an owner in the inomap row at
+ * index sb_agcount, 0 otherwise.  c_ino is used only in messages.
+ */
+static int
+check_rinomap(
+	xfs_drfsbno_t	bno,
+	xfs_extlen_t	len,
+	xfs_ino_t	c_ino)
+{
+	xfs_extlen_t	i;
+	inodata_t	**idp;
+	int		rval;
+
+	if (!check_rrange(bno, len)) {
+		dbprintf("rtblocks %llu..%llu claimed by inode %lld\n",
+			bno, bno + len - 1, c_ino);
+		return 0;
+	}
+	for (i = 0, rval = 1, idp = &inomap[mp->m_sb.sb_agcount][bno];
+	     i < len;
+	     i++, idp++) {
+		if (*idp) {	/* already owned: duplicate claim */
+			if (!sflag || (*idp)->ilist || CHECK_BLIST(bno + i))
+				dbprintf("rtblock %llu claimed by inode %lld, "
+					 "previous inum %lld\n",
+					bno + i, c_ino, (*idp)->ino);
+			error++;
+			rval = 0;
+		}
+	}
+	return rval;
+}
+/*
+ * Count an error if the superblock's root inode was never recorded or is
+ * not flagged as a directory.
+ */
+static void
+check_rootdir(void)
+{
+	inodata_t	*id;
+
+	id = find_inode(mp->m_sb.sb_rootino, 0);
+	if (id == NULL) {
+		if (!sflag)
+			dbprintf("root inode %lld is missing\n",
+				mp->m_sb.sb_rootino);
+		error++;
+	} else if (!id->isdir) {
+		if (!sflag || id->ilist)
+			dbprintf("root inode %lld is not a directory\n",
+				mp->m_sb.sb_rootino);
+		error++;
+	}
+}
+/*
+ * Return 1 if the last block of the realtime extent lies below
+ * sb_rblocks; otherwise report the blocks, count one error and return 0.
+ */
+static int
+check_rrange(
+	xfs_drfsbno_t	bno,
+	xfs_extlen_t	len)
+{
+	xfs_extlen_t	i;
+
+	if (bno + len - 1 >= mp->m_sb.sb_rblocks) {
+		for (i = 0; i < len; i++) {
+			if (!sflag || CHECK_BLIST(bno + i))
+				dbprintf("rtblock %llu out of range\n",
+					bno + i);
+		}
+		error++;
+		return 0;
+	}
+	return 1;
+}
+/*
+ * Check that the extent currently has type1 in dbmap, then overwrite
+ * every block's entry with type2.  c_agno/c_agbno identify the claiming
+ * block and are used only in the out-of-range message.
+ */
+static void
+check_set_dbmap(
+	xfs_agnumber_t	agno,
+	xfs_agblock_t	agbno,
+	xfs_extlen_t	len,
+	dbm_t		type1,
+	dbm_t		type2,
+	xfs_agnumber_t	c_agno,
+	xfs_agblock_t	c_agbno)
+{
+	xfs_extlen_t	i;
+	int		mayprint;
+	char		*p;
+
+	if (!check_range(agno, agbno, len))  {
+		dbprintf("blocks %u/%u..%u claimed by block %u/%u\n", agno,
+			agbno, agbno + len - 1, c_agno, c_agbno);
+		return;
+	}
+	check_dbmap(agno, agbno, len, type1);
+	mayprint = verbose | blist_size;	/* cheap pre-test before the per-block check */
+	for (i = 0, p = &dbmap[agno][agbno]; i < len; i++, p++) {
+		*p = (char)type2;
+		if (mayprint && (verbose || CHECK_BLISTA(agno, agbno 
+ i)))
+			dbprintf("setting block %u/%u to %s\n", agno, agbno + i,
+				typename[type2]);
+	}
+}
+/*
+ * Realtime counterpart of check_set_dbmap: verify the extent has type1,
+ * then set every block to type2.  Nothing is done when the range is bad.
+ */
+static void
+check_set_rdbmap(
+	xfs_drfsbno_t	bno,
+	xfs_extlen_t	len,
+	dbm_t		type1,
+	dbm_t		type2)
+{
+	xfs_extlen_t	i;
+	int		mayprint;
+	char		*p;
+
+	if (!check_rrange(bno, len))
+		return;
+	check_rdbmap(bno, len, type1);
+	mayprint = verbose | blist_size;
+	for (i = 0, p = &dbmap[mp->m_sb.sb_agcount][bno]; i < len; i++, p++) {
+		*p = (char)type2;
+		if (mayprint && (verbose || CHECK_BLIST(bno + i)))
+			dbprintf("setting rtblock %llu to %s\n",
+				bno + i, typename[type2]);
+	}
+}
+/*
+ * Compare the two realtime summary arrays, sumcompute and sumfile,
+ * element by element (m_rsumlevels rows of sb_rbmblocks entries) and
+ * count an error for every differing entry.
+ */
+static void
+check_summary(void)
+{
+	xfs_drfsbno_t	bno;
+	xfs_suminfo_t	*csp;
+	xfs_suminfo_t	*fsp;
+	int		log;
+
+	csp = sumcompute;
+	fsp = sumfile;
+	for (log = 0; log < mp->m_rsumlevels; log++) {
+		for (bno = 0;
+		     bno < mp->m_sb.sb_rbmblocks;
+		     bno++, csp++, fsp++) {
+			if (*csp != *fsp) {
+				if (!sflag)
+					dbprintf("rt summary mismatch, size %d "
+						 "block %llu, file: %d, "
+						 "computed: %d\n",
+						log, bno, *fsp, *csp);
+				error++;
+			}
+		}
+	}
+}
+/*
+ * Count an error for every block of the extent whose recorded type has
+ * its bit set in typemask (bit n stands for type n).  Nothing is checked
+ * when the range is invalid.
+ */
+static void
+checknot_dbmap(
+	xfs_agnumber_t	agno,
+	xfs_agblock_t	agbno,
+	xfs_extlen_t	len,
+	int		typemask)
+{
+	xfs_extlen_t	i;
+	char		*p;
+
+	if (!check_range(agno, agbno, len))
+		return;
+	for (i = 0, p = &dbmap[agno][agbno]; i < len; i++, p++) {
+		if ((1 << *p) & typemask) {
+			if (!sflag || CHECK_BLISTA(agno, agbno + i))
+				dbprintf("block %u/%u type %s not expected\n",
+					agno, agbno + i, typename[(dbm_t)*p]);
+			error++;
+		}
+	}
+}
+/* Realtime counterpart of checknot_dbmap, using the dbmap row at sb_agcount. */
+static void
+checknot_rdbmap(
+	xfs_drfsbno_t	bno,
+	xfs_extlen_t	len,
+	int		typemask)
+{
+	xfs_extlen_t	i;
+	char		*p;
+
+	if (!check_rrange(bno, len))
+		return;
+	for (i = 0, p = &dbmap[mp->m_sb.sb_agcount][bno]; i < len; i++, p++) {
+		if ((1 << *p) & typemask) {
+			if (!sflag || CHECK_BLIST(bno + i))
+				dbprintf("rtblock %llu type %s not expected\n",
+					bno + i, typename[(dbm_t)*p]);
+			error++;
+		}
+	}
+}
+/*
+ * Record a directory entry (name hash plus data address) at the head of
+ * its dirhash[] bucket, initially marked as not seen.
+ */
+static void
+dir_hash_add(
+	xfs_dahash_t		hash,
+	xfs_dir2_dataptr_t	addr)
+{
+	int			i;
+	dirhash_t		*p;
+
+	
i = DIR_HASH_FUNC(hash, addr); + p = malloc(sizeof(*p)); + p->next = dirhash[i]; + dirhash[i] = p; + p->entry.hashval = hash; + p->entry.address = addr; + p->seen = 0; +} + +static void +dir_hash_check( + inodata_t *id, + int v) +{ + int i; + dirhash_t *p; + + for (i = 0; i < DIR_HASH_SIZE; i++) { + for (p = dirhash[i]; p; p = p->next) { + if (p->seen) + continue; + if (!sflag || id->ilist || v) + dbprintf("dir ino %lld missing leaf entry for " + "%x/%x\n", + id->ino, p->entry.hashval, + p->entry.address); + error++; + } + } +} + +static void +dir_hash_done(void) +{ + int i; + dirhash_t *n; + dirhash_t *p; + + for (i = 0; i < DIR_HASH_SIZE; i++) { + for (p = dirhash[i]; p; p = n) { + n = p->next; + free(p); + } + dirhash[i] = NULL; + } +} + +static void +dir_hash_init(void) +{ + if (!dirhash) + dirhash = calloc(DIR_HASH_SIZE, sizeof(*dirhash)); +} + +static int +dir_hash_see( + xfs_dahash_t hash, + xfs_dir2_dataptr_t addr) +{ + int i; + dirhash_t *p; + + i = DIR_HASH_FUNC(hash, addr); + for (p = dirhash[i]; p; p = p->next) { + if (p->entry.hashval == hash && p->entry.address == addr) { + if (p->seen) + return 1; + p->seen = 1; + return 0; + } + } + return -1; +} + +static inodata_t * +find_inode( + xfs_ino_t ino, + int add) +{ + xfs_agino_t agino; + xfs_agnumber_t agno; + inodata_t *ent; + inodata_t **htab; + xfs_agino_t ih; + + agno = XFS_INO_TO_AGNO(mp, ino); + agino = XFS_INO_TO_AGINO(mp, ino); + if (agno >= mp->m_sb.sb_agcount || + XFS_AGINO_TO_INO(mp, agno, agino) != ino) + return NULL; + htab = inodata[agno]; + ih = agino % inodata_hash_size; + ent = htab[ih]; + while (ent) { + if (ent->ino == ino) + return ent; + ent = ent->next; + } + if (!add) + return NULL; + ent = xcalloc(1, sizeof(*ent)); + ent->ino = ino; + ent->next = htab[ih]; + htab[ih] = ent; + return ent; +} + +static void +free_inodata( + xfs_agnumber_t agno) +{ + inodata_t *hp; + inodata_t **ht; + int i; + inodata_t *next; + + ht = inodata[agno]; + for (i = 0; i < inodata_hash_size; i++) { + hp 
= ht[i];
+		while (hp) {
+			next = hp->next;	/* fetch the link before hp is freed */
+			if (hp->name)
+				xfree(hp->name);
+			xfree(hp);
+			hp = next;
+		}
+	}
+	xfree(ht);
+}
+/*
+ * Set up the global state for a blockget run: allocate dbmap, inomap and
+ * the per-AG inode hash tables (plus one extra dbmap/inomap row and the
+ * two summary buffers when the filesystem has realtime extents), parse
+ * the options -b bno, -i ino, -n, -p, -s, -v, and reset the counters.
+ * Returns 1 on success, 0 on a bad superblock magic number or bad option.
+ */
+static int
+init(
+	int		argc,
+	char		**argv)
+{
+	xfs_fsblock_t	bno;
+	int		c;
+	xfs_ino_t	ino;
+	int		rt;
+
+	if (mp->m_sb.sb_magicnum != XFS_SB_MAGIC) {
+		dbprintf("bad superblock magic number %x, giving up\n",
+			mp->m_sb.sb_magicnum);
+		return 0;
+	}
+	rt = mp->m_sb.sb_rextents != 0;	/* 1 adds the realtime row below */
+	dbmap = xmalloc((mp->m_sb.sb_agcount + rt) * sizeof(*dbmap));
+	inomap = xmalloc((mp->m_sb.sb_agcount + rt) * sizeof(*inomap));
+	inodata = xmalloc(mp->m_sb.sb_agcount * sizeof(*inodata));
+	/* buckets per AG: icount / (avg chain length * agcount), clamped */
+	inodata_hash_size =
+		(int)MAX(MIN(mp->m_sb.sb_icount /
+				(INODATA_AVG_HASH_LENGTH * mp->m_sb.sb_agcount),
+			     MAX_INODATA_HASH_SIZE),
+			 MIN_INODATA_HASH_SIZE);
+	for (c = 0; c < mp->m_sb.sb_agcount; c++) {
+		dbmap[c] = xcalloc(mp->m_sb.sb_agblocks, sizeof(**dbmap));
+		inomap[c] = xcalloc(mp->m_sb.sb_agblocks, sizeof(**inomap));
+		inodata[c] = xcalloc(inodata_hash_size, sizeof(**inodata));
+	}
+	if (rt) {
+		/* c == sb_agcount here: the extra row holds the realtime blocks */
+		dbmap[c] = xcalloc(mp->m_sb.sb_rblocks, sizeof(**dbmap));
+		inomap[c] = xcalloc(mp->m_sb.sb_rblocks, sizeof(**inomap));
+		sumfile = xcalloc(mp->m_rsumsize, 1);
+		sumcompute = xcalloc(mp->m_rsumsize, 1);
+	}
+	/* NOTE(review): pflag is set by -p below but is not reset here */
+	nflag = sflag = verbose = optind = 0;
+	while ((c = getopt(argc, argv, "b:i:npsv")) != EOF) {
+		switch (c) {
+		case 'b':
+			bno = atoll(optarg);
+			add_blist(bno);
+			break;
+		case 'i':
+			ino = atoll(optarg);
+			add_ilist(ino);
+			break;
+		case 'n':
+			nflag = 1;
+			break;
+		case 'p':
+			pflag = 1;
+			break;
+		case 's':
+			sflag = 1;
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		default:
+			/* NOTE(review): the maps allocated above are not released on this path */
+			dbprintf("bad option for blockget command\n");
+			return 0;
+		}
+	}
+	error = sbver_err = serious_error = 0;
+	fdblocks = frextents = icount = ifree = 0;
+	/* start building the version bits this filesystem is expected to carry */
+	sbversion = XFS_SB_VERSION_4;
+	if (mp->m_sb.sb_inoalignmt)
+		sbversion |= XFS_SB_VERSION_ALIGNBIT;
+	if ((mp->m_sb.sb_uquotino && mp->m_sb.sb_uquotino != NULLFSINO) ||
+	    (mp->m_sb.sb_pquotino && mp->m_sb.sb_pquotino != NULLFSINO)) 
+		sbversion |= XFS_SB_VERSION_QUOTABIT;
+	quota_init();
+	return 1;
+}
+
+/*
+ * Build the path name recorded for inode ino by prepending the names of
+ * its recorded parents, stopping at the first ancestor without a parent
+ * or without a name.  If ipp is not NULL, *ipp receives the inodata
+ * record (or NULL).  Returns a string the caller must release with
+ * xfree, or NULL when the inode is unknown or has no name.
+ */
+static char *
+inode_name(
+	xfs_ino_t	ino,
+	inodata_t	**ipp)
+{
+	inodata_t	*id;
+	char		*npath;
+	char		*path;
+
+	id = find_inode(ino, 0);
+	if (ipp)
+		*ipp = id;
+	if (id == NULL)
+		return NULL;
+	if (id->name == NULL)
+		return NULL;
+	path = xstrdup(id->name);
+	while (id->parent) {
+		id = id->parent;
+		if (id->name == NULL)
+			break;
+		npath = prepend_path(path, id->name);
+		xfree(path);
+		path = npath;
+	}
+	return path;
+}
+
+/*
+ * ncheck command: print inode numbers with their path names.
+ *   -i ino  restrict the output to this inode (may be repeated)
+ *   -s      in the full listing, print only inodes whose security flag
+ *           is set
+ * Requires a prior "blockget -n".  Every return path returns 0.
+ */
+static int
+ncheck_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_agnumber_t	agno;
+	int		c;
+	inodata_t	*hp;
+	inodata_t	**ht;
+	int		i;
+	inodata_t	*id;
+	xfs_ino_t	*ilist;
+	int		ilist_size;
+	xfs_ino_t	*ilp;
+	xfs_ino_t	ino;
+	char		*p;
+	int		security;
+
+	if (!inodata || !nflag) {
+		dbprintf("must run blockget -n first\n");
+		return 0;
+	}
+	security = optind = ilist_size = 0;
+	ilist = NULL;
+	while ((c = getopt(argc, argv, "i:s")) != EOF) {
+		switch (c) {
+		case 'i':
+			ino = atoll(optarg);
+			ilist = xrealloc(ilist, (ilist_size + 1) *
+				sizeof(*ilist));
+			ilist[ilist_size++] = ino;
+			break;
+		case 's':
+			security = 1;
+			break;
+		default:
+			dbprintf("bad option -%c for ncheck command\n", c);
+			/* an earlier -i may already have allocated the list */
+			if (ilist)
+				xfree(ilist);
+			return 0;
+		}
+	}
+	if (ilist) {
+		/* explicit inode list: print just those that have a name */
+		for (ilp = ilist; ilp < &ilist[ilist_size]; ilp++) {
+			ino = *ilp;
+			if ((p = inode_name(ino, &hp)) != NULL) {
+				dbprintf("%11llu %s", ino, p);
+				if (hp->isdir)
+					dbprintf("/.");
+				dbprintf("\n");
+				xfree(p);
+			}
+		}
+		xfree(ilist);
+		return 0;
+	}
+	/* no list given: walk every hash chain of every allocation group */
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+		ht = inodata[agno];
+		for (i = 0; i < inodata_hash_size; i++) {
+			for (hp = ht[i]; hp; hp = hp->next) {
+				ino = XFS_AGINO_TO_INO(mp, agno, hp->ino);
+				p = inode_name(ino, &id);
+				if (!p || !id)
+					continue;
+				if (!security || id->security) {
+					dbprintf("%11llu %s", ino, p);
+					if (hp->isdir)
+						dbprintf("/.");
+					dbprintf("\n");
+				}
+				xfree(p);
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * Return a newly allocated string "parent/oldpath".  Neither argument is
+ * freed or modified.
+ */
+static char *
+prepend_path(
+	char	*oldpath,
+	char	*parent)
+{
+	int	len; 
+	char	*path;
+
+	/* two extra bytes: the '/' separator and the terminating NUL */
+	len = (int)(strlen(oldpath) + strlen(parent) + 2);
+	path = xmalloc(len);
+	sprintf(path, "%s/%s", parent, oldpath);
+	return path;
+}
+
+/*
+ * Check a version 2 directory that consists of a single directory block.
+ * The block is looked up at file offset 0 in blkmap, read through a
+ * pushed I/O cursor and handed to process_data_dir_v2; afterwards the
+ * recorded entries are cross-checked against the leaf entries
+ * (dir_hash_check).  *dot and *dotdot are updated by
+ * process_data_dir_v2.  Returns the value process_data_dir_v2 returns,
+ * or 0 when block 0 is missing or cannot be read.
+ */
+static xfs_ino_t
+process_block_dir_v2(
+	blkmap_t	*blkmap,
+	int		*dot,
+	int		*dotdot,
+	inodata_t	*id)
+{
+	xfs_fsblock_t	b;
+	bbmap_t		bbmap;
+	bmap_ext_t	*bmp;
+	int		nex;
+	xfs_ino_t	parent;
+	int		v;
+	int		x;
+
+	nex = blkmap_getn(blkmap, 0, mp->m_dirblkfsbs, &bmp);
+	v = id->ilist || verbose;
+	if (nex == 0) {
+		if (!sflag || v)
+			dbprintf("block 0 for directory inode %lld is "
+				 "missing\n",
+				id->ino);
+		error++;
+		return 0;
+	}
+	push_cur();
+	/* a directory block made of several extents needs a scatter map */
+	if (nex > 1)
+		make_bbmap(&bbmap, nex, bmp);
+	set_cur(&typtab[TYP_DIR], XFS_FSB_TO_DADDR(mp, bmp->startblock),
+		mp->m_dirblkfsbs * blkbb, DB_RING_IGN, nex > 1 ? &bbmap : NULL);
+	/* turn on reporting if any block of the directory block is listed */
+	for (x = 0; !v && x < nex; x++) {
+		for (b = bmp[x].startblock;
+		     !v && b < bmp[x].startblock + bmp[x].blockcount;
+		     b++)
+			v = CHECK_BLIST(b);
+	}
+	free(bmp);
+	if (iocur_top->data == NULL) {
+		if (!sflag || id->ilist || v)
+			dbprintf("can't read block 0 for directory inode "
+				 "%lld\n",
+				id->ino);
+		error++;
+		/* balance the push_cur() above; this path used to skip it */
+		pop_cur();
+		return 0;
+	}
+	dir_hash_init();
+	parent = process_data_dir_v2(dot, dotdot, id, v, mp->m_dirdatablk,
+		NULL);
+	dir_hash_check(id, v);
+	dir_hash_done();
+	pop_cur();
+	return parent;
+}
+
+/*
+ * Process numrecs extent records of inode id starting at rp.  Each
+ * record is decoded, sanity checked, optionally added to *blkmapp,
+ * entered into the block and inode maps (realtime maps when type is
+ * DBM_RTDATA) and its length added to *tot.
+ */
+static void
+process_bmbt_reclist(
+	xfs_bmbt_rec_32_t	*rp,
+	int			numrecs,
+	dbm_t			type,
+	inodata_t		*id,
+	xfs_drfsbno_t		*tot,
+	blkmap_t		**blkmapp)
+{
+	xfs_agblock_t		agbno;
+	xfs_agnumber_t		agno;
+	xfs_fsblock_t		b;
+	xfs_dfilblks_t		c;
+	xfs_dfilblks_t		cp;
+	int			f;
+	int			i;
+	xfs_agblock_t		iagbno;
+	xfs_agnumber_t		iagno;
+	xfs_dfiloff_t		o;
+	xfs_dfiloff_t		op;
+	xfs_dfsbno_t		s;
+	int			v;
+
+	cp = op = 0;
+	v = verbose || id->ilist;
+	iagno = XFS_INO_TO_AGNO(mp, id->ino);
+	iagbno = XFS_INO_TO_AGBNO(mp, id->ino);
+	for (i = 0; i < numrecs; i++, rp++) {
+		convert_extent((xfs_bmbt_rec_64_t *)rp, &o, &s, &c, &f);
+		if (v)
+			dbprintf("inode %lld extent [%lld,%lld,%lld,%d]\n",
+				id->ino, o, s, c, 
f);
+		/* o/s/c are the extent's file offset, start block and length */
+		if (!sflag && i > 0 && op + cp > o)
+			dbprintf("bmap rec out of order, inode %lld entry %d\n",
+				id->ino, i);
+		op = o;
+		cp = c;
+		/* NOTE(review): the range checks below are skipped entirely when sflag is set */
+		if (type == DBM_RTDATA) {
+			if (!sflag && s >= mp->m_sb.sb_rblocks) {
+				dbprintf("inode %lld bad rt block number %lld, "
+					 "offset %lld\n",
+					id->ino, s, o);
+				continue;
+			}
+		} else if (!sflag) {
+			agno = XFS_FSB_TO_AGNO(mp, s);
+			agbno = XFS_FSB_TO_AGBNO(mp, s);
+			if (agno >= mp->m_sb.sb_agcount ||
+			    agbno >= mp->m_sb.sb_agblocks) {
+				dbprintf("inode %lld bad block number %lld "
+					 "[%d,%d], offset %lld\n",
+					id->ino, s, agno, agbno, o);
+				continue;
+			}
+			/* the last block of the extent must lie in the same AG */
+			if (agbno + c - 1 >= mp->m_sb.sb_agblocks) {
+				dbprintf("inode %lld bad block number %lld "
+					 "[%d,%d], offset %lld\n",
+					id->ino, s + c - 1, agno,
+					agbno + (xfs_agblock_t)c - 1, o);
+				continue;
+			}
+		}
+		if (blkmapp && *blkmapp)
+			blkmap_set_ext(blkmapp, (xfs_fileoff_t)o,
+				(xfs_fsblock_t)s, (xfs_extlen_t)c);
+		if (type == DBM_RTDATA) {
+			set_rdbmap((xfs_fsblock_t)s, (xfs_extlen_t)c,
+				DBM_RTDATA);
+			set_rinomap((xfs_fsblock_t)s, (xfs_extlen_t)c, id);
+			/* the loop runs only when the block list is not empty */
+			for (b = (xfs_fsblock_t)s;
+			     blist_size && b < s + c;
+			     b++, o++) {
+				if (CHECK_BLIST(b))
+					dbprintf("inode %lld block %lld at "
+						 "offset %lld\n",
+						id->ino, (xfs_dfsbno_t)b, o);
+			}
+		} else {
+			agno = XFS_FSB_TO_AGNO(mp, (xfs_fsblock_t)s);
+			agbno = XFS_FSB_TO_AGBNO(mp, (xfs_fsblock_t)s);
+			set_dbmap(agno, agbno, (xfs_extlen_t)c, type, iagno,
+				iagbno);
+			set_inomap(agno, agbno, (xfs_extlen_t)c, id);
+			for (b = (xfs_fsblock_t)s;
+			     blist_size && b < s + c;
+			     b++, o++, agbno++) {
+				if (CHECK_BLIST(b))
+					dbprintf("inode %lld block %lld at "
+						 "offset %lld\n",
+						id->ino, (xfs_dfsbno_t)b, o);
+			}
+		}
+		*tot += c;
+	}
+}
+/*
+ * Process an inode fork held in btree format.  The btree root embedded in
+ * the inode is validated (level and record count); a level 0 root holds
+ * the extent records itself, otherwise every child pointer is followed
+ * with scan_lbtree.  *nex is increased by the number of extents found.
+ */
+static void
+process_btinode(
+	inodata_t		*id,
+	xfs_dinode_t		*dip,
+	dbm_t			type,
+	xfs_drfsbno_t		*totd,
+	xfs_drfsbno_t		*toti,
+	xfs_extnum_t		*nex,
+	blkmap_t		**blkmapp,
+	int			whichfork)
+{
+	xfs_bmdr_block_t	*dib;
+	int			i;
+	xfs_bmbt_ptr_t		*pp;
+	xfs_bmbt_rec_32_t	*rp;
+
+	dib = (xfs_bmdr_block_t 
*)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_NOCONVERT);
+	/* reject a root whose level is not below the maximum for this fork */
+	if (INT_GET(dib->bb_level, ARCH_CONVERT) >= XFS_BM_MAXLEVELS(mp, whichfork)) {
+		if (!sflag || id->ilist)
+			dbprintf("level for ino %lld %s fork bmap root too "
+				 "large (%u)\n",
+				id->ino,
+				whichfork == XFS_DATA_FORK ? "data" : "attr",
+				INT_GET(dib->bb_level, ARCH_CONVERT));
+		error++;
+		return;
+	}
+	/* reject a root holding more records than fit in the fork area */
+	if (INT_GET(dib->bb_numrecs, ARCH_CONVERT) >
+	    XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_NOCONVERT),
+		    xfs_bmdr, INT_GET(dib->bb_level, ARCH_CONVERT) == 0)) {
+		if (!sflag || id->ilist)
+			dbprintf("numrecs for ino %lld %s fork bmap root too "
+				 "large (%u)\n",
+				id->ino,
+				whichfork == XFS_DATA_FORK ? "data" : "attr",
+				INT_GET(dib->bb_numrecs, ARCH_CONVERT));
+		error++;
+		return;
+	}
+	if (INT_GET(dib->bb_level, ARCH_CONVERT) == 0) {
+		/* level 0: the root holds the extent records directly */
+		rp = (xfs_bmbt_rec_32_t *)XFS_BTREE_REC_ADDR(
+			XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_NOCONVERT),
+			xfs_bmdr, dib, 1,
+			XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE(dip, mp,
+					whichfork),
+				xfs_bmdr, 1));
+		process_bmbt_reclist(rp, INT_GET(dib->bb_numrecs, ARCH_CONVERT), type, id, totd,
+			blkmapp);
+		*nex += INT_GET(dib->bb_numrecs, ARCH_CONVERT);
+		return;
+	} else {
+		/* interior root: follow every child pointer */
+		pp = XFS_BTREE_PTR_ADDR(XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_NOCONVERT),
+			xfs_bmdr, dib, 1,
+			XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE(dip, mp,
+					whichfork),
+				xfs_bmdr, 0));
+		for (i = 0; i < INT_GET(dib->bb_numrecs, ARCH_CONVERT); i++)
+			scan_lbtree((xfs_fsblock_t)INT_GET(pp[i], ARCH_CONVERT), INT_GET(dib->bb_level, ARCH_CONVERT),
+				scanfunc_bmap, type, id, totd, toti, nex,
+				blkmapp, 1,
+				whichfork == XFS_DATA_FORK ?
+					TYP_BMAPBTD : TYP_BMAPBTA);
+	}
+	/* so few extents would have fit in the fork without a btree */
+	if (*nex <=
+	    XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_NOCONVERT) / sizeof(xfs_bmbt_rec_t)) {
+		if (!sflag || id->ilist)
+			dbprintf("extent count for ino %lld %s fork too low "
+				 "(%d) for file format\n",
+				id->ino,
+				whichfork == XFS_DATA_FORK ? 
"data" : "attr",
+				*nex);
+		error++;
+	}
+}
+/*
+ * Check one version 2 directory data block (or single-block directory)
+ * that is loaded at the top of the I/O cursor stack.  Walks all used and
+ * unused entries between the header and the end of the data area,
+ * validating tags and the bestfree table, recording every entry with
+ * dir_hash_add and crediting links to the referenced inodes.
+ *   dot, dotdot  incremented for each "." / ".." entry found
+ *   id           the directory's inodata record
+ *   v            non-zero forces messages even when sflag is set
+ *   dabno        directory block number, used in messages and to compute
+ *                entry addresses
+ *   freetabp     if not NULL, receives bestfree[0].length for this block
+ * Returns the inode number of "..", 0 if there is none, or NULLFSINO when
+ * the block magic is bad or ".." names an invalid inode.
+ */
+static xfs_ino_t
+process_data_dir_v2(
+	int			*dot,
+	int			*dotdot,
+	inodata_t		*id,
+	int			v,
+	xfs_dablk_t		dabno,
+	freetab_t		**freetabp)
+{
+	xfs_dir2_dataptr_t	addr;
+	xfs_dir2_data_free_t	*bf;
+	int			bf_err;
+	xfs_dir2_block_t	*block;
+	xfs_dir2_block_tail_t	*btp = NULL;
+	inodata_t		*cid;
+	int			count;
+	xfs_dir2_data_t		*data;
+	xfs_dir2_db_t		db;
+	xfs_dir2_data_entry_t	*dep;
+	xfs_dir2_data_free_t	*dfp;
+	xfs_dir2_data_unused_t	*dup;
+	char			*endptr;
+	int			freeseen;
+	freetab_t		*freetab;
+	xfs_dahash_t		hash;
+	int			i;
+	int			lastfree;
+	int			lastfree_err;
+	xfs_dir2_leaf_entry_t	*lep = NULL;
+	xfs_ino_t		lino;
+	xfs_ino_t		parent = 0;
+	char			*ptr;
+	int			stale = 0;
+	int			tag_err;
+	xfs_dir2_data_off_t	*tagp;
+	/* the same buffer is viewed both as a data block and as a block-format dir */
+	data = iocur_top->data;
+	block = iocur_top->data;
+	if (INT_GET(block->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC &&
+	    INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_DATA_MAGIC) {
+		if (!sflag || v)
+			dbprintf("bad directory data magic # %#x for dir ino "
+				 "%lld block %d\n",
+				INT_GET(data->hdr.magic, ARCH_CONVERT), id->ino, dabno);
+		error++;
+		return NULLFSINO;
+	}
+	db = XFS_DIR2_DA_TO_DB(mp, dabno);
+	bf = data->hdr.bestfree;
+	ptr = (char *)data->u;
+	if (INT_GET(block->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+		/* block format: entries end where the leaf array before the tail begins */
+		btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+		lep = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+		endptr = (char *)lep;
+		if (endptr <= ptr || endptr > (char *)btp) {
+			/* implausible tail: scan the whole block and ignore the leaf array */
+			endptr = (char *)data + mp->m_dirblksize;
+			lep = NULL;
+			if (!sflag || v)
+				dbprintf("bad block directory tail for dir ino "
+					 "%lld\n",
+					id->ino);
+			error++;
+		}
+	} else
+		endptr = (char *)data + mp->m_dirblksize;
+	bf_err = lastfree_err = tag_err = 0;
+	count = lastfree = freeseen = 0;
+	/* an unused bestfree slot (length 0) must also have offset 0 */
+	if (INT_GET(bf[0].length, ARCH_CONVERT) == 0) {
+		bf_err += INT_GET(bf[0].offset, ARCH_CONVERT) != 0;
+		freeseen |= 1 << 0;
+	}
+	if (INT_GET(bf[1].length, ARCH_CONVERT) == 0) {
+		bf_err += INT_GET(bf[1].offset, ARCH_CONVERT) != 
0;
+		freeseen |= 1 << 1;
+	}
+	if (INT_GET(bf[2].length, ARCH_CONVERT) == 0) {
+		bf_err += INT_GET(bf[2].offset, ARCH_CONVERT) != 0;
+		freeseen |= 1 << 2;
+	}
+	/* bestfree lengths must be in non-increasing order */
+	bf_err += INT_GET(bf[0].length, ARCH_CONVERT) < INT_GET(bf[1].length, ARCH_CONVERT);
+	bf_err += INT_GET(bf[1].length, ARCH_CONVERT) < INT_GET(bf[2].length, ARCH_CONVERT);
+	if (freetabp) {
+		freetab = *freetabp;
+		if (freetab->naents <= db) {
+			/* NOTE(review): the realloc result is used without a NULL check */
+			*freetabp = freetab =
+				realloc(freetab, FREETAB_SIZE(db + 1));
+			for (i = freetab->naents; i < db; i++)
+				freetab->ents[i] = NULLDATAOFF;
+			freetab->naents = db + 1;
+		}
+		if (freetab->nents < db + 1)
+			freetab->nents = db + 1;
+		freetab->ents[db] = INT_GET(bf[0].length, ARCH_CONVERT);
+	}
+	while (ptr < endptr) {
+		dup = (xfs_dir2_data_unused_t *)ptr;
+		if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) {
+			/* unused space; two unused entries in a row count as an error */
+			lastfree_err += lastfree != 0;
+			if ((INT_GET(dup->length, ARCH_CONVERT) & (XFS_DIR2_DATA_ALIGN - 1)) ||
+			    INT_GET(dup->length, ARCH_CONVERT) == 0 ||
+			    (char *)(tagp = XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT)) >=
+			    endptr) {
+				if (!sflag || v)
+					dbprintf("dir %lld block %d bad free "
+						 "entry at %d\n",
+						id->ino, dabno,
+						(int)((char *)dup -
+						      (char *)data));
+				error++;
+				break;
+			}
+			/* the tag must hold the entry's own offset within the block */
+			tag_err += INT_GET(*tagp, ARCH_CONVERT) != (char *)dup - (char *)data;
+			dfp = process_data_dir_v2_freefind(data, dup);
+			if (dfp) {
+				i = (int)(dfp - bf);
+				bf_err += (freeseen & (1 << i)) != 0;	/* slot matched twice */
+				freeseen |= 1 << i;
+			} else
+				bf_err += INT_GET(dup->length, ARCH_CONVERT) > INT_GET(bf[2].length, ARCH_CONVERT);
+			ptr += INT_GET(dup->length, ARCH_CONVERT);
+			lastfree = 1;
+			continue;
+		}
+		dep = (xfs_dir2_data_entry_t *)dup;
+		if (dep->namelen == 0) {
+			if (!sflag || v)
+				dbprintf("dir %lld block %d zero length entry "
+					 "at %d\n",
+					id->ino, dabno,
+					(int)((char *)dep - (char *)data));
+			error++;
+		}
+		tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
+		if ((char *)tagp >= endptr) {
+			if (!sflag || v)
+				dbprintf("dir %lld block %d bad entry at %d\n",
+					id->ino, 
dabno,
+					(int)((char *)dep - (char *)data));
+			error++;
+			break;
+		}
+		tag_err += INT_GET(*tagp, ARCH_CONVERT) != (char *)dep - (char *)data;
+		/* record the entry so the leaf entries can be matched against it */
+		addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, db,
+			(char *)dep - (char *)data);
+		hash = libxfs_da_hashname((char *)dep->name, dep->namelen);
+		dir_hash_add(hash, addr);
+		ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen);
+		count++;
+		lastfree = 0;
+		lino = INT_GET(dep->inumber, ARCH_CONVERT);
+		cid = find_inode(lino, 1);	/* NULL means lino is not a valid inode number */
+		if (v)
+			dbprintf("dir %lld block %d entry %*.*s %lld\n",
+				id->ino, dabno, dep->namelen, dep->namelen,
+				dep->name, lino);
+		if (cid)
+			addlink_inode(cid);
+		else {
+			if (!sflag || v)
+				dbprintf("dir %lld block %d entry %*.*s bad "
+					 "inode number %lld\n",
+					id->ino, dabno, dep->namelen,
+					dep->namelen, dep->name, lino);
+			error++;
+		}
+		if (dep->namelen == 2 && dep->name[0] == '.' &&
+		    dep->name[1] == '.') {
+			/* ".." entry: remember the parent, complain about a second one */
+			if (parent) {
+				if (!sflag || v)
+					dbprintf("multiple .. entries in dir "
+						 "%lld (%lld, %lld)\n",
+						id->ino, parent, lino);
+				error++;
+			} else
+				parent = cid ? lino : NULLFSINO;
+			(*dotdot)++;
+		} else if (dep->namelen != 1 || dep->name[0] != '.') {
+			/* ordinary entry: give the child its parent (first one wins) and name */
+			if (cid != NULL) {
+				if (!cid->parent)
+					cid->parent = id;
+				addname_inode(cid, (char *)dep->name,
+					dep->namelen);
+			}
+		} else {
+			/* "." entry: must refer to the directory itself */
+			if (lino != id->ino) {
+				if (!sflag || v)
+					dbprintf("dir %lld entry . inode "
+						 "number mismatch (%lld)\n",
+						id->ino, lino);
+				error++;
+			}
+			(*dot)++;
+		}
+	}
+	if (INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) {
+		/* block format: match each leaf entry against the recorded entries */
+		endptr = (char *)data + mp->m_dirblksize;
+		for (i = stale = 0; lep && i < INT_GET(btp->count, ARCH_CONVERT); i++) {
+			if ((char *)&lep[i] >= endptr) {
+				if (!sflag || v)
+					dbprintf("dir %lld block %d bad count "
+						 "%u\n",
+						id->ino, dabno, INT_GET(btp->count, ARCH_CONVERT));
+				error++;
+				break;
+			}
+			if (INT_GET(lep[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+				stale++;
+			else if (dir_hash_see(INT_GET(lep[i].hashval, ARCH_CONVERT), INT_GET(lep[i].address, ARCH_CONVERT))) {
+				/* non-zero covers both "seen twice" (1) and "not recorded" (-1) */
+				if (!sflag || v)
+					dbprintf("dir %lld block %d extra leaf "
+						 "entry %x %x\n",
+						id->ino, dabno, INT_GET(lep[i].hashval, ARCH_CONVERT),
+						INT_GET(lep[i].address, ARCH_CONVERT));
+				error++;
+			}
+		}
+	}
+	bf_err += freeseen != 7;	/* all three bestfree slots must be accounted for */
+	if (bf_err) {
+		if (!sflag || v)
+			dbprintf("dir %lld block %d bad bestfree data\n",
+				id->ino, dabno);
+		error++;
+	}
+	if (INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC &&
+	    count != INT_GET(btp->count, ARCH_CONVERT) - INT_GET(btp->stale, ARCH_CONVERT)) {
+		if (!sflag || v)
+			dbprintf("dir %lld block %d bad block tail count %d "
+				 "(stale %d)\n",
+				id->ino, dabno, INT_GET(btp->count, ARCH_CONVERT), INT_GET(btp->stale, ARCH_CONVERT));
+		error++;
+	}
+	if (INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC && stale != INT_GET(btp->stale, ARCH_CONVERT)) {
+		if (!sflag || v)
+			dbprintf("dir %lld block %d bad stale tail count %d\n",
+				id->ino, dabno, INT_GET(btp->stale, ARCH_CONVERT));
+		error++;
+	}
+	if (lastfree_err) {
+		if (!sflag || v)
+			dbprintf("dir %lld block %d consecutive free entries\n",
+				id->ino, dabno);
+		error++;
+	}
+	if (tag_err) {
+		if (!sflag || v)
+			dbprintf("dir %lld block %d entry/unused tag "
+				 "mismatch\n",
+				id->ino, dabno);
+		error++;
+	}
+	return parent;
+}
+/*
+ * Find the bestfree slot in data's header that describes the unused
+ * entry dup, matching on its offset within the block.
+ */
+static xfs_dir2_data_free_t *
+process_data_dir_v2_freefind(
+	xfs_dir2_data_t		*data,
+	
xfs_dir2_data_unused_t *dup) +{ + xfs_dir2_data_free_t *dfp; + xfs_dir2_data_aoff_t off; + + off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)data); + if (INT_GET(dup->length, ARCH_CONVERT) < INT_GET(data->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length, ARCH_CONVERT)) + return NULL; + for (dfp = &data->hdr.bestfree[0]; + dfp < &data->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; + dfp++) { + if (INT_GET(dfp->offset, ARCH_CONVERT) == 0) + return NULL; + if (INT_GET(dfp->offset, ARCH_CONVERT) == off) + return dfp; + } + return NULL; +} + +static void +process_dir( + xfs_dinode_t *dip, + blkmap_t *blkmap, + inodata_t *id) +{ + xfs_fsblock_t bno; + int dot; + int dotdot; + xfs_ino_t parent; + + dot = dotdot = 0; + if (XFS_DIR_IS_V2(mp)) { + if (process_dir_v2(dip, blkmap, &dot, &dotdot, id, &parent)) + return; + } else + { + if (process_dir_v1(dip, blkmap, &dot, &dotdot, id, &parent)) + return; + } + bno = XFS_INO_TO_FSB(mp, id->ino); + if (dot == 0) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("no . entry for directory %lld\n", id->ino); + error++; + } + if (dotdot == 0) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("no .. entry for directory %lld\n", id->ino); + error++; + } else if (parent == id->ino && id->ino != mp->m_sb.sb_rootino) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf(". and .. same for non-root directory %lld\n", + id->ino); + error++; + } else if (id->ino == mp->m_sb.sb_rootino && id->ino != parent) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("root directory %lld has .. 
%lld\n", id->ino, + parent); + error++; + } else if (parent != NULLFSINO && id->ino != parent) + /* a known parent other than the directory itself: record it */ addparent_inode(id, parent); +} + +/* Pick the v1 directory walker from the inode's size and data fork format and store the inode number it returns in *parent. Returns 0 when a walker ran, 1 after reporting (and counting) a size/format combination that matches none of the cases below. */ static int +process_dir_v1( + xfs_dinode_t *dip, + blkmap_t *blkmap, + int *dot, + int *dotdot, + inodata_t *id, + xfs_ino_t *parent) +{ + /* local format that fits in the data fork: shortform walker */ if (dip->di_core.di_size <= XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_NOCONVERT) && + dip->di_core.di_format == XFS_DINODE_FMT_LOCAL) + *parent = + process_shortform_dir_v1(dip, dot, dotdot, id); + /* extents/btree format with size exactly XFS_LBSIZE(mp): leaf walker */ else if (dip->di_core.di_size == XFS_LBSIZE(mp) && + (dip->di_core.di_format == XFS_DINODE_FMT_EXTENTS || + dip->di_core.di_format == XFS_DINODE_FMT_BTREE)) + *parent = process_leaf_dir_v1(blkmap, dot, dotdot, id); + /* extents/btree format and larger (the equal case was taken above): node walker */ else if (dip->di_core.di_size >= XFS_LBSIZE(mp) && + (dip->di_core.di_format == XFS_DINODE_FMT_EXTENTS || + dip->di_core.di_format == XFS_DINODE_FMT_BTREE)) + *parent = process_node_dir_v1(blkmap, dot, dotdot, id); + else { + dbprintf("bad size (%lld) or format (%d) for directory inode " + "%lld\n", + dip->di_core.di_size, (int)dip->di_core.di_format, + id->ino); + error++; + return 1; + } + return 0; +} + +/* Pick the v2 directory walker and store the inode number it returns in *parent. Local-format directories are recognised by size; for extents/btree format the choice is made from the last file offset reported by blkmap_last_off() (taken as 0 when blkmap is NULL). Returns 0 when a walker ran, 1 after reporting (and counting) a combination that matches no case. */ static int +process_dir_v2( + xfs_dinode_t *dip, + blkmap_t *blkmap, + int *dot, + int *dotdot, + inodata_t *id, + xfs_ino_t *parent) +{ + xfs_fileoff_t last = 0; + + if (blkmap) + last = blkmap_last_off(blkmap); + if (dip->di_core.di_size <= XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_NOCONVERT) && + dip->di_core.di_format == XFS_DINODE_FMT_LOCAL) + *parent = process_sf_dir_v2(dip, dot, dotdot, id); + /* last offset equals one directory block's worth of fs blocks: block form */ else if (last == mp->m_dirblkfsbs && + (dip->di_core.di_format == XFS_DINODE_FMT_EXTENTS || + dip->di_core.di_format == XFS_DINODE_FMT_BTREE)) + *parent = process_block_dir_v2(blkmap, dot, dotdot, id); + /* mapping reaches at least one directory block past m_dirleafblk: leaf/node form */ else if (last >= mp->m_dirleafblk + mp->m_dirblkfsbs && + (dip->di_core.di_format == XFS_DINODE_FMT_EXTENTS || + dip->di_core.di_format == XFS_DINODE_FMT_BTREE)) + *parent = process_leaf_node_dir_v2(blkmap, dot, dotdot, id, + dip->di_core.di_size); + else { + dbprintf("bad size (%lld) or format (%d) for 
directory inode " + "%lld\n", + dip->di_core.di_size, (int)dip->di_core.di_format, + id->ino); + error++; + return 1; + } + return 0; +} + +/* ARGSUSED */ /* Extent-format fork: read the extent count of whichfork into *nex, reject a count that is negative or larger than the number of records the fork can hold, and otherwise pass the in-inode extent records to process_bmbt_reclist(). toti is not referenced in this function. */ +static void +process_exinode( + inodata_t *id, + xfs_dinode_t *dip, + dbm_t type, + xfs_drfsbno_t *totd, + xfs_drfsbno_t *toti, + xfs_extnum_t *nex, + blkmap_t **blkmapp, + int whichfork) +{ + xfs_bmbt_rec_32_t *rp; + + rp = (xfs_bmbt_rec_32_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_NOCONVERT); + *nex = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_NOCONVERT); + if (*nex < 0 || + *nex > + XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_NOCONVERT) / sizeof(xfs_bmbt_rec_32_t)) { + if (!sflag || id->ilist) + dbprintf("bad number of extents %d for inode %lld\n", + *nex, id->ino); + error++; + return; + } + process_bmbt_reclist(rp, *nex, type, id, totd, blkmapp); +} + +/* Check one on-disk inode, numbered agino within the AG described by agf. A non-zero isfree selects the free-inode checks; otherwise the inode's forks are walked and its counts verified. */ static void +process_inode( + xfs_agf_t *agf, + xfs_agino_t agino, + xfs_dinode_t *dip, + int isfree) +{ + blkmap_t *blkmap; + xfs_fsblock_t bno = 0; + xfs_dinode_core_t tdic; + xfs_dinode_core_t *dic; + inodata_t *id = NULL; + xfs_ino_t ino; + xfs_extnum_t nextents = 0; + int nlink; + int security; + xfs_drfsbno_t totblocks; + xfs_drfsbno_t totdblocks = 0; + xfs_drfsbno_t totiblocks = 0; + dbm_t type; + xfs_extnum_t anextents = 0; + xfs_drfsbno_t atotdblocks = 0; + xfs_drfsbno_t atotiblocks = 0; + xfs_qcnt_t bc = 0; + xfs_qcnt_t ic = 0; + xfs_qcnt_t rc = 0; + /* per file type ((di_mode & IFMT) >> 12): bit mask of the data fork formats (1 << format) accepted for that type */ static char okfmts[] = { + 0, /* type 0 unused */ + 1 << XFS_DINODE_FMT_DEV, /* FIFO */ + 1 << XFS_DINODE_FMT_DEV, /* CHR */ + 0, /* type 3 unused */ + (1 << XFS_DINODE_FMT_LOCAL) | + (1 << XFS_DINODE_FMT_EXTENTS) | + (1 << XFS_DINODE_FMT_BTREE), /* DIR */ + 0, /* type 5 unused */ + 1 << XFS_DINODE_FMT_DEV, /* BLK */ + 0, /* type 7 unused */ + (1 << XFS_DINODE_FMT_EXTENTS) | + (1 << XFS_DINODE_FMT_BTREE), /* REG */ + 0, /* type 9 unused */ + (1 << XFS_DINODE_FMT_LOCAL) | + (1 << XFS_DINODE_FMT_EXTENTS), /* LNK */ + 0, /* type 11 unused */ + 1 << XFS_DINODE_FMT_DEV, /* SOCK */ + 0, /* type 13 unused 
*/ + 1 << XFS_DINODE_FMT_UUID, /* MNT */ + 0 /* type 15 unused */ + }; + static char *fmtnames[] = { + "dev", "local", "extents", "btree", "uuid" + }; + + /* convert the core, then copy it back into the inode */ + libxfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core, &tdic, 1, + ARCH_CONVERT); + memcpy(&dip->di_core, &tdic, sizeof(xfs_dinode_core_t)); + dic=&dip->di_core; + + ino = XFS_AGINO_TO_INO(mp, INT_GET(agf->agf_seqno, ARCH_CONVERT), agino); + if (!isfree) { + id = find_inode(ino, 1); + bno = XFS_INO_TO_FSB(mp, ino); + blkmap = NULL; + } + if (dic->di_magic != XFS_DINODE_MAGIC) { + if (!sflag || isfree || id->ilist || CHECK_BLIST(bno)) + dbprintf("bad magic number %#x for inode %lld\n", + dic->di_magic, ino); + error++; + return; + } + if (!XFS_DINODE_GOOD_VERSION(dic->di_version)) { + if (!sflag || isfree || id->ilist || CHECK_BLIST(bno)) + dbprintf("bad version number %#x for inode %lld\n", + dic->di_version, ino); + error++; + return; + } + if (isfree) { + if (dic->di_nblocks != 0) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("bad nblocks %lld for free inode " + "%lld\n", + dic->di_nblocks, ino); + error++; + } + if (dic->di_version == XFS_DINODE_VERSION_1) + nlink = dic->di_onlink; + else + nlink = dic->di_nlink; + if (nlink != 0) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("bad nlink %d for free inode %lld\n", + nlink, ino); + error++; + } + if (dic->di_mode != 0) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("bad mode %#o for free inode %lld\n", + dic->di_mode, ino); + error++; + } + return; + } + /* + * di_mode is a 16-bit uint so no need to check the < 0 case + */ + if ((((dic->di_mode & IFMT) >> 12) > 15) || + (!(okfmts[(dic->di_mode & IFMT) >> 12] & (1 << dic->di_format)))) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("bad format %d for inode %lld type %#o\n", + dic->di_format, id->ino, dic->di_mode & IFMT); + error++; + return; + } + if ((unsigned int)XFS_DFORK_ASIZE_ARCH(dip, mp, 
ARCH_NOCONVERT) >= XFS_LITINO(mp)) { + if (!sflag || id->ilist) + dbprintf("bad fork offset %d for inode %lld\n", + dic->di_forkoff, id->ino); + error++; + return; + } + /* the attribute fork format may not exceed XFS_DINODE_FMT_BTREE; the unsigned cast also rejects negative values */ if ((unsigned int)dic->di_aformat > XFS_DINODE_FMT_BTREE) { + if (!sflag || id->ilist) + dbprintf("bad attribute format %d for inode %lld\n", + dic->di_aformat, id->ino); + error++; + return; + } + if (verbose || id->ilist || CHECK_BLIST(bno)) + dbprintf("inode %lld mode %#o fmt %s " + "afmt %s " + "nex %d anex %d nblk %lld sz %lld%s%s\n", + id->ino, dic->di_mode, fmtnames[dic->di_format], + fmtnames[dic->di_aformat], + dic->di_nextents, + dic->di_anextents, + dic->di_nblocks, dic->di_size, + dic->di_flags & XFS_DIFLAG_REALTIME ? " rt" : "", + dic->di_flags & XFS_DIFLAG_PREALLOC ? " pre" : "" + ); + /* Map the file type to a dbm type. A block map is allocated only for non-local directories and for the realtime bitmap, realtime summary and quota inodes; those three also get addlink_inode(). security is set for setuid/setgid regular files and for every type handled by the default case. */ security = 0; + switch (dic->di_mode & IFMT) { + case IFDIR: + type = DBM_DIR; + if (dic->di_format == XFS_DINODE_FMT_LOCAL) + break; + blkmap = blkmap_alloc(dic->di_nextents); + break; + case IFREG: + if (dic->di_flags & XFS_DIFLAG_REALTIME) + type = DBM_RTDATA; + else if (id->ino == mp->m_sb.sb_rbmino) { + type = DBM_RTBITMAP; + blkmap = blkmap_alloc(dic->di_nextents); + addlink_inode(id); + } else if (id->ino == mp->m_sb.sb_rsumino) { + type = DBM_RTSUM; + blkmap = blkmap_alloc(dic->di_nextents); + addlink_inode(id); + } + else if (id->ino == mp->m_sb.sb_uquotino || + id->ino == mp->m_sb.sb_pquotino) { + type = DBM_QUOTA; + blkmap = blkmap_alloc(dic->di_nextents); + addlink_inode(id); + } + else + type = DBM_DATA; + if (dic->di_mode & (ISUID | ISGID)) + security = 1; + break; + case IFLNK: + type = DBM_SYMLINK; + break; + default: + security = 1; + type = DBM_UNKNOWN; + break; + } + /* version 1 inodes carry the link count in di_onlink; any other version uses di_nlink and marks the NLINK superblock feature bit as seen */ if (dic->di_version == XFS_DINODE_VERSION_1) + setlink_inode(id, dic->di_onlink, type == DBM_DIR, security); + else { + sbversion |= XFS_SB_VERSION_NLINKBIT; + setlink_inode(id, dic->di_nlink, type == DBM_DIR, security); + } + /* walk the data fork with the handler for its format; formats without a case here are not walked */ switch (dic->di_format) { + case XFS_DINODE_FMT_LOCAL: + process_lclinode(id, dip, type, 
&totdblocks, &totiblocks, + &nextents, &blkmap, XFS_DATA_FORK); + break; + case XFS_DINODE_FMT_EXTENTS: + process_exinode(id, dip, type, &totdblocks, &totiblocks, + &nextents, &blkmap, XFS_DATA_FORK); + break; + case XFS_DINODE_FMT_BTREE: + process_btinode(id, dip, type, &totdblocks, &totiblocks, + &nextents, &blkmap, XFS_DATA_FORK); + break; + } + /* an attribute fork is present: walk it too, without a block map */ + if (XFS_DFORK_Q_ARCH(dip, ARCH_NOCONVERT)) { + sbversion |= XFS_SB_VERSION_ATTRBIT; + switch (dic->di_aformat) { + case XFS_DINODE_FMT_LOCAL: + process_lclinode(id, dip, DBM_ATTR, &atotdblocks, + &atotiblocks, &anextents, NULL, XFS_ATTR_FORK); + break; + case XFS_DINODE_FMT_EXTENTS: + process_exinode(id, dip, DBM_ATTR, &atotdblocks, + &atotiblocks, &anextents, NULL, XFS_ATTR_FORK); + break; + case XFS_DINODE_FMT_BTREE: + process_btinode(id, dip, DBM_ATTR, &atotdblocks, + &atotiblocks, &anextents, NULL, XFS_ATTR_FORK); + break; + } + } + /* + * Quota accounting: realtime data blocks are charged to rc, all other + * blocks to bc. Types not listed leave ic at 0 and are not charged. + */ + if (qpdo || qudo) { + switch (type) { + case DBM_DATA: + case DBM_DIR: + case DBM_RTBITMAP: + case DBM_RTSUM: + case DBM_SYMLINK: + case DBM_UNKNOWN: + bc = totdblocks + totiblocks + + atotdblocks + atotiblocks; + ic = 1; + break; + case DBM_RTDATA: + bc = totiblocks + atotdblocks + atotiblocks; + rc = totdblocks; + ic = 1; + break; + default: + /* a label must be followed by a statement */ + break; + } + if (ic) + quota_add(dic->di_version >= XFS_DINODE_VERSION_2 ? 
+ dic->di_projid : -1, + dic->di_uid, 0, bc, ic, rc); + } + /* compare the block and extent counts gathered while walking the forks with the values stored in the inode core */ totblocks = totdblocks + totiblocks + atotdblocks + atotiblocks; + if (totblocks != dic->di_nblocks) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("bad nblocks %lld for inode %lld, counted " + "%lld\n", + dic->di_nblocks, id->ino, totblocks); + error++; + } + if (nextents != dic->di_nextents) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("bad nextents %d for inode %lld, counted %d\n", + dic->di_nextents, id->ino, nextents); + error++; + } + if (anextents != dic->di_anextents) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("bad anextents %d for inode %lld, counted " + "%d\n", + dic->di_anextents, id->ino, anextents); + error++; + } + /* hand the block map to the checker for this inode type */ if (type == DBM_DIR) + process_dir(dip, blkmap, id); + else if (type == DBM_RTBITMAP) + process_rtbitmap(blkmap); + else if (type == DBM_RTSUM) + process_rtsummary(blkmap); + /* + * If the CHKD flag is not set, this can legitimately contain garbage; + * xfs_repair may have cleared that bit. 
+ */ + else if (type == DBM_QUOTA && (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD)) + process_quota(id->ino == mp->m_sb.sb_pquotino, id, blkmap); + if (blkmap) + blkmap_free(blkmap); +} + +/* ARGSUSED */ +static void +process_lclinode( + inodata_t *id, + xfs_dinode_t *dip, + dbm_t type, + xfs_drfsbno_t *totd, + xfs_drfsbno_t *toti, + xfs_extnum_t *nex, + blkmap_t **blkmapp, + int whichfork) +{ + xfs_attr_shortform_t *asf; + xfs_fsblock_t bno; + xfs_dinode_core_t *dic; + + dic = &dip->di_core; + bno = XFS_INO_TO_FSB(mp, id->ino); + if (whichfork == XFS_DATA_FORK && + dic->di_size > XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_NOCONVERT)) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("local inode %lld data is too large (size " + "%lld)\n", + id->ino, dic->di_size); + error++; + } + else if (whichfork == XFS_ATTR_FORK) { + asf = (xfs_attr_shortform_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_NOCONVERT); + if (INT_GET(asf->hdr.totsize, ARCH_CONVERT) > XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_NOCONVERT)) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("local inode %lld attr is too large " + "(size %d)\n", + id->ino, INT_GET(asf->hdr.totsize, ARCH_CONVERT)); + error++; + } + } +} + +static xfs_ino_t +process_leaf_dir_v1( + blkmap_t *blkmap, + int *dot, + int *dotdot, + inodata_t *id) +{ + xfs_fsblock_t bno; + xfs_ino_t parent; + + bno = blkmap_get(blkmap, 0); + if (bno == NULLFSBLOCK) { + if (!sflag || id->ilist) + dbprintf("block 0 for directory inode %lld is " + "missing\n", + id->ino); + error++; + return 0; + } + push_cur(); + set_cur(&typtab[TYP_DIR], XFS_FSB_TO_DADDR(mp, bno), blkbb, DB_RING_IGN, + NULL); + if (iocur_top->data == NULL) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("can't read block 0 for directory inode " + "%lld\n", + id->ino); + error++; + return 0; + } + parent = process_leaf_dir_v1_int(dot, dotdot, id); + pop_cur(); + return parent; +} + +static xfs_ino_t +process_leaf_dir_v1_int( + int *dot, + int *dotdot, + inodata_t *id) +{ 
+ /* Check the v1 directory leaf block currently at iocur_top: every entry's inode is looked up and linked, "." and ".." are counted through *dot / *dotdot, and other names are attached to the child inode. Returns the ".." inode number (NULLFSINO if that inode could not be found or the block magic is bad, 0 if no ".." entry was seen). */ xfs_fsblock_t bno; + inodata_t *cid; + xfs_dir_leaf_entry_t *entry; + int i; + xfs_dir_leafblock_t *leaf; + xfs_ino_t lino; + xfs_dir_leaf_name_t *namest; + xfs_ino_t parent = 0; + int v; + + bno = XFS_DADDR_TO_FSB(mp, iocur_top->bb); + v = verbose || id->ilist || CHECK_BLIST(bno); + leaf = iocur_top->data; + if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("bad directory leaf magic # %#x for dir ino " + "%lld\n", + INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), id->ino); + error++; + return NULLFSINO; + } + entry = &leaf->entries[0]; + for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++) { + /* the entry's nameidx locates its name structure, which holds the inode number and the name bytes */ namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)); + lino=DIRINO_GET_ARCH(&namest->inumber, ARCH_CONVERT); + cid = find_inode(lino, 1); + if (v) + dbprintf("dir %lld entry %*.*s %lld\n", id->ino, + entry->namelen, entry->namelen, namest->name, + lino); + if (cid) + addlink_inode(cid); + else { + if (!sflag) + dbprintf("dir %lld entry %*.*s bad inode " + "number %lld\n", + id->ino, entry->namelen, entry->namelen, + namest->name, lino); + error++; + } + /* three cases by name: "..", any ordinary name, and finally "." */ if (entry->namelen == 2 && namest->name[0] == '.' && + namest->name[1] == '.') { + if (parent) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("multiple .. entries in dir " + "%lld (%lld, %lld)\n", + id->ino, parent, lino); + error++; + } else + parent = cid ? lino : NULLFSINO; + (*dotdot)++; + } else if (entry->namelen != 1 || namest->name[0] != '.') { + if (cid != NULL) { + if (!cid->parent) + cid->parent = id; + addname_inode(cid, (char *)namest->name, + entry->namelen); + } + } else { + if (lino != id->ino) { + if (!sflag) + dbprintf("dir %lld entry . 
inode " + "number mismatch (%lld)\n", + id->ino, lino); + error++; + } + (*dot)++; + } + } + return parent; +} + +/* Check a leaf- or node-format v2 directory of dirsize bytes. Every mapped directory block is read and dispatched by its file offset: below m_dirleafblk to process_data_dir_v2(), below m_dirfreeblk to process_leaf_node_dir_v2_int(), otherwise to process_leaf_node_dir_v2_free(). freetab is shared with those checkers. Returns the first non-zero value returned by process_data_dir_v2(), or 0. */ static xfs_ino_t +process_leaf_node_dir_v2( + blkmap_t *blkmap, + int *dot, + int *dotdot, + inodata_t *id, + xfs_fsize_t dirsize) +{ + xfs_fsblock_t b; + bbmap_t bbmap; + bmap_ext_t *bmp; + xfs_fileoff_t dbno; + freetab_t *freetab; + int i; + xfs_ino_t lino; + int nex; + xfs_ino_t parent; + int t; + int v; + int v2; + int x; + + v2 = verbose || id->ilist; + v = parent = 0; + dbno = NULLFILEOFF; + /* NOTE(review): the malloc() result is used without a NULL check */ freetab = malloc(FREETAB_SIZE(dirsize / mp->m_dirblksize)); + freetab->naents = (int)(dirsize / mp->m_dirblksize); + freetab->nents = 0; + for (i = 0; i < freetab->naents; i++) + freetab->ents[i] = NULLDATAOFF; + dir_hash_init(); + while ((dbno = blkmap_next_off(blkmap, dbno, &t)) != NULLFILEOFF) { + nex = blkmap_getn(blkmap, dbno, mp->m_dirblkfsbs, &bmp); + ASSERT(nex > 0); + /* v starts from v2 and also becomes set when any fs block of this directory block satisfies CHECK_BLIST() */ for (v = v2, x = 0; !v && x < nex; x++) { + for (b = bmp[x].startblock; + !v && b < bmp[x].startblock + bmp[x].blockcount; + b++) + v = CHECK_BLIST(b); + } + if (v) + dbprintf("dir inode %lld block %u=%llu\n", id->ino, + (__uint32_t)dbno, + (xfs_dfsbno_t)bmp->startblock); + push_cur(); + /* a directory block made of several extents is read through a bbmap */ if (nex > 1) + make_bbmap(&bbmap, nex, bmp); + set_cur(&typtab[TYP_DIR], XFS_FSB_TO_DADDR(mp, bmp->startblock), + mp->m_dirblkfsbs * blkbb, DB_RING_IGN, + nex > 1 ? &bbmap : NULL); + free(bmp); + if (iocur_top->data == NULL) { + if (!sflag || v) + dbprintf("can't read block %u for directory " + "inode %lld\n", + (__uint32_t)dbno, id->ino); + error++; + pop_cur(); + dbno += mp->m_dirblkfsbs - 1; + continue; + } + if (dbno < mp->m_dirleafblk) { + lino = process_data_dir_v2(dot, dotdot, id, v, + (xfs_dablk_t)dbno, &freetab); + if (lino) { + if (parent) { + if (!sflag || v) + dbprintf("multiple .. 
entries " + "in dir %lld\n", + id->ino); + error++; + } else + parent = lino; + } + } else if (dbno < mp->m_dirfreeblk) { + process_leaf_node_dir_v2_int(id, v, (xfs_dablk_t)dbno, + freetab); + } else { + process_leaf_node_dir_v2_free(id, v, (xfs_dablk_t)dbno, + freetab); + } + pop_cur(); + /* step to the last fs block of this directory block before asking blkmap_next_off() for the next offset */ dbno += mp->m_dirblkfsbs - 1; + } + dir_hash_check(id, v); + dir_hash_done(); + /* the leaf and free-index checkers reset each freetab entry they account for to NULLDATAOFF; anything still set was never matched */ for (i = 0; i < freetab->nents; i++) { + if (freetab->ents[i] != NULLDATAOFF) { + if (!sflag || v) + dbprintf("missing free index for data block %d " + "in dir ino %lld\n", + XFS_DIR2_DB_TO_DA(mp, i), id->ino); + error++; + } + } + free(freetab); + return parent; +} + +/* Check the v2 free-index block at iocur_top (directory block dabno): magic, firstdb against the block's position, the nvalid/nused ranges, and then each bests[] entry against freetab. Matched freetab entries are reset to NULLDATAOFF. */ static void +process_leaf_node_dir_v2_free( + inodata_t *id, + int v, + xfs_dablk_t dabno, + freetab_t *freetab) +{ + xfs_dir2_data_off_t ent; + xfs_dir2_free_t *free; + int i; + int maxent; + int used; + + free = iocur_top->data; + if (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC) { + if (!sflag || v) + dbprintf("bad free block magic # %#x for dir ino %lld " + "block %d\n", + INT_GET(free->hdr.magic, ARCH_CONVERT), id->ino, dabno); + error++; + return; + } + maxent = XFS_DIR2_MAX_FREE_BESTS(mp); + /* firstdb must equal this free block's index past m_dirfreeblk multiplied by the entries per free block */ if (INT_GET(free->hdr.firstdb, ARCH_CONVERT) != + XFS_DIR2_DA_TO_DB(mp, dabno - mp->m_dirfreeblk) * maxent) { + if (!sflag || v) + dbprintf("bad free block firstdb %d for dir ino %lld " + "block %d\n", + INT_GET(free->hdr.firstdb, ARCH_CONVERT), id->ino, dabno); + error++; + return; + } + if (INT_GET(free->hdr.nvalid, ARCH_CONVERT) > maxent || INT_GET(free->hdr.nvalid, ARCH_CONVERT) < 0 || + INT_GET(free->hdr.nused, ARCH_CONVERT) > maxent || INT_GET(free->hdr.nused, ARCH_CONVERT) < 0 || + INT_GET(free->hdr.nused, ARCH_CONVERT) > INT_GET(free->hdr.nvalid, ARCH_CONVERT)) { + if (!sflag || v) + dbprintf("bad free block nvalid/nused %d/%d for dir " + "ino %lld block %d\n", + INT_GET(free->hdr.nvalid, ARCH_CONVERT), INT_GET(free->hdr.nused, ARCH_CONVERT), id->ino, + dabno); + error++; + return; + } + for (used = i = 0; i < 
INT_GET(free->hdr.nvalid, ARCH_CONVERT); i++) { + /* expected value: the freetab entry for data block firstdb + i, or NULLDATAOFF when freetab has no such entry */ if (freetab->nents <= INT_GET(free->hdr.firstdb, ARCH_CONVERT) + i) + ent = NULLDATAOFF; + else + ent = freetab->ents[INT_GET(free->hdr.firstdb, ARCH_CONVERT) + i]; + if (ent != INT_GET(free->bests[i], ARCH_CONVERT)) { + if (!sflag || v) + dbprintf("bad free block ent %d is %d should " + "be %d for dir ino %lld block %d\n", + i, INT_GET(free->bests[i], ARCH_CONVERT), ent, id->ino, dabno); + error++; + } + if (INT_GET(free->bests[i], ARCH_CONVERT) != NULLDATAOFF) + used++; + /* mark the freetab entry as accounted for */ if (ent != NULLDATAOFF) + freetab->ents[INT_GET(free->hdr.firstdb, ARCH_CONVERT) + i] = NULLDATAOFF; + } + if (used != INT_GET(free->hdr.nused, ARCH_CONVERT)) { + if (!sflag || v) + dbprintf("bad free block nused %d should be %d for dir " + "ino %lld block %d\n", + INT_GET(free->hdr.nused, ARCH_CONVERT), used, id->ino, dabno); + error++; + } +} + +/* Check the v2 leaf or node block at iocur_top (directory block dabno), dispatching on its magic number. A LEAF1 block additionally has its sibling pointers, position and bests[] table checked against freetab; a da node block only has its level checked; leaf entries are then run through dir_hash_see() and the stale count is verified. */ static void +process_leaf_node_dir_v2_int( + inodata_t *id, + int v, + xfs_dablk_t dabno, + freetab_t *freetab) +{ + int i; + xfs_dir2_data_off_t *lbp; + xfs_dir2_leaf_t *leaf; + xfs_dir2_leaf_entry_t *lep; + xfs_dir2_leaf_tail_t *ltp; + xfs_da_intnode_t *node; + int stale; + + leaf = iocur_top->data; + switch (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)) { + case XFS_DIR2_LEAF1_MAGIC: + /* a LEAF1 block is expected to have no forw/back siblings and to sit at m_dirleafblk */ if (INT_GET(leaf->hdr.info.forw, ARCH_CONVERT) || INT_GET(leaf->hdr.info.back, ARCH_CONVERT)) { + if (!sflag || v) + dbprintf("bad leaf block forw/back pointers " + "%d/%d for dir ino %lld block %d\n", + INT_GET(leaf->hdr.info.forw, ARCH_CONVERT), + INT_GET(leaf->hdr.info.back, ARCH_CONVERT), id->ino, dabno); + error++; + } + if (dabno != mp->m_dirleafblk) { + if (!sflag || v) + dbprintf("single leaf block for dir ino %lld " + "block %d should be at block %d\n", + id->ino, dabno, + (xfs_dablk_t)mp->m_dirleafblk); + error++; + } + ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + lbp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT); + for (i = 0; i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++) { + if (freetab->nents <= i || 
freetab->ents[i] != INT_GET(lbp[i], ARCH_CONVERT)) { + if (!sflag || v) + dbprintf("bestfree %d for dir ino %lld " + "block %d doesn't match table " + "value %d\n", + freetab->nents <= i ? + NULLDATAOFF : + freetab->ents[i], + id->ino, + XFS_DIR2_DB_TO_DA(mp, i), + INT_GET(lbp[i], ARCH_CONVERT)); + } + if (freetab->nents > i) + freetab->ents[i] = NULLDATAOFF; + } + break; + case XFS_DIR2_LEAFN_MAGIC: + /* if it's at the root location then we can check the + * pointers are null XXX */ + break; + case XFS_DA_NODE_MAGIC: + node = iocur_top->data; + if (INT_GET(node->hdr.level, ARCH_CONVERT) < 1 || + INT_GET(node->hdr.level, ARCH_CONVERT) > XFS_DA_NODE_MAXDEPTH) { + if (!sflag || v) + dbprintf("bad node block level %d for dir ino " + "%lld block %d\n", + INT_GET(node->hdr.level, ARCH_CONVERT), id->ino, dabno); + error++; + } + return; + default: + if (!sflag || v) + dbprintf("bad directory data magic # %#x for dir ino " + "%lld block %d\n", + INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), id->ino, dabno); + error++; + return; + } + lep = leaf->ents; + for (i = stale = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); i++) { + if (INT_GET(lep[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + stale++; + else if (dir_hash_see(INT_GET(lep[i].hashval, ARCH_CONVERT), INT_GET(lep[i].address, ARCH_CONVERT))) { + if (!sflag || v) + dbprintf("dir %lld block %d extra leaf entry " + "%x %x\n", + id->ino, dabno, INT_GET(lep[i].hashval, ARCH_CONVERT), + INT_GET(lep[i].address, ARCH_CONVERT)); + error++; + } + } + if (stale != INT_GET(leaf->hdr.stale, ARCH_CONVERT)) { + if (!sflag || v) + dbprintf("dir %lld block %d stale mismatch " + "%d/%d\n", + id->ino, dabno, stale, + INT_GET(leaf->hdr.stale, ARCH_CONVERT)); + error++; + } +} + +static xfs_ino_t +process_node_dir_v1( + blkmap_t *blkmap, + int *dot, + int *dotdot, + inodata_t *id) +{ + xfs_fsblock_t bno; + xfs_fileoff_t dbno; + xfs_ino_t lino; + xfs_da_intnode_t *node; + xfs_ino_t parent; + int t; + int v; + int v2; + + v = verbose 
|| id->ilist; + parent = 0; + dbno = NULLFILEOFF; + while ((dbno = blkmap_next_off(blkmap, dbno, &t)) != NULLFILEOFF) { + bno = blkmap_get(blkmap, dbno); + v2 = bno != NULLFSBLOCK && CHECK_BLIST(bno); + if (bno == NULLFSBLOCK && dbno == 0) { + if (!sflag || v) + dbprintf("can't read root block for directory " + "inode %lld\n", + id->ino); + error++; + } + if (v || v2) + dbprintf("dir inode %lld block %u=%llu\n", id->ino, + (__uint32_t)dbno, (xfs_dfsbno_t)bno); + if (bno == NULLFSBLOCK) + continue; + push_cur(); + set_cur(&typtab[TYP_DIR], XFS_FSB_TO_DADDR(mp, bno), blkbb, + DB_RING_IGN, NULL); + if ((node = iocur_top->data) == NULL) { + if (!sflag || v || v2) + dbprintf("can't read block %u for directory " + "inode %lld\n", + (__uint32_t)dbno, id->ino); + error++; + continue; + } +#if VERS >= V_62 + if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) +#else + if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_NODE_MAGIC) +#endif + { + pop_cur(); + continue; + } + lino = process_leaf_dir_v1_int(dot, dotdot, id); + if (lino) { + if (parent) { + if (!sflag || v || v2) + dbprintf("multiple .. entries in dir " + "%lld\n", + id->ino); + error++; + } else + parent = lino; + } + pop_cur(); + } + return parent; +} + +static void +process_quota( + int isproj, + inodata_t *id, + blkmap_t *blkmap) +{ + xfs_fsblock_t bno; + int cb; + xfs_dqblk_t *dqb; + xfs_dqid_t dqid; + u_int8_t exp_flags; + int i; + int perblock; + xfs_fileoff_t qbno; + char *s; + int scicb; + int t; + + perblock = (int)(mp->m_sb.sb_blocksize / sizeof(*dqb)); + s = isproj ? "project" : "user"; + exp_flags = isproj ? 
XFS_DQ_PROJ : XFS_DQ_USER; + dqid = 0; + qbno = NULLFILEOFF; + while ((qbno = blkmap_next_off(blkmap, qbno, &t)) != + NULLFILEOFF) { + /* NOTE(review): bno is not compared with NULLFSBLOCK before it is used */ bno = blkmap_get(blkmap, qbno); + /* the first id expected in file block qbno is qbno * perblock; it advances with each entry below */ dqid = (xfs_dqid_t)qbno * perblock; + cb = CHECK_BLIST(bno); + /* scicb: whether error messages for this block are printed */ scicb = !sflag || id->ilist || cb; + push_cur(); + set_cur(&typtab[TYP_DQBLK], XFS_FSB_TO_DADDR(mp, bno), blkbb, + DB_RING_IGN, NULL); + if ((dqb = iocur_top->data) == NULL) { + pop_cur(); + if (scicb) + dbprintf("can't read block %lld for %s quota " + "inode (fsblock %lld)\n", + (xfs_dfiloff_t)qbno, s, + (xfs_dfsbno_t)bno); + error++; + continue; + } + /* each entry must pass the magic, version, flags and id checks in turn; a failing entry is counted as an error and skipped */ for (i = 0; i < perblock; i++, dqid++, dqb++) { + if (verbose || id->ilist || cb) + dbprintf("%s dqblk %lld entry %d id %d bc " + "%lld ic %lld rc %lld\n", + s, (xfs_dfiloff_t)qbno, i, dqid, + INT_GET(dqb->dd_diskdq.d_bcount, ARCH_CONVERT), + INT_GET(dqb->dd_diskdq.d_icount, ARCH_CONVERT), + INT_GET(dqb->dd_diskdq.d_rtbcount, ARCH_CONVERT)); + if (INT_GET(dqb->dd_diskdq.d_magic, ARCH_CONVERT) != XFS_DQUOT_MAGIC) { + if (scicb) + dbprintf("bad magic number %#x for %s " + "dqblk %lld entry %d id %d\n", + INT_GET(dqb->dd_diskdq.d_magic, ARCH_CONVERT), s, + (xfs_dfiloff_t)qbno, i, dqid); + error++; + continue; + } + if (INT_GET(dqb->dd_diskdq.d_version, ARCH_CONVERT) != XFS_DQUOT_VERSION) { + if (scicb) + dbprintf("bad version number %#x for " + "%s dqblk %lld entry %d id " + "%d\n", + INT_GET(dqb->dd_diskdq.d_version, ARCH_CONVERT), s, + (xfs_dfiloff_t)qbno, i, dqid); + error++; + continue; + } + if (INT_GET(dqb->dd_diskdq.d_flags, ARCH_CONVERT) != exp_flags) { + if (scicb) + dbprintf("bad flags %#x for %s dqblk " + "%lld entry %d id %d\n", + INT_GET(dqb->dd_diskdq.d_flags, ARCH_CONVERT), s, + (xfs_dfiloff_t)qbno, i, dqid); + error++; + continue; + } + if (INT_GET(dqb->dd_diskdq.d_id, ARCH_CONVERT) != dqid) { + if (scicb) + dbprintf("bad id %d for %s dqblk %lld " + "entry %d id %d\n", + INT_GET(dqb->dd_diskdq.d_id, ARCH_CONVERT), s, + (xfs_dfiloff_t)qbno, i, dqid); + error++; + continue; + } 
+ /* record the on-disk counts (dq argument 1) under the project or the user id; the other id is passed as -1 so quota_add() ignores it */ quota_add(isproj ? dqid : -1, isproj ? -1 : dqid, 1, + INT_GET(dqb->dd_diskdq.d_bcount, ARCH_CONVERT), + INT_GET(dqb->dd_diskdq.d_icount, ARCH_CONVERT), + INT_GET(dqb->dd_diskdq.d_rtbcount, ARCH_CONVERT)); + } + pop_cur(); + } +} + +/* Walk the realtime bitmap file mapped by blkmap, one bit per realtime extent up to sb_rextents. Each set bit marks its extent DBM_RTFREE via set_rdbmap() and bumps frextents; every run of consecutive set bits is tallied into sumcompute[] at the offset XFS_SUMOFFS() gives for the run's log2 length and starting bitmap block. */ static void +process_rtbitmap( + blkmap_t *blkmap) +{ +#define xfs_highbit64 libxfs_highbit64 /* for XFS_RTBLOCKLOG macro */ + int bit; + int bitsperblock; + xfs_fileoff_t bmbno; + xfs_fsblock_t bno; + xfs_drtbno_t extno; + int len; + int log; + int offs; + int prevbit; + xfs_drfsbno_t rtbno; + int start_bmbno; + int start_bit; + int t; + xfs_rtword_t *words; + + bitsperblock = mp->m_sb.sb_blocksize * NBBY; + bit = extno = prevbit = start_bmbno = start_bit = 0; + bmbno = NULLFILEOFF; + while ((bmbno = blkmap_next_off(blkmap, bmbno, &t)) != + NULLFILEOFF) { + bno = blkmap_get(blkmap, bmbno); + if (bno == NULLFSBLOCK) { + if (!sflag) + dbprintf("block %lld for rtbitmap inode is " + "missing\n", + (xfs_dfiloff_t)bmbno); + error++; + continue; + } + push_cur(); + set_cur(&typtab[TYP_RTBITMAP], XFS_FSB_TO_DADDR(mp, bno), blkbb, + DB_RING_IGN, NULL); + if ((words = iocur_top->data) == NULL) { + pop_cur(); + if (!sflag) + dbprintf("can't read block %lld for rtbitmap " + "inode\n", + (xfs_dfiloff_t)bmbno); + error++; + continue; + } + for (bit = 0; + bit < bitsperblock && extno < mp->m_sb.sb_rextents; + bit++, extno++) { + if (isset(words, bit)) { + rtbno = extno * mp->m_sb.sb_rextsize; + set_rdbmap(rtbno, mp->m_sb.sb_rextsize, + DBM_RTFREE); + frextents++; + /* prevbit == 0 means this set bit starts a new free run: remember where */ if (prevbit == 0) { + start_bmbno = (int)bmbno; + start_bit = bit; + prevbit = 1; + } + } else if (prevbit == 1) { + /* a clear bit ends the current run: its length may span several bitmap blocks */ len = ((int)bmbno - start_bmbno) * + bitsperblock + (bit - start_bit); + log = XFS_RTBLOCKLOG(len); + offs = XFS_SUMOFFS(mp, log, start_bmbno); + sumcompute[offs]++; + prevbit = 0; + } + } + pop_cur(); + if (extno == mp->m_sb.sb_rextents) + break; + } + /* a run still open when the walk ends is tallied here */ if (prevbit == 1) { + len = ((int)bmbno - start_bmbno) * bitsperblock + + (bit - start_bit); + log = XFS_RTBLOCKLOG(len); 
+ offs = XFS_SUMOFFS(mp, log, start_bmbno); + sumcompute[offs]++; + } +} + +static void +process_rtsummary( + blkmap_t *blkmap) +{ + xfs_fsblock_t bno; + char *bytes; + xfs_fileoff_t sumbno; + int t; + + sumbno = NULLFILEOFF; + while ((sumbno = blkmap_next_off(blkmap, sumbno, &t)) != + NULLFILEOFF) { + bno = blkmap_get(blkmap, sumbno); + if (bno == NULLFSBLOCK) { + if (!sflag) + dbprintf("block %lld for rtsummary inode is " + "missing\n", + (xfs_dfiloff_t)sumbno); + error++; + continue; + } + push_cur(); + set_cur(&typtab[TYP_RTSUMMARY], XFS_FSB_TO_DADDR(mp, bno), + blkbb, DB_RING_IGN, NULL); + if ((bytes = iocur_top->data) == NULL) { + if (!sflag) + dbprintf("can't read block %lld for rtsummary " + "inode\n", + (xfs_dfiloff_t)sumbno); + error++; + continue; + } + memcpy((char *)sumfile + sumbno * mp->m_sb.sb_blocksize, bytes, + mp->m_sb.sb_blocksize); + pop_cur(); + } +} + +static xfs_ino_t +process_sf_dir_v2( + xfs_dinode_t *dip, + int *dot, + int *dotdot, + inodata_t *id) +{ + inodata_t *cid; + int i; + int i8; + xfs_ino_t lino; + int offset; + xfs_dir2_sf_t *sf; + xfs_dir2_sf_entry_t *sfe; + int v; + + sf = &dip->di_u.di_dir2sf; + addlink_inode(id); + v = verbose || id->ilist; + if (v) + dbprintf("dir %lld entry . 
%lld\n", id->ino, id->ino); + (*dot)++; + sfe = XFS_DIR2_SF_FIRSTENTRY(sf); + offset = XFS_DIR2_DATA_FIRST_OFFSET; + /* i counts the remaining entries down; i8 counts inode numbers above XFS_DIR2_MAX_SHORT_INUM */ for (i = INT_GET(sf->hdr.count, ARCH_CONVERT) - 1, i8 = 0; i >= 0; i--) { + /* stop as soon as an entry would extend past di_size */ if ((__psint_t)sfe + XFS_DIR2_SF_ENTSIZE_BYENTRY(sf, sfe) - + (__psint_t)sf > dip->di_core.di_size) { + if (!sflag) + dbprintf("dir %llu bad size in entry at %d\n", + id->ino, + (int)((char *)sfe - (char *)sf)); + error++; + break; + } + lino = XFS_DIR2_SF_GET_INUMBER_ARCH(sf, XFS_DIR2_SF_INUMBERP(sfe), ARCH_CONVERT); + if (lino > XFS_DIR2_MAX_SHORT_INUM) + i8++; + cid = find_inode(lino, 1); + if (cid == NULL) { + if (!sflag) + dbprintf("dir %lld entry %*.*s bad inode " + "number %lld\n", + id->ino, sfe->namelen, sfe->namelen, + sfe->name, lino); + error++; + } else { + addlink_inode(cid); + if (!cid->parent) + cid->parent = id; + addname_inode(cid, (char *)sfe->name, sfe->namelen); + } + if (v) + dbprintf("dir %lld entry %*.*s offset %d %lld\n", + id->ino, sfe->namelen, sfe->namelen, sfe->name, + XFS_DIR2_SF_GET_OFFSET_ARCH(sfe, ARCH_CONVERT), lino); + /* an entry's recorded offset may not be lower than the end of the previous entry */ if (XFS_DIR2_SF_GET_OFFSET_ARCH(sfe, ARCH_CONVERT) < offset) { + if (!sflag) + dbprintf("dir %lld entry %*.*s bad offset %d\n", + id->ino, sfe->namelen, sfe->namelen, + sfe->name, XFS_DIR2_SF_GET_OFFSET_ARCH(sfe, ARCH_CONVERT)); + error++; + } + offset = + XFS_DIR2_SF_GET_OFFSET_ARCH(sfe, ARCH_CONVERT) + + XFS_DIR2_DATA_ENTSIZE(sfe->namelen); + sfe = XFS_DIR2_SF_NEXTENTRY(sf, sfe); + } + /* i < 0 means the loop was not cut short; only then is the total size compared */ if (i < 0 && (__psint_t)sfe - (__psint_t)sf != dip->di_core.di_size) { + if (!sflag) + dbprintf("dir %llu size is %lld, should be %u\n", + id->ino, dip->di_core.di_size, + (uint)((char *)sfe - (char *)sf)); + error++; + } + /* the final offset plus count + 2 leaf entries and a block tail must fit within m_dirblksize */ if (offset + (INT_GET(sf->hdr.count, ARCH_CONVERT) + 2) * sizeof(xfs_dir2_leaf_entry_t) + + sizeof(xfs_dir2_block_tail_t) > mp->m_dirblksize) { + if (!sflag) + dbprintf("dir %llu offsets too high\n", id->ino); + error++; + } + lino = XFS_DIR2_SF_GET_INUMBER_ARCH(sf, &sf->hdr.parent, ARCH_CONVERT); + if (lino > 
XFS_DIR2_MAX_SHORT_INUM) + i8++; + cid = find_inode(lino, 1); + if (cid) + addlink_inode(cid); + else { + if (!sflag) + dbprintf("dir %lld entry .. bad inode number %lld\n", + id->ino, lino); + error++; + } + if (v) + dbprintf("dir %lld entry .. %lld\n", id->ino, lino); + if (i8 != sf->hdr.i8count) { + if (!sflag) + dbprintf("dir %lld i8count mismatch is %d should be " + "%d\n", + id->ino, sf->hdr.i8count, i8); + error++; + } + (*dotdot)++; + return cid ? lino : NULLFSINO; +} + +static xfs_ino_t +process_shortform_dir_v1( + xfs_dinode_t *dip, + int *dot, + int *dotdot, + inodata_t *id) +{ + inodata_t *cid; + int i; + xfs_ino_t lino; + xfs_dir_shortform_t *sf; + xfs_dir_sf_entry_t *sfe; + int v; + + sf = &dip->di_u.di_dirsf; + addlink_inode(id); + v = verbose || id->ilist; + if (v) + dbprintf("dir %lld entry . %lld\n", id->ino, id->ino); + (*dot)++; + sfe = &sf->list[0]; + for (i = INT_GET(sf->hdr.count, ARCH_CONVERT) - 1; i >= 0; i--) { + lino = DIRINO_GET_ARCH(&sfe->inumber, ARCH_CONVERT); + cid = find_inode(lino, 1); + if (cid == NULL) { + if (!sflag) + dbprintf("dir %lld entry %*.*s bad inode " + "number %lld\n", + id->ino, sfe->namelen, sfe->namelen, + sfe->name, lino); + error++; + } else { + addlink_inode(cid); + if (!cid->parent) + cid->parent = id; + addname_inode(cid, (char *)sfe->name, sfe->namelen); + } + if (v) + dbprintf("dir %lld entry %*.*s %lld\n", id->ino, + sfe->namelen, sfe->namelen, sfe->name, lino); + sfe = XFS_DIR_SF_NEXTENTRY(sfe); + } + if ((__psint_t)sfe - (__psint_t)sf != dip->di_core.di_size) + dbprintf("dir %llu size is %lld, should be %d\n", + id->ino, dip->di_core.di_size, + (int)((char *)sfe - (char *)sf)); + lino=DIRINO_GET_ARCH(&sf->hdr.parent, ARCH_CONVERT); + cid = find_inode(lino, 1); + if (cid) + addlink_inode(cid); + else { + if (!sflag) + dbprintf("dir %lld entry .. bad inode number %lld\n", + id->ino, lino); + error++; + } + if (v) + dbprintf("dir %lld entry .. %lld\n", id->ino, lino); + (*dotdot)++; + return cid ? 
lino : NULLFSINO; +} + +static void +quota_add( + xfs_dqid_t projid, + xfs_dqid_t userid, + int dq, + xfs_qcnt_t bc, + xfs_qcnt_t ic, + xfs_qcnt_t rc) +{ + if (qudo && userid != -1) + quota_add1(qudata, userid, dq, bc, ic, rc); + if (qpdo && projid != -1) + quota_add1(qpdata, projid, dq, bc, ic, rc); +} + +static void +quota_add1( + qdata_t **qt, + xfs_dqid_t id, + int dq, + xfs_qcnt_t bc, + xfs_qcnt_t ic, + xfs_qcnt_t rc) +{ + qdata_t *qe; + int qh; + qinfo_t *qi; + + qh = (int)((__uint32_t)id % QDATA_HASH_SIZE); + qe = qt[qh]; + while (qe) { + if (qe->id == id) { + qi = dq ? &qe->dq : &qe->count; + qi->bc += bc; + qi->ic += ic; + qi->rc += rc; + return; + } + qe = qe->next; + } + qe = xmalloc(sizeof(*qe)); + qe->id = id; + qi = dq ? &qe->dq : &qe->count; + qi->bc = bc; + qi->ic = ic; + qi->rc = rc; + qi = dq ? &qe->count : &qe->dq; + qi->bc = qi->ic = qi->rc = 0; + qe->next = qt[qh]; + qt[qh] = qe; +} + +static void +quota_check( + char *s, + qdata_t **qt) +{ + int i; + qdata_t *next; + qdata_t *qp; + + for (i = 0; i < QDATA_HASH_SIZE; i++) { + qp = qt[i]; + while (qp) { + next = qp->next; + if (qp->count.bc != qp->dq.bc || + qp->count.ic != qp->dq.ic || + qp->count.rc != qp->dq.rc) { + if (!sflag) { + dbprintf("%s quota id %d, have/exp", + s, qp->id); + if (qp->count.bc != qp->dq.bc) + dbprintf(" bc %lld/%lld", + qp->dq.bc, + qp->count.bc); + if (qp->count.ic != qp->dq.ic) + dbprintf(" ic %lld/%lld", + qp->dq.ic, + qp->count.ic); + if (qp->count.rc != qp->dq.rc) + dbprintf(" rc %lld/%lld", + qp->dq.rc, + qp->count.rc); + dbprintf("\n"); + } + error++; + } + xfree(qp); + qp = next; + } + } + xfree(qt); +} + +static void +quota_init(void) +{ + qudo = mp->m_sb.sb_uquotino != 0 && + mp->m_sb.sb_uquotino != NULLFSINO && + (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD); + qpdo = mp->m_sb.sb_pquotino != 0 && + mp->m_sb.sb_pquotino != NULLFSINO && + (mp->m_sb.sb_qflags & XFS_PQUOTA_CHKD); + if (qudo) + qudata = xcalloc(QDATA_HASH_SIZE, sizeof(qdata_t *)); + if (qpdo) + qpdata = 
xcalloc(QDATA_HASH_SIZE, sizeof(qdata_t *)); +} + +static void +scan_ag( + xfs_agnumber_t agno) +{ + xfs_agf_t *agf; + xfs_agi_t *agi; + int i; + xfs_sb_t tsb; + xfs_sb_t *sb=&tsb; + + agffreeblks = agflongest = 0; + agicount = agifreecount = 0; + push_cur(); + set_cur(&typtab[TYP_SB], XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), 1, + DB_RING_IGN, NULL); + + if (!iocur_top->data) { + dbprintf("can't read superblock for ag %u\n", agno); + pop_cur(); + serious_error++; + return; + } + + libxfs_xlate_sb(iocur_top->data, sb, 1, ARCH_CONVERT, XFS_SB_ALL_BITS); + + if (sb->sb_magicnum != XFS_SB_MAGIC) { + if (!sflag) + dbprintf("bad sb magic # %#x in ag %u\n", + sb->sb_magicnum, agno); + error++; + } + if (!XFS_SB_GOOD_VERSION(sb)) { + if (!sflag) + dbprintf("bad sb version # %#x in ag %u\n", + sb->sb_versionnum, agno); + error++; + sbver_err++; + } + if (agno == 0 && sb->sb_inprogress != 0) { + if (!sflag) + dbprintf("mkfs not completed successfully\n"); + error++; + } + set_dbmap(agno, XFS_SB_BLOCK(mp), 1, DBM_SB, agno, XFS_SB_BLOCK(mp)); + if (sb->sb_logstart && XFS_FSB_TO_AGNO(mp, sb->sb_logstart) == agno) + set_dbmap(agno, XFS_FSB_TO_AGBNO(mp, sb->sb_logstart), + sb->sb_logblocks, DBM_LOG, agno, XFS_SB_BLOCK(mp)); + push_cur(); + set_cur(&typtab[TYP_AGF], XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), 1, + DB_RING_IGN, NULL); + if ((agf = iocur_top->data) == NULL) { + dbprintf("can't read agf block for ag %u\n", agno); + pop_cur(); + pop_cur(); + serious_error++; + return; + } + if (INT_GET(agf->agf_magicnum, ARCH_CONVERT) != XFS_AGF_MAGIC) { + if (!sflag) + dbprintf("bad agf magic # %#x in ag %u\n", + INT_GET(agf->agf_magicnum, ARCH_CONVERT), agno); + error++; + } + if (!XFS_AGF_GOOD_VERSION(INT_GET(agf->agf_versionnum, ARCH_CONVERT))) { + if (!sflag) + dbprintf("bad agf version # %#x in ag %u\n", + INT_GET(agf->agf_versionnum, ARCH_CONVERT), agno); + error++; + } + if (XFS_SB_BLOCK(mp) != XFS_AGF_BLOCK(mp)) + set_dbmap(agno, XFS_AGF_BLOCK(mp), 1, DBM_AGF, agno, + 
XFS_SB_BLOCK(mp)); + if (sb->sb_agblocks > INT_GET(agf->agf_length, ARCH_CONVERT)) + set_dbmap(agno, INT_GET(agf->agf_length, ARCH_CONVERT), + sb->sb_agblocks - INT_GET(agf->agf_length, ARCH_CONVERT), + DBM_MISSING, agno, XFS_SB_BLOCK(mp)); + push_cur(); + set_cur(&typtab[TYP_AGI], XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), 1, + DB_RING_IGN, NULL); + if ((agi = iocur_top->data) == NULL) { + dbprintf("can't read agi block for ag %u\n", agno); + serious_error++; + pop_cur(); + pop_cur(); + pop_cur(); + return; + } + if (INT_GET(agi->agi_magicnum, ARCH_CONVERT) != XFS_AGI_MAGIC) { + if (!sflag) + dbprintf("bad agi magic # %#x in ag %u\n", + INT_GET(agi->agi_magicnum, ARCH_CONVERT), agno); + error++; + } + if (!XFS_AGI_GOOD_VERSION(INT_GET(agi->agi_versionnum, ARCH_CONVERT))) { + if (!sflag) + dbprintf("bad agi version # %#x in ag %u\n", + INT_GET(agi->agi_versionnum, ARCH_CONVERT), agno); + error++; + } + if (XFS_SB_BLOCK(mp) != XFS_AGI_BLOCK(mp) && + XFS_AGF_BLOCK(mp) != XFS_AGI_BLOCK(mp)) + set_dbmap(agno, XFS_AGI_BLOCK(mp), 1, DBM_AGI, agno, + XFS_SB_BLOCK(mp)); + scan_freelist(agf); + fdblocks--; + scan_sbtree(agf, + INT_GET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT), + INT_GET(agf->agf_levels[XFS_BTNUM_BNO], ARCH_CONVERT), + 1, scanfunc_bno, TYP_BNOBT); + fdblocks--; + scan_sbtree(agf, + INT_GET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT), + INT_GET(agf->agf_levels[XFS_BTNUM_CNT], ARCH_CONVERT), + 1, scanfunc_cnt, TYP_CNTBT); + scan_sbtree(agf, + INT_GET(agi->agi_root, ARCH_CONVERT), + INT_GET(agi->agi_level, ARCH_CONVERT), + 1, scanfunc_ino, TYP_INOBT); + if (INT_GET(agf->agf_freeblks, ARCH_CONVERT) != agffreeblks) { + if (!sflag) + dbprintf("agf_freeblks %u, counted %u in ag %u\n", + INT_GET(agf->agf_freeblks, ARCH_CONVERT), + agffreeblks, agno); + error++; + } + if (INT_GET(agf->agf_longest, ARCH_CONVERT) != agflongest) { + if (!sflag) + dbprintf("agf_longest %u, counted %u in ag %u\n", + INT_GET(agf->agf_longest, ARCH_CONVERT), + agflongest, agno); + error++; 
+	}
+	if (INT_GET(agi->agi_count, ARCH_CONVERT) != agicount) {
+		if (!sflag)
+			dbprintf("agi_count %u, counted %u in ag %u\n",
+				INT_GET(agi->agi_count, ARCH_CONVERT),
+				agicount, agno);
+		error++;
+	}
+	if (INT_GET(agi->agi_freecount, ARCH_CONVERT) != agifreecount) {
+		if (!sflag)
+			dbprintf("agi_freecount %u, counted %u in ag %u\n",
+				INT_GET(agi->agi_freecount, ARCH_CONVERT),
+				agifreecount, agno);
+		error++;
+	}
+	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
+		if (INT_GET(agi->agi_unlinked[i], ARCH_CONVERT) != NULLAGINO) {
+			if (!sflag) {
+				xfs_agino_t agino=INT_GET(agi->agi_unlinked[i], ARCH_CONVERT);
+				dbprintf("agi unlinked bucket %d is %u in ag "
+					"%u (inode=%lld)\n", i, agino, agno,
+					XFS_AGINO_TO_INO(mp, agno, agino));
+			}
+			error++;
+		}
+	}
+	pop_cur();
+	pop_cur();
+	pop_cur();
+}
+
+/*
+ * Account for the free list of the allocation group described by agf.
+ *
+ * The AGFL block itself is recorded in the block map (unless it shares a
+ * filesystem block with the superblock, AGF or AGI, which are recorded by
+ * the caller).  If agf_flcount is non-zero the AGFL is read and walked
+ * from agf_flfirst to agf_fllast, wrapping at XFS_AGFL_SIZE; every listed
+ * block is marked DBM_FREELIST, the number of entries walked is compared
+ * with agf_flcount, and that number is added to fdblocks.
+ *
+ * Every exit taken after push_cur() releases the cursor with pop_cur().
+ */
+static void
+scan_freelist(
+	xfs_agf_t	*agf)
+{
+	xfs_agnumber_t	seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+	xfs_agfl_t	*agfl;
+	xfs_agblock_t	bno;
+	uint		count;
+	int		i;
+
+	if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
+	    XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
+	    XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
+		set_dbmap(seqno, XFS_AGFL_BLOCK(mp), 1, DBM_AGFL, seqno,
+			XFS_SB_BLOCK(mp));
+	if (INT_GET(agf->agf_flcount, ARCH_CONVERT) == 0)
+		return;
+	push_cur();
+	set_cur(&typtab[TYP_AGFL],
+		XFS_AG_DADDR(mp, seqno, XFS_AGFL_DADDR), 1, DB_RING_IGN, NULL);
+	if ((agfl = iocur_top->data) == NULL) {
+		dbprintf("can't read agfl block for ag %u\n", seqno);
+		serious_error++;
+		/* balance the push_cur() above before bailing out */
+		pop_cur();
+		return;
+	}
+	/*
+	 * Both indices are read straight from disk.  The walk below wraps
+	 * the index at XFS_AGFL_SIZE, so a first index outside that range
+	 * would read outside the range the walk is meant to cover, and a
+	 * last index outside it could never be reached, leaving the loop
+	 * without an exit.  Refuse to walk such a list.
+	 */
+	if (INT_GET(agf->agf_flfirst, ARCH_CONVERT) >= XFS_AGFL_SIZE ||
+	    INT_GET(agf->agf_fllast, ARCH_CONVERT) >= XFS_AGFL_SIZE) {
+		dbprintf("agf %u freelist blocks bad, skipping freelist scan\n",
+			seqno);
+		serious_error++;
+		pop_cur();
+		return;
+	}
+	i = INT_GET(agf->agf_flfirst, ARCH_CONVERT);
+	count = 0;
+	for (;;) {
+		bno = INT_GET(agfl->agfl_bno[i], ARCH_CONVERT);
+		set_dbmap(seqno, bno, 1, DBM_FREELIST, seqno,
+			XFS_AGFL_BLOCK(mp));
+		count++;
+		if (i == INT_GET(agf->agf_fllast, ARCH_CONVERT))
+			break;
+		if (++i == XFS_AGFL_SIZE)
+			i = 0;
+	}
+	if (count != INT_GET(agf->agf_flcount, ARCH_CONVERT)) {
+		if (!sflag)
+			dbprintf("freeblk count %u != flcount %u in ag %u\n",
+				count, INT_GET(agf->agf_flcount, ARCH_CONVERT),
+				seqno);
+		error++;
+	}
+	fdblocks += count;
+	pop_cur();
+}
+
+/*
+ * Read the btree block at filesystem block "root" as type btype and hand
+ * it to func, passing nlevels - 1 as the level the block is expected to
+ * be at.  The remaining arguments are forwarded to func unchanged.
+ *
+ * If the block cannot be read the failure is counted in "error" (and
+ * reported unless sflag is set) and func is not called.  The cursor
+ * pushed here is popped on both the success and the failure path.
+ */
+static void
+scan_lbtree(
+	xfs_fsblock_t	root,
+	int		nlevels,
+	scan_lbtree_f_t	func,
+	dbm_t		type,
+	inodata_t	*id,
+	xfs_drfsbno_t	*totd,
+	xfs_drfsbno_t	*toti,
+	xfs_extnum_t	*nex,
+	blkmap_t	**blkmapp,
+	int		isroot,
+	typnm_t		btype)
+{
+	push_cur();
+	set_cur(&typtab[btype], XFS_FSB_TO_DADDR(mp, root), blkbb, DB_RING_IGN,
+		NULL);
+	if (iocur_top->data == NULL) {
+		if (!sflag)
+			dbprintf("can't read btree block %u/%u\n",
+				XFS_FSB_TO_AGNO(mp, root),
+				XFS_FSB_TO_AGBNO(mp, root));
+		error++;
+		/* balance the push_cur() above before bailing out */
+		pop_cur();
+		return;
+	}
+	(*func)(iocur_top->data, nlevels - 1, type, root, id, totd, toti, nex,
+		blkmapp, isroot, btype);
+	pop_cur();
+}
+
+/*
+ * Read the btree block at AG-relative block "root" (in the AG named by
+ * agf->agf_seqno) as type btype and hand it to func, passing nlevels - 1
+ * as the level the block is expected to be at.
+ *
+ * If the block cannot be read the failure is counted in "error" (and
+ * reported unless sflag is set) and func is not called.  The cursor
+ * pushed here is popped on both the success and the failure path.
+ */
+static void
+scan_sbtree(
+	xfs_agf_t	*agf,
+	xfs_agblock_t	root,
+	int		nlevels,
+	int		isroot,
+	scan_sbtree_f_t	func,
+	typnm_t		btype)
+{
+	xfs_agnumber_t	seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+
+	push_cur();
+	set_cur(&typtab[btype],
+		XFS_AGB_TO_DADDR(mp, seqno, root), blkbb, DB_RING_IGN, NULL);
+	if (iocur_top->data == NULL) {
+		if (!sflag)
+			dbprintf("can't read btree block %u/%u\n", seqno, root);
+		error++;
+		/* balance the push_cur() above before bailing out */
+		pop_cur();
+		return;
+	}
+	(*func)(iocur_top->data, nlevels - 1, agf, root, isroot);
+	pop_cur();
+}
+
+static void
+scanfunc_bmap(
+	xfs_btree_lblock_t	*ablock,
+	int			level,
+	dbm_t			type,
+	xfs_fsblock_t		bno,
+	inodata_t		*id,
+	xfs_drfsbno_t		*totd,
+	xfs_drfsbno_t		*toti,
+	xfs_extnum_t		*nex,
+	blkmap_t		**blkmapp,
+	int			isroot,
+	typnm_t			btype)
+{
+	xfs_agblock_t		agbno;
+	xfs_agnumber_t		agno;
+	xfs_bmbt_block_t	*block = (xfs_bmbt_block_t *)ablock;
+	int			i;
+	xfs_bmbt_ptr_t		*pp;
+	xfs_bmbt_rec_32_t	*rp;
+
+	agno = XFS_FSB_TO_AGNO(mp, bno);
+	agbno = XFS_FSB_TO_AGBNO(mp, bno);
+	if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_BMAP_MAGIC) {
+		if (!sflag || id->ilist || CHECK_BLIST(bno))
+			dbprintf("bad magic # %#x in inode %lld bmbt block "
+				"%u/%u\n",
+				INT_GET(block->bb_magic, ARCH_CONVERT), id->ino, agno, agbno);
+		error++;
+	}
+	if (INT_GET(block->bb_level, ARCH_CONVERT)
!= level) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("expected level %d got %d in inode %lld bmbt " + "block %u/%u\n", + level, INT_GET(block->bb_level, ARCH_CONVERT), id->ino, agno, agbno); + error++; + } + set_dbmap(agno, agbno, 1, type, agno, agbno); + set_inomap(agno, agbno, 1, id); + (*toti)++; + if (level == 0) { + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_bmap_dmxr[0] || + isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_bmap_dmnr[0]) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("bad btree nrecs (%u, min=%u, max=%u) " + "in inode %lld bmap block %lld\n", + INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_bmap_dmnr[0], + mp->m_bmap_dmxr[0], id->ino, + (xfs_dfsbno_t)bno); + error++; + return; + } + rp = (xfs_bmbt_rec_32_t *) + XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, + block, 1, mp->m_bmap_dmxr[0]); + *nex += INT_GET(block->bb_numrecs, ARCH_CONVERT); + process_bmbt_reclist(rp, INT_GET(block->bb_numrecs, ARCH_CONVERT), type, id, totd, + blkmapp); + return; + } + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_bmap_dmxr[1] || + isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_bmap_dmnr[1]) { + if (!sflag || id->ilist || CHECK_BLIST(bno)) + dbprintf("bad btree nrecs (%u, min=%u, max=%u) in " + "inode %lld bmap block %lld\n", + INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_bmap_dmnr[1], + mp->m_bmap_dmxr[1], id->ino, (xfs_dfsbno_t)bno); + error++; + return; + } + pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1, + mp->m_bmap_dmxr[0]); + for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) + scan_lbtree(INT_GET(pp[i], ARCH_CONVERT), level, scanfunc_bmap, type, id, totd, toti, + nex, blkmapp, 0, btype); +} + +static void +scanfunc_bno( + xfs_btree_sblock_t *ablock, + int level, + xfs_agf_t *agf, + xfs_agblock_t bno, + int isroot) +{ + xfs_alloc_block_t *block = (xfs_alloc_block_t *)ablock; + int i; + xfs_alloc_ptr_t *pp; + xfs_alloc_rec_t *rp; + 
xfs_agnumber_t seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT); + + if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_ABTB_MAGIC) { + dbprintf("bad magic # %#x in btbno block %u/%u\n", + INT_GET(block->bb_magic, ARCH_CONVERT), seqno, bno); + serious_error++; + return; + } + fdblocks++; + if (INT_GET(block->bb_level, ARCH_CONVERT) != level) { + if (!sflag) + dbprintf("expected level %d got %d in btbno block " + "%u/%u\n", + level, INT_GET(block->bb_level, ARCH_CONVERT), seqno, bno); + error++; + } + set_dbmap(seqno, bno, 1, DBM_BTBNO, seqno, bno); + if (level == 0) { + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[0] || + isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[0]) { + dbprintf("bad btree nrecs (%u, min=%u, max=%u) in " + "btbno block %u/%u\n", + INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_alloc_mnr[0], + mp->m_alloc_mxr[0], seqno, bno); + serious_error++; + return; + } + rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, + 1, mp->m_alloc_mxr[0]); + for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) { + set_dbmap(seqno, INT_GET(rp[i].ar_startblock, ARCH_CONVERT), + INT_GET(rp[i].ar_blockcount, ARCH_CONVERT), DBM_FREE1, + seqno, bno); + } + return; + } + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[1] || + isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[1]) { + dbprintf("bad btree nrecs (%u, min=%u, max=%u) in btbno block " + "%u/%u\n", + INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_alloc_mnr[1], + mp->m_alloc_mxr[1], seqno, bno); + serious_error++; + return; + } + pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1, + mp->m_alloc_mxr[1]); + for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) + scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), level, 0, scanfunc_bno, TYP_BNOBT); +} + +static void +scanfunc_cnt( + xfs_btree_sblock_t *ablock, + int level, + xfs_agf_t *agf, + xfs_agblock_t bno, + int isroot) +{ + 
xfs_alloc_block_t	*block = (xfs_alloc_block_t *)ablock;
+	xfs_agnumber_t		seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+	int			i;
+	xfs_alloc_ptr_t		*pp;
+	xfs_alloc_rec_t		*rp;
+
+	/* A block with the wrong magic is not walked any further. */
+	if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_ABTC_MAGIC) {
+		dbprintf("bad magic # %#x in btcnt block %u/%u\n",
+			INT_GET(block->bb_magic, ARCH_CONVERT), seqno, bno);
+		serious_error++;
+		return;
+	}
+	/* the btree block itself is added to the free-block tally */
+	fdblocks++;
+	if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+		if (!sflag)
+			dbprintf("expected level %d got %d in btcnt block "
+				"%u/%u\n",
+				level, INT_GET(block->bb_level, ARCH_CONVERT), seqno, bno);
+		error++;
+	}
+	set_dbmap(seqno, bno, 1, DBM_BTCNT, seqno, bno);
+	if (level == 0) {
+		/* leaf: the record count must fit the leaf limits */
+		if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[0] ||
+		    (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[0])) {
+			dbprintf("bad btree nrecs (%u, min=%u, max=%u) in "
+				"btcnt block %u/%u\n",
+				INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_alloc_mnr[0],
+				mp->m_alloc_mxr[0], seqno, bno);
+			serious_error++;
+			return;
+		}
+		rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block,
+			1, mp->m_alloc_mxr[0]);
+		for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
+			/*
+			 * Each extent is expected to be in state DBM_FREE1
+			 * already and is moved on to DBM_FREE2 here.
+			 */
+			check_set_dbmap(seqno, INT_GET(rp[i].ar_startblock, ARCH_CONVERT),
+				INT_GET(rp[i].ar_blockcount, ARCH_CONVERT), DBM_FREE1, DBM_FREE2,
+				seqno, bno);
+			fdblocks += INT_GET(rp[i].ar_blockcount, ARCH_CONVERT);
+			agffreeblks += INT_GET(rp[i].ar_blockcount, ARCH_CONVERT);
+			if (INT_GET(rp[i].ar_blockcount, ARCH_CONVERT) > agflongest)
+				agflongest = INT_GET(rp[i].ar_blockcount, ARCH_CONVERT);
+		}
+		return;
+	}
+	/* interior node: the record count must fit the node limits */
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[1] ||
+	    (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[1])) {
+		dbprintf("bad btree nrecs (%u, min=%u, max=%u) in btcnt block "
+			"%u/%u\n",
+			INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_alloc_mnr[1],
+			mp->m_alloc_mxr[1], seqno, bno);
+		serious_error++;
+		return;
+	}
+	pp =
XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1,
+		mp->m_alloc_mxr[1]);
+	for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+		scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), level, 0, scanfunc_cnt, TYP_CNTBT);
+}
+
+/*
+ * Check one inode btree block (ablock, at AG block bno, expected to be at
+ * "level") of the AG described by agf.
+ *
+ * The block is recorded as DBM_BTINO.  For a leaf, every record's inode
+ * chunk is read, each inode in it is passed to process_inode() together
+ * with its free/in-use state from the record, and the number of free
+ * inodes found is compared with the record's ir_freecount; the global and
+ * per-AG inode counters are updated along the way.  For an interior node
+ * each child pointer is followed through scan_sbtree().
+ *
+ * A bad magic number or an out-of-range record count is counted in
+ * serious_error and ends the walk of this block; other problems are
+ * counted in error.
+ */
+static void
+scanfunc_ino(
+	xfs_btree_sblock_t	*ablock,
+	int			level,
+	xfs_agf_t		*agf,
+	xfs_agblock_t		bno,
+	int			isroot)
+{
+	xfs_agino_t		agino;
+	xfs_inobt_block_t	*block = (xfs_inobt_block_t *)ablock;
+	xfs_agnumber_t		seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+	int			i;
+	int			isfree;
+	int			j;
+	int			nfree;
+	int			off;
+	xfs_inobt_ptr_t		*pp;
+	xfs_inobt_rec_t		*rp;
+
+	if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_IBT_MAGIC) {
+		dbprintf("bad magic # %#x in inobt block %u/%u\n",
+			INT_GET(block->bb_magic, ARCH_CONVERT), seqno, bno);
+		serious_error++;
+		return;
+	}
+	if (INT_GET(block->bb_level, ARCH_CONVERT) != level) {
+		if (!sflag)
+			dbprintf("expected level %d got %d in inobt block "
+				"%u/%u\n",
+				level, INT_GET(block->bb_level, ARCH_CONVERT), seqno, bno);
+		error++;
+	}
+	set_dbmap(seqno, bno, 1, DBM_BTINO, seqno, bno);
+	if (level == 0) {
+		if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_inobt_mxr[0] ||
+		    (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_inobt_mnr[0])) {
+			dbprintf("bad btree nrecs (%u, min=%u, max=%u) in "
+				"inobt block %u/%u\n",
+				INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_inobt_mnr[0],
+				mp->m_inobt_mxr[0], seqno, bno);
+			serious_error++;
+			return;
+		}
+		rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block,
+			1, mp->m_inobt_mxr[0]);
+		for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
+			agino = INT_GET(rp[i].ir_startino, ARCH_CONVERT);
+			off = XFS_INO_TO_OFFSET(mp, agino);
+			if (off == 0) {
+				/*
+				 * A chunk whose start block is not a multiple
+				 * of sb_inoalignmt clears the ALIGN bit from
+				 * the version word being accumulated.
+				 */
+				if ((sbversion & XFS_SB_VERSION_ALIGNBIT) &&
+				    mp->m_sb.sb_inoalignmt &&
+				    (XFS_INO_TO_AGBNO(mp, agino) %
+				     mp->m_sb.sb_inoalignmt))
+					sbversion &= ~XFS_SB_VERSION_ALIGNBIT;
+				set_dbmap(seqno, XFS_AGINO_TO_AGBNO(mp, agino),
+					(xfs_extlen_t)MAX(1,
+						XFS_INODES_PER_CHUNK >>
+						mp->m_sb.sb_inopblog),
+					DBM_INODE, seqno, bno);
+			}
+			icount += XFS_INODES_PER_CHUNK;
+			agicount += XFS_INODES_PER_CHUNK;
+			ifree += INT_GET(rp[i].ir_freecount, ARCH_CONVERT);
+			agifreecount += INT_GET(rp[i].ir_freecount, ARCH_CONVERT);
+			push_cur();
+			set_cur(&typtab[TYP_INODE],
+				XFS_AGB_TO_DADDR(mp, seqno,
+						 XFS_AGINO_TO_AGBNO(mp, agino)),
+				(int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)),
+				DB_RING_IGN, NULL);
+			if (iocur_top->data == NULL) {
+				if (!sflag)
+					dbprintf("can't read inode block "
+						 "%u/%u\n",
+						seqno,
+						XFS_AGINO_TO_AGBNO(mp, agino));
+				error++;
+				/*
+				 * "continue" skips the pop_cur() at the bottom
+				 * of the loop, so release this iteration's
+				 * cursor here.
+				 */
+				pop_cur();
+				continue;
+			}
+			for (j = 0, nfree = 0; j < XFS_INODES_PER_CHUNK; j++) {
+				isfree = XFS_INOBT_IS_FREE(&rp[i], j, ARCH_CONVERT);
+				if (isfree)
+					nfree++;
+				process_inode(agf, agino + j,
+					(xfs_dinode_t *)((char *)iocur_top->data + ((off + j) << mp->m_sb.sb_inodelog)),
+						isfree);
+			}
+			if (nfree != INT_GET(rp[i].ir_freecount, ARCH_CONVERT)) {
+				if (!sflag)
+					dbprintf("ir_freecount/free mismatch, "
+						 "inode chunk %u/%u, freecount "
+						 "%d nfree %d\n",
+						seqno, agino,
+						INT_GET(rp[i].ir_freecount, ARCH_CONVERT), nfree);
+				error++;
+			}
+			pop_cur();
+		}
+		return;
+	}
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_inobt_mxr[1] ||
+	    (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_inobt_mnr[1])) {
+		dbprintf("bad btree nrecs (%u, min=%u, max=%u) in inobt block "
+			 "%u/%u\n",
+			INT_GET(block->bb_numrecs, ARCH_CONVERT), mp->m_inobt_mnr[1],
+			mp->m_inobt_mxr[1], seqno, bno);
+		serious_error++;
+		return;
+	}
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block, 1,
+		mp->m_inobt_mxr[1]);
+	for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+		scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), level, 0, scanfunc_ino, TYP_INOBT);
+}
+
+/*
+ * Record blocks agbno .. agbno + len - 1 of AG agno as being of the given
+ * type.  This is check_set_dbmap() with DBM_UNKNOWN as the state the
+ * blocks are expected to be in beforehand; c_agno/c_agbno are passed
+ * through to it unchanged.
+ */
+static void
+set_dbmap(
+	xfs_agnumber_t	agno,
+	xfs_agblock_t	agbno,
+	xfs_extlen_t	len,
+	dbm_t		type,
+	xfs_agnumber_t	c_agno,
+	xfs_agblock_t	c_agbno)
+{
+	check_set_dbmap(agno, agbno, len, DBM_UNKNOWN, type, c_agno, c_agbno);
+}
+
+static void +set_inomap( + xfs_agnumber_t agno, + xfs_agblock_t agbno, + xfs_extlen_t len, + inodata_t *id) +{ + xfs_extlen_t i; + inodata_t **idp; + int mayprint; + + if (!check_inomap(agno, agbno, len, id->ino)) + return; + mayprint = verbose | id->ilist | blist_size; + for (i = 0, idp = &inomap[agno][agbno]; i < len; i++, idp++) { + *idp = id; + if (mayprint && + (verbose || id->ilist || CHECK_BLISTA(agno, agbno + i))) + dbprintf("setting inode to %lld for block %u/%u\n", + id->ino, agno, agbno + i); + } +} + +static void +set_rdbmap( + xfs_drfsbno_t bno, + xfs_extlen_t len, + dbm_t type) +{ + check_set_rdbmap(bno, len, DBM_UNKNOWN, type); +} + +static void +set_rinomap( + xfs_drfsbno_t bno, + xfs_extlen_t len, + inodata_t *id) +{ + xfs_extlen_t i; + inodata_t **idp; + int mayprint; + + if (!check_rinomap(bno, len, id->ino)) + return; + mayprint = verbose | id->ilist | blist_size; + for (i = 0, idp = &inomap[mp->m_sb.sb_agcount][bno]; + i < len; + i++, idp++) { + *idp = id; + if (mayprint && (verbose || id->ilist || CHECK_BLIST(bno + i))) + dbprintf("setting inode to %lld for rtblock %llu\n", + id->ino, bno + i); + } +} + +static void +setlink_inode( + inodata_t *id, + nlink_t nlink, + int isdir, + int security) +{ + id->link_set = nlink; + id->isdir = isdir; + id->security = security; + if (verbose || id->ilist) + dbprintf("inode %lld nlink %u %s dir\n", id->ino, nlink, + isdir ? "is" : "not"); +} diff --git a/db/check.h b/db/check.h new file mode 100644 index 000000000..71d557688 --- /dev/null +++ b/db/check.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern void check_init(void); diff --git a/db/cntbt.c b/db/cntbt.c new file mode 100644 index 000000000..5e4a6dafb --- /dev/null +++ b/db/cntbt.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "data.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "cntbt.h" +#include "print.h" +#include "bit.h" +#include "mount.h" + +static int cntbt_key_count(void *obj, int startoff); +static int cntbt_key_offset(void *obj, int startoff, int idx); +static int cntbt_ptr_count(void *obj, int startoff); +static int cntbt_ptr_offset(void *obj, int startoff, int idx); +static int cntbt_rec_count(void *obj, int startoff); +static int cntbt_rec_offset(void *obj, int startoff, int idx); + +const field_t cntbt_hfld[] = { + { "", FLDT_CNTBT, OI(0), C1, 0, TYP_NONE }, + { NULL } +}; + +#define OFF(f) bitize(offsetof(xfs_alloc_block_t, bb_ ## f)) +const field_t cntbt_flds[] = { + { "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE }, + { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE }, + { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE }, + { "leftsib", FLDT_AGBLOCK, OI(OFF(leftsib)), C1, 0, TYP_CNTBT }, + { "rightsib", FLDT_AGBLOCK, OI(OFF(rightsib)), C1, 0, TYP_CNTBT }, + { "recs", FLDT_CNTBTREC, cntbt_rec_offset, cntbt_rec_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { "keys", FLDT_CNTBTKEY, cntbt_key_offset, cntbt_key_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { "ptrs", FLDT_CNTBTPTR, cntbt_ptr_offset, cntbt_ptr_count, + 
FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_CNTBT }, + { NULL } +}; + +#define KOFF(f) bitize(offsetof(xfs_alloc_key_t, ar_ ## f)) +const field_t cntbt_key_flds[] = { + { "blockcount", FLDT_EXTLEN, OI(KOFF(blockcount)), C1, 0, TYP_NONE }, + { "startblock", FLDT_AGBLOCK, OI(KOFF(startblock)), C1, 0, TYP_DATA }, + { NULL } +}; + +#define ROFF(f) bitize(offsetof(xfs_alloc_rec_t, ar_ ## f)) +const field_t cntbt_rec_flds[] = { + { "startblock", FLDT_AGBLOCK, OI(ROFF(startblock)), C1, 0, TYP_DATA }, + { "blockcount", FLDT_EXTLEN, OI(ROFF(blockcount)), C1, 0, TYP_NONE }, + { NULL } +}; + +/*ARGSUSED*/ +static int +cntbt_key_count( + void *obj, + int startoff) +{ + xfs_alloc_block_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->bb_level, ARCH_CONVERT) == 0) + return 0; + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +/*ARGSUSED*/ +static int +cntbt_key_offset( + void *obj, + int startoff, + int idx) +{ + xfs_alloc_block_t *block; + xfs_alloc_key_t *kp; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + kp = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, idx, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0)); + return bitize((int)((char *)kp - (char *)block)); +} + +/*ARGSUSED*/ +static int +cntbt_ptr_count( + void *obj, + int startoff) +{ + xfs_alloc_block_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->bb_level, ARCH_CONVERT) == 0) + return 0; + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +/*ARGSUSED*/ +static int +cntbt_ptr_offset( + void *obj, + int startoff, + int idx) +{ + xfs_alloc_block_t *block; + xfs_alloc_ptr_t *pp; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, idx, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0)); + return bitize((int)((char *)pp - (char *)block)); +} + +/*ARGSUSED*/ 
+static int +cntbt_rec_count( + void *obj, + int startoff) +{ + xfs_alloc_block_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->bb_level, ARCH_CONVERT) > 0) + return 0; + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +/*ARGSUSED*/ +static int +cntbt_rec_offset( + void *obj, + int startoff, + int idx) +{ + xfs_alloc_block_t *block; + xfs_alloc_rec_t *rp; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) == 0); + rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, idx, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1)); + return bitize((int)((char *)rp - (char *)block)); +} + +/*ARGSUSED*/ +int +cntbt_size( + void *obj, + int startoff, + int idx) +{ + return bitize(mp->m_sb.sb_blocksize); +} diff --git a/db/cntbt.h b/db/cntbt.h new file mode 100644 index 000000000..768150385 --- /dev/null +++ b/db/cntbt.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct field; + +extern const struct field cntbt_flds[]; +extern const struct field cntbt_hfld[]; +extern const struct field cntbt_key_flds[]; +extern const struct field cntbt_rec_flds[]; + +extern int cntbt_size(void *obj, int startoff, int idx); diff --git a/db/command.c b/db/command.c new file mode 100644 index 000000000..5c8153a66 --- /dev/null +++ b/db/command.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include "addr.h" +#include "agf.h" +#include "agfl.h" +#include "agi.h" +#include "block.h" +#include "bmap.h" +#include "check.h" +#include "command.h" +#include "convert.h" +#include "debug.h" +#include "type.h" +#include "echo.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "frag.h" +#include "freesp.h" +#include "help.h" +#include "hash.h" +#include "inode.h" +#include "input.h" +#include "io.h" +#include "output.h" +#include "print.h" +#include "quit.h" +#include "sb.h" +#include "uuid.h" +#include "write.h" +#include "malloc.h" +#include "dquot.h" + +cmdinfo_t *cmdtab; +int ncmds; + +static int cmd_compare(const void *a, const void *b); + +static int +cmd_compare(const void *a, const void *b) +{ + return strcmp(((const cmdinfo_t *)a)->name, + ((const cmdinfo_t *)b)->name); +} + +void +add_command( + const cmdinfo_t *ci) +{ + cmdtab = xrealloc((void *)cmdtab, ++ncmds * sizeof(*cmdtab)); + cmdtab[ncmds - 1] = *ci; + qsort(cmdtab, ncmds, sizeof(*cmdtab), cmd_compare); +} + +int +command( + int argc, + char **argv) +{ + char *cmd; + const cmdinfo_t *ct; + + cmd = argv[0]; + ct = find_command(cmd); + if (ct == NULL) { + dbprintf("command %s not found\n", cmd); + return 0; + } + if (argc-1 < ct->argmin || (ct->argmax != -1 && argc-1 > ct->argmax)) { + dbprintf("bad argument count %d to %s, expected ", argc-1, cmd); + if (ct->argmax == -1) + dbprintf("at least %d", ct->argmin); + else if (ct->argmin == ct->argmax) + dbprintf("%d", ct->argmin); + else + dbprintf("between %d and %d", ct->argmin, ct->argmax); + dbprintf(" arguments\n"); + return 0; + } + optind = 0; + return ct->cfunc(argc, argv); +} + +const cmdinfo_t * +find_command( + const char *cmd) +{ + 
cmdinfo_t *ct; + + for (ct = cmdtab; ct < &cmdtab[ncmds]; ct++) { + if (strcmp(ct->name, cmd) == 0 || + (ct->altname && strcmp(ct->altname, cmd) == 0)) + return (const cmdinfo_t *)ct; + } + return NULL; +} + +void +init_commands(void) +{ + addr_init(); + agf_init(); + agfl_init(); + agi_init(); + block_init(); + bmap_init(); + check_init(); + convert_init(); + debug_init(); + echo_init(); + frag_init(); + freesp_init(); + help_init(); + hash_init(); + inode_init(); + input_init(); + io_init(); + output_init(); + print_init(); + quit_init(); + sb_init(); + uuid_init(); + type_init(); + write_init(); + dquot_init(); +} diff --git a/db/command.h b/db/command.h new file mode 100644 index 000000000..dd35ed627 --- /dev/null +++ b/db/command.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
/*
 * Command handler: called by command() with the full argument vector,
 * argv[0] being the command name.  command() passes the handler's
 * return value back to its own caller.
 */
typedef int	(*cfunc_t)(int argc, char **argv);

/* Help callback; takes no arguments and returns nothing. */
typedef void	(*helpfunc_t)(void);

/*
 * Description of one command, as registered with add_command().
 */
typedef struct cmdinfo
{
	const char	*name;		/* primary name; the table is sorted on it */
	const char	*altname;	/* alternate name, may be NULL */
	cfunc_t		cfunc;		/* handler run by command() */
	int		argmin;		/* minimum number of arguments (argc - 1) */
	int		argmax;		/* maximum number of arguments, -1: no limit */
	int		canpush;	/* NOTE(review): meaning not visible here -- confirm */
	const char	*args;		/* presumably the usage synopsis -- confirm in help.c */
	const char	*oneline;	/* presumably a one-line summary -- confirm in help.c */
	helpfunc_t	help;		/* optional help callback, NULL if none */
} cmdinfo_t;

/* Command table and its entry count; defined in command.c. */
extern cmdinfo_t	*cmdtab;
extern int		ncmds;

/* Copy *ci into the command table and keep the table sorted by name. */
extern void		add_command(const cmdinfo_t *ci);
/* Look up argv[0], validate the argument count and run the handler. */
extern int		command(int argc, char **argv);
/* Find a command by name or alternate name; NULL if not found. */
extern const cmdinfo_t	*find_command(const char *cmd);
/* Call every command module's init routine. */
extern void		init_commands(void);
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "command.h" +#include "data.h" +#include "convert.h" +#include "output.h" +#include "mount.h" + +#define M(A) (1 << CT_ ## A) +#define agblock_to_bytes(x) \ + ((__uint64_t)(x) << mp->m_sb.sb_blocklog) +#define agino_to_bytes(x) \ + ((__uint64_t)(x) << mp->m_sb.sb_inodelog) +#define agnumber_to_bytes(x) \ + agblock_to_bytes((__uint64_t)(x) * mp->m_sb.sb_agblocks) +#define daddr_to_bytes(x) \ + ((__uint64_t)(x) << BBSHIFT) +#define fsblock_to_bytes(x) \ + (agnumber_to_bytes(XFS_FSB_TO_AGNO(mp, (x))) + \ + agblock_to_bytes(XFS_FSB_TO_AGBNO(mp, (x)))) +#define ino_to_bytes(x) \ + (agnumber_to_bytes(XFS_INO_TO_AGNO(mp, (x))) + \ + agino_to_bytes(XFS_INO_TO_AGINO(mp, (x)))) +#define inoidx_to_bytes(x) \ + ((__uint64_t)(x) << mp->m_sb.sb_inodelog) + +typedef enum { + CT_NONE = -1, + CT_AGBLOCK, /* xfs_agblock_t */ + CT_AGINO, /* xfs_agino_t */ + CT_AGNUMBER, /* xfs_agno_t */ + CT_BBOFF, /* byte offset in daddr */ + CT_BLKOFF, /* byte offset in fsb/agb */ + CT_BYTE, /* byte in filesystem */ + CT_DADDR, /* daddr_t */ + CT_FSBLOCK, /* xfs_fsblock_t */ + CT_INO, /* xfs_ino_t */ + CT_INOIDX, /* index of inode in fsblock */ + CT_INOOFF, /* byte offset in inode */ + NCTS +} ctype_t; + +typedef struct ctydesc { + ctype_t ctype; + int allowed; + const char **names; +} ctydesc_t; + +typedef union { + xfs_agblock_t agblock; + xfs_agino_t agino; + xfs_agnumber_t agnumber; + int bboff; + int blkoff; + __uint64_t byte; + xfs_daddr_t daddr; + xfs_fsblock_t fsblock; + xfs_ino_t ino; + int inoidx; 
+ int inooff; +} cval_t; + +static __uint64_t bytevalue(ctype_t ctype, cval_t *val); +static int convert_f(int argc, char **argv); +static int getvalue(char *s, ctype_t ctype, cval_t *val); +static ctype_t lookupcty(char *ctyname); + +static const char *agblock_names[] = { "agblock", "agbno", NULL }; +static const char *agino_names[] = { "agino", "aginode", NULL }; +static const char *agnumber_names[] = { "agnumber", "agno", NULL }; +static const char *bboff_names[] = { "bboff", "daddroff", NULL }; +static const char *blkoff_names[] = { "blkoff", "fsboff", "agboff", + NULL }; +static const char *byte_names[] = { "byte", "fsbyte", NULL }; +static const char *daddr_names[] = { "daddr", "bb", NULL }; +static const char *fsblock_names[] = { "fsblock", "fsb", "fsbno", NULL }; +static const char *ino_names[] = { "ino", "inode", NULL }; +static const char *inoidx_names[] = { "inoidx", "offset", NULL }; +static const char *inooff_names[] = { "inooff", "inodeoff", NULL }; + +static const ctydesc_t ctydescs[NCTS] = { + { CT_AGBLOCK, M(AGNUMBER)|M(BBOFF)|M(BLKOFF)|M(INOIDX)|M(INOOFF), + agblock_names }, + { CT_AGINO, M(AGNUMBER)|M(INOOFF), agino_names }, + { CT_AGNUMBER, + M(AGBLOCK)|M(AGINO)|M(BBOFF)|M(BLKOFF)|M(INOIDX)|M(INOOFF), + agnumber_names }, + { CT_BBOFF, M(AGBLOCK)|M(AGNUMBER)|M(DADDR)|M(FSBLOCK), bboff_names }, + { CT_BLKOFF, M(AGBLOCK)|M(AGNUMBER)|M(FSBLOCK), blkoff_names }, + { CT_BYTE, 0, byte_names }, + { CT_DADDR, M(BBOFF), daddr_names }, + { CT_FSBLOCK, M(BBOFF)|M(BLKOFF)|M(INOIDX), fsblock_names }, + { CT_INO, M(INOOFF), ino_names }, + { CT_INOIDX, M(AGBLOCK)|M(AGNUMBER)|M(FSBLOCK)|M(INOOFF), + inoidx_names }, + { CT_INOOFF, + M(AGBLOCK)|M(AGINO)|M(AGNUMBER)|M(FSBLOCK)|M(INO)|M(INOIDX), + inooff_names }, +}; + +static const cmdinfo_t convert_cmd = + { "convert", NULL, convert_f, 3, 9, 0, "type num [type num]... 
type", + "convert from one address form to another", NULL }; + +static __uint64_t +bytevalue(ctype_t ctype, cval_t *val) +{ + switch (ctype) { + case CT_AGBLOCK: + return agblock_to_bytes(val->agblock); + case CT_AGINO: + return agino_to_bytes(val->agino); + case CT_AGNUMBER: + return agnumber_to_bytes(val->agnumber); + case CT_BBOFF: + return (__uint64_t)val->bboff; + case CT_BLKOFF: + return (__uint64_t)val->blkoff; + case CT_BYTE: + return val->byte; + case CT_DADDR: + return daddr_to_bytes(val->daddr); + case CT_FSBLOCK: + return fsblock_to_bytes(val->fsblock); + case CT_INO: + return ino_to_bytes(val->ino); + case CT_INOIDX: + return inoidx_to_bytes(val->inoidx); + case CT_INOOFF: + return (__uint64_t)val->inooff; + case CT_NONE: + case NCTS: + } + /* NOTREACHED */ + return 0; +} + +static int +convert_f(int argc, char **argv) +{ + ctype_t c; + int conmask; + cval_t cvals[NCTS]; + int i; + int mask; + __uint64_t v; + ctype_t wtype; + + /* move past the "convert" command */ + argc--; + argv++; + + if ((argc % 2) != 1) { + dbprintf("bad argument count %d to convert, expected 3,5,7,9 " + "arguments\n", argc); + return 0; + } + if ((wtype = lookupcty(argv[argc - 1])) == CT_NONE) { + dbprintf("unknown conversion type %s\n", argv[argc - 1]); + return 0; + } + + for (i = mask = conmask = 0; i < (argc - 1) / 2; i++) { + c = lookupcty(argv[i * 2]); + if (c == CT_NONE) { + dbprintf("unknown conversion type %s\n", argv[i * 2]); + return 0; + } + if (c == wtype) { + dbprintf("result type same as argument\n"); + return 0; + } + if (conmask & (1 << c)) { + dbprintf("conflicting conversion type %s\n", + argv[i * 2]); + return 0; + } + if (!getvalue(argv[i * 2 + 1], c, &cvals[c])) + return 0; + mask |= 1 << c; + conmask |= ~ctydescs[c].allowed; + } + if (cur_agno != NULLAGNUMBER && (conmask & M(AGNUMBER)) == 0) { + cvals[CT_AGNUMBER].agnumber = cur_agno; + mask |= M(AGNUMBER); + conmask |= ~ctydescs[CT_AGNUMBER].allowed; + } + v = 0; + for (c = (ctype_t)0; c < NCTS; c++) { + 
if (!(mask & (1 << c))) + continue; + v += bytevalue(c, &cvals[c]); + } + switch (wtype) { + case CT_AGBLOCK: + v = XFS_DADDR_TO_AGBNO(mp, v >> BBSHIFT); + break; + case CT_AGINO: + v = (v >> mp->m_sb.sb_inodelog) % + (mp->m_sb.sb_agblocks << mp->m_sb.sb_inopblog); + break; + case CT_AGNUMBER: + v = XFS_DADDR_TO_AGNO(mp, v >> BBSHIFT); + break; + case CT_BBOFF: + v &= BBMASK; + break; + case CT_BLKOFF: + v &= mp->m_blockmask; + break; + case CT_BYTE: + break; + case CT_DADDR: + v >>= BBSHIFT; + break; + case CT_FSBLOCK: + v = XFS_DADDR_TO_FSB(mp, v >> BBSHIFT); + break; + case CT_INO: + v = XFS_AGINO_TO_INO(mp, XFS_DADDR_TO_AGNO(mp, v >> BBSHIFT), + (v >> mp->m_sb.sb_inodelog) % + (mp->m_sb.sb_agblocks << mp->m_sb.sb_inopblog)); + break; + case CT_INOIDX: + v = (v >> mp->m_sb.sb_inodelog) & (mp->m_sb.sb_inopblock - 1); + break; + case CT_INOOFF: + v &= mp->m_sb.sb_inodesize - 1; + break; + case CT_NONE: + case NCTS: + /* NOTREACHED */ + } + dbprintf("0x%llx (%llu)\n", v, v); + return 0; +} + +void +convert_init(void) +{ + add_command(&convert_cmd); +} + +static int +getvalue(char *s, ctype_t ctype, cval_t *val) +{ + char *p; + __uint64_t v; + + v = strtoull(s, &p, 0); + if (*p != '\0') { + dbprintf("%s is not a number\n", s); + return 0; + } + switch (ctype) { + case CT_AGBLOCK: + val->agblock = (xfs_agblock_t)v; + break; + case CT_AGINO: + val->agino = (xfs_agino_t)v; + break; + case CT_AGNUMBER: + val->agnumber = (xfs_agnumber_t)v; + break; + case CT_BBOFF: + val->bboff = (int)v; + break; + case CT_BLKOFF: + val->blkoff = (int)v; + break; + case CT_BYTE: + val->byte = (__uint64_t)v; + break; + case CT_DADDR: + val->daddr = (xfs_daddr_t)v; + break; + case CT_FSBLOCK: + val->fsblock = (xfs_fsblock_t)v; + break; + case CT_INO: + val->ino = (xfs_ino_t)v; + break; + case CT_INOIDX: + val->inoidx = (int)v; + break; + case CT_INOOFF: + val->inooff = (int)v; + break; + case CT_NONE: + case NCTS: + /* NOTREACHED */ + } + return 1; +} + +static ctype_t +lookupcty(char 
*ctyname) +{ + ctype_t cty; + const char **name; + + for (cty = (ctype_t)0; cty < NCTS; cty++) { + for (name = ctydescs[cty].names; *name; name++) { + if (strcmp(ctyname, *name) == 0) + return cty; + } + } + return CT_NONE; +} diff --git a/db/convert.h b/db/convert.h new file mode 100644 index 000000000..0ddbca4d1 --- /dev/null +++ b/db/convert.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern void convert_init(void); diff --git a/db/data.c b/db/data.c new file mode 100644 index 000000000..c53a5bbc4 --- /dev/null +++ b/db/data.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
/*
 * Definitions of the global variables declared extern in data.h.
 */
int		blkbb;		/* NOTE(review): looks like basic blocks per fs block -- confirm */
/*
 * Starts out as NULLAGNUMBER.  convert_f() in convert.c uses any other
 * value as the implicit AG number of a conversion.
 */
xfs_agnumber_t	cur_agno = NULLAGNUMBER;
int		exitcode;	/* presumably the process exit status -- confirm in main.c */
int		flag_expert_mode = 0;	/* explicitly cleared at start-up */
int		flag_readonly = 0;	/* explicitly cleared at start-up */
libxfs_init_t	xfsargs;	/* presumably the libxfs init arguments -- confirm in init.c */
/*
 * Global variables shared between the db source files; all but
 * flag_arch are defined in data.c.
 */
extern int		blkbb;
extern xfs_agnumber_t	cur_agno;
extern int		exitcode;
extern int		flag_expert_mode;
extern int		flag_readonly;
/*
 * NOTE(review): data.c as added by this patch has no definition of
 * flag_arch -- confirm that it is defined elsewhere or drop this line.
 */
extern int		flag_arch;
extern libxfs_init_t	xfsargs;
/*
 * Read nblocks blocks, starting at file block offset bno of fork
 * whichfork, into buf.  Ranges for which bmap() reports no extent are
 * filled with zeroes instead of being read.
 *
 * Returns 0 on success, or the first non-zero value returned by
 * read_bbs().
 *
 * NOTE(review): bmap() takes no inode argument here, so it presumably
 * works on the currently selected inode -- confirm in bmap.c.
 */
int
dbread(void *buf, int nblocks, xfs_fileoff_t bno, int whichfork)
{
	bmap_ext_t	bm;
	char		*bp;	/* next byte of buf to fill */
	xfs_dfiloff_t	eb;	/* end of the hole being zeroed */
	xfs_dfiloff_t	end;	/* first file block past the request */
	int		i;
	int		nex;	/* in: extents wanted, out: extents found */

	nex = 1;
	end = bno + nblocks;
	bp = buf;
	while (bno < end) {
		bmap(bno, end - bno, whichfork, &nex, &bm);
		if (nex == 0) {
			/* nothing mapped: fake an extent starting at "end" so
			 * that the rest of the request is treated as a hole */
			bm.startoff = end;
			bm.blockcount = 1;
		}
		if (bm.startoff > bno) {
			/* hole in front of the extent: zero it, but not past end */
			eb = end < bm.startoff ? end : bm.startoff;
			i = (int)XFS_FSB_TO_B(mp, eb - bno);
			memset(bp, 0, i);
			bp += i;
			bno = eb;
		}
		if (bno == end)
			break;
		if (bno > bm.startoff) {
			/* extent starts before bno: drop the leading part */
			bm.blockcount -= bno - bm.startoff;
			bm.startblock += bno - bm.startoff;
			bm.startoff = bno;
		}
		/* extent runs past the request: drop the trailing part */
		if (bm.startoff + bm.blockcount > end)
			bm.blockcount = end - bm.startoff;
		i = read_bbs(XFS_FSB_TO_DADDR(mp, bm.startblock),
			     (int)XFS_FSB_TO_BB(mp, bm.blockcount),
			     (void **)&bp, NULL);
		if (i)
			return i;
		bp += XFS_FSB_TO_B(mp, bm.blockcount);
		bno += bm.blockcount;
	}
	return 0;
}
/*
 * Read nblocks blocks of fork whichfork, starting at file block offset
 * bno, into buf; unmapped ranges are zero-filled.  Returns 0 on success
 * or the non-zero status of the failing read (see dbread.c).
 */
extern int	dbread(void *buf, int nblocks, xfs_fileoff_t bno,
		       int whichfork);
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "command.h" +#include "debug.h" +#include "output.h" + +static int debug_f(int argc, char **argv); + +static const cmdinfo_t debug_cmd = + { "debug", NULL, debug_f, 0, 1, 0, "[flagbits]", + "set debug option bits", NULL }; + +long debug_state; + +static int +debug_f( + int argc, + char **argv) +{ + char *p; + + if (argc > 1) { + debug_state = strtol(argv[1], &p, 0); + if (*p != '\0') { + dbprintf("bad value for debug %s\n", argv[1]); + return 0; + } + } + dbprintf("debug = %ld\n", debug_state); + return 0; +} + +void +debug_init(void) +{ + add_command(&debug_cmd); +} diff --git a/db/debug.h b/db/debug.h new file mode 100644 index 000000000..1224b5953 --- /dev/null +++ b/db/debug.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
/* Bit in debug_state; presumably enables flist debugging -- confirm in flist.c */
#define	DEBUG_FLIST	0x1

/* Debug option bits, set with the "debug" command (debug.c). */
extern long	debug_state;
/* Register the "debug" command. */
extern void	debug_init(void);
static int	dir_leaf_entries_count(void *obj, int startoff);
static int	dir_leaf_hdr_count(void *obj, int startoff);
static int	dir_leaf_name_count(void *obj, int startoff);
static int	dir_leaf_namelist_count(void *obj, int startoff);
static int	dir_leaf_namelist_offset(void *obj, int startoff, int idx);
static int	dir_node_btree_count(void *obj, int startoff);
static int	dir_node_hdr_count(void *obj, int startoff);

/*
 * Field descriptor tables for directory blocks.  Each table ends with a
 * { NULL } entry.  The exact meaning of the individual field_t members
 * is defined in field.h, which is not in view here.
 */

/* Single unnamed field covering a whole directory block. */
const field_t	dir_hfld[] = {
	{ "", FLDT_DIR, OI(0), C1, 0, TYP_NONE },
	{ NULL }
};

/*
 * Top-level layout of a directory block.  A block is either a leaf
 * (xfs_dir_leafblock_t) or a node (xfs_da_intnode_t); the count
 * callbacks look at the magic number and return 0 for fields that
 * belong to the other kind.
 */
#define	LOFF(f)	bitize(offsetof(xfs_dir_leafblock_t, f))
#define	NOFF(f)	bitize(offsetof(xfs_da_intnode_t, f))
const field_t	dir_flds[] = {
	{ "lhdr", FLDT_DIR_LEAF_HDR, OI(LOFF(hdr)), dir_leaf_hdr_count,
	  FLD_COUNT, TYP_NONE },
	{ "nhdr", FLDT_DIR_NODE_HDR, OI(NOFF(hdr)), dir_node_hdr_count,
	  FLD_COUNT, TYP_NONE },
	{ "entries", FLDT_DIR_LEAF_ENTRY, OI(LOFF(entries)),
	  dir_leaf_entries_count, FLD_ARRAY|FLD_COUNT, TYP_NONE },
	{ "btree", FLDT_DIR_NODE_ENTRY, OI(NOFF(btree)),
	  dir_node_btree_count, FLD_ARRAY|FLD_COUNT, TYP_NONE },
	{ "namelist", FLDT_DIR_LEAF_NAME, dir_leaf_namelist_offset,
	  dir_leaf_namelist_count, FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE },
	{ NULL }
};

/* Fields of xfs_da_blkinfo_t. */
#define	BOFF(f)	bitize(offsetof(xfs_da_blkinfo_t, f))
const field_t	dir_blkinfo_flds[] = {
	{ "forw", FLDT_DIRBLOCK, OI(BOFF(forw)), C1, 0, TYP_INODATA },
	{ "back", FLDT_DIRBLOCK, OI(BOFF(back)), C1, 0, TYP_INODATA },
	{ "magic", FLDT_UINT16X, OI(BOFF(magic)), C1, 0, TYP_NONE },
	{ "pad", FLDT_UINT16X, OI(BOFF(pad)), C1, FLD_SKIPALL, TYP_NONE },
	{ NULL }
};

/* Fields of xfs_dir_leaf_entry_t. */
#define	LEOFF(f)	bitize(offsetof(xfs_dir_leaf_entry_t, f))
const field_t	dir_leaf_entry_flds[] = {
	{ "hashval", FLDT_UINT32X, OI(LEOFF(hashval)), C1, 0, TYP_NONE },
	{ "nameidx", FLDT_UINT16D, OI(LEOFF(nameidx)), C1, 0, TYP_NONE },
	{ "namelen", FLDT_UINT8D, OI(LEOFF(namelen)), C1, 0, TYP_NONE },
	{ "pad2", FLDT_UINT8X, OI(LEOFF(pad2)), C1, FLD_SKIPALL, TYP_NONE },
	{ NULL }
};

/* Fields of xfs_dir_leaf_hdr_t. */
#define	LHOFF(f)	bitize(offsetof(xfs_dir_leaf_hdr_t, f))
const field_t	dir_leaf_hdr_flds[] = {
	{ "info", FLDT_DIR_BLKINFO, OI(LHOFF(info)), C1, 0, TYP_NONE },
	{ "count", FLDT_UINT16D, OI(LHOFF(count)), C1, 0, TYP_NONE },
	{ "namebytes", FLDT_UINT16D, OI(LHOFF(namebytes)), C1, 0, TYP_NONE },
	{ "firstused", FLDT_UINT16D, OI(LHOFF(firstused)), C1, 0, TYP_NONE },
	{ "holes", FLDT_UINT8D, OI(LHOFF(holes)), C1, 0, TYP_NONE },
	{ "pad1", FLDT_UINT8X, OI(LHOFF(pad1)), C1, FLD_SKIPALL, TYP_NONE },
	{ "freemap", FLDT_DIR_LEAF_MAP, OI(LHOFF(freemap)),
	  CI(XFS_DIR_LEAF_MAPSIZE), FLD_ARRAY, TYP_NONE },
	{ NULL }
};

/* Fields of xfs_dir_leaf_map_t. */
#define	LMOFF(f)	bitize(offsetof(xfs_dir_leaf_map_t, f))
const field_t	dir_leaf_map_flds[] = {
	{ "base", FLDT_UINT16D, OI(LMOFF(base)), C1, 0, TYP_NONE },
	{ "size", FLDT_UINT16D, OI(LMOFF(size)), C1, 0, TYP_NONE },
	{ NULL }
};

/* Fields of xfs_dir_leaf_name_t; the name length comes from the leaf entry. */
#define	LNOFF(f)	bitize(offsetof(xfs_dir_leaf_name_t, f))
const field_t	dir_leaf_name_flds[] = {
	{ "inumber", FLDT_DIR_INO, OI(LNOFF(inumber)), C1, 0, TYP_INODE },
	{ "name", FLDT_CHARNS, OI(LNOFF(name)), dir_leaf_name_count, FLD_COUNT,
	  TYP_NONE },
	{ NULL }
};

/* Fields of xfs_da_node_entry_t. */
#define	EOFF(f)	bitize(offsetof(xfs_da_node_entry_t, f))
const field_t	dir_node_entry_flds[] = {
	{ "hashval", FLDT_UINT32X, OI(EOFF(hashval)), C1, 0, TYP_NONE },
	{ "before", FLDT_DIRBLOCK, OI(EOFF(before)), C1, 0, TYP_INODATA },
	{ NULL }
};

/* Fields of xfs_da_node_hdr_t. */
#define	HOFF(f)	bitize(offsetof(xfs_da_node_hdr_t, f))
const field_t	dir_node_hdr_flds[] = {
	{ "info", FLDT_DIR_BLKINFO, OI(HOFF(info)), C1, 0, TYP_NONE },
	{ "count", FLDT_UINT16D, OI(HOFF(count)), C1, 0, TYP_NONE },
	{ "level", FLDT_UINT16D, OI(HOFF(level)), C1, 0, TYP_NONE },
	{ NULL }
};

/*
 * Number of leaf entries in the block at obj: hdr.count if the block
 * carries the leaf magic number, otherwise 0.
 */
/*ARGSUSED*/
static int
dir_leaf_entries_count(
	void			*obj,
	int			startoff)
{
	xfs_dir_leafblock_t	*block;

	ASSERT(startoff == 0);
	block = obj;
	if (INT_GET(block->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC)
		return 0;
	return INT_GET(block->hdr.count, ARCH_CONVERT);
}

/*
 * 1 if the block at obj carries the leaf magic number (and so has a
 * leaf header), 0 otherwise.
 */
/*ARGSUSED*/
static int
dir_leaf_hdr_count(
	void			*obj,
	int			startoff)
{
	xfs_dir_leafblock_t	*block;

	ASSERT(startoff == 0);
	block = obj;
	return INT_GET(block->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC;
}

/*
 * Length of the name stored at bit offset startoff in the leaf block at
 * obj.  The length is taken from the leaf entry whose nameidx equals
 * the byte offset; 0 is returned if the block is not a leaf or no entry
 * points at that offset.
 */
static int
dir_leaf_name_count(
	void			*obj,
	int			startoff)
{
	xfs_dir_leafblock_t	*block;
	xfs_dir_leaf_entry_t	*e;
	int			i;
	int			off;

	ASSERT(bitoffs(startoff) == 0);
	off = byteize(startoff);
	block = obj;
	if (INT_GET(block->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC)
		return 0;
	for (i = 0; i < INT_GET(block->hdr.count, ARCH_CONVERT); i++) {
		e = &block->entries[i];
		if (INT_GET(e->nameidx, ARCH_CONVERT) == off)
			return e->namelen;
	}
	return 0;
}

/*
 * Size in bits of the name structure that belongs to leaf entry idx of
 * the block at obj, or 0 if the block is not a leaf.  idx is not
 * range-checked here.
 */
/*ARGSUSED*/
int
dir_leaf_name_size(
	void			*obj,
	int			startoff,
	int			idx)
{
	xfs_dir_leafblock_t	*block;
	xfs_dir_leaf_entry_t	*e;

	ASSERT(startoff == 0);
	block = obj;
	if (INT_GET(block->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC)
		return 0;
	e = &block->entries[idx];
	return bitize((int)XFS_DIR_LEAF_ENTSIZE_BYENTRY(e));
}

/*
 * Number of names in the leaf block at obj: hdr.count for a leaf block,
 * 0 otherwise.
 */
/*ARGSUSED*/
static int
dir_leaf_namelist_count(
	void			*obj,
	int			startoff)
{
	xfs_dir_leafblock_t	*block;

	ASSERT(startoff == 0);
	block = obj;
	if (INT_GET(block->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC)
		return 0;
	return INT_GET(block->hdr.count, ARCH_CONVERT);
}

/*
 * Bit offset within the block of the name that belongs to leaf entry
 * idx.  Unlike the count callbacks this does not check the magic
 * number; idx is not range-checked either.
 */
/*ARGSUSED*/
static int
dir_leaf_namelist_offset(
	void			*obj,
	int			startoff,
	int			idx)
{
	xfs_dir_leafblock_t	*block;
	xfs_dir_leaf_entry_t	*e;

	ASSERT(startoff == 0);
	block = obj;
	e = &block->entries[idx];
	return bitize(INT_GET(e->nameidx, ARCH_CONVERT));
}

/*
 * Number of btree entries in the node block at obj: hdr.count if the
 * block carries the node magic number, otherwise 0.
 */
/*ARGSUSED*/
static int
dir_node_btree_count(
	void			*obj,
	int			startoff)
{
	xfs_da_intnode_t	*block;

	ASSERT(startoff == 0);		/* this is a base structure */
	block = obj;
	if (INT_GET(block->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)
		return 0;
	return INT_GET(block->hdr.count, ARCH_CONVERT);
}

/*
 * 1 if the block at obj carries the node magic number (and so has a
 * node header), 0 otherwise.
 */
/*ARGSUSED*/
static int
dir_node_hdr_count(
	void			*obj,
	int			startoff)
{
	xfs_da_intnode_t	*block;

	ASSERT(startoff == 0);
	block = obj;
	return INT_GET(block->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC;
}

/*
 * Size of a directory block in bits: always one filesystem block.  All
 * three arguments are ignored.
 */
/*ARGSUSED*/
int
dir_size(
	void	*obj,
	int	startoff,
	int	idx)
{
	return bitize(mp->m_sb.sb_blocksize);
}
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern const field_t dir_flds[]; +extern const field_t dir_hfld[]; +extern const field_t dir_blkinfo_flds[]; +extern const field_t dir_leaf_entry_flds[]; +extern const field_t dir_leaf_hdr_flds[]; +extern const field_t dir_leaf_map_flds[]; +extern const field_t dir_leaf_name_flds[]; +extern const field_t dir_node_entry_flds[]; +extern const field_t dir_node_hdr_flds[]; + +extern int dir_leaf_name_size(void *obj, int startoff, int idx); +extern int dir_size(void *obj, int startoff, int idx); diff --git a/db/dir2.c b/db/dir2.c new file mode 100644 index 000000000..b8b440cc5 --- /dev/null +++ b/db/dir2.c @@ -0,0 +1,727 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include <libxfs.h> +#include "bit.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "dir.h" +#include "dir2.h" +#include "mount.h" +#include "data.h" + +/* Count and offset callbacks referenced by the field tables below. */ +static int dir2_block_hdr_count(void *obj, int startoff); +static int dir2_block_leaf_count(void *obj, int startoff); +static int dir2_block_leaf_offset(void *obj, int startoff, int idx); +static int dir2_block_tail_count(void *obj, int startoff); +static int dir2_block_tail_offset(void *obj, int startoff, int idx); +static int dir2_block_u_count(void *obj, int startoff); +static int dir2_block_u_offset(void *obj, int startoff, int idx); +static int dir2_data_union_freetag_count(void *obj, int startoff); +static int dir2_data_union_inumber_count(void *obj, int startoff); +static int dir2_data_union_length_count(void *obj, int startoff); +static int dir2_data_union_name_count(void *obj, int startoff); +static int dir2_data_union_namelen_count(void *obj, int startoff); +static int dir2_data_union_tag_count(void *obj, int startoff); +static int dir2_data_union_tag_offset(void *obj, int startoff, int idx); +static int dir2_data_hdr_count(void *obj, int startoff); +static int dir2_data_u_count(void *obj, int startoff); +static int dir2_data_u_offset(void *obj, int startoff, int idx); +static int dir2_free_bests_count(void *obj, int startoff); +static int dir2_free_hdr_count(void *obj, int startoff); +static int dir2_leaf_bests_count(void *obj, int startoff); +static int dir2_leaf_bests_offset(void *obj, int startoff, int idx); +static int dir2_leaf_ents_count(void *obj, int startoff); +static int dir2_leaf_hdr_count(void *obj, int startoff); +static int dir2_leaf_tail_count(void *obj, int startoff); +static int
dir2_leaf_tail_offset(void *obj, int startoff, int idx); +static int dir2_node_btree_count(void *obj, int startoff); +static int dir2_node_hdr_count(void *obj, int startoff); + +const field_t dir2_hfld[] = { + { "", FLDT_DIR2, OI(0), C1, 0, TYP_NONE }, + { NULL } +}; + +#define BOFF(f) bitize(offsetof(xfs_dir2_block_t, f)) +#define DOFF(f) bitize(offsetof(xfs_dir2_data_t, f)) +#define FOFF(f) bitize(offsetof(xfs_dir2_free_t, f)) +#define LOFF(f) bitize(offsetof(xfs_dir2_leaf_t, f)) +#define NOFF(f) bitize(offsetof(xfs_da_intnode_t, f)) +const field_t dir2_flds[] = { + { "bhdr", FLDT_DIR2_DATA_HDR, OI(BOFF(hdr)), dir2_block_hdr_count, + FLD_COUNT, TYP_NONE }, + { "bu", FLDT_DIR2_DATA_UNION, dir2_block_u_offset, dir2_block_u_count, + FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE }, + { "bleaf", FLDT_DIR2_LEAF_ENTRY, dir2_block_leaf_offset, + dir2_block_leaf_count, FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE }, + { "btail", FLDT_DIR2_BLOCK_TAIL, dir2_block_tail_offset, + dir2_block_tail_count, FLD_OFFSET|FLD_COUNT, TYP_NONE }, + { "dhdr", FLDT_DIR2_DATA_HDR, OI(DOFF(hdr)), dir2_data_hdr_count, + FLD_COUNT, TYP_NONE }, + { "du", FLDT_DIR2_DATA_UNION, dir2_data_u_offset, dir2_data_u_count, + FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE }, + { "lhdr", FLDT_DIR2_LEAF_HDR, OI(LOFF(hdr)), dir2_leaf_hdr_count, + FLD_COUNT, TYP_NONE }, + { "lbests", FLDT_DIR2_DATA_OFF, dir2_leaf_bests_offset, + dir2_leaf_bests_count, FLD_ARRAY|FLD_OFFSET|FLD_COUNT, TYP_NONE }, + { "lents", FLDT_DIR2_LEAF_ENTRY, OI(LOFF(ents)), dir2_leaf_ents_count, + FLD_ARRAY|FLD_COUNT, TYP_NONE }, + { "ltail", FLDT_DIR2_LEAF_TAIL, dir2_leaf_tail_offset, + dir2_leaf_tail_count, FLD_OFFSET|FLD_COUNT, TYP_NONE }, + { "nhdr", FLDT_DIR_NODE_HDR, OI(NOFF(hdr)), dir2_node_hdr_count, + FLD_COUNT, TYP_NONE }, + { "nbtree", FLDT_DIR_NODE_ENTRY, OI(NOFF(btree)), dir2_node_btree_count, + FLD_ARRAY|FLD_COUNT, TYP_NONE }, + { "fhdr", FLDT_DIR2_FREE_HDR, OI(FOFF(hdr)), dir2_free_hdr_count, + FLD_COUNT, TYP_NONE }, + { "fbests", 
FLDT_DIR2_DATA_OFFNZ, OI(FOFF(bests)), + dir2_free_bests_count, FLD_ARRAY|FLD_COUNT, TYP_NONE }, + { NULL } +}; + +#define BTOFF(f) bitize(offsetof(xfs_dir2_block_tail_t, f)) +const field_t dir2_block_tail_flds[] = { + { "count", FLDT_UINT32D, OI(BTOFF(count)), C1, 0, TYP_NONE }, + { "stale", FLDT_UINT32D, OI(BTOFF(stale)), C1, 0, TYP_NONE }, + { NULL } +}; + +#define DFOFF(f) bitize(offsetof(xfs_dir2_data_free_t, f)) +const field_t dir2_data_free_flds[] = { + { "offset", FLDT_DIR2_DATA_OFF, OI(DFOFF(offset)), C1, 0, TYP_NONE }, + { "length", FLDT_DIR2_DATA_OFF, OI(DFOFF(length)), C1, 0, TYP_NONE }, + { NULL } +}; + +#define DHOFF(f) bitize(offsetof(xfs_dir2_data_hdr_t, f)) +const field_t dir2_data_hdr_flds[] = { + { "magic", FLDT_UINT32X, OI(DHOFF(magic)), C1, 0, TYP_NONE }, + { "bestfree", FLDT_DIR2_DATA_FREE, OI(DHOFF(bestfree)), + CI(XFS_DIR2_DATA_FD_COUNT), FLD_ARRAY, TYP_NONE }, + { NULL } +}; + +#define DEOFF(f) bitize(offsetof(xfs_dir2_data_entry_t, f)) +#define DUOFF(f) bitize(offsetof(xfs_dir2_data_unused_t, f)) +const field_t dir2_data_union_flds[] = { + { "freetag", FLDT_UINT16X, OI(DUOFF(freetag)), + dir2_data_union_freetag_count, FLD_COUNT, TYP_NONE }, + { "inumber", FLDT_INO, OI(DEOFF(inumber)), + dir2_data_union_inumber_count, FLD_COUNT, TYP_INODE }, + { "length", FLDT_DIR2_DATA_OFF, OI(DUOFF(length)), + dir2_data_union_length_count, FLD_COUNT, TYP_NONE }, + { "namelen", FLDT_UINT8D, OI(DEOFF(namelen)), + dir2_data_union_namelen_count, FLD_COUNT, TYP_NONE }, + { "name", FLDT_CHARNS, OI(DEOFF(name)), dir2_data_union_name_count, + FLD_COUNT, TYP_NONE }, + { "tag", FLDT_DIR2_DATA_OFF, dir2_data_union_tag_offset, + dir2_data_union_tag_count, FLD_OFFSET|FLD_COUNT, TYP_NONE }, + { NULL } +}; + +#define LEOFF(f) bitize(offsetof(xfs_dir2_leaf_entry_t, f)) +const field_t dir2_leaf_entry_flds[] = { + { "hashval", FLDT_UINT32X, OI(LEOFF(hashval)), C1, 0, TYP_NONE }, + { "address", FLDT_UINT32X, OI(LEOFF(address)), C1, 0, TYP_NONE }, + { NULL } +}; + +#define 
LHOFF(f) bitize(offsetof(xfs_dir2_leaf_hdr_t, f)) +const field_t dir2_leaf_hdr_flds[] = { + { "info", FLDT_DIR_BLKINFO, OI(LHOFF(info)), C1, 0, TYP_NONE }, + { "count", FLDT_UINT16D, OI(LHOFF(count)), C1, 0, TYP_NONE }, + { "stale", FLDT_UINT16D, OI(LHOFF(stale)), C1, 0, TYP_NONE }, + { NULL } +}; + +#define LTOFF(f) bitize(offsetof(xfs_dir2_leaf_tail_t, f)) +const field_t dir2_leaf_tail_flds[] = { + { "bestcount", FLDT_UINT32D, OI(LTOFF(bestcount)), C1, 0, TYP_NONE }, + { NULL } +}; + +#define FHOFF(f) bitize(offsetof(xfs_dir2_free_hdr_t, f)) +const field_t dir2_free_hdr_flds[] = { + { "magic", FLDT_UINT32X, OI(FHOFF(magic)), C1, 0, TYP_NONE }, + { "firstdb", FLDT_INT32D, OI(FHOFF(firstdb)), C1, 0, TYP_NONE }, + { "nvalid", FLDT_INT32D, OI(FHOFF(nvalid)), C1, 0, TYP_NONE }, + { "nused", FLDT_INT32D, OI(FHOFF(nused)), C1, 0, TYP_NONE }, + { NULL } +}; + +/*ARGSUSED*/ /* Count callback for "bhdr": 1 if the magic is XFS_DIR2_BLOCK_MAGIC, otherwise 0. */ +static int +dir2_block_hdr_count( + void *obj, + int startoff) +{ + xfs_dir2_block_t *block; + + ASSERT(startoff == 0); + block = obj; + return INT_GET(block->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC; +} + +/*ARGSUSED*/ /* Count callback for "bleaf": the block tail's count field, or 0 when the magic is not XFS_DIR2_BLOCK_MAGIC. */ +static int +dir2_block_leaf_count( + void *obj, + int startoff) +{ + xfs_dir2_block_t *block; + xfs_dir2_block_tail_t *btp; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC) + return 0; + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + return INT_GET(btp->count, ARCH_CONVERT); +} + +/*ARGSUSED*/ /* Offset callback for "bleaf": bit offset, from the start of the block, of leaf entry idx. */ +static int +dir2_block_leaf_offset( + void *obj, + int startoff, + int idx) +{ + xfs_dir2_block_t *block; + xfs_dir2_block_tail_t *btp; + xfs_dir2_leaf_entry_t *lep; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + lep = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT) + idx; /* pointer arithmetic in units of leaf entries */ + return bitize((int)((char *)lep - (char *)block)); +} + +/*ARGSUSED*/ /* Count callback for "btail". */ +static int +dir2_block_tail_count( + void *obj, + int startoff) +{ +
xfs_dir2_block_t *block; + + ASSERT(startoff == 0); + block = obj; + return INT_GET(block->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC; +} + +/* + * Offset callback for "btail": bit offset of the block tail from the + * start of the block. + */ +/*ARGSUSED*/ +static int +dir2_block_tail_offset( + void *obj, + int startoff, + int idx) +{ + xfs_dir2_block_t *block; + xfs_dir2_block_tail_t *btp; + + ASSERT(startoff == 0); + ASSERT(idx == 0); + block = obj; + ASSERT(INT_GET(block->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + return bitize((int)((char *)btp - (char *)block)); +} + +/* + * Count callback for "bu": number of data-union entries between block->u + * and the start of the leaf array. Unused regions are stepped over by + * their on-disk length, in-use entries by XFS_DIR2_DATA_ENTSIZE(namelen). + * Returns 0 when the magic is not XFS_DIR2_BLOCK_MAGIC. + * + * An unused region whose length is zero would never advance the walk, so + * it ends the count (the zero-length region itself is not counted) rather + * than looping forever. + */ +/*ARGSUSED*/ +static int +dir2_block_u_count( + void *obj, + int startoff) +{ + xfs_dir2_block_t *block; + xfs_dir2_block_tail_t *btp; + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_unused_t *dup; + char *endptr; + int i; + int len; + char *ptr; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC) + return 0; + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + ptr = (char *)block->u; + endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + for (i = 0; ptr < endptr; i++) { + dup = (xfs_dir2_data_unused_t *)ptr; + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { + len = INT_GET(dup->length, ARCH_CONVERT); + /* zero length: ptr would never move, stop here */ + if (len == 0) + break; + ptr += len; + } else { + dep = (xfs_dir2_data_entry_t *)ptr; + ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + } + } + return i; +} + +/* + * Offset callback for "bu": bit offset, from the start of the block, of + * data-union entry idx, found by stepping over the idx entries before it. + */ +/*ARGSUSED*/ +static int +dir2_block_u_offset( + void *obj, + int startoff, + int idx) +{ + xfs_dir2_block_t *block; + xfs_dir2_block_tail_t *btp; + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_unused_t *dup; + /*REFERENCED*/ + char *endptr; + int i; + char *ptr; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + ptr = (char *)block->u; + endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + for (i = 0; i < idx; i++) { + ASSERT(ptr < endptr); + dup = (xfs_dir2_data_unused_t *)ptr; + if (INT_GET(dup->freetag, ARCH_CONVERT) ==
XFS_DIR2_DATA_FREE_TAG) + ptr += INT_GET(dup->length, ARCH_CONVERT); + else { + dep = (xfs_dir2_data_entry_t *)ptr; + ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + } + } + return bitize((int)(ptr - (char *)block)); +} + +/* Count callback for "freetag": 1 if the freetag field ends within mp->m_dirblksize bytes of obj and holds XFS_DIR2_DATA_FREE_TAG, else 0. */ static int +dir2_data_union_freetag_count( + void *obj, + int startoff) +{ + xfs_dir2_data_unused_t *dup; + char *end; + + ASSERT(bitoffs(startoff) == 0); + dup = (xfs_dir2_data_unused_t *)((char *)obj + byteize(startoff)); + end = (char *)&dup->freetag + sizeof(dup->freetag); /* one past the last byte of the field */ + return end <= (char *)obj + mp->m_dirblksize && + INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG; +} + +/* Count callback for "inumber": 1 if the inumber field ends within mp->m_dirblksize bytes of obj and the entry is not free-tagged, else 0. */ static int +dir2_data_union_inumber_count( + void *obj, + int startoff) +{ + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_unused_t *dup; + char *end; + + ASSERT(bitoffs(startoff) == 0); + dup = (xfs_dir2_data_unused_t *)((char *)obj + byteize(startoff)); + dep = (xfs_dir2_data_entry_t *)dup; /* same bytes viewed as an in-use entry */ + end = (char *)&dep->inumber + sizeof(dep->inumber); + return end <= (char *)obj + mp->m_dirblksize && + INT_GET(dup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG; +} + +/* Count callback for "length": 1 if the length field ends within mp->m_dirblksize bytes of obj and the entry is free-tagged, else 0. */ static int +dir2_data_union_length_count( + void *obj, + int startoff) +{ + xfs_dir2_data_unused_t *dup; + char *end; + + ASSERT(bitoffs(startoff) == 0); + dup = (xfs_dir2_data_unused_t *)((char *)obj + byteize(startoff)); + end = (char *)&dup->length + sizeof(dup->length); + return end <= (char *)obj + mp->m_dirblksize && + INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG; +} + +/* Count callback for "name": namelen for a non-free entry whose name ends within mp->m_dirblksize bytes of obj, else 0. */ static int +dir2_data_union_name_count( + void *obj, + int startoff) +{ + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_unused_t *dup; + char *end; + + ASSERT(bitoffs(startoff) == 0); + dup = (xfs_dir2_data_unused_t *)((char *)obj + byteize(startoff)); + dep = (xfs_dir2_data_entry_t *)dup; + end = (char *)&dep->namelen + sizeof(dep->namelen); + if (end >= (char *)obj + mp->m_dirblksize || + INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) + return 0; + end = (char *)&dep->name[0] + dep->namelen; +
return end <= (char *)obj + mp->m_dirblksize ? dep->namelen : 0; +} + +/* Count callback for "namelen": 1 if the namelen field ends within mp->m_dirblksize bytes of obj and the entry is not free-tagged, else 0. */ static int +dir2_data_union_namelen_count( + void *obj, + int startoff) +{ + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_unused_t *dup; + char *end; + + ASSERT(bitoffs(startoff) == 0); + dup = (xfs_dir2_data_unused_t *)((char *)obj + byteize(startoff)); + dep = (xfs_dir2_data_entry_t *)dup; + end = (char *)&dep->namelen + sizeof(dep->namelen); + return end <= (char *)obj + mp->m_dirblksize && + INT_GET(dup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG; +} + +/* Count callback for "tag": 1 if the entry's tag (located from the length of an unused region, or from the namelen of an in-use entry) ends within mp->m_dirblksize bytes of obj, else 0. Each field is bounds-checked before it is read. */ static int +dir2_data_union_tag_count( + void *obj, + int startoff) +{ + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_unused_t *dup; + char *end; + xfs_dir2_data_off_t *tagp; + + ASSERT(bitoffs(startoff) == 0); + dup = (xfs_dir2_data_unused_t *)((char *)obj + byteize(startoff)); + dep = (xfs_dir2_data_entry_t *)dup; + end = (char *)&dup->freetag + sizeof(dup->freetag); + if (end > (char *)obj + mp->m_dirblksize) + return 0; + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { + end = (char *)&dup->length + sizeof(dup->length); + if (end > (char *)obj + mp->m_dirblksize) + return 0; + tagp = XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT); + } else { + end = (char *)&dep->namelen + sizeof(dep->namelen); + if (end > (char *)obj + mp->m_dirblksize) + return 0; + tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); + } + end = (char *)tagp + sizeof(*tagp); + return end <= (char *)obj + mp->m_dirblksize; +} + +/*ARGSUSED*/ /* Offset callback for "tag": bit offset of the tag from the start of this entry, for either an unused region or an in-use entry. */ +static int +dir2_data_union_tag_offset( + void *obj, + int startoff, + int idx) +{ + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_unused_t *dup; + + ASSERT(bitoffs(startoff) == 0); + ASSERT(idx == 0); + dup = (xfs_dir2_data_unused_t *)((char *)obj + byteize(startoff)); + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) + return bitize((int)((char *)XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT) - + (char *)dup)); + dep = (xfs_dir2_data_entry_t *)dup; + return bitize((int)((char
*)XFS_DIR2_DATA_ENTRY_TAG_P(dep) - + (char *)dep)); +} + +/* + * Count callback for "dhdr": 1 if the magic is XFS_DIR2_DATA_MAGIC, + * otherwise 0. + */ +/*ARGSUSED*/ +static int +dir2_data_hdr_count( + void *obj, + int startoff) +{ + xfs_dir2_data_t *data; + + ASSERT(startoff == 0); + data = obj; + return INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC; +} + +/* + * Count callback for "du": number of data-union entries between data->u + * and mp->m_dirblksize bytes past the start of the block. Unused regions + * are stepped over by their on-disk length, in-use entries by + * XFS_DIR2_DATA_ENTSIZE(namelen). Returns 0 when the magic is not + * XFS_DIR2_DATA_MAGIC. + * + * An unused region whose length is zero would never advance the walk, so + * it ends the count (the zero-length region itself is not counted) rather + * than looping forever. + */ +/*ARGSUSED*/ +static int +dir2_data_u_count( + void *obj, + int startoff) +{ + xfs_dir2_data_t *data; + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_unused_t *dup; + char *endptr; + int i; + int len; + char *ptr; + + ASSERT(startoff == 0); + data = obj; + if (INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_DATA_MAGIC) + return 0; + ptr = (char *)data->u; + endptr = (char *)data + mp->m_dirblksize; + for (i = 0; ptr < endptr; i++) { + dup = (xfs_dir2_data_unused_t *)ptr; + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { + len = INT_GET(dup->length, ARCH_CONVERT); + /* zero length: ptr would never move, stop here */ + if (len == 0) + break; + ptr += len; + } else { + dep = (xfs_dir2_data_entry_t *)ptr; + ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + } + } + return i; +} + +/* + * Offset callback for "du": bit offset, from the start of the block, of + * data-union entry idx, found by stepping over the idx entries before it. + */ +/*ARGSUSED*/ +static int +dir2_data_u_offset( + void *obj, + int startoff, + int idx) +{ + xfs_dir2_data_t *data; + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_unused_t *dup; + /*REFERENCED*/ + char *endptr; + int i; + char *ptr; + + ASSERT(startoff == 0); + data = obj; + ASSERT(INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC); + ptr = (char *)data->u; + endptr = (char *)data + mp->m_dirblksize; + for (i = 0; i < idx; i++) { + ASSERT(ptr < endptr); + dup = (xfs_dir2_data_unused_t *)ptr; + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) + ptr += INT_GET(dup->length, ARCH_CONVERT); + else { + dep = (xfs_dir2_data_entry_t *)ptr; + ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + } + } + return bitize((int)(ptr - (char *)data)); +} + +/* + * Size in bits of one data-union entry: the on-disk length of an unused + * region, or XFS_DIR2_DATA_ENTSIZE(namelen) for an in-use entry. + */ +/*ARGSUSED*/ +int +dir2_data_union_size( + void *obj, + int startoff, + int idx) +{ + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_unused_t *dup; + + ASSERT(bitoffs(startoff) == 0); + ASSERT(idx == 0); + dup = (xfs_dir2_data_unused_t *)((char *)obj
+ byteize(startoff)); + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) + return bitize(INT_GET(dup->length, ARCH_CONVERT)); + else { + dep = (xfs_dir2_data_entry_t *)dup; + return bitize(XFS_DIR2_DATA_ENTSIZE(dep->namelen)); + } +} + +/*ARGSUSED*/ +static int +dir2_free_bests_count( + void *obj, + int startoff) +{ + xfs_dir2_free_t *free; + + ASSERT(startoff == 0); + free = obj; + if (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC) + return 0; + return INT_GET(free->hdr.nvalid, ARCH_CONVERT); +} + +/*ARGSUSED*/ +static int +dir2_free_hdr_count( + void *obj, + int startoff) +{ + xfs_dir2_free_t *free; + + ASSERT(startoff == 0); + free = obj; + return INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC; +} + +/*ARGSUSED*/ +static int +dir2_leaf_bests_count( + void *obj, + int startoff) +{ + xfs_dir2_leaf_t *leaf; + xfs_dir2_leaf_tail_t *ltp; + + ASSERT(startoff == 0); + leaf = obj; + if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR2_LEAF1_MAGIC) + return 0; + ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + return INT_GET(ltp->bestcount, ARCH_CONVERT); +} + +/*ARGSUSED*/ +static int +dir2_leaf_bests_offset( + void *obj, + int startoff, + int idx) +{ + xfs_dir2_data_off_t *lbp; + xfs_dir2_leaf_t *leaf; + xfs_dir2_leaf_tail_t *ltp; + + ASSERT(startoff == 0); + leaf = obj; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC); + ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + lbp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT) + idx; + return bitize((int)((char *)lbp - (char *)leaf)); +} + +/*ARGSUSED*/ +static int +dir2_leaf_ents_count( + void *obj, + int startoff) +{ + xfs_dir2_leaf_t *leaf; + + ASSERT(startoff == 0); + leaf = obj; + if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR2_LEAF1_MAGIC && + INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR2_LEAFN_MAGIC) + return 0; + return INT_GET(leaf->hdr.count, ARCH_CONVERT); +} + +/*ARGSUSED*/ +static int +dir2_leaf_hdr_count( + void *obj, + int 
startoff) +{ + xfs_dir2_leaf_t *leaf; + + ASSERT(startoff == 0); + leaf = obj; + return INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC || + INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC; +} + +/*ARGSUSED*/ +static int +dir2_leaf_tail_count( + void *obj, + int startoff) +{ + xfs_dir2_leaf_t *leaf; + + ASSERT(startoff == 0); + leaf = obj; + return INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC; +} + +/*ARGSUSED*/ +static int +dir2_leaf_tail_offset( + void *obj, + int startoff, + int idx) +{ + xfs_dir2_leaf_t *leaf; + xfs_dir2_leaf_tail_t *ltp; + + ASSERT(startoff == 0); + ASSERT(idx == 0); + leaf = obj; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC); + ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + return bitize((int)((char *)ltp - (char *)leaf)); +} + +/*ARGSUSED*/ +static int +dir2_node_btree_count( + void *obj, + int startoff) +{ + xfs_da_intnode_t *node; + + ASSERT(startoff == 0); + node = obj; + if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC) + return 0; + return INT_GET(node->hdr.count, ARCH_CONVERT); +} + +/*ARGSUSED*/ +static int +dir2_node_hdr_count( + void *obj, + int startoff) +{ + xfs_da_intnode_t *node; + + ASSERT(startoff == 0); + node = obj; + return INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC; +} + +/*ARGSUSED*/ +int +dir2_size( + void *obj, + int startoff, + int idx) +{ + return bitize(mp->m_dirblksize); +} diff --git a/db/dir2.h b/db/dir2.h new file mode 100644 index 000000000..a1516c99e --- /dev/null +++ b/db/dir2.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern const field_t dir2_flds[]; +extern const field_t dir2_hfld[]; +extern const field_t dir2_block_tail_flds[]; +extern const field_t dir2_data_free_flds[]; +extern const field_t dir2_data_hdr_flds[]; +extern const field_t dir2_data_union_flds[]; +extern const field_t dir2_free_hdr_flds[]; +extern const field_t dir2_leaf_entry_flds[]; +extern const field_t dir2_leaf_hdr_flds[]; +extern const field_t dir2_leaf_tail_flds[]; + +extern int dir2_data_union_size(void *obj, int startoff, int idx); +extern int dir2_size(void *obj, int startoff, int idx); diff --git a/db/dir2sf.c b/db/dir2sf.c new file mode 100644 index 000000000..9d8c35f48 --- /dev/null +++ b/db/dir2sf.c @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include <libxfs.h> +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "bit.h" +#include "dir2sf.h" + +/* Count and offset callbacks referenced by the field tables below. */ +static int dir2_inou_i4_count(void *obj, int startoff); +static int dir2_inou_i8_count(void *obj, int startoff); +static int dir2_sf_entry_inumber_offset(void *obj, int startoff, int idx); +static int dir2_sf_entry_name_count(void *obj, int startoff); +static int dir2_sf_list_count(void *obj, int startoff); +static int dir2_sf_list_offset(void *obj, int startoff, int idx); + +#define OFF(f) bitize(offsetof(xfs_dir2_sf_t, f)) +const field_t dir2sf_flds[] = { + { "hdr", FLDT_DIR2_SF_HDR, OI(OFF(hdr)), C1, 0, TYP_NONE }, + { "list", FLDT_DIR2_SF_ENTRY, dir2_sf_list_offset, dir2_sf_list_count, + FLD_ARRAY|FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { NULL } +}; + +#define UOFF(f) bitize(offsetof(xfs_dir2_inou_t, f)) +const field_t dir2_inou_flds[] = { + { "i8", FLDT_DIR2_INO8, OI(UOFF(i8)), dir2_inou_i8_count, FLD_COUNT, + TYP_INODE }, + { "i4", FLDT_DIR2_INO4, OI(UOFF(i4)), dir2_inou_i4_count, FLD_COUNT, + TYP_INODE }, + { NULL } +}; + +#define HOFF(f) bitize(offsetof(xfs_dir2_sf_hdr_t, f)) +const field_t dir2_sf_hdr_flds[] = { + { "count", FLDT_UINT8D, OI(HOFF(count)), C1, 0, TYP_NONE }, + { "i8count", FLDT_UINT8D, OI(HOFF(i8count)), C1, 0, TYP_NONE }, + { "parent", FLDT_DIR2_INOU, OI(HOFF(parent)), C1, 0, TYP_NONE }, + { NULL } +}; + +#define EOFF(f) bitize(offsetof(xfs_dir2_sf_entry_t, f)) +const field_t dir2_sf_entry_flds[] = { + { "namelen", FLDT_UINT8D, OI(EOFF(namelen)), C1, 0, TYP_NONE }, + { "offset", FLDT_DIR2_SF_OFF, OI(EOFF(offset)), C1, 0, TYP_NONE }, + { "name", FLDT_CHARNS, OI(EOFF(name)), dir2_sf_entry_name_count, + FLD_COUNT, TYP_NONE }, + { "inumber",
FLDT_DIR2_INOU, dir2_sf_entry_inumber_offset, C1, + FLD_OFFSET, TYP_NONE }, + { NULL } +}; + +/*ARGSUSED*/ /* Count callback for "i4": 1 when the shortform header's i8count is zero, else 0. obj is cast to xfs_dinode_t; startoff is not used to locate the header. */ +static int +dir2_inou_i4_count( + void *obj, + int startoff) +{ + xfs_dir2_sf_t *sf; + + ASSERT(bitoffs(startoff) == 0); + sf = &((xfs_dinode_t *)obj)->di_u.di_dir2sf; + return sf->hdr.i8count == 0; +} + +/*ARGSUSED*/ /* Count callback for "i8": 1 when the shortform header's i8count is nonzero, else 0. obj is cast to xfs_dinode_t. */ +static int +dir2_inou_i8_count( + void *obj, + int startoff) +{ + xfs_dir2_sf_t *sf; + + ASSERT(bitoffs(startoff) == 0); + sf = &((xfs_dinode_t *)obj)->di_u.di_dir2sf; + return sf->hdr.i8count != 0; +} + +/*ARGSUSED*/ /* Size in bits of the inode-number union: sizeof(xfs_dir2_ino8_t) when i8count is nonzero, else sizeof(xfs_dir2_ino4_t). */ +int +dir2_inou_size( + void *obj, + int startoff, + int idx) +{ + xfs_dir2_sf_t *sf; + + ASSERT(bitoffs(startoff) == 0); + ASSERT(idx == 0); + sf = &((xfs_dinode_t *)obj)->di_u.di_dir2sf; + return bitize(sf->hdr.i8count ? + (uint)sizeof(xfs_dir2_ino8_t) : + (uint)sizeof(xfs_dir2_ino4_t)); +} + +/* Count callback for "name": the namelen of the shortform entry at startoff. */ static int +dir2_sf_entry_name_count( + void *obj, + int startoff) +{ + xfs_dir2_sf_entry_t *e; + + ASSERT(bitoffs(startoff) == 0); + e = (xfs_dir2_sf_entry_t *)((char *)obj + byteize(startoff)); + return e->namelen; +} + +/*ARGSUSED*/ /* Offset callback for "inumber": bit offset of the inode number from the start of the entry. */ +static int +dir2_sf_entry_inumber_offset( + void *obj, + int startoff, + int idx) +{ + xfs_dir2_sf_entry_t *e; + + ASSERT(bitoffs(startoff) == 0); + ASSERT(idx == 0); + e = (xfs_dir2_sf_entry_t *)((char *)obj + byteize(startoff)); + return bitize((int)((char *)XFS_DIR2_SF_INUMBERP(e) - (char *)e)); +} + +/* Size in bits of shortform entry idx, reached by stepping idx times from the first entry. */ int +dir2_sf_entry_size( + void *obj, + int startoff, + int idx) +{ + xfs_dir2_sf_entry_t *e; + int i; + xfs_dir2_sf_t *sf; + + ASSERT(bitoffs(startoff) == 0); + sf = (xfs_dir2_sf_t *)((char *)obj + byteize(startoff)); + e = XFS_DIR2_SF_FIRSTENTRY(sf); + for (i = 0; i < idx; i++) + e = XFS_DIR2_SF_NEXTENTRY(sf, e); + return bitize((int)XFS_DIR2_SF_ENTSIZE_BYENTRY(sf, e)); +} + +/*ARGSUSED*/ /* Size callback for the shortform header. */ +int +dir2_sf_hdr_size( + void *obj, + int startoff, + int idx) +{ + xfs_dir2_sf_t *sf; + + ASSERT(bitoffs(startoff) == 0); + ASSERT(idx == 0); + sf = (xfs_dir2_sf_t *)((char *)obj + byteize(startoff)); + return
bitize(XFS_DIR2_SF_HDR_SIZE(sf->hdr.i8count)); +} + +static int +dir2_sf_list_count( + void *obj, + int startoff) +{ + xfs_dir2_sf_t *sf; + + ASSERT(bitoffs(startoff) == 0); + sf = (xfs_dir2_sf_t *)((char *)obj + byteize(startoff)); + return sf->hdr.count; +} + +static int +dir2_sf_list_offset( + void *obj, + int startoff, + int idx) +{ + xfs_dir2_sf_entry_t *e; + int i; + xfs_dir2_sf_t *sf; + + ASSERT(bitoffs(startoff) == 0); + sf = (xfs_dir2_sf_t *)((char *)obj + byteize(startoff)); + e = XFS_DIR2_SF_FIRSTENTRY(sf); + for (i = 0; i < idx; i++) + e = XFS_DIR2_SF_NEXTENTRY(sf, e); + return bitize((int)((char *)e - (char *)sf)); +} + +/*ARGSUSED*/ +int +dir2sf_size( + void *obj, + int startoff, + int idx) +{ + xfs_dir2_sf_entry_t *e; + int i; + xfs_dir2_sf_t *sf; + + ASSERT(bitoffs(startoff) == 0); + ASSERT(idx == 0); + sf = (xfs_dir2_sf_t *)((char *)obj + byteize(startoff)); + e = XFS_DIR2_SF_FIRSTENTRY(sf); + for (i = 0; i < sf->hdr.count; i++) + e = XFS_DIR2_SF_NEXTENTRY(sf, e); + return bitize((int)((char *)e - (char *)sf)); +} diff --git a/db/dir2sf.h b/db/dir2sf.h new file mode 100644 index 000000000..f720c8b83 --- /dev/null +++ b/db/dir2sf.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern const field_t dir2sf_flds[]; +extern const field_t dir2_inou_flds[]; +extern const field_t dir2_sf_hdr_flds[]; +extern const field_t dir2_sf_entry_flds[]; + +extern int dir2sf_size(void *obj, int startoff, int idx); +extern int dir2_inou_size(void *obj, int startoff, int idx); +extern int dir2_sf_entry_size(void *obj, int startoff, int idx); +extern int dir2_sf_hdr_size(void *obj, int startoff, int idx); diff --git a/db/dirshort.c b/db/dirshort.c new file mode 100644 index 000000000..4a6f4f45f --- /dev/null +++ b/db/dirshort.c @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "bit.h" +#include "dirshort.h" +/* Forward declarations of the count/offset callbacks referenced by the field tables below. */ +static int dir_sf_entry_name_count(void *obj, int startoff); +static int dir_shortform_list_count(void *obj, int startoff); +static int dir_shortform_list_offset(void *obj, int startoff, int idx); +/* Fields of xfs_dir_shortform_t: a fixed "hdr" plus a "list" array whose element count and offsets come from the two callbacks. */ +#define OFF(f) bitize(offsetof(xfs_dir_shortform_t, f)) +const field_t dir_shortform_flds[] = { + { "hdr", FLDT_DIR_SF_HDR, OI(OFF(hdr)), C1, 0, TYP_NONE }, + { "list", FLDT_DIR_SF_ENTRY, dir_shortform_list_offset, + dir_shortform_list_count, FLD_ARRAY|FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { NULL } +}; +/* Fields of xfs_dir_sf_hdr_t, at offsets taken with offsetof(). */ +#define HOFF(f) bitize(offsetof(xfs_dir_sf_hdr_t, f)) +const field_t dir_sf_hdr_flds[] = { + { "parent", FLDT_DIR_INO, OI(HOFF(parent)), C1, 0, TYP_INODE }, + { "count", FLDT_UINT8D, OI(HOFF(count)), C1, 0, TYP_NONE }, + { NULL } +}; +/* Fields of xfs_dir_sf_entry_t; the "name" element count is supplied by dir_sf_entry_name_count(). */ +#define EOFF(f) bitize(offsetof(xfs_dir_sf_entry_t, f)) +const field_t dir_sf_entry_flds[] = { + { "inumber", FLDT_DIR_INO, OI(EOFF(inumber)), C1, 0, TYP_INODE }, + { "namelen", FLDT_UINT8D, OI(EOFF(namelen)), C1, 0, TYP_NONE }, + { "name", FLDT_CHARNS, OI(EOFF(name)), dir_sf_entry_name_count, + FLD_COUNT, TYP_NONE }, + { NULL } +}; +/* Returns the namelen field of the xfs_dir_sf_entry_t located byteize(startoff) bytes into obj. */ +static int +dir_sf_entry_name_count( + void *obj, + int startoff) +{ + xfs_dir_sf_entry_t *e; + + ASSERT(bitoffs(startoff) == 0); + e = (xfs_dir_sf_entry_t 
*)((char *)obj + byteize(startoff)); + return e->namelen; +} +/* Size of entry idx: treats obj + byteize(startoff) as the start of the xfs_dir_shortform_t, steps idx times from list[0] with XFS_DIR_SF_NEXTENTRY and returns bitize() of XFS_DIR_SF_ENTSIZE_BYENTRY for that entry. idx is not range-checked here. */ +int +dir_sf_entry_size( + void *obj, + int startoff, + int idx) +{ + xfs_dir_sf_entry_t *e; + int i; + xfs_dir_shortform_t *sf; + + ASSERT(bitoffs(startoff) == 0); + sf = (xfs_dir_shortform_t *)((char *)obj + byteize(startoff)); + e = &sf->list[0]; + for (i = 0; i < idx; i++) + e = XFS_DIR_SF_NEXTENTRY(e); + return bitize((int)XFS_DIR_SF_ENTSIZE_BYENTRY(e)); +} +/* Returns hdr.count of the xfs_dir_shortform_t located byteize(startoff) bytes into obj. */ +static int +dir_shortform_list_count( + void *obj, + int startoff) +{ + xfs_dir_shortform_t *sf; + + ASSERT(bitoffs(startoff) == 0); + sf = (xfs_dir_shortform_t *)((char *)obj + byteize(startoff)); + return sf->hdr.count; +} +/* Offset of list entry idx: steps idx times from list[0] with XFS_DIR_SF_NEXTENTRY and returns bitize() of the entry's byte distance from sf. */ +static int +dir_shortform_list_offset( + void *obj, + int startoff, + int idx) +{ + xfs_dir_sf_entry_t *e; + int i; + xfs_dir_shortform_t *sf; + + ASSERT(bitoffs(startoff) == 0); + sf = (xfs_dir_shortform_t *)((char *)obj + byteize(startoff)); + e = &sf->list[0]; + for (i = 0; i < idx; i++) + e = XFS_DIR_SF_NEXTENTRY(e); + return bitize((int)((char *)e - (char *)sf)); +} +/* Size of the whole shortform directory: requires idx == 0 (asserted), steps past all hdr.count entries and returns bitize() of the byte distance from sf to the position after the last entry. */ +int +dirshort_size( + void *obj, + int startoff, + int idx) +{ + xfs_dir_sf_entry_t *e; + int i; + xfs_dir_shortform_t *sf; + + ASSERT(bitoffs(startoff) == 0); + ASSERT(idx == 0); + sf = (xfs_dir_shortform_t *)((char *)obj + byteize(startoff)); + e = &sf->list[0]; + for (i = 0; i < sf->hdr.count; i++) + e = XFS_DIR_SF_NEXTENTRY(e); + return bitize((int)((char *)e - (char *)sf)); +} diff --git a/db/dirshort.h b/db/dirshort.h new file mode 100644 index 000000000..2d50efbf6 --- /dev/null +++ b/db/dirshort.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern const field_t dir_sf_entry_flds[]; +extern const field_t dir_sf_hdr_flds[]; +extern const field_t dir_shortform_flds[]; +extern const field_t dirshort_hfld[]; + +extern int dir_sf_entry_size(void *obj, int startoff, int idx); +extern int dirshort_size(void *obj, int startoff, int idx); diff --git a/db/dquot.c b/db/dquot.c new file mode 100644 index 000000000..be22d81e1 --- /dev/null +++ b/db/dquot.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include "bit.h" +#include "bmap.h" +#include "command.h" +#include "data.h" +#include "dquot.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "inode.h" +#include "io.h" +#include "mount.h" +#include "output.h" +/* Forward declarations for the handlers referenced by dquot_cmd. */ +static int dquot_f(int argc, char **argv); +static void dquot_help(void); +/* Command table entry for "dquot", registered by dquot_init(). */ +static const cmdinfo_t dquot_cmd = + { "dquot", NULL, dquot_f, 1, 2, 1, "[project|user id]", + "set current address to project or user quota block", dquot_help }; +/* Single unnamed top-level field of type FLDT_DQBLK at offset 0. */ +const field_t dqblk_hfld[] = { + { "", FLDT_DQBLK, OI(0), C1, 0, TYP_NONE }, + { NULL } +}; +/* Fields of xfs_dqblk_t (member names carry a dd_ prefix); "fill" is flagged FLD_SKIPALL. */ +#define DDOFF(f) bitize(offsetof(xfs_dqblk_t, dd_ ## f)) +#define DDSZC(f) szcount(xfs_dqblk_t, dd_ ## f) +const field_t dqblk_flds[] = { + { "diskdq", FLDT_DISK_DQUOT, OI(DDOFF(diskdq)), C1, 0, TYP_NONE }, + { "fill", FLDT_CHARS, OI(DDOFF(fill)), CI(DDSZC(fill)), FLD_SKIPALL, + TYP_NONE }, + { NULL } +}; +/* Fields of xfs_disk_dquot_t (member names carry a d_ prefix); the two pad fields are flagged FLD_SKIPALL. */ +#define DOFF(f) bitize(offsetof(xfs_disk_dquot_t, d_ ## f)) +const field_t disk_dquot_flds[] = { + { "magic", FLDT_UINT16X, OI(DOFF(magic)), C1, 0, TYP_NONE }, + { "version", 
FLDT_UINT8X, OI(DOFF(version)), C1, 0, TYP_NONE }, + { "flags", FLDT_UINT8X, OI(DOFF(flags)), C1, 0, TYP_NONE }, + { "id", FLDT_DQID, OI(DOFF(id)), C1, 0, TYP_NONE }, + { "blk_hardlimit", FLDT_QCNT, OI(DOFF(blk_hardlimit)), C1, 0, + TYP_NONE }, + { "blk_softlimit", FLDT_QCNT, OI(DOFF(blk_softlimit)), C1, 0, + TYP_NONE }, + { "ino_hardlimit", FLDT_QCNT, OI(DOFF(ino_hardlimit)), C1, 0, + TYP_NONE }, + { "ino_softlimit", FLDT_QCNT, OI(DOFF(ino_softlimit)), C1, 0, + TYP_NONE }, + { "bcount", FLDT_QCNT, OI(DOFF(bcount)), C1, 0, TYP_NONE }, + { "icount", FLDT_QCNT, OI(DOFF(icount)), C1, 0, TYP_NONE }, + { "itimer", FLDT_INT32D, OI(DOFF(itimer)), C1, 0, TYP_NONE }, + { "btimer", FLDT_INT32D, OI(DOFF(btimer)), C1, 0, TYP_NONE }, + { "iwarns", FLDT_QWARNCNT, OI(DOFF(iwarns)), C1, 0, TYP_NONE }, + { "bwarns", FLDT_QWARNCNT, OI(DOFF(bwarns)), C1, 0, TYP_NONE }, + { "pad0", FLDT_INT32D, OI(DOFF(pad0)), C1, FLD_SKIPALL, TYP_NONE }, + { "rtb_hardlimit", FLDT_QCNT, OI(DOFF(rtb_hardlimit)), C1, 0, + TYP_NONE }, + { "rtb_softlimit", FLDT_QCNT, OI(DOFF(rtb_softlimit)), C1, 0, + TYP_NONE }, + { "rtbcount", FLDT_QCNT, OI(DOFF(rtbcount)), C1, 0, TYP_NONE }, + { "rtbtimer", FLDT_INT32D, OI(DOFF(rtbtimer)), C1, 0, TYP_NONE }, + { "rtbwarns", FLDT_QWARNCNT, OI(DOFF(rtbwarns)), C1, 0, TYP_NONE }, + { "pad", FLDT_UINT16X, OI(DOFF(pad)), C1, FLD_SKIPALL, TYP_NONE }, + { NULL } +}; +/* Help callback for "dquot"; intentionally empty in this revision, so it prints nothing. */ +static void +dquot_help(void) +{ +} +/* "dquot [-p|-u] id": selects the project (-p) or user (-u, the default) quota inode from the superblock, maps the file block that holds dquot `id` through the data fork, and makes that xfs_dqblk_t the current object on the ring. Always returns 0; every failure is reported with dbprintf(). */ +static int +dquot_f( + int argc, + char **argv) +{ + bmap_ext_t bm; + int c; + int doproj; + xfs_dqid_t id; + xfs_ino_t ino; + int nex; + char *p; + int perblock; + xfs_fileoff_t qbno; + int qoff; + char *s; + + doproj = optind = 0; /* resetting optind restarts getopt() for this command line */ + while ((c = getopt(argc, argv, "pu")) != EOF) { + switch (c) { + case 'p': + doproj = 1; + break; + case 'u': + doproj = 0; + break; + default: + dbprintf("bad option for dquot command\n"); + return 0; + } + } + s = doproj ? 
"project" : "user"; + if (optind != argc - 1) { + dbprintf("dquot command requires one %s id argument\n", s); + return 0; + } + ino = doproj ? mp->m_sb.sb_pquotino : mp->m_sb.sb_uquotino; + if (ino == 0 || ino == NULLFSINO) { + dbprintf("no %s quota inode present\n", s); + return 0; + } + id = (xfs_dqid_t)strtoul(argv[optind], &p, 0); /* ids are unsigned: signed strtol() saturates at LONG_MAX where long is 32 bits */ + if (p == argv[optind] || *p != '\0') { /* also reject an empty id string, which parses as 0 */ + dbprintf("bad %s id for dquot %s\n", s, argv[optind]); + return 0; + } + perblock = (int)(mp->m_sb.sb_blocksize / sizeof(xfs_dqblk_t)); /* dquot records per filesystem block */ + qbno = (xfs_fileoff_t)(id / perblock); + qoff = (int)(id % perblock); + push_cur(); /* look up the extent with the quota inode current, then restore the previous position */ + set_cur_inode(ino); + nex = 1; + bmap(qbno, 1, XFS_DATA_FORK, &nex, &bm); + pop_cur(); + if (nex == 0) { + dbprintf("no %s quota data for id %u\n", s, (unsigned int)id); + return 0; + } + set_cur(&typtab[TYP_DQBLK], XFS_FSB_TO_DADDR(mp, bm.startblock), blkbb, + DB_RING_IGN, NULL); + off_cur(qoff * (int)sizeof(xfs_dqblk_t), sizeof(xfs_dqblk_t)); + ring_add(); + return 0; +} +/* Registers the "dquot" command with the command table. */ +void +dquot_init(void) +{ + add_command(&dquot_cmd); +} diff --git a/db/dquot.h b/db/dquot.h new file mode 100644 index 000000000..ce231976d --- /dev/null +++ b/db/dquot.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct field; + +extern const struct field disk_dquot_flds[]; +extern const struct field dqblk_flds[]; +extern const struct field dqblk_hfld[]; + +extern void dquot_init(void); diff --git a/db/echo.c b/db/echo.c new file mode 100644 index 000000000..7027870dc --- /dev/null +++ b/db/echo.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "command.h" +#include "echo.h" +#include "output.h" + +static int echo_f(int argc, char **argv); +/* Command table entry for "echo", registered by echo_init(). */ +static const cmdinfo_t echo_cmd = + { "echo", NULL, echo_f, 0, -1, 0, "[args]...", + "echo arguments", NULL }; +/* Prints argv[1] onward, each followed by one space, then a newline; relies on the NULL entry that terminates argv. Always returns 0. */ +/*ARGSUSED*/ +static int +echo_f( + int argc, + char **argv) +{ + char **ap; + + for (ap = argv + 1; *ap != NULL; ap++) + dbprintf("%s ", *ap); + dbprintf("\n"); + return 0; +} +/* Adds the "echo" command to the command table. */ +void +echo_init(void) +{ + add_command(&echo_cmd); +} diff --git a/db/echo.h b/db/echo.h new file mode 100644 index 000000000..a2ddeb664 --- /dev/null +++ b/db/echo.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern void echo_init(void); diff --git a/db/faddr.c b/db/faddr.c new file mode 100644 index 000000000..ee58936fe --- /dev/null +++ b/db/faddr.c @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "data.h" +#include "type.h" +#include "faddr.h" +#include "inode.h" +#include "io.h" +#include "bit.h" +#include "bmap.h" +#include "output.h" +#include "mount.h" +/* Follow an AG-relative block number: reads bitsz(bno) bits at bit offset `bit` of obj; with no current AG or a NULLAGBLOCK value it only prints a message, otherwise it calls set_cur() for type `next` at XFS_AGB_TO_DADDR(mp, cur_agno, bno) with DB_RING_ADD. */ +void +fa_agblock( + void *obj, + int bit, + typnm_t next) +{ + xfs_agblock_t bno; + + if (cur_agno == NULLAGNUMBER) { + dbprintf("no current allocation group, cannot set new addr\n"); + return; + } + bno = (xfs_agblock_t)getbitval(obj, bit, bitsz(bno), BVUNSIGNED); + if (bno == NULLAGBLOCK) { + dbprintf("null block number, cannot set new addr\n"); + return; + } + ASSERT(typtab[next].typnm == next); + set_cur(&typtab[next], XFS_AGB_TO_DADDR(mp, cur_agno, bno), blkbb, + DB_RING_ADD, NULL); +} +/* Follow an AG-relative inode number: with no current AG or a NULLAGINO value it only prints a message, otherwise it calls set_cur_inode(XFS_AGINO_TO_INO(mp, cur_agno, agino)); `next` is not used. */ +/*ARGSUSED*/ +void +fa_agino( + void *obj, + int bit, + typnm_t next) +{ + xfs_agino_t agino; + + if (cur_agno == NULLAGNUMBER) { + dbprintf("no current allocation group, cannot set new addr\n"); + return; + } + agino = (xfs_agino_t)getbitval(obj, bit, bitsz(agino), BVUNSIGNED); + if (agino == NULLAGINO) { + dbprintf("null inode number, cannot set new addr\n"); + return; + } + set_cur_inode(XFS_AGINO_TO_INO(mp, cur_agno, agino)); +} +/* Follow a 32-bit attribute block number: 0 is treated as null; otherwise one block is mapped through XFS_ATTR_FORK with bmap() and, if mapped, set_cur() is called at startblock + (bno - startoff). */ +/*ARGSUSED*/ +void +fa_attrblock( + void *obj, + int bit, + typnm_t next) +{ + bmap_ext_t bm; + __uint32_t bno; + xfs_dfsbno_t dfsbno; + int nex; + + bno = (__uint32_t)getbitval(obj, bit, bitsz(bno), BVUNSIGNED); + if (bno == 0) { + dbprintf("null attribute block number, cannot set new addr\n"); + return; + } + nex = 1; + bmap(bno, 1, XFS_ATTR_FORK, &nex, &bm); + if (nex == 0) { + dbprintf("attribute block is unmapped\n"); + return; + } + dfsbno = bm.startblock + (bno - bm.startoff); + ASSERT(typtab[next].typnm == next); + set_cur(&typtab[next], (__int64_t)XFS_FSB_TO_DADDR(mp, dfsbno), blkbb, + 
DB_RING_ADD, NULL); +} +/* Follow a file offset stored in BMBT_STARTOFF_BITLEN bits, mapped through XFS_ATTR_FORK; NULLDFILOFF or an unmapped block only prints a message. */ +void +fa_cfileoffa( + void *obj, + int bit, + typnm_t next) +{ + bmap_ext_t bm; + xfs_dfiloff_t bno; + xfs_dfsbno_t dfsbno; + int nex; + + bno = (xfs_dfiloff_t)getbitval(obj, bit, BMBT_STARTOFF_BITLEN, + BVUNSIGNED); + if (bno == NULLDFILOFF) { + dbprintf("null block number, cannot set new addr\n"); + return; + } + nex = 1; + bmap(bno, 1, XFS_ATTR_FORK, &nex, &bm); + if (nex == 0) { + dbprintf("file block is unmapped\n"); + return; + } + dfsbno = bm.startblock + (bno - bm.startoff); + ASSERT(typtab[next].typnm == next); + set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, dfsbno), blkbb, DB_RING_ADD, + NULL); +} +/* Follow a file offset stored in BMBT_STARTOFF_BITLEN bits, mapped through XFS_DATA_FORK. When next is TYP_DIR2 it maps mp->m_dirblkfsbs blocks instead of 1 and, if bmap() reports more than one extent, passes set_cur() a bbmap built by make_bbmap(). */ +void +fa_cfileoffd( + void *obj, + int bit, + typnm_t next) +{ + bbmap_t bbmap; + bmap_ext_t *bmp; + xfs_dfiloff_t bno; + xfs_dfsbno_t dfsbno; + int nb; + int nex; + + bno = (xfs_dfiloff_t)getbitval(obj, bit, BMBT_STARTOFF_BITLEN, + BVUNSIGNED); + if (bno == NULLDFILOFF) { + dbprintf("null block number, cannot set new addr\n"); + return; + } + nex = nb = next == TYP_DIR2 ? mp->m_dirblkfsbs : 1; + bmp = malloc(nb * sizeof(*bmp)); /* NOTE(review): result is not checked for NULL before being handed to bmap() and dereferenced below */ + bmap(bno, nb, XFS_DATA_FORK, &nex, bmp); + if (nex == 0) { + dbprintf("file block is unmapped\n"); + free(bmp); + return; + } + dfsbno = bmp->startblock + (bno - bmp->startoff); + ASSERT(typtab[next].typnm == next); + if (nex > 1) + make_bbmap(&bbmap, nex, bmp); + set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, dfsbno), nb * blkbb, + DB_RING_ADD, nex > 1 ? 
&bbmap: NULL); + free(bmp); +} +/* Follow a filesystem block number stored in BMBT_STARTBLOCK_BITLEN bits: NULLDFSBNO only prints a message, otherwise set_cur() is called at XFS_FSB_TO_DADDR(mp, bno). */ +void +fa_cfsblock( + void *obj, + int bit, + typnm_t next) +{ + xfs_dfsbno_t bno; + + bno = (xfs_dfsbno_t)getbitval(obj, bit, BMBT_STARTBLOCK_BITLEN, + BVUNSIGNED); + if (bno == NULLDFSBNO) { + dbprintf("null block number, cannot set new addr\n"); + return; + } + ASSERT(typtab[next].typnm == next); + set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, bno), blkbb, DB_RING_ADD, + NULL); +} +/* Same steps as fa_cfileoffa() but the offset is read as a full bitsz(xfs_dfiloff_t) field; mapped through XFS_ATTR_FORK. */ +void +fa_dfiloffa( + void *obj, + int bit, + typnm_t next) +{ + bmap_ext_t bm; + xfs_dfiloff_t bno; + xfs_dfsbno_t dfsbno; + int nex; + + bno = (xfs_dfiloff_t)getbitval(obj, bit, bitsz(bno), BVUNSIGNED); + if (bno == NULLDFILOFF) { + dbprintf("null block number, cannot set new addr\n"); + return; + } + nex = 1; + bmap(bno, 1, XFS_ATTR_FORK, &nex, &bm); + if (nex == 0) { + dbprintf("file block is unmapped\n"); + return; + } + dfsbno = bm.startblock + (bno - bm.startoff); + ASSERT(typtab[next].typnm == next); + set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, dfsbno), blkbb, DB_RING_ADD, + NULL); +} +/* Same steps as fa_cfileoffd() but the offset is read as a full bitsz(xfs_dfiloff_t) field; mapped through XFS_DATA_FORK, with the TYP_DIR2 multi-block handling. */ +void +fa_dfiloffd( + void *obj, + int bit, + typnm_t next) +{ + bbmap_t bbmap; + bmap_ext_t *bmp; + xfs_dfiloff_t bno; + xfs_dfsbno_t dfsbno; + int nb; + int nex; + + bno = (xfs_dfiloff_t)getbitval(obj, bit, bitsz(bno), BVUNSIGNED); + if (bno == NULLDFILOFF) { + dbprintf("null block number, cannot set new addr\n"); + return; + } + nex = nb = next == TYP_DIR2 ? mp->m_dirblkfsbs : 1; + bmp = malloc(nb * sizeof(*bmp)); /* NOTE(review): result is not checked for NULL before being handed to bmap() and dereferenced below */ + bmap(bno, nb, XFS_DATA_FORK, &nex, bmp); + if (nex == 0) { + dbprintf("file block is unmapped\n"); + free(bmp); + return; + } + dfsbno = bmp->startblock + (bno - bmp->startoff); + ASSERT(typtab[next].typnm == next); + if (nex > 1) + make_bbmap(&bbmap, nex, bmp); + set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, dfsbno), nb * blkbb, + DB_RING_ADD, nex > 1 ? 
&bbmap : NULL); + free(bmp); +} +/* Follow a filesystem block number read as a full bitsz(xfs_dfsbno_t) field: NULLDFSBNO only prints a message, otherwise set_cur() is called at XFS_FSB_TO_DADDR(mp, bno). */ +void +fa_dfsbno( + void *obj, + int bit, + typnm_t next) +{ + xfs_dfsbno_t bno; + + bno = (xfs_dfsbno_t)getbitval(obj, bit, bitsz(bno), BVUNSIGNED); + if (bno == NULLDFSBNO) { + dbprintf("null block number, cannot set new addr\n"); + return; + } + ASSERT(typtab[next].typnm == next); + set_cur(&typtab[next], XFS_FSB_TO_DADDR(mp, bno), blkbb, DB_RING_ADD, + NULL); +} +/* Follow a 32-bit directory block number: 0 is treated as null; otherwise mp->m_dirblkfsbs blocks are mapped through XFS_DATA_FORK and set_cur() receives a bbmap when bmap() reports more than one extent. */ +/*ARGSUSED*/ +void +fa_dirblock( + void *obj, + int bit, + typnm_t next) +{ + bbmap_t bbmap; + bmap_ext_t *bmp; + __uint32_t bno; + xfs_dfsbno_t dfsbno; + int nex; + + bno = (__uint32_t)getbitval(obj, bit, bitsz(bno), BVUNSIGNED); + if (bno == 0) { + dbprintf("null directory block number, cannot set new addr\n"); + return; + } + nex = mp->m_dirblkfsbs; + bmp = malloc(nex * sizeof(*bmp)); /* NOTE(review): result is not checked for NULL before being handed to bmap() and dereferenced below */ + bmap(bno, mp->m_dirblkfsbs, XFS_DATA_FORK, &nex, bmp); + if (nex == 0) { + dbprintf("directory block is unmapped\n"); + free(bmp); + return; + } + dfsbno = bmp->startblock + (bno - bmp->startoff); + ASSERT(typtab[next].typnm == next); + if (nex > 1) + make_bbmap(&bbmap, nex, bmp); + set_cur(&typtab[next], (__int64_t)XFS_FSB_TO_DADDR(mp, dfsbno), + (int)XFS_FSB_TO_DADDR(mp, mp->m_dirblkfsbs), DB_RING_ADD, + nex > 1 ? 
&bbmap : NULL); + free(bmp); +} +/* Follow an xfs_drfsbno_t block number: NULLDRFSBNO only prints a message, otherwise set_cur() is called at XFS_FSB_TO_BB(mp, bno), unlike the XFS_FSB_TO_DADDR conversion used by fa_dfsbno(). */ +void +fa_drfsbno( + void *obj, + int bit, + typnm_t next) +{ + xfs_drfsbno_t bno; + + bno = (xfs_drfsbno_t)getbitval(obj, bit, bitsz(bno), BVUNSIGNED); + if (bno == NULLDRFSBNO) { + dbprintf("null block number, cannot set new addr\n"); + return; + } + ASSERT(typtab[next].typnm == next); + set_cur(&typtab[next], (__int64_t)XFS_FSB_TO_BB(mp, bno), blkbb, + DB_RING_ADD, NULL); +} +/* Reads an xfs_drtbno_t and reports NULLDRTBNO, but never changes the current address: the non-null path is an unimplemented stub. */ +/*ARGSUSED*/ +void +fa_drtbno( + void *obj, + int bit, + typnm_t next) +{ + xfs_drtbno_t bno; + + bno = (xfs_drtbno_t)getbitval(obj, bit, bitsz(bno), BVUNSIGNED); + if (bno == NULLDRTBNO) { + dbprintf("null block number, cannot set new addr\n"); + return; + } + /* need set_cur to understand rt subvolume */ +} +/* Follow a full-width inode number: next must be TYP_INODE (asserted); NULLFSINO only prints a message, otherwise set_cur_inode(ino) is called. */ +/*ARGSUSED*/ +void +fa_ino( + void *obj, + int bit, + typnm_t next) +{ + xfs_ino_t ino; + + ASSERT(next == TYP_INODE); + ino = (xfs_ino_t)getbitval(obj, bit, bitsz(ino), BVUNSIGNED); + if (ino == NULLFSINO) { + dbprintf("null inode number, cannot set new addr\n"); + return; + } + set_cur_inode(ino); +} +/* As fa_ino() but reads only bitsz(xfs_dir2_ino4_t) bits. NOTE(review): if that type is narrower than xfs_ino_t the NULLFSINO comparison presumably cannot match - confirm. */ +void +fa_ino4( + void *obj, + int bit, + typnm_t next) +{ + xfs_ino_t ino; + xfs_dir2_ino4_t ino4; + + ASSERT(next == TYP_INODE); + ino = (xfs_ino_t)getbitval(obj, bit, bitsz(ino4), BVUNSIGNED); + if (ino == NULLFSINO) { + dbprintf("null inode number, cannot set new addr\n"); + return; + } + set_cur_inode(ino); +} +/* As fa_ino() but the field width is taken from bitsz(xfs_dir2_ino8_t). */ +void +fa_ino8( + void *obj, + int bit, + typnm_t next) +{ + xfs_ino_t ino; + xfs_dir2_ino8_t ino8; + + ASSERT(next == TYP_INODE); + ino = (xfs_ino_t)getbitval(obj, bit, bitsz(ino8), BVUNSIGNED); + if (ino == NULLFSINO) { + dbprintf("null inode number, cannot set new addr\n"); + return; + } + set_cur_inode(ino); +} diff --git a/db/faddr.h b/db/faddr.h new file mode 100644 index 000000000..25c471e55 --- /dev/null +++ b/db/faddr.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +typedef void (*adfnc_t)(void *obj, int bit, typnm_t next); + +extern void fa_agblock(void *obj, int bit, typnm_t next); +extern void fa_agino(void *obj, int bit, typnm_t next); +extern void fa_attrblock(void *obj, int bit, typnm_t next); +extern void fa_cfileoffd(void *obj, int bit, typnm_t next); +extern void fa_cfsblock(void *obj, int bit, typnm_t next); +extern void fa_dfiloffd(void *obj, int bit, typnm_t next); +extern void fa_dfsbno(void *obj, int bit, typnm_t next); +extern void fa_dinode_union(void *obj, int bit, typnm_t next); +extern void fa_dirblock(void *obj, int bit, typnm_t next); +extern void fa_drfsbno(void *obj, int bit, typnm_t next); +extern void fa_drtbno(void *obj, int bit, typnm_t next); +extern void fa_ino(void *obj, int bit, typnm_t next); +extern void fa_cfileoffa(void *obj, int bit, typnm_t next); +extern void fa_dfiloffa(void *obj, int bit, typnm_t next); +extern void fa_ino4(void *obj, int bit, typnm_t next); +extern void fa_ino8(void *obj, int bit, typnm_t next); diff --git a/db/field.c b/db/field.c new file mode 100644 index 000000000..399c47266 --- /dev/null +++ b/db/field.c @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "inode.h" +#include "bnobt.h" +#include "cntbt.h" +#include "inobt.h" +#include "bmapbt.h" +#include "bmroot.h" +#include "bit.h" +#include "agf.h" +#include "agfl.h" +#include "agi.h" +#include "sb.h" +#include "dir.h" +#include "dirshort.h" +#include "attr.h" +#include "attrshort.h" +#include "dquot.h" +#include "dir2.h" +#include "dir2sf.h" + +const ftattr_t ftattrtab[] = { + { FLDT_AEXTNUM, "aextnum", fp_num, "%d", SI(bitsz(xfs_aextnum_t)), + FTARG_SIGNED, NULL, NULL }, + { FLDT_AGBLOCK, "agblock", fp_num, "%u", SI(bitsz(xfs_agblock_t)), + FTARG_DONULL, fa_agblock, NULL }, + { FLDT_AGBLOCKNZ, "agblocknz", fp_num, "%u", SI(bitsz(xfs_agblock_t)), + FTARG_SKIPZERO|FTARG_DONULL, fa_agblock, NULL }, + { FLDT_AGF, "agf", NULL, (char *)agf_flds, agf_size, FTARG_SIZE, NULL, + agf_flds }, + { FLDT_AGFL, "agfl", NULL, (char *)agfl_flds, agfl_size, FTARG_SIZE, + NULL, agfl_flds }, + { FLDT_AGI, "agi", NULL, (char *)agi_flds, agi_size, FTARG_SIZE, NULL, + agi_flds }, + { FLDT_AGINO, "agino", 
fp_num, "%u", SI(bitsz(xfs_agino_t)), + FTARG_DONULL, fa_agino, NULL }, + { FLDT_AGINONN, "aginonn", fp_num, "%u", SI(bitsz(xfs_agino_t)), + FTARG_SKIPNULL, fa_agino, NULL }, + { FLDT_AGNUMBER, "agnumber", fp_num, "%u", SI(bitsz(xfs_agnumber_t)), + FTARG_DONULL, NULL, NULL }, + { FLDT_ATTR, "attr", NULL, (char *)attr_flds, attr_size, FTARG_SIZE, + NULL, attr_flds }, + { FLDT_ATTR_BLKINFO, "attr_blkinfo", NULL, (char *)attr_blkinfo_flds, + SI(bitsz(struct xfs_da_blkinfo)), 0, NULL, attr_blkinfo_flds }, + { FLDT_ATTR_LEAF_ENTRY, "attr_leaf_entry", fp_sarray, + (char *)attr_leaf_entry_flds, SI(bitsz(struct xfs_attr_leaf_entry)), + 0, NULL, attr_leaf_entry_flds }, + { FLDT_ATTR_LEAF_HDR, "attr_leaf_hdr", NULL, (char *)attr_leaf_hdr_flds, + SI(bitsz(struct xfs_attr_leaf_hdr)), 0, NULL, attr_leaf_hdr_flds }, + { FLDT_ATTR_LEAF_MAP, "attr_leaf_map", fp_sarray, + (char *)attr_leaf_map_flds, SI(bitsz(struct xfs_attr_leaf_map)), 0, + NULL, attr_leaf_map_flds }, + { FLDT_ATTR_LEAF_NAME, "attr_leaf_name", NULL, + (char *)attr_leaf_name_flds, attr_leaf_name_size, FTARG_SIZE, NULL, + attr_leaf_name_flds }, + { FLDT_ATTR_NODE_ENTRY, "attr_node_entry", fp_sarray, + (char *)attr_node_entry_flds, SI(bitsz(struct xfs_da_node_entry)), 0, + NULL, attr_node_entry_flds }, + { FLDT_ATTR_NODE_HDR, "attr_node_hdr", NULL, (char *)attr_node_hdr_flds, + SI(bitsz(struct xfs_da_node_hdr)), 0, NULL, attr_node_hdr_flds }, + { FLDT_ATTR_SF_ENTRY, "attr_sf_entry", NULL, (char *)attr_sf_entry_flds, + attr_sf_entry_size, FTARG_SIZE, NULL, attr_sf_entry_flds }, + { FLDT_ATTR_SF_HDR, "attr_sf_hdr", NULL, (char *)attr_sf_hdr_flds, + SI(bitsz(struct xfs_attr_sf_hdr)), 0, NULL, attr_sf_hdr_flds }, + { FLDT_ATTRBLOCK, "attrblock", fp_num, "%u", SI(bitsz(__uint32_t)), 0, + fa_attrblock, NULL }, + { FLDT_ATTRSHORT, "attrshort", NULL, (char *)attr_shortform_flds, + attrshort_size, FTARG_SIZE, NULL, attr_shortform_flds }, + { FLDT_BMAPBTA, "bmapbta", NULL, (char *)bmapbta_flds, bmapbta_size, + FTARG_SIZE, NULL, 
bmapbta_flds }, + { FLDT_BMAPBTAKEY, "bmapbtakey", fp_sarray, (char *)bmapbta_key_flds, + SI(bitsz(xfs_bmbt_key_t)), 0, NULL, bmapbta_key_flds }, + { FLDT_BMAPBTAPTR, "bmapbtaptr", fp_num, "%llu", + SI(bitsz(xfs_bmbt_ptr_t)), 0, fa_dfsbno, NULL }, + { FLDT_BMAPBTAREC, "bmapbtarec", fp_sarray, (char *)bmapbta_rec_flds, + SI(bitsz(xfs_bmbt_rec_t)), 0, NULL, bmapbta_rec_flds }, + { FLDT_BMAPBTD, "bmapbtd", NULL, (char *)bmapbtd_flds, bmapbtd_size, + FTARG_SIZE, NULL, bmapbtd_flds }, + { FLDT_BMAPBTDKEY, "bmapbtdkey", fp_sarray, (char *)bmapbtd_key_flds, + SI(bitsz(xfs_bmbt_key_t)), 0, NULL, bmapbtd_key_flds }, + { FLDT_BMAPBTDPTR, "bmapbtdptr", fp_num, "%llu", + SI(bitsz(xfs_bmbt_ptr_t)), 0, fa_dfsbno, NULL }, + { FLDT_BMAPBTDREC, "bmapbtdrec", fp_sarray, (char *)bmapbtd_rec_flds, + SI(bitsz(xfs_bmbt_rec_t)), 0, NULL, bmapbtd_rec_flds }, + { FLDT_BMROOTA, "bmroota", NULL, (char *)bmroota_flds, bmroota_size, + FTARG_SIZE, NULL, bmroota_flds }, + { FLDT_BMROOTAKEY, "bmrootakey", fp_sarray, (char *)bmroota_key_flds, + SI(bitsz(xfs_bmdr_key_t)), 0, NULL, bmroota_key_flds }, + { FLDT_BMROOTAPTR, "bmrootaptr", fp_num, "%llu", + SI(bitsz(xfs_bmdr_ptr_t)), 0, fa_dfsbno, NULL }, + { FLDT_BMROOTD, "bmrootd", NULL, (char *)bmrootd_flds, bmrootd_size, + FTARG_SIZE, NULL, bmrootd_flds }, + { FLDT_BMROOTDKEY, "bmrootdkey", fp_sarray, (char *)bmrootd_key_flds, + SI(bitsz(xfs_bmdr_key_t)), 0, NULL, bmrootd_key_flds }, + { FLDT_BMROOTDPTR, "bmrootdptr", fp_num, "%llu", + SI(bitsz(xfs_bmdr_ptr_t)), 0, fa_dfsbno, NULL }, + { FLDT_BNOBT, "bnobt", NULL, (char *)bnobt_flds, bnobt_size, FTARG_SIZE, + NULL, bnobt_flds }, + { FLDT_BNOBTKEY, "bnobtkey", fp_sarray, (char *)bnobt_key_flds, + SI(bitsz(xfs_alloc_key_t)), 0, NULL, bnobt_key_flds }, + { FLDT_BNOBTPTR, "bnobtptr", fp_num, "%u", SI(bitsz(xfs_alloc_ptr_t)), + 0, fa_agblock, NULL }, + { FLDT_BNOBTREC, "bnobtrec", fp_sarray, (char *)bnobt_rec_flds, + SI(bitsz(xfs_alloc_rec_t)), 0, NULL, bnobt_rec_flds }, + { FLDT_CEXTFLG, "cextflag", 
fp_num, "%u", SI(BMBT_EXNTFLAG_BITLEN), 0, + NULL, NULL }, + { FLDT_CEXTLEN, "cextlen", fp_num, "%u", SI(BMBT_BLOCKCOUNT_BITLEN), 0, + NULL, NULL }, + { FLDT_CFILEOFFA, "cfileoffa", fp_num, "%llu", SI(BMBT_STARTOFF_BITLEN), + 0, fa_cfileoffa, NULL }, + { FLDT_CFILEOFFD, "cfileoffd", fp_num, "%llu", SI(BMBT_STARTOFF_BITLEN), + 0, fa_cfileoffd, NULL }, + { FLDT_CFSBLOCK, "cfsblock", fp_num, "%llu", SI(BMBT_STARTBLOCK_BITLEN), + 0, fa_cfsblock, NULL }, + { FLDT_CHARNS, "charns", fp_charns, NULL, SI(bitsz(char)), 0, NULL, + NULL }, + { FLDT_CHARS, "chars", fp_num, "%c", SI(bitsz(char)), 0, NULL, NULL }, + { FLDT_CNTBT, "cntbt", NULL, (char *)cntbt_flds, cntbt_size, FTARG_SIZE, + NULL, cntbt_flds }, + { FLDT_CNTBTKEY, "cntbtkey", fp_sarray, (char *)cntbt_key_flds, + SI(bitsz(xfs_alloc_key_t)), 0, NULL, cntbt_key_flds }, + { FLDT_CNTBTPTR, "cntbtptr", fp_num, "%u", SI(bitsz(xfs_alloc_ptr_t)), + 0, fa_agblock, NULL }, + { FLDT_CNTBTREC, "cntbtrec", fp_sarray, (char *)cntbt_rec_flds, + SI(bitsz(xfs_alloc_rec_t)), 0, NULL, cntbt_rec_flds }, + { FLDT_DEV, "dev", fp_num, "%#x", SI(bitsz(xfs_dev_t)), 0, NULL, NULL }, + { FLDT_DFILOFFA, "dfiloffa", fp_num, "%llu", SI(bitsz(xfs_dfiloff_t)), + 0, fa_dfiloffa, NULL }, + { FLDT_DFILOFFD, "dfiloffd", fp_num, "%llu", SI(bitsz(xfs_dfiloff_t)), + 0, fa_dfiloffd, NULL }, + { FLDT_DFSBNO, "dfsbno", fp_num, "%llu", SI(bitsz(xfs_dfsbno_t)), + FTARG_DONULL, fa_dfsbno, NULL }, + { FLDT_DINODE_A, "dinode_a", NULL, (char *)inode_a_flds, inode_a_size, + FTARG_SIZE|FTARG_OKEMPTY, NULL, inode_a_flds }, + { FLDT_DINODE_CORE, "dinode_core", NULL, (char *)inode_core_flds, + SI(bitsz(xfs_dinode_core_t)), 0, NULL, inode_core_flds }, + { FLDT_DINODE_FMT, "dinode_fmt", fp_dinode_fmt, NULL, + SI(bitsz(__int8_t)), 0, NULL, NULL }, + { FLDT_DINODE_U, "dinode_u", NULL, (char *)inode_u_flds, inode_u_size, + FTARG_SIZE|FTARG_OKEMPTY, NULL, inode_u_flds }, + { FLDT_DIR, "dir", NULL, (char *)dir_flds, dir_size, FTARG_SIZE, NULL, + dir_flds }, + { FLDT_DIR2, 
"dir2", NULL, (char *)dir2_flds, dir2_size, FTARG_SIZE, + NULL, dir2_flds }, + { FLDT_DIR2_BLOCK_TAIL, "dir2_block_tail", NULL, + (char *)dir2_block_tail_flds, SI(bitsz(xfs_dir2_block_tail_t)), 0, + NULL, dir2_block_tail_flds }, + { FLDT_DIR2_DATA_FREE, "dir2_data_free", NULL, + (char *)dir2_data_free_flds, SI(bitsz(xfs_dir2_data_free_t)), 0, NULL, + dir2_data_free_flds }, + { FLDT_DIR2_DATA_HDR, "dir2_data_hdr", NULL, (char *)dir2_data_hdr_flds, + SI(bitsz(xfs_dir2_data_hdr_t)), 0, NULL, dir2_data_hdr_flds }, + { FLDT_DIR2_DATA_OFF, "dir2_data_off", fp_num, "%#x", + SI(bitsz(xfs_dir2_data_off_t)), 0, NULL, NULL }, + { FLDT_DIR2_DATA_OFFNZ, "dir2_data_offnz", fp_num, "%#x", + SI(bitsz(xfs_dir2_data_off_t)), FTARG_SKIPZERO, NULL, NULL }, + { FLDT_DIR2_DATA_UNION, "dir2_data_union", NULL, + (char *)dir2_data_union_flds, dir2_data_union_size, FTARG_SIZE, NULL, + dir2_data_union_flds }, + { FLDT_DIR2_FREE_HDR, "dir2_free_hdr", NULL, (char *)dir2_free_hdr_flds, + SI(bitsz(xfs_dir2_free_hdr_t)), 0, NULL, dir2_free_hdr_flds }, + { FLDT_DIR2_INO4, "dir2_ino4", fp_num, "%u", SI(bitsz(xfs_dir2_ino4_t)), + 0, fa_ino4, NULL }, + { FLDT_DIR2_INO8, "dir2_ino8", fp_num, "%llu", + SI(bitsz(xfs_dir2_ino8_t)), 0, fa_ino8, NULL }, + { FLDT_DIR2_INOU, "dir2_inou", NULL, (char *)dir2_inou_flds, + dir2_inou_size, FTARG_SIZE, NULL, dir2_inou_flds }, + { FLDT_DIR2_LEAF_ENTRY, "dir2_leaf_entry", NULL, + (char *)dir2_leaf_entry_flds, SI(bitsz(xfs_dir2_leaf_entry_t)), 0, + NULL, dir2_leaf_entry_flds }, + { FLDT_DIR2_LEAF_HDR, "dir2_leaf_hdr", NULL, (char *)dir2_leaf_hdr_flds, + SI(bitsz(xfs_dir2_leaf_hdr_t)), 0, NULL, dir2_leaf_hdr_flds }, + { FLDT_DIR2_LEAF_TAIL, "dir2_leaf_tail", NULL, + (char *)dir2_leaf_tail_flds, SI(bitsz(xfs_dir2_leaf_tail_t)), 0, NULL, + dir2_leaf_tail_flds }, + { FLDT_DIR2_SF_ENTRY, "dir2_sf_entry", NULL, (char *)dir2_sf_entry_flds, + dir2_sf_entry_size, FTARG_SIZE, NULL, dir2_sf_entry_flds }, + { FLDT_DIR2_SF_HDR, "dir2_sf_hdr", NULL, (char *)dir2_sf_hdr_flds, + 
dir2_sf_hdr_size, FTARG_SIZE, NULL, dir2_sf_hdr_flds }, + { FLDT_DIR2_SF_OFF, "dir2_sf_off", fp_num, "%#x", + SI(bitsz(xfs_dir2_sf_off_t)), 0, NULL, NULL }, + { FLDT_DIR2SF, "dir2sf", NULL, (char *)dir2sf_flds, dir2sf_size, + FTARG_SIZE, NULL, dir2sf_flds }, + { FLDT_DIR_BLKINFO, "dir_blkinfo", NULL, (char *)dir_blkinfo_flds, + SI(bitsz(struct xfs_da_blkinfo)), 0, NULL, dir_blkinfo_flds }, + { FLDT_DIR_INO, "dir_ino", fp_num, "%llu", SI(bitsz(xfs_dir_ino_t)), 0, + fa_ino, NULL }, + { FLDT_DIR_LEAF_ENTRY, "dir_leaf_entry", fp_sarray, + (char *)dir_leaf_entry_flds, SI(bitsz(struct xfs_dir_leaf_entry)), 0, + NULL, dir_leaf_entry_flds }, + { FLDT_DIR_LEAF_HDR, "dir_leaf_hdr", NULL, (char *)dir_leaf_hdr_flds, + SI(bitsz(struct xfs_dir_leaf_hdr)), 0, NULL, dir_leaf_hdr_flds }, + { FLDT_DIR_LEAF_MAP, "dir_leaf_map", fp_sarray, + (char *)dir_leaf_map_flds, SI(bitsz(struct xfs_dir_leaf_map)), 0, + NULL, dir_leaf_map_flds }, + { FLDT_DIR_LEAF_NAME, "dir_leaf_name", NULL, (char *)dir_leaf_name_flds, + dir_leaf_name_size, FTARG_SIZE, NULL, dir_leaf_name_flds }, + { FLDT_DIR_NODE_ENTRY, "dir_node_entry", fp_sarray, + (char *)dir_node_entry_flds, SI(bitsz(struct xfs_da_node_entry)), 0, + NULL, dir_node_entry_flds }, + { FLDT_DIR_NODE_HDR, "dir_node_hdr", NULL, (char *)dir_node_hdr_flds, + SI(bitsz(struct xfs_da_node_hdr)), 0, NULL, dir_node_hdr_flds }, + { FLDT_DIR_SF_ENTRY, "dir_sf_entry", NULL, (char *)dir_sf_entry_flds, + dir_sf_entry_size, FTARG_SIZE, NULL, dir_sf_entry_flds }, + { FLDT_DIR_SF_HDR, "dir_sf_hdr", NULL, (char *)dir_sf_hdr_flds, + SI(bitsz(struct xfs_dir_sf_hdr)), 0, NULL, dir_sf_hdr_flds }, + { FLDT_DIRBLOCK, "dirblock", fp_num, "%u", SI(bitsz(__uint32_t)), 0, + fa_dirblock, NULL }, + { FLDT_DIRSHORT, "dirshort", NULL, (char *)dir_shortform_flds, + dirshort_size, FTARG_SIZE, NULL, dir_shortform_flds }, + { FLDT_DISK_DQUOT, "disk_dquot", NULL, (char *)disk_dquot_flds, + SI(bitsz(xfs_disk_dquot_t)), 0, NULL, disk_dquot_flds }, + { FLDT_DQBLK, "dqblk", NULL, 
(char *)dqblk_flds, SI(bitsz(xfs_dqblk_t)), + 0, NULL, dqblk_flds }, + { FLDT_DQID, "dqid", fp_num, "%d", SI(bitsz(xfs_dqid_t)), 0, NULL, + NULL }, + { FLDT_DRFSBNO, "drfsbno", fp_num, "%llu", SI(bitsz(xfs_drfsbno_t)), + FTARG_DONULL, fa_drfsbno, NULL }, + { FLDT_DRTBNO, "drtbno", fp_num, "%llu", SI(bitsz(xfs_drtbno_t)), + FTARG_DONULL, fa_drtbno, NULL }, + { FLDT_EXTLEN, "extlen", fp_num, "%u", SI(bitsz(xfs_extlen_t)), 0, NULL, + NULL }, + { FLDT_EXTNUM, "extnum", fp_num, "%d", SI(bitsz(xfs_extnum_t)), + FTARG_SIGNED, NULL, NULL }, + { FLDT_FSIZE, "fsize", fp_num, "%lld", SI(bitsz(xfs_fsize_t)), + FTARG_SIGNED, NULL, NULL }, + { FLDT_INO, "ino", fp_num, "%llu", SI(bitsz(xfs_ino_t)), FTARG_DONULL, + fa_ino, NULL }, + { FLDT_INOBT, "inobt", NULL, (char *)inobt_flds, inobt_size, + FTARG_SIZE, NULL, inobt_flds }, + { FLDT_INOBTKEY, "inobtkey", fp_sarray, (char *)inobt_key_flds, + SI(bitsz(xfs_inobt_key_t)), 0, NULL, inobt_key_flds }, + { FLDT_INOBTPTR, "inobtptr", fp_num, "%u", SI(bitsz(xfs_inobt_ptr_t)), + 0, fa_agblock, NULL }, + { FLDT_INOBTREC, "inobtrec", fp_sarray, (char *)inobt_rec_flds, + SI(bitsz(xfs_inobt_rec_t)), 0, NULL, inobt_rec_flds }, + { FLDT_INODE, "inode", NULL, (char *)inode_flds, inode_size, FTARG_SIZE, + NULL, inode_flds }, + { FLDT_INOFREE, "inofree", fp_num, "%#llx", SI(bitsz(xfs_inofree_t)), 0, + NULL, NULL }, + { FLDT_INT16D, "int16d", fp_num, "%d", SI(bitsz(__int16_t)), + FTARG_SIGNED, NULL, NULL }, + { FLDT_INT32D, "int32d", fp_num, "%d", SI(bitsz(__int32_t)), + FTARG_SIGNED, NULL, NULL }, + { FLDT_INT64D, "int64d", fp_num, "%lld", SI(bitsz(__int64_t)), + FTARG_SIGNED, NULL, NULL }, + { FLDT_INT8D, "int8d", fp_num, "%d", SI(bitsz(__int8_t)), FTARG_SIGNED, + NULL, NULL }, + { FLDT_NSEC, "nsec", fp_num, "%09d", SI(bitsz(__int32_t)), FTARG_SIGNED, + NULL, NULL }, + { FLDT_QCNT, "qcnt", fp_num, "%llu", SI(bitsz(xfs_qcnt_t)), 0, NULL, + NULL }, + { FLDT_QWARNCNT, "qwarncnt", fp_num, "%u", SI(bitsz(xfs_qwarncnt_t)), 0, + NULL, NULL }, + { 
FLDT_SB, "sb", NULL, (char *)sb_flds, sb_size, FTARG_SIZE, NULL,
+	 sb_flds },
+	{ FLDT_TIME, "time", fp_time, NULL, SI(bitsz(__int32_t)), FTARG_SIGNED,
+	 NULL, NULL },
+	{ FLDT_TIMESTAMP, "timestamp", NULL, (char *)timestamp_flds,
+	 SI(bitsz(xfs_timestamp_t)), 0, NULL, timestamp_flds },
+	{ FLDT_UINT1, "uint1", fp_num, "%u", SI(1), 0, NULL, NULL },
+	{ FLDT_UINT16D, "uint16d", fp_num, "%u", SI(bitsz(__uint16_t)), 0, NULL,
+	 NULL },
+	{ FLDT_UINT16O, "uint16o", fp_num, "%#o", SI(bitsz(__uint16_t)), 0,
+	 NULL, NULL },
+	{ FLDT_UINT16X, "uint16x", fp_num, "%#x", SI(bitsz(__uint16_t)), 0,
+	 NULL, NULL },
+	{ FLDT_UINT32D, "uint32d", fp_num, "%u", SI(bitsz(__uint32_t)), 0, NULL,
+	 NULL },
+	{ FLDT_UINT32O, "uint32o", fp_num, "%#o", SI(bitsz(__uint32_t)), 0,
+	 NULL, NULL },
+	{ FLDT_UINT32X, "uint32x", fp_num, "%#x", SI(bitsz(__uint32_t)), 0,
+	 NULL, NULL },
+	{ FLDT_UINT64D, "uint64d", fp_num, "%llu", SI(bitsz(__uint64_t)), 0,
+	 NULL, NULL },
+	{ FLDT_UINT64O, "uint64o", fp_num, "%#llo", SI(bitsz(__uint64_t)), 0,
+	 NULL, NULL },
+	{ FLDT_UINT64X, "uint64x", fp_num, "%#llx", SI(bitsz(__uint64_t)), 0,
+	 NULL, NULL },
+	{ FLDT_UINT8D, "uint8d", fp_num, "%u", SI(bitsz(__uint8_t)), 0, NULL,
+	 NULL },
+	{ FLDT_UINT8O, "uint8o", fp_num, "%#o", SI(bitsz(__uint8_t)), 0, NULL,
+	 NULL },
+	{ FLDT_UINT8X, "uint8x", fp_num, "%#x", SI(bitsz(__uint8_t)), 0, NULL,
+	 NULL },
+	{ FLDT_UUID, "uuid", fp_uuid, NULL, SI(bitsz(uuid_t)), 0, NULL, NULL },
+	{ FLDT_ZZZ, NULL }
+};
+
+/*
+ * Return the offset of element idx of field f.
+ *
+ * When FLD_OFFSET is clear, f->offset is not a real function pointer but a
+ * constant stored in the pointer (recovered with the __psint_t cast).  For
+ * an array field the element offset is that constant plus
+ * (idx - abase) * fsize(), where abase is 1 if FLD_ABASE1 is set, else 0.
+ * When FLD_OFFSET is set, f->offset is called with (obj, startoff, idx).
+ *
+ * NOTE(review): the unit is presumably bits, since the sizes in ftattrtab
+ * are built with bitsz() - confirm against the callers.
+ */
+int
+bitoffset(
+	const field_t *f,
+	void *obj,
+	int startoff,
+	int idx)
+{
+
+	if (!(f->flags & FLD_OFFSET)) {
+		if (f->flags & FLD_ARRAY) {
+			int abase;
+#ifdef DEBUG
+			/* only referenced by the ASSERTs below */
+			const ftattr_t *fa = &ftattrtab[f->ftyp];
+#endif
+
+			abase = (f->flags & FLD_ABASE1) != 0;
+			/* table entry must sit at the index named by its own ftyp */
+			ASSERT(fa->ftyp == f->ftyp);
+			/* constant-offset arrays need a constant element size */
+			ASSERT((fa->arg & FTARG_SIZE) == 0);
+			return (int)(__psint_t)f->offset +
+				(idx - abase) * fsize(f, obj, startoff, idx);
+		} else
+			return (int)(__psint_t)f->offset;
+	} else
+		return (*f->offset)(obj, startoff, idx);
+}
+
+/*
+ * Return the element count of field f.  When FLD_COUNT is clear the count
+ * is a constant stored in f->count; otherwise f->count is called with
+ * (obj, startoff) to compute it.
+ */
+int
+fcount(
+	const field_t *f,
+	void *obj,
+	int startoff)
+{
+	if (!(f->flags & FLD_COUNT))
+		return (int)(__psint_t)f->count;
+	else
+		return (*f->count)(obj, startoff);
+}
+
+/*
+ * Search the NULL-name-terminated array "fields" for an entry called
+ * "name" whose fcount() for this object is non-zero.  Returns the entry,
+ * or NULL if there is no such entry.
+ */
+const field_t *
+findfield(
+	char *name,
+	const field_t *fields,
+	void *obj,
+	int startoff)
+{
+	const field_t *f;
+
+	/* we only match if this field name matches and has a non-zero count */
+	for (f = fields; f->name; f++)
+		if (strcmp(f->name, name) == 0 && fcount(f, obj, startoff))
+			return f;
+	return NULL;
+}
+
+/*
+ * Return the size of one element of field f, taken from the ftattrtab
+ * entry for f->ftyp.  When FTARG_SIZE is clear the size is a constant
+ * stored in fa->size; otherwise fa->size is called with
+ * (obj, startoff, idx).
+ */
+int
+fsize(
+	const field_t *f,
+	void *obj,
+	int startoff,
+	int idx)
+{
+	const ftattr_t *fa;
+
+	fa = &ftattrtab[f->ftyp];
+	ASSERT(fa->ftyp == f->ftyp);
+	if (!(fa->arg & FTARG_SIZE))
+		return (int)(__psint_t)fa->size;
+	else
+		return (*fa->size)(obj, startoff, idx);
+}
diff --git a/db/field.h b/db/field.h
new file mode 100644
index 000000000..c5249f7da
--- /dev/null
+++ b/db/field.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Field type identifiers.
+ *
+ * These values are used directly as indices into ftattrtab[] (see
+ * "&ftattrtab[f->ftyp]" in field.c and flist.c, each followed by
+ * ASSERT(fa->ftyp == f->ftyp)), so the order here must stay in step with
+ * the order of the ftattrtab[] initializer.
+ */
+typedef enum fldt {
+	FLDT_AEXTNUM,
+	FLDT_AGBLOCK,
+	FLDT_AGBLOCKNZ,
+	FLDT_AGF,
+	FLDT_AGFL,
+	FLDT_AGI,
+	FLDT_AGINO,
+	FLDT_AGINONN,
+	FLDT_AGNUMBER,
+	FLDT_ATTR,
+	FLDT_ATTR_BLKINFO,
+	FLDT_ATTR_LEAF_ENTRY,
+	FLDT_ATTR_LEAF_HDR,
+	FLDT_ATTR_LEAF_MAP,
+	FLDT_ATTR_LEAF_NAME,
+	FLDT_ATTR_NODE_ENTRY,
+	FLDT_ATTR_NODE_HDR,
+	FLDT_ATTR_SF_ENTRY,
+	FLDT_ATTR_SF_HDR,
+	FLDT_ATTRBLOCK,
+	FLDT_ATTRSHORT,
+	FLDT_BMAPBTA,
+	FLDT_BMAPBTAKEY,
+	FLDT_BMAPBTAPTR,
+	FLDT_BMAPBTAREC,
+	FLDT_BMAPBTD,
+	FLDT_BMAPBTDKEY,
+	FLDT_BMAPBTDPTR,
+	FLDT_BMAPBTDREC,
+	FLDT_BMROOTA,
+	FLDT_BMROOTAKEY,
+	FLDT_BMROOTAPTR,
+	FLDT_BMROOTD,
+	FLDT_BMROOTDKEY,
+	FLDT_BMROOTDPTR,
+	FLDT_BNOBT,
+	FLDT_BNOBTKEY,
+	FLDT_BNOBTPTR,
+	FLDT_BNOBTREC,
+	FLDT_CEXTFLG,
+	FLDT_CEXTLEN,
+	FLDT_CFILEOFFA,
+	FLDT_CFILEOFFD,
+	FLDT_CFSBLOCK,
+	FLDT_CHARNS,
+	FLDT_CHARS,
+	FLDT_CNTBT,
+	FLDT_CNTBTKEY,
+	FLDT_CNTBTPTR,
+	FLDT_CNTBTREC,
+	FLDT_DEV,
+	FLDT_DFILOFFA,
+	FLDT_DFILOFFD,
+	FLDT_DFSBNO,
+	FLDT_DINODE_A,
+	FLDT_DINODE_CORE,
+	FLDT_DINODE_FMT,
+	FLDT_DINODE_U,
+	FLDT_DIR,
+	FLDT_DIR2,
+	FLDT_DIR2_BLOCK_TAIL,
+	FLDT_DIR2_DATA_FREE,
+	FLDT_DIR2_DATA_HDR,
+	FLDT_DIR2_DATA_OFF,
+	FLDT_DIR2_DATA_OFFNZ,
+	FLDT_DIR2_DATA_UNION,
+	FLDT_DIR2_FREE_HDR,
+	FLDT_DIR2_INO4,
+	FLDT_DIR2_INO8,
+	FLDT_DIR2_INOU,
+	FLDT_DIR2_LEAF_ENTRY,
+	FLDT_DIR2_LEAF_HDR,
+	FLDT_DIR2_LEAF_TAIL,
+	FLDT_DIR2_SF_ENTRY,
+	FLDT_DIR2_SF_HDR,
+	FLDT_DIR2_SF_OFF,
+	FLDT_DIR2SF,
+	FLDT_DIR_BLKINFO,
+	FLDT_DIR_INO,
+	FLDT_DIR_LEAF_ENTRY,
+	FLDT_DIR_LEAF_HDR,
+	FLDT_DIR_LEAF_MAP,
+	FLDT_DIR_LEAF_NAME,
+	FLDT_DIR_NODE_ENTRY,
+	FLDT_DIR_NODE_HDR,
+	FLDT_DIR_SF_ENTRY,
+	FLDT_DIR_SF_HDR,
+	FLDT_DIRBLOCK,
+	FLDT_DIRSHORT,
+	FLDT_DISK_DQUOT,
+	FLDT_DQBLK,
+	FLDT_DQID,
+	FLDT_DRFSBNO,
+	FLDT_DRTBNO,
+	FLDT_EXTLEN,
+	FLDT_EXTNUM,
+	FLDT_FSIZE,
+	FLDT_INO,
+	FLDT_INOBT,
+	FLDT_INOBTKEY,
+	FLDT_INOBTPTR,
+	FLDT_INOBTREC,
+	FLDT_INODE,
+	FLDT_INOFREE,
+	FLDT_INT16D,
+	FLDT_INT32D,
+	FLDT_INT64D,
+	FLDT_INT8D,
+	FLDT_NSEC,
+	FLDT_QCNT,
+	FLDT_QWARNCNT,
+	FLDT_SB,
+	FLDT_TIME,
+	FLDT_TIMESTAMP,
+	FLDT_UINT1,
+	FLDT_UINT16D,
+	FLDT_UINT16O,
+	FLDT_UINT16X,
+	FLDT_UINT32D,
+	FLDT_UINT32O,
+	FLDT_UINT32X,
+	FLDT_UINT64D,
+	FLDT_UINT64O,
+	FLDT_UINT64X,
+	FLDT_UINT8D,
+	FLDT_UINT8O,
+	FLDT_UINT8X,
+	FLDT_UUID,
+	FLDT_ZZZ /* mark last entry */
+} fldt_t;
+
+/*
+ * The offset, count and size members below are declared as function
+ * pointers but may instead carry a plain integer constant cast into the
+ * pointer by OI()/CI()/SI().  bitoffset(), fcount() and fsize() decide
+ * which interpretation applies from FLD_OFFSET, FLD_COUNT and FTARG_SIZE
+ * respectively.
+ */
+typedef int (*offset_fnc_t)(void *obj, int startoff, int idx);
+#define OI(o) ((offset_fnc_t)(__psint_t)(o))
+
+typedef int (*count_fnc_t)(void *obj, int startoff);
+#define CI(c) ((count_fnc_t)(__psint_t)(c))
+#define C1 CI(1)
+
+/*
+ * Description of one named field.  Arrays of these are terminated by an
+ * entry whose name is NULL (findfield() and flist_expand_structs() stop
+ * there).
+ */
+typedef struct field
+{
+	char *name;
+	fldt_t ftyp;		/* index into ftattrtab[] */
+	offset_fnc_t offset;	/* constant, or function if FLD_OFFSET */
+	count_fnc_t count;	/* constant, or function if FLD_COUNT */
+	int flags;		/* FLD_* values below */
+	typnm_t next;
+} field_t;
+
+/*
+ * flag values
+ */
+#define FLD_ABASE1 1 /* field array base is 1 not 0 */
+#define FLD_SKIPALL 2 /* skip this field in an all-fields print */
+#define FLD_ARRAY 4 /* this field is an array */
+#define FLD_OFFSET 8 /* offset value is a function pointer */
+#define FLD_COUNT 16 /* count value is a function pointer */
+
+typedef int (*size_fnc_t)(void *obj, int startoff, int idx);
+#define SI(s) ((size_fnc_t)(__psint_t)(s))
+
+/*
+ * Per-field-type attributes; ftattrtab[] holds one entry per fldt_t value.
+ */
+typedef struct ftattr
+{
+	fldt_t ftyp;		/* expected to equal the entry's own index */
+	char *name;
+	prfnc_t prfunc;		/* print function; NULL for types expanded via subfld */
+	char *fmtstr;		/* handed to prfunc as its fmtstr argument */
+	size_fnc_t size;	/* constant, or function if FTARG_SIZE */
+	int arg;		/* FTARG_* values below */
+	adfnc_t adfunc;
+	const field_t *subfld;	/* sub-field list, or NULL if none */
+} ftattr_t;
+extern const ftattr_t ftattrtab[];
+
+/*
+ * arg values
+ */
+#define FTARG_SKIPZERO 1 /* skip 0 words */
+#define FTARG_DONULL 2 /* make -1 words be "null" */
+#define FTARG_SKIPNULL 4 /* skip -1 words */
+#define FTARG_SIGNED 8 /* field value is signed */
+#define FTARG_SIZE 16 /* size field is a function */
+#define FTARG_SKIPNMS 32 /* skip printing names this time */
+#define FTARG_OKEMPTY 64 /* ok if this (union type) is empty */
+
+extern int bitoffset(const field_t *f, void *obj, int startoff,
+	 int idx);
+extern int fcount(const field_t *f, void *obj, int startoff);
+extern const field_t *findfield(char *name, const field_t *fields,
+	 void *obj, int startoff);
+extern int fsize(const field_t *f, void *obj, int startoff,
+	 int idx);
diff --git a/db/flist.c b/db/flist.c
new file mode 100644
index 000000000..fd5e19108
--- /dev/null
+++ b/db/flist.c
@@ -0,0 +1,437 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "flist.h"
+#include "debug.h"
+#include "output.h"
+#include "malloc.h"
+
+static void	flist_expand_arrays(flist_t *fl);
+static void	flist_expand_structs(flist_t *fl, void *obj);
+static flist_t	*flist_replicate(flist_t *fl);
+static ftok_t	*flist_split(char *s);
+static void	ftok_free(ftok_t *ft);
+
+/*
+ * Turn one node covering the index range low..high into a chain of
+ * sibling nodes, one per index.  The original node keeps index "low";
+ * a new node (with a private copy of the child list) is created for each
+ * remaining index and linked in ahead of the original next sibling.
+ */
+static void
+flist_expand_arrays(
+	flist_t		*fl)
+{
+	const field_t	*f;
+#ifdef DEBUG
+	const ftattr_t	*fa;
+#endif
+	int		high;
+	int		idx;
+	int		low;
+	flist_t		*new;
+	flist_t		*prev;
+	flist_t		*sib;
+
+	f = fl->fld;
+#ifdef DEBUG
+	fa = &ftattrtab[f->ftyp];
+#endif
+	ASSERT(fa->ftyp == f->ftyp);
+	ASSERT(f->flags & FLD_ARRAY);
+	low = fl->low;
+	high = fl->high;
+	fl->high = fl->low;
+	sib = fl->sibling;
+	for (idx = low + 1, prev = fl; idx <= high; idx++) {
+		new = flist_make(f->name);
+		new->fld = f;
+		new->low = new->high = idx;
+		new->flags |= FL_OKLOW | FL_OKHIGH;
+		new->child = flist_replicate(fl->child);
+		prev->sibling = new;
+		prev = new;
+	}
+	prev->sibling = sib;
+}
+
+/*
+ * Give a childless structured node one child per sub-field of its type,
+ * skipping sub-fields whose count is zero for this object and those
+ * flagged FLD_SKIPALL.
+ */
+static void
+flist_expand_structs(
+	flist_t		*fl,
+	void		*obj)
+{
+	const field_t	*cf;
+	const field_t	*f;
+	const ftattr_t	*fa;
+	flist_t		*new;
+	flist_t		*prev;
+
+	f = fl->fld;
+	fa = &ftattrtab[f->ftyp];
+	ASSERT(fa->ftyp == f->ftyp);
+	ASSERT(fa->subfld != NULL);
+	ASSERT(fl->child == NULL);
+	for (cf = fa->subfld, prev = NULL; cf->name != NULL; cf++) {
+		if (fcount(cf, obj, fl->offset) == 0)
+			continue;
+		if (cf->flags & FLD_SKIPALL)
+			continue;
+		new = flist_make(cf->name);
+		new->fld = cf;
+		if (prev)
+			prev->sibling = new;
+		else
+			fl->child = new;
+		prev = new;
+	}
+}
+
+/*
+ * Free a node, its children and all following siblings.
+ *
+ * Siblings are walked iteratively: flist_expand_arrays() creates one
+ * sibling per array element, so recursing along the sibling chain would
+ * use stack in proportion to the array length.  A NULL list is accepted.
+ */
+void
+flist_free(
+	flist_t		*fl)
+{
+	flist_t		*next;
+
+	while (fl) {
+		next = fl->sibling;
+		if (fl->child)
+			flist_free(fl->child);
+		if (fl->name)
+			xfree(fl->name);
+		xfree(fl);
+		fl = next;
+	}
+}
+
+/*
+ * Allocate a node with a private copy of "name" and every other member
+ * cleared.  Release it with flist_free().
+ */
+flist_t *
+flist_make(
+	char		*name)
+{
+	flist_t		*fl;
+
+	fl = xmalloc(sizeof(*fl));
+	fl->name = xstrdup(name);
+	fl->fld = NULL;
+	fl->child = NULL;
+	fl->sibling = NULL;
+	fl->low = 0;
+	fl->high = 0;
+	fl->flags = 0;
+	fl->offset = 0;
+	return fl;
+}
+
+/*
+ * Resolve a scanned list against the field table "fields" for the object
+ * at obj/startoff.  Each node on the sibling chain is bound to its
+ * field_t, its index range is validated (or defaulted to the whole
+ * array), its offset is computed, and array ranges and structures are
+ * expanded into explicit nodes; children are resolved recursively
+ * against the type's sub-field table.
+ *
+ * Returns 1 on success, or 0 after printing a message with dbprintf().
+ */
+int
+flist_parse(
+	const field_t	*fields,
+	flist_t		*fl,
+	void		*obj,
+	int		startoff)
+{
+	const field_t	*f;
+	const ftattr_t	*fa;
+	int		high;
+	int		low;
+
+	while (fl) {
+		f = findfield(fl->name, fields, obj, startoff);
+		if (f == NULL) {
+			dbprintf("field %s not found\n", fl->name);
+			return 0;
+		}
+		fl->fld = f;
+		fa = &ftattrtab[f->ftyp];
+		ASSERT(fa->ftyp == f->ftyp);
+		if (f->flags & FLD_ARRAY) {
+			low = (f->flags & FLD_ABASE1) != 0;
+			high = fcount(f, obj, startoff) + low - 1;
+			if (low > high) {
+				dbprintf("no elements in %s\n", fl->name);
+				return 0;
+			}
+			if (fl->flags & FL_OKHIGH) {
+				if (fl->low < low || fl->low > high ||
+				    fl->high < low || fl->high > high) {
+					dbprintf("indices %d-%d for field %s "
+						 "out of range %d-%d\n",
+						fl->low, fl->high, fl->name,
+						low, high);
+					return 0;
+				}
+			} else if (fl->flags & FL_OKLOW) {
+				if (fl->low < low || fl->low > high) {
+					dbprintf("index %d for field %s out of "
+						 "range %d-%d\n",
+						fl->low, fl->name, low, high);
+					return 0;
+				}
+				/* a single index selects exactly one element */
+				fl->high = fl->low;
+				fl->flags |= FL_OKHIGH;
+			} else {
+				/* no index given: select the whole array */
+				fl->low = low;
+				fl->high = high;
+				fl->flags |= FL_OKLOW | FL_OKHIGH;
+			}
+		} else {
+			if (fl->flags & FL_OKLOW) {
+				dbprintf("field %s is not an array\n",
+					fl->name);
+				return 0;
+			}
+		}
+		fl->offset = startoff + bitoffset(f, obj, startoff, fl->low);
+		/*
+		 * The new siblings made here are visited by later
+		 * iterations of this loop, which fill in their offsets.
+		 */
+		if ((fl->child != NULL || fa->prfunc == NULL) &&
+		    (f->flags & FLD_ARRAY) && fl->low != fl->high)
+			flist_expand_arrays(fl);
+		if (fa->prfunc == NULL && fl->child == NULL)
+			flist_expand_structs(fl, obj);
+		if (fl->child) {
+			if (fa->subfld == NULL) {
+				dbprintf("field %s has no subfields\n",
+					fl->name);
+				return 0;
+			}
+			if (!flist_parse(fa->subfld, fl->child, obj,
+					fl->offset))
+				return 0;
+		}
+		fl = fl->sibling;
+	}
+	return 1;
+}
+
+/*
+ * Dump a list when DEBUG_FLIST is set in debug_state.  The field details
+ * are only printed for nodes that flist_parse() has already bound to a
+ * field_t; fld is NULL until then.
+ */
+void
+flist_print(
+	flist_t		*fl)
+{
+	if (!(debug_state & DEBUG_FLIST))
+		return;
+	while (fl) {
+		dbprintf("fl@%p:\n", (void *)fl);
+		dbprintf("\tname=%s, fld=%p, child=%p, sibling=%p\n",
+			fl->name, (const void *)fl->fld, (void *)fl->child,
+			(void *)fl->sibling);
+		dbprintf("\tlow=%d, high=%d, flags=%d (%s%s), offset=%d\n",
+			fl->low, fl->high, fl->flags,
+			fl->flags & FL_OKLOW ? "oklow " : "",
+			fl->flags & FL_OKHIGH ? "okhigh" : "", fl->offset);
+		if (fl->fld != NULL) {
+			dbprintf("\tfld->name=%s, fld->ftyp=%d (%s)\n",
+				fl->fld->name, fl->fld->ftyp,
+				ftattrtab[fl->fld->ftyp].name);
+			dbprintf("\tfld->flags=%d (%s%s%s%s%s)\n",
+				fl->fld->flags,
+				fl->fld->flags & FLD_ABASE1 ? "abase1 " : "",
+				fl->fld->flags & FLD_SKIPALL ? "skipall " : "",
+				fl->fld->flags & FLD_ARRAY ? "array " : "",
+				fl->fld->flags & FLD_OFFSET ? "offset " : "",
+				fl->fld->flags & FLD_COUNT ? "count " : "");
+		}
+		if (fl->child)
+			flist_print(fl->child);
+		fl = fl->sibling;
+	}
+}
+
+/*
+ * Deep-copy a node together with its children and following siblings.
+ * Returns NULL for a NULL input.
+ */
+static flist_t *
+flist_replicate(
+	flist_t		*f)
+{
+	flist_t		*new;
+
+	if (f == NULL)
+		return NULL;
+	new = flist_make(f->name);
+	new->fld = f->fld;
+	new->child = flist_replicate(f->child);
+	new->sibling = flist_replicate(f->sibling);
+	new->low = f->low;
+	new->high = f->high;
+	new->flags = f->flags;
+	new->offset = f->offset;
+	return new;
+}
+
+/*
+ * Parse a field expression of the form  name[low-high].name[idx]...
+ * into a chain of nodes linked through "child".  FL_OKLOW is set when a
+ * low index was given and FL_OKHIGH when a high index was given.
+ *
+ * Returns the head of the chain, or NULL on a syntax error (after
+ * printing a message) or when the expression contains no tokens.
+ */
+flist_t *
+flist_scan(
+	char		*name)
+{
+	flist_t		*fl;
+	flist_t		*lfl;
+	flist_t		*nfl;
+	int		num;
+	ftok_t		*p;
+	ftok_t		*v;
+	char		*x;
+
+	v = flist_split(name);
+	if (!v)
+		return NULL;
+	p = v;
+	fl = lfl = NULL;
+	while (p->tokty != TT_END) {
+		if (p->tokty != TT_NAME)
+			goto bad;
+		nfl = flist_make(p->tok);
+		if (lfl)
+			lfl->child = nfl;
+		else
+			fl = nfl;
+		lfl = nfl;
+		p++;
+		if (p->tokty == TT_LB) {
+			p++;
+			if (p->tokty != TT_NUM)
+				goto bad;
+			num = (int)strtoul(p->tok, &x, 0);
+			if (*x != '\0')
+				goto bad;
+			nfl->flags |= FL_OKLOW;
+			nfl->low = num;
+			p++;
+			if (p->tokty == TT_DASH) {
+				p++;
+				if (p->tokty != TT_NUM)
+					goto bad;
+				num = (int)strtoul(p->tok, &x, 0);
+				if (*x != '\0')
+					goto bad;
+				nfl->flags |= FL_OKHIGH;
+				nfl->high = num;
+				p++;
+			}
+			if (p->tokty != TT_RB)
+				goto bad;
+			p++;
+		}
+		if (p->tokty == TT_DOT) {
+			p++;
+			/* a trailing '.' with nothing after it is an error */
+			if (p->tokty == TT_END)
+				goto bad;
+		}
+	}
+	ftok_free(v);
+	return fl;
+bad:
+	dbprintf("bad syntax in field name %s\n", name);
+	ftok_free(v);
+	if (fl)
+		flist_free(fl);
+	return NULL;
+}
+
+/*
+ * Split a field expression into tokens: names (lower-case letter first,
+ * then lower-case letters, digits or '_'), numbers (digit first, then
+ * digits or x/X), double-quoted strings (closed by the LAST quote in the
+ * input) and the punctuation characters "[-].".
+ *
+ * Returns an xmalloc'd array ending in an entry with tok == NULL and
+ * tokty == TT_END, to be released with ftok_free(); returns NULL after
+ * printing a message if the input contains an invalid character or an
+ * unterminated string.
+ */
+static ftok_t *
+flist_split(
+	char		*s)
+{
+	static const char	initidchar[] = "abcdefghijklmnopqrstuvwxyz";
+	static const char	idchars[] =
+		"abcdefghijklmnopqrstuvwxyz0123456789_";
+	static const char	numchars[] = "0123456789";
+	/* extended for hex conversion */
+	static const char	xnumchars[] = "0123456789xX";
+	static const char	punctchars[] = "[-].";
+	static const tokty_t	puncttypes[] =
+		{ TT_LB, TT_DASH, TT_RB, TT_DOT };
+	char		*a;
+	int		l;
+	int		nv;
+	int		tailskip;
+	tokty_t		t;
+	ftok_t		*v;
+
+	nv = 0;
+	v = xmalloc(sizeof(*v));
+	v[0].tok = NULL;
+	v[0].tokty = TT_END;
+	while (*s) {
+		tailskip = 0;
+		/* need to add string handling */
+		if (*s == '\"') {
+			s++;	/* skip first quote */
+			if ((a = strrchr(s, '\"')) == NULL) {
+				dbprintf("missing closing quote %s\n", s);
+				ftok_free(v);
+				return NULL;
+			}
+			tailskip = 1;	/* skip remaining quote */
+			l = (int)(a - s);
+			t = TT_STRING;
+		} else if (strchr(initidchar, *s)) {
+			l = (int)strspn(s, idchars);
+			t = TT_NAME;
+		} else if (strchr(numchars, *s)) {
+			l = (int)strspn(s, xnumchars);
+			t = TT_NUM;
+		} else if ((a = strchr(punctchars, *s)) != NULL) {
+			l = 1;
+			t = puncttypes[a - punctchars];
+		} else {
+			dbprintf("bad character in field %s\n", s);
+			ftok_free(v);
+			return NULL;
+		}
+		a = xmalloc(l + 1);
+		memcpy(a, s, l);
+		a[l] = '\0';
+		v = xrealloc(v, (nv + 2) * sizeof(*v));
+		v[nv].tok = a;
+		v[nv].tokty = t;
+		nv++;
+		/*
+		 * Keep the array terminated after every token: the error
+		 * paths above hand it to ftok_free(), which walks entries
+		 * until it finds tok == NULL.
+		 */
+		v[nv].tok = NULL;
+		v[nv].tokty = TT_END;
+		s += l + tailskip;
+	}
+	return v;
+}
+
+/*
+ * Free a token array from flist_split(): every token string up to the
+ * tok == NULL terminator, then the array itself.
+ */
+static void
+ftok_free(
+	ftok_t		*ft)
+{
+	ftok_t		*p;
+
+	for (p = ft; p->tok; p++)
+		xfree(p->tok);
+	xfree(ft);
+}
diff --git a/db/flist.h b/db/flist.h
new file mode 100644
index 000000000..eab154d7b
--- /dev/null
+++ b/db/flist.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+struct field;
+
+/*
+ * One node of a parsed field expression.  flist_scan() links the parts
+ * of "a.b.c" through child; flist_parse() adds sibling nodes when it
+ * expands array ranges and structures.
+ */
+typedef struct flist {
+	char *name;			/* private copy, freed by flist_free() */
+	const struct field *fld;	/* NULL until set by flist_parse() */
+	struct flist *child;
+	struct flist *sibling;
+	int low;			/* first selected array index */
+	int high;			/* last selected array index */
+	int flags;			/* FL_* values below */
+	int offset;			/* set by flist_parse() from bitoffset() */
+} flist_t;
+
+/*
+ * Flags for flist
+ */
+#define FL_OKLOW 1	/* low holds a valid index */
+#define FL_OKHIGH 2	/* high holds a valid index */
+
+/*
+ * Token types produced by flist_split(); TT_END terminates the array.
+ */
+typedef enum tokty {
+	TT_NAME, TT_NUM, TT_STRING, TT_LB, TT_RB, TT_DASH, TT_DOT, TT_END
+} tokty_t;
+
+/*
+ * One token: its text (NULL in the terminating entry) and its type.
+ */
+typedef struct ftok {
+	char *tok;
+	tokty_t tokty;
+} ftok_t;
+
+extern void flist_free(flist_t *fl);
+extern flist_t *flist_make(char *name);
+extern int flist_parse(const struct field *fields, flist_t *fl, void *obj,
+	 int startoff);
+extern void flist_print(flist_t *fl);
+extern flist_t *flist_scan(char *name);
diff --git a/db/fprint.c b/db/fprint.c
new file mode 100644
index 000000000..3e4a9bf80
--- /dev/null
+++ b/db/fprint.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include
+#include
+#include
+#include
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "inode.h"
+#include "inobt.h"
+#include "bit.h"
+#include "print.h"
+#include "output.h"
+#include "sig.h"
+#include "malloc.h"
+
+/*
+ * Map a control character to the letter of its C escape sequence
+ * ('\n' -> 'n', '\t' -> 't', ...).  Returns 0 if the character has no
+ * single-letter escape.
+ */
+static int
+fp_ctl_escape(
+	int	c)
+{
+	switch (c) {
+	case '\a':
+		return 'a';
+	case '\b':
+		return 'b';
+	case '\f':
+		return 'f';
+	case '\n':
+		return 'n';
+	case '\r':
+		return 'r';
+	case '\t':
+		return 't';
+	case '\v':
+		return 'v';
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Print "count" bytes starting at bit offset "bit" of obj as one
+ * double-quoted string.  Backslash, quote and '?' characters are
+ * backslash-escaped, control characters with a C escape are printed as
+ * that escape, and any other non-printable byte is printed as a
+ * three-digit octal escape.  Stops early if seenint() reports an
+ * interrupt.  Always returns 1.
+ */
+int
+fp_charns(
+	void	*obj,
+	int	bit,
+	int	count,
+	char	*fmtstr,
+	int	size,
+	int	arg,
+	int	base,
+	int	array)
+{
+	int	esc;
+	int	i;
+	char	*p;
+
+	ASSERT(bitoffs(bit) == 0);
+	ASSERT(size == bitsz(char));
+	dbprintf("\"");
+	for (i = 0, p = (char *)obj + byteize(bit);
+	     i < count && !seenint();
+	     i++, p++) {
+		if (*p == '\\' || *p == '\'' || *p == '"' || *p == '\?')
+			dbprintf("\\%c", *p);
+		/* <ctype.h> functions require an unsigned char value */
+		else if (isgraph((unsigned char)*p) || *p == ' ')
+			dbprintf("%c", *p);
+		/*
+		 * Look the letter up explicitly: the escape letters are not
+		 * at a fixed distance from their control codes.
+		 */
+		else if ((esc = fp_ctl_escape(*p)) != 0)
+			dbprintf("\\%c", esc);
+		else
+			dbprintf("\\%03o", *p & 0xff);
+	}
+	dbprintf("\"");
+	return 1;
+}
+
+/*
+ * Print "count" integers of "size" bits each, starting at bit offset
+ * "bit" of obj, using the printf format "fmtstr".
+ *
+ * arg is a mask of FTARG_* values: FTARG_SIGNED extracts the value as
+ * signed; FTARG_SKIPZERO skips zero values; FTARG_SKIPNULL skips all-ones
+ * values; FTARG_DONULL prints all-ones values as "null".  When "array" is
+ * set each value is prefixed with its index, counted from "base".
+ * Stops early if seenint() reports an interrupt.  Always returns 1.
+ */
+int
+fp_num(
+	void		*obj,
+	int		bit,
+	int		count,
+	char		*fmtstr,
+	int		size,
+	int		arg,
+	int		base,
+	int		array)
+{
+	int		bitpos;
+	int		i;
+	int		isnull;
+	__int64_t	val;
+
+	for (i = 0, bitpos = bit;
+	     i < count && !seenint();
+	     i++, bitpos += size) {
+		val = getbitval(obj, bitpos, size,
+			(arg & FTARG_SIGNED) ? BVSIGNED : BVUNSIGNED);
+		if ((arg & FTARG_SKIPZERO) && val == 0)
+			continue;
+		/*
+		 * "null" is all ones: -1 for signed or full 64-bit values,
+		 * otherwise the largest value that fits in "size" bits.
+		 */
+		if ((arg & FTARG_SIGNED) || size == 64)
+			isnull = (val == -1LL);
+		else
+			isnull = (val == ((1LL << size) - 1LL));
+		if ((arg & FTARG_SKIPNULL) && isnull)
+			continue;
+		if (array)
+			dbprintf("%d:", i + base);
+		if ((arg & FTARG_DONULL) && isnull)
+			dbprintf("null");
+		else if (size > 32)
+			dbprintf(fmtstr, val);
+		else
+			dbprintf(fmtstr, (__int32_t)val);
+		if (i < count - 1)
+			dbprintf(" ");
+	}
+	return 1;
+}
+
+/*
+ * Print an array of structures by handing it to print_sarray().  For
+ * these types "fmtstr" carries the sub-field table rather than a format
+ * string.  Always returns 1.
+ */
+/*ARGSUSED*/
+int
+fp_sarray(
+	void	*obj,
+	int	bit,
+	int	count,
+	char	*fmtstr,
+	int	size,
+	int	arg,
+	int	base,
+	int	array)
+{
+	print_sarray(obj, bit, count, size, base, array,
+		(const field_t *)fmtstr, (arg & FTARG_SKIPNMS) != 0);
+	return 1;
+}
+
+/*
+ * Print "count" time values of "size" bits each, starting at bit offset
+ * "bit" of obj, in ctime() format without the trailing newline.
+ *
+ * Exactly "size" bits are read per value (the loop already advances by
+ * "size"), so a time_t wider than the stored field does not pull in the
+ * bytes that follow it.  The value is extracted as signed when arg has
+ * FTARG_SIGNED, as in fp_num().  Stops early if seenint() reports an
+ * interrupt.  Always returns 1.
+ */
+/*ARGSUSED*/
+int
+fp_time(
+	void	*obj,
+	int	bit,
+	int	count,
+	char	*fmtstr,
+	int	size,
+	int	arg,
+	int	base,
+	int	array)
+{
+	int	bitpos;
+	char	*c;
+	int	i;
+	time_t	t;
+
+	ASSERT(bitoffs(bit) == 0);
+	for (i = 0, bitpos = bit;
+	     i < count && !seenint();
+	     i++, bitpos += size) {
+		if (array)
+			dbprintf("%d:", i + base);
+		t = (time_t)getbitval((char *)obj + byteize(bitpos), 0, size,
+			(arg & FTARG_SIGNED) ? BVSIGNED : BVUNSIGNED);
+		c = ctime(&t);
+		/* ctime() may return NULL for an unrepresentable time */
+		dbprintf("%24.24s", c != NULL ? c : "(invalid time)");
+		if (i < count - 1)
+			dbprintf(" ");
+	}
+	return 1;
+}
+
+/*
+ * Print "count" UUIDs starting at bit offset "bit" of obj in the textual
+ * form produced by uuid_unparse().  Stops early if seenint() reports an
+ * interrupt.  Always returns 1.
+ */
+/*ARGSUSED*/
+int
+fp_uuid(
+	void	*obj,
+	int	bit,
+	int	count,
+	char	*fmtstr,
+	int	size,
+	int	arg,
+	int	base,
+	int	array)
+{
+	char	bp[40];	/* UUID string is 36 chars + trailing '\0' */
+	int	i;
+	uuid_t	*p;
+
+	ASSERT(bitoffs(bit) == 0);
+	for (p = (uuid_t *)((char *)obj + byteize(bit)), i = 0;
+	     i < count && !seenint();
+	     i++, p++) {
+		if (array)
+			dbprintf("%d:", i + base);
+		uuid_unparse(*p, bp);
+		dbprintf("%s", bp);
+		if (i < count - 1)
+			dbprintf(" ");
+	}
+	return 1;
+}
diff --git a/db/fprint.h b/db/fprint.h
new file mode 100644
index 000000000..01c9385b8
--- /dev/null
+++ b/db/fprint.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +typedef int (*prfnc_t)(void *obj, int bit, int count, char *fmtstr, int size, + int arg, int base, int array); + +extern int fp_charns(void *obj, int bit, int count, char *fmtstr, int size, + int arg, int base, int array); +extern int fp_num(void *obj, int bit, int count, char *fmtstr, int size, + int arg, int base, int array); +extern int fp_sarray(void *obj, int bit, int count, char *fmtstr, int size, + int arg, int base, int array); +extern int fp_time(void *obj, int bit, int count, char *fmtstr, int size, + int arg, int base, int array); +extern int fp_uuid(void *obj, int bit, int count, char *fmtstr, int size, + int arg, int base, int array); diff --git a/db/frag.c b/db/frag.c new file mode 100644 index 000000000..100d11aa8 --- /dev/null +++ b/db/frag.c @@ -0,0 +1,534 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include
+#include
+#include
+#include "bmap.h"
+#include "command.h"
+#include "data.h"
+#include "frag.h"
+#include "io.h"
+#include "output.h"
+#include "type.h"
+#include "mount.h"
+#include "malloc.h"
+
+/* One extent of a file fork: starting file offset and length in blocks. */
+typedef struct extent {
+	xfs_fileoff_t	startoff;
+	xfs_filblks_t	blockcount;
+} extent_t;
+
+/*
+ * Growable list of the extents of one fork.  "naents" entries are
+ * allocated, the first "nents" are in use.  ents[] is declared with one
+ * element and over-allocated through EXTMAP_SIZE().
+ */
+typedef struct extmap {
+	int		naents;
+	int		nents;
+	extent_t	ents[1];
+} extmap_t;
+/* bytes needed for an extmap_t holding n extents */
+#define	EXTMAP_SIZE(n)	\
+	(offsetof(extmap_t, ents) + (sizeof(extent_t) * (n)))
+
+/* Command-line selections and running totals; all are reset by init(). */
+static int		aflag;
+static int		dflag;
+static __uint64_t	extcount_actual;
+static __uint64_t	extcount_ideal;
+static int		fflag;
+static int		lflag;
+static int		qflag;
+static int		Rflag;
+static int		rflag;
+static int		vflag;
+
+/* callback invoked by scan_lbtree() on each long-form btree block read */
+typedef void	(*scan_lbtree_f_t)(xfs_btree_lblock_t	*block,
+				   int			level,
+				   extmap_t		**extmapp,
+				   typnm_t		btype);
+
+/* callback invoked by scan_sbtree() on each short-form btree block read */
+typedef void	(*scan_sbtree_f_t)(xfs_btree_sblock_t	*block,
+				   int			level,
+				   xfs_agf_t		*agf);
+
+static extmap_t		*extmap_alloc(xfs_extnum_t nex);
+static xfs_extnum_t	extmap_ideal(extmap_t *extmap);
+static void		extmap_set_ext(extmap_t **extmapp, xfs_fileoff_t o,
+				       xfs_extlen_t c);
+static int		frag_f(int argc, char **argv);
+static int		init(int argc, char **argv);
+static void		process_bmbt_reclist(xfs_bmbt_rec_32_t *rp, int numrecs,
+					     extmap_t **extmapp);
+static void		process_btinode(xfs_dinode_t *dip, extmap_t **extmapp,
+					int whichfork);
+static void		process_exinode(xfs_dinode_t *dip, extmap_t **extmapp,
+					int whichfork);
+static void		process_fork(xfs_dinode_t *dip, int whichfork);
+static void		process_inode(xfs_agf_t *agf, xfs_agino_t agino,
+				      xfs_dinode_t *dip);
+static void		scan_ag(xfs_agnumber_t agno);
+static void		scan_lbtree(xfs_fsblock_t root, int nlevels,
+				    scan_lbtree_f_t func, extmap_t **extmapp,
+				    typnm_t btype);
+static void		scan_sbtree(xfs_agf_t *agf, xfs_agblock_t root,
+				    int nlevels, scan_sbtree_f_t func,
+				    typnm_t btype);
+static void		scanfunc_bmap(xfs_btree_lblock_t *ablock, int level,
+				      extmap_t **extmapp, typnm_t btype);
+static void		scanfunc_ino(xfs_btree_sblock_t *ablock, int level,
+				     xfs_agf_t *agf);
+
+/*
+ * Command table entry.  The argument summary lists every option that
+ * init() accepts through getopt ("adflqRrv").
+ */
+static const cmdinfo_t	frag_cmd =
+	{ "frag", NULL, frag_f, 0, -1, 0,
+	  "[-a] [-d] [-f] [-l] [-q] [-R] [-r] [-v]",
+	  "get file fragmentation data", NULL };
+
+/*
+ * Allocate an empty extent map with room for "nex" extents (at least
+ * one).  The caller releases it with xfree().
+ */
+static extmap_t *
+extmap_alloc(
+	xfs_extnum_t	nex)
+{
+	extmap_t	*extmap;
+
+	if (nex < 1)
+		nex = 1;
+	extmap = xmalloc(EXTMAP_SIZE(nex));
+	extmap->naents = nex;
+	extmap->nents = 0;
+	return extmap;
+}
+
+/*
+ * Return the number of extents the map would need if every run of
+ * extents that is contiguous in file offset were a single extent: the
+ * first extent counts, and so does every extent that does not start
+ * where its predecessor ends.
+ */
+static xfs_extnum_t
+extmap_ideal(
+	extmap_t	*extmap)
+{
+	extent_t	*ep;
+	xfs_extnum_t	rval;
+
+	for (ep = &extmap->ents[0], rval = 0;
+	     ep < &extmap->ents[extmap->nents];
+	     ep++) {
+		if (ep == &extmap->ents[0] ||
+		    ep->startoff != ep[-1].startoff + ep[-1].blockcount)
+			rval++;
+	}
+	return rval;
+}
+
+/*
+ * Append the extent (offset "o", length "c") to *extmapp.  A full map
+ * is grown by one entry with xrealloc(), so *extmapp may be replaced by
+ * a new pointer.
+ */
+static void
+extmap_set_ext(
+	extmap_t	**extmapp,
+	xfs_fileoff_t	o,
+	xfs_extlen_t	c)
+{
+	extmap_t	*extmap;
+	extent_t	*ent;
+
+	extmap = *extmapp;
+	if (extmap->nents == extmap->naents) {
+		extmap->naents++;
+		extmap = xrealloc(extmap, EXTMAP_SIZE(extmap->naents));
+		*extmapp = extmap;
+	}
+	ent = &extmap->ents[extmap->nents];
+	ent->startoff = o;
+	ent->blockcount = c;
+	extmap->nents++;
+}
+
+/* Register the "frag" command with the command table. */
+void
+frag_init(void)
+{
+	add_command(&frag_cmd);
+}
+
+/*
+ * Get file fragmentation information.
+ */
+static int
+frag_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_agnumber_t	agno;
+	double		answer;
+
+	if (!init(argc, argv))
+		return 0;
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)
+		scan_ag(agno);
+	/* percentage of the actual extents that exceed the ideal count */
+	if (extcount_actual)
+		answer = (double)(extcount_actual - extcount_ideal) * 100.0 /
+			 (double)extcount_actual;
+	else
+		answer = 0.0;
+	dbprintf("actual %llu, ideal %llu, fragmentation factor %.2f%%\n",
+		extcount_actual, extcount_ideal, answer);
+	return 0;
+}
+
+/*
+ * Parse the frag command line and reset the running totals.  If none
+ * of -a -d -f -l -q -R -r is given, all seven are turned on; -v is
+ * never implied.  Returns 1 on success, 0 after reporting a bad option.
+ */
+static int
+init(
+	int		argc,
+	char		**argv)
+{
+	int		c;
+
+	aflag = dflag = fflag = lflag = qflag = Rflag = rflag = vflag = 0;
+	optind = 0;
+	while ((c = getopt(argc, argv, "adflqRrv")) != EOF) {
+		switch (c) {
+		case 'a':
+			aflag = 1;
+			break;
+		case 'd':
+			dflag = 1;
+			break;
+		case 'f':
+			fflag = 1;
+			break;
+		case 'l':
+			lflag = 1;
+			break;
+		case 'q':
+			qflag = 1;
+			break;
+		case 'R':
+			Rflag = 1;
+			break;
+		case 'r':
+			rflag = 1;
+			break;
+		case 'v':
+			vflag = 1;
+			break;
+		default:
+			dbprintf("bad option for frag command\n");
+			return 0;
+		}
+	}
+	if (!aflag && !dflag && !fflag && !lflag && !qflag && !Rflag && !rflag)
+		aflag = dflag = fflag = lflag = qflag = Rflag = rflag = 1;
+	extcount_actual = extcount_ideal = 0;
+	return 1;
+}
+
+/*
+ * Decode "numrecs" extent records starting at "rp" and append the
+ * offset and length of each to *extmapp.
+ */
+static void
+process_bmbt_reclist(
+	xfs_bmbt_rec_32_t	*rp,
+	int			numrecs,
+	extmap_t		**extmapp)
+{
+	xfs_dfilblks_t		c;
+	int			f;
+	int			i;
+	xfs_dfiloff_t		o;
+	xfs_dfsbno_t		s;
+
+	for (i = 0; i < numrecs; i++, rp++) {
+		/* only offset (o) and count (c) are used below */
+		convert_extent((xfs_bmbt_rec_64_t *)rp, &o, &s, &c, &f);
+		extmap_set_ext(extmapp, (xfs_fileoff_t)o, (xfs_extlen_t)c);
+	}
+}
+
+/*
+ * Collect the extents of a btree-format fork.  A level-0 root holds the
+ * records itself; otherwise every child pointer in the root is walked
+ * with scan_lbtree().
+ */
+static void
+process_btinode(
+	xfs_dinode_t		*dip,
+	extmap_t		**extmapp,
+	int			whichfork)
+{
+	xfs_bmdr_block_t	*dib;
+	int			i;
+	xfs_bmbt_ptr_t		*pp;
+	xfs_bmbt_rec_32_t	*rp;
+
+	dib = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
+	if (INT_GET(dib->bb_level, ARCH_CONVERT) == 0) {
+		rp = (xfs_bmbt_rec_32_t *)XFS_BTREE_REC_ADDR(
+			XFS_DFORK_SIZE(dip, mp, whichfork),
+			xfs_bmdr, dib, 1,
+			XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE(dip, mp,
+					whichfork),
+				xfs_bmdr, 1));
+		process_bmbt_reclist(rp,
+			INT_GET(dib->bb_numrecs, ARCH_CONVERT), extmapp);
+		return;
+	}
+	pp = XFS_BTREE_PTR_ADDR(XFS_DFORK_SIZE(dip, mp, whichfork),
+		xfs_bmdr, dib, 1,
+		XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE(dip, mp, whichfork),
+					xfs_bmdr, 0));
+	for (i = 0; i < INT_GET(dib->bb_numrecs, ARCH_CONVERT); i++)
+		scan_lbtree((xfs_fsblock_t)INT_GET(pp[i], ARCH_CONVERT),
+			INT_GET(dib->bb_level, ARCH_CONVERT), scanfunc_bmap,
+			extmapp,
+			whichfork == XFS_DATA_FORK ? TYP_BMAPBTD : TYP_BMAPBTA);
+}
+
+/*
+ * Collect the extents of an extents-format fork, whose records are
+ * stored directly in the inode fork.
+ */
+static void
+process_exinode(
+	xfs_dinode_t		*dip,
+	extmap_t		**extmapp,
+	int			whichfork)
+{
+	xfs_bmbt_rec_32_t	*rp;
+
+	rp = (xfs_bmbt_rec_32_t *)XFS_DFORK_PTR(dip, whichfork);
+	process_bmbt_reclist(rp, XFS_DFORK_NEXTENTS(dip, whichfork), extmapp);
+}
+
+/*
+ * Add one fork's actual and ideal extent counts to the running totals.
+ * Forks with no extents are ignored; forks in any format other than
+ * extents or btree contribute an empty map.
+ */
+static void
+process_fork(
+	xfs_dinode_t	*dip,
+	int		whichfork)
+{
+	extmap_t	*extmap;
+	int		nex;
+
+	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
+	if (!nex)
+		return;
+	extmap = extmap_alloc(nex);
+	switch (XFS_DFORK_FORMAT(dip, whichfork)) {
+	case XFS_DINODE_FMT_EXTENTS:
+		process_exinode(dip, &extmap, whichfork);
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		process_btinode(dip, &extmap, whichfork);
+		break;
+	}
+	extcount_actual += extmap->nents;
+	extcount_ideal += extmap_ideal(extmap);
+	xfree(extmap);
+}
+
+/*
+ * Account for one in-use inode.  The data fork is processed only when
+ * the option matching the inode's kind is set (directory -d, regular
+ * file -f, symlink -l, realtime file -r, realtime bitmap/summary inode
+ * -R, quota inode -q); the attribute fork only with -a and when the
+ * inode has one.  With -v the per-inode contribution is printed.
+ */
+static void
+process_inode(
+	xfs_agf_t		*agf,
+	xfs_agino_t		agino,
+	xfs_dinode_t		*dip)
+{
+	__uint64_t		actual;
+	xfs_dinode_core_t	*dic;
+	__uint64_t		ideal;
+	xfs_ino_t		ino;
+	int			skipa;
+	int			skipd;
+
+	dic = &dip->di_core;
+	ino = XFS_AGINO_TO_INO(mp, INT_GET(agf->agf_seqno, ARCH_CONVERT), agino);
+	switch (INT_GET(dic->di_mode, ARCH_CONVERT) & IFMT) {
+	case IFDIR:
+		skipd = !dflag;
+		break;
+	case IFREG:
+		if (!rflag && (INT_GET(dic->di_flags, ARCH_CONVERT) & XFS_DIFLAG_REALTIME))
+			skipd = 1;
+		else if (!Rflag &&
+			 (ino == mp->m_sb.sb_rbmino ||
+			  ino == mp->m_sb.sb_rsumino))
+			skipd = 1;
+		else if (!qflag &&
+			 (ino == mp->m_sb.sb_uquotino ||
+			  ino == mp->m_sb.sb_pquotino))
+			skipd = 1;
+		else
+			skipd = !fflag;
+		break;
+	case IFLNK:
+		skipd = !lflag;
+		break;
+	default:
+		skipd = 1;
+		break;
+	}
+	/* snapshot the totals so -v can report this inode's share */
+	actual = extcount_actual;
+	ideal = extcount_ideal;
+	if (!skipd)
+		process_fork(dip, XFS_DATA_FORK);
+	skipa = !aflag || !XFS_DFORK_Q(dip);
+	if (!skipa)
+		process_fork(dip, XFS_ATTR_FORK);
+	if (vflag && (!skipd || !skipa))
+		dbprintf("inode %lld actual %lld ideal %lld\n",
+			ino, extcount_actual - actual, extcount_ideal - ideal);
+}
+
+/*
+ * Scan one allocation group: read its AGF and AGI headers, then walk
+ * the inode btree rooted in the AGI.  Both cursor-stack entries are
+ * popped on every path.
+ */
+static void
+scan_ag(
+	xfs_agnumber_t	agno)
+{
+	xfs_agf_t	*agf;
+	xfs_agi_t	*agi;
+
+	push_cur();
+	set_cur(&typtab[TYP_AGF], XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), 1,
+		DB_RING_IGN, NULL);
+	if ((agf = iocur_top->data) == NULL) {
+		dbprintf("can't read agf block for ag %u\n", agno);
+		pop_cur();
+		return;
+	}
+	push_cur();
+	set_cur(&typtab[TYP_AGI], XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), 1,
+		DB_RING_IGN, NULL);
+	if ((agi = iocur_top->data) == NULL) {
+		dbprintf("can't read agi block for ag %u\n", agno);
+		pop_cur();
+		pop_cur();
+		return;
+	}
+	scan_sbtree(agf,
+		INT_GET(agi->agi_root, ARCH_CONVERT),
+		INT_GET(agi->agi_level, ARCH_CONVERT),
+		scanfunc_ino, TYP_INOBT);
+	pop_cur();
+	pop_cur();
+}
+
+/*
+ * Read the long-form btree block at filesystem block "root" and pass it
+ * to "func" with level nlevels - 1.  The cursor pushed here is popped
+ * on the read-failure path as well as after the callback.
+ */
+static void
+scan_lbtree(
+	xfs_fsblock_t	root,
+	int		nlevels,
+	scan_lbtree_f_t	func,
+	extmap_t	**extmapp,
+	typnm_t		btype)
+{
+	push_cur();
+	set_cur(&typtab[btype], XFS_FSB_TO_DADDR(mp, root), blkbb, DB_RING_IGN,
+		NULL);
+	if (iocur_top->data == NULL) {
+		dbprintf("can't read btree block %u/%u\n",
+			XFS_FSB_TO_AGNO(mp, root),
+			XFS_FSB_TO_AGBNO(mp, root));
+		pop_cur();
+		return;
+	}
+	(*func)(iocur_top->data, nlevels - 1, extmapp, btype);
+	pop_cur();
+}
+
+/*
+ * Read the short-form btree block at AG block "root" (in the AG named
+ * by agf->agf_seqno) and pass it to "func" with level nlevels - 1.
+ * The cursor pushed here is popped on the read-failure path as well as
+ * after the callback.
+ */
+static void
+scan_sbtree(
+	xfs_agf_t	*agf,
+	xfs_agblock_t	root,
+	int		nlevels,
+	scan_sbtree_f_t	func,
+	typnm_t		btype)
+{
+	xfs_agnumber_t	seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+
+	push_cur();
+	set_cur(&typtab[btype], XFS_AGB_TO_DADDR(mp, seqno, root),
+		blkbb, DB_RING_IGN, NULL);
+	if (iocur_top->data == NULL) {
+		dbprintf("can't read btree block %u/%u\n", seqno, root);
+		pop_cur();
+		return;
+	}
+	(*func)(iocur_top->data, nlevels - 1, agf);
+	pop_cur();
+}
+
+/*
+ * scan_lbtree() callback for bmap btree blocks: leaf blocks contribute
+ * their records to *extmapp, interior blocks recurse into each child.
+ */
+static void
+scanfunc_bmap(
+	xfs_btree_lblock_t	*ablock,
+	int			level,
+	extmap_t		**extmapp,
+	typnm_t			btype)
+{
+	xfs_bmbt_block_t	*block = (xfs_bmbt_block_t *)ablock;
+	int			i;
+	xfs_bmbt_ptr_t		*pp;
+	xfs_bmbt_rec_32_t	*rp;
+
+	if (level == 0) {
+		rp = (xfs_bmbt_rec_32_t *)
+			XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+				block, 1, mp->m_bmap_dmxr[0]);
+		process_bmbt_reclist(rp,
+			INT_GET(block->bb_numrecs, ARCH_CONVERT), extmapp);
+		return;
+	}
+	/*
+	 * NOTE(review): the pointer address is computed with
+	 * m_bmap_dmxr[0], while scanfunc_ino() uses index [1] of its
+	 * table for interior blocks - confirm [0] is intended here.
+	 */
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1,
+		mp->m_bmap_dmxr[0]);
+	for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+		scan_lbtree(INT_GET(pp[i], ARCH_CONVERT), level, scanfunc_bmap,
+			extmapp, btype);
+}
+
+/*
+ * scan_sbtree() callback for inode btree blocks.  For every record in
+ * a leaf block the inode chunk is read and each inode not marked free
+ * is handed to process_inode(); interior blocks recurse into each
+ * child.
+ */
+static void
+scanfunc_ino(
+	xfs_btree_sblock_t	*ablock,
+	int			level,
+	xfs_agf_t		*agf)
+{
+	xfs_agino_t		agino;
+	xfs_inobt_block_t	*block = (xfs_inobt_block_t *)ablock;
+	xfs_agnumber_t		seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+	int			i;
+	int			j;
+	int			off;
+	xfs_inobt_ptr_t		*pp;
+	xfs_inobt_rec_t		*rp;
+
+	if (level == 0) {
+		rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block,
+			1, mp->m_inobt_mxr[0]);
+		for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
+			agino = INT_GET(rp[i].ir_startino, ARCH_CONVERT);
+			off = XFS_INO_TO_OFFSET(mp, agino);
+			push_cur();
+			set_cur(&typtab[TYP_INODE],
+				XFS_AGB_TO_DADDR(mp, seqno,
+						 XFS_AGINO_TO_AGBNO(mp, agino)),
+				(int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)),
+				DB_RING_IGN, NULL);
+			if (iocur_top->data == NULL) {
+				dbprintf("can't read inode block %u/%u\n",
+					seqno, XFS_AGINO_TO_AGBNO(mp, agino));
+				/* undo the push_cur() above before moving on */
+				pop_cur();
+				continue;
+			}
+			for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
+				xfs_dinode_t		*dip;
+				xfs_dinode_core_t	tdic;
+
+				dip = (xfs_dinode_t *)((char *)iocur_top->data +
+					((off + j) << mp->m_sb.sb_inodelog));
+
+				/* convert the core, then copy it back into the inode */
+				libxfs_xlate_dinode_core((xfs_caddr_t)
+					&dip->di_core, &tdic, 1, ARCH_CONVERT);
+				memcpy(&dip->di_core, &tdic,
+					sizeof(xfs_dinode_core_t));
+
+				if (XFS_INOBT_IS_FREE(&rp[i], j, ARCH_CONVERT))
+					continue;
+				process_inode(agf, agino + j, dip);
+			}
+			pop_cur();
+		}
+		return;
+	}
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block, 1,
+		mp->m_inobt_mxr[1]);
+	for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+		scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), level,
+			scanfunc_ino, TYP_INOBT);
+}
diff --git a/db/frag.h b/db/frag.h
new file mode 100644
index 000000000..b7986ed70
--- /dev/null
+++ b/db/frag.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern void frag_init(void); diff --git a/db/freesp.c b/db/freesp.c new file mode 100644 index 000000000..160d2360c --- /dev/null +++ b/db/freesp.c @@ -0,0 +1,427 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include
+#include
+#include "command.h"
+#include "data.h"
+#include "freesp.h"
+#include "io.h"
+#include "type.h"
+#include "output.h"
+#include "mount.h"
+#include "malloc.h"
+
+/*
+ * One histogram bin: free extents whose length lies in [low, high],
+ * with the number of extents and the blocks they cover.
+ */
+typedef struct histent
+{
+	int		low;
+	int		high;
+	long long	count;
+	long long	blocks;
+} histent_t;
+
+static void	addhistent(int h);
+static void	addtohist(xfs_agnumber_t agno, xfs_agblock_t agbno,
+			  xfs_extlen_t len);
+static int	freesp_f(int argc, char **argv);
+static void	histinit(int maxlen);
+static int	init(int argc, char **argv);
+static void	printhist(void);
+static void	scan_ag(xfs_agnumber_t agno);
+static void	scanfunc_bno(xfs_btree_sblock_t *ablock, typnm_t typ, int level,
+			     xfs_agf_t *agf);
+static void	scanfunc_cnt(xfs_btree_sblock_t *ablock, typnm_t typ, int level,
+			     xfs_agf_t *agf);
+static void	scan_freelist(xfs_agf_t *agf);
+static void	scan_sbtree(xfs_agf_t *agf, xfs_agblock_t root, typnm_t typ,
+			    int nlevels,
+			    void (*func)(xfs_btree_sblock_t *block, typnm_t typ,
+					 int level, xfs_agf_t *agf));
+static int	usage(void);
+
+/* Command state; all of it is reset by init() on every invocation. */
+static int		agcount;
+static xfs_agnumber_t	*aglist;
+static int		countflag;
+static int		dumpflag;
+static int		equalsize;
+static histent_t	*hist;
+static int		histcount;
+static int		multsize;
+static int		seen1;
+static int		summaryflag;
+static long long	totblocks;
+static long long	totexts;
+
+static const cmdinfo_t	freesp_cmd =
+	{ "freesp", NULL, freesp_f, 0, -1, 0,
+	  "[-bcdfs] [-a agno]... [-e binsize] [-h h1]... [-m binmult]",
+	  "summarize free space for filesystem", NULL };
+
+/*
+ * Return 1 if "agno" should be scanned: either no -a option was given,
+ * or agno is one of the listed allocation groups.
+ */
+static int
+inaglist(
+	xfs_agnumber_t	agno)
+{
+	int		i;
+
+	if (agcount == 0)
+		return 1;
+	for (i = 0; i < agcount; i++)
+		if (aglist[i] == agno)
+			return 1;
+	return 0;
+}
+
+/*
+ * Report on freespace usage in xfs filesystem.
+ */
+static int
+freesp_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_agnumber_t	agno;
+
+	/* init() may already have allocated aglist/hist when it fails */
+	if (!init(argc, argv))
+		goto out;
+	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)  {
+		if (inaglist(agno))
+			scan_ag(agno);
+	}
+	if (histcount)
+		printhist();
+	if (summaryflag) {
+		dbprintf("total free extents %lld\n", totexts);
+		dbprintf("total free blocks %lld\n", totblocks);
+		/* no extents at all: report 0 rather than dividing by zero */
+		dbprintf("average free extent size %g\n",
+			totexts ? (double)totblocks / (double)totexts : 0.0);
+	}
+out:
+	if (aglist) {
+		xfree(aglist);
+		aglist = NULL;
+	}
+	if (hist) {
+		xfree(hist);
+		hist = NULL;
+	}
+	return 0;
+}
+
+/* Register the "freesp" command with the command table. */
+void
+freesp_init(void)
+{
+	add_command(&freesp_cmd);
+}
+
+/* Append the allocation group number given by the string "a" to aglist. */
+static void
+aglistadd(
+	char	*a)
+{
+	aglist = xrealloc(aglist, (agcount + 1) * sizeof(*aglist));
+	aglist[agcount] = (xfs_agnumber_t)atoi(a);
+	agcount++;
+}
+
+/*
+ * Parse the freesp command line and build the histogram bins.  Only
+ * one of -b, -e, -m or a series of -h options may select the binning;
+ * with none of them -b (multiplier 2) is used.  A bin size below 1 or
+ * a multiplier below 2 is rejected, because histinit() could never
+ * finish building bins from such a value.
+ * Returns 1 on success, 0 after printing the usage message.
+ */
+static int
+init(
+	int		argc,
+	char		**argv)
+{
+	int		c;
+	int		speced = 0;
+
+	agcount = countflag = dumpflag = equalsize = multsize = optind = 0;
+	histcount = seen1 = summaryflag = 0;
+	totblocks = totexts = 0;
+	aglist = NULL;
+	hist = NULL;
+	while ((c = getopt(argc, argv, "a:bcde:h:m:s")) != EOF) {
+		switch (c) {
+		case 'a':
+			aglistadd(optarg);
+			break;
+		case 'b':
+			if (speced)
+				return usage();
+			multsize = 2;
+			speced = 1;
+			break;
+		case 'c':
+			countflag = 1;
+			break;
+		case 'd':
+			dumpflag = 1;
+			break;
+		case 'e':
+			if (speced)
+				return usage();
+			equalsize = atoi(optarg);
+			if (equalsize <= 0)
+				return usage();
+			speced = 1;
+			break;
+		case 'h':
+			if (speced && !histcount)
+				return usage();
+			addhistent(atoi(optarg));
+			speced = 1;
+			break;
+		case 'm':
+			if (speced)
+				return usage();
+			multsize = atoi(optarg);
+			if (multsize < 2)
+				return usage();
+			speced = 1;
+			break;
+		case 's':
+			summaryflag = 1;
+			break;
+		case '?':
+			return usage();
+		}
+	}
+	if (optind != argc)
+		return usage();
+	if (!speced)
+		multsize = 2;
+	histinit((int)mp->m_sb.sb_agblocks);
+	return 1;
+}
+
+/* Print the argument summary; returns 0 so callers can "return usage()". */
+static int
+usage(void)
+{
+	dbprintf("freesp arguments: [-bcdfs] [-a agno] [-e binsize] [-h h1]... "
+		 "[-m binmult]\n");
+	return 0;
+}
+
+/*
+ * Account for the free space of one allocation group: its free list,
+ * then either the by-count btree (-c) or the by-block-number btree.
+ */
+static void
+scan_ag(
+	xfs_agnumber_t	agno)
+{
+	xfs_agf_t	*agf;
+
+	push_cur();
+	set_cur(&typtab[TYP_AGF], XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), 1,
+		DB_RING_IGN, NULL);
+	agf = iocur_top->data;
+	if (agf == NULL) {
+		dbprintf("can't read agf block for ag %u\n", agno);
+		pop_cur();
+		return;
+	}
+	scan_freelist(agf);
+	if (countflag)
+		scan_sbtree(agf,
+			INT_GET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT),
+			TYP_CNTBT,
+			INT_GET(agf->agf_levels[XFS_BTNUM_CNT], ARCH_CONVERT),
+			scanfunc_cnt);
+	else
+		scan_sbtree(agf,
+			INT_GET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT),
+			TYP_BNOBT,
+			INT_GET(agf->agf_levels[XFS_BTNUM_BNO], ARCH_CONVERT),
+			scanfunc_bno);
+	pop_cur();
+}
+
+/*
+ * Add every block on the AG free list to the histogram as a one-block
+ * extent.  The list is walked as a ring from agf_flfirst to agf_fllast,
+ * wrapping at XFS_AGFL_SIZE.
+ */
+static void
+scan_freelist(
+	xfs_agf_t	*agf)
+{
+	xfs_agnumber_t	seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+	xfs_agfl_t	*agfl;
+	xfs_agblock_t	bno;
+	int		i;
+
+	if (INT_GET(agf->agf_flcount, ARCH_CONVERT) == 0)
+		return;
+	push_cur();
+	set_cur(&typtab[TYP_AGFL],
+		XFS_AG_DADDR(mp, seqno, XFS_AGFL_DADDR), 1,
+		DB_RING_IGN, NULL);
+	agfl = iocur_top->data;
+	if (agfl == NULL) {
+		dbprintf("can't read agfl block for ag %u\n", seqno);
+		pop_cur();
+		return;
+	}
+	i = INT_GET(agf->agf_flfirst, ARCH_CONVERT);
+	for (;;) {
+		bno = INT_GET(agfl->agfl_bno[i], ARCH_CONVERT);
+		addtohist(seqno, bno, 1);
+		if (i == INT_GET(agf->agf_fllast, ARCH_CONVERT))
+			break;
+		if (++i == XFS_AGFL_SIZE)
+			i = 0;
+	}
+	pop_cur();
+}
+
+/*
+ * Read the btree block at AG block "root" (in the AG named by
+ * agf->agf_seqno) and pass it to "func" with level nlevels - 1.
+ */
+static void
+scan_sbtree(
+	xfs_agf_t	*agf,
+	xfs_agblock_t	root,
+	typnm_t		typ,
+	int		nlevels,
+	void		(*func)(xfs_btree_sblock_t	*block,
+				typnm_t			typ,
+				int			level,
+				xfs_agf_t		*agf))
+{
+	xfs_agnumber_t	seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+
+	push_cur();
+	set_cur(&typtab[typ],
+		XFS_AGB_TO_DADDR(mp, seqno, root),
+		blkbb, DB_RING_IGN, NULL);
+	if (iocur_top->data == NULL) {
+		dbprintf("can't read btree block %u/%u\n", seqno, root);
+		pop_cur();
+		return;
+	}
+	(*func)((xfs_btree_sblock_t *)iocur_top->data, typ, nlevels - 1, agf);
+	pop_cur();
+}
+
+/*
+ * scan_sbtree() callback for the by-block-number free space btree:
+ * leaf records go into the histogram, interior blocks recurse into
+ * each child.
+ */
+/*ARGSUSED*/
+static void
+scanfunc_bno(
+	xfs_btree_sblock_t	*ablock,
+	typnm_t			typ,
+	int			level,
+	xfs_agf_t		*agf)
+{
+	xfs_alloc_block_t	*block = (xfs_alloc_block_t *)ablock;
+	int			i;
+	xfs_alloc_ptr_t		*pp;
+	xfs_alloc_rec_t		*rp;
+
+	if (level == 0) {
+		rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block,
+			1, mp->m_alloc_mxr[0]);
+		for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+			addtohist(INT_GET(agf->agf_seqno, ARCH_CONVERT),
+				INT_GET(rp[i].ar_startblock, ARCH_CONVERT),
+				INT_GET(rp[i].ar_blockcount, ARCH_CONVERT));
+		return;
+	}
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1,
+		mp->m_alloc_mxr[1]);
+	/* child pointers are on-disk values and need INT_GET like the rest */
+	for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+		scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), typ, level,
+			scanfunc_bno);
+}
+
+/*
+ * scan_sbtree() callback for the by-count free space btree: leaf
+ * records go into the histogram, interior blocks recurse into each
+ * child.
+ */
+static void
+scanfunc_cnt(
+	xfs_btree_sblock_t	*ablock,
+	typnm_t			typ,
+	int			level,
+	xfs_agf_t		*agf)
+{
+	xfs_alloc_block_t	*block = (xfs_alloc_block_t *)ablock;
+	int			i;
+	xfs_alloc_ptr_t		*pp;
+	xfs_alloc_rec_t		*rp;
+
+	if (level == 0) {
+		rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block,
+			1, mp->m_alloc_mxr[0]);
+		for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+			addtohist(INT_GET(agf->agf_seqno, ARCH_CONVERT),
+				INT_GET(rp[i].ar_startblock, ARCH_CONVERT),
+				INT_GET(rp[i].ar_blockcount, ARCH_CONVERT));
+		return;
+	}
+	pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1,
+		mp->m_alloc_mxr[1]);
+	/* child pointers are on-disk values and need INT_GET like the rest */
+	for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++)
+		scan_sbtree(agf, INT_GET(pp[i], ARCH_CONVERT), typ, level,
+			scanfunc_cnt);
+}
+
+/*
+ * Append a histogram bin whose lower bound is "h" (0 is treated as 1).
+ * The upper bound is filled in later by histinit().
+ */
+static void
+addhistent(
+	int	h)
+{
+	hist = xrealloc(hist, (histcount + 1) * sizeof(*hist));
+	if (h == 0)
+		h = 1;
+	hist[histcount].low = h;
+	hist[histcount].count = hist[histcount].blocks = 0;
+	histcount++;
+	if (h == 1)
+		seen1 = 1;
+}
+
+/*
+ * Record one free extent: optionally dump it (-d), add it to the
+ * totals, and count it in the first bin whose upper bound is >= len.
+ */
+static void
+addtohist(
+	xfs_agnumber_t	agno,
+	xfs_agblock_t	agbno,
+	xfs_extlen_t	len)
+{
+	int		i;
+
+	if (dumpflag)
+		dbprintf("%8d %8d %8d\n", agno, agbno, len);
+	totexts++;
+	totblocks += len;
+	for (i = 0; i < histcount; i++) {
+		if (hist[i].high >= len) {
+			hist[i].count++;
+			hist[i].blocks += len;
+			break;
+		}
+	}
+}
+
+/* qsort() comparator ordering bins by ascending lower bound. */
+static int
+hcmp(
+	const void	*a,
+	const void	*b)
+{
+	int		la = ((const histent_t *)a)->low;
+	int		lb = ((const histent_t *)b)->low;
+
+	/* compare instead of subtracting so the result cannot overflow */
+	return (la > lb) - (la < lb);
+}
+
+/*
+ * Build the bin list for extents of up to "maxlen" blocks: equal-width
+ * bins (-e), geometrically growing bins (-b/-m), or the user's -h
+ * bounds sorted with a bin starting at 1 added if missing.  Each bin
+ * ends just below the next one; the last ends at maxlen.
+ */
+static void
+histinit(
+	int	maxlen)
+{
+	int	i;
+
+	if (equalsize) {
+		for (i = 1; i < maxlen; i += equalsize) {
+			addhistent(i);
+			/* next bound would reach maxlen; stop before adding */
+			if (equalsize >= maxlen - i)
+				break;
+		}
+	} else if (multsize) {
+		for (i = 1; i < maxlen; i *= multsize) {
+			addhistent(i);
+			/* next bound would pass maxlen; stop before multiplying */
+			if (i > maxlen / multsize)
+				break;
+		}
+	} else {
+		if (!seen1)
+			addhistent(1);
+		qsort(hist, histcount, sizeof(*hist), hcmp);
+	}
+	for (i = 0; i < histcount; i++) {
+		if (i < histcount - 1)
+			hist[i].high = hist[i + 1].low - 1;
+		else
+			hist[i].high = maxlen;
+	}
+}
+
+/*
+ * Print the non-empty bins: bounds, extent count, block count, and
+ * the bin's share of all free blocks in percent.
+ */
+static void
+printhist(void)
+{
+	int	i;
+
+	dbprintf("%7s %7s %7s %7s %6s\n",
+		"from", "to", "extents", "blocks", "pct");
+	for (i = 0; i < histcount; i++) {
+		if (hist[i].count)
+			dbprintf("%7d %7d %7lld %7lld %6.2f\n", hist[i].low,
+				hist[i].high, hist[i].count, hist[i].blocks,
+				hist[i].blocks * 100.0 / totblocks);
+	}
+}
diff --git a/db/freesp.h b/db/freesp.h
new file mode 100644
index 000000000..15ea50567
--- /dev/null
+++ b/db/freesp.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern void freesp_init(void); diff --git a/db/hash.c b/db/hash.c new file mode 100644 index 000000000..e11851b51 --- /dev/null +++ b/db/hash.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include
+#include "addr.h"
+#include "command.h"
+#include "type.h"
+#include "io.h"
+#include "output.h"
+
+static int hash_f(int argc, char **argv);
+static void hash_help(void);
+
+/* Command table entry: "hash" takes exactly one argument. */
+static const cmdinfo_t hash_cmd =
+	{ "hash", NULL, hash_f, 1, 1, 0, "string",
+	  "calculate hash value", hash_help };
+
+/* Print the extended help text for the "hash" command. */
+static void
+hash_help(void)
+{
+	static const char help_text[] =
+"\n"
+" 'hash' prints out the calculated hash value for a string using the\n"
+"directory/attribute code hash function.\n"
+"\n"
+" Usage: \"hash \"\n"
+"\n";
+
+	dbprintf(help_text);
+}
+
+/*
+ * Hash argv[1] with libxfs_da_hashname() and print the result in hex.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+static int
+hash_f(
+	int		argc,
+	char		**argv)
+{
+	char		*name = argv[1];
+	int		namelen = (int)strlen(name);
+
+	dbprintf("0x%x\n", libxfs_da_hashname(name, namelen));
+	return 0;
+}
+
+/* Register the "hash" command with the command table. */
+void
+hash_init(void)
+{
+	add_command(&hash_cmd);
+}
diff --git a/db/hash.h b/db/hash.h
new file mode 100644
index 000000000..05c807503
--- /dev/null
+++ b/db/hash.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern void hash_init(void); +extern xfs_dahash_t xfs_da_hashname(char *name, int namelen); diff --git a/db/help.c b/db/help.c new file mode 100644 index 000000000..56dd8b8ac --- /dev/null +++ b/db/help.c @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "command.h" +#include "help.h" +#include "output.h" + +static void help_all(void); +static void help_onecmd(const char *cmd, const cmdinfo_t *ct); +static int help_f(int argc, char **argv); +static void help_oneline(const char *cmd, const cmdinfo_t *ct); + +static const cmdinfo_t help_cmd = + { "help", "?", help_f, 0, 1, 0, "[command]", + "help for one or all commands", NULL }; + +static void +help_all(void) +{ + const cmdinfo_t *ct; + + for (ct = cmdtab; ct < &cmdtab[ncmds]; ct++) + help_oneline(ct->name, ct); + dbprintf("\nUse 'help commandname' for extended help.\n"); +} + +static int +help_f( + int argc, + char **argv) +{ + const cmdinfo_t *ct; + + if (argc == 1) { + help_all(); + return 0; + } + ct = find_command(argv[1]); + if (ct == NULL) { + dbprintf("command %s not found\n", argv[1]); + return 0; + } + help_onecmd(argv[1], ct); + return 0; +} + +void +help_init(void) +{ + add_command(&help_cmd); +} + +static void +help_onecmd( + const char *cmd, + const cmdinfo_t *ct) +{ + help_oneline(cmd, ct); + if (ct->help) + ct->help(); +} + +static void +help_oneline( + const char *cmd, + const cmdinfo_t *ct) +{ + if (cmd) + dbprintf("%s ", cmd); + else { + dbprintf("%s ", ct->name); + if (ct->altname) + dbprintf("(or %s) ", ct->altname); + } + if (ct->args) + dbprintf("%s ", ct->args); + dbprintf("-- %s\n", ct->oneline); +} + diff --git a/db/help.h b/db/help.h new file mode 100644 index 000000000..4af57d24e --- /dev/null +++ b/db/help.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern void help_init(void); diff --git a/db/init.c b/db/init.c new file mode 100644 index 000000000..e4233dfc1 --- /dev/null +++ b/db/init.c @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include +#include "command.h" +#include "data.h" +#include "init.h" +#include "input.h" +#include "io.h" +#include "mount.h" +#include "sig.h" +#include "output.h" + +char *fsdevice; + +static void +usage(void) +{ + dbprintf("Usage: %s [-c cmd]... 
[-p prog] [-l logdev] [-frxV] devname\n", progname); + exit(1); +} + +void +init( + int argc, + char **argv) +{ + int c; + FILE *cfile = NULL; + + progname = basename(argv[0]); + while ((c = getopt(argc, argv, "c:fip:rxVl:")) != EOF) { + switch (c) { + case 'c': + if (!cfile) + cfile = tmpfile(); + if (!cfile) { + perror("tmpfile"); + exit(1); + } + if (fprintf(cfile, "%s\n", optarg) < 0) { + perror("fprintf(tmpfile)"); + dbprintf("%s: error writing temporary file\n", + progname); + exit(1); + } + break; + case 'f': + xfsargs.disfile = 1; + break; + case 'i': + xfsargs.isreadonly = + (LIBXFS_ISREADONLY | LIBXFS_ISINACTIVE); + flag_readonly = 1; + break; + case 'p': + progname = optarg; + break; + case 'r': + xfsargs.isreadonly = LIBXFS_ISREADONLY; + flag_readonly = 1; + break; + case 'l': + xfsargs.logname = optarg; + break; + case 'x': + flag_expert_mode = 1; + break; + case 'V': + printf("%s version %s\n", progname, VERSION); + break; + case '?': + usage(); + /*NOTREACHED*/ + } + } + if (optind + 1 != argc) { + usage(); + /*NOTREACHED*/ + } + fsdevice = argv[optind]; + if (!xfsargs.disfile) + xfsargs.volname = fsdevice; + else + xfsargs.dname = fsdevice; + xfsargs.notvolok = 1; + if (!libxfs_init(&xfsargs)) { + fputs("\nfatal error -- couldn't initialize XFS library\n", + stderr); + exit(1); + } + mp = dbmount(); + if (mp == NULL) { + dbprintf("%s: %s is not a valid filesystem\n", + progname, fsdevice); + exit(1); + /*NOTREACHED*/ + } + blkbb = 1 << mp->m_blkbb_log; + push_cur(); + init_commands(); + init_sig(); + if (cfile) { + if (fprintf(cfile, "q\n")<0) { + perror("fprintf(tmpfile)"); + dbprintf("%s: error writing temporary file\n", progname); + exit(1); + } + if (fflush(cfile)<0) { + perror("fflush(tmpfile)"); + dbprintf("%s: error writing temporary file\n", progname); + exit(1); + } + rewind(cfile); + pushfile(cfile); + } +} diff --git a/db/init.h b/db/init.h new file mode 100644 index 000000000..5cbbda009 --- /dev/null +++ b/db/init.h @@ -0,0 +1,34 @@ +/* 
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern char *fsdevice; +extern void init(int argc, char **argv); diff --git a/db/inobt.c b/db/inobt.c new file mode 100644 index 000000000..4b43d9455 --- /dev/null +++ b/db/inobt.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "data.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "inobt.h" +#include "print.h" +#include "bit.h" +#include "mount.h" + +static int inobt_key_count(void *obj, int startoff); +static int inobt_key_offset(void *obj, int startoff, int idx); +static int inobt_ptr_count(void *obj, int startoff); +static int inobt_ptr_offset(void *obj, int startoff, int idx); +static int inobt_rec_count(void *obj, int startoff); +static int inobt_rec_offset(void *obj, int startoff, int idx); + +const field_t inobt_hfld[] = { + { "", FLDT_INOBT, OI(0), C1, 0, TYP_NONE }, + { NULL } +}; + +#define OFF(f) bitize(offsetof(xfs_inobt_block_t, bb_ ## f)) +const field_t inobt_flds[] = { + { "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE }, + { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE }, + { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), 
C1, 0, TYP_NONE }, + { "leftsib", FLDT_AGBLOCK, OI(OFF(leftsib)), C1, 0, TYP_INOBT }, + { "rightsib", FLDT_AGBLOCK, OI(OFF(rightsib)), C1, 0, TYP_INOBT }, + { "recs", FLDT_INOBTREC, inobt_rec_offset, inobt_rec_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { "keys", FLDT_INOBTKEY, inobt_key_offset, inobt_key_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { "ptrs", FLDT_INOBTPTR, inobt_ptr_offset, inobt_ptr_count, + FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_INOBT }, + { NULL } +}; + +#define KOFF(f) bitize(offsetof(xfs_inobt_key_t, ir_ ## f)) +const field_t inobt_key_flds[] = { + { "startino", FLDT_AGINO, OI(KOFF(startino)), C1, 0, TYP_INODE }, + { NULL } +}; + +#define ROFF(f) bitize(offsetof(xfs_inobt_rec_t, ir_ ## f)) +const field_t inobt_rec_flds[] = { + { "startino", FLDT_AGINO, OI(ROFF(startino)), C1, 0, TYP_INODE }, + { "freecount", FLDT_INT32D, OI(ROFF(freecount)), C1, 0, TYP_NONE }, + { "free", FLDT_INOFREE, OI(ROFF(free)), C1, 0, TYP_NONE }, + { NULL } +}; + +/*ARGSUSED*/ +static int +inobt_key_count( + void *obj, + int startoff) +{ + xfs_inobt_block_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->bb_level, ARCH_CONVERT) == 0) + return 0; + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +/*ARGSUSED*/ +static int +inobt_key_offset( + void *obj, + int startoff, + int idx) +{ + xfs_inobt_block_t *block; + xfs_inobt_key_t *kp; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + kp = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block, idx, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0)); + return bitize((int)((char *)kp - (char *)block)); +} + +/*ARGSUSED*/ +static int +inobt_ptr_count( + void *obj, + int startoff) +{ + xfs_inobt_block_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->bb_level, ARCH_CONVERT) == 0) + return 0; + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +/*ARGSUSED*/ 
+static int +inobt_ptr_offset( + void *obj, + int startoff, + int idx) +{ + xfs_inobt_block_t *block; + xfs_inobt_ptr_t *pp; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0); + pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block, idx, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0)); + return bitize((int)((char *)pp - (char *)block)); +} + +/*ARGSUSED*/ +static int +inobt_rec_count( + void *obj, + int startoff) +{ + xfs_inobt_block_t *block; + + ASSERT(startoff == 0); + block = obj; + if (INT_GET(block->bb_level, ARCH_CONVERT) > 0) + return 0; + return INT_GET(block->bb_numrecs, ARCH_CONVERT); +} + +/*ARGSUSED*/ +static int +inobt_rec_offset( + void *obj, + int startoff, + int idx) +{ + xfs_inobt_block_t *block; + xfs_inobt_rec_t *rp; + + ASSERT(startoff == 0); + block = obj; + ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) == 0); + rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block, idx, + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1)); + return bitize((int)((char *)rp - (char *)block)); +} + +/*ARGSUSED*/ +int +inobt_size( + void *obj, + int startoff, + int idx) +{ + return bitize(mp->m_sb.sb_blocksize); +} diff --git a/db/inobt.h b/db/inobt.h new file mode 100644 index 000000000..23aeb7e21 --- /dev/null +++ b/db/inobt.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. 
Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct field; + +extern const struct field inobt_flds[]; +extern const struct field inobt_hfld[]; +extern const struct field inobt_key_flds[]; +extern const struct field inobt_rec_flds[]; + +extern int inobt_size(void *obj, int startoff, int idx); diff --git a/db/inode.c b/db/inode.c new file mode 100644 index 000000000..a12e32fe3 --- /dev/null +++ b/db/inode.c @@ -0,0 +1,594 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "command.h" +#include "data.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "inode.h" +#include "io.h" +#include "print.h" +#include "block.h" +#include "bit.h" +#include "output.h" +#include "mount.h" + +static int inode_a_bmbt_count(void *obj, int startoff); +static int inode_a_bmx_count(void *obj, int startoff); +static int inode_a_count(void *obj, int startoff); +static int inode_a_offset(void *obj, int startoff, int idx); +static int inode_a_sfattr_count(void *obj, int startoff); +static int inode_core_nlinkv2_count(void *obj, int startoff); +static int inode_core_onlink_count(void *obj, int startoff); +static int inode_core_projid_count(void *obj, int startoff); +static int inode_core_nlinkv1_count(void *obj, int startoff); +static int inode_f(int argc, char **argv); +static int inode_u_bmbt_count(void *obj, int startoff); +static int inode_u_bmx_count(void *obj, int startoff); +static int inode_u_c_count(void *obj, int startoff); +static int inode_u_dev_count(void *obj, int startoff); +static int inode_u_muuid_count(void *obj, int startoff); +static int inode_u_sfdir_count(void *obj, int startoff); +static int inode_u_sfdir2_count(void *obj, int startoff); +static int inode_u_symlink_count(void *obj, int startoff); + +static const cmdinfo_t inode_cmd = + { "inode", NULL, inode_f, 0, 1, 1, "[inode#]", + "set current inode", NULL }; + +const field_t inode_hfld[] = { + { "", FLDT_INODE, OI(0), C1, 0, TYP_NONE }, + { NULL } 
+}; + +#define OFF(f) bitize(offsetof(xfs_dinode_t, di_ ## f)) +const field_t inode_flds[] = { + { "core", FLDT_DINODE_CORE, OI(OFF(core)), C1, 0, TYP_NONE }, + { "next_unlinked", FLDT_AGINO, OI(OFF(next_unlinked)), C1, 0, + TYP_INODE }, + { "u", FLDT_DINODE_U, OI(OFF(u)), C1, 0, TYP_NONE }, + { "a", FLDT_DINODE_A, inode_a_offset, inode_a_count, + FLD_COUNT|FLD_OFFSET, TYP_NONE }, + { NULL } +}; + +#define COFF(f) bitize(offsetof(xfs_dinode_core_t, di_ ## f)) +const field_t inode_core_flds[] = { + { "magic", FLDT_UINT16X, OI(COFF(magic)), C1, 0, TYP_NONE }, + { "mode", FLDT_UINT16O, OI(COFF(mode)), C1, 0, TYP_NONE }, + { "version", FLDT_INT8D, OI(COFF(version)), C1, 0, TYP_NONE }, + { "format", FLDT_DINODE_FMT, OI(COFF(format)), C1, 0, TYP_NONE }, + { "nlinkv1", FLDT_UINT16D, OI(COFF(onlink)), inode_core_nlinkv1_count, + FLD_COUNT, TYP_NONE }, + { "nlinkv2", FLDT_UINT32D, OI(COFF(nlink)), inode_core_nlinkv2_count, + FLD_COUNT, TYP_NONE }, + { "onlink", FLDT_UINT16D, OI(COFF(onlink)), inode_core_onlink_count, + FLD_COUNT, TYP_NONE }, + { "projid", FLDT_UINT16D, OI(COFF(projid)), inode_core_projid_count, + FLD_COUNT, TYP_NONE }, + { "uid", FLDT_UINT32D, OI(COFF(uid)), C1, 0, TYP_NONE }, + { "gid", FLDT_UINT32D, OI(COFF(gid)), C1, 0, TYP_NONE }, + { "atime", FLDT_TIMESTAMP, OI(COFF(atime)), C1, 0, TYP_NONE }, + { "mtime", FLDT_TIMESTAMP, OI(COFF(mtime)), C1, 0, TYP_NONE }, + { "ctime", FLDT_TIMESTAMP, OI(COFF(ctime)), C1, 0, TYP_NONE }, + { "size", FLDT_FSIZE, OI(COFF(size)), C1, 0, TYP_NONE }, + { "nblocks", FLDT_DRFSBNO, OI(COFF(nblocks)), C1, 0, TYP_NONE }, + { "extsize", FLDT_EXTLEN, OI(COFF(extsize)), C1, 0, TYP_NONE }, + { "nextents", FLDT_EXTNUM, OI(COFF(nextents)), C1, 0, TYP_NONE }, + { "naextents", FLDT_AEXTNUM, OI(COFF(anextents)), C1, 0, TYP_NONE }, + { "forkoff", FLDT_UINT8D, OI(COFF(forkoff)), C1, 0, TYP_NONE }, + { "aformat", FLDT_DINODE_FMT, OI(COFF(aformat)), C1, 0, TYP_NONE }, + { "dmevmask", FLDT_UINT32X, OI(COFF(dmevmask)), C1, 0, TYP_NONE }, + { 
"dmstate", FLDT_UINT16D, OI(COFF(dmstate)), C1, 0, TYP_NONE }, + { "flags", FLDT_UINT16X, OI(COFF(flags)), C1, FLD_SKIPALL, TYP_NONE }, + { "newrtbm", FLDT_UINT1, + OI(COFF(flags) + bitsz(__uint16_t) - XFS_DIFLAG_NEWRTBM_BIT - 1), C1, + 0, TYP_NONE }, + { "prealloc", FLDT_UINT1, + OI(COFF(flags) + bitsz(__uint16_t) - XFS_DIFLAG_PREALLOC_BIT - 1), C1, + 0, TYP_NONE }, + { "realtime", FLDT_UINT1, + OI(COFF(flags) + bitsz(__uint16_t) - XFS_DIFLAG_REALTIME_BIT - 1), C1, + 0, TYP_NONE }, + { "gen", FLDT_UINT32D, OI(COFF(gen)), C1, 0, TYP_NONE }, + { NULL } +}; + +#define TOFF(f) bitize(offsetof(xfs_timestamp_t, t_ ## f)) +const field_t timestamp_flds[] = { + { "sec", FLDT_TIME, OI(TOFF(sec)), C1, 0, TYP_NONE }, + { "nsec", FLDT_NSEC, OI(TOFF(nsec)), C1, 0, TYP_NONE }, + { NULL } +}; + +const field_t inode_u_flds[] = { + { "bmbt", FLDT_BMROOTD, 0, inode_u_bmbt_count, FLD_COUNT, TYP_NONE }, + { "bmx", FLDT_BMAPBTDREC, 0, inode_u_bmx_count, FLD_ARRAY|FLD_COUNT, + TYP_NONE }, + { "c", FLDT_CHARNS, 0, inode_u_c_count, FLD_COUNT, TYP_NONE }, + { "dev", FLDT_DEV, 0, inode_u_dev_count, FLD_COUNT, TYP_NONE }, + { "muuid", FLDT_UUID, 0, inode_u_muuid_count, FLD_COUNT, TYP_NONE }, + { "sfdir", FLDT_DIRSHORT, 0, inode_u_sfdir_count, FLD_COUNT, TYP_NONE }, + { "sfdir2", FLDT_DIR2SF, 0, inode_u_sfdir2_count, FLD_COUNT, TYP_NONE }, + { "symlink", FLDT_CHARNS, 0, inode_u_symlink_count, FLD_COUNT, + TYP_NONE }, + { NULL } +}; + +const field_t inode_a_flds[] = { + { "bmbt", FLDT_BMROOTA, 0, inode_a_bmbt_count, FLD_COUNT, TYP_NONE }, + { "bmx", FLDT_BMAPBTAREC, 0, inode_a_bmx_count, FLD_ARRAY|FLD_COUNT, + TYP_NONE }, + { "sfattr", FLDT_ATTRSHORT, 0, inode_a_sfattr_count, FLD_COUNT, + TYP_NONE }, + { NULL } +}; + +static const char *dinode_fmt_name[] = + { "dev", "local", "extents", "btree", "uuid" }; +static const int dinode_fmt_name_size = + sizeof(dinode_fmt_name) / sizeof(dinode_fmt_name[0]); + +/*ARGSUSED*/ +int +fp_dinode_fmt( + void *obj, + int bit, + int count, + char *fmtstr, + 
int		size,
+	int		arg,
+	int		base,
+	int		array)
+{
+	int			pos;
+	xfs_dinode_fmt_t	fmt;
+	int			n;
+
+	/* element n lives at bit offset bit + n * size */
+	for (n = 0; n < count; n++) {
+		pos = bit + n * size;
+		fmt = (xfs_dinode_fmt_t)getbitval(obj, pos, size, BVSIGNED);
+		if (array)
+			dbprintf("%d:", n + base);
+		/* append the symbolic name only for known format values */
+		if (fmt >= 0 && fmt < dinode_fmt_name_size)
+			dbprintf("%d (%s)", (int)fmt, dinode_fmt_name[(int)fmt]);
+		else
+			dbprintf("%d", (int)fmt);
+		if (n + 1 < count)
+			dbprintf(" ");
+	}
+	return 1;
+}
+
+/*
+ * Count callback for the attribute fork "bmbt" field: 1 when the inode
+ * has an attribute fork in btree format, otherwise 0.
+ */
+static int
+inode_a_bmbt_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dino = obj;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	if (XFS_DFORK_Q_ARCH(dino, ARCH_CONVERT)) {
+		ASSERT((char *)XFS_DFORK_APTR_ARCH(dino, ARCH_CONVERT) - (char *)dino == byteize(startoff));
+		return INT_GET(dino->di_core.di_aformat, ARCH_CONVERT) == XFS_DINODE_FMT_BTREE;
+	}
+	return 0;
+}
+
+static int
+inode_a_bmx_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	dip = obj;
+	if (!XFS_DFORK_Q_ARCH(dip, ARCH_CONVERT))
+		return 0;
+	ASSERT((char *)XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT) - (char *)dip == byteize(startoff));
+	return INT_GET(dip->di_core.di_aformat, ARCH_CONVERT) == XFS_DINODE_FMT_EXTENTS ?
+		INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) : 0;
+}
+
+/* Count callback for the "a" field: non-zero when an attribute fork exists. */
+static int
+inode_a_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(startoff == 0);
+	dip = obj;
+	return XFS_DFORK_Q_ARCH(dip, ARCH_CONVERT);
+}
+
+/* Offset callback for the "a" field: bit offset of the attribute fork. */
+static int
+inode_a_offset(
+	void		*obj,
+	int		startoff,
+	int		idx)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(startoff == 0);
+	ASSERT(idx == 0);
+	dip = obj;
+	ASSERT(XFS_DFORK_Q_ARCH(dip, ARCH_CONVERT));
+	return bitize((int)((char *)XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT) - (char *)dip));
+}
+
+/*
+ * Count callback for the "sfattr" field: 1 when the attribute fork
+ * exists and is in local (shortform) format, otherwise 0.
+ */
+static int
+inode_a_sfattr_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	dip = obj;
+	if (!XFS_DFORK_Q_ARCH(dip, ARCH_CONVERT))
+		return 0;
+	ASSERT((char *)XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT) - (char *)dip == byteize(startoff));
+	return INT_GET(dip->di_core.di_aformat, ARCH_CONVERT) == XFS_DINODE_FMT_LOCAL;
+}
+
+/*
+ * Size in bits of the attribute fork contents, chosen by di_aformat:
+ *   local   - totsize from the shortform attribute header
+ *   extents - di_anextents extent records
+ *   btree   - the whole attribute fork area
+ * Any other format yields 0.
+ */
+int
+inode_a_size(
+	void			*obj,
+	int			startoff,
+	int			idx)
+{
+	xfs_attr_shortform_t	*asf;
+	xfs_dinode_t		*dip;
+
+	ASSERT(startoff == 0);
+	ASSERT(idx == 0);
+	dip = obj;
+	switch (INT_GET(dip->di_core.di_aformat, ARCH_CONVERT)) {
+	case XFS_DINODE_FMT_LOCAL:
+		asf = (xfs_attr_shortform_t *)XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT);
+		/*
+		 * totsize sits in the same on-disk buffer as the fields
+		 * read via INT_GET above, so convert it the same way
+		 * rather than reading it in host byte order.
+		 */
+		return bitize((int)INT_GET(asf->hdr.totsize, ARCH_CONVERT));
+	case XFS_DINODE_FMT_EXTENTS:
+		return (int)(INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) * bitsz(xfs_bmbt_rec_t));
+	case XFS_DINODE_FMT_BTREE:
+		return bitize((int)XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_CONVERT));
+	default:
+		return 0;
+	}
+}
+
+/* Count callback for "nlinkv1": present only on version 1 inodes. */
+static int
+inode_core_nlinkv1_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dinode_core_t	*dic;
+
+	ASSERT(startoff == 0);
+	ASSERT(obj == iocur_top->data);
+	dic = obj;
+	return dic->di_version == XFS_DINODE_VERSION_1;
+}
+
+/* Count callback for "nlinkv2": present only on version 2 inodes. */
+static int
+inode_core_nlinkv2_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dinode_core_t	*dic;
+
+	ASSERT(startoff == 0);
+	ASSERT(obj == iocur_top->data);
+	dic = obj;
+	return dic->di_version == XFS_DINODE_VERSION_2;
+}
+
+/* Count callback for "onlink": present only on version 2 inodes. */
+static int
+inode_core_onlink_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dinode_core_t	*dic;
+
+	ASSERT(startoff == 0);
+	ASSERT(obj == iocur_top->data);
+	dic = obj;
+	return dic->di_version == XFS_DINODE_VERSION_2;
+}
+
+/* Count callback for "projid": present only on version 2 inodes. */
+static int
+inode_core_projid_count(
+	void			*obj,
+	int			startoff)
+{
+	xfs_dinode_core_t	*dic;
+
+	ASSERT(startoff == 0);
+	ASSERT(obj == iocur_top->data);
+	dic = obj;
+	return dic->di_version == XFS_DINODE_VERSION_2;
+}
+
+/*
+ * Handler for the "inode" command.  With an argument, parse it as an
+ * inode number (any base accepted by strtoull with base 0) and make it
+ * the current inode; without one, report the current inode number.
+ * Always returns 0.
+ */
+static int
+inode_f(
+	int		argc,
+	char		**argv)
+{
+	xfs_ino_t	ino;
+	char		*p;
+
+	if (argc > 1) {
+		ino = strtoull(argv[1], &p, 0);
+		/*
+		 * p == argv[1] means no digits were consumed (e.g. an
+		 * empty argument); do not treat that as inode 0.
+		 */
+		if (p == argv[1] || *p != '\0') {
+			dbprintf("bad value for inode number %s\n", argv[1]);
+			return 0;
+		}
+		set_cur_inode(ino);
+	} else if (iocur_top->ino == NULLFSINO)
+		dbprintf("no current inode\n");
+	else
+		/* inode numbers are unsigned; print them that way */
+		dbprintf("current inode number is %llu\n",
+			(unsigned long long)iocur_top->ino);
+	return 0;
+}
+
+/* Register the "inode" command with the command table. */
+void
+inode_init(void)
+{
+	add_command(&inode_cmd);
+}
+
+typnm_t
+inode_next_type(void)
+{
+	switch (iocur_top->mode & IFMT) {
+	case IFDIR:
+		return XFS_DIR_IS_V2(mp) ?
TYP_DIR2 : TYP_DIR;
+	case IFLNK:
+		return TYP_SYMLINK;
+	case IFREG:
+		/* the realtime and quota inodes have their own data types */
+		if (iocur_top->ino == mp->m_sb.sb_rbmino)
+			return TYP_RTBITMAP;
+		if (iocur_top->ino == mp->m_sb.sb_rsumino)
+			return TYP_RTSUMMARY;
+		if (iocur_top->ino == mp->m_sb.sb_uquotino ||
+		    iocur_top->ino == mp->m_sb.sb_pquotino)
+			return TYP_DQBLK;
+		return TYP_DATA;
+	default:
+		return TYP_NONE;
+	}
+}
+
+/* Size of an on-disk inode in bits, taken from the superblock. */
+int
+inode_size(
+	void	*obj,
+	int	startoff,
+	int	idx)
+{
+	return bitize(mp->m_sb.sb_inodesize);
+}
+
+/* Count callback for the data fork "bmbt" field: 1 when in btree format. */
+static int
+inode_u_bmbt_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dino = obj;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	ASSERT((char *)&dino->di_u - (char *)dino == byteize(startoff));
+	return INT_GET(dino->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_BTREE;
+}
+
+/*
+ * Count callback for the data fork "bmx" field: di_nextents when the
+ * fork is in extents format, otherwise 0.
+ */
+static int
+inode_u_bmx_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dino = obj;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	ASSERT((char *)&dino->di_u - (char *)dino == byteize(startoff));
+	if (INT_GET(dino->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_EXTENTS)
+		return 0;
+	return INT_GET(dino->di_core.di_nextents, ARCH_CONVERT);
+}
+
+static int
+inode_u_c_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	dip = obj;
+	ASSERT((char *)&dip->di_u - (char *)dip == byteize(startoff));
+	return INT_GET(dip->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_LOCAL &&
+	       (INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & IFMT) == IFREG ?
+		(int)INT_GET(dip->di_core.di_size, ARCH_CONVERT) : 0;
+}
+
+/* Count callback for the "dev" field: 1 when the data fork is in dev format. */
+static int
+inode_u_dev_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dino = obj;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	ASSERT((char *)&dino->di_u - (char *)dino == byteize(startoff));
+	return INT_GET(dino->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_DEV;
+}
+
+/* Count callback for the "muuid" field: 1 when the data fork is in uuid format. */
+static int
+inode_u_muuid_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dino = obj;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	ASSERT((char *)&dino->di_u - (char *)dino == byteize(startoff));
+	return INT_GET(dino->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_UUID;
+}
+
+/*
+ * Count callback for the "sfdir" field: 1 for a local-format directory
+ * inode on a filesystem using version 1 directories, otherwise 0.
+ */
+static int
+inode_u_sfdir_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dino = obj;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	ASSERT((char *)&dino->di_u - (char *)dino == byteize(startoff));
+	if (INT_GET(dino->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_LOCAL)
+		return 0;
+	if ((INT_GET(dino->di_core.di_mode, ARCH_CONVERT) & IFMT) != IFDIR)
+		return 0;
+	return XFS_DIR_IS_V1(mp) != 0;
+}
+
+/*
+ * Count callback for the "sfdir2" field: 1 for a local-format directory
+ * inode on a filesystem using version 2 directories, otherwise 0.
+ */
+static int
+inode_u_sfdir2_count(
+	void		*obj,
+	int		startoff)
+{
+	xfs_dinode_t	*dino = obj;
+
+	ASSERT(bitoffs(startoff) == 0);
+	ASSERT(obj == iocur_top->data);
+	ASSERT((char *)&dino->di_u - (char *)dino == byteize(startoff));
+	if (INT_GET(dino->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_LOCAL)
+		return 0;
+	if ((INT_GET(dino->di_core.di_mode, ARCH_CONVERT) & IFMT) != IFDIR)
+		return 0;
+	return XFS_DIR_IS_V2(mp) != 0;
+}
+
+int
+inode_u_size(
+	void		*obj,
+	int		startoff,
+	int		idx)
+{
+	xfs_dinode_t	*dip;
+
+	ASSERT(startoff == 0);
+	ASSERT(idx == 0);
+	dip = obj;
+	switch (INT_GET(dip->di_core.di_format, ARCH_CONVERT)) {
+	case XFS_DINODE_FMT_DEV:
+		return bitsz(xfs_dev_t);
+	case XFS_DINODE_FMT_LOCAL:
+		return bitize((int)INT_GET(dip->di_core.di_size, ARCH_CONVERT));
+	case XFS_DINODE_FMT_EXTENTS:
+		return (int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) * bitsz(xfs_bmbt_rec_t));
+	case
XFS_DINODE_FMT_BTREE: + return bitize((int)XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT)); + case XFS_DINODE_FMT_UUID: + return bitsz(uuid_t); + default: + return 0; + } +} + +static int +inode_u_symlink_count( + void *obj, + int startoff) +{ + xfs_dinode_t *dip; + + ASSERT(bitoffs(startoff) == 0); + ASSERT(obj == iocur_top->data); + dip = obj; + ASSERT((char *)&dip->di_u - (char *)dip == byteize(startoff)); + return INT_GET(dip->di_core.di_format, ARCH_CONVERT) == XFS_DINODE_FMT_LOCAL && + (INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & IFMT) == IFLNK ? + (int)INT_GET(dip->di_core.di_size, ARCH_CONVERT) : 0; +} + +void +set_cur_inode( + xfs_ino_t ino) +{ + xfs_agblock_t agbno; + xfs_agino_t agino; + xfs_agnumber_t agno; + xfs_dinode_t *dip; + int offset; + + agno = XFS_INO_TO_AGNO(mp, ino); + agino = XFS_INO_TO_AGINO(mp, ino); + agbno = XFS_AGINO_TO_AGBNO(mp, agino); + offset = XFS_AGINO_TO_OFFSET(mp, agino); + if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || + offset >= mp->m_sb.sb_inopblock || + XFS_AGINO_TO_INO(mp, agno, agino) != ino) { + dbprintf("bad inode number %lld\n", ino); + return; + } + cur_agno = agno; + /* + * First set_cur to the block with the inode + * then use off_cur to get the right part of the buffer. + */ + ASSERT(typtab[TYP_INODE].typnm == TYP_INODE); + + /* ingore ring update here, do it explicitly below */ + set_cur(&typtab[TYP_INODE], XFS_AGB_TO_DADDR(mp, agno, agbno), + blkbb, DB_RING_IGN, NULL); + off_cur(offset << mp->m_sb.sb_inodelog, mp->m_sb.sb_inodesize); + dip = iocur_top->data; + iocur_top->ino = ino; + iocur_top->mode = INT_GET(dip->di_core.di_mode, ARCH_CONVERT); + if ((iocur_top->mode & IFMT) == IFDIR) + iocur_top->dirino = ino; + + /* track updated info in ring */ + ring_add(); +} diff --git a/db/inode.h b/db/inode.h new file mode 100644 index 000000000..3d0a22f28 --- /dev/null +++ b/db/inode.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern const struct field inode_a_flds[]; +extern const struct field inode_core_flds[]; +extern const struct field inode_flds[]; +extern const struct field inode_hfld[]; +extern const struct field inode_u_flds[]; +extern const struct field timestamp_flds[]; + +extern int fp_dinode_fmt(void *obj, int bit, int count, char *fmtstr, + int size, int arg, int base, int array); +extern int inode_a_size(void *obj, int startoff, int idx); +extern void inode_init(void); +extern typnm_t inode_next_type(void); +extern int inode_size(void *obj, int startoff, int idx); +extern int inode_u_size(void *obj, int startoff, int idx); +extern void set_cur_inode(xfs_ino_t ino); diff --git a/db/input.c b/db/input.c new file mode 100644 index 000000000..1394f101b --- /dev/null +++ b/db/input.c @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include +#include "command.h" +#include "data.h" +#include "input.h" +#include "output.h" +#include "sig.h" +#include "malloc.h" +#include "init.h" + +int inputstacksize; +FILE **inputstack; +FILE *curinput; + +static void popfile(void); +static int source_f(int argc, char **argv); + +static const cmdinfo_t source_cmd = + { "source", NULL, source_f, 1, 1, 0, "source-file", + "get commands from source-file", NULL }; + +/* our homegrown strtok that understands strings */ + +static char * +tokenize( + char *inp) +{ + static char *last_place = NULL; + char *start; + char *walk; + int in_string = 0; + int in_escape = 0; + + if (inp) { + start = inp; + } else { + if (last_place == NULL) + return NULL; + + /* we're done */ + if (*last_place != '\0') + return NULL; + + start = last_place + 1; + } + last_place = NULL; + + /* eat whitespace */ + while (*start == ' ' || *start == '\t') + start++; + + walk = start; + for (;*walk != '\0'; walk++) { + if (in_escape) { + in_escape = 0; + continue; + } + if (*walk == '\\') + in_escape = 1; + else if (*walk == '\"') + in_string ^= 1; + + if (!in_string && !in_escape && + (*walk == ' ' || *walk == '\t')) { + last_place = walk; + *last_place = '\0'; + break; + } + } + if (walk == start) + return NULL; + + return start; +} + +char ** +breakline( + char *input, + int *count) +{ + int c; + char *inp; + char *p; + char **rval; + + c = 0; + inp = input; + rval = xcalloc(sizeof(char *), 1); + for (;;) { + + p = tokenize(inp); + + if (p == NULL) + 
break; + inp = NULL; + c++; + rval = xrealloc(rval, sizeof(*rval) * (c + 1)); + rval[c - 1] = p; + rval[c] = NULL; + } + *count = c; + return rval; +} + +void +doneline( + char *input, + char **vec) +{ + xfree(input); + xfree(vec); +} + +char * +fetchline(void) +{ + char buf[1024]; + int iscont; + size_t len; + size_t rlen; + char *rval; + + rval = NULL; + for (rlen = iscont = 0; ; ) { + if (inputstacksize == 1) { + if (iscont) + dbprintf("... "); + else + dbprintf("%s: ", progname); + fflush(stdin); + } + if (seenint() || + (!fgets(buf, sizeof(buf), curinput) && + ferror(curinput) && seenint())) { + clearint(); + dbprintf("^C\n"); + clearerr(curinput); + if (iscont) { + iscont = 0; + rlen = 0; + if (rval) { + xfree(rval); + rval = NULL; + } + } + continue; + } + if (ferror(curinput) || feof(curinput) || + (len = strlen(buf)) == 0) { + popfile(); + if (curinput == NULL) { + dbprintf("\n"); + return NULL; + } + iscont = 0; + rlen = 0; + if (rval) { + xfree(rval); + rval = NULL; + } + continue; + } + if (inputstacksize == 1) + logprintf("%s", buf); + rval = xrealloc(rval, rlen + len + 1); + if (rlen == 0) + rval[0] = '\0'; + rlen += len; + strcat(rval, buf); + if (buf[len - 1] == '\n') { + if (len > 1 && buf[len - 2] == '\\') { + rval[rlen - 2] = ' '; + rval[rlen - 1] = '\0'; + rlen--; + iscont = 1; + } else { + rval[rlen - 1] = '\0'; + rlen--; + break; + } + } + } + return rval; +} + +void +input_init(void) +{ + add_command(&source_cmd); +} + +static void +popfile(void) +{ + if (inputstacksize == 0) { + curinput = NULL; + return; + } + if (curinput != stdin) + fclose(curinput); + + inputstacksize--; + if (inputstacksize) { + inputstack = + xrealloc(inputstack, inputstacksize * sizeof(*inputstack)); + curinput = inputstack[inputstacksize - 1]; + } else { + free(inputstack); + curinput = NULL; + inputstack = NULL; + } +} + +void +pushfile( + FILE *file) +{ + inputstack = + xrealloc(inputstack, + (inputstacksize + 1) * sizeof(*inputstack)); + inputstacksize++; + 
curinput = inputstack[inputstacksize - 1] = file; +} + +/* ARGSUSED */ +static int +source_f( + int argc, + char **argv) +{ + FILE *f; + + f = fopen(argv[1], "r"); + if (f == NULL) + dbprintf("can't open %s\n", argv[0]); + else + pushfile(f); + return 0; +} diff --git a/db/input.h b/db/input.h new file mode 100644 index 000000000..406997c79 --- /dev/null +++ b/db/input.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern char **breakline(char *input, int *count); +extern void doneline(char *input, char **vec); +extern char *fetchline(void); +extern void input_init(void); +extern void pushfile(FILE *file); diff --git a/db/io.c b/db/io.c new file mode 100644 index 000000000..849103d4b --- /dev/null +++ b/db/io.c @@ -0,0 +1,627 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include "command.h" +#include "data.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "inode.h" +#include "io.h" +#include "output.h" +#include "mount.h" +#include "malloc.h" + +static int pop_f(int argc, char **argv); +static void pop_help(void); +static int push_f(int argc, char **argv); +static void push_help(void); +static int stack_f(int argc, char **argv); +static void stack_help(void); +static int forward_f(int argc, char **argv); +static void forward_help(void); +static int back_f(int argc, char **argv); +static void back_help(void); +static int ring_f(int argc, char **argv); +static void ring_help(void); + +static const cmdinfo_t pop_cmd = + { "pop", NULL, pop_f, 0, 0, 0, NULL, + "pop location from the stack", pop_help }; +static const cmdinfo_t push_cmd = + { "push", NULL, push_f, 0, 2, 0, "[command]", + "push location to the stack", push_help }; +static const cmdinfo_t stack_cmd = + { "stack", NULL, stack_f, 0, 0, 0, NULL, + "view the location stack", stack_help }; +static const cmdinfo_t forward_cmd = + { "forward", "f", forward_f, 0, 0, 0, NULL, + "move forward to next entry in the position ring", forward_help }; +static const cmdinfo_t back_cmd = + { "back", "b", back_f, 0, 0, 0, NULL, + "move to the previous location in the position ring", back_help }; +static const cmdinfo_t ring_cmd = + { "ring", NULL, ring_f, 0, 1, 0, NULL, + "show position ring or move to a specific entry", ring_help }; + +iocur_t *iocur_base; +iocur_t *iocur_top; +int iocur_sp = -1; +int iocur_len; + +#define RING_ENTRIES 20 +static iocur_t iocur_ring[RING_ENTRIES]; +static int ring_head = -1; +static int ring_tail = -1; +static int ring_current = -1; 
+ +void +io_init(void) +{ + add_command(&pop_cmd); + add_command(&push_cmd); + add_command(&stack_cmd); + add_command(&forward_cmd); + add_command(&back_cmd); + add_command(&ring_cmd); +} + +void +off_cur( + int off, + int len) +{ + if (iocur_top == NULL || off + len > BBTOB(iocur_top->blen)) + dbprintf("can't set block offset to %d\n", off); + else { + iocur_top->boff = off; + iocur_top->off = ((xfs_off_t)iocur_top->bb << BBSHIFT) + off; + iocur_top->len = len; + iocur_top->data = (void *)((char *)iocur_top->buf + off); + } +} + +void +pop_cur(void) +{ + if (iocur_sp < 0) { + dbprintf("can't pop anything from I/O stack\n"); + return; + } + if (iocur_top->buf) + xfree(iocur_top->buf); + if (--iocur_sp >= 0) { + iocur_top = iocur_base + iocur_sp; + cur_typ = iocur_top->typ; + } else { + iocur_top = iocur_base; + iocur_sp = 0; + } +} + +/*ARGSUSED*/ +static int +pop_f( + int argc, + char **argv) +{ + pop_cur(); + return 0; +} + +static void +pop_help(void) +{ + dbprintf( +"\n" +" Changes the address and data type to the first entry on the stack.\n" +"\n" + ); +} + +void +print_iocur( + char *tag, + iocur_t *ioc) +{ + int i; + + dbprintf("%s\n", tag); + dbprintf("\tbyte offset %lld, length %d\n", ioc->off, ioc->len); + dbprintf("\tbuffer block %lld (fsbno %lld), %d bb%s\n", ioc->bb, + (xfs_dfsbno_t)XFS_DADDR_TO_FSB(mp, ioc->bb), ioc->blen, + ioc->blen == 1 ? "" : "s"); + if (ioc->use_bbmap) { + dbprintf("\tblock map"); + for (i = 0; i < ioc->blen; i++) + dbprintf(" %d:%lld", i, ioc->bbmap.b[i]); + dbprintf("\n"); + } + dbprintf("\tinode %lld, dir inode %lld, type %s\n", ioc->ino, + ioc->dirino, ioc->typ == NULL ? 
"none" : ioc->typ->name); +} + +void +print_ring(void) +{ + int i; + iocur_t *ioc; + + if (ring_current == -1) { + dbprintf("no entries in location ring.\n"); + return; + } + + dbprintf(" type bblock bblen fsbno inode\n"); + + i = ring_head; + for (;;) { + ioc = &iocur_ring[i]; + if (i == ring_current) + printf("*%2d: ", i); + else + printf(" %2d: ", i); + + dbprintf("%-7.7s %8lld %5d %8lld %9lld\n", + ioc->typ == NULL ? "none" : ioc->typ->name, + ioc->bb, + ioc->blen, + (xfs_dfsbno_t)XFS_DADDR_TO_FSB(mp, ioc->bb), + ioc->ino + ); + + if (i == ring_tail) + break; + + i = (i+(RING_ENTRIES-1))%RING_ENTRIES; + } +} + + +void +push_cur(void) +{ + if (iocur_sp + 1 >= iocur_len) { + iocur_base = xrealloc(iocur_base, + sizeof(*iocur_base) * (iocur_len + 1)); + iocur_len++; + } + iocur_sp++; + iocur_top = iocur_base + iocur_sp; + memset(iocur_top, 0, sizeof(*iocur_base)); + iocur_top->ino = iocur_sp > 0 ? iocur_top[-1].ino : NULLFSINO; + iocur_top->dirino = iocur_sp > 0 ? iocur_top[-1].dirino : NULLFSINO; + iocur_top->mode = iocur_sp > 0 ? iocur_top[-1].mode : 0; + cur_typ = NULL; +} + +static int +push_f( + int argc, + char **argv) +{ + const cmdinfo_t *ct; + + if (argc > 1) { + /* check we can execute command */ + ct = find_command(argv[1]); + if (ct == NULL) { + dbprintf("no such command %s\n", argv[1]); + return 0; + } + if (!ct->canpush) { + dbprintf("no push form allowed for %s\n", argv[1]); + return 0; + } + } + + /* save current state */ + push_cur(); + if (iocur_top[-1].typ && iocur_top[-1].typ->typnm == TYP_INODE) + set_cur_inode(iocur_top[-1].ino); + else + set_cur(iocur_top[-1].typ, iocur_top[-1].bb, + iocur_top[-1].blen, DB_RING_IGN, + iocur_top[-1].use_bbmap ? &iocur_top[-1].bbmap : NULL); + + /* run requested command */ + if (argc>1) + (void)command(argc-1, argv+1); + return 0; +} + +static void +push_help(void) +{ + dbprintf( +"\n" +" Allows you to push the current address and data type on the stack for\n" +" later return. 
'push' also accepts an additional command to execute after\n" +" storing the current address (ex: 'push a rootino' from the superblock).\n" +"\n" + ); +} + +/* move forward through the ring */ +/* ARGSUSED */ +static int +forward_f( + int argc, + char **argv) +{ + if (ring_current == -1) { + dbprintf("ring is empty\n"); + return 0; + } + if (ring_current == ring_head) { + dbprintf("no further entries\n"); + return 0; + } + + ring_current = (ring_current+1)%RING_ENTRIES; + + set_cur(iocur_ring[ring_current].typ, + iocur_ring[ring_current].bb, + iocur_ring[ring_current].blen, + DB_RING_IGN, + iocur_ring[ring_current].use_bbmap ? + &iocur_ring[ring_current].bbmap : NULL); + + return 0; +} + +static void +forward_help(void) +{ + dbprintf( +"\n" +" The 'forward' ('f') command moves to the next location in the position\n" +" ring, updating the current position and data type. If the current location\n" +" is the top entry in the ring, then the 'forward' command will have\n" +" no effect.\n" +"\n" + ); +} + +/* move backwards through the ring */ +/* ARGSUSED */ +static int +back_f( + int argc, + char **argv) +{ + if (ring_current == -1) { + dbprintf("ring is empty\n"); + return 0; + } + if (ring_current == ring_tail) { + dbprintf("no previous entries\n"); + return 0; + } + + ring_current = (ring_current+(RING_ENTRIES-1))%RING_ENTRIES; + + set_cur(iocur_ring[ring_current].typ, + iocur_ring[ring_current].bb, + iocur_ring[ring_current].blen, + DB_RING_IGN, + iocur_ring[ring_current].use_bbmap ? + &iocur_ring[ring_current].bbmap : NULL); + + return 0; +} + +static void +back_help(void) +{ + dbprintf( +"\n" +" The 'back' ('b') command moves to the previous location in the position\n" +" ring, updating the current position and data type. 
If the current location\n" +" is the last entry in the ring, then the 'back' command will have no effect.\n" +"\n" + ); +} + +/* show or go to specific point in ring */ +static int +ring_f( + int argc, + char **argv) +{ + int index; + + if (argc == 1) { + print_ring(); + return 0; + } + + index = (int)strtoul(argv[0], NULL, 0); + if (index < 0 || index >= RING_ENTRIES) + dbprintf("invalid entry: %d\n", index); + + ring_current = index; + + set_cur(iocur_ring[index].typ, + iocur_ring[index].bb, + iocur_ring[index].blen, + DB_RING_IGN, + iocur_ring[index].use_bbmap ? &iocur_ring[index].bbmap : NULL); + + return 0; +} + +static void +ring_help(void) +{ + dbprintf( +"\n" +" The position ring automatically keeps track of each disk location and\n" +" structure type for each change of position you make during your xfs_db\n" +" session. The last %d most recent entries are kept in the ring.\n" +"\n" +" To display the current list of ring entries type 'ring' by itself on\n" +" the command line. The entry highlighted by an asterisk ('*') is the\n" +" current entry.\n" +"\n" +" To move to another entry in the ring type 'ring ' where is\n" +" your desired entry from the ring position list.\n" +"\n" +" You may also use the 'forward' ('f') or 'back' ('b') commands to move\n" +" to the previous or next entry in the ring, respectively.\n" +"\n" +" Note: Unlike the 'stack', 'push' and 'pop' commands, the ring tracks your\n" +" location implicitly. 
Use the 'push' and 'pop' commands if you wish to\n" +" store a specific location explicitly for later return.\n" +"\n", + RING_ENTRIES); +} + + +void +ring_add(void) +{ + if (ring_head == -1) { + /* only get here right after startup */ + ring_head = 0; + ring_tail = 0; + ring_current = 0; + iocur_ring[0] = *iocur_top; + } else { + if (ring_current == ring_head) { + ring_head = (ring_head+1)%RING_ENTRIES; + iocur_ring[ring_head] = *iocur_top; + if (ring_head == ring_tail) + ring_tail = (ring_tail+1)%RING_ENTRIES; + ring_current = ring_head; + } else { + ring_current = (ring_current+1)%RING_ENTRIES; + iocur_ring[ring_current] = *iocur_top; + } + } +} + + +int +write_bbs( + __int64_t bbno, + int count, + void *bufp, + bbmap_t *bbmap) +{ + int c; + int i; + int j; + int rval = EINVAL; /* initialize for zero `count' case */ + + for (j = 0; j < count; j += bbmap ? 1 : count) { + if (bbmap) + bbno = bbmap->b[j]; + if (lseek64(xfsargs.dfd, bbno << BBSHIFT, SEEK_SET) < 0) { + rval = errno; + dbprintf("can't seek in filesystem at bb %lld\n", bbno); + return rval; + } + c = BBTOB(bbmap ? 1 : count); + i = (int)write(xfsargs.dfd, (char *)bufp + BBTOB(j), c); + if (i < 0) { + rval = errno; + } else if (i < c) { + rval = -1; + } else + rval = 0; + if (rval) + break; + } + return rval; +} + +int +read_bbs( + __int64_t bbno, + int count, + void **bufp, + bbmap_t *bbmap) +{ + void *buf; + int c; + int i; + int j; + int rval = EINVAL; + + if (!count) + return EINVAL; + + c = BBTOB(count); + if (*bufp == NULL) + buf = xmalloc(c); + else + buf = *bufp; + for (j = 0; j < count; j += bbmap ? 1 : count) { + if (bbmap) + bbno = bbmap->b[j]; + if (lseek64(xfsargs.dfd, bbno << BBSHIFT, SEEK_SET) < 0) { + rval = errno; + dbprintf("can't seek in filesystem at bb %lld\n", bbno); + if (*bufp == NULL) + xfree(buf); + buf = NULL; + } else { + c = BBTOB(bbmap ? 
1 : count); + i = (int)read(xfsargs.dfd, (char *)buf + BBTOB(j), c); + if (i < 0) { + rval = errno; + if (*bufp == NULL) + xfree(buf); + buf = NULL; + } else if (i < c) { + rval = -1; + if (*bufp == NULL) + xfree(buf); + buf = NULL; + } else + rval = 0; + } + if (buf == NULL) + break; + } + if (*bufp == NULL) + *bufp = buf; + return rval; +} + +void +write_cur(void) +{ + int ret; + + if (iocur_sp < 0) { + dbprintf("nothing to write\n"); + return; + } + ret = write_bbs(iocur_top->bb, iocur_top->blen, iocur_top->buf, + iocur_top->use_bbmap ? &iocur_top->bbmap : NULL); + if (ret == -1) + dbprintf("incomplete write, block: %lld\n", + (iocur_base + iocur_sp)->bb); + else if (ret != 0) + dbprintf("write error: %s\n", strerror(ret)); + /* re-read buffer from disk */ + ret = read_bbs(iocur_top->bb, iocur_top->blen, &iocur_top->buf, + iocur_top->use_bbmap ? &iocur_top->bbmap : NULL); + if (ret == -1) + dbprintf("incomplete read, block: %lld\n", + (iocur_base + iocur_sp)->bb); + else if (ret != 0) + dbprintf("read error: %s\n", strerror(ret)); +} + +void +set_cur( + const typ_t *t, + __int64_t d, + int c, + int ring_flag, + bbmap_t *bbmap) +{ + xfs_ino_t dirino; + xfs_ino_t ino; + __uint16_t mode; + + if (iocur_sp < 0) { + dbprintf("set_cur no stack element to set\n"); + return; + } + +#ifdef DEBUG + if (bbmap) + printf("xfs_db got a bbmap for %lld\n", d); +#endif + ino = iocur_top->ino; + dirino = iocur_top->dirino; + mode = iocur_top->mode; + pop_cur(); + push_cur(); + if (read_bbs(d, c, &iocur_top->buf, bbmap)) + return; + iocur_top->bb = d; + iocur_top->blen = c; + iocur_top->boff = 0; + iocur_top->data = iocur_top->buf; + iocur_top->len = BBTOB(c); + iocur_top->off = d << BBSHIFT; + iocur_top->typ = cur_typ = t; + iocur_top->ino = ino; + iocur_top->dirino = dirino; + iocur_top->mode = mode; + if (iocur_top->use_bbmap = (bbmap != NULL)) + iocur_top->bbmap = *bbmap; + + /* store location in ring */ + if (ring_flag) + ring_add(); +} + +static void +stack_help(void) +{ + 
dbprintf( +"\n" +" The stack is used to explicitly store your location and data type\n" +" for later return. The 'push' operation stores the current address\n" +" and type on the stack, the 'pop' operation returns you to the\n" +" position and datatype of the top entry on the stack.\n" +"\n" +" The 'stack' allows explicit location saves, see 'ring' for implicit\n" +" position tracking.\n" +"\n" + ); +} + +/*ARGSUSED*/ +static int +stack_f( + int argc, + char **argv) +{ + int i; + char tagbuf[8]; + + for (i = iocur_sp; i >= 0; i--) { + sprintf(tagbuf, "%d: ", i); + print_iocur(tagbuf, &iocur_base[i]); + } + return 0; +} diff --git a/db/io.h b/db/io.h new file mode 100644 index 000000000..85ee2994b --- /dev/null +++ b/db/io.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct typ; + +#define BBMAP_SIZE (XFS_MAX_BLOCKSIZE / BBSIZE) +typedef struct bbmap { + __int64_t b[BBMAP_SIZE]; +} bbmap_t; + +typedef struct iocur { + __int64_t bb; /* BB number in filesystem of buf */ + int blen; /* length of "buf", bb's */ + int boff; /* data - buf */ + void *buf; /* base address of buffer */ + void *data; /* current interesting data */ + xfs_ino_t dirino; /* current directory inode number */ + xfs_ino_t ino; /* current inode number */ + int len; /* length of "data", bytes */ + __uint16_t mode; /* current inode's mode */ + xfs_off_t off; /* fs offset of "data" in bytes */ + const struct typ *typ; /* type of "data" */ + int use_bbmap; /* set if bbmap is valid */ + bbmap_t bbmap; /* map daddr if fragmented */ +} iocur_t; + +#define DB_RING_ADD 1 /* add to ring on set_cur */ +#define DB_RING_IGN 0 /* do not add to ring on set_cur */ + +extern iocur_t *iocur_base; /* base of stack */ +extern iocur_t *iocur_top; /* top element of stack */ +extern int iocur_sp; /* current top of stack */ +extern int iocur_len; /* length of stack array */ + +extern void io_init(void); +extern void off_cur(int off, int len); +extern void pop_cur(void); +extern void print_iocur(char *tag, iocur_t *ioc); +extern void push_cur(void); +extern int read_bbs(__int64_t daddr, int count, void **bufp, + bbmap_t *bbmap); +extern int write_bbs(__int64_t daddr, int count, void *bufp, + bbmap_t *bbmap); +extern void write_cur(void); +extern void set_cur(const struct typ *t, __int64_t d, int c, int ring_add, + bbmap_t *bbmap); +extern void ring_add(void); diff --git a/db/main.c b/db/main.c new file mode 100644 index 000000000..e00046153 --- /dev/null +++ b/db/main.c @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2000 Silicon 
Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "command.h" +#include "data.h" +#include "init.h" +#include "input.h" + +int +main( + int argc, + char **argv) +{ + int c; + int done; + char *input; + char **v; + + pushfile(stdin); + init(argc, argv); + done = 0; + while (!done) { + if ((input = fetchline()) == NULL) + break; + v = breakline(input, &c); + if (c) + done = command(c, v); + doneline(input, v); + } + return exitcode; +} diff --git a/db/malloc.c b/db/malloc.c new file mode 100644 index 000000000..413b87f57 --- /dev/null +++ b/db/malloc.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "init.h" +#include "malloc.h" +#include "output.h" + +static void +badmalloc(void) +{ + dbprintf("%s: out of memory\n", progname); + exit(4); +} + +void * +xcalloc( + size_t nelem, + size_t elsize) +{ + void *ptr; + + ptr = calloc(nelem, elsize); + if (ptr) + return ptr; + badmalloc(); + /* NOTREACHED */ + return NULL; +} + +void +xfree( + void *ptr) +{ + free(ptr); +} + +void * +xmalloc( + size_t size) +{ + void *ptr; + + ptr = malloc(size); + if (ptr) + return ptr; + badmalloc(); + /* NOTREACHED */ + return NULL; +} + +void * +xrealloc( + void *ptr, + size_t size) +{ + ptr = realloc(ptr, size); + if (ptr || !size) + return ptr; + badmalloc(); + /* NOTREACHED */ + return NULL; +} + +char * +xstrdup( + const char *s1) +{ + char *s; + + s = strdup(s1); + if (s) + return s; + badmalloc(); + /* NOTREACHED */ + return NULL; +} diff --git a/db/malloc.h b/db/malloc.h new file mode 100644 index 000000000..1680a44b0 --- /dev/null +++ b/db/malloc.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern void *xcalloc(size_t nelem, size_t elsize); +extern void xfree(void *ptr); +extern void *xmalloc(size_t size); +extern void *xrealloc(void *ptr, size_t size); +extern char *xstrdup(const char *s1); diff --git a/db/mount.c b/db/mount.c new file mode 100644 index 000000000..184972720 --- /dev/null +++ b/db/mount.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "init.h"
+#include "io.h"
+#include "mount.h"
+#include "malloc.h"
+#include "data.h"
+
+/* Mount structure shared with the rest of xfs_db (declared in mount.h). */
+xfs_mount_t	*mp;
+
+/*
+ * Compute the maximum height of a block-map btree for one inode fork and
+ * store it in mp->m_bm_maxlevels[whichfork].
+ *
+ * Starts from the largest extent count the fork may hold (MAXEXTNUM for
+ * the data fork, MAXAEXTNUM otherwise), works out how many leaf blocks
+ * that needs at minimum leaf occupancy, then keeps dividing by the
+ * minimum node occupancy until the remaining blocks fit in the root,
+ * whose record capacity is derived from the inode size.
+ *
+ * Reads mp->m_bmap_dmnr[] and mp->m_sb.sb_inodesize, so both must be set
+ * before this is called.
+ */
+static void
+compute_maxlevels(
+	xfs_mount_t	*mp,
+	int		whichfork)
+{
+	int		level;
+	uint		maxblocks;
+	uint		maxleafents;
+	int		maxrootrecs;
+	int		minleafrecs;
+	int		minnoderecs;
+	int		sz;
+
+	maxleafents = (whichfork == XFS_DATA_FORK) ? MAXEXTNUM : MAXAEXTNUM;
+	minleafrecs = mp->m_bmap_dmnr[0];
+	minnoderecs = mp->m_bmap_dmnr[1];
+	sz = mp->m_sb.sb_inodesize;
+	maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
+	/* round up: leaf blocks needed when every leaf is minimally full */
+	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
+	for (level = 1; maxblocks > 1; level++) {
+		if (maxblocks <= maxrootrecs)
+			maxblocks = 1;
+		else
+			maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
+	}
+	mp->m_bm_maxlevels[whichfork] = level;
+}
+
+/*
+ * Read the superblock at XFS_SB_DADDR and build an in-core mount structure
+ * from it.
+ *
+ * Returns a newly allocated xfs_mount_t, or NULL when the superblock
+ * cannot be read.  A superblock whose magic number is not XFS_SB_MAGIC is
+ * reported on stderr but is still used to fill in the mount structure.
+ *
+ * The local "mp" shadows the file-scope "mp"; this function does not
+ * assign the global itself.
+ */
+xfs_mount_t *
+dbmount(void)
+{
+	void		*bufp;
+	int		i;
+	xfs_mount_t	*mp;
+	xfs_sb_t	*sbp;
+
+	mp = xcalloc(1, sizeof(*mp));
+	bufp = NULL;
+	if (read_bbs(XFS_SB_DADDR, 1, &bufp, NULL)) {
+		/* do not leak the mount structure on a failed read */
+		xfree(mp);
+		return NULL;
+	}
+
+	/* copy sb from buf to in-core, converting architecture */
+	libxfs_xlate_sb(bufp, &mp->m_sb, 1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+	xfree(bufp);
+	sbp = &mp->m_sb;
+
+	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
+		fprintf(stderr,"%s: unexpected XFS SB magic number 0x%08x\n",
+			progname, sbp->sb_magicnum);
+	}
+	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
+	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
+	mp->m_agno_log = libxfs_highbit32(sbp->sb_agcount - 1) + 1;
+	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
+	mp->m_litino =
+		(int)(sbp->sb_inodesize -
+		      (sizeof(xfs_dinode_core_t) + sizeof(xfs_agino_t)));
+	mp->m_blockmask = sbp->sb_blocksize - 1;
+	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
+	mp->m_blockwmask = mp->m_blockwsize - 1;
+	/* index 0 is computed with the macro's last argument true, index 1 false */
+	for (i = 0; i < 2; i++) {
+		mp->m_alloc_mxr[i] =
+			(uint)XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
+				xfs_alloc, i == 0);
+		mp->m_alloc_mnr[i] =
+			(uint)XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
+				xfs_alloc, i == 0);
+		mp->m_bmap_dmxr[i] =
+			(uint)XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
+				xfs_bmbt, i == 0);
+		mp->m_bmap_dmnr[i] =
+			(uint)XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
+				xfs_bmbt, i == 0);
+		mp->m_inobt_mxr[i] =
+			(uint)XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
+				xfs_inobt, i == 0);
+		mp->m_inobt_mnr[i] =
+			(uint)XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
+				xfs_inobt, i == 0);
+	}
+	/* needs m_bmap_dmnr[] from the loop above */
+	compute_maxlevels(mp, XFS_DATA_FORK);
+	compute_maxlevels(mp, XFS_ATTR_FORK);
+	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
+	mp->m_ialloc_inos = (int)MAX(XFS_INODES_PER_CHUNK, sbp->sb_inopblock);
+	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
+	if (sbp->sb_rblocks) {
+		mp->m_rsumlevels = sbp->sb_rextslog + 1;
+		mp->m_rsumsize =
+			(uint)sizeof(xfs_suminfo_t) * mp->m_rsumlevels *
+			sbp->sb_rbmblocks;
+		/* skip the round-up when the block size reads as zero */
+		if (sbp->sb_blocksize)
+			mp->m_rsumsize =
+				roundup(mp->m_rsumsize, sbp->sb_blocksize);
+	}
+	if (XFS_SB_VERSION_HASDIRV2(sbp)) {
+		mp->m_dirversion = 2;
+		mp->m_dirblksize =
+			1 << (sbp->sb_dirblklog + sbp->sb_blocklog);
+		mp->m_dirblkfsbs = 1 << sbp->sb_dirblklog;
+		mp->m_dirdatablk =
+			XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_DATA_FIRSTDB(mp));
+		mp->m_dirleafblk =
+			XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
+		mp->m_dirfreeblk =
+			XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_FREE_FIRSTDB(mp));
+	} else {
+		mp->m_dirversion = 1;
+		mp->m_dirblksize = sbp->sb_blocksize;
+		mp->m_dirblkfsbs = 1;
+	}
+	return mp;
+}
diff --git a/db/mount.h b/db/mount.h
new file mode 100644
index 000000000..72348adcf
---
/dev/null +++ b/db/mount.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern xfs_mount_t *dbmount(void); +extern xfs_mount_t *mp; diff --git a/db/output.c b/db/output.c new file mode 100644 index 000000000..b49b36521 --- /dev/null +++ b/db/output.c @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include <stdarg.h>
+#include "command.h"
+#include "output.h"
+#include "sig.h"
+#include "malloc.h"
+#include "init.h"
+
+static int	log_f(int argc, char **argv);
+
+static const cmdinfo_t	log_cmd =
+	{ "log", NULL, log_f, 0, 2, 0, "[stop|start <filename>]",
+	  "start or stop logging to a file", NULL };
+
+/* When non-zero, dbprintf() prefixes terminal output with "fsdevice: ". */
+int		dbprefix;
+/* Open log stream, or NULL when logging is off. */
+static FILE	*log_file;
+/* Heap copy of the log file name; only meaningful while log_file != NULL. */
+static char	*log_file_name;
+
+/*
+ * printf-style output to stdout, copied to the log file when one is open.
+ *
+ * Nothing is written, and 0 is returned, once an interrupt has been seen.
+ * SIGINT is blocked while stdout is being written.  The optional device
+ * prefix goes to stdout only; the log copy receives just the formatted
+ * message.
+ *
+ * Returns the sum of the printf()/vprintf() results for the stdout copy.
+ */
+int
+dbprintf(const char *fmt, ...)
+{
+	va_list	ap;
+	int	i;
+
+	if (seenint())
+		return 0;
+	va_start(ap, fmt);
+	blockint();
+	i = 0;
+	if (dbprefix)
+		i += printf("%s: ", fsdevice);
+	i += vprintf(fmt, ap);
+	unblockint();
+	va_end(ap);
+	if (log_file) {
+		/* the first va_list was consumed by vprintf(); start afresh */
+		va_start(ap, fmt);
+		vfprintf(log_file, fmt, ap);
+		va_end(ap);
+	}
+	return i;
+}
+
+/*
+ * "log" command handler.
+ *
+ *   log                   report whether logging is active, and to where
+ *   log stop              close the current log file
+ *   log start <filename>  open <filename> for appending and log to it
+ *
+ * Always returns 0.
+ */
+static int
+log_f(
+	int	argc,
+	char	**argv)
+{
+	if (argc == 1) {
+		if (log_file)
+			dbprintf("logging to %s\n", log_file_name);
+		else
+			dbprintf("no log file\n");
+	} else if (argc == 2 && strcmp(argv[1], "stop") == 0) {
+		if (log_file) {
+			xfree(log_file_name);
+			log_file_name = NULL;
+			fclose(log_file);
+			log_file = NULL;
+		} else
+			dbprintf("no log file\n");
+	} else if (argc == 3 && strcmp(argv[1], "start") == 0) {
+		if (log_file)
+			dbprintf("already logging to %s\n", log_file_name);
+		else {
+			log_file = fopen(argv[2], "a");
+			if (log_file == NULL)
+				dbprintf("can't open %s for writing\n",
+					argv[2]);
+			else	/* remember the file name, not the "start" keyword */
+				log_file_name = xstrdup(argv[2]);
+		}
+	} else
+		dbprintf("bad log command, ignored\n");
+	return 0;
+}
+
+/*
+ * printf-style output that goes to the log file only; does nothing when
+ * no log file is open.
+ */
+void
+logprintf(const char *fmt, ...)
+{
+	va_list	ap;
+
+	if (log_file) {
+		va_start(ap, fmt);
+		(void)vfprintf(log_file, fmt, ap);
+		va_end(ap);
+	}
+}
+
+/*
+ * Register the "log" command.
+ */
+void
+output_init(void)
+{
+	add_command(&log_cmd);
+}
diff --git a/db/output.h b/db/output.h
new file mode 100644
index 000000000..27861ff73
--- /dev/null
+++ b/db/output.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern int dbprefix; + +extern int dbprintf(const char *, ...); +extern void logprintf(const char *, ...); +extern void output_init(void); diff --git a/db/print.c b/db/print.c new file mode 100644 index 000000000..f4c747957 --- /dev/null +++ b/db/print.c @@ -0,0 +1,310 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "print.h"
+#include "bit.h"
+#include "flist.h"
+#include "strvec.h"
+#include "output.h"
+#include "sig.h"
+#include "write.h"
+
+static void	print_allfields(const struct field *fields);
+static int	print_f(int argc, char **argv);
+static void	print_flist_1(struct flist *flist, char **pfx, int parentoff);
+static void	print_somefields(const struct field *fields, int argc,
+				 char **argv);
+
+static const cmdinfo_t	print_cmd =
+	{ "print", "p", print_f, 0, -1, 0, "[value]...",
+	  "print field values", NULL };
+
+/*
+ * Print every field described by "fields" from the current I/O buffer
+ * (iocur_top->data).
+ *
+ * A single unnamed field-list node is created for the whole table, parsed
+ * against the buffer, printed and freed.  Only DEBUG builds check the
+ * flist_parse() result.
+ */
+static void
+print_allfields(
+	const field_t	*fields)
+{
+	flist_t		*flist;
+#ifdef DEBUG
+	int		i;
+#endif
+
+	flist = flist_make("");
+	flist->fld = fields;
+#ifndef DEBUG
+	(void)flist_parse(fields, flist, iocur_top->data, 0);
+#else
+	i = flist_parse(fields, flist, iocur_top->data, 0);
+	ASSERT(i == 1);
+#endif
+	flist_print(flist);
+	print_flist(flist);
+	flist_free(flist);
+}
+
+/*
+ * "print" command handler.
+ *
+ * Complains when there is no current type or the type has no print
+ * function; otherwise drops the command name from the argument vector and
+ * calls the type's print function with DB_READ.  Always returns 0.
+ */
+static int
+print_f(
+	int	argc,
+	char	**argv)
+{
+	pfunc_t	pf;
+
+	if (cur_typ == NULL) {
+		dbprintf("no current type\n");
+		return 0;
+	}
+	pf = cur_typ->pfunc;
+	if (pf == NULL) {
+		dbprintf("no print function for type %s\n", cur_typ->name);
+		return 0;
+	}
+	/* skip argv[0], the command name itself */
+	argc--;
+	argv++;
+	(*pf)(DB_READ, cur_typ->fields, argc, argv);
+	return 0;
+}
+
+/*
+ * Print a parsed field list, starting with an empty name prefix and a
+ * parent offset of 0.
+ */
+void
+print_flist(
+	flist_t	*flist)
+{
+	char	**pfx;
+
+	pfx = new_strvec(0);
+	print_flist_1(flist, pfx, 0);
+	free_strvec(pfx);
+}
+
+/*
+ * Recursive worker for print_flist().
+ *
+ * Walks "flist" and its siblings until the list ends or an interrupt is
+ * seen.  Each node works on its own copy of the caller's prefix "ppfx",
+ * extended with the node name and, when FL_OKLOW is set, an index
+ * "[low]" or range "[low-high]".  A node with children appends "." after
+ * a non-empty name and recurses with the node's offset as the new parent
+ * offset.  A leaf prints "<prefix> = " followed by the value produced by
+ * the field type's print function, or "(empty)" when the type has none.
+ */
+static void
+print_flist_1(
+	flist_t		*flist,
+	char		**ppfx,
+	int		parentoff)
+{
+	char		buf[16];
+	const field_t	*f;
+	const ftattr_t	*fa;
+	flist_t		*fl;
+	int		low;
+	int		neednl;
+	char		**pfx;
+
+	for (fl = flist; fl && !seenint(); fl = fl->sibling) {
+		pfx = copy_strvec(ppfx);
+		if (fl->name[0])
+			add_strvec(&pfx, fl->name);
+		if (fl->flags & FL_OKLOW) {
+			add_strvec(&pfx, "[");
+			sprintf(buf, "%d", fl->low);
+			add_strvec(&pfx, buf);
+			if (fl->low != fl->high) {
+				add_strvec(&pfx, "-");
+				sprintf(buf, "%d", fl->high);
+				add_strvec(&pfx, buf);
+			}
+			add_strvec(&pfx, "]");
+		}
+		if (fl->child) {
+			if (fl->name[0])
+				add_strvec(&pfx, ".");
+			print_flist_1(fl->child, pfx, fl->offset);
+		} else {
+			f = fl->fld;
+			fa = &ftattrtab[f->ftyp];
+			ASSERT(fa->ftyp == f->ftyp);
+			print_strvec(pfx);
+			dbprintf(" = ");
+			if (fl->flags & FL_OKLOW)
+				low = fl->low;
+			else
+				low = 0;
+			if (fa->prfunc) {
+				neednl = fa->prfunc(iocur_top->data, fl->offset,
+					fcount(f, iocur_top->data, parentoff),
+					fa->fmtstr,
+					fsize(f, iocur_top->data, parentoff, 0),
+					fa->arg, low,
+					(f->flags & FLD_ARRAY) != 0);
+				/* the print function says whether a newline is still due */
+				if (neednl)
+					dbprintf("\n");
+			} else {
+				ASSERT(fa->arg & FTARG_OKEMPTY);
+				dbprintf("(empty)\n");
+			}
+		}
+		free_strvec(pfx);
+	}
+}
+
+/*
+ * Register the "print" command.
+ */
+void
+print_init(void)
+{
+	add_command(&print_cmd);
+}
+
+/*
+ * Print an array of structures in compact one-line form.
+ *
+ * obj     - buffer holding the data
+ * bit     - bit offset of the first element (asserted: bitoffs(bit) == 0)
+ * count   - number of elements to print
+ * size    - distance in bits from one element to the next
+ * base    - added to the element number for display and for the
+ *           bitoffset()/fsize() lookups
+ * array   - when non-zero each element is prefixed with "<index>:"
+ * flds    - field table, terminated by an entry with a NULL name
+ * skipnms - when zero, a "[name,name,...] " header is printed first
+ *
+ * Fields flagged FLD_SKIPALL are left out of both header and values.
+ * Each element is printed as "[v1,v2,...]", elements are separated by a
+ * single space, and printing stops early once an interrupt is seen.
+ */
+void
+print_sarray(
+	void		*obj,
+	int		bit,
+	int		count,
+	int		size,
+	int		base,
+	int		array,
+	const field_t	*flds,
+	int		skipnms)
+{
+	int		bitoff;
+	const field_t	*f;
+	const ftattr_t	*fa;
+	int		first;
+	int		i;
+
+	ASSERT(bitoffs(bit) == 0);
+	if (skipnms == 0) {
+		for (f = flds, first = 1; f->name; f++) {
+			if (f->flags & FLD_SKIPALL)
+				continue;
+			dbprintf("%c%s", first ? '[' : ',', f->name);
+			first = 0;
+		}
+		dbprintf("] ");
+	}
+	for (i = 0, bitoff = bit;
+	     i < count && !seenint();
+	     i++, bitoff += size) {
+		if (array)
+			dbprintf("%d:", i + base);
+		for (f = flds, first = 1; f->name; f++) {
+			if (f->flags & FLD_SKIPALL)
+				continue;
+			fa = &ftattrtab[f->ftyp];
+			ASSERT(fa->ftyp == f->ftyp);
+			dbprintf("%c", first ? '[' : ',');
+			first = 0;
+			if (fa->prfunc)
+				fa->prfunc(obj,
+					bitoff +
+					    bitoffset(f, obj, bitoff, i + base),
+					fcount(f, obj, bitoff), fa->fmtstr,
+					fsize(f, obj, bitoff, i + base),
+					fa->arg, (f->flags & FLD_ABASE1) != 0,
+					f->flags & FLD_ARRAY);
+			else {
+				ASSERT(fa->arg & FTARG_OKEMPTY);
+				dbprintf("(empty)");
+			}
+		}
+		dbprintf("]");
+		/* separator between elements, none after the last */
+		if (i < count - 1)
+			dbprintf(" ");
+	}
+}
+
+/*
+ * Print only the fields named on the command line.
+ *
+ * Each argument is turned into a field list by flist_scan() and the
+ * results are chained through ->sibling in argument order.  If any
+ * argument fails to scan, the lists built so far are freed and nothing is
+ * printed.  When the first entry of "fields" has an empty name, the
+ * sub-field table of its type is used for parsing instead.
+ */
+static void
+print_somefields(
+	const field_t	*fields,
+	int		argc,
+	char		**argv)
+{
+	const ftattr_t	*fa;
+	flist_t		*fl;
+	flist_t		*lfl;
+	flist_t		*nfl;
+
+	/* fl is the head of the chain, lfl its current tail */
+	fl = lfl = NULL;
+	while (argc > 0) {
+		nfl = flist_scan(*argv);
+		if (!nfl) {
+			if (fl)
+				flist_free(fl);
+			return;
+		}
+		if (lfl)
+			lfl->sibling = nfl;
+		else
+			fl = nfl;
+		lfl = nfl;
+		argc--;
+		argv++;
+	}
+	if (fields->name[0] == '\0') {
+		fa = &ftattrtab[fields->ftyp];
+		ASSERT(fa->ftyp == fields->ftyp);
+		fields = fa->subfld;
+	}
+	if (!flist_parse(fields, fl, iocur_top->data, 0)) {
+		flist_free(fl);
+		return;
+	}
+	flist_print(fl);
+	print_flist(fl);
+	flist_free(fl);
+}
+
+/*
+ * Print the current I/O buffer as a double-quoted string.
+ *
+ * Output stops at the first NUL byte, at iocur_top->len bytes, or when an
+ * interrupt is seen.  Passing arguments produces a "no arguments allowed"
+ * message, but the string is printed regardless.  "fields" and "argv" are
+ * unused.
+ */
+/*ARGSUSED*/
+void
+print_string(
+	const field_t	*fields,
+	int		argc,
+	char		**argv)
+{
+	char		*cp;
+
+	if (argc != 0)
+		dbprintf("no arguments allowed\n");
+	dbprintf("\"");
+	for (cp = iocur_top->data;
+	     cp < (char *)iocur_top->data + iocur_top->len && *cp &&
+		     !seenint();
+	     cp++)
+		dbprintf("%c", *cp);
+	dbprintf("\"\n");
+}
+
+/*
+ * Print a structure: every field when no arguments are given, otherwise
+ * only the fields named in argv.
+ */
+void
+print_struct(
+	const field_t	*fields,
+	int		argc,
+	char		**argv)
+{
+	if (argc == 0)
+		print_allfields(fields);
+	else
+		print_somefields(fields, argc, argv);
+}
diff --git a/db/print.h b/db/print.h
new file mode 100644
index 000000000..81ae2c01b
--- /dev/null
+++
b/db/print.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct field; +struct flist; + +extern void print_flist(struct flist *flist); +extern void print_init(void); +extern void print_sarray(void *obj, int bit, int count, int size, int base, + int array, const field_t *flds, int skipnms); +extern void print_struct(const struct field *fields, int argc, char **argv); +extern void print_string(const struct field *fields, int argc, char **argv); diff --git a/db/quit.c b/db/quit.c new file mode 100644 index 000000000..1a93178bd --- /dev/null +++ b/db/quit.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "quit.h"
+
+static int	quit_f(int argc, char **argv);
+
+static const cmdinfo_t	quit_cmd =
+	{ "quit", "q", quit_f, 0, 0, 0, NULL,
+	  "exit xfs_db", NULL };
+
+/*
+ * "quit" command handler.  Takes no arguments and returns 1, where the
+ * other command handlers in this patch return 0.
+ * NOTE(review): the non-zero return is presumably what tells the command
+ * loop to stop - confirm against command.c.
+ */
+/*ARGSUSED*/
+static int
+quit_f(
+	int	argc,
+	char	**argv)
+{
+	return 1;
+}
+
+/*
+ * Register the "quit" command.
+ */
+void
+quit_init(void)
+{
+	add_command(&quit_cmd);
+}
diff --git a/db/quit.h b/db/quit.h
new file mode 100644
index 000000000..0e3e50d4e
--- /dev/null
+++ b/db/quit.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern void quit_init(void); diff --git a/db/sb.c b/db/sb.c new file mode 100644 index 000000000..dc40b7dd5 --- /dev/null +++ b/db/sb.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "io.h"
+#include "sb.h"
+#include "bit.h"
+#include "output.h"
+#include "mount.h"
+
+static int	sb_f(int argc, char **argv);
+static void	sb_help(void);
+
+static const cmdinfo_t	sb_cmd =
+	{ "sb", NULL, sb_f, 0, 1, 1, "[agno]",
+	  "set current address to sb header", sb_help };
+
+/* Top-level field table: one unnamed FLDT_SB entry at offset 0. */
+const field_t	sb_hfld[] = {
+	{ "", FLDT_SB, OI(0), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+/* OFF(f): bitize() of the byte offset of xfs_sb_t member sb_<f>. */
+#define	OFF(f)	bitize(offsetof(xfs_sb_t, sb_ ## f))
+/* SZC(f): szcount() applied to xfs_sb_t member sb_<f>. */
+#define	SZC(f)	szcount(xfs_sb_t, sb_ ## f)
+/*
+ * One entry per superblock member, naming the field type used to print it
+ * and, for block/inode references, the type a follow-up lookup lands on
+ * (TYP_LOG, TYP_INODE).  Terminated by a NULL name.
+ */
+const field_t	sb_flds[] = {
+	{ "magicnum", FLDT_UINT32X, OI(OFF(magicnum)), C1, 0, TYP_NONE },
+	{ "blocksize", FLDT_UINT32D, OI(OFF(blocksize)), C1, 0, TYP_NONE },
+	{ "dblocks", FLDT_DRFSBNO, OI(OFF(dblocks)), C1, 0, TYP_NONE },
+	{ "rblocks", FLDT_DRFSBNO, OI(OFF(rblocks)), C1, 0, TYP_NONE },
+	{ "rextents", FLDT_DRTBNO, OI(OFF(rextents)), C1, 0, TYP_NONE },
+	{ "uuid", FLDT_UUID, OI(OFF(uuid)), C1, 0, TYP_NONE },
+	{ "logstart", FLDT_DFSBNO, OI(OFF(logstart)), C1, 0, TYP_LOG },
+	{ "rootino", FLDT_INO, OI(OFF(rootino)), C1, 0, TYP_INODE },
+	{ "rbmino", FLDT_INO, OI(OFF(rbmino)), C1, 0, TYP_INODE },
+	{ "rsumino", FLDT_INO, OI(OFF(rsumino)), C1, 0, TYP_INODE },
+	{ "rextsize", FLDT_AGBLOCK, OI(OFF(rextsize)), C1, 0, TYP_NONE },
+	{ "agblocks", FLDT_AGBLOCK, OI(OFF(agblocks)), C1, 0, TYP_NONE },
+	{ "agcount", FLDT_AGNUMBER, OI(OFF(agcount)), C1, 0, TYP_NONE },
+	{ "rbmblocks", FLDT_EXTLEN, OI(OFF(rbmblocks)), C1, 0, TYP_NONE },
+	{ "logblocks", FLDT_EXTLEN, OI(OFF(logblocks)), C1, 0, TYP_NONE },
+	{ "versionnum", FLDT_UINT16X, OI(OFF(versionnum)), C1, 0, TYP_NONE },
+	{ "sectsize", FLDT_UINT16D, OI(OFF(sectsize)), C1, 0, TYP_NONE },
+	{ "inodesize", FLDT_UINT16D, OI(OFF(inodesize)), C1, 0, TYP_NONE },
+	{ "inopblock", FLDT_UINT16D, OI(OFF(inopblock)), C1, 0, TYP_NONE },
+	{ "fname", FLDT_CHARNS, OI(OFF(fname)), CI(SZC(fname)), 0, TYP_NONE },
+	{ "blocklog", FLDT_UINT8D, OI(OFF(blocklog)), C1, 0, TYP_NONE },
+	{ "sectlog", FLDT_UINT8D, OI(OFF(sectlog)), C1, 0, TYP_NONE },
+	{ "inodelog", FLDT_UINT8D, OI(OFF(inodelog)), C1, 0, TYP_NONE },
+	{ "inopblog", FLDT_UINT8D, OI(OFF(inopblog)), C1, 0, TYP_NONE },
+	{ "agblklog", FLDT_UINT8D, OI(OFF(agblklog)), C1, 0, TYP_NONE },
+	{ "rextslog", FLDT_UINT8D, OI(OFF(rextslog)), C1, 0, TYP_NONE },
+	{ "inprogress", FLDT_UINT8D, OI(OFF(inprogress)), C1, 0, TYP_NONE },
+	{ "imax_pct", FLDT_UINT8D, OI(OFF(imax_pct)), C1, 0, TYP_NONE },
+	{ "icount", FLDT_UINT64D, OI(OFF(icount)), C1, 0, TYP_NONE },
+	{ "ifree", FLDT_UINT64D, OI(OFF(ifree)), C1, 0, TYP_NONE },
+	{ "fdblocks", FLDT_UINT64D, OI(OFF(fdblocks)), C1, 0, TYP_NONE },
+	{ "frextents", FLDT_UINT64D, OI(OFF(frextents)), C1, 0, TYP_NONE },
+	{ "uquotino", FLDT_INO, OI(OFF(uquotino)), C1, 0, TYP_INODE },
+	{ "pquotino", FLDT_INO, OI(OFF(pquotino)), C1, 0, TYP_INODE },
+	{ "qflags", FLDT_UINT16X, OI(OFF(qflags)), C1, 0, TYP_NONE },
+	{ "flags", FLDT_UINT8X, OI(OFF(flags)), C1, 0, TYP_NONE },
+	{ "shared_vn", FLDT_UINT8D, OI(OFF(shared_vn)), C1, 0, TYP_NONE },
+	{ "inoalignmt", FLDT_EXTLEN, OI(OFF(inoalignmt)), C1, 0, TYP_NONE },
+	{ "unit", FLDT_UINT32D, OI(OFF(unit)), C1, 0, TYP_NONE },
+	{ "width", FLDT_UINT32D, OI(OFF(width)), C1, 0, TYP_NONE },
+	{ "dirblklog", FLDT_UINT8D, OI(OFF(dirblklog)), C1, 0, TYP_NONE },
+	{ NULL }
+};
+
+/*
+ * Print the long help text for the "sb" command.
+ */
+static void
+sb_help(void)
+{
+	dbprintf(
+"\n"
+" set allocation group superblock\n"
+"\n"
+" Example:\n"
+"\n"
+" 'sb 7' - set location to 7th allocation group superblock, set type to 'sb'\n"
+"\n"
+" Located in the 1st 512 byte block of each allocation group,\n"
+" the superblock contains the base information for the filesystem.\n"
+" The superblock in allocation group 0 is the primary. The copies in the\n"
+" remaining allocation groups only serve as backup for filesystem recovery.\n"
+" The icount/ifree/fdblocks/frextents are only updated in superblock 0.\n"
+"\n"
+);
+}
+
+/*
+ * "sb" command handler: make the superblock of an allocation group the
+ * current object.
+ *
+ * With an argument, it must be a complete number (any base strtoul()
+ * accepts with base 0) below the filesystem's AG count; it becomes the
+ * current AG.  Without one, the current AG is kept, or set to 0 if none
+ * has been chosen yet.  Always returns 0.
+ */
+static int
+sb_f(
+	int		argc,
+	char		**argv)
+{
+	unsigned long	val;
+	char		*p;
+
+	if (argc > 1) {
+		/*
+		 * Range-check the full-width value before narrowing it to
+		 * xfs_agnumber_t, and reject an argument with no digits
+		 * (strtoul() leaves p == argv[1] in that case).
+		 */
+		val = strtoul(argv[1], &p, 0);
+		if (p == argv[1] || *p != '\0' ||
+		    val >= mp->m_sb.sb_agcount) {
+			dbprintf("bad allocation group number %s\n", argv[1]);
+			return 0;
+		}
+		cur_agno = (xfs_agnumber_t)val;
+	} else if (cur_agno == NULLAGNUMBER)
+		cur_agno = 0;
+	ASSERT(typtab[TYP_SB].typnm == TYP_SB);
+	set_cur(&typtab[TYP_SB], XFS_AG_DADDR(mp, cur_agno, XFS_SB_DADDR), 1,
+		DB_RING_ADD, NULL);
+	return 0;
+}
+
+/*
+ * Register the "sb" command.
+ */
+void
+sb_init(void)
+{
+	add_command(&sb_cmd);
+}
+
+/*
+ * Size callback for the superblock type: returns bitize() of the
+ * filesystem sector size.  All three arguments are unused.
+ */
+/*ARGSUSED*/
+int
+sb_size(
+	void	*obj,
+	int	startoff,
+	int	idx)
+{
+	return bitize(mp->m_sb.sb_sectsize);
+}
diff --git a/db/sb.h b/db/sb.h
new file mode 100644
index 000000000..5d646cee6
--- /dev/null
+++ b/db/sb.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct field; + +extern const struct field sb_flds[]; +extern const struct field sb_hfld[]; + +extern void sb_init(void); +extern int sb_size(void *obj, int startoff, int idx); diff --git a/db/sig.c b/db/sig.c new file mode 100644 index 000000000..9b70cedef --- /dev/null +++ b/db/sig.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <signal.h>
+#include <string.h>
+#include "sig.h"
+
+/*
+ * Set by the SIGINT handler and polled through seenint().  It is written
+ * from signal context, so it has to be a volatile sig_atomic_t.
+ */
+static volatile sig_atomic_t	gotintr;
+static sigset_t			intrset;	/* contains only SIGINT */
+
+/*
+ * SIGINT handler: only records that an interrupt arrived.
+ */
+static void
+interrupt(int sig, siginfo_t *info, void *uc)
+{
+	gotintr = 1;
+}
+
+/*
+ * Block delivery of SIGINT until unblockint() is called.
+ */
+void
+blockint(void)
+{
+	sigprocmask(SIG_BLOCK, &intrset, NULL);
+}
+
+/*
+ * Forget any interrupt seen so far.
+ */
+void
+clearint(void)
+{
+	gotintr = 0;
+}
+
+/*
+ * Install the SIGINT handler and build the signal set used by
+ * blockint()/unblockint().
+ */
+void
+init_sig(void)
+{
+	struct sigaction sa;
+
+	/* prepare the mask first so it is valid as soon as signals arrive */
+	sigemptyset(&intrset);
+	sigaddset(&intrset, SIGINT);
+
+	memset(&sa, 0, sizeof(sa));
+	sigemptyset(&sa.sa_mask);
+	sa.sa_sigaction = interrupt;
+	/* sa_sigaction is only honoured when SA_SIGINFO is set */
+	sa.sa_flags = SA_SIGINFO;
+	sigaction(SIGINT, &sa, NULL);
+}
+
+/*
+ * Return non-zero if SIGINT was received since the last clearint().
+ */
+int
+seenint(void)
+{
+	return gotintr;
+}
+
+/*
+ * Allow delivery of SIGINT again.
+ */
+void
+unblockint(void)
+{
+	sigprocmask(SIG_UNBLOCK, &intrset, NULL);
+}
diff --git a/db/sig.h b/db/sig.h
new file mode 100644
index 000000000..8bea24748
--- /dev/null
+++ b/db/sig.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern void blockint(void); +extern void clearint(void); +extern void init_sig(void); +extern int seenint(void); +extern void unblockint(void); diff --git a/db/strvec.c b/db/strvec.c new file mode 100644 index 000000000..d346188e1 --- /dev/null +++ b/db/strvec.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include
+#include "strvec.h"
+#include "output.h"
+#include "malloc.h"
+
+static int	count_strvec(char **vec);
+/*
+ * Append a duplicate of str to the NULL-terminated vector *vecp.
+ * The vector is reallocated with room for the new entry plus the
+ * terminator, and *vecp is updated to the new address.
+ */
+void
+add_strvec(
+	char	***vecp,
+	char	*str)
+{
+	char	*dup;
+	int	i;
+	char	**vec;
+
+	dup = xstrdup(str);
+	vec = *vecp;
+	i = count_strvec(vec);
+	vec = xrealloc(vec, sizeof(*vec) * (i + 2));	/* entry + NULL */
+	vec[i] = dup;
+	vec[i + 1] = NULL;
+	*vecp = vec;
+}
+/*
+ * Return a newly allocated vector holding duplicates of every string
+ * in vec.
+ */
+char **
+copy_strvec(
+	char	**vec)
+{
+	int	i;
+	char	**rval;
+
+	i = count_strvec(vec);
+	rval = new_strvec(i);
+	for (i = 0; vec[i] != NULL; i++)
+		rval[i] = xstrdup(vec[i]);
+	return rval;
+}
+/*
+ * Return the number of entries in vec before its NULL terminator.
+ */
+static int
+count_strvec(
+	char	**vec)
+{
+	int	i;
+
+	for (i = 0; vec[i] != NULL; i++)
+		continue;
+	return i;
+}
+/*
+ * Free every string in vec and then the vector itself.
+ */
+void
+free_strvec(
+	char	**vec)
+{
+	int	i;
+
+	for (i = 0; vec[i] != NULL; i++)
+		xfree(vec[i]);
+	xfree(vec);
+}
+/*
+ * Allocate a vector with room for count entries plus the terminator.
+ * Only the terminator slot is initialised; the caller fills the rest.
+ */
+char **
+new_strvec(
+	int	count)
+{
+	char	**rval;
+
+	rval = xmalloc(sizeof(*rval) * (count + 1));
+	rval[count] = NULL;
+	return rval;
+}
+/*
+ * Print every string in vec back to back with dbprintf, adding no
+ * separators.
+ */
+void
+print_strvec(
+	char	**vec)
+{
+	int	i;
+
+	for (i = 0; vec[i] != NULL; i++)
+		dbprintf("%s", vec[i]);
+}
diff --git a/db/strvec.h b/db/strvec.h
new file mode 100644 index 000000000..f74f3790e --- /dev/null +++ b/db/strvec.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern void add_strvec(char ***vecp, char *str); +extern char **copy_strvec(char **vec); +extern void free_strvec(char **vec); +extern char **new_strvec(int count); +extern void print_strvec(char **vec); diff --git a/db/type.c b/db/type.c new file mode 100644 index 000000000..50381c440 --- /dev/null +++ b/db/type.c @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "agf.h" +#include "agfl.h" +#include "agi.h" +#include "block.h" +#include "command.h" +#include "data.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "print.h" +#include "sb.h" +#include "inode.h" +#include "bnobt.h" +#include "cntbt.h" +#include "inobt.h" +#include "bmapbt.h" +#include "bmroot.h" +#include "agf.h" +#include "agfl.h" +#include "agi.h" +#include "dir.h" +#include "dirshort.h" +#include "io.h" +#include "output.h" +#include "write.h" +#include "attr.h" +#include "dquot.h" +#include "dir2.h" + +static const typ_t *findtyp(char *name); +static int type_f(int argc, char **argv); + +const typ_t *cur_typ; + +static const cmdinfo_t type_cmd = + { "type", NULL, type_f, 0, 1, 1, "[newtype]", + "set/show current data type", NULL }; + +const typ_t typtab[] = { + { TYP_AGF, "agf", handle_struct, agf_hfld }, + { TYP_AGFL, "agfl", handle_struct, agfl_hfld }, + { TYP_AGI, "agi", handle_struct, agi_hfld }, + { TYP_ATTR, "attr", handle_struct, attr_hfld }, + { TYP_BMAPBTA, "bmapbta", handle_struct, bmapbta_hfld }, + { TYP_BMAPBTD, "bmapbtd", handle_struct, bmapbtd_hfld }, + { TYP_BNOBT, "bnobt", handle_struct, bnobt_hfld }, + { TYP_CNTBT, "cntbt", handle_struct, cntbt_hfld }, + { TYP_DATA, "data", handle_block, NULL }, + { TYP_DIR, "dir", handle_struct, dir_hfld }, + { TYP_DIR2, "dir2", handle_struct, dir2_hfld }, + { TYP_DQBLK, "dqblk", handle_struct, dqblk_hfld }, + { TYP_INOBT, "inobt", handle_struct, inobt_hfld }, + { TYP_INODATA, "inodata", NULL, NULL }, + { TYP_INODE, "inode", handle_struct, inode_hfld }, + { TYP_LOG, "log", NULL, NULL }, + { TYP_RTBITMAP, "rtbitmap", NULL, NULL }, + { TYP_RTSUMMARY, "rtsummary", NULL, NULL }, + { 
TYP_SB, "sb", handle_struct, sb_hfld },
+	{ TYP_SYMLINK, "symlink", handle_string, NULL },
+	{ TYP_NONE, NULL }
+};
+/*
+ * Look up a type by name in typtab[].  Returns the matching entry or
+ * NULL if no entry has that name.
+ */
+static const typ_t *
+findtyp(
+	char		*name)
+{
+	const typ_t	*tt;
+
+	for (tt = typtab; tt->name != NULL; tt++) {
+		/* each entry must sit at the index given by its typnm */
+		ASSERT(tt->typnm == (typnm_t)(tt - typtab));
+		if (strcmp(tt->name, name) == 0)
+			return tt;
+	}
+	return NULL;
+}
+/*
+ * Handler for the 'type [newtype]' command.
+ *
+ * Without an argument, prints the current type (if any) followed by the
+ * list of supported type names, eight per output line.  With an argument,
+ * looks the name up and makes it the type of the current I/O object and
+ * of cur_typ; this is refused when the current object has no type.
+ * Always returns 0.
+ */
+static int
+type_f(
+	int		argc,
+	char		**argv)
+{
+	const typ_t	*tt;
+	int count = 0;
+
+	if (argc == 1) {
+		if (cur_typ == NULL)
+			dbprintf("no current type\n");
+		else
+			dbprintf("current type is \"%s\"\n", cur_typ->name);
+
+		dbprintf("\n supported types are:\n ");
+		for (tt = typtab, count = 0; tt->name != NULL; tt++) {
+			/* the last name ends the line instead of a comma */
+			if ((tt+1)->name != NULL) {
+				dbprintf("%s, ", tt->name);
+				if ((++count % 8) == 0)
+					dbprintf("\n ");
+			} else {
+				dbprintf("%s\n", tt->name);
+			}
+		}
+
+
+	} else {
+		tt = findtyp(argv[1]);
+		if (tt == NULL) {
+			dbprintf("no such type %s\n", argv[1]);
+		} else {
+			if (iocur_top->typ == NULL) {
+				dbprintf("no current object\n");
+			} else {
+				iocur_top->typ = cur_typ = tt;
+			}
+		}
+	}
+	return 0;
+}
+/*
+ * Register the 'type' command.
+ */
+void
+type_init(void)
+{
+	add_command(&type_cmd);
+}
+
+/*
+ * read/write selectors for each major data type
+ *
+ * Each handler dispatches on action: DB_WRITE calls the write_* routine,
+ * anything else calls the matching print_* routine, passing fields, argc
+ * and argv through unchanged.
+ */
+
+void
+handle_struct(
+	int		action,
+	const field_t	*fields,
+	int		argc,
+	char		**argv)
+{
+	if (action == DB_WRITE)
+		write_struct(fields, argc, argv);
+	else
+		print_struct(fields, argc, argv);
+}
+
+void
+handle_string(
+	int		action,
+	const field_t	*fields,
+	int		argc,
+	char		**argv)
+{
+	if (action == DB_WRITE)
+		write_string(fields, argc, argv);
+	else
+		print_string(fields, argc, argv);
+}
+
+void
+handle_block(
+	int		action,
+	const field_t	*fields,
+	int		argc,
+	char		**argv)
+{
+	if (action == DB_WRITE)
+		write_block(fields, argc, argv);
+	else
+		print_block(fields, argc, argv);
+}
diff --git a/db/type.h b/db/type.h
new file mode 100644
index 000000000..9108c8d9c
--- /dev/null
+++ b/db/type.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct field; + +#define szof(x,y) sizeof(((x *)0)->y) +#define szcount(x,y) (szof(x,y) / szof(x,y[0])) + +typedef enum typnm +{ + TYP_AGF, TYP_AGFL, TYP_AGI, TYP_ATTR, TYP_BMAPBTA, + TYP_BMAPBTD, TYP_BNOBT, TYP_CNTBT, TYP_DATA, TYP_DIR, + TYP_DIR2, TYP_DQBLK, TYP_INOBT, TYP_INODATA, TYP_INODE, + TYP_LOG, TYP_RTBITMAP, TYP_RTSUMMARY, TYP_SB, TYP_SYMLINK, + TYP_NONE +} typnm_t; + +#define DB_WRITE 1 +#define DB_READ 0 + +typedef void (*opfunc_t)(const struct field *fld, int argc, char **argv); +typedef void (*pfunc_t)(int action, const struct field *fld, int argc, char **argv); + +typedef struct typ +{ + typnm_t typnm; + char *name; + pfunc_t pfunc; + const struct field *fields; +} typ_t; +extern const typ_t typtab[], *cur_typ; + +extern void type_init(void); +extern void handle_block(int action, const struct field *fields, int argc, + char **argv); +extern void handle_string(int action, const struct field *fields, int argc, + char **argv); +extern void handle_struct(int action, const struct field *fields, int argc, + char **argv); diff --git a/db/uuid.c b/db/uuid.c new file mode 100644 index 000000000..1c1bf3290 --- /dev/null +++ b/db/uuid.c @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "command.h" +#include "data.h" +#include "type.h" +#include "faddr.h" +#include "fprint.h" +#include "field.h" +#include "io.h" +#include "uuid.h" +#include "bit.h" +#include "output.h" +#include "mount.h" + +static int uuid_f(int argc, char **argv); +static void uuid_help(void); +static int label_f(int argc, char **argv); +static void label_help(void); + +static const cmdinfo_t uuid_cmd = + { "uuid", NULL, uuid_f, 0, 1, 1, "[uuid]", + "write/print FS uuid", uuid_help }; +static const cmdinfo_t label_cmd = + { "label", NULL, label_f, 0, 1, 1, "[label]", + "write/print FS label", label_help }; +static int warned; + +static void +uuid_help(void) +{ + dbprintf( +"\n" +" write/print FS uuid\n" +"\n" +" Example:\n" +"\n" +" 'uuid' - print UUID\n" +" 'uuid 01234567-0123-0123-0123-0123456789ab' - write UUID\n" +" 'uuid generate' - generate and write\n" +" 'uuid rewrite' - copy UUID from SB 0\n" +" 'uuid null' - write a null uuid\n" +"\n" +"The print function checks the UUID in each SB and will warn if the UUIDs\n" +"differ between AGs (the log is not checked). 
The write commands will\n"
+"set the uuid in all AGs to either a specified value, a newly generated\n"
+"value, the value found in the first superblock (SB 0) or a null value\n"
+"respectively. As a side effect of writing the UUID, the log is cleared\n"
+"(which is fine on a CLEANLY unmounted FS).\n"
+"\n"
+);
+}
+
+/*
+ * Print the long help text for the 'label' command.
+ */
+static void
+label_help(void)
+{
+	dbprintf(
+"\n"
+" write/print FS label\n"
+"\n"
+" Example:\n"
+"\n"
+" 'label' - print label\n"
+" 'label 123456789012' - write label\n"
+" 'label --' - write an empty label\n"
+"\n"
+"The print function checks the label in each SB and will warn if the labels\n"
+"differ between AGs. The write commands will set the label in all AGs to the\n"
+"specified value. The maximum length of a label is 12 characters - use of a\n"
+"longer label will result in truncation and a warning will be issued.\n"
+"\n"
+);
+}
+
+/*
+ * Push a new I/O cursor, read the superblock of allocation group agno into
+ * it and translate it into *sb.
+ *
+ * Returns 1 on success, leaving the pushed cursor on top of the stack so
+ * the caller can modify/write it; the caller is then responsible for the
+ * cursor.  Returns 0 after printing a message if the block cannot be read
+ * or fails the magic/version/in-progress checks; in that case the pushed
+ * cursor has already been popped on every path, so the stack is balanced.
+ */
+static int
+get_sb(xfs_agnumber_t agno, xfs_sb_t *sb)
+{
+	push_cur();
+	set_cur(&typtab[TYP_SB], XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), 1,
+		DB_RING_IGN, NULL);
+
+	if (!iocur_top->data) {
+		dbprintf("can't read superblock for AG %u\n", agno);
+		pop_cur();
+		return 0;
+	}
+
+	libxfs_xlate_sb(iocur_top->data, sb, 1, ARCH_CONVERT, XFS_SB_ALL_BITS);
+
+	if (sb->sb_magicnum != XFS_SB_MAGIC) {
+		dbprintf("bad sb magic # %#x in AG %u\n",
+			sb->sb_magicnum, agno);
+		pop_cur();
+		return 0;
+	}
+	if (!XFS_SB_GOOD_VERSION(sb)) {
+		dbprintf("bad sb version # %#x in AG %u\n",
+			sb->sb_versionnum, agno);
+		pop_cur();
+		return 0;
+	}
+	if (agno == 0 && sb->sb_inprogress != 0) {
+		dbprintf("mkfs not completed successfully\n");
+		pop_cur();
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Get or set the UUID stored in the superblock of allocation group agno.
+ *
+ * With uuid == NULL the on-disk UUID is copied into a static buffer whose
+ * address is returned (overwritten by the next call).  Otherwise *uuid is
+ * stored into the superblock.  Returns NULL if the superblock could not
+ * be obtained.
+ */
+static uuid_t *
+do_uuid(xfs_agnumber_t agno, uuid_t *uuid)
+{
+	xfs_sb_t	tsb;
+	static uuid_t	uu;
+
+	if (!get_sb(agno, &tsb))
+		return NULL;
+
+	if (!uuid) {	/* get uuid */
+		memcpy(&uu, &tsb.sb_uuid, sizeof(uuid_t));
+		pop_cur();
+		return &uu;
+	}
+	/* set uuid */
+	memcpy(&tsb.sb_uuid, uuid, sizeof(uuid_t));
+	libxfs_xlate_sb(iocur_top->data, &tsb, -1, ARCH_CONVERT, XFS_SB_UUID);
+
write_cur();
+	return uuid;
+}
+
+/*
+ * Get or set the label (sb_fname) stored in the superblock of allocation
+ * group agno.
+ *
+ * With label == NULL the on-disk label is copied into a static,
+ * NUL-terminated buffer whose address is returned (overwritten by the
+ * next call).  Otherwise label is written to the superblock, truncated to
+ * sizeof(sb_fname) bytes with a one-time warning; the two-character
+ * strings "", '' and -- select an empty label (and clear the first two
+ * bytes of the caller's string).  Returns NULL if the superblock could
+ * not be obtained.
+ */
+static char *
+do_label(xfs_agnumber_t agno, char *label)
+{
+	size_t		len;
+	xfs_sb_t	tsb;
+	static char	lbl[sizeof(tsb.sb_fname) + 1];
+
+	if (!get_sb(agno, &tsb))
+		return NULL;
+
+	memset(&lbl[0], 0, sizeof(lbl));
+
+	if (!label) {	/* get label */
+		pop_cur();
+		memcpy(&lbl[0], &tsb.sb_fname, sizeof(tsb.sb_fname));
+		return &lbl[0];
+	}
+	/* set label */
+	if ((len = strlen(label)) > sizeof(tsb.sb_fname)) {
+		/* %d takes an int; size_t must be converted explicitly */
+		if (!warned++)
+			dbprintf("warning: truncating label from %d to %d "
+				"characters\n", (int)len,
+				(int)sizeof(tsb.sb_fname));
+		len = sizeof(tsb.sb_fname);
+	}
+	if ( len == 2 &&
+	     (strcmp(label, "\"\"") == 0 ||
+	      strcmp(label, "''") == 0 ||
+	      strcmp(label, "--") == 0) )
+		label[0] = label[1] = '\0';
+	memset(&tsb.sb_fname, 0, sizeof(tsb.sb_fname));
+	memcpy(&tsb.sb_fname, label, len);
+	memcpy(&lbl[0], &tsb.sb_fname, sizeof(tsb.sb_fname));
+	libxfs_xlate_sb(iocur_top->data, &tsb, -1, ARCH_CONVERT, XFS_SB_FNAME);
+	write_cur();
+	return &lbl[0];
+}
+
+/*
+ * Handler for the 'uuid [uuid]' command: with an argument writes a UUID
+ * to every superblock (clearing the log first), without one prints and
+ * cross-checks the UUIDs of all superblocks.
+ */
+static int
+uuid_f(
+	int		argc,
+	char		**argv)
+{
+	char		bp[40];
+	xfs_agnumber_t	agno;
+	uuid_t		uu;
+	uuid_t		*uup=NULL;
+
+	if (argc != 1 && argc != 2) {
+		dbprintf("invalid parameters\n");
+		return 0;
+	}
+
+	if (argc==2) {
+		/* write uuid */
+
+		if (flag_readonly || !flag_expert_mode) {
+			dbprintf("%s not started in read-write expert mode, writing disabled\n",
+				progname);
+			return 0;
+		}
+
+		if (!strcasecmp(argv[1], "generate")) {
+			uuid_generate(uu);
+		} else if (!strcasecmp(argv[1], "null")) {
+			uuid_clear(uu);
+		} else if (!strcasecmp(argv[1], "rewrite")) {
+			uup=do_uuid(0, NULL);
+			if (!uup) {
+				dbprintf("failed to read UUID from AG 0\n");
+				return 0;
+			}
+			memcpy(&uu, *uup, sizeof(uuid_t));
+			uuid_unparse(uu, bp);
+			dbprintf("old uuid = %s\n", bp);
+		} else {
+			if (uuid_parse(argv[1], uu)) {
+				dbprintf("invalid uuid\n");
+				return 0;
+			}
+		}
+
+		if (mp->m_sb.sb_logstart) {
+			if (xfsargs.logdev) {
+				dbprintf("external log specified for FS with internal log - aborting \n");
+
return 0;
+			}
+		} else {
+			/* sb_logstart == 0: the filesystem uses an external log */
+			if (!xfsargs.logdev) {
+				dbprintf("no external log specified for FS with external log - aborting\n");
+				return 0;
+			}
+		}
+
+		dbprintf("clearing log and setting uuid\n");
+
+		/*
+		 * clear log (setting uuid)
+		 *
+		 * The log lives on the data device when sb_logstart is
+		 * non-zero, otherwise on the separate log device.
+		 */
+
+		if (libxfs_log_clear(
+				(mp->m_sb.sb_logstart)?xfsargs.ddev:xfsargs.logdev,
+				XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart),
+				XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks),
+				&uu,
+				XLOG_FMT)) {
+			dbprintf("error clearing log\n");
+			return 0;
+		}
+
+
+		dbprintf("writing all SBs\n");
+
+		/* stops at the first AG that fails; later AGs are left as is */
+		for (agno = 0; agno < mp->m_sb.sb_agcount; agno++)
+			if (!do_uuid(agno, &uu)) {
+				dbprintf("failed to set uuid in AG %d\n", agno);
+				break;
+			}
+
+		uuid_unparse(uu, bp);
+		dbprintf("new uuid = %s\n", bp);
+
+		return 0;
+
+	} else {
+		/* get (check) uuid */
+
+		for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
+			uup=do_uuid(agno, NULL);
+			if (!uup) {
+				dbprintf("failed to read UUID from AG %d\n", agno);
+				return 0;
+			}
+			/* AG 0 supplies the reference value for the others */
+			if (agno) {
+				if (memcmp(&uu, uup, sizeof(uuid_t))) {
+					dbprintf("warning: uuid copies differ\n");
+					break;
+				}
+			} else {
+				memcpy(uu, uup, sizeof(uuid_t));
+			}
+		}
+		if (mp->m_sb.sb_logstart) {
+			if (xfsargs.logdev)
+				dbprintf("warning: external log specified for FS with internal log\n");
+		} else {
+			if (!xfsargs.logdev) {
+				dbprintf("warning: no external log specified for FS with external log\n");
+			}
+		}
+
+		uuid_unparse(uu, bp);
+		dbprintf("uuid = %s\n", bp);
+	}
+
+	return 0;
+}
+/*
+ * Handler for the 'label [label]' command: with an argument writes the
+ * label to every superblock, without one prints the label and warns
+ * about allocation groups whose copy differs from AG 0.
+ */
+static int
+label_f(
+	int		argc,
+	char		**argv)
+{
+	char		*p = NULL;
+	xfs_sb_t	sb;	/* only sb_fname is used, as the AG 0 reference */
+	xfs_agnumber_t	ag;
+
+	if (argc != 1 && argc != 2) {
+		dbprintf("invalid parameters\n");
+		return 0;
+	}
+
+	if (argc==2) {	/* write label */
+		if (flag_readonly || !flag_expert_mode) {
+			dbprintf("%s not started in read-write expert mode, "
+				"writing disabled\n", progname);
+			return 0;
+		}
+
+		dbprintf("writing all SBs\n");
+		for (ag = 0; ag < mp->m_sb.sb_agcount; ag++)
+			if ((p = do_label(ag, argv[1])) == NULL) {
+				dbprintf("failed to set label in AG %d\n", ag);
+
break;
+			}
+		/* p is NULL when a write failed; never hand NULL to %s */
+		if (p != NULL)
+			dbprintf("new label = \"%s\"\n", p);
+	} else {	/* print label */
+		for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+			p = do_label(ag, NULL);
+			if (!p) {
+				dbprintf("failed to read label in AG %d\n", ag);
+				return 0;
+			}
+			/* AG 0 supplies the reference copy for the comparison */
+			if (!ag)
+				memcpy(&sb.sb_fname, p, sizeof(sb.sb_fname));
+			else if (memcmp(&sb.sb_fname, p, sizeof(sb.sb_fname)))
+				dbprintf("warning: label in AG %d differs\n", ag);
+		}
+		dbprintf("label = \"%s\"\n", p);
+	}
+	return 0;
+}
+
+/*
+ * Register the 'label' and 'uuid' commands and reset the one-time label
+ * truncation warning.
+ */
+void
+uuid_init(void)
+{
+	warned = 0;
+	add_command(&label_cmd);
+	add_command(&uuid_cmd);
+}
diff --git a/db/uuid.h b/db/uuid.h
new file mode 100644
index 000000000..8997237ed
--- /dev/null
+++ b/db/uuid.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +extern void uuid_init(void); diff --git a/db/write.c b/db/write.c new file mode 100644 index 000000000..32477f237 --- /dev/null +++ b/db/write.c @@ -0,0 +1,708 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include
+#include
+#include
+#include "bit.h"
+#include "block.h"
+#include "command.h"
+#include "data.h"
+#include "type.h"
+#include "faddr.h"
+#include "fprint.h"
+#include "field.h"
+#include "flist.h"
+#include "io.h"
+#include "output.h"
+#include "print.h"
+#include "write.h"
+#include "malloc.h"
+
+static int	write_f(int argc, char **argv);
+static void	write_help(void);
+
+static const cmdinfo_t	write_cmd =
+	{ "write", NULL, write_f, 0, -1, 0, "[field or value]...",
+	  "write value to disk", write_help };
+/*
+ * Register the 'write' command.  Nothing is registered unless
+ * flag_expert_mode is set.  Also seeds the lrand48() generator used by
+ * the 'random' block-write mode from clock().
+ */
+void
+write_init(void)
+{
+	if (!flag_expert_mode)
+		return;
+
+	add_command(&write_cmd);
+	srand48(clock());
+}
+/*
+ * Print the long help text for the 'write' command.
+ */
+static void
+write_help(void)
+{
+	dbprintf(
+"\n"
+" The 'write' command takes on different personalities depending on the\n"
+" type of object being worked with.\n\n"
+" Write has 3 modes:\n"
+" 'struct mode' - is active anytime you're looking at a filesystem object\n"
+" which contains individual fields (ex: an inode).\n"
+" 'data mode' - is active anytime you set a disk address directly or set\n"
+" the type to 'data'.\n"
+" 'string mode' - only used for writing symlink blocks.\n"
+"\n"
+" Examples:\n"
+" Struct mode: 'write core.uid 23' - set an inode uid field to 23.\n"
+" 'write fname \"hello\\000\"' - write superblock fname.\n"
+" (note: in struct mode strings are not null terminated)\n"
+" 'write fname #6669736800' - write superblock fname with hex.\n"
+" 'write uuid 00112233-4455-6677-8899-aabbccddeeff'\n"
+" - write superblock uuid.\n"
+" Data mode: 'write fill 0xff' - fill the entire block with 0xff's\n"
+" 'write lshift 3' - shift the block 3 bytes to the left\n"
+" 'write sequence 1 5' - write a cycle of number [1-5] through\n"
+" the entire block.\n"
+"
String mode: 'write \"This_is_a_filename\" - write null terminated string.\n"
+"\n"
+" In data mode type 'write' by itself for a list of specific commands.\n\n"
+);
+
+}
+
+/*
+ * Handler for the 'write' command.  Refuses to run in read-only mode or
+ * without a current type, then hands the remaining arguments to the
+ * current type's handler with action DB_WRITE.  Always returns 0.
+ */
+static int
+write_f(
+	int		argc,
+	char		**argv)
+{
+	pfunc_t	pf;
+	extern char *progname;
+
+	if (flag_readonly) {
+		dbprintf("%s started in read only mode, writing disabled\n",
+			progname);
+		return 0;
+	}
+
+	if (cur_typ == NULL) {
+		dbprintf("no current type\n");
+		return 0;
+	}
+
+	pf = cur_typ->pfunc;
+	if (pf == NULL) {
+		dbprintf("no handler function for type %s, write unsupported.\n",
+			cur_typ->name);
+		return 0;
+	}
+
+	/* move past the "write" command */
+	argc--;
+	argv++;
+
+	(*pf)(DB_WRITE, cur_typ->fields, argc, argv);
+
+	return 0;
+}
+
+/*
+ * compare significant portions of commands
+ *
+ * Returns 1 when s1 and s2 are identical, or when one of them is a prefix
+ * of the other and that prefix is at least sig characters long (sig == 0
+ * disables prefix matching).  Returns 0 otherwise, including when either
+ * pointer is NULL.
+ */
+
+static int
+sigcmp(
+	char	*s1,
+	char	*s2,
+	int	sig)
+{
+	int	sigcnt;
+
+	if (!s1 || !s2)
+		return 0;
+
+	for (sigcnt = 0; *s1 == *s2; s1++, s2++) {
+		sigcnt++;
+		if (*s1 == '\0')
+			return 1;
+	}
+	if (*s1 && *s2)
+		return 0;
+
+	if (sig && (sigcnt >= sig))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Shift len bytes starting at offset start of the current buffer left by
+ * shift bytes, zero-filling the vacated tail.  An argument of -1 selects
+ * the default (shift 1, start 0, len up to the end of the buffer).
+ * from and to are unused.  Nothing is modified when the region does not
+ * fit in the buffer.  shift is assumed to be no larger than len.
+ */
+/* ARGSUSED */
+static void
+bwrite_lshift(
+	int	start,
+	int	len,
+	int	shift,
+	int	from,
+	int	to)
+{
+	char	*base;
+
+	if (shift == -1)
+		shift = 1;
+	if (start == -1)
+		start = 0;
+	if (len == -1)
+		len = iocur_top->len - start;
+
+	if (len+start > iocur_top->len) {
+		dbprintf("length (%d) too large for data block size (%d)",
+			len, iocur_top->len);
+		return;		/* do not write past the buffer */
+	}
+
+	base = (char *)iocur_top->data + start;
+
+	/* source and destination overlap, so memcpy is not allowed */
+	memmove(base, base+shift, len-shift);
+	memset(base+(len-shift), 0, shift);
+}
+
+/*
+ * Shift len bytes starting at offset start of the current buffer right by
+ * shift bytes, zero-filling the vacated head.  Defaults and restrictions
+ * are the same as for bwrite_lshift.
+ */
+/* ARGSUSED */
+static void
+bwrite_rshift(
+	int	start,
+	int	len,
+	int	shift,
+	int	from,
+	int	to)
+{
+	char	*base;
+
+	if (shift == -1)
+		shift = 1;
+	if (start == -1)
+		start = 0;
+	if (len == -1)
+		len = iocur_top->len - start;
+
+	if (len+start > iocur_top->len) {
+		dbprintf("length (%d) too large for data block size (%d)",
+			len, iocur_top->len);
+		return;		/* do not write past the buffer */
+	}
+
+	base = (char *)iocur_top->data + start;
+
+	/* source and destination overlap, so memcpy is not allowed */
+	memmove(base+shift,
base, len-shift);
+	memset(base, 0, shift);
+}
+
+/*
+ * Rotate len bytes starting at offset start of the current buffer left by
+ * shift bytes.  An argument of -1 selects the default (shift 1, start 0,
+ * len up to the end of the buffer).  from and to are unused.  Nothing is
+ * modified when the region does not fit in the buffer.  shift is assumed
+ * to be no larger than len.
+ */
+/* ARGSUSED */
+static void
+bwrite_lrot(
+	int	start,
+	int	len,
+	int	shift,
+	int	from,
+	int	to)
+{
+	char	*base;
+	char	*hold_region;
+
+	if (shift == -1)
+		shift = 1;
+	if (start == -1)
+		start = 0;
+	if (len == -1)
+		len = iocur_top->len - start;
+
+	if (len+start > iocur_top->len) {
+		dbprintf("length (%d) too large for data block size (%d)",
+			len, iocur_top->len);
+		return;		/* do not write past the buffer */
+	}
+
+	base = (char *)iocur_top->data + start;
+
+	/* save the bytes that fall off the front, then re-append them */
+	hold_region = xmalloc(shift);
+	memcpy(hold_region, base, shift);
+	memmove(base, base+shift, len-shift);	/* regions overlap */
+	memcpy(base+(len-shift), hold_region, shift);
+	xfree(hold_region);
+}
+
+/*
+ * Rotate len bytes starting at offset start of the current buffer right
+ * by shift bytes.  Defaults and restrictions are the same as for
+ * bwrite_lrot.
+ */
+/* ARGSUSED */
+static void
+bwrite_rrot(
+	int	start,
+	int	len,
+	int	shift,
+	int	from,
+	int	to)
+{
+	char	*base;
+	char	*hold_region;
+
+	if (shift == -1)
+		shift = 1;
+	if (start == -1)
+		start = 0;
+	if (len == -1)
+		len = iocur_top->len - start;
+
+	if (len+start > iocur_top->len) {
+		dbprintf("length (%d) too large for data block size (%d)",
+			len, iocur_top->len);
+		return;		/* do not write past the buffer */
+	}
+
+	base = (char *)iocur_top->data + start;
+
+	/* save the bytes that fall off the end, then put them in front */
+	hold_region = xmalloc(shift);
+	memcpy(hold_region, base+(len-shift), shift);
+	memmove(base+shift, base, len-shift);	/* regions overlap */
+	memcpy(base, hold_region, shift);
+	xfree(hold_region);
+}
+
+/*
+ * Fill len bytes starting at offset start of the current buffer with a
+ * repeating sequence of byte values between from and to, advancing by
+ * step.  -1 selects the defaults (start 0, len up to the end of the
+ * buffer, from 0, to 255, step 1); from/to above 255 are replaced by the
+ * defaults too.  When from > to the bounds are swapped and the step is
+ * made negative.  Nothing is modified when the region does not fit in
+ * the buffer.
+ */
+/* ARGSUSED */
+static void
+bwrite_seq(
+	int	start,
+	int	len,
+	int	step,
+	int	from,
+	int	to)
+{
+	int	i;
+	int	tmp;
+	int	base;
+	int	range;
+	int	top;
+	char	*buf = (char *)iocur_top->data;
+
+	if (start == -1)
+		start = 0;
+
+	if (len == -1)
+		len = iocur_top->len - start;
+
+	if (len+start > iocur_top->len) {
+		dbprintf("length (%d) too large for data block size (%d)",
+			len, iocur_top->len);
+		return;		/* do not write past the buffer */
+	}
+
+	if (from == -1 || from > 255)
+		from = 0;
+	if (to == -1 || to > 255)
+		to = 255;
+	if (step == -1)
+		step = 1;
+
+	base = from;
+	top = to;
+	if (from > to) {
+		base = to;
+		top = from;
+		if (step > 0)
+			step = -step;
+	}
+
+	range = top - base;
+
+	tmp = 0;
+	/* index by i so that the region really begins at offset start */
+	for (i = start; i < start+len; i++) {
+		buf[i] = tmp + base;
+		tmp = (tmp + step)%(range+1);
+	}
+} + +/* + * Overwrite len bytes of the current I/O buffer, beginning at offset + * start, with bytes taken from lrand48(). + */ +/* ARGSUSED */ +static void +bwrite_random( + int start, + int len, + int shift, + int from, + int to) +{ + int i; + char *buf = (char *)iocur_top->data; + + if (start == -1) + start = 0; + + if (len == -1) + len = iocur_top->len - start; + + if (len+start > iocur_top->len) { + dbprintf("length (%d) too large for data block size (%d)", + len, iocur_top->len); + return; /* never touch memory past the buffer */ + } + + /* index from the buffer base so the start offset is honoured */ + for (i = start; i < start+len; i++) + buf[i] = (char)lrand48(); +} + +/* + * Set len bytes of the current I/O buffer, beginning at offset start, + * to value (0 when value is -1). + */ +/* ARGSUSED */ +static void +bwrite_fill( + int start, + int len, + int value, + int from, + int to) +{ + char *base; + + if (value == -1) + value = 0; + if (start == -1) + start = 0; + if (len == -1) + len = iocur_top->len - start; + + if (len+start > iocur_top->len) { + dbprintf("length (%d) too large for data block size (%d)", + len, iocur_top->len); + return; /* never touch memory past the buffer */ + } + + base = (char *)iocur_top->data + start; + + memset(base, value, len); +} + +static struct bw_cmd { + void (*cmdfunc)(int,int,int,int,int); + char *cmdstr; + int sig_chars; + int argmin; + int argmax; + int shiftcount_arg; + int from_arg; + int to_arg; + int start_arg; + int len_arg; + char *usage; +} bw_cmdtab[] = { + /* cmd sig min max sh frm to start len */ + { bwrite_lshift, "lshift", 2, 0, 3, 1, 0, 0, 2, 3, + "[shiftcount] [start] [len]", }, + { bwrite_rshift, "rshift", 2, 0, 3, 1, 0, 0, 2, 3, + "[shiftcount] [start] [len]", }, + { bwrite_lrot, "lrot", 2, 0, 3, 1, 0, 0, 2, 3, + "[shiftcount] [start] [len]", }, + { bwrite_rrot, "rrot", 2, 0, 3, 1, 0, 0, 2, 3, + "[shiftcount] [start] [len]", }, + { bwrite_seq, "sequence", 3, 0, 4, 0, 1, 2, 3, 4, + "[from] [to] [start] [len]", }, + { bwrite_random, "random", 3, 0, 2, 0, 0, 0, 1, 2, + "[start] [len]", }, + { bwrite_fill, "fill", 1, 1, 3, 1, 0, 0, 2, 3, + "num [start] [len]" } +}; + +#define BWRITE_CMD_MAX (sizeof(bw_cmdtab)/sizeof(bw_cmdtab[0])) + +static int +convert_oct( + char *arg, + int *ret) +{ + int count; + int i; + int val = 0; + + /* only allow 1 case, '\' and 3 octal digits (or less) */ + + for (count = 0;
count < 3; count++) { + if (arg[count] == '\0') + break; + + /* stop at the first character that is not an octal digit */ + if ((arg[count] < '0') || (arg[count] > '7')) + break; + } + + for (i = 0; i < count; i++) { + val |= ((arg[(count-1)-i]-'0')&0x07)<<(i*3); + } + + *ret = val&0xff; + + return(count); +} + +#define NYBBLE(x) (isdigit(x)?(x-'0'):(tolower(x)-'a'+0xa)) + +static char * +convert_arg( + char *arg, + int bit_length) +{ + int i; + static char *buf = NULL; + char *rbuf; + long long *value; + int alloc_size; + char *ostr; + int octval, ret; + + if (bit_length <= 64) + alloc_size = 8; + else + alloc_size = (bit_length+7)/8; + + buf = xrealloc(buf, alloc_size); + memset(buf, 0, alloc_size); + value = (long long *)buf; + rbuf = buf; + + if (*arg == '\"') { + /* handle strings */ + + /* zap closing quote if there is one */ + if ((ostr = strrchr(arg+1, '\"')) != NULL) + *ostr = '\0'; + + ostr = arg+1; + for (i = 0; i < alloc_size; i++) { + if (!*ostr) + break; + + /* do octal: only when a digit 0-7 follows the backslash */ + if (*ostr == '\\') { + if (*(ostr+1) >= '0' && *(ostr+1) <= '7') { + ret = convert_oct(ostr+1, &octval); + *rbuf++ = octval; + ostr += ret+1; + continue; + } + } + *rbuf++ = *ostr++; + } + + return buf; + } else if (arg[0] == '#' || strchr(arg,'-')) { + /* + * handle hex blocks ie + * #00112233445566778899aabbccddeeff + * and uuids ie + * 1122334455667788-99aa-bbcc-ddee-ff00112233445566778899 + */ + int bytes=bit_length/8; + + /* skip leading hash */ + if (*arg=='#') arg++; + + while (*arg && bytes--) { + /* skip hyphens */ + while (*arg=='-') arg++; + + /* get first nybble */ + if (!isxdigit(*arg)) return NULL; + *rbuf=NYBBLE(*arg)<<4; + arg++; + + /* skip more hyphens */ + while (*arg=='-') arg++; + + /* get second nybble */ + if (!isxdigit(*arg)) return NULL; + *rbuf++|=NYBBLE(*arg); + arg++; + } + if (bytes<0&&*arg) return NULL; + return buf; + } else { + /* + * handle integers + */ + *value = strtoll(arg, NULL, 0); + +#if __BYTE_ORDER == BIG_ENDIAN + /* hackery for big endian */ + if (bit_length <= 8) { + rbuf += 7; + } else if
(bit_length <= 16) { + rbuf += 6; + } else if (bit_length <= 32) { + rbuf += 4; + } +#endif + return rbuf; + } +} + + +/* ARGSUSED */ +void +write_struct( + const field_t *fields, + int argc, + char **argv) +{ + const ftattr_t *fa; + flist_t *fl; + flist_t *sfl; + int bit_length; + char *buf; + int parentoffset; + + if (argc != 2) { + dbprintf("usage: write fieldname value\n"); + return; + } + + fl = flist_scan(argv[0]); + if (!fl) { + dbprintf("unable to parse '%s'.\n", argv[0]); + return; + } + + /* if we're a root field type, go down 1 layer to get field list */ + if (fields->name[0] == '\0') { + fa = &ftattrtab[fields->ftyp]; + ASSERT(fa->ftyp == fields->ftyp); + fields = fa->subfld; + } + + /* run down the field list and set offsets into the data */ + if (!flist_parse(fields, fl, iocur_top->data, 0)) { + flist_free(fl); + dbprintf("parsing error\n"); + return; + } + + sfl = fl; + parentoffset = 0; + while (sfl->child) { + parentoffset = sfl->offset; + sfl = sfl->child; + } + + bit_length = fsize(sfl->fld, iocur_top->data, parentoffset, 0); + bit_length *= fcount(sfl->fld, iocur_top->data, parentoffset); + + /* convert this to a generic conversion routine */ + /* should be able to handle str, num, or even labels */ + + buf = convert_arg(argv[1], bit_length); + if (!buf) { + dbprintf("unable to convert value '%s'.\n", argv[1]); + return; + } + + setbitval(iocur_top->data, sfl->offset, bit_length, buf); + write_cur(); + + flist_print(fl); + print_flist(fl); + flist_free(fl); +} + +/* ARGSUSED */ +void +write_string( + const field_t *fields, + int argc, + char **argv) +{ + char *buf; + int i; + + if (argc != 1) { + dbprintf("usage (in string mode): write \"string...\"\n"); + return; + } + + buf = convert_arg(argv[0], (int)((strlen(argv[0])+1)*8)); + for (i = 0; i < iocur_top->len; i++) { + ((char *)iocur_top->data)[i] = *buf; + if (*buf++ == '\0') + break; + } + + /* write back to disk */ + write_cur(); +} + +/* ARGSUSED */ +void +write_block( + const field_t 
*fields, + int argc, + char **argv) +{ + int i; + int shiftcount = -1; + int start = -1; + int len = -1; + int from = -1; + int to = -1; + struct bw_cmd *cmd = NULL; + + if (argc <= 1 || argc > 5) + goto block_usage; + + for (i = 0; i < BWRITE_CMD_MAX; i++) { + if (sigcmp(argv[0], bw_cmdtab[i].cmdstr, + bw_cmdtab[i].sig_chars)) { + cmd = &bw_cmdtab[i]; + break; + } + } + + if (!cmd) { + dbprintf("write: invalid subcommand\n"); + goto block_usage; + } + + if ((argc < cmd->argmin + 1) || (argc > cmd->argmax + 1)) { + dbprintf("write %s: invalid number of arguments\n", + cmd->cmdstr); + goto block_usage; + } + + /* an argument is parsed only if its own table index was supplied */ + if (cmd->shiftcount_arg && (cmd->shiftcount_arg < argc)) + shiftcount = (int)strtoul(argv[cmd->shiftcount_arg], NULL, 0); + if (cmd->start_arg && (cmd->start_arg < argc)) + start = (int)strtoul(argv[cmd->start_arg], NULL, 0); + if (cmd->len_arg && (cmd->len_arg < argc)) + len = (int)strtoul(argv[cmd->len_arg], NULL, 0); + if (cmd->from_arg && (cmd->from_arg < argc)) + from = (int)strtoul(argv[cmd->from_arg], NULL, 0); + if (cmd->to_arg && (cmd->to_arg < argc)) + to = (int)strtoul(argv[cmd->to_arg], NULL, 0); + + cmd->cmdfunc(start, len, shiftcount, from, to); + + /* write back to disk */ + write_cur(); + return; + + block_usage: + + dbprintf("usage: write (in data mode)\n"); + for (i = 0; i < BWRITE_CMD_MAX; i++) { + dbprintf(" %-9.9s %s\n", + bw_cmdtab[i].cmdstr, bw_cmdtab[i].usage); + } + dbprintf("\n"); + return; +} diff --git a/db/write.h b/db/write.h new file mode 100644 index 000000000..7e0596f0c --- /dev/null +++ b/db/write.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct field; + +extern void write_init(void); +extern void write_block(const field_t *fields, int argc, char **argv); +extern void write_string(const field_t *fields, int argc, char **argv); +extern void write_struct(const field_t *fields, int argc, char **argv); diff --git a/db/xfs_admin.sh b/db/xfs_admin.sh new file mode 100755 index 000000000..c516ae9f9 --- /dev/null +++ b/db/xfs_admin.sh @@ -0,0 +1,60 @@ +#!/bin/sh -f +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +OPTS="" +USAGE="Usage: xfs_admin [-flu] [-L label] [-U uuid] special" + +while getopts "fluL:U:" c +do + case $c in + f) OPTS=$OPTS" -f";; + l) OPTS=$OPTS" -c label";; + L) OPTS=$OPTS" -c 'label "$OPTARG"'";; + u) OPTS=$OPTS" -c uuid";; + U) OPTS=$OPTS" -c 'uuid "$OPTARG"'";; + \?) echo $USAGE 1>&2 + exit 2 + ;; + esac +done +set -- extra $@ +shift $OPTIND +case $# in + 1) eval xfs_db -x -p xfs_admin $OPTS $1 + status=$? + ;; + *) echo $USAGE 1>&2 + exit 2 + ;; +esac +exit $status diff --git a/db/xfs_check.sh b/db/xfs_check.sh new file mode 100755 index 000000000..bff2ecc32 --- /dev/null +++ b/db/xfs_check.sh @@ -0,0 +1,63 @@ +#!/bin/sh -f +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# +#ident "$Revision: 1.1 $" + +OPTS=" " +ISFILE=" " +USAGE="usage: xfs_check [-svf] [-i ino]... [-b bno]... special" + + +while getopts "b:fi:sv" c +do + case $c in + s) OPTS=$OPTS"-s ";; + v) OPTS=$OPTS"-v ";; + i) OPTS=$OPTS"-i "$OPTARG" ";; + b) OPTS=$OPTS"-b "$OPTARG" ";; + f) ISFILE=" -f";; + \?) echo $USAGE 1>&2 + exit 2 + ;; + esac +done +set -- extra $@ +shift $OPTIND +case $# in + 1) xfs_db$ISFILE -i -p xfs_check -c "check$OPTS" $1 + status=$? + ;; + *) echo $USAGE 1>&2 + exit 2 + ;; +esac +exit $status diff --git a/db/xfs_check64.sh b/db/xfs_check64.sh new file mode 100755 index 000000000..930939093 --- /dev/null +++ b/db/xfs_check64.sh @@ -0,0 +1,63 @@ +#!/bin/sh -f +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. 
+# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# +#ident "$Revision: 1.1 $" + +OPTS=" " +ISFILE=" " +USAGE="usage: xfs_check64 [-svf] [-i ino]... [-b bno]... special" + + +while getopts "b:fi:sv" c +do + case $c in + s) OPTS=$OPTS"-s ";; + v) OPTS=$OPTS"-v ";; + i) OPTS=$OPTS"-i "$OPTARG" ";; + b) OPTS=$OPTS"-b "$OPTARG" ";; + f) ISFILE=" -f";; + \?) echo $USAGE 1>&2 + exit 2 + ;; + esac +done +set -- extra $@ +shift $OPTIND +case $# in + 1) xfs_db64$ISFILE -i -p xfs_check64 -c "check$OPTS" $1 + status=$? + ;; + *) echo $USAGE 1>&2 + exit 2 + ;; +esac +exit $status diff --git a/db/xfs_ncheck.sh b/db/xfs_ncheck.sh new file mode 100755 index 000000000..3c83e35d3 --- /dev/null +++ b/db/xfs_ncheck.sh @@ -0,0 +1,61 @@ +#!/bin/sh -f +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# +#ident "$Revision: 1.1 $" + +OPTS=" " +ISFILE=" " +USAGE="usage: xfs_ncheck [-sf] [-i ino]... special" + + +while getopts "b:fi:sv" c +do + case $c in + s) OPTS=$OPTS"-s ";; + i) OPTS=$OPTS"-i "$OPTARG" ";; + f) ISFILE=" -f";; + \?) echo $USAGE 1>&2 + exit 2 + ;; + esac +done +set -- extra $@ +shift $OPTIND +case $# in + 1) xfs_db$ISFILE -r -p xfs_ncheck -c "blockget -ns" -c "ncheck$OPTS" $1 + status=$? + ;; + *) echo $USAGE 1>&2 + exit 2 + ;; +esac +exit $status diff --git a/db/xfs_ncheck64.sh b/db/xfs_ncheck64.sh new file mode 100755 index 000000000..7fcd3a3de --- /dev/null +++ b/db/xfs_ncheck64.sh @@ -0,0 +1,61 @@ +#!/bin/sh -f +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# +#ident "$Revision: 1.1 $" + +OPTS=" " +ISFILE=" " +USAGE="usage: xfs_ncheck64 [-sf] [-i ino]... special" + + +while getopts "b:fi:sv" c +do + case $c in + s) OPTS=$OPTS"-s ";; + i) OPTS=$OPTS"-i "$OPTARG" ";; + f) ISFILE=" -f";; + \?) echo $USAGE 1>&2 + exit 2 + ;; + esac +done +set -- extra $@ +shift $OPTIND +case $# in + 1) xfs_db64$ISFILE -r -p xfs_ncheck64 -c "blockget -ns" -c "ncheck$OPTS" $1 + status=$? + ;; + *) echo $USAGE 1>&2 + exit 2 + ;; +esac +exit $status diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 000000000..abd968598 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,45 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = .. +include $(TOPDIR)/include/builddefs + +DOCFILES = README.LVM README.xfsdump Porting-Guide +LSRCFILES = $(DOCFILES) + +default: $(CMDTARGET) + +include $(BUILDRULES) + +install: default + $(INSTALL) -m 755 -d $(XFS_CMDS_DOC_DIR) + $(INSTALL) -m 644 $(DOCFILES) $(XFS_CMDS_DOC_DIR) diff --git a/doc/README.LVM b/doc/README.LVM new file mode 100644 index 000000000..06eb6def8 --- /dev/null +++ b/doc/README.LVM @@ -0,0 +1,77 @@ +XFS on LVM +__________ + +PREFACE + +This is a quick reference to setting XFS up on LVM. For more information +please see the LVM HOWTO at: + + http://www.linuxdoc.org/HOWTO/LVM-HOWTO.html + +PREREQUISITES + +You need a kernel with LVM support either built in or as a module. 
+This document assumes lvm as a module. + +SETTING UP LVM + +>>> Load module + + [root@crash /sbin]# modprobe lvm-mod + +>>> Set partition type to 0x8e for partitions you wish to use with LVM + + [root@crash /sbin]# fdisk /dev/sda1 + Command (m for help): t + Partition number (1-4): 1 + Hex code (type L to list codes): 8e + Changed system type of partition 1 to 8e (Unknown) + + Command (m for help): w + The partition table has been altered! + +>>> Write PV superblock on physical volumes + + [root@crash /root]# pvcreate /dev/sda1 /dev/sdb1 /dev/sdc1 /dev/sdd1 + pvcreate -- physical volume "/dev/sda1" successfully created + pvcreate -- physical volume "/dev/sdb1" successfully created + pvcreate -- physical volume "/dev/sdc1" successfully created + pvcreate -- physical volume "/dev/sdd1" successfully created + +>>> Create a volume group consisting of the PVs we just set up + +[root@crash /root]# vgcreate vg00 /dev/sda1 /dev/sdb1 /dev/sdc1 /dev/sdd1 + vgcreate -- INFO: using default physical extent size 4 MB + vgcreate -- INFO: maximum logical volume size is 255.99 Gigabyte + vgcreate -- doing automatic backup of volume group "vg00" + vgcreate -- volume group "vg00" successfully created and activated + +>>> Create a logical volume - striped across 4 PVs, 64 KB chunk size, 20 GB + +[root@crash /root]# lvcreate -i 4 -I 64 -L 20G -n lv00 vg00 + lvcreate -- rounding 20971520 KB to stripe boundary size 20975616 KB / 5121 PE + lvcreate -- doing automatic backup of "vg00" + lvcreate -- logical volume "/dev/vg00/lv00" successfully created + +>>> Build a filesystem on the LV + +[root@crash /root]# mkfs -t xfs /dev/vg00/lv00 + meta-data=/dev/vg00/lv00 isize=256 agcount=20, agsize=262144 blks + data = bsize=4096 blocks=5242879, imaxpct=25 + = sunit=0 swidth=0 blks, unwritten=1 + naming =version 2 bsize=4096 + log =internal log bsize=4096 blocks=1200 + realtime =none extsz=65536 blocks=0, rtextents=0 + +[root@crash /root]# mount -t xfs /dev/vg00/lv00 /xfs + +>>> Go nuts + + +After 
a reboot you will need to reactivate the VGs/LVs: + + modprobe lvm-mod + vgchange -a y + +These commands could be added to a startup script. + diff --git a/fsck/Makefile b/fsck/Makefile new file mode 100644 index 000000000..965dca0b4 --- /dev/null +++ b/fsck/Makefile @@ -0,0 +1,46 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = .. 
+include $(TOPDIR)/include/builddefs + +CMDTARGET = fsck.xfs +CFILES = xfs_fsck.c +LCFLAGS = -s -O3 + +default: $(CMDTARGET) + +include $(BUILDRULES) + +install: default + $(INSTALL) -m 755 -d $(XFS_CMDS_SBIN_DIR) + $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_SBIN_DIR) diff --git a/fsck/xfs_fsck.c b/fsck/xfs_fsck.c new file mode 100644 index 000000000..edb8746f2 --- /dev/null +++ b/fsck/xfs_fsck.c @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* http://www.muppetlabs.com/~breadbox/software/tiny/teensy.html */ +/* Unfortunately, we need to be a little more portable. 
;^) */ +/* This used to be a symlink to /bin/true but that gives a weird */ +/* dependency problem in a certain package manager. */ + +int +main(int argc, char **argv) +{ + return 0; +} diff --git a/growfs/Makefile b/growfs/Makefile new file mode 100644 index 000000000..f0bf7616c --- /dev/null +++ b/growfs/Makefile @@ -0,0 +1,50 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = ..
+include $(TOPDIR)/include/builddefs + +CMDTARGET = xfs_growfs +CMDDEPS = $(LIBXFS) + +CFILES = xfs_growfs.c +LLDLIBS = $(LIBXFS) $(LIBUUID) +LSRCFILES = xfs_info.sh + +default: $(CMDTARGET) + +include $(BUILDRULES) + +install: default + $(INSTALL) -m 755 -d $(XFS_CMDS_BIN_DIR) + $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_BIN_DIR) + $(INSTALL) -m 755 xfs_info.sh $(XFS_CMDS_BIN_DIR)/xfs_info diff --git a/growfs/xfs_growfs.c b/growfs/xfs_growfs.c new file mode 100644 index 000000000..555f924b4 --- /dev/null +++ b/growfs/xfs_growfs.c @@ -0,0 +1,458 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include +#include +#include + +static char *fname; /* mount point name */ +static char *datadev; /* data device name */ +static char *logdev; /* log device name */ +static char *rtdev; /* RT device name */ + +static void +usage(void) +{ + fprintf(stderr, +"Usage: %s [options] mountpoint\n\n\ +Options:\n\ + -d grow data/metadata section\n\ + -l grow log section\n\ + -r grow realtime section\n\ + -n don't change anything, just show geometry\n\ + -i convert log from external to internal format\n\ + -t alternate location for mount table (/etc/mtab)\n\ + -x convert log from internal to external format\n\ + -D size grow data/metadata section to size blks\n\ + -L size grow/shrink log section to size blks\n\ + -R size grow realtime section to size blks\n\ + -e size set realtime extent size to size blks\n\ + -m imaxpct set inode max percent to imaxpct\n\ + -V print version information\n", + progname); + exit(2); +} + +void +report_info( + xfs_fsop_geom_t geo, + char *mntpoint, + int unwritten, + int dirversion, + int isint) +{ + printf("meta-data=%-22s isize=%-6d agcount=%d, agsize=%d blks\n" + "data =%-22s bsize=%-6d blocks=%lld, imaxpct=%d\n" + " =%-22s sunit=%-6d swidth=%d blks, unwritten=%d\n" + "naming =version %-14d bsize=%-6d\n" + "log =%-22s bsize=%-6d blocks=%d\n" + "realtime =%-22s extsz=%-6d blocks=%lld, rtextents=%lld\n", + mntpoint, geo.inodesize, geo.agcount, geo.agblocks, + "", geo.blocksize, geo.datablocks, geo.imaxpct, + "", geo.sunit, geo.swidth, unwritten, + dirversion, geo.dirblocksize, + isint ? "internal" : "external", geo.blocksize, geo.logblocks, + geo.rtblocks ? 
"external" : "none", + geo.rtextsize * geo.blocksize, geo.rtblocks, geo.rtextents); +}
+
+/*
+ * explore_mtab - find the mount table entry describing mntpoint.
+ *
+ * mtab is the path of the mount table to scan; mntpoint is the path given
+ * by the user.  An entry matches when stat64() of its mnt_dir reports the
+ * same st_dev/st_ino pair as mntpoint, and it must be of type "xfs".
+ *
+ * On success the file-scope variables are filled in:
+ *	fname	- mount point directory of the entry
+ *	datadev	- data device (mnt_fsname)
+ *	logdev	- value of the logdev= option, or NULL if absent or empty
+ *	rtdev	- value of the rtdev= option, or NULL if absent or empty
+ * These point into the entry returned by getmntent(); no copies are made.
+ *
+ * Any failure is reported on stderr and terminates the program with
+ * exit status 1.
+ */
+void
+explore_mtab(char *mtab, char *mntpoint)
+{
+	struct mntent	*mnt;
+	struct stat64	statuser;
+	struct stat64	statmtab;
+	FILE		*mtp;
+	char		*rtend;
+	char		*logend;
+
+	if ((mtp = setmntent(mtab, "r")) == NULL) {
+		/* report the table we really tried to open, not MOUNTED */
+		fprintf(stderr, "%s: cannot access mount list %s: %s\n",
+			progname, mtab, strerror(errno));
+		exit(1);
+	}
+	if (stat64(mntpoint, &statuser) < 0) {
+		fprintf(stderr, "%s: cannot access mount point %s: %s\n",
+			progname, mntpoint, strerror(errno));
+		exit(1);
+	}
+
+	while ((mnt = getmntent(mtp)) != NULL) {
+		if (stat64(mnt->mnt_dir, &statmtab) < 0) {
+			fprintf(stderr, "%s: ignoring entry %s in %s: %s\n",
+				progname, mnt->mnt_dir, mtab, strerror(errno));
+			continue;
+		}
+		if (statuser.st_ino != statmtab.st_ino ||
+		    statuser.st_dev != statmtab.st_dev)
+			continue;
+		else if (strcmp(mnt->mnt_type, "xfs") != 0) {
+			fprintf(stderr, "%s: %s is not an XFS filesystem\n",
+				progname, mntpoint);
+			exit(1);
+		}
+		break;	/* we've found it */
+	}
+
+	if (mnt == NULL) {
+		fprintf(stderr,
+		"%s: %s is not a filesystem mount point, according to %s\n",
+			progname, mntpoint, mtab);
+		exit(1);
+	}
+
+	/* find the data, log (logdev=), and realtime (rtdev=) devices */
+	rtend = logend = NULL;
+	fname = mnt->mnt_dir;
+	datadev = mnt->mnt_fsname;
+	if ((logdev = hasmntopt(mnt, "logdev=")) != NULL) {
+		logdev += 7;	/* skip over "logdev=" */
+		/* options are comma separated; only note where the value ends */
+		logend = logdev + strcspn(logdev, " ,");
+	}
+	if ((rtdev = hasmntopt(mnt, "rtdev=")) != NULL) {
+		rtdev += 6;	/* skip over "rtdev=" */
+		rtend = rtdev + strcspn(rtdev, " ,");
+	}
+
+	/*
+	 * Do this only after we've finished processing mount options:
+	 * both values live in the same option string, so writing a
+	 * terminator earlier could hide the second option from hasmntopt().
+	 */
+	if (logend != NULL) {
+		*logend = '\0';	/* terminate end of log device name */
+		if (*logdev == '\0')
+			logdev = NULL;	/* "logdev=" with no value */
+	}
+	if (rtend != NULL) {
+		*rtend = '\0';	/* terminate end of rt device name */
+		if (*rtdev == '\0')
+			rtdev = NULL;	/* "rtdev=" with no value */
+	}
+
+	endmntent(mtp);
+}
+
+int +main(int argc, char **argv) +{ + int aflag; /* fake flag, do all pieces */ + int c; /* current option character */ + long long ddsize; /* device size in 512-byte blocks */ + int
dflag; /* -d flag */ + int dirversion; /* directory version number */ + long long dlsize; /* device size in 512-byte blocks */ + long long drsize; /* device size in 512-byte blocks */ + long long dsize; /* new data size in fs blocks */ + int error; /* we have hit an error */ + long esize; /* new rt extent size */ + int ffd; /* mount point file descriptor */ + xfs_fsop_geom_t geo; /* current fs geometry */ + int iflag; /* -i flag */ + int isint; /* log is currently internal */ + int lflag; /* -l flag */ + long long lsize; /* new log size in fs blocks */ + int maxpct; /* -m flag value */ + int mflag; /* -m flag */ + char *mtab; /* mount table file (/etc/mtab) */ + int nflag; /* -n flag */ + xfs_fsop_geom_t ngeo; /* new fs geometry */ + int rflag; /* -r flag */ + long long rsize; /* new rt size in fs blocks */ + int unwritten; /* unwritten extent flag */ + int xflag; /* -x flag */ + libxfs_init_t xi; /* libxfs structure */ + + mtab = MOUNTED; + progname = basename(argv[0]); + aflag = dflag = iflag = lflag = mflag = nflag = rflag = xflag = 0; + maxpct = esize = 0; + dsize = lsize = rsize = 0LL; + while ((c = getopt(argc, argv, "dD:e:ilL:m:np:rR:t:xV")) != EOF) { + switch (c) { + case 'D': + dsize = atoll(optarg); + /* fall through */ + case 'd': + dflag = 1; + break; + case 'e': + esize = atol(optarg); + rflag = 1; + break; + case 'i': + lflag = iflag = 1; + break; + case 'L': + lsize = atoll(optarg); + /* fall through */ + case 'l': + lflag = 1; + break; + case 'm': + mflag = 1; + maxpct = atoi(optarg); + break; + case 'n': + nflag = 1; + break; + case 'p': + progname = optarg; + break; + case 'R': + rsize = atoll(optarg); + /* fall through */ + case 'r': + rflag = 1; + break; + case 't': + mtab = optarg; + break; + case 'x': + lflag = xflag = 1; + break; + case 'V': + printf("%s version %s\n", progname, VERSION); + break; + case '?': + default: + usage(); + } + } + if (argc - optind != 1) + usage(); + if (iflag && xflag) + usage(); + if (dflag + lflag + rflag == 0) + 
aflag = 1; + + explore_mtab(mtab, argv[optind]); + + ffd = open(fname, O_RDONLY); + if (ffd < 0) { + perror(fname); + return 1; + } + + /* get the current filesystem size & geometry */ + if (ioctl(ffd, XFS_IOC_FSGEOMETRY, &geo) < 0) { + fprintf(stderr, "%s: cannot determine geometry of filesystem" + " mounted at %s: %s\n", + progname, fname, strerror(errno)); + exit(1); + } + isint = geo.logstart > 0; + unwritten = geo.flags & XFS_FSOP_GEOM_FLAGS_EXTFLG ? 1 : 0; + dirversion = geo.flags & XFS_FSOP_GEOM_FLAGS_DIRV2 ? 2 : 1; + + if (nflag) { + report_info(geo, fname, unwritten, dirversion, isint); + exit(0); + } + + /* + * Need root access from here on (using raw devices)... + */ + + bzero(&xi, sizeof(xi)); + xi.dname = datadev; + xi.logname = logdev; + xi.rtname = rtdev; + xi.notvolok = 1; + xi.isreadonly = LIBXFS_ISREADONLY; + + if (!libxfs_init(&xi)) + usage(); + + /* check we got the info for all the sections we are trying to modify */ + if (!xi.ddev) { + fprintf(stderr, "%s: failed to access data device for %s\n", + progname, fname); + exit(1); + } + if (lflag && !isint && !xi.logdev) { + fprintf(stderr, "%s: failed to access external log for %s\n", + progname, fname); + exit(1); + } + if (rflag && !xi.rtdev) { + fprintf(stderr, "%s: failed to access realtime device for %s\n", + progname, fname); + exit(1); + } + + report_info(geo, fname, unwritten, dirversion, isint); + + ddsize = xi.dsize; + dlsize = ( xi.logBBsize? 
xi.logBBsize : + geo.logblocks * (geo.blocksize / BBSIZE) ); + drsize = xi.rtsize; + + error = 0; + if (dflag | aflag) { + xfs_growfs_data_t in; + + if (!mflag) + maxpct = geo.imaxpct; + if (!dsize) + dsize = ddsize / (geo.blocksize / BBSIZE); + else if (dsize > ddsize / (geo.blocksize / BBSIZE)) { + fprintf(stderr, + "data size %llu too large, maximum is %lld\n", + (__u64)dsize, ddsize/(geo.blocksize/BBSIZE)); + error = 1; + } + if (!error && dsize < geo.datablocks) { + fprintf(stderr, "data size %llu too small," + " old size is %lld\n", + (__u64)dsize, geo.datablocks); + error = 1; + } else if (!error && + dsize == geo.datablocks && maxpct == geo.imaxpct) { + if (dflag) + fprintf(stderr, + "data size unchanged, skipping\n"); + if (mflag) + fprintf(stderr, + "inode max pct unchanged, skipping\n"); + } else if (!error && !nflag) { + in.newblocks = (__u64)dsize; + in.imaxpct = (__u32)maxpct; + if (ioctl(ffd, XFS_IOC_FSGROWFSDATA, &in) < 0) { + if (errno == EWOULDBLOCK) + fprintf(stderr, + "%s: growfs operation in progress already\n", + progname); + else + fprintf(stderr, + "%s: ioctl failed - XFS_IOC_FSGROWFSDATA: %s\n", + progname, strerror(errno)); + error = 1; + } + } + } + + if (!error && (rflag | aflag)) { + xfs_growfs_rt_t in; + + if (!esize) + esize = (__u32)geo.rtextsize; + if (!rsize) + rsize = drsize / (geo.blocksize / BBSIZE); + else if (rsize > drsize / (geo.blocksize / BBSIZE)) { + fprintf(stderr, + "realtime size %lld too large, maximum is %lld\n", + rsize, drsize / (geo.blocksize / BBSIZE)); + error = 1; + } + if (!error && rsize < geo.rtblocks) { + fprintf(stderr, + "realtime size %lld too small, old size is %lld\n", + rsize, geo.rtblocks); + error = 1; + } else if (!error && rsize == geo.rtblocks) { + if (rflag) + fprintf(stderr, + "realtime size unchanged, skipping\n"); + } else if (!error && !nflag) { + in.newblocks = (__u64)rsize; + in.extsize = (__u32)esize; + if (ioctl(ffd, XFS_IOC_FSGROWFSRT, &in) < 0) { + if (errno == EWOULDBLOCK) + 
fprintf(stderr, + "%s: growfs operation in progress already\n", + progname); + else if (errno == ENOSYS) + fprintf(stderr, + "%s: realtime growth not implemented\n", + progname); + else + fprintf(stderr, + "%s: ioctl failed - XFS_IOC_FSGROWFSRT: %s\n", + progname, strerror(errno)); + error = 1; + } + } + } + + if (!error && (lflag | aflag)) { + xfs_growfs_log_t in; + + if (!lsize) + lsize = dlsize / (geo.blocksize / BBSIZE); + if (iflag) + in.isint = 1; + else if (xflag) + in.isint = 0; + else + in.isint = xi.logBBsize == 0; + if (lsize == geo.logblocks && (in.isint == isint)) { + if (lflag) + fprintf(stderr, + "log size unchanged, skipping\n"); + } else if (!nflag) { + in.newblocks = (__u32)lsize; + if (ioctl(ffd, XFS_IOC_FSGROWFSLOG, &in) < 0) { + if (errno == EWOULDBLOCK) + fprintf(stderr, + "%s: growfs operation in progress already\n", + progname); + else if (errno == ENOSYS) + fprintf(stderr, + "%s: log growth not supported yet\n", progname); + else + fprintf(stderr, + "%s: ioctl failed - XFS_IOC_FSGROWFSLOG: %s\n", + progname, strerror(errno)); + error = 1; + } + } + } + + if (ioctl(ffd, XFS_IOC_FSGEOMETRY, &ngeo) < 0) { + fprintf(stderr, "%s: ioctl failed - XFS_IOC_FSGEOMETRY: %s\n", + progname, strerror(errno)); + exit(1); + } + if (geo.datablocks != ngeo.datablocks) + printf("data blocks changed from %lld to %lld\n", + geo.datablocks, ngeo.datablocks); + if (geo.imaxpct != ngeo.imaxpct) + printf("inode max percent changed from %d to %d\n", + geo.imaxpct, ngeo.imaxpct); + if (geo.logblocks != ngeo.logblocks) + printf("log blocks changed from %d to %d\n", + geo.logblocks, ngeo.logblocks); + if ((geo.logstart == 0) != (ngeo.logstart == 0)) + printf("log changed from %s to %s\n", + geo.logstart ? "internal" : "external", + ngeo.logstart ? 
"internal" : "external"); + if (geo.rtblocks != ngeo.rtblocks) + printf("realtime blocks changed from %lld to %lld\n", + geo.rtblocks, ngeo.rtblocks); + if (geo.rtextsize != ngeo.rtextsize) + printf("realtime extent size changed from %d to %d\n", + geo.rtextsize, ngeo.rtextsize); + exit(0); +} diff --git a/growfs/xfs_info.sh b/growfs/xfs_info.sh new file mode 100755 index 000000000..2b1316f81 --- /dev/null +++ b/growfs/xfs_info.sh @@ -0,0 +1,56 @@ +#!/bin/sh -f +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +#
+
+# xfs_info - print the geometry of a mounted XFS filesystem.
+#
+# Runs "xfs_growfs -n" (report only, change nothing) under the program
+# name xfs_info.  -t selects an alternate mount table.  Exit status is
+# that of xfs_growfs, or 2 on a usage error.
+
+MTAB=""
+USAGE="Usage: xfs_info [-t mtab] mountpoint"
+
+while getopts "t:" c
+do
+	case $c in
+	t)	MTAB="$OPTARG" ;;
+	*)	echo "$USAGE" 1>&2
+		exit 2
+		;;
+	esac
+done
+# The dummy first word makes "shift $OPTIND" drop exactly the parsed
+# options; "$@" is quoted so a mount point containing blanks stays one word.
+set -- extra "$@"
+shift $OPTIND
+case $# in
+	1)	if [ -n "$MTAB" ]; then
+			xfs_growfs -p xfs_info -n -t "$MTAB" "$1"
+		else
+			xfs_growfs -p xfs_info -n "$1"
+		fi
+		status=$?
+		;;
+	*)	echo "$USAGE" 1>&2
+		exit 2
+		;;
+esac
+exit $status
diff --git a/include/Makefile b/include/Makefile new file mode 100644 index 000000000..60d0a28bd --- /dev/null +++ b/include/Makefile @@ -0,0 +1,52 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = .. +include $(TOPDIR)/include/builddefs + +LSRCFILES = libxfs.h acl.h arch.h attributes.h handle.h jdm.h \ + platform_defs.h.in builddefs.in buildrules \ + xfs_ag.h xfs_alloc.h xfs_alloc_btree.h xfs_arch.h xfs_attr_leaf.h \ + xfs_attr_sf.h xfs_bit.h xfs_bmap.h xfs_bmap_btree.h xfs_btree.h \ + xfs_buf_item.h xfs_cred.h xfs_da_btree.h xfs_dfrag.h xfs_dinode.h \ + xfs_dir.h xfs_dir2.h xfs_dir2_block.h xfs_dir2_data.h xfs_dir2_leaf.h \ + xfs_dir2_node.h xfs_dir2_sf.h xfs_dir_leaf.h xfs_dir_sf.h xfs_dqblk.h \ + xfs_dquot_item.h xfs_extfree_item.h xfs_fs.h xfs_ialloc.h \ + xfs_ialloc_btree.h xfs_imap.h xfs_inode.h xfs_inode_item.h xfs_inum.h \ + xfs_log.h xfs_log_priv.h xfs_log_recover.h xfs_mount.h xfs_quota.h \ + xfs_rtalloc.h xfs_sb.h xfs_trans.h xfs_trans_space.h xfs_types.h + +default : + +include $(BUILDRULES) + +install : default diff --git a/include/arch.h b/include/arch.h new file mode 100644 index 000000000..12ce1c5b1 --- /dev/null +++ b/include/arch.h @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_ARCH_H__ +#define __XFS_SUPPORT_ARCH_H__ + +#ifdef __KERNEL__ + +#include + +#ifdef __LITTLE_ENDIAN +# define __BYTE_ORDER __LITTLE_ENDIAN +#endif +#ifdef __BIG_ENDIAN +# define __BYTE_ORDER __BIG_ENDIAN +#endif + +#else + +#include + +#endif /* __KERNEL__ */ + +/* do we need conversion? */ + +#define ARCH_NOCONVERT 1 +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define ARCH_CONVERT 0 +#else +#define ARCH_CONVERT ARCH_NOCONVERT +#endif + +/* generic swapping macros */ + +#define INT_SWAP16(A) ((typeof(A))(__swab16((__u16)A))) +#define INT_SWAP32(A) ((typeof(A))(__swab32((__u32)A))) +#define INT_SWAP64(A) ((typeof(A))(__swab64((__u64)A))) + +#define INT_SWAP(type, var) \ + ((sizeof(type) == 8) ? INT_SWAP64(var) : \ + ((sizeof(type) == 4) ? INT_SWAP32(var) : \ + ((sizeof(type) == 2) ? 
INT_SWAP16(var) : \ + (var))))
+
+
+/*
+ * Copy the 4 bytes at 'from' to 'to' in reverse order.
+ * to[0] is written before from[0] is read, so the two areas
+ * must not overlap.
+ */
+#define INT_SWAP_UNALIGNED_32(from,to) \
+    { \
+	((__u8*)(to))[0] = ((__u8*)(from))[3]; \
+	((__u8*)(to))[1] = ((__u8*)(from))[2]; \
+	((__u8*)(to))[2] = ((__u8*)(from))[1]; \
+	((__u8*)(to))[3] = ((__u8*)(from))[0]; \
+    }
+
+/* Copy the 8 bytes at 'from' to 'to' in reverse order, one half at a time. */
+#define INT_SWAP_UNALIGNED_64(from,to) \
+    { \
+	INT_SWAP_UNALIGNED_32( ((__u8*)(from)) + 4, ((__u8*)(to))); \
+	INT_SWAP_UNALIGNED_32( ((__u8*)(from)), ((__u8*)(to)) + 4); \
+    }
+
+/*
+ * get and set integers from potentially unaligned locations
+ *
+ * All accesses are done a byte at a time through a __u8 pointer;
+ * _LE treats byte 0 as least significant, _BE as most significant.
+ */
+
+#define INT_GET_UNALIGNED_16_LE(pointer) \
+   ((__u16)((((__u8*)(pointer))[0]      ) | (((__u8*)(pointer))[1] << 8 )))
+#define INT_GET_UNALIGNED_16_BE(pointer) \
+   ((__u16)((((__u8*)(pointer))[0] << 8) | (((__u8*)(pointer))[1])))
+#define INT_SET_UNALIGNED_16_LE(pointer,value) \
+    { \
+	((__u8*)(pointer))[0] = (((value)     ) & 0xff); \
+	((__u8*)(pointer))[1] = (((value) >> 8) & 0xff); \
+    }
+#define INT_SET_UNALIGNED_16_BE(pointer,value) \
+    { \
+	((__u8*)(pointer))[0] = (((value) >> 8) & 0xff); \
+	((__u8*)(pointer))[1] = (((value)     ) & 0xff); \
+    }
+
+/*
+ * Each byte is widened to __u32 before shifting: a __u8 promotes to
+ * (signed) int, and shifting a value >= 0x80 left by 24 would overflow
+ * it, which is undefined behaviour.
+ */
+#define INT_GET_UNALIGNED_32_LE(pointer) \
+   ((__u32)(((__u32)((__u8*)(pointer))[0]      ) | ((__u32)((__u8*)(pointer))[1] << 8 ) \
+	   |((__u32)((__u8*)(pointer))[2] << 16) | ((__u32)((__u8*)(pointer))[3] << 24)))
+#define INT_GET_UNALIGNED_32_BE(pointer) \
+   ((__u32)(((__u32)((__u8*)(pointer))[0] << 24) | ((__u32)((__u8*)(pointer))[1] << 16) \
+	   |((__u32)((__u8*)(pointer))[2] << 8)  | ((__u32)((__u8*)(pointer))[3]      )))
+
+/* 64-bit reads are assembled from two 32-bit reads, widened before shifting */
+#define INT_GET_UNALIGNED_64_LE(pointer) \
+   (((__u64)(INT_GET_UNALIGNED_32_LE(((__u8*)(pointer))+4)) << 32 ) \
+   |((__u64)(INT_GET_UNALIGNED_32_LE(((__u8*)(pointer))  ))       ))
+#define INT_GET_UNALIGNED_64_BE(pointer) \
+   (((__u64)(INT_GET_UNALIGNED_32_BE(((__u8*)(pointer))  )) << 32  ) \
+   |((__u64)(INT_GET_UNALIGNED_32_BE(((__u8*)(pointer))+4))        ))
+
+/* + * now pick the right ones for our MACHINE ARCHITECTURE + */ + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define INT_GET_UNALIGNED_16(pointer) INT_GET_UNALIGNED_16_LE(pointer) +#define
INT_SET_UNALIGNED_16(pointer,value) INT_SET_UNALIGNED_16_LE(pointer,value) +#define INT_GET_UNALIGNED_32(pointer) INT_GET_UNALIGNED_32_LE(pointer) +#define INT_GET_UNALIGNED_64(pointer) INT_GET_UNALIGNED_64_LE(pointer) +#else +#define INT_GET_UNALIGNED_16(pointer) INT_GET_UNALIGNED_16_BE(pointer) +#define INT_SET_UNALIGNED_16(pointer,value) INT_SET_UNALIGNED_16_BE(pointer,value) +#define INT_GET_UNALIGNED_32(pointer) INT_GET_UNALIGNED_32_BE(pointer) +#define INT_GET_UNALIGNED_64(pointer) INT_GET_UNALIGNED_64_BE(pointer) +#endif + +/* define generic INT_ macros */ + +#define INT_GET(reference,arch) \ + (((arch) == ARCH_NOCONVERT) \ + ? \ + (reference) \ + : \ + INT_SWAP((reference),(reference)) \ + ) + +/* does not return a value */ +#define INT_SET(reference,arch,valueref) \ + (void)( \ + ((reference) = (valueref)), \ + ( \ + ((arch) != ARCH_NOCONVERT) ? \ + (reference) = INT_SWAP((reference),(reference)) \ + : 0 \ + ) \ + ) + +/* does not return a value */ +#define INT_MOD_EXPR(reference,arch,code) \ + (void)(((arch) == ARCH_NOCONVERT) \ + ? \ + ((reference) code) \ + : \ + ( \ + (reference) = INT_GET((reference),arch) , \ + ((reference) code), \ + INT_SET(reference, arch, reference) \ + ) \ + ) + +/* does not return a value */ +#define INT_MOD(reference,arch,delta) \ + (void)( \ + INT_MOD_EXPR(reference,arch,+=(delta)) \ + ) + +/* + * INT_COPY - copy a value between two locations with the + * _same architecture_ but _potentially different sizes_ + * + * if the types of the two parameters are equal or they are + * in native architecture, a simple copy is done + * + * otherwise, architecture conversions are done + * + */ + +/* does not return a value */ +#define INT_COPY(dst,src,arch) \ + (void)( \ + ((sizeof(dst) == sizeof(src)) || ((arch) == ARCH_NOCONVERT)) \ + ? 
\ + ((dst) = (src)) \ + : \ + INT_SET(dst, arch, INT_GET(src, arch)) \ + ) + +/* + * INT_XLATE - copy a value in either direction between two locations + * with different architectures + * + * dir < 0 - copy from memory to buffer (native to arch) + * dir > 0 - copy from buffer to memory (arch to native) + */ + +/* does not return a value */ +#define INT_XLATE(buf,mem,dir,arch) {\ + ASSERT(dir); \ + if (dir>0) { \ + (mem)=INT_GET(buf, arch); \ + } else { \ + INT_SET(buf, arch, mem); \ + } \ +} + +#define INT_ISZERO(reference,arch) \ + ((reference) == 0) + +#define INT_ZERO(reference,arch) \ + ((reference) = 0) + +#define INT_GET_UNALIGNED_16_ARCH(pointer,arch) \ + ( ((arch) == ARCH_NOCONVERT) \ + ? \ + (INT_GET_UNALIGNED_16(pointer)) \ + : \ + (INT_GET_UNALIGNED_16_BE(pointer)) \ + ) +#define INT_SET_UNALIGNED_16_ARCH(pointer,value,arch) \ + if ((arch) == ARCH_NOCONVERT) { \ + INT_SET_UNALIGNED_16(pointer,value); \ + } else { \ + INT_SET_UNALIGNED_16_BE(pointer,value); \ + } + +#endif /* __XFS_SUPPORT_ARCH_H__ */ diff --git a/include/builddefs.in b/include/builddefs.in new file mode 100644 index 000000000..0f10b8aaf --- /dev/null +++ b/include/builddefs.in @@ -0,0 +1,173 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. 
Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# +# @configure_input@ +# + +ifndef _BUILDDEFS_INCLUDED_ +_BUILDDEFS_INCLUDED_ = 1 + +DEBUG = @debug_build@ +OPTIMIZER = @opt_build@ +MALLOCLIB = @malloc_lib@ + +LIBXFS = $(TOPDIR)/libxfs/libxfs.a +LIBATTR = $(TOPDIR)/libattr/libattr.a +LIBHANDLE = $(TOPDIR)/handle/libhandle.a +LIBUUID = /usr/lib/libuuid.a +LIBLVM = @liblvm@ + +BUILDRULES = $(TOPDIR)/include/buildrules + +# General package information +TARGET_OS = @host_platform@ +PACKAGE_NAME = @package_name@ +PACKAGE_RELEASE = @package_release@ +PACKAGE_VERSION = @package_version@ +PACKAGE_DISTRIBUTION = @package_distribution@ +PACKAGE_BUILDER = @package_builder@ +XFS_CMDS_SBIN_DIR = @xfs_cmds_sbin_dir@ +XFS_CMDS_BIN_DIR = @xfs_cmds_bin_dir@ +XFS_CMDS_LIB_DIR = @xfs_cmds_lib_dir@ +XFS_CMDS_SHARE_DIR = @xfs_cmds_share_dir@ +XFS_CMDS_INC_DIR = @xfs_cmds_inc_dir@ +XFS_CMDS_MAN_DIR = @xfs_cmds_man_dir@ +XFS_CMDS_TMP_DIR = @xfs_cmds_tmp_dir@ +XFS_CMDS_DOC_DIR = @xfs_cmds_doc_dir@ + +# LCFLAGS, LLDFLAGS, LLDLIBS, LSRCFILES and LDIRT may be specified in +# user Makefiles. Note: LSRCFILES is anything other than Makefile, $(CFILES) +# $(CXXFILES), or $(HFILES) and is used to construct the manifest list +# during the "dist" phase (packaging). 
+ +CFLAGS += $(OPTIMIZER) $(DEBUG) -funsigned-char -Wall -Wno-parentheses \ + $(LCFLAGS) -I$(TOPDIR)/include '-DVERSION="$(PACKAGE_VERSION)"' \ + -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \ + -DXFS_BIG_FILES=1 -DXFS_BIG_FILESYSTEMS=1 -DHAVE_LIBLVM=@have_liblvm@ + +LDFLAGS = $(LLDFLAGS) +LDLIBS = $(LLDLIBS) $(MALLOCLIB) + +MAKEOPTS = --no-print-directory +SRCFILES = Makefile $(HFILES) $(CFILES) $(LSRCFILES) $(LFILES) $(YFILES) +DIRT = $(LDIRT) dep dep.bak $(OBJECTS) $(CMDTARGET) $(LIBTARGET) \ + $(STATICLIBTARGET) *.[1-9].gz + +OBJECTS = $(ASFILES:.s=.o) \ + $(CFILES:.c=.o) \ + $(LFILES:.l=.o) \ + $(YFILES:%.y=%.tab.o) + +MAKE = @make@ +CC = @cc@ +LD = @ld@ +AWK = @awk@ +SED = @sed@ +INSTALL = $(TOPDIR)/install-sh -o root -g root +ECHO = @echo@ +LN_S = @LN_S@ + +CCF = $(CC) $(CFLAGS) +MAKEF = $(MAKE) $(MAKEOPTS) +CXXF = $(CXX) $(CXXFLAGS) +LDF = $(LD) $(LDFLAGS) +MAKEDEPEND = @makedepend@ + +ZIP = @zip@ +TAR = @tar@ +RPM = @rpm@ +RPM_VERSION = @rpm_version@ + +HAVE_ZIPPED_MANPAGES = @have_zipped_manpages@ + +SHELL = /bin/sh +IMAGES_DIR = $(TOPDIR)/all-images +DIST_DIR = $(TOPDIR)/dist + +SUBDIRS_MAKERULE = \ + @for d in $(SUBDIRS) ""; do \ + if test -d "$$d" -a ! -z "$$d"; then \ + $(ECHO) === $$d ===; \ + $(MAKEF) -C $$d $@ || exit $$?; \ + fi; \ + done + +MAN_MAKERULE = \ + @for f in *.[12345678] ""; do \ + if test ! 
-z "$$f"; then \ + $(ZIP) --best -c < $$f > $$f.gz; \ + fi; \ + done + +INSTALL_MAN = \ + @for d in $(MAN_PAGES); do \ + first=true; \ + for m in `$(AWK) '/^\.SH NAME/ {ok=1; next} ok {print; exit}' $$d \ + | sed -e 's/,/ /g' -e 's/\\-.*//' -e 's/\\\f[0-9]//g' -e 's/ / /g;q'`; \ + do \ + [ -z "$$m" -o "$$m" = "\\" ] && continue; \ + t=$(MAN_DEST)/$$m.$(MAN_SECTION); \ + if $$first; then \ + if $(HAVE_ZIPPED_MANPAGES); then \ + $(ZIP) --best -c $$d > $$d.gz; _sfx=.gz; \ + fi; \ + u=$$m.$(MAN_SECTION)$$_sfx; \ + echo $(INSTALL) -m 644 $${d}$$_sfx $${t}$$_sfx; \ + $(INSTALL) -m 644 $${d}$$_sfx $${t}$$_sfx; \ + else \ + echo $(INSTALL) -S $$u $${t}$$_sfx; \ + $(INSTALL) -S $$u $${t}$$_sfx; \ + fi; \ + first=false; \ + done; \ + done + +DIST_MAKERULE = \ + $(MAKEF) -C build dist + +SOURCE_MAKERULE = \ + @test -z "$$DIR" && DIR="."; \ + for f in $(SRCFILES) ""; do \ + if test ! -z "$$f"; then $(ECHO) $$DIR/$$f; fi;\ + done; \ + for d in `echo $(SUBDIRS)` ; do \ + if test -d "$$d" -a ! -z "$$d"; then \ + $(MAKEF) DIR=$$DIR/$$d -C $$d $@ || exit $$?; \ + fi; \ + done + +endif + +# +# For targets that should always be rebuilt, +# define a target that is never up-to-date. +# Targets needing this should depend on $(_FORCE) +_FORCE = __force_build diff --git a/include/buildrules b/include/buildrules new file mode 100644 index 000000000..af2a7caca --- /dev/null +++ b/include/buildrules @@ -0,0 +1,76 @@ +# +# Copyright (C) 1999 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as published +# by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE.
Further, any license provided herein, +# whether implied or otherwise, is limited to this program in accordance with +# the express provisions of the GNU General Public License. Patent licenses, +# if any, provided herein do not apply to combinations of this program with +# other product or programs, or any other product whatsoever. This program is +# distributed without any warranty that the program is delivered free of the +# rightful claim of any third person by way of infringement or the like. See +# the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write the Free Software Foundation, Inc., 59 Temple +# Place - Suite 330, Boston MA 02111-1307, USA. +# Common build rules for gmake +# +ifndef _BUILDRULES_INCLUDED_ +_BUILDRULES_INCLUDED_ = 1 + +include $(TOPDIR)/include/builddefs + +# +# Standard targets +# +ifdef CMDTARGET +$(CMDTARGET) : $(SUBDIRS) $(OBJECTS) $(CMDDEPS) + $(CCF) -o $(CMDTARGET) $(LDFLAGS) $(OBJECTS) $(LDLIBS) +endif + +ifdef LIBTARGET +$(LIBTARGET) : $(SUBDIRS) $(OBJECTS) + $(CC) $(LDFLAGS) -shared -Wl,-soname,$(LIBTARGET) -o $(LIBTARGET) \ + $(OBJECTS) $(LDLIBS) $(LIB_FOR_DLOPEN) $(LIB_FOR_BASENAME) +endif + +ifdef STATICLIBTARGET +$(STATICLIBTARGET) : $(SUBDIRS) $(OBJECTS) + $(AR) crf $(STATICLIBTARGET) $? 
+endif + +clean clobber : $(SUBDIRS) + rm -f $(DIRT) + $(SUBDIRS_MAKERULE) + +# Never blow away subdirs +ifdef SUBDIRS +.PRECIOUS: $(SUBDIRS) +$(SUBDIRS): + $(SUBDIRS_MAKERULE) +endif + +source : + $(SOURCE_MAKERULE) + +endif + +$(_FORCE): + +.PHONY : depend + +depend : $(CFILES) $(HFILES) + $(SUBDIRS_MAKERULE) + touch dep + $(MAKEDEPEND) -fdep -- $(CFLAGS) -- $(CFILES) + +# Include dep, but only if it exists +ifeq ($(shell test -f dep && echo dep), dep) +include dep +endif diff --git a/include/handle.h b/include/handle.h new file mode 100644 index 000000000..0400a7046 --- /dev/null +++ b/include/handle.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __HANDLE_H__ +#define __HANDLE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +extern int path_to_handle (char *__path, void **__hanp, size_t *__hlen); +extern int path_to_fshandle (char *__path, void **__hanp, size_t *__hlen); +extern int fd_to_handle (int __fd, void **__hanp, size_t *__hlen); +extern int handle_to_fshandle (void *__hanp, size_t __hlen, void **__fshanp, + size_t *__fshlen); +extern void free_handle (void *__hanp, size_t __hlen); +extern int open_by_handle (void *__hanp, size_t __hlen, int __rw); +extern int readlink_by_handle (void *__hanp, size_t __hlen, void *__buf, + size_t __bs); + +#ifdef __cplusplus +} +#endif + +#endif /* __HANDLE_H__ */ diff --git a/include/jdm.h b/include/jdm.h new file mode 100644 index 000000000..3d2012018 --- /dev/null +++ b/include/jdm.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __JDM_H__ +#define __JDM_H__ + +typedef int intgen_t; +typedef void jdm_fshandle_t; + +struct xfs_bstat; +extern jdm_fshandle_t *jdm_getfshandle (char *mntpnt); +extern intgen_t jdm_open (jdm_fshandle_t *fsh, struct xfs_bstat *sp, + intgen_t oflags); +extern intgen_t jdm_readlink (jdm_fshandle_t *fsh, struct xfs_bstat *sp, + char *bufp, size_t bufsz); + +#ifdef EXTATTR + +struct attrlist_cursor; +extern intgen_t jdm_attr_multi (jdm_fshandle_t *fsh, struct xfs_bstat *sp, + char *bufp, int rtrvcnt, int flags); +extern intgen_t jdm_attr_list (jdm_fshandle_t *fsh, struct xfs_bstat *sp, + char *bufp, size_t bufsz, int flags, + struct attrlist_cursor *cursor); +#endif /* EXTATTR */ + +/* macro for determining the size of a structure member */ +#define sizeofmember( t, m ) sizeof( ( ( t * )0 )->m ) + +/* macro for calculating the offset of a structure member */ +#define offsetofmember( t, m ) ( ( size_t )( char * )&( ( ( t * )0 )->m ) ) + +#endif /* __JDM_H__ */ diff --git a/include/libxfs.h b/include/libxfs.h new file mode 100644 index 000000000..78e597846 --- /dev/null +++ b/include/libxfs.h @@ -0,0 +1,474 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __LIBXFS_H__ +#define __LIBXFS_H__ + +#include "platform_defs.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Argument structure for libxfs_init(). 
+ */ +typedef struct { + /* input parameters */ + char *volname; /* pathname of volume */ + char *dname; /* pathname of data "subvolume" */ + char *logname; /* pathname of log "subvolume" */ + char *rtname; /* pathname of realtime "subvolume" */ + int isreadonly; /* filesystem is only read in applic */ + int disfile; /* data "subvolume" is a regular file */ int dcreat; /* try to create data subvolume */ + int lisfile; /* log "subvolume" is a regular file */ + int lcreat; /* try to create log subvolume */ + int risfile; /* realtime "subvolume" is a reg file */ int rcreat; /* try to create realtime subvolume */ + char *notvolmsg; /* format string for not XLV message */ + int notvolok; /* set if not XLV => try data */ + /* output results */ + dev_t ddev; /* device for data subvolume */ + dev_t logdev; /* device for log subvolume */ + dev_t rtdev; /* device for realtime subvolume */ + long long dsize; /* size of data subvolume (BBs) */ + long long logBBsize; /* size of log subvolume (BBs) */ + /* (blocks allocated for use as + * log is stored in mount structure) */ + long long logBBstart; /* start block of log subvolume (BBs) */ long long rtsize; /* size of realtime subvolume (BBs) */ + int dfd; /* data subvolume file descriptor */ + int logfd; /* log subvolume file descriptor */ + int rtfd; /* realtime subvolume file descriptor */ +} libxfs_init_t; + +#define LIBXFS_ISREADONLY 0x0069 /* disallow all mounted filesystems */ +#define LIBXFS_ISINACTIVE 0x6900 /* allow mounted only if mounted ro */ + +extern char *progname; +extern int libxfs_init (libxfs_init_t *); +extern int libxfs_device_to_fd (dev_t); +extern dev_t libxfs_device_open (char *, int, int); +extern void libxfs_device_zero (dev_t, xfs_daddr_t, uint); +extern void libxfs_device_close (dev_t); + +/* check or write log footer: specify device, log size in blocks & uuid */ +extern int libxfs_log_clear (dev_t, xfs_daddr_t, uint, uuid_t *, int); + +/* + * Define a user-level mount structure with all we need + * 
in order to make use of the numerous XFS_* macros. + */ +struct xfs_inode; +typedef struct xfs_mount { + xfs_sb_t m_sb; /* copy of fs superblock */ + int m_bsize; /* fs logical block size */ + xfs_agnumber_t m_agfrotor; /* last ag where space found */ + xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ + uint m_rsumlevels; /* rt summary levels */ + uint m_rsumsize; /* size of rt summary, bytes */ + struct xfs_inode *m_rbmip; /* pointer to bitmap inode */ + struct xfs_inode *m_rsumip; /* pointer to summary inode */ + struct xfs_inode *m_rootip; /* pointer to root directory */ + dev_t m_dev; + dev_t m_logdev; + dev_t m_rtdev; + __uint8_t m_dircook_elog; /* log d-cookie entry bits */ + __uint8_t m_blkbit_log; /* blocklog + NBBY */ + __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ + __uint8_t m_agno_log; /* log #ag's */ + __uint8_t m_agino_log; /* #bits for agino in inum */ + __uint16_t m_inode_cluster_size;/* min inode buf size */ + uint m_blockmask; /* sb_blocksize-1 */ + uint m_blockwsize; /* sb_blocksize in words */ + uint m_blockwmask; /* blockwsize-1 */ + uint m_alloc_mxr[2]; /* XFS_ALLOC_BLOCK_MAXRECS */ + uint m_alloc_mnr[2]; /* XFS_ALLOC_BLOCK_MINRECS */ + uint m_bmap_dmxr[2]; /* XFS_BMAP_BLOCK_DMAXRECS */ + uint m_bmap_dmnr[2]; /* XFS_BMAP_BLOCK_DMINRECS */ + uint m_inobt_mxr[2]; /* XFS_INOBT_BLOCK_MAXRECS */ + uint m_inobt_mnr[2]; /* XFS_INOBT_BLOCK_MINRECS */ + uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ + uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ + uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */ + xfs_perag_t *m_perag; /* per-ag accounting info */ + uint m_flags; /* global mount flags */ + uint m_qflags; /* quota status flags */ + uint m_attroffset; /* inode attribute offset */ + int m_da_node_ents; /* how many entries in danode */ + int m_ialloc_inos; /* inodes in inode allocation */ + int m_ialloc_blks; /* blocks in inode allocation */ + int m_litino; /* size of inode union area */ + int m_inoalign_mask;/* mask sb_inoalignmt if used */ + 
xfs_trans_reservations_t m_reservations;/* precomputed res values */ + __uint64_t m_maxicount; /* maximum inode count */ + int m_dalign; /* stripe unit */ + int m_swidth; /* stripe width */ + int m_sinoalign; /* stripe unit inode alignmnt */ + int m_dir_magicpct; /* 37% of the dir blocksize */ + __uint8_t m_dirversion; /* 1 or 2 */ + int m_dirblksize; /* directory block sz--bytes */ + int m_dirblkfsbs; /* directory block sz--fsbs */ + xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */ + xfs_dablk_t m_dirleafblk; /* blockno of dir non-data v2 */ + xfs_dablk_t m_dirfreeblk; /* blockno of dirfreeindex v2 */ +} xfs_mount_t; + + +extern xfs_mount_t *libxfs_mount (xfs_mount_t *, xfs_sb_t *, + dev_t, dev_t, dev_t, int); +extern void libxfs_mount_common (xfs_mount_t *, xfs_sb_t *); +extern void libxfs_umount (xfs_mount_t *); +extern int libxfs_rtmount_init (xfs_mount_t *); +extern void libxfs_alloc_compute_maxlevels (xfs_mount_t *); +extern void libxfs_bmap_compute_maxlevels (xfs_mount_t *, int); +extern void libxfs_ialloc_compute_maxlevels (xfs_mount_t *); +extern void libxfs_trans_init (xfs_mount_t *); + + +/* + * Simple I/O interface + */ +typedef struct xfs_buf { + xfs_daddr_t b_blkno; + unsigned b_bcount; + dev_t b_dev; + void *b_fsprivate; + void *b_fsprivate2; + void *b_fsprivate3; + char *b_addr; + /* b_addr must be the last field */ +} xfs_buf_t; +#define XFS_BUF_PTR(bp) ((bp)->b_addr) +#define xfs_buf_offset(bp, offset) (XFS_BUF_PTR(bp) + (offset)) +#define XFS_BUF_ADDR(bp) ((bp)->b_blkno) +#define XFS_BUF_COUNT(bp) ((bp)->b_bcount) +#define XFS_BUF_TARGET(bp) ((bp)->b_dev) +#define XFS_BUF_SET_PTR(bp,p,cnt) ((bp)->b_addr = (char *)(p), \ + XFS_BUF_SETCOUNT(bp,cnt)) /* one expression: safe as an unbraced if/else body */ +#define XFS_BUF_SET_ADDR(bp,blk) ((bp)->b_blkno = (blk)) +#define XFS_BUF_SETCOUNT(bp,cnt) ((bp)->b_bcount = (cnt)) + +#define XFS_BUF_FSPRIVATE(bp,type) ((type)(bp)->b_fsprivate) +#define XFS_BUF_SET_FSPRIVATE(bp,val) (bp)->b_fsprivate = (void *)(val) +#define XFS_BUF_FSPRIVATE2(bp,type) 
((type)(bp)->b_fsprivate2) +#define XFS_BUF_SET_FSPRIVATE2(bp,val) (bp)->b_fsprivate2 = (void *)(val) +#define XFS_BUF_FSPRIVATE3(bp,type) ((type)(bp)->b_fsprivate3) +#define XFS_BUF_SET_FSPRIVATE3(bp,val) (bp)->b_fsprivate3 = (void *)(val) + +extern xfs_buf_t *libxfs_getbuf (dev_t, xfs_daddr_t, int); +extern xfs_buf_t *libxfs_readbuf (dev_t, xfs_daddr_t, int, int); +extern xfs_buf_t *libxfs_getsb (xfs_mount_t *, int); +extern int libxfs_readbufr (dev_t, xfs_daddr_t, xfs_buf_t *, int, int); +extern int libxfs_writebuf (xfs_buf_t *, int); +extern int libxfs_writebuf_int (xfs_buf_t *, int); +extern void libxfs_putbuf (xfs_buf_t *); + + +/* + * Transaction interface + */ + +typedef struct xfs_log_item { + struct xfs_log_item_desc *li_desc; /* ptr to current desc*/ + struct xfs_mount *li_mountp; /* ptr to fs mount */ + uint li_type; /* item type */ +} xfs_log_item_t; + +typedef struct xfs_inode_log_item { + xfs_log_item_t ili_item; /* common portion */ + struct xfs_inode *ili_inode; /* inode pointer */ + unsigned short ili_flags; /* misc flags */ + unsigned int ili_last_fields; /* fields when flushed*/ + xfs_inode_log_format_t ili_format; /* logged structure */ +} xfs_inode_log_item_t; + +typedef struct xfs_buf_log_item { + xfs_log_item_t bli_item; /* common item structure */ + struct xfs_buf *bli_buf; /* real buffer pointer */ + unsigned int bli_flags; /* misc flags */ + unsigned int bli_recur; /* recursion count */ + xfs_buf_log_format_t bli_format; /* in-log header */ +} xfs_buf_log_item_t; + +#include + +typedef struct xfs_trans { + unsigned int t_type; /* transaction type */ + xfs_mount_t *t_mountp; /* ptr to fs mount struct */ + unsigned int t_flags; /* misc flags */ + long t_icount_delta; /* superblock icount change */ + long t_ifree_delta; /* superblock ifree change */ + long t_fdblocks_delta; /* superblock fdblocks chg */ + long t_frextents_delta; /* superblock freextents chg */ + unsigned int t_items_free; /* log item descs free */ + xfs_log_item_chunk_t 
t_items; /* first log item desc chunk */ +} xfs_trans_t; + +extern xfs_trans_t *libxfs_trans_alloc (xfs_mount_t *, int); +extern xfs_trans_t *libxfs_trans_dup (xfs_trans_t *); +extern int libxfs_trans_reserve (xfs_trans_t *, uint,uint,uint,uint,uint); +extern int libxfs_trans_commit (xfs_trans_t *, uint, xfs_lsn_t *); +extern void libxfs_trans_cancel (xfs_trans_t *, int); +extern void libxfs_mod_sb (xfs_trans_t *, __int64_t); + +extern int libxfs_trans_iget (xfs_mount_t *, xfs_trans_t *, xfs_ino_t, + uint, struct xfs_inode **); +extern void libxfs_trans_iput(xfs_trans_t *, struct xfs_inode *, uint); +extern void libxfs_trans_ijoin (xfs_trans_t *, struct xfs_inode *, uint); +extern void libxfs_trans_ihold (xfs_trans_t *, struct xfs_inode *); +extern void libxfs_trans_log_inode (xfs_trans_t *, struct xfs_inode *, + uint); + +extern void libxfs_trans_brelse (xfs_trans_t *, struct xfs_buf *); +extern void libxfs_trans_binval (xfs_trans_t *, struct xfs_buf *); +extern void libxfs_trans_bjoin (xfs_trans_t *, struct xfs_buf *); +extern void libxfs_trans_bhold (xfs_trans_t *, struct xfs_buf *); +extern void libxfs_trans_log_buf (xfs_trans_t *, struct xfs_buf *, + uint, uint); +extern xfs_buf_t *libxfs_trans_get_buf (xfs_trans_t *, dev_t, + xfs_daddr_t, int, uint); +extern int libxfs_trans_read_buf (xfs_mount_t *, xfs_trans_t *, dev_t, + xfs_daddr_t, int, uint, struct xfs_buf **); + + +/* + * Simple memory interface + */ +typedef struct xfs_zone { + int zone_unitsize; /* Size in bytes of zone unit */ + char *zone_name; /* tag name */ + int allocated; /* debug: How many currently allocated */ +} xfs_zone_t; + +extern xfs_zone_t *libxfs_zone_init (int, char *); +extern void *libxfs_zone_zalloc (xfs_zone_t *); +extern void libxfs_zone_free (xfs_zone_t *, void *); +extern void *libxfs_malloc (size_t); +extern void libxfs_free (void *); +extern void *libxfs_realloc (void *, size_t); + + +/* + * Inode interface + */ +struct xfs_inode_log_item; +typedef struct xfs_inode { + 
xfs_mount_t *i_mount; /* fs mount struct ptr */ + xfs_ino_t i_ino; /* inode number (agno/agino) */ + xfs_daddr_t i_blkno; /* blkno of inode buffer */ + dev_t i_dev; /* dev for this inode */ + ushort i_len; /* len of inode buffer */ + ushort i_boffset; /* off of inode in buffer */ + xfs_ifork_t *i_afp; /* attribute fork pointer */ + xfs_ifork_t i_df; /* data fork */ + struct xfs_trans *i_transp; /* ptr to owning transaction */ + struct xfs_inode_log_item *i_itemp; /* logging information */ + unsigned int i_delayed_blks; /* count of delay alloc blks */ + xfs_dinode_core_t i_d; /* most of ondisk inode */ +} xfs_inode_t; + +extern int libxfs_inode_alloc (xfs_trans_t **, xfs_inode_t *, mode_t, + ushort, dev_t, cred_t *, xfs_inode_t **); +extern void libxfs_trans_inode_alloc_buf (xfs_trans_t *, xfs_buf_t *); + +extern void libxfs_idata_realloc (xfs_inode_t *, int, int); +extern int libxfs_iread (xfs_mount_t *, xfs_trans_t *, xfs_ino_t, + xfs_inode_t **, xfs_daddr_t); +extern void libxfs_ichgtime (xfs_inode_t *, int); +extern int libxfs_iflush_int (xfs_inode_t *, xfs_buf_t *); +extern int libxfs_itobp (xfs_mount_t *, xfs_trans_t *, xfs_inode_t *, + xfs_dinode_t **, xfs_buf_t **, xfs_daddr_t); +extern int libxfs_iget (xfs_mount_t *, xfs_trans_t *, xfs_ino_t, + uint, xfs_inode_t **, xfs_daddr_t); +extern void libxfs_iput (xfs_inode_t *, uint); + + +/* + * Directory interface + */ +extern void libxfs_dir_mount (xfs_mount_t *); +extern void libxfs_dir2_mount (xfs_mount_t *); +extern int libxfs_dir_init (xfs_trans_t *, xfs_inode_t *, xfs_inode_t *); +extern int libxfs_dir2_init (xfs_trans_t *, xfs_inode_t *, xfs_inode_t *); +extern int libxfs_dir_createname (xfs_trans_t *, xfs_inode_t *, char *, + int, xfs_ino_t, xfs_fsblock_t *, + xfs_bmap_free_t *, xfs_extlen_t); +extern int libxfs_dir2_createname (xfs_trans_t *, xfs_inode_t *, char *, + int, xfs_ino_t, xfs_fsblock_t *, + xfs_bmap_free_t *, xfs_extlen_t); +extern int libxfs_dir_lookup (xfs_trans_t *, xfs_inode_t *, + char *, 
int, xfs_ino_t *); +extern int libxfs_dir2_lookup (xfs_trans_t *, xfs_inode_t *, + char *, int, xfs_ino_t *); +extern int libxfs_dir_replace (xfs_trans_t *, xfs_inode_t *, + char *, int, xfs_ino_t, xfs_fsblock_t *, + xfs_bmap_free_t *, xfs_extlen_t); +extern int libxfs_dir2_replace (xfs_trans_t *, xfs_inode_t *, + char *, int, xfs_ino_t, xfs_fsblock_t *, + xfs_bmap_free_t *, xfs_extlen_t); +extern int libxfs_dir_removename (xfs_trans_t *, xfs_inode_t *, + char *, int, xfs_ino_t, xfs_fsblock_t *, + xfs_bmap_free_t *, xfs_extlen_t); +extern int libxfs_dir2_removename (xfs_trans_t *, xfs_inode_t *, + char *, int, xfs_ino_t, xfs_fsblock_t *, + xfs_bmap_free_t *, xfs_extlen_t); +extern int libxfs_dir_bogus_removename (xfs_trans_t *, xfs_inode_t *, + char *, xfs_fsblock_t *, xfs_bmap_free_t *, + xfs_extlen_t, xfs_dahash_t, int); +extern int libxfs_dir2_bogus_removename (xfs_trans_t *, xfs_inode_t *, + char *, xfs_fsblock_t *, xfs_bmap_free_t *, + xfs_extlen_t, xfs_dahash_t, int); + + +/* + * Block map interface + */ +extern int libxfs_bmapi (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t, + xfs_filblks_t, int, xfs_fsblock_t *, + xfs_extlen_t, xfs_bmbt_irec_t *, int *, + xfs_bmap_free_t *); +extern int libxfs_bmap_finish (xfs_trans_t **, xfs_bmap_free_t *, + xfs_fsblock_t, int *); +extern int libxfs_bmap_next_offset (xfs_trans_t *, xfs_inode_t *, + xfs_fileoff_t *, int); +extern int libxfs_bunmapi (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t, + xfs_filblks_t, int, xfs_extnum_t, + xfs_fsblock_t *, xfs_bmap_free_t *, int *); +extern void libxfs_bmap_del_free (xfs_bmap_free_t *, + xfs_bmap_free_item_t *, xfs_bmap_free_item_t *); + + +/* + * All other routines we want to keep common... 
+ */ + +extern int libxfs_highbit32 (__uint32_t); +extern int libxfs_highbit64 (__uint64_t); +extern uint libxfs_da_log2_roundup (uint); + +extern void libxfs_xlate_sb (void *, xfs_sb_t *, int, xfs_arch_t, + __int64_t); +extern void libxfs_xlate_dinode_core (xfs_caddr_t buf, + xfs_dinode_core_t *, int, xfs_arch_t); + +extern int libxfs_alloc_fix_freelist (xfs_alloc_arg_t *, int); +extern int libxfs_alloc_file_space (xfs_inode_t *, xfs_off_t, + xfs_off_t, int, int); + +extern xfs_dahash_t libxfs_da_hashname (char *, int); +extern int libxfs_attr_leaf_newentsize (xfs_da_args_t *, int, int *); + +extern xfs_filblks_t libxfs_bmbt_get_blockcount (xfs_bmbt_rec_t *); +extern xfs_fileoff_t libxfs_bmbt_get_startoff (xfs_bmbt_rec_t *); +extern void libxfs_bmbt_get_all (xfs_bmbt_rec_t *, xfs_bmbt_irec_t *); + +extern int libxfs_free_extent (xfs_trans_t *, xfs_fsblock_t, xfs_extlen_t); +extern int libxfs_rtfree_extent (xfs_trans_t *, xfs_rtblock_t, + xfs_extlen_t); + +/* Directory/Attribute routines used by xfs_repair */ +extern void libxfs_da_bjoin (xfs_trans_t *, xfs_dabuf_t *); +extern int libxfs_da_shrink_inode (xfs_da_args_t *, xfs_dablk_t, + xfs_dabuf_t *); +extern int libxfs_da_grow_inode (xfs_da_args_t *, xfs_dablk_t *); +extern void libxfs_da_bhold (xfs_trans_t *, xfs_dabuf_t *); +extern void libxfs_da_brelse (xfs_trans_t *, xfs_dabuf_t *); +extern int libxfs_da_read_bufr (xfs_trans_t *, xfs_inode_t *, xfs_dablk_t, + xfs_daddr_t, xfs_dabuf_t **, int); +extern int libxfs_da_read_buf (xfs_trans_t *, xfs_inode_t *, + xfs_dablk_t, xfs_daddr_t, xfs_dabuf_t **, int); +extern int libxfs_da_get_buf (xfs_trans_t *, xfs_inode_t *, + xfs_dablk_t, xfs_daddr_t, xfs_dabuf_t **, int); +extern void libxfs_da_log_buf (xfs_trans_t *, xfs_dabuf_t *, uint, uint); +extern int libxfs_dir2_shrink_inode (xfs_da_args_t *, xfs_dir2_db_t, + xfs_dabuf_t *); +extern int libxfs_dir2_grow_inode (xfs_da_args_t *, int, xfs_dir2_db_t *); +extern int libxfs_dir2_isleaf (xfs_trans_t *, xfs_inode_t *, 
int *); +extern int libxfs_dir2_isblock (xfs_trans_t *, xfs_inode_t *, int *); +extern void libxfs_dir2_data_use_free (xfs_trans_t *, xfs_dabuf_t *, + xfs_dir2_data_unused_t *, xfs_dir2_data_aoff_t, + xfs_dir2_data_aoff_t, int *, int *); +extern void libxfs_dir2_data_make_free (xfs_trans_t *, xfs_dabuf_t *, + xfs_dir2_data_aoff_t, xfs_dir2_data_aoff_t, + int *, int *); +extern void libxfs_dir2_data_log_entry (xfs_trans_t *, xfs_dabuf_t *, + xfs_dir2_data_entry_t *); +extern void libxfs_dir2_data_log_header (xfs_trans_t *, xfs_dabuf_t *); +extern void libxfs_dir2_data_freescan (xfs_mount_t *, xfs_dir2_data_t *, + int *, char *); +extern void libxfs_dir2_free_log_bests (xfs_trans_t *, xfs_dabuf_t *, + int, int); + +/* Shared utility routines */ +extern unsigned int libxfs_log2_roundup(unsigned int i); + + +/* ick */ +extern __inline__ __const__ __u64 __fswab64 (__u64 x); + +#endif /* __LIBXFS_H__ */ diff --git a/include/platform_defs.h.in b/include/platform_defs.h.in new file mode 100644 index 000000000..9f3437529 --- /dev/null +++ b/include/platform_defs.h.in @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + * + * @configure_input@ + */ +#ifndef __XFS_PLATFORM_DEFS_H__ +#define __XFS_PLATFORM_DEFS_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if (__GLIBC__ <= 2) && (__GLIBC_MINOR__ <= 1) +# define constpp const char * const * +#else +# define constpp char * const * +#endif + +typedef loff_t xfs_off_t; +typedef __uint64_t xfs_ino_t; +typedef __uint32_t xfs_dev_t; +typedef __int64_t xfs_daddr_t; +typedef char* xfs_caddr_t; + +/* long and pointer must be either 32 bit or 64 bit */ +#undef HAVE_64BIT_LONG +#undef HAVE_32BIT_LONG +#undef HAVE_32BIT_PTR +#undef HAVE_64BIT_PTR + +/* Check if __psint_t is set to something meaningful */ +#undef HAVE___PSINT_T +#ifndef HAVE___PSINT_T +# ifdef HAVE_32BIT_PTR +typedef int __psint_t; +# elif defined HAVE_64BIT_PTR +# ifdef HAVE_64BIT_LONG +typedef long __psint_t; +# else +/* This is a very strange architecture, which has 64 bit pointers but + * not 64 bit longs. So, I'd just punt here and assume long long is Ok */ +typedef long long __psint_t; +# endif +# else +# error Unknown pointer size +# endif +#endif + +/* Check if __psunsigned_t is set to something meaningful */ +#undef HAVE___PSUNSIGNED_T +#ifndef HAVE___PSUNSIGNED_T +# ifdef HAVE_32BIT_PTR +typedef unsigned int __psunsigned_t; +# elif defined HAVE_64BIT_PTR +# ifdef HAVE_64BIT_LONG +typedef long __psunsigned_t; +# else +/* This is a very strange architecture, which has 64 bit pointers but + * not 64 bit longs. 
So, I'd just punt here and assume long long is Ok */ +typedef unsigned long long __psunsigned_t; +# endif +# else +# error Unknown pointer size +# endif +#endif + +#ifdef DEBUG +# define ASSERT assert +#else +# define ASSERT(EX) ((void) 0) +#endif + +#endif /* __XFS_PLATFORM_DEFS_H__ */ diff --git a/include/xfs_ag.h b/include/xfs_ag.h new file mode 100644 index 000000000..86e4095f8 --- /dev/null +++ b/include/xfs_ag.h @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_AG_H__ +#define __XFS_AG_H__ + +/* + * Allocation group header + * This is divided into three structures, placed in sequential 512-byte + * buffers after a copy of the superblock (also in a 512-byte buffer). + */ + +struct xfs_buf; +struct xfs_mount; + +#define XFS_AGF_MAGIC 0x58414746 /* 'XAGF' */ +#define XFS_AGI_MAGIC 0x58414749 /* 'XAGI' */ +#define XFS_AGF_VERSION 1 +#define XFS_AGI_VERSION 1 +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGF_GOOD_VERSION) +int xfs_agf_good_version(unsigned v); +#define XFS_AGF_GOOD_VERSION(v) xfs_agf_good_version(v) +#else +#define XFS_AGF_GOOD_VERSION(v) ((v) == XFS_AGF_VERSION) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGI_GOOD_VERSION) +int xfs_agi_good_version(unsigned v); +#define XFS_AGI_GOOD_VERSION(v) xfs_agi_good_version(v) +#else +#define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION) +#endif + +/* + * Btree number 0 is bno, 1 is cnt. This value gives the size of the + * arrays below. + */ +#define XFS_BTNUM_AGF ((int)XFS_BTNUM_CNTi + 1) + +/* + * The second word of agf_levels in the first a.g. overlaps the EFS + * superblock's magic number. Since the magic numbers valid for EFS + * are > 64k, our value cannot be confused for an EFS superblock's. + */ + +typedef struct xfs_agf +{ + /* + * Common allocation group header information + */ + __uint32_t agf_magicnum; /* magic number == XFS_AGF_MAGIC */ + __uint32_t agf_versionnum; /* header version == XFS_AGF_VERSION */ + xfs_agnumber_t agf_seqno; /* sequence # starting from 0 */ + xfs_agblock_t agf_length; /* size in blocks of a.g. 
*/ + /* + * Freespace information + */ + xfs_agblock_t agf_roots[XFS_BTNUM_AGF]; /* root blocks */ + __uint32_t agf_spare0; /* spare field */ + __uint32_t agf_levels[XFS_BTNUM_AGF]; /* btree levels */ + __uint32_t agf_spare1; /* spare field */ + __uint32_t agf_flfirst; /* first freelist block's index */ + __uint32_t agf_fllast; /* last freelist block's index */ + __uint32_t agf_flcount; /* count of blocks in freelist */ + xfs_extlen_t agf_freeblks; /* total free blocks */ + xfs_extlen_t agf_longest; /* longest free space */ +} xfs_agf_t; + +#define XFS_AGF_MAGICNUM 0x00000001 +#define XFS_AGF_VERSIONNUM 0x00000002 +#define XFS_AGF_SEQNO 0x00000004 +#define XFS_AGF_LENGTH 0x00000008 +#define XFS_AGF_ROOTS 0x00000010 +#define XFS_AGF_LEVELS 0x00000020 +#define XFS_AGF_FLFIRST 0x00000040 +#define XFS_AGF_FLLAST 0x00000080 +#define XFS_AGF_FLCOUNT 0x00000100 +#define XFS_AGF_FREEBLKS 0x00000200 +#define XFS_AGF_LONGEST 0x00000400 +#define XFS_AGF_NUM_BITS 11 +#define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) + +/* disk block (xfs_daddr_t) in the AG */ +#define XFS_AGF_DADDR ((xfs_daddr_t)1) +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGF_BLOCK) +xfs_agblock_t xfs_agf_block(struct xfs_mount *mp); +#define XFS_AGF_BLOCK(mp) xfs_agf_block(mp) +#else +#define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR) +#endif + +/* + * Size of the unlinked inode hash table in the agi. + */ +#define XFS_AGI_UNLINKED_BUCKETS 64 + +typedef struct xfs_agi +{ + /* + * Common allocation group header information + */ + __uint32_t agi_magicnum; /* magic number == XFS_AGI_MAGIC */ + __uint32_t agi_versionnum; /* header version == XFS_AGI_VERSION */ + xfs_agnumber_t agi_seqno; /* sequence # starting from 0 */ + xfs_agblock_t agi_length; /* size in blocks of a.g. */ + /* + * Inode information + * Inodes are mapped by interpreting the inode number, so no + * mapping data is needed here. 
+ */ + xfs_agino_t agi_count; /* count of allocated inodes */ + xfs_agblock_t agi_root; /* root of inode btree */ + __uint32_t agi_level; /* levels in inode btree */ + xfs_agino_t agi_freecount; /* number of free inodes */ + xfs_agino_t agi_newino; /* new inode just allocated */ + xfs_agino_t agi_dirino; /* last directory inode chunk */ + /* + * Hash table of inodes which have been unlinked but are + * still being referenced. + */ + xfs_agino_t agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; +} xfs_agi_t; + +#define XFS_AGI_MAGICNUM 0x00000001 +#define XFS_AGI_VERSIONNUM 0x00000002 +#define XFS_AGI_SEQNO 0x00000004 +#define XFS_AGI_LENGTH 0x00000008 +#define XFS_AGI_COUNT 0x00000010 +#define XFS_AGI_ROOT 0x00000020 +#define XFS_AGI_LEVEL 0x00000040 +#define XFS_AGI_FREECOUNT 0x00000080 +#define XFS_AGI_NEWINO 0x00000100 +#define XFS_AGI_DIRINO 0x00000200 +#define XFS_AGI_UNLINKED 0x00000400 +#define XFS_AGI_NUM_BITS 11 +#define XFS_AGI_ALL_BITS ((1 << XFS_AGI_NUM_BITS) - 1) + +/* disk block (xfs_daddr_t) in the AG */ +#define XFS_AGI_DADDR ((xfs_daddr_t)2) +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGI_BLOCK) +xfs_agblock_t xfs_agi_block(struct xfs_mount *mp); +#define XFS_AGI_BLOCK(mp) xfs_agi_block(mp) +#else +#define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR) +#endif + +/* + * The third a.g. block contains the a.g. freelist, an array + * of block pointers to blocks owned by the allocation btree code. + */ +#define XFS_AGFL_DADDR ((xfs_daddr_t)3) +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGFL_BLOCK) +xfs_agblock_t xfs_agfl_block(struct xfs_mount *mp); +#define XFS_AGFL_BLOCK(mp) xfs_agfl_block(mp) +#else +#define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR) +#endif +#define XFS_AGFL_SIZE (BBSIZE / sizeof(xfs_agblock_t)) +typedef struct xfs_agfl +{ + xfs_agblock_t agfl_bno[XFS_AGFL_SIZE]; +} xfs_agfl_t; + +/* + * Per-ag incore structure, copies of information in agf and agi, + * to improve the performance of allocation group selection. 
+ */ +typedef struct xfs_perag +{ + char pagf_init; /* this agf's entry is initialized */ + char pagi_init; /* this agi's entry is initialized */ + __uint8_t pagf_levels[XFS_BTNUM_AGF]; + /* # of levels in bno & cnt btree */ + __uint32_t pagf_flcount; /* count of blocks in freelist */ + xfs_extlen_t pagf_freeblks; /* total free blocks */ + xfs_extlen_t pagf_longest; /* longest free space */ + xfs_agino_t pagi_freecount; /* number of free inodes */ +} xfs_perag_t; + +#define XFS_AG_MIN_BYTES (1LL << 24) /* 16 MB */ +#define XFS_AG_BEST_BYTES (1LL << 30) /* 1 GB */ +#define XFS_AG_MAX_BYTES (1LL << 32) /* 4 GB */ + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_MIN_BLOCKS) +xfs_extlen_t xfs_ag_min_blocks(int bl); +#define XFS_AG_MIN_BLOCKS(bl) xfs_ag_min_blocks(bl) +#else +#define XFS_AG_MIN_BLOCKS(bl) ((xfs_extlen_t)(XFS_AG_MIN_BYTES >> (bl))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_BEST_BLOCKS) +xfs_extlen_t xfs_ag_best_blocks(int bl); +#define XFS_AG_BEST_BLOCKS(bl) xfs_ag_best_blocks(bl) +#else +#define XFS_AG_BEST_BLOCKS(bl) ((xfs_extlen_t)(XFS_AG_BEST_BYTES >> (bl))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_MAX_BLOCKS) +xfs_extlen_t xfs_ag_max_blocks(int bl); +#define XFS_AG_MAX_BLOCKS(bl) xfs_ag_max_blocks(bl) +#else +#define XFS_AG_MAX_BLOCKS(bl) ((xfs_extlen_t)(XFS_AG_MAX_BYTES >> (bl))) +#endif + +#define XFS_MAX_AGNUMBER ((xfs_agnumber_t)(NULLAGNUMBER - 1)) + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_MAXLEVELS) +int xfs_ag_maxlevels(struct xfs_mount *mp); +#define XFS_AG_MAXLEVELS(mp) xfs_ag_maxlevels(mp) +#else +#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST) +int xfs_min_freelist(xfs_agf_t *a, struct xfs_mount *mp); +#define XFS_MIN_FREELIST(a,mp) xfs_min_freelist(a,mp) +#else +#define XFS_MIN_FREELIST(a,mp) \ + XFS_MIN_FREELIST_RAW( \ + INT_GET((a)->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT), \ + 
INT_GET((a)->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT), mp) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST_PAG) +int xfs_min_freelist_pag(xfs_perag_t *pag, struct xfs_mount *mp); +#define XFS_MIN_FREELIST_PAG(pag,mp) xfs_min_freelist_pag(pag,mp) +#else +#define XFS_MIN_FREELIST_PAG(pag,mp) \ + XFS_MIN_FREELIST_RAW((pag)->pagf_levels[XFS_BTNUM_BNOi], \ + (pag)->pagf_levels[XFS_BTNUM_CNTi], mp) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MIN_FREELIST_RAW) +int xfs_min_freelist_raw(int bl, int cl, struct xfs_mount *mp); +#define XFS_MIN_FREELIST_RAW(bl,cl,mp) xfs_min_freelist_raw(bl,cl,mp) +#else +#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \ + (MIN((bl) + 1, XFS_AG_MAXLEVELS(mp)) + \ + MIN((cl) + 1, XFS_AG_MAXLEVELS(mp))) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGB_TO_FSB) +xfs_fsblock_t xfs_agb_to_fsb(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno); +#define XFS_AGB_TO_FSB(mp,agno,agbno) xfs_agb_to_fsb(mp,agno,agbno) +#else +#define XFS_AGB_TO_FSB(mp,agno,agbno) \ + (((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_AGNO) +xfs_agnumber_t xfs_fsb_to_agno(struct xfs_mount *mp, xfs_fsblock_t fsbno); +#define XFS_FSB_TO_AGNO(mp,fsbno) xfs_fsb_to_agno(mp,fsbno) +#else +#define XFS_FSB_TO_AGNO(mp,fsbno) \ + ((xfs_agnumber_t)((fsbno) >> (mp)->m_sb.sb_agblklog)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_AGBNO) +xfs_agblock_t xfs_fsb_to_agbno(struct xfs_mount *mp, xfs_fsblock_t fsbno); +#define XFS_FSB_TO_AGBNO(mp,fsbno) xfs_fsb_to_agbno(mp,fsbno) +#else +#define XFS_FSB_TO_AGBNO(mp,fsbno) \ + ((xfs_agblock_t)((fsbno) & XFS_MASK32LO((mp)->m_sb.sb_agblklog))) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGB_TO_DADDR) +xfs_daddr_t xfs_agb_to_daddr(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno); +#define XFS_AGB_TO_DADDR(mp,agno,agbno) xfs_agb_to_daddr(mp,agno,agbno) 
+#else +#define XFS_AGB_TO_DADDR(mp,agno,agbno) \ + ((xfs_daddr_t)(XFS_FSB_TO_BB(mp, \ + (xfs_fsblock_t)(agno) * (mp)->m_sb.sb_agblocks + (agbno)))) +#endif +/* + * XFS_DADDR_TO_AGNO and XFS_DADDR_TO_AGBNO moved to xfs_mount.h + * to avoid header file ordering change + */ + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_DADDR) +xfs_daddr_t xfs_ag_daddr(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_daddr_t d); +#define XFS_AG_DADDR(mp,agno,d) xfs_ag_daddr(mp,agno,d) +#else +#define XFS_AG_DADDR(mp,agno,d) (XFS_AGB_TO_DADDR(mp, agno, 0) + (d)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGF) +xfs_agf_t *xfs_buf_to_agf(struct xfs_buf *bp); +#define XFS_BUF_TO_AGF(bp) xfs_buf_to_agf(bp) +#else +#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)XFS_BUF_PTR(bp)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGI) +xfs_agi_t *xfs_buf_to_agi(struct xfs_buf *bp); +#define XFS_BUF_TO_AGI(bp) xfs_buf_to_agi(bp) +#else +#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)XFS_BUF_PTR(bp)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_AGFL) +xfs_agfl_t *xfs_buf_to_agfl(struct xfs_buf *bp); +#define XFS_BUF_TO_AGFL(bp) xfs_buf_to_agfl(bp) +#else +#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)XFS_BUF_PTR(bp)) +#endif + +/* + * For checking for bad ranges of xfs_daddr_t's, covering multiple + * allocation groups or a single xfs_daddr_t that's a superblock copy. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AG_CHECK_DADDR) +void xfs_ag_check_daddr(struct xfs_mount *mp, xfs_daddr_t d, xfs_extlen_t len); +#define XFS_AG_CHECK_DADDR(mp,d,len) xfs_ag_check_daddr(mp,d,len) +#else +#define XFS_AG_CHECK_DADDR(mp,d,len) \ + ((len) == 1 ? 
\ + ASSERT((d) == XFS_SB_DADDR || \ + XFS_DADDR_TO_AGBNO(mp, d) != XFS_SB_DADDR) : \ + ASSERT(XFS_DADDR_TO_AGNO(mp, d) == \ + XFS_DADDR_TO_AGNO(mp, (d) + (len) - 1))) +#endif + +#endif /* __XFS_AG_H__ */ diff --git a/include/xfs_alloc.h b/include/xfs_alloc.h new file mode 100644 index 000000000..55a2efa59 --- /dev/null +++ b/include/xfs_alloc.h @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_ALLOC_H__ +#define __XFS_ALLOC_H__ + +struct xfs_buf; +struct xfs_mount; +struct xfs_perag; +struct xfs_trans; + +/* + * Freespace allocation types. Argument to xfs_alloc_[v]extent. 
+ */ +typedef enum xfs_alloctype +{ + XFS_ALLOCTYPE_ANY_AG, /* allocate anywhere, use rotor */ + XFS_ALLOCTYPE_FIRST_AG, /* ... start at ag 0 */ + XFS_ALLOCTYPE_START_AG, /* anywhere, start in this a.g. */ + XFS_ALLOCTYPE_THIS_AG, /* anywhere in this a.g. */ + XFS_ALLOCTYPE_START_BNO, /* near this block else anywhere */ + XFS_ALLOCTYPE_NEAR_BNO, /* in this a.g. and near this block */ + XFS_ALLOCTYPE_THIS_BNO /* at exactly this block */ +} xfs_alloctype_t; + +/* + * Flags for xfs_alloc_fix_freelist. + */ +#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ + +/* + * Argument structure for xfs_alloc routines. + * This is turned into a structure to avoid having 20 arguments passed + * down several levels of the stack. + */ +typedef struct xfs_alloc_arg { + struct xfs_trans *tp; /* transaction pointer */ + struct xfs_mount *mp; /* file system mount point */ + struct xfs_buf *agbp; /* buffer for a.g. freelist header */ + struct xfs_perag *pag; /* per-ag struct for this agno */ + xfs_fsblock_t fsbno; /* file system block number */ + xfs_agnumber_t agno; /* allocation group number */ + xfs_agblock_t agbno; /* allocation group-relative block # */ + xfs_extlen_t minlen; /* minimum size of extent */ + xfs_extlen_t maxlen; /* maximum size of extent */ + xfs_extlen_t mod; /* mod value for extent size */ + xfs_extlen_t prod; /* prod value for extent size */ + xfs_extlen_t minleft; /* min blocks must be left after us */ + xfs_extlen_t total; /* total blocks needed in xaction */ + xfs_extlen_t alignment; /* align answer to multiple of this */ + xfs_extlen_t minalignslop; /* slop for minlen+alignment calcs */ + xfs_extlen_t len; /* output: actual size of extent */ + xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... 
*/ + xfs_alloctype_t otype; /* original allocation type */ + char wasdel; /* set if allocation was prev delayed */ + char wasfromfl; /* set if allocation is from freelist */ + char isfl; /* set if is freelist blocks - !actg */ + char userdata; /* set if this is user data */ +} xfs_alloc_arg_t; + + +#ifdef __KERNEL__ + +/* + * Types for alloc tracing. + */ +#define XFS_ALLOC_KTRACE_ALLOC 1 +#define XFS_ALLOC_KTRACE_FREE 2 +#define XFS_ALLOC_KTRACE_MODAGF 3 +/* + * Allocation tracing buffer size. + */ +#define XFS_ALLOC_TRACE_SIZE 4096 + +#ifdef XFS_ALL_TRACE +#define XFS_ALLOC_TRACE +#endif + +#if !defined(DEBUG) +#undef XFS_ALLOC_TRACE +#endif + +/* + * Prototypes for visible xfs_alloc.c routines + */ + +/* + * Compute and fill in value of m_ag_maxlevels. + */ +void +xfs_alloc_compute_maxlevels( + struct xfs_mount *mp); /* file system mount structure */ + +/* + * Decide whether to use this allocation group for this allocation. + * If so, fix up the btree freelist's size. + * This is external so mkfs can call it, too. + */ +int /* error */ +xfs_alloc_fix_freelist( + xfs_alloc_arg_t *args, /* allocation argument structure */ + int flags); /* XFS_ALLOC_FLAG_... */ + +/* + * Get a block from the freelist. + * Returns with the buffer for the block gotten. + */ +int /* error */ +xfs_alloc_get_freelist( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *agbp, /* buffer containing the agf structure */ + xfs_agblock_t *bnop); /* block address retrieved from freelist */ + +/* + * Log the given fields from the agf structure. + */ +void +xfs_alloc_log_agf( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *bp, /* buffer for a.g. freelist header */ + int fields);/* mask of fields to be logged (XFS_AGF_...) */ + +/* + * Interface for inode allocation to force the pag data to be initialized. 
+ */ +int /* error */ +xfs_alloc_pagf_init( + struct xfs_mount *mp, /* file system mount structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags); /* XFS_ALLOC_FLAG_... */ + +/* + * Put the block on the freelist for the allocation group. + */ +int /* error */ +xfs_alloc_put_freelist( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *agbp, /* buffer for a.g. freelist header */ + struct xfs_buf *agflbp,/* buffer for a.g. free block array */ + xfs_agblock_t bno); /* block being freed */ + +/* + * Read in the allocation group header (free/alloc section). + */ +int /* error */ +xfs_alloc_read_agf( + struct xfs_mount *mp, /* mount point structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags, /* XFS_ALLOC_FLAG_... */ + struct xfs_buf **bpp); /* buffer for the ag freelist header */ + +/* + * Allocate an extent (variable-size). + */ +int /* error */ +xfs_alloc_vextent( + xfs_alloc_arg_t *args); /* allocation argument structure */ + +/* + * Free an extent. + */ +int /* error */ +xfs_free_extent( + struct xfs_trans *tp, /* transaction pointer */ + xfs_fsblock_t bno, /* starting block number of extent */ + xfs_extlen_t len); /* length of extent */ + +#endif /* __KERNEL__ */ + +#endif /* __XFS_ALLOC_H__ */ diff --git a/include/xfs_alloc_btree.h b/include/xfs_alloc_btree.h new file mode 100644 index 000000000..7cd1a8737 --- /dev/null +++ b/include/xfs_alloc_btree.h @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_ALLOC_BTREE_H__ +#define __XFS_ALLOC_BTREE_H__ + +/* + * Freespace on-disk structures + */ + +struct xfs_buf; +struct xfs_btree_cur; +struct xfs_btree_sblock; +struct xfs_mount; + +/* + * There are two on-disk btrees, one sorted by blockno and one sorted + * by blockcount and blockno. All blocks look the same to make the code + * simpler; if we have time later, we'll make the optimizations. 
+ */ +#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */ +#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */ + +/* + * Data record/key structure + */ +typedef struct xfs_alloc_rec +{ + xfs_agblock_t ar_startblock; /* starting block number */ + xfs_extlen_t ar_blockcount; /* count of free blocks */ +} xfs_alloc_rec_t, xfs_alloc_key_t; + +typedef xfs_agblock_t xfs_alloc_ptr_t; /* btree pointer type */ + /* btree block header type */ +typedef struct xfs_btree_sblock xfs_alloc_block_t; + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_ALLOC_BLOCK) +xfs_alloc_block_t *xfs_buf_to_alloc_block(struct xfs_buf *bp); +#define XFS_BUF_TO_ALLOC_BLOCK(bp) xfs_buf_to_alloc_block(bp) +#else +#define XFS_BUF_TO_ALLOC_BLOCK(bp) ((xfs_alloc_block_t *)(XFS_BUF_PTR(bp))) +#endif + +/* + * Real block structures have a size equal to the disk block size. + */ + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_SIZE) +int xfs_alloc_block_size(int lev, struct xfs_btree_cur *cur); +#define XFS_ALLOC_BLOCK_SIZE(lev,cur) xfs_alloc_block_size(lev,cur) +#else +#define XFS_ALLOC_BLOCK_SIZE(lev,cur) (1 << (cur)->bc_blocklog) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_MAXRECS) +int xfs_alloc_block_maxrecs(int lev, struct xfs_btree_cur *cur); +#define XFS_ALLOC_BLOCK_MAXRECS(lev,cur) xfs_alloc_block_maxrecs(lev,cur) +#else +#define XFS_ALLOC_BLOCK_MAXRECS(lev,cur) \ + ((cur)->bc_mp->m_alloc_mxr[(lev) != 0]) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_BLOCK_MINRECS) +int xfs_alloc_block_minrecs(int lev, struct xfs_btree_cur *cur); +#define XFS_ALLOC_BLOCK_MINRECS(lev,cur) xfs_alloc_block_minrecs(lev,cur) +#else +#define XFS_ALLOC_BLOCK_MINRECS(lev,cur) \ + ((cur)->bc_mp->m_alloc_mnr[(lev) != 0]) +#endif + +/* + * Minimum and maximum blocksize. + * The blocksize upper limit is pretty much arbitrary. + */ +#define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */ +#define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e.
65536 bytes */ +#define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG) +#define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG) + +/* + * block numbers in the AG; SB is BB 0, AGF is BB 1, AGI is BB 2, AGFL is BB 3 + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BNO_BLOCK) +xfs_agblock_t xfs_bno_block(struct xfs_mount *mp); +#define XFS_BNO_BLOCK(mp) xfs_bno_block(mp) +#else +#define XFS_BNO_BLOCK(mp) ((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CNT_BLOCK) +xfs_agblock_t xfs_cnt_block(struct xfs_mount *mp); +#define XFS_CNT_BLOCK(mp) xfs_cnt_block(mp) +#else +#define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1)) +#endif + +/* + * Record, key, and pointer address macros for btree blocks. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_REC_ADDR) +xfs_alloc_rec_t *xfs_alloc_rec_addr(xfs_alloc_block_t *bb, int i, + struct xfs_btree_cur *cur); +#define XFS_ALLOC_REC_ADDR(bb,i,cur) xfs_alloc_rec_addr(bb,i,cur) +#else +#define XFS_ALLOC_REC_ADDR(bb,i,cur) \ + XFS_BTREE_REC_ADDR(XFS_ALLOC_BLOCK_SIZE(0,cur), xfs_alloc, bb, i, \ + XFS_ALLOC_BLOCK_MAXRECS(0, cur)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_KEY_ADDR) +xfs_alloc_key_t *xfs_alloc_key_addr(xfs_alloc_block_t *bb, int i, + struct xfs_btree_cur *cur); +#define XFS_ALLOC_KEY_ADDR(bb,i,cur) xfs_alloc_key_addr(bb,i,cur) +#else +#define XFS_ALLOC_KEY_ADDR(bb,i,cur) \ + XFS_BTREE_KEY_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, bb, i, \ + XFS_ALLOC_BLOCK_MAXRECS(1, cur)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ALLOC_PTR_ADDR) +xfs_alloc_ptr_t *xfs_alloc_ptr_addr(xfs_alloc_block_t *bb, int i, + struct xfs_btree_cur *cur); +#define XFS_ALLOC_PTR_ADDR(bb,i,cur) xfs_alloc_ptr_addr(bb,i,cur) +#else +#define XFS_ALLOC_PTR_ADDR(bb,i,cur) \ + XFS_BTREE_PTR_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, bb, i, \ + XFS_ALLOC_BLOCK_MAXRECS(1, cur)) +#endif + +/* + * Prototypes for externally 
visible routines. + */ + +/* + * Decrement cursor by one record at the level. + * For nonzero levels the leaf-ward information is untouched. + */ +int /* error */ +xfs_alloc_decrement( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* level in btree, 0 is leaf */ + int *stat); /* success/failure */ + +/* + * Delete the record pointed to by cur. + * The cursor refers to the place where the record was (could be inserted) + * when the operation returns. + */ +int /* error */ +xfs_alloc_delete( + struct xfs_btree_cur *cur, /* btree cursor */ + int *stat); /* success/failure */ + +/* + * Get the data from the pointed-to record. + */ +int /* error */ +xfs_alloc_get_rec( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t *bno, /* output: starting block of extent */ + xfs_extlen_t *len, /* output: length of extent */ + int *stat); /* output: success/failure */ + +/* + * Increment cursor by one record at the level. + * For nonzero levels the leaf-ward information is untouched. + */ +int /* error */ +xfs_alloc_increment( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* level in btree, 0 is leaf */ + int *stat); /* success/failure */ + +/* + * Insert the current record at the point referenced by cur. + * The cursor may be inconsistent on return if splits have been done. + */ +int /* error */ +xfs_alloc_insert( + struct xfs_btree_cur *cur, /* btree cursor */ + int *stat); /* success/failure */ + +/* + * Lookup the record equal to [bno, len] in the btree given by cur. + */ +int /* error */ +xfs_alloc_lookup_eq( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat); /* success/failure */ + +/* + * Lookup the first record greater than or equal to [bno, len] + * in the btree given by cur. 
+ */ +int /* error */ +xfs_alloc_lookup_ge( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat); /* success/failure */ + +/* + * Lookup the first record less than or equal to [bno, len] + * in the btree given by cur. + */ +int /* error */ +xfs_alloc_lookup_le( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat); /* success/failure */ + +/* + * Update the record referred to by cur, to the value given by [bno, len]. + * This either works (return 0) or gets an EFSCORRUPTED error. + */ +int /* error */ +xfs_alloc_update( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len); /* length of extent */ + +#endif /* __XFS_ALLOC_BTREE_H__ */ diff --git a/include/xfs_arch.h b/include/xfs_arch.h new file mode 100644 index 000000000..9a013819a --- /dev/null +++ b/include/xfs_arch.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_ARCH_H__ +#define __XFS_ARCH_H__ + +#ifndef XFS_BIG_FILESYSTEMS +#error XFS_BIG_FILESYSTEMS must be defined true or false +#endif + +#define DIRINO4_GET_ARCH(pointer,arch) \ + ( ((arch) == ARCH_NOCONVERT) \ + ? \ + (INT_GET_UNALIGNED_32(pointer)) \ + : \ + (INT_GET_UNALIGNED_32_BE(pointer)) \ + ) + +#if XFS_BIG_FILESYSTEMS +#define DIRINO_GET_ARCH(pointer,arch) \ + ( ((arch) == ARCH_NOCONVERT) \ + ? \ + (INT_GET_UNALIGNED_64(pointer)) \ + : \ + (INT_GET_UNALIGNED_64_BE(pointer)) \ + ) +#else +/* MACHINE ARCHITECTURE dependent */ +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define DIRINO_GET_ARCH(pointer,arch) \ + DIRINO4_GET_ARCH((((__u8*)(pointer))+4),arch) +#else +#define DIRINO_GET_ARCH(pointer,arch) \ + DIRINO4_GET_ARCH(pointer,arch) +#endif +#endif + +#define DIRINO_COPY_ARCH(from,to,arch) \ + if ((arch) == ARCH_NOCONVERT) { \ + bcopy(from,to,sizeof(xfs_ino_t)); \ + } else { \ + INT_SWAP_UNALIGNED_64(from,to); \ + } +#define DIRINO4_COPY_ARCH(from,to,arch) \ + if ((arch) == ARCH_NOCONVERT) { \ + bcopy((((__u8*)(from)+4)),to,sizeof(xfs_dir2_ino4_t)); \ + } else { \ + INT_SWAP_UNALIGNED_32(from,to); \ + } + +#endif /* __XFS_ARCH_H__ */ diff --git a/include/xfs_attr_leaf.h b/include/xfs_attr_leaf.h new file mode 100644 index 000000000..41d63b526 --- /dev/null +++ b/include/xfs_attr_leaf.h @@ -0,0 +1,305 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_ATTR_LEAF_H__ +#define __XFS_ATTR_LEAF_H__ + +/* + * Attribute storage layout, internal structure, access macros, etc. + * + * Attribute lists are structured around Btrees where all the data + * elements are in the leaf nodes. Attribute names are hashed into an int, + * then that int is used as the index into the Btree. Since the hashval + * of an attribute name may not be unique, we may have duplicate keys. The + * internal links in the Btree are logical block offsets into the file. 
+ */ + +struct attrlist; +struct attrlist_cursor_kern; +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_da_state; +struct xfs_da_state_blk; +struct xfs_inode; +struct xfs_trans; + +/*======================================================================== + * Attribute structure when equal to XFS_LBSIZE(mp) bytes. + *========================================================================*/ + +/* + * This is the structure of the leaf nodes in the Btree. + * + * Struct leaf_entry's are packed from the top. Name/values grow from the + * bottom but are not packed. The freemap contains run-length-encoded entries + * for the free bytes after the leaf_entry's, but only the N largest such; + * smaller runs are dropped. When the freemap doesn't show enough space + * for an allocation, we compact the name/value area and try again. If we + * still don't have enough space, then we have to split the block. The + * name/value structs (both local and remote versions) must be 32bit aligned. + * + * Since we have duplicate hash keys, for each key that matches, compare + * the actual name string. The root and intermediate node search always + * takes the first-in-the-block key match found, so we should only have + * to work "forw"ard. If none matches, continue with the "forw"ard leaf + * nodes until the hash key changes or the attribute name is found. + * + * We store the fact that an attribute is a ROOT versus USER attribute in + * the leaf_entry. The namespaces are independent only because we also look + * at the root/user bit when we are looking for a matching attribute name. + * + * We also store an "incomplete" bit in the leaf_entry. It shows that an + * attribute is in the middle of being created and should not be shown to + * the user if we crash during the time that the bit is set. We clear the + * bit when we have finished setting up the attribute.
We do this because + * we cannot create some large attributes inside a single transaction, and we + * need some indication that we weren't finished if we crash in the middle. + */ +#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */ + +typedef struct xfs_attr_leafblock { + struct xfs_attr_leaf_hdr { /* constant-structure header block */ + xfs_da_blkinfo_t info; /* block type, links, etc. */ + __uint16_t count; /* count of active leaf_entry's */ + __uint16_t usedbytes; /* num bytes of names/values stored */ + __uint16_t firstused; /* first used byte in name area */ + __uint8_t holes; /* != 0 if blk needs compaction */ + __uint8_t pad1; + struct xfs_attr_leaf_map { /* RLE map of free bytes */ + __uint16_t base; /* base of free region */ + __uint16_t size; /* length of free region */ + } freemap[XFS_ATTR_LEAF_MAPSIZE]; /* N largest free regions */ + } hdr; + struct xfs_attr_leaf_entry { /* sorted on key, not name */ + xfs_dahash_t hashval; /* hash value of name */ + __uint16_t nameidx; /* index into buffer of name/value */ + __uint8_t flags; /* LOCAL, ROOT and INCOMPLETE flags */ + __uint8_t pad2; /* unused pad byte */ + } entries[1]; /* variable sized array */ + struct xfs_attr_leaf_name_local { + __uint16_t valuelen; /* number of bytes in value */ + __uint8_t namelen; /* length of name bytes */ + __uint8_t nameval[1]; /* name/value bytes */ + } namelist; /* grows from bottom of buf */ + struct xfs_attr_leaf_name_remote { + xfs_dablk_t valueblk; /* block number of value bytes */ + __uint32_t valuelen; /* number of bytes in value */ + __uint8_t namelen; /* length of name bytes */ + __uint8_t name[1]; /* name bytes */ + } valuelist; /* grows from bottom of buf */ +} xfs_attr_leafblock_t; +typedef struct xfs_attr_leaf_hdr xfs_attr_leaf_hdr_t; +typedef struct xfs_attr_leaf_map xfs_attr_leaf_map_t; +typedef struct xfs_attr_leaf_entry xfs_attr_leaf_entry_t; +typedef struct xfs_attr_leaf_name_local xfs_attr_leaf_name_local_t; +typedef struct 
xfs_attr_leaf_name_remote xfs_attr_leaf_name_remote_t; + +/* + * Flags used in the leaf_entry[i].flags field. + * NOTE: the INCOMPLETE bit must not collide with the flags bits specified + * on the system call, they are "or"ed together for various operations. + */ +#define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */ +#define XFS_ATTR_ROOT_BIT 1 /* limit access to attr to userid 0 */ +#define XFS_ATTR_INCOMPLETE_BIT 7 /* attr in middle of create/delete */ +#define XFS_ATTR_LOCAL (1 << XFS_ATTR_LOCAL_BIT) +#define XFS_ATTR_ROOT (1 << XFS_ATTR_ROOT_BIT) +#define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT) + +/* + * Alignment for namelist and valuelist entries (since they are mixed + * there can be only one alignment value) + */ +#define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t)) + +/* + * Cast typed pointers for "local" and "remote" name/value structs. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_NAME_REMOTE) +xfs_attr_leaf_name_remote_t * +xfs_attr_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx); +#define XFS_ATTR_LEAF_NAME_REMOTE(leafp,idx) \ + xfs_attr_leaf_name_remote(leafp,idx) +#else +#define XFS_ATTR_LEAF_NAME_REMOTE(leafp,idx) /* remote name struct ptr */ \ + ((xfs_attr_leaf_name_remote_t *) \ + &((char *)(leafp))[ INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT) ]) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_NAME_LOCAL) +xfs_attr_leaf_name_local_t * +xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx); +#define XFS_ATTR_LEAF_NAME_LOCAL(leafp,idx) \ + xfs_attr_leaf_name_local(leafp,idx) +#else +#define XFS_ATTR_LEAF_NAME_LOCAL(leafp,idx) /* local name struct ptr */ \ + ((xfs_attr_leaf_name_local_t *) \ + &((char *)(leafp))[ INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT) ]) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_NAME) +char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx); +#define XFS_ATTR_LEAF_NAME(leafp,idx) 
xfs_attr_leaf_name(leafp,idx) +#else +#define XFS_ATTR_LEAF_NAME(leafp,idx) /* generic name struct ptr */ \ + (&((char *)(leafp))[ INT_GET((leafp)->entries[idx].nameidx, ARCH_CONVERT) ]) +#endif + +/* + * Calculate total bytes used (including trailing pad for alignment) for + * a "local" name/value structure, a "remote" name/value structure, and + * a pointer which might be either. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_ENTSIZE_REMOTE) +int xfs_attr_leaf_entsize_remote(int nlen); +#define XFS_ATTR_LEAF_ENTSIZE_REMOTE(nlen) \ + xfs_attr_leaf_entsize_remote(nlen) +#else +#define XFS_ATTR_LEAF_ENTSIZE_REMOTE(nlen) /* space for remote struct */ \ + (((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \ + XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_ENTSIZE_LOCAL) +int xfs_attr_leaf_entsize_local(int nlen, int vlen); +#define XFS_ATTR_LEAF_ENTSIZE_LOCAL(nlen,vlen) \ + xfs_attr_leaf_entsize_local(nlen,vlen) +#else +#define XFS_ATTR_LEAF_ENTSIZE_LOCAL(nlen,vlen) /* space for local struct */ \ + (((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) + \ + XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX) +int xfs_attr_leaf_entsize_local_max(int bsize); +#define XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(bsize) \ + xfs_attr_leaf_entsize_local_max(bsize) +#else +#define XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(bsize) /* max local struct size */ \ + (((bsize) >> 1) + ((bsize) >> 2)) +#endif + + +/*======================================================================== + * Structure used to pass context around among the routines. 
+ *========================================================================*/ + +typedef struct xfs_attr_list_context { + struct xfs_inode *dp; /* inode */ + struct attrlist_cursor_kern *cursor;/* position in list */ + struct attrlist *alist; /* output buffer */ + int count; /* num used entries */ + int dupcnt; /* count dup hashvals seen */ + int bufsize;/* total buffer size */ + int firstu; /* first used byte in buffer */ + int flags; /* from VOP call */ + int resynch;/* T/F: resynch with cursor */ +} xfs_attr_list_context_t; + +/* + * Used to keep a list of "remote value" extents when unlinking an inode. + */ +typedef struct xfs_attr_inactive_list { + xfs_dablk_t valueblk; /* block number of value bytes */ + int valuelen; /* number of bytes in value */ +} xfs_attr_inactive_list_t; + + +/*======================================================================== + * Function prototypes for the kernel. + *========================================================================*/ + +/* + * Internal routines when attribute list size < XFS_LITINO(mp). + */ +int xfs_attr_shortform_create(struct xfs_da_args *args); +int xfs_attr_shortform_add(struct xfs_da_args *add); +int xfs_attr_shortform_lookup(struct xfs_da_args *args); +int xfs_attr_shortform_getvalue(struct xfs_da_args *args); +int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); +int xfs_attr_shortform_remove(struct xfs_da_args *remove); +int xfs_attr_shortform_list(struct xfs_attr_list_context *context); +int xfs_attr_shortform_replace(struct xfs_da_args *args); +int xfs_attr_shortform_allfit(struct xfs_dabuf *bp, struct xfs_inode *dp); + +/* + * Internal routines when attribute list size == XFS_LBSIZE(mp).
+ */ +int xfs_attr_leaf_to_node(struct xfs_da_args *args); +int xfs_attr_leaf_to_shortform(struct xfs_dabuf *bp, + struct xfs_da_args *args); +int xfs_attr_leaf_clearflag(struct xfs_da_args *args); +int xfs_attr_leaf_setflag(struct xfs_da_args *args); +int xfs_attr_leaf_flipflags(struct xfs_da_args *args); + +/* + * Routines used for growing the Btree. + */ +int xfs_attr_leaf_create(struct xfs_da_args *args, xfs_dablk_t which_block, + struct xfs_dabuf **bpp); +int xfs_attr_leaf_split(struct xfs_da_state *state, + struct xfs_da_state_blk *oldblk, + struct xfs_da_state_blk *newblk); +int xfs_attr_leaf_lookup_int(struct xfs_dabuf *leaf, + struct xfs_da_args *args); +int xfs_attr_leaf_getvalue(struct xfs_dabuf *bp, struct xfs_da_args *args); +int xfs_attr_leaf_add(struct xfs_dabuf *leaf_buffer, + struct xfs_da_args *args); +int xfs_attr_leaf_remove(struct xfs_dabuf *leaf_buffer, + struct xfs_da_args *args); +int xfs_attr_leaf_list_int(struct xfs_dabuf *bp, + struct xfs_attr_list_context *context); + +/* + * Routines used for shrinking the Btree. + */ +int xfs_attr_leaf_toosmall(struct xfs_da_state *state, int *retval); +void xfs_attr_leaf_unbalance(struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk, + struct xfs_da_state_blk *save_blk); +int xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp); +int xfs_attr_node_inactive(struct xfs_trans **trans, struct xfs_inode *dp, + struct xfs_dabuf *bp, int level); +int xfs_attr_leaf_inactive(struct xfs_trans **trans, struct xfs_inode *dp, + struct xfs_dabuf *bp); +int xfs_attr_leaf_freextent(struct xfs_trans **trans, struct xfs_inode *dp, + xfs_dablk_t blkno, int blkcnt); + +/* + * Utility routines.
+ */ +xfs_dahash_t xfs_attr_leaf_lasthash(struct xfs_dabuf *bp, int *count); +int xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp, + struct xfs_dabuf *leaf2_bp); +int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int blocksize, + int *local); +int xfs_attr_leaf_entsize(struct xfs_attr_leafblock *leaf, int index); +int xfs_attr_put_listent(struct xfs_attr_list_context *context, + char *name, int namelen, int valuelen); +int xfs_attr_rolltrans(struct xfs_trans **transp, struct xfs_inode *dp); + +#endif /* __XFS_ATTR_LEAF_H__ */ diff --git a/include/xfs_attr_sf.h b/include/xfs_attr_sf.h new file mode 100644 index 000000000..c5106f87c --- /dev/null +++ b/include/xfs_attr_sf.h @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_ATTR_SF_H__ +#define __XFS_ATTR_SF_H__ + +/* + * Attribute storage when stored inside the inode. + * + * Small attribute lists are packed as tightly as possible so as + * to fit into the literal area of the inode. + */ + +struct xfs_inode; + +/* + * Entries are packed toward the top as tightly as possible. + */ +typedef struct xfs_attr_shortform { + struct xfs_attr_sf_hdr { /* constant-structure header block */ + __uint16_t totsize; /* total bytes in shortform list */ + __uint8_t count; /* count of active entries */ + } hdr; + struct xfs_attr_sf_entry { + __uint8_t namelen; /* actual length of name (no NUL terminator) */ + __uint8_t valuelen; /* actual length of value (no NUL terminator) */ + __uint8_t flags; /* flag bits (see xfs_attr_leaf.h) */ + __uint8_t nameval[1]; /* name & value bytes concatenated */ + } list[1]; /* variable sized array */ +} xfs_attr_shortform_t; +typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t; +typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t; + +/* + * We generate this and then sort it; attr_list() must return things in hash order. 
+ */ +typedef struct xfs_attr_sf_sort { + __uint8_t entno; /* entry number in original list */ + __uint8_t namelen; /* length of name value (no null) */ + __uint8_t valuelen; /* length of value */ + xfs_dahash_t hash; /* this entry's hash value */ + char *name; /* name value, pointer into buffer */ +} xfs_attr_sf_sort_t; + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_ENTSIZE_BYNAME) +int xfs_attr_sf_entsize_byname(int nlen, int vlen); +#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen) \ + xfs_attr_sf_entsize_byname(nlen,vlen) +#else +#define XFS_ATTR_SF_ENTSIZE_BYNAME(nlen,vlen) /* space name/value uses */ \ + ((int)sizeof(xfs_attr_sf_entry_t)-1 + (nlen)+(vlen)) +#endif +#define XFS_ATTR_SF_ENTSIZE_MAX /* max space for name&value */ \ + ((1 << (NBBY*(int)sizeof(__uint8_t))) - 1) +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_ENTSIZE) +int xfs_attr_sf_entsize(xfs_attr_sf_entry_t *sfep); +#define XFS_ATTR_SF_ENTSIZE(sfep) xfs_attr_sf_entsize(sfep) +#else +#define XFS_ATTR_SF_ENTSIZE(sfep) /* space an entry uses */ \ + ((int)sizeof(xfs_attr_sf_entry_t)-1 + (sfep)->namelen+(sfep)->valuelen) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_NEXTENTRY) +xfs_attr_sf_entry_t *xfs_attr_sf_nextentry(xfs_attr_sf_entry_t *sfep); +#define XFS_ATTR_SF_NEXTENTRY(sfep) xfs_attr_sf_nextentry(sfep) +#else +#define XFS_ATTR_SF_NEXTENTRY(sfep) /* next entry in struct */ \ + ((xfs_attr_sf_entry_t *) \ + ((char *)(sfep) + XFS_ATTR_SF_ENTSIZE(sfep))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ATTR_SF_TOTSIZE) +int xfs_attr_sf_totsize(struct xfs_inode *dp); +#define XFS_ATTR_SF_TOTSIZE(dp) xfs_attr_sf_totsize(dp) +#else +#define XFS_ATTR_SF_TOTSIZE(dp) /* total space in use */ \ + (INT_GET(((xfs_attr_shortform_t *)((dp)->i_afp->if_u1.if_data))->hdr.totsize, ARCH_CONVERT)) +#endif + +#ifdef XFS_ALL_TRACE +#define XFS_ATTR_TRACE +#endif + +#if !defined(DEBUG) +#undef XFS_ATTR_TRACE +#endif + +/* + * Kernel tracing support for 
attribute lists + */ +struct xfs_attr_list_context; +struct xfs_da_intnode; +struct xfs_da_node_entry; +struct xfs_attr_leafblock; + +#define XFS_ATTR_TRACE_SIZE 4096 /* size of global trace buffer */ + +/* + * Trace record types. + */ +#define XFS_ATTR_KTRACE_L_C 1 /* context */ +#define XFS_ATTR_KTRACE_L_CN 2 /* context, node */ +#define XFS_ATTR_KTRACE_L_CB 3 /* context, btree */ +#define XFS_ATTR_KTRACE_L_CL 4 /* context, leaf */ + +#if defined(XFS_ATTR_TRACE) + +void xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context); +void xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context, + struct xfs_da_intnode *node); +void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context, + struct xfs_da_node_entry *btree); +void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context, + struct xfs_attr_leafblock *leaf); +void xfs_attr_trace_enter(int type, char *where, + __psunsigned_t a2, __psunsigned_t a3, + __psunsigned_t a4, __psunsigned_t a5, + __psunsigned_t a6, __psunsigned_t a7, + __psunsigned_t a8, __psunsigned_t a9, + __psunsigned_t a10, __psunsigned_t a11, + __psunsigned_t a12, __psunsigned_t a13, + __psunsigned_t a14, __psunsigned_t a15); +#else +#define xfs_attr_trace_l_c(w,c) +#define xfs_attr_trace_l_cn(w,c,n) +#define xfs_attr_trace_l_cb(w,c,b) +#define xfs_attr_trace_l_cl(w,c,l) +#endif /* XFS_ATTR_TRACE */ + +#endif /* __XFS_ATTR_SF_H__ */ diff --git a/include/xfs_bit.h b/include/xfs_bit.h new file mode 100644 index 000000000..80eccc5e5 --- /dev/null +++ b/include/xfs_bit.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_BIT_H__ +#define __XFS_BIT_H__ + +/* + * XFS bit manipulation routines. 
+ */ + +/* + * masks with n high/low bits set, 32-bit values & 64-bit values; in the macro forms the shift count must stay below the type width, so n must be 1..width for the HI masks and 0..width-1 for the LO masks + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK32HI) +__uint32_t xfs_mask32hi(int n); +#define XFS_MASK32HI(n) xfs_mask32hi(n) +#else +#define XFS_MASK32HI(n) ((__uint32_t)-1 << (32 - (n))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK64HI) +__uint64_t xfs_mask64hi(int n); +#define XFS_MASK64HI(n) xfs_mask64hi(n) +#else +#define XFS_MASK64HI(n) ((__uint64_t)-1 << (64 - (n))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK32LO) +__uint32_t xfs_mask32lo(int n); +#define XFS_MASK32LO(n) xfs_mask32lo(n) +#else +#define XFS_MASK32LO(n) (((__uint32_t)1 << (n)) - 1) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MASK64LO) +__uint64_t xfs_mask64lo(int n); +#define XFS_MASK64LO(n) xfs_mask64lo(n) +#else +#define XFS_MASK64LO(n) (((__uint64_t)1 << (n)) - 1) +#endif + +/* + * Index of low bit number in byte, -1 for none set, 0..7 otherwise. NOTE(review): plain char may be unsigned on some ABIs, where a stored -1 reads back as 255 -- confirm the element type here and for xfs_highbit. + */ +extern const char xfs_lowbit[256]; + +/* + * Index of high bit number in byte, -1 for none set, 0..7 otherwise. + */ +extern const char xfs_highbit[256]; + +/* + * Count of bits set in byte, 0..8. + */ +extern const char xfs_countbit[256]; + +/* + * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set. + */ +extern int xfs_lowbit32(__uint32_t v); + +/* + * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set. + */ +extern int xfs_highbit32(__uint32_t v); + +/* + * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set. + */ +extern int xfs_lowbit64(__uint64_t v); + +/* + * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set. + */ +extern int xfs_highbit64(__uint64_t v); + +#endif /* __XFS_BIT_H__ */ diff --git a/include/xfs_bmap.h b/include/xfs_bmap.h new file mode 100644 index 000000000..fa1a9e45c --- /dev/null +++ b/include/xfs_bmap.h @@ -0,0 +1,397 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_BMAP_H__ +#define __XFS_BMAP_H__ + +struct getbmap; +struct xfs_bmbt_irec; +struct xfs_inode; +struct xfs_mount; +struct xfs_trans; + +/* + * List of extents to be freed "later". + * The list is kept sorted on xbfi_startblock. + */ +typedef struct xfs_bmap_free_item +{ + xfs_fsblock_t xbfi_startblock;/* starting fs block number */ + xfs_extlen_t xbfi_blockcount;/* number of blocks in extent */ + struct xfs_bmap_free_item *xbfi_next; /* link to next entry */ +} xfs_bmap_free_item_t; + +/* + * Header for free extent list. 
+ */ +typedef struct xfs_bmap_free +{ + xfs_bmap_free_item_t *xbf_first; /* list of to-be-free extents */ + int xbf_count; /* count of items on list */ + int xbf_low; /* kludge: alloc in low mode */ +} xfs_bmap_free_t; + +#define XFS_BMAP_MAX_NMAP 4 + +/* + * Flags for xfs_bmapi + */ +#define XFS_BMAPI_WRITE 0x001 /* write operation: allocate space */ +#define XFS_BMAPI_DELAY 0x002 /* delayed write operation */ +#define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */ +#define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */ +#define XFS_BMAPI_EXACT 0x010 /* allocate only to spec'd bounds */ +#define XFS_BMAPI_ATTRFORK 0x020 /* use attribute fork not data */ +#define XFS_BMAPI_ASYNC 0x040 /* bunmapi xactions can be async */ +#define XFS_BMAPI_RSVBLOCKS 0x080 /* OK to alloc. reserved data blocks */ +#define XFS_BMAPI_PREALLOC 0x100 /* preallocation op: unwritten space */ +#define XFS_BMAPI_IGSTATE 0x200 /* Ignore state - */ + /* combine contig. space */ +#define XFS_BMAPI_CONTIG 0x400 /* must allocate only one extent */ +#define XFS_BMAPI_DIRECT_IO 0x800 /* Flag from cxfs client, not used + * by xfs directly. Indicates alloc + * request is for direct I/O not + * extent conversion by server */ + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAPI_AFLAG) +int xfs_bmapi_aflag(int w); +#define XFS_BMAPI_AFLAG(w) xfs_bmapi_aflag(w) +#else +#define XFS_BMAPI_AFLAG(w) ((w) == XFS_ATTR_FORK ? XFS_BMAPI_ATTRFORK : 0) +#endif + +/* + * Special values for xfs_bmbt_irec_t br_startblock field. 
+ */ +#define DELAYSTARTBLOCK ((xfs_fsblock_t)-1LL) +#define HOLESTARTBLOCK ((xfs_fsblock_t)-2LL) + +/* + * Trace operations for bmap extent tracing + */ +#define XFS_BMAP_KTRACE_DELETE 1 +#define XFS_BMAP_KTRACE_INSERT 2 +#define XFS_BMAP_KTRACE_PRE_UP 3 +#define XFS_BMAP_KTRACE_POST_UP 4 + +#define XFS_BMAP_TRACE_SIZE 4096 /* size of global trace buffer */ +#define XFS_BMAP_KTRACE_SIZE 32 /* size of per-inode trace buffer */ + +#if defined(XFS_ALL_TRACE) +#define XFS_BMAP_TRACE +#endif + +#if !defined(DEBUG) +#undef XFS_BMAP_TRACE +#endif + + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_INIT) +void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp); +#define XFS_BMAP_INIT(flp,fbp) xfs_bmap_init(flp,fbp) +#else +#define XFS_BMAP_INIT(flp,fbp) \ + ((flp)->xbf_first = NULL, (flp)->xbf_count = 0, \ + (flp)->xbf_low = 0, *(fbp) = NULLFSBLOCK) +#endif + +/* + * Argument structure for xfs_bmap_alloc. + */ +typedef struct xfs_bmalloca { + xfs_fsblock_t firstblock; /* i/o first block allocated */ + xfs_fsblock_t rval; /* starting block of new extent */ + xfs_fileoff_t off; /* offset in file filling in */ + struct xfs_trans *tp; /* transaction pointer */ + struct xfs_inode *ip; /* incore inode pointer */ + struct xfs_bmbt_irec *prevp; /* extent before the new one */ + struct xfs_bmbt_irec *gotp; /* extent after, or delayed */ + xfs_extlen_t alen; /* i/o length asked/allocated */ + xfs_extlen_t total; /* total blocks needed for xaction */ + xfs_extlen_t minlen; /* minimum allocation size (blocks) */ + xfs_extlen_t minleft; /* amount must be left after alloc */ + int eof; /* set if allocating past last extent */ + int wasdel; /* replacing a delayed allocation */ + int userdata;/* set if this is user data */ + int low; /* low on space, using seq'l ags */ + int aeof; /* allocated space at eof */ +} xfs_bmalloca_t; + +#ifdef __KERNEL__ +/* + * Convert inode from non-attributed to attributed. + * Must not be in a transaction, ip must not be locked. 
+ */ +int /* error code */ +xfs_bmap_add_attrfork( + struct xfs_inode *ip, /* incore inode pointer */ + int rsvd); /* flag for reserved block allocation */ + +/* + * Add the extent to the list of extents to be freed at transaction end. + * The list is maintained sorted (by block number). + */ +void +xfs_bmap_add_free( + xfs_fsblock_t bno, /* fs block number of extent */ + xfs_filblks_t len, /* length of extent */ + xfs_bmap_free_t *flist, /* list of extents */ + struct xfs_mount *mp); /* mount point structure */ + +/* + * Routine to clean up the free list data structure when + * an error occurs during a transaction. + */ +void +xfs_bmap_cancel( + xfs_bmap_free_t *flist); /* free list to clean up */ + +/* + * Routine to check if a specified inode is swap capable. + */ +int +xfs_bmap_check_swappable( + struct xfs_inode *ip); /* incore inode */ + +/* + * Compute and fill in the value of the maximum depth of a bmap btree + * in this filesystem. Done once, during mount. + */ +void +xfs_bmap_compute_maxlevels( + struct xfs_mount *mp, /* file system mount structure */ + int whichfork); /* data or attr fork */ + +/* + * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi + * caller. Frees all the extents that need freeing, which must be done + * last due to locking considerations. + * + * Return 1 if the given transaction was committed and a new one allocated, + * and 0 otherwise. NOTE(review): the prototype returns an error code and takes a *committed parameter, so this 1/0 result is presumably reported through *committed -- confirm against the implementation. + */ +int /* error */ +xfs_bmap_finish( + struct xfs_trans **tp, /* transaction pointer addr */ + xfs_bmap_free_t *flist, /* i/o: list extents to free */ + xfs_fsblock_t firstblock, /* controlled a.g. for allocs */ + int *committed); /* xact committed or not */ + +/* + * Returns the file-relative block number of the first unused block in the file. + * This is the lowest-address hole if the file has holes, else the first block + * past the end of file. 
+ */ +int /* error */ +xfs_bmap_first_unused( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode */ + xfs_extlen_t len, /* size of hole to find */ + xfs_fileoff_t *unused, /* unused block num */ + int whichfork); /* data or attr fork */ + +/* + * Returns the file-relative block number of the last block + 1 before + * last_block (input value) in the file. + * This is not based on i_size, it is based on the extent list. + * Returns 0 for local files, as they do not have an extent list. + */ +int /* error */ +xfs_bmap_last_before( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode */ + xfs_fileoff_t *last_block, /* last block */ + int whichfork); /* data or attr fork */ + +/* + * Returns the file-relative block number of the first block past eof in + * the file. This is not based on i_size, it is based on the extent list. + * Returns 0 for local files, as they do not have an extent list. + */ +int /* error */ +xfs_bmap_last_offset( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode */ + xfs_fileoff_t *unused, /* last block num */ + int whichfork); /* data or attr fork */ + +/* + * Returns whether the selected fork of the inode has exactly one + * block or not. For the data fork we check this matches di_size, + * implying the file's range is 0..bsize-1. + */ +int +xfs_bmap_one_block( + struct xfs_inode *ip, /* incore inode */ + int whichfork); /* data or attr fork */ + +/* + * Read in the extents to iu_extents. + * All inode fields are set up by caller, we just traverse the btree + * and copy the records in. + */ +int /* error */ +xfs_bmap_read_extents( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode */ + int whichfork); /* data or attr fork */ + +#if defined(XFS_BMAP_TRACE) +/* + * Add bmap trace insert entries for all the contents of the extent list. 
+ */ +void +xfs_bmap_trace_exlist( + char *fname, /* function name */ + struct xfs_inode *ip, /* incore inode pointer */ + xfs_extnum_t cnt, /* count of entries in list */ + int whichfork); /* data or attr fork */ +#else +#define xfs_bmap_trace_exlist(f,ip,c,w) +#endif + +/* + * Map file blocks to filesystem blocks. + * File range is given by the bno/len pair. + * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set) + * into a hole or past eof. + * Only allocates blocks from a single allocation group, + * to avoid locking problems. + * The returned value in "firstblock" from the first call in a transaction + * must be remembered and presented to subsequent calls in "firstblock". + * An upper bound for the number of blocks to be allocated is supplied to + * the first call in "total"; if no allocation group has that many free + * blocks then the call will fail (return NULLFSBLOCK in "firstblock"). + */ +int /* error */ +xfs_bmapi( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode */ + xfs_fileoff_t bno, /* starting file offs. mapped */ + xfs_filblks_t len, /* length to map in file */ + int flags, /* XFS_BMAPI_... */ + xfs_fsblock_t *firstblock, /* first allocated block + controls a.g. for allocs */ + xfs_extlen_t total, /* total blocks needed */ + struct xfs_bmbt_irec *mval, /* output: map values */ + int *nmap, /* i/o: mval size/count */ + xfs_bmap_free_t *flist); /* i/o: list extents to free */ + +/* + * Map file blocks to filesystem blocks, simple version. + * One block only, read-only. + * For flags, only the XFS_BMAPI_ATTRFORK flag is examined. + * For the other flag values, the effect is as if XFS_BMAPI_METADATA + * was set and all the others were clear. 
+ */ +int /* error */ +xfs_bmapi_single( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode */ + int whichfork, /* data or attr fork */ + xfs_fsblock_t *fsb, /* output: mapped block */ + xfs_fileoff_t bno); /* starting file offs. mapped */ + +/* + * Unmap (remove) blocks from a file. + * If nexts is nonzero then the number of extents to remove is limited to + * that value. If not all extents in the block range can be removed then + * *done is set. + */ +int /* error */ +xfs_bunmapi( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* incore inode */ + xfs_fileoff_t bno, /* starting offset to unmap */ + xfs_filblks_t len, /* length to unmap in file */ + int flags, /* XFS_BMAPI_... */ + xfs_extnum_t nexts, /* number of extents max */ + xfs_fsblock_t *firstblock, /* first allocated block + controls a.g. for allocs */ + xfs_bmap_free_t *flist, /* i/o: list extents to free */ + int *done); /* set if not done yet */ + +/* + * Fcntl interface to xfs_bmapi. + */ +int /* error code */ +xfs_getbmap( + bhv_desc_t *bdp, /* XFS behavior descriptor*/ + struct getbmap *bmv, /* user bmap structure */ + void *ap, /* pointer to user's array */ + int iflags); /* interface flags */ + +/* + * Check the last inode extent to determine whether this allocation will result + * in blocks being allocated at the end of the file. When we allocate new data + * blocks at the end of the file which do not start at the previous data block, + * we will try to align the new blocks at stripe unit boundaries. + */ +int +xfs_bmap_isaeof( + struct xfs_inode *ip, + xfs_fileoff_t off, + int whichfork, + int *aeof); + +/* + * Check if the endoff is outside the last extent. If so the caller will grow + * the allocation to a stripe unit boundary + */ +int +xfs_bmap_eof( + struct xfs_inode *ip, + xfs_fileoff_t endoff, + int whichfork, + int *eof); + +/* + * Count fsblocks of the given fork. 
+ */ +int +xfs_bmap_count_blocks( + xfs_trans_t *tp, + xfs_inode_t *ip, + int whichfork, + int *count); + +/* + * Check an extent list, which has just been read, for + * any bit in the extent flag field. + */ +int +xfs_check_nostate_extents( + xfs_bmbt_rec_t *ep, + xfs_extnum_t num); + +#endif /* __KERNEL__ */ + +#endif /* __XFS_BMAP_H__ */ diff --git a/include/xfs_bmap_btree.h b/include/xfs_bmap_btree.h new file mode 100644 index 000000000..af8ac671b --- /dev/null +++ b/include/xfs_bmap_btree.h @@ -0,0 +1,661 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_BMAP_BTREE_H__ +#define __XFS_BMAP_BTREE_H__ + +#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */ + +struct xfs_btree_cur; +struct xfs_btree_lblock; +struct xfs_mount; +struct xfs_inode; + +/* + * Bmap root header, on-disk form only. + */ +typedef struct xfs_bmdr_block +{ + __uint16_t bb_level; /* 0 is a leaf */ + __uint16_t bb_numrecs; /* current # of data records */ +} xfs_bmdr_block_t; + +/* + * Bmap btree record and extent descriptor. + * For 32-bit kernels, + * l0:31 is an extent flag (value 1 indicates non-normal). + * l0:0-30 and l1:9-31 are startoff. + * l1:0-8, l2:0-31, and l3:21-31 are startblock. + * l3:0-20 are blockcount. + * For 64-bit kernels, + * l0:63 is an extent flag (value 1 indicates non-normal). + * l0:9-62 are startoff. + * l0:0-8 and l1:21-63 are startblock. + * l1:0-20 are blockcount. 
+ */ + +#if __BYTE_ORDER == __LITTLE_ENDIAN + +#define BMBT_TOTAL_BITLEN 128 /* 128 bits, 16 bytes */ +#define BMBT_EXNTFLAG_BITOFF 0 +#define BMBT_EXNTFLAG_BITLEN 1 +#define BMBT_STARTOFF_BITOFF (BMBT_EXNTFLAG_BITOFF + BMBT_EXNTFLAG_BITLEN) +#define BMBT_STARTOFF_BITLEN 54 +#define BMBT_STARTBLOCK_BITOFF (BMBT_STARTOFF_BITOFF + BMBT_STARTOFF_BITLEN) +#define BMBT_STARTBLOCK_BITLEN 52 +#define BMBT_BLOCKCOUNT_BITOFF \ + (BMBT_STARTBLOCK_BITOFF + BMBT_STARTBLOCK_BITLEN) +#define BMBT_BLOCKCOUNT_BITLEN (BMBT_TOTAL_BITLEN - BMBT_BLOCKCOUNT_BITOFF) + +#else + +#define BMBT_TOTAL_BITLEN 128 /* 128 bits, 16 bytes */ +#define BMBT_EXNTFLAG_BITOFF 63 +#define BMBT_EXNTFLAG_BITLEN 1 +#define BMBT_STARTOFF_BITOFF (BMBT_EXNTFLAG_BITOFF - BMBT_STARTOFF_BITLEN) +#define BMBT_STARTOFF_BITLEN 54 +#define BMBT_STARTBLOCK_BITOFF 85 /* 128 - 43 (other 9 is in first word) */ +#define BMBT_STARTBLOCK_BITLEN 52 +#define BMBT_BLOCKCOUNT_BITOFF 64 /* Start of second 64 bit container */ +#define BMBT_BLOCKCOUNT_BITLEN 21 + +#endif + + +#define BMBT_USE_64 1 + +typedef struct xfs_bmbt_rec_32 +{ + __uint32_t l0, l1, l2, l3; +} xfs_bmbt_rec_32_t; +typedef struct xfs_bmbt_rec_64 +{ + __uint64_t l0, l1; +} xfs_bmbt_rec_64_t; + +#if BMBT_USE_64 +typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ +typedef xfs_bmbt_rec_64_t xfs_bmbt_rec_t, xfs_bmdr_rec_t; +#else /* !BMBT_USE_64 */ +typedef __uint32_t xfs_bmbt_rec_base_t; /* use this for casts */ +typedef xfs_bmbt_rec_32_t xfs_bmbt_rec_t, xfs_bmdr_rec_t; +#endif /* BMBT_USE_64 */ + +/* + * Values and macros for delayed-allocation startblock fields. 
+ */ +#define STARTBLOCKVALBITS 17 /* width of the value field kept in the low bits */ +#define STARTBLOCKMASKBITS (15 + XFS_BIG_FILESYSTEMS * 20) +#define DSTARTBLOCKMASKBITS (15 + 20) +#define STARTBLOCKMASK \ + (((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) +#define DSTARTBLOCKMASK \ + (((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_ISNULLSTARTBLOCK) +int isnullstartblock(xfs_fsblock_t x); +#define ISNULLSTARTBLOCK(x) isnullstartblock(x) +#else +#define ISNULLSTARTBLOCK(x) (((x) & STARTBLOCKMASK) == STARTBLOCKMASK) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_ISNULLDSTARTBLOCK) +int isnulldstartblock(xfs_dfsbno_t x); +#define ISNULLDSTARTBLOCK(x) isnulldstartblock(x) +#else +#define ISNULLDSTARTBLOCK(x) (((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_NULLSTARTBLOCK) +xfs_fsblock_t nullstartblock(int k); /* macro form asserts k < (1 << STARTBLOCKVALBITS) */ +#define NULLSTARTBLOCK(k) nullstartblock(k) +#else +#define NULLSTARTBLOCK(k) \ + ((ASSERT((k) < (1 << STARTBLOCKVALBITS))), (STARTBLOCKMASK | (k))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_STARTBLOCKVAL) +xfs_filblks_t startblockval(xfs_fsblock_t x); /* macro form masks off the STARTBLOCKMASK bits */ +#define STARTBLOCKVAL(x) startblockval(x) +#else +#define STARTBLOCKVAL(x) ((xfs_filblks_t)((x) & ~STARTBLOCKMASK)) +#endif + +/* + * Possible extent formats. + */ +typedef enum { + XFS_EXTFMT_NOSTATE = 0, + XFS_EXTFMT_HASSTATE +} xfs_exntfmt_t; + +/* + * Possible extent states. + */ +typedef enum { + XFS_EXT_NORM, XFS_EXT_UNWRITTEN, + XFS_EXT_DMAPI_OFFLINE +} xfs_exntst_t; + +/* + * Extent state and extent format macros. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_EXTFMT_INODE ) +xfs_exntfmt_t xfs_extfmt_inode(struct xfs_inode *ip); +#define XFS_EXTFMT_INODE(x) xfs_extfmt_inode(x) +#else +#define XFS_EXTFMT_INODE(x) \ + (XFS_SB_VERSION_HASEXTFLGBIT(&((x)->i_mount->m_sb)) ? 
\ + XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE) +#endif +#define ISUNWRITTEN(x) ((x) == XFS_EXT_UNWRITTEN) + +/* + * Incore version of above. + */ +typedef struct xfs_bmbt_irec +{ + xfs_fileoff_t br_startoff; /* starting file offset */ + xfs_fsblock_t br_startblock; /* starting block number */ + xfs_filblks_t br_blockcount; /* number of blocks */ + xfs_exntst_t br_state; /* extent state */ +} xfs_bmbt_irec_t; + +/* + * Key structure for non-leaf levels of the tree. + */ +typedef struct xfs_bmbt_key +{ + xfs_dfiloff_t br_startoff; /* starting file offset */ +} xfs_bmbt_key_t, xfs_bmdr_key_t; + +typedef xfs_dfsbno_t xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; /* btree pointer type */ + /* btree block header type */ +typedef struct xfs_btree_lblock xfs_bmbt_block_t; + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_BMBT_BLOCK) +xfs_bmbt_block_t *xfs_buf_to_bmbt_block(struct xfs_buf *bp); +#define XFS_BUF_TO_BMBT_BLOCK(bp) xfs_buf_to_bmbt_block(bp) +#else +#define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)(XFS_BUF_PTR(bp))) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_RBLOCK_DSIZE) +int xfs_bmap_rblock_dsize(int lev, struct xfs_btree_cur *cur); +#define XFS_BMAP_RBLOCK_DSIZE(lev,cur) xfs_bmap_rblock_dsize(lev,cur) +#else +#define XFS_BMAP_RBLOCK_DSIZE(lev,cur) ((cur)->bc_private.b.forksize) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_RBLOCK_ISIZE) +int xfs_bmap_rblock_isize(int lev, struct xfs_btree_cur *cur); +#define XFS_BMAP_RBLOCK_ISIZE(lev,cur) xfs_bmap_rblock_isize(lev,cur) +#else +#define XFS_BMAP_RBLOCK_ISIZE(lev,cur) \ + ((int)XFS_IFORK_PTR((cur)->bc_private.b.ip, \ + (cur)->bc_private.b.whichfork)->if_broot_bytes) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_IBLOCK_SIZE) +int xfs_bmap_iblock_size(int lev, struct xfs_btree_cur *cur); +#define XFS_BMAP_IBLOCK_SIZE(lev,cur) xfs_bmap_iblock_size(lev,cur) +#else +#define XFS_BMAP_IBLOCK_SIZE(lev,cur) (1 << (cur)->bc_blocklog) +#endif + +#if 
XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_DSIZE) +int xfs_bmap_block_dsize(int lev, struct xfs_btree_cur *cur); +#define XFS_BMAP_BLOCK_DSIZE(lev,cur) xfs_bmap_block_dsize(lev,cur) +#else +#define XFS_BMAP_BLOCK_DSIZE(lev,cur) \ + ((lev) == (cur)->bc_nlevels - 1 ? \ + XFS_BMAP_RBLOCK_DSIZE(lev,cur) : \ + XFS_BMAP_IBLOCK_SIZE(lev,cur)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_ISIZE) +int xfs_bmap_block_isize(int lev, struct xfs_btree_cur *cur); +#define XFS_BMAP_BLOCK_ISIZE(lev,cur) xfs_bmap_block_isize(lev,cur) +#else +#define XFS_BMAP_BLOCK_ISIZE(lev,cur) \ + ((lev) == (cur)->bc_nlevels - 1 ? \ + XFS_BMAP_RBLOCK_ISIZE(lev,cur) : \ + XFS_BMAP_IBLOCK_SIZE(lev,cur)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_DMAXRECS) +int xfs_bmap_block_dmaxrecs(int lev, struct xfs_btree_cur *cur); +#define XFS_BMAP_BLOCK_DMAXRECS(lev,cur) xfs_bmap_block_dmaxrecs(lev,cur) +#else +#define XFS_BMAP_BLOCK_DMAXRECS(lev,cur) \ + ((lev) == (cur)->bc_nlevels - 1 ? \ + XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \ + xfs_bmdr, (lev) == 0) : \ + ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0])) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_IMAXRECS) +int xfs_bmap_block_imaxrecs(int lev, struct xfs_btree_cur *cur); +#define XFS_BMAP_BLOCK_IMAXRECS(lev,cur) xfs_bmap_block_imaxrecs(lev,cur) +#else +#define XFS_BMAP_BLOCK_IMAXRECS(lev,cur) \ + ((lev) == (cur)->bc_nlevels - 1 ? \ + XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur), \ + xfs_bmbt, (lev) == 0) : \ + ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0])) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_DMINRECS) +int xfs_bmap_block_dminrecs(int lev, struct xfs_btree_cur *cur); +#define XFS_BMAP_BLOCK_DMINRECS(lev,cur) xfs_bmap_block_dminrecs(lev,cur) +#else +#define XFS_BMAP_BLOCK_DMINRECS(lev,cur) \ + ((lev) == (cur)->bc_nlevels - 1 ? 
\ + XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \ + xfs_bmdr, (lev) == 0) : \ + ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0])) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BLOCK_IMINRECS) +int xfs_bmap_block_iminrecs(int lev, struct xfs_btree_cur *cur); +#define XFS_BMAP_BLOCK_IMINRECS(lev,cur) xfs_bmap_block_iminrecs(lev,cur) +#else +#define XFS_BMAP_BLOCK_IMINRECS(lev,cur) \ + ((lev) == (cur)->bc_nlevels - 1 ? \ + XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur), \ + xfs_bmbt, (lev) == 0) : \ + ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0])) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_REC_DADDR) +xfs_bmbt_rec_t * +xfs_bmap_rec_daddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur); +#define XFS_BMAP_REC_DADDR(bb,i,cur) xfs_bmap_rec_daddr(bb,i,cur) +#else +#define XFS_BMAP_REC_DADDR(bb,i,cur) \ + XFS_BTREE_REC_ADDR(XFS_BMAP_BLOCK_DSIZE( \ + INT_GET((bb)->bb_level, ARCH_CONVERT), cur), \ + xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \ + INT_GET((bb)->bb_level, ARCH_CONVERT), cur)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_REC_IADDR) +xfs_bmbt_rec_t * +xfs_bmap_rec_iaddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur); +#define XFS_BMAP_REC_IADDR(bb,i,cur) xfs_bmap_rec_iaddr(bb,i,cur) +#else +#define XFS_BMAP_REC_IADDR(bb,i,cur) \ + XFS_BTREE_REC_ADDR(XFS_BMAP_BLOCK_ISIZE( \ + INT_GET((bb)->bb_level, ARCH_CONVERT), cur), \ + xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \ + INT_GET((bb)->bb_level, ARCH_CONVERT), cur)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_KEY_DADDR) +xfs_bmbt_key_t * +xfs_bmap_key_daddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur); +#define XFS_BMAP_KEY_DADDR(bb,i,cur) xfs_bmap_key_daddr(bb,i,cur) +#else +#define XFS_BMAP_KEY_DADDR(bb,i,cur) \ + XFS_BTREE_KEY_ADDR(XFS_BMAP_BLOCK_DSIZE( \ + INT_GET((bb)->bb_level, ARCH_CONVERT), cur), \ + xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \ + INT_GET((bb)->bb_level, ARCH_CONVERT), cur)) 
+#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_KEY_IADDR) +xfs_bmbt_key_t * +xfs_bmap_key_iaddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur); +#define XFS_BMAP_KEY_IADDR(bb,i,cur) xfs_bmap_key_iaddr(bb,i,cur) +#else +#define XFS_BMAP_KEY_IADDR(bb,i,cur) \ + XFS_BTREE_KEY_ADDR(XFS_BMAP_BLOCK_ISIZE( \ + INT_GET((bb)->bb_level, ARCH_CONVERT), cur), \ + xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \ + INT_GET((bb)->bb_level, ARCH_CONVERT), cur)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_PTR_DADDR) +xfs_bmbt_ptr_t * +xfs_bmap_ptr_daddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur); +#define XFS_BMAP_PTR_DADDR(bb,i,cur) xfs_bmap_ptr_daddr(bb,i,cur) +#else +#define XFS_BMAP_PTR_DADDR(bb,i,cur) \ + XFS_BTREE_PTR_ADDR(XFS_BMAP_BLOCK_DSIZE( \ + INT_GET((bb)->bb_level, ARCH_CONVERT), cur), \ + xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \ + INT_GET((bb)->bb_level, ARCH_CONVERT), cur)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_PTR_IADDR) +xfs_bmbt_ptr_t * +xfs_bmap_ptr_iaddr(xfs_bmbt_block_t *bb, int i, struct xfs_btree_cur *cur); +#define XFS_BMAP_PTR_IADDR(bb,i,cur) xfs_bmap_ptr_iaddr(bb,i,cur) +#else +#define XFS_BMAP_PTR_IADDR(bb,i,cur) \ + XFS_BTREE_PTR_ADDR(XFS_BMAP_BLOCK_ISIZE( \ + INT_GET((bb)->bb_level, ARCH_CONVERT), cur), \ + xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \ + INT_GET((bb)->bb_level, ARCH_CONVERT), cur)) +#endif + +/* + * These are to be used when we know the size of the block and + * we don't have a cursor. 
+ */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_REC_ADDR) +xfs_bmbt_rec_t *xfs_bmap_broot_rec_addr(xfs_bmbt_block_t *bb, int i, int sz); +#define XFS_BMAP_BROOT_REC_ADDR(bb,i,sz) xfs_bmap_broot_rec_addr(bb,i,sz) +#else +#define XFS_BMAP_BROOT_REC_ADDR(bb,i,sz) \ + XFS_BTREE_REC_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_KEY_ADDR) +xfs_bmbt_key_t *xfs_bmap_broot_key_addr(xfs_bmbt_block_t *bb, int i, int sz); +#define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) xfs_bmap_broot_key_addr(bb,i,sz) +#else +#define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \ + XFS_BTREE_KEY_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_PTR_ADDR) +xfs_bmbt_ptr_t *xfs_bmap_broot_ptr_addr(xfs_bmbt_block_t *bb, int i, int sz); +#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz) xfs_bmap_broot_ptr_addr(bb,i,sz) +#else +#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz) \ + XFS_BTREE_PTR_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_NUMRECS) +int xfs_bmap_broot_numrecs(xfs_bmdr_block_t *bb); +#define XFS_BMAP_BROOT_NUMRECS(bb) xfs_bmap_broot_numrecs(bb) +#else +#define XFS_BMAP_BROOT_NUMRECS(bb) (INT_GET((bb)->bb_numrecs, ARCH_CONVERT)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_MAXRECS) +int xfs_bmap_broot_maxrecs(int sz); +#define XFS_BMAP_BROOT_MAXRECS(sz) xfs_bmap_broot_maxrecs(sz) +#else +#define XFS_BMAP_BROOT_MAXRECS(sz) XFS_BTREE_BLOCK_MAXRECS(sz,xfs_bmbt,0) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_BROOT_SPACE_CALC) +int xfs_bmap_broot_space_calc(int nrecs); +#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) xfs_bmap_broot_space_calc(nrecs) +#else +#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \ + ((int)(sizeof(xfs_bmbt_block_t) + \ + ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE 
&& XFSSO_XFS_BMAP_BROOT_SPACE) +int xfs_bmap_broot_space(xfs_bmdr_block_t *bb); +#define XFS_BMAP_BROOT_SPACE(bb) xfs_bmap_broot_space(bb) +#else +#define XFS_BMAP_BROOT_SPACE(bb) \ + XFS_BMAP_BROOT_SPACE_CALC(INT_GET((bb)->bb_numrecs, ARCH_CONVERT)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMDR_SPACE_CALC) +int xfs_bmdr_space_calc(int nrecs); +#define XFS_BMDR_SPACE_CALC(nrecs) xfs_bmdr_space_calc(nrecs) +#else +#define XFS_BMDR_SPACE_CALC(nrecs) \ + ((int)(sizeof(xfs_bmdr_block_t) + \ + ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))) +#endif + +/* + * Maximum number of bmap btree levels. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BM_MAXLEVELS) +int xfs_bm_maxlevels(struct xfs_mount *mp, int w); +#define XFS_BM_MAXLEVELS(mp,w) xfs_bm_maxlevels(mp,w) +#else +#define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[w]) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BMAP_SANITY_CHECK) +int xfs_bmap_sanity_check(struct xfs_mount *mp, xfs_bmbt_block_t *bb, + int level); +#define XFS_BMAP_SANITY_CHECK(mp,bb,level) \ + xfs_bmap_sanity_check(mp,bb,level) +#else /* macro form: true only if the block's magic, level and record count are all valid; every argument is parenthesized so expressions may be passed */ +#define XFS_BMAP_SANITY_CHECK(mp,bb,level) \ + (INT_GET((bb)->bb_magic, ARCH_CONVERT) == XFS_BMAP_MAGIC && \ + INT_GET((bb)->bb_level, ARCH_CONVERT) == (level) && \ + INT_GET((bb)->bb_numrecs, ARCH_CONVERT) > 0 && \ + INT_GET((bb)->bb_numrecs, ARCH_CONVERT) <= (mp)->m_bmap_dmxr[(level) != 0]) +#endif + +/* + * Trace buffer entry types. 
+ */ +#define XFS_BMBT_KTRACE_ARGBI 1 +#define XFS_BMBT_KTRACE_ARGBII 2 +#define XFS_BMBT_KTRACE_ARGFFFI 3 +#define XFS_BMBT_KTRACE_ARGI 4 +#define XFS_BMBT_KTRACE_ARGIFK 5 +#define XFS_BMBT_KTRACE_ARGIFR 6 +#define XFS_BMBT_KTRACE_ARGIK 7 +#define XFS_BMBT_KTRACE_CUR 8 + +#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */ +#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */ + +#if defined(XFS_ALL_TRACE) +#define XFS_BMBT_TRACE +#endif + +#if !defined(DEBUG) +#undef XFS_BMBT_TRACE +#endif + + +/* + * Prototypes for xfs_bmap.c to call. + */ + +void +xfs_bmdr_to_bmbt( + xfs_bmdr_block_t *, + int, + xfs_bmbt_block_t *, + int); + +int +xfs_bmbt_decrement( + struct xfs_btree_cur *, + int, + int *); + +int +xfs_bmbt_delete( + struct xfs_btree_cur *, + int, + int *); + +void +xfs_bmbt_get_all( + xfs_bmbt_rec_t *r, + xfs_bmbt_irec_t *s); + +xfs_bmbt_block_t * +xfs_bmbt_get_block( + struct xfs_btree_cur *cur, + int level, + struct xfs_buf **bpp); + +xfs_filblks_t +xfs_bmbt_get_blockcount( + xfs_bmbt_rec_t *r); + +xfs_fsblock_t +xfs_bmbt_get_startblock( + xfs_bmbt_rec_t *r); + +xfs_fileoff_t +xfs_bmbt_get_startoff( + xfs_bmbt_rec_t *r); + +xfs_exntst_t +xfs_bmbt_get_state( + xfs_bmbt_rec_t *r); + +int +xfs_bmbt_increment( + struct xfs_btree_cur *, + int, + int *); + +int +xfs_bmbt_insert( + struct xfs_btree_cur *, + int *); + +int +xfs_bmbt_insert_many( + struct xfs_btree_cur *, + int, + xfs_bmbt_rec_t *, + int *); + +void +xfs_bmbt_log_block( + struct xfs_btree_cur *, + struct xfs_buf *, + int); + +void +xfs_bmbt_log_recs( + struct xfs_btree_cur *, + struct xfs_buf *, + int, + int); + +int +xfs_bmbt_lookup_eq( + struct xfs_btree_cur *, + xfs_fileoff_t, + xfs_fsblock_t, + xfs_filblks_t, + int *); + +int +xfs_bmbt_lookup_ge( + struct xfs_btree_cur *, + xfs_fileoff_t, + xfs_fsblock_t, + xfs_filblks_t, + int *); + +int +xfs_bmbt_lookup_le( + struct xfs_btree_cur *, + xfs_fileoff_t, + xfs_fsblock_t, + xfs_filblks_t, + int *); + +/* + * Give the 
bmap btree a new root block. Copy the old broot contents + * down into a real block and make the broot point to it. + */ +int /* error */ +xfs_bmbt_newroot( + struct xfs_btree_cur *cur, /* btree cursor */ + int *logflags, /* logging flags for inode */ + int *stat); /* return status - 0 fail */ + +void +xfs_bmbt_set_all( + xfs_bmbt_rec_t *r, + xfs_bmbt_irec_t *s); + +void +xfs_bmbt_set_allf( + xfs_bmbt_rec_t *r, + xfs_fileoff_t o, + xfs_fsblock_t b, + xfs_filblks_t c, + xfs_exntst_t v); + +void +xfs_bmbt_set_blockcount( + xfs_bmbt_rec_t *r, + xfs_filblks_t v); + +void +xfs_bmbt_set_startblock( + xfs_bmbt_rec_t *r, + xfs_fsblock_t v); + +void +xfs_bmbt_set_startoff( + xfs_bmbt_rec_t *r, + xfs_fileoff_t v); + +void +xfs_bmbt_set_state( + xfs_bmbt_rec_t *r, + xfs_exntst_t v); + +void +xfs_bmbt_to_bmdr( + xfs_bmbt_block_t *, + int, + xfs_bmdr_block_t *, + int); + +int +xfs_bmbt_update( + struct xfs_btree_cur *, + xfs_fileoff_t, + xfs_fsblock_t, + xfs_filblks_t, + xfs_exntst_t); + +#ifdef XFSDEBUG +/* + * Get the data from the pointed-to record. + */ +int +xfs_bmbt_get_rec( + struct xfs_btree_cur *, + xfs_fileoff_t *, + xfs_fsblock_t *, + xfs_filblks_t *, + xfs_exntst_t *, + int *); +#endif + + +/* + * Search an extent list for the extent which includes block + * bno. + */ +xfs_bmbt_rec_t * +xfs_bmap_do_search_extents( + xfs_bmbt_rec_t *, + xfs_extnum_t, + xfs_extnum_t, + xfs_fileoff_t, + int *, + xfs_extnum_t *, + xfs_bmbt_irec_t *, + xfs_bmbt_irec_t *); + + +#endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/include/xfs_btree.h b/include/xfs_btree.h new file mode 100644 index 000000000..6f00a8c68 --- /dev/null +++ b/include/xfs_btree.h @@ -0,0 +1,573 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_BTREE_H__ +#define __XFS_BTREE_H__ + +struct xfs_buf; +struct xfs_bmap_free; +struct xfs_inode; +struct xfs_mount; +struct xfs_trans; + +/* + * This nonsense is to make -wlint happy. + */ +#define XFS_LOOKUP_EQ ((xfs_lookup_t)XFS_LOOKUP_EQi) +#define XFS_LOOKUP_LE ((xfs_lookup_t)XFS_LOOKUP_LEi) +#define XFS_LOOKUP_GE ((xfs_lookup_t)XFS_LOOKUP_GEi) + +#define XFS_BTNUM_BNO ((xfs_btnum_t)XFS_BTNUM_BNOi) +#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi) +#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) +#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) + +/* + * Short form header: space allocation btrees. 
+ */ +typedef struct xfs_btree_sblock +{ + __uint32_t bb_magic; /* magic number for block type */ + __uint16_t bb_level; /* 0 is a leaf */ + __uint16_t bb_numrecs; /* current # of data records */ + xfs_agblock_t bb_leftsib; /* left sibling block or NULLAGBLOCK */ + xfs_agblock_t bb_rightsib; /* right sibling block or NULLAGBLOCK */ +} xfs_btree_sblock_t; + +/* + * Long form header: bmap btrees. + */ +typedef struct xfs_btree_lblock +{ + __uint32_t bb_magic; /* magic number for block type */ + __uint16_t bb_level; /* 0 is a leaf */ + __uint16_t bb_numrecs; /* current # of data records */ + xfs_dfsbno_t bb_leftsib; /* left sibling block or NULLDFSBNO */ + xfs_dfsbno_t bb_rightsib; /* right sibling block or NULLDFSBNO */ +} xfs_btree_lblock_t; + +/* + * Combined header and structure, used by common code. + */ +typedef struct xfs_btree_hdr +{ + __uint32_t bb_magic; /* magic number for block type */ + __uint16_t bb_level; /* 0 is a leaf */ + __uint16_t bb_numrecs; /* current # of data records */ +} xfs_btree_hdr_t; + +typedef struct xfs_btree_block +{ + xfs_btree_hdr_t bb_h; /* header */ + union { + struct { + xfs_agblock_t bb_leftsib; + xfs_agblock_t bb_rightsib; + } s; /* short form pointers */ + struct { + xfs_dfsbno_t bb_leftsib; + xfs_dfsbno_t bb_rightsib; + } l; /* long form pointers */ + } bb_u; /* rest */ +} xfs_btree_block_t; + +/* + * For logging record fields. + */ +#define XFS_BB_MAGIC 0x01 +#define XFS_BB_LEVEL 0x02 +#define XFS_BB_NUMRECS 0x04 +#define XFS_BB_LEFTSIB 0x08 +#define XFS_BB_RIGHTSIB 0x10 +#define XFS_BB_NUM_BITS 5 +#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1) + +/* + * Boolean to select which form of xfs_btree_block_t.bb_u to use: true (long form) only for the bmap btree. NOTE(review): both branches below define the same inline expansion, so the declared xfs_btree_long_ptrs() is never called through this macro - confirm that is intended. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BTREE_LONG_PTRS) +int xfs_btree_long_ptrs(xfs_btnum_t btnum); +#define XFS_BTREE_LONG_PTRS(btnum) ((btnum) == XFS_BTNUM_BMAP) +#else +#define XFS_BTREE_LONG_PTRS(btnum) ((btnum) == XFS_BTNUM_BMAP) +#endif + +/* + * Magic numbers for btree blocks. 
+ */ +extern const __uint32_t xfs_magics[]; + +/* + * Maximum and minimum records in a btree block. + * Given block size, type prefix, and leaf flag (0 or 1). + * The divisor below is equivalent to lf ? (e1) : (e2) but that produces + * compiler warnings. + */ +#define XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) \ + ((int)(((bsz) - (uint)sizeof(t ## _block_t)) / \ + (((lf) * (uint)sizeof(t ## _rec_t)) + \ + ((1 - (lf)) * \ + ((uint)sizeof(t ## _key_t) + (uint)sizeof(t ## _ptr_t)))))) +#define XFS_BTREE_BLOCK_MINRECS(bsz,t,lf) \ + (XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) / 2) + +/* + * Record, key, and pointer address calculation macros. + * Given block size, type prefix, block pointer, index of requested entry + * (first entry numbered 1), and maximum record count. bsz is not used by any of the three; mxr is used only by PTR_ADDR, to step over the key array that precedes the pointers. + */ +#define XFS_BTREE_REC_ADDR(bsz,t,bb,i,mxr) \ + ((t ## _rec_t *)((char *)(bb) + sizeof(t ## _block_t) + \ + ((i) - 1) * sizeof(t ## _rec_t))) +#define XFS_BTREE_KEY_ADDR(bsz,t,bb,i,mxr) \ + ((t ## _key_t *)((char *)(bb) + sizeof(t ## _block_t) + \ + ((i) - 1) * sizeof(t ## _key_t))) +#define XFS_BTREE_PTR_ADDR(bsz,t,bb,i,mxr) \ + ((t ## _ptr_t *)((char *)(bb) + sizeof(t ## _block_t) + \ + (mxr) * sizeof(t ## _key_t) + ((i) - 1) * sizeof(t ## _ptr_t))) + +#define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */ + +/* + * Btree cursor structure. + * This collects all information needed by the btree code in one place. 
+ */ +typedef struct xfs_btree_cur +{ + struct xfs_trans *bc_tp; /* transaction we're in, if any */ + struct xfs_mount *bc_mp; /* file system mount struct */ + union { + xfs_alloc_rec_t a; + xfs_bmbt_irec_t b; + xfs_inobt_rec_t i; + } bc_rec; /* current insert/search record value */ + struct xfs_buf *bc_bufs[XFS_BTREE_MAXLEVELS]; /* buf ptr per level */ + int bc_ptrs[XFS_BTREE_MAXLEVELS]; /* key/record # */ + __uint8_t bc_ra[XFS_BTREE_MAXLEVELS]; /* readahead bits */ +#define XFS_BTCUR_LEFTRA 1 /* left sibling has been read-ahead */ +#define XFS_BTCUR_RIGHTRA 2 /* right sibling has been read-ahead */ + __uint8_t bc_nlevels; /* number of levels in the tree */ + __uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ + xfs_btnum_t bc_btnum; /* identifies which btree type */ + union { + struct { /* needed for BNO, CNT */ + struct xfs_buf *agbp; /* agf buffer pointer */ + xfs_agnumber_t agno; /* ag number */ + } a; + struct { /* needed for BMAP */ + struct xfs_inode *ip; /* pointer to our inode */ + struct xfs_bmap_free *flist; /* list to free after */ + xfs_fsblock_t firstblock; /* 1st blk allocated */ + int allocated; /* count of alloced */ + short forksize; /* fork's inode space */ + char whichfork; /* data or attr fork */ + char flags; /* flags */ +#define XFS_BTCUR_BPRV_WASDEL 1 /* was delayed */ + } b; + struct { /* needed for INO */ + struct xfs_buf *agbp; /* agi buffer pointer */ + xfs_agnumber_t agno; /* ag number */ + } i; + } bc_private; /* per-btree type data */ +} xfs_btree_cur_t; + +#define XFS_BTREE_NOERROR 0 +#define XFS_BTREE_ERROR 1 + +/* + * Convert from buffer to btree block header. 
+ */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_BLOCK) +xfs_btree_block_t *xfs_buf_to_block(struct xfs_buf *bp); +#define XFS_BUF_TO_BLOCK(bp) xfs_buf_to_block(bp) +#else +#define XFS_BUF_TO_BLOCK(bp) ((xfs_btree_block_t *)(XFS_BUF_PTR(bp))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_LBLOCK) +xfs_btree_lblock_t *xfs_buf_to_lblock(struct xfs_buf *bp); +#define XFS_BUF_TO_LBLOCK(bp) xfs_buf_to_lblock(bp) +#else +#define XFS_BUF_TO_LBLOCK(bp) ((xfs_btree_lblock_t *)(XFS_BUF_PTR(bp))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_SBLOCK) +xfs_btree_sblock_t *xfs_buf_to_sblock(struct xfs_buf *bp); +#define XFS_BUF_TO_SBLOCK(bp) xfs_buf_to_sblock(bp) +#else +#define XFS_BUF_TO_SBLOCK(bp) ((xfs_btree_sblock_t *)(XFS_BUF_PTR(bp))) +#endif + +#ifdef __KERNEL__ + +#ifdef DEBUG +/* + * Debug routine: check that block header is ok. + */ +void +xfs_btree_check_block( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_btree_block_t *block, /* generic btree block pointer */ + int level, /* level of the btree block */ + struct xfs_buf *bp); /* buffer containing block, if any */ + +/* + * Debug routine: check that keys are in the right order. + */ +void +xfs_btree_check_key( + xfs_btnum_t btnum, /* btree identifier */ + void *ak1, /* pointer to left (lower) key */ + void *ak2); /* pointer to right (higher) key */ + +/* + * Debug routine: check that records are in the right order. + */ +void +xfs_btree_check_rec( + xfs_btnum_t btnum, /* btree identifier */ + void *ar1, /* pointer to left (lower) record */ + void *ar2); /* pointer to right (higher) record */ +#else +#define xfs_btree_check_block(a,b,c,d) +#define xfs_btree_check_key(a,b,c) +#define xfs_btree_check_rec(a,b,c) +#endif /* DEBUG */ + +/* + * Checking routine: check that long form block header is ok. 
+ */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_lblock( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_btree_lblock_t *block, /* btree long form block pointer */ + int level, /* level of the btree block */ + struct xfs_buf *bp); /* buffer containing block, if any */ + +/* + * Checking routine: check that (long) pointer is ok. + */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_lptr( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_dfsbno_t ptr, /* btree block disk address */ + int level); /* btree block level */ + +/* + * Checking routine: check that short form block header is ok. + */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_sblock( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_btree_sblock_t *block, /* btree short form block pointer */ + int level, /* level of the btree block */ + struct xfs_buf *bp); /* buffer containing block */ + +/* + * Checking routine: check that (short) pointer is ok. + */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_sptr( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_agblock_t ptr, /* btree block disk address */ + int level); /* btree block level */ + +/* + * Delete the btree cursor. + */ +void +xfs_btree_del_cursor( + xfs_btree_cur_t *cur, /* btree cursor */ + int error); /* del because of error */ + +/* + * Duplicate the btree cursor. + * Allocate a new one, copy the record, re-get the buffers. + */ +int /* error */ +xfs_btree_dup_cursor( + xfs_btree_cur_t *cur, /* input cursor */ + xfs_btree_cur_t **ncur);/* output cursor */ + +/* + * Change the cursor to point to the first record in the current block + * at the given level. Other levels are unaffected. + */ +int /* success=1, failure=0 */ +xfs_btree_firstrec( + xfs_btree_cur_t *cur, /* btree cursor */ + int level); /* level to change */ + +/* + * Retrieve the block pointer from the cursor at the given level. + * This may be a bmap btree root or from a buffer. 
+ */ +xfs_btree_block_t * /* generic btree block pointer */ +xfs_btree_get_block( + xfs_btree_cur_t *cur, /* btree cursor */ + int level, /* level in btree */ + struct xfs_buf **bpp); /* buffer containing the block */ + +/* + * Get a buffer for the block, return it with no data read. + * Long-form addressing. + */ +struct xfs_buf * /* buffer for fsbno */ +xfs_btree_get_bufl( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_fsblock_t fsbno, /* file system block number */ + uint lock); /* lock flags for get_buf */ + +/* + * Get a buffer for the block, return it with no data read. + * Short-form addressing. + */ +struct xfs_buf * /* buffer for agno/agbno */ +xfs_btree_get_bufs( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_agblock_t agbno, /* allocation group block number */ + uint lock); /* lock flags for get_buf */ + +/* + * Allocate a new btree cursor. + * The cursor is either for allocation (A) or bmap (B). + */ +xfs_btree_cur_t * /* new btree cursor */ +xfs_btree_init_cursor( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *agbp, /* (A only) buffer for agf structure */ + xfs_agnumber_t agno, /* (A only) allocation group number */ + xfs_btnum_t btnum, /* btree identifier */ + struct xfs_inode *ip, /* (B only) inode owning the btree */ + int whichfork); /* (B only) data/attr fork */ + +/* + * Check for the cursor referring to the last block at the given level. + */ +int /* 1=is last block, 0=not last block */ +xfs_btree_islastblock( + xfs_btree_cur_t *cur, /* btree cursor */ + int level); /* level to check */ + +/* + * Change the cursor to point to the last record in the current block + * at the given level. Other levels are unaffected. 
+ */ +int /* success=1, failure=0 */ +xfs_btree_lastrec( + xfs_btree_cur_t *cur, /* btree cursor */ + int level); /* level to change */ + +/* + * Compute first and last byte offsets for the fields given. + * Interprets the offsets table, which contains struct field offsets. + */ +void +xfs_btree_offsets( + __int64_t fields, /* bitmask of fields */ + const short *offsets,/* table of field offsets */ + int nbits, /* number of bits to inspect */ + int *first, /* output: first byte offset */ + int *last); /* output: last byte offset */ + +/* + * Get a buffer for the block, return it read in. + * Long-form addressing. + */ +int /* error */ +xfs_btree_read_bufl( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_fsblock_t fsbno, /* file system block number */ + uint lock, /* lock flags for read_buf */ + struct xfs_buf **bpp, /* output: buffer for fsbno */ + int refval);/* ref count value for buffer */ + +/* + * Get a buffer for the block, return it read in. + * Short-form addressing. + */ +int /* error */ +xfs_btree_read_bufs( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_agblock_t agbno, /* allocation group block number */ + uint lock, /* lock flags for read_buf */ + struct xfs_buf **bpp, /* output: buffer for agno/agbno */ + int refval);/* ref count value for buffer */ + +/* + * Read-ahead the block, don't wait for it, don't return a buffer. + * Long-form addressing. + */ +void /* no return value */ +xfs_btree_reada_bufl( + struct xfs_mount *mp, /* file system mount point */ + xfs_fsblock_t fsbno, /* file system block number */ + xfs_extlen_t count); /* count of filesystem blocks */ + +/* + * Read-ahead the block, don't wait for it, don't return a buffer. + * Short-form addressing. 
+ */ +void /* no return value */ +xfs_btree_reada_bufs( + struct xfs_mount *mp, /* file system mount point */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_agblock_t agbno, /* allocation group block number */ + xfs_extlen_t count); /* count of filesystem blocks */ + +/* + * Read-ahead btree blocks, at the given level. + * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. + */ +int /* readahead block count */ +xfs_btree_readahead( + xfs_btree_cur_t *cur, /* btree cursor */ + int lev, /* level in btree */ + int lr); /* left/right bits */ +/* + * Set the buffer for level "lev" in the cursor to bp, releasing + * any previous buffer. + */ +void +xfs_btree_setbuf( + xfs_btree_cur_t *cur, /* btree cursor */ + int lev, /* level in btree */ + struct xfs_buf *bp); /* new buffer to set */ + +#endif /* __KERNEL__ */ + + +/* + * Min and max functions for extlen, agblock, fileoff, and filblks types. The macro (non-function) forms cast both arguments to the named type and evaluate one of them twice, so avoid arguments with side effects. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_EXTLEN_MIN) +xfs_extlen_t xfs_extlen_min(xfs_extlen_t a, xfs_extlen_t b); +#define XFS_EXTLEN_MIN(a,b) xfs_extlen_min(a,b) +#else +#define XFS_EXTLEN_MIN(a,b) \ + ((xfs_extlen_t)(a) < (xfs_extlen_t)(b) ? \ + (xfs_extlen_t)(a) : (xfs_extlen_t)(b)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_EXTLEN_MAX) +xfs_extlen_t xfs_extlen_max(xfs_extlen_t a, xfs_extlen_t b); +#define XFS_EXTLEN_MAX(a,b) xfs_extlen_max(a,b) +#else +#define XFS_EXTLEN_MAX(a,b) \ + ((xfs_extlen_t)(a) > (xfs_extlen_t)(b) ? \ + (xfs_extlen_t)(a) : (xfs_extlen_t)(b)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGBLOCK_MIN) +xfs_agblock_t xfs_agblock_min(xfs_agblock_t a, xfs_agblock_t b); +#define XFS_AGBLOCK_MIN(a,b) xfs_agblock_min(a,b) +#else +#define XFS_AGBLOCK_MIN(a,b) \ + ((xfs_agblock_t)(a) < (xfs_agblock_t)(b) ? 
\ + (xfs_agblock_t)(a) : (xfs_agblock_t)(b)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGBLOCK_MAX) +xfs_agblock_t xfs_agblock_max(xfs_agblock_t a, xfs_agblock_t b); +#define XFS_AGBLOCK_MAX(a,b) xfs_agblock_max(a,b) +#else +#define XFS_AGBLOCK_MAX(a,b) \ + ((xfs_agblock_t)(a) > (xfs_agblock_t)(b) ? \ + (xfs_agblock_t)(a) : (xfs_agblock_t)(b)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILEOFF_MIN) +xfs_fileoff_t xfs_fileoff_min(xfs_fileoff_t a, xfs_fileoff_t b); +#define XFS_FILEOFF_MIN(a,b) xfs_fileoff_min(a,b) +#else +#define XFS_FILEOFF_MIN(a,b) \ + ((xfs_fileoff_t)(a) < (xfs_fileoff_t)(b) ? \ + (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILEOFF_MAX) +xfs_fileoff_t xfs_fileoff_max(xfs_fileoff_t a, xfs_fileoff_t b); +#define XFS_FILEOFF_MAX(a,b) xfs_fileoff_max(a,b) +#else +#define XFS_FILEOFF_MAX(a,b) \ + ((xfs_fileoff_t)(a) > (xfs_fileoff_t)(b) ? \ + (xfs_fileoff_t)(a) : (xfs_fileoff_t)(b)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILBLKS_MIN) +xfs_filblks_t xfs_filblks_min(xfs_filblks_t a, xfs_filblks_t b); +#define XFS_FILBLKS_MIN(a,b) xfs_filblks_min(a,b) +#else +#define XFS_FILBLKS_MIN(a,b) \ + ((xfs_filblks_t)(a) < (xfs_filblks_t)(b) ? \ + (xfs_filblks_t)(a) : (xfs_filblks_t)(b)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FILBLKS_MAX) +xfs_filblks_t xfs_filblks_max(xfs_filblks_t a, xfs_filblks_t b); +#define XFS_FILBLKS_MAX(a,b) xfs_filblks_max(a,b) +#else +#define XFS_FILBLKS_MAX(a,b) \ + ((xfs_filblks_t)(a) > (xfs_filblks_t)(b) ? 
\ + (xfs_filblks_t)(a) : (xfs_filblks_t)(b)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_SANITY_CHECK) +int xfs_fsb_sanity_check(struct xfs_mount *mp, xfs_fsblock_t fsb); +#define XFS_FSB_SANITY_CHECK(mp,fsb) xfs_fsb_sanity_check(mp,fsb) +#else +#define XFS_FSB_SANITY_CHECK(mp,fsb) \ + (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ + XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) +#endif + +/* + * Macros to set EFSCORRUPTED & return/branch. + */ +#define XFS_WANT_CORRUPTED_GOTO(x,l) \ + { \ + int fs_is_ok = (x); \ + ASSERT(fs_is_ok); \ + if (!fs_is_ok) { \ + error = XFS_ERROR(EFSCORRUPTED); \ + goto l; \ + } \ + } + +#define XFS_WANT_CORRUPTED_RETURN(x) \ + { \ + int fs_is_ok = (x); \ + ASSERT(fs_is_ok); \ + if (!fs_is_ok) \ + return XFS_ERROR(EFSCORRUPTED); \ + } + +#endif /* __XFS_BTREE_H__ */ diff --git a/include/xfs_buf_item.h b/include/xfs_buf_item.h new file mode 100644 index 000000000..5d097f8e3 --- /dev/null +++ b/include/xfs_buf_item.h @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_BUF_ITEM_H__ +#define __XFS_BUF_ITEM_H__ + +/* + * This is the structure used to lay out a buf log item in the + * log. The data map describes which 128 byte chunks of the buffer + * have been logged. This structure works only on buffers that + * reside up to the first TB in the filesystem. These buffers are + * generated only by pre-6.2 systems and are known as XFS_LI_6_1_BUF. + */ +typedef struct xfs_buf_log_format_v1 { + unsigned short blf_type; /* buf log item type indicator */ + unsigned short blf_size; /* size of this item */ + __int32_t blf_blkno; /* starting blkno of this buf */ + ushort blf_flags; /* misc state */ + ushort blf_len; /* number of blocks in this buf */ + unsigned int blf_map_size; /* size of data bitmap in words */ + unsigned int blf_data_map[1];/* variable size bitmap of */ + /* regions of buffer in this item */ +} xfs_buf_log_format_v1_t; + +/* + * This is a form of the above structure with a 64 bit blkno field. + * For 6.2 and beyond, this is XFS_LI_BUF. We use this to log everything. 
+ */ +typedef struct xfs_buf_log_format_t { + unsigned short blf_type; /* buf log item type indicator */ + unsigned short blf_size; /* size of this item */ + ushort blf_flags; /* misc state */ + ushort blf_len; /* number of blocks in this buf */ + __int64_t blf_blkno; /* starting blkno of this buf */ + unsigned int blf_map_size; /* size of data bitmap in words */ + unsigned int blf_data_map[1];/* variable size bitmap of */ + /* regions of buffer in this item */ +} xfs_buf_log_format_t; + +/* + * This flag indicates that the buffer contains on-disk inodes + * and requires special recovery handling. + */ +#define XFS_BLI_INODE_BUF 0x1 +/* + * This flag indicates that the buffer should not be replayed + * during recovery because its blocks are being freed. + */ +#define XFS_BLI_CANCEL 0x2 +/* + * These flags indicate that the buffer contains on-disk + * user or project dquots and may require special recovery handling. + */ +#define XFS_BLI_UDQUOT_BUF 0x4 +#define XFS_BLI_PDQUOT_BUF 0x8 + +#define XFS_BLI_CHUNK 128 /* bytes of buffer described by one data map bit */ +#define XFS_BLI_SHIFT 7 /* log2(XFS_BLI_CHUNK) */ +#define BIT_TO_WORD_SHIFT 5 /* presumably log2 of the bits per bitmap word (32) - confirm */ +#define NBWORD (NBBY * sizeof(unsigned int)) /* bits per bitmap word, assuming NBBY is bits per byte */ + +/* + * buf log item flags + */ +#define XFS_BLI_HOLD 0x01 +#define XFS_BLI_DIRTY 0x02 +#define XFS_BLI_STALE 0x04 +#define XFS_BLI_LOGGED 0x08 +#define XFS_BLI_INODE_ALLOC_BUF 0x10 + + +#ifdef __KERNEL__ + +struct xfs_buf; +struct ktrace; +struct xfs_mount; + +/* + * This is the in core log item structure used to track information + * needed to log buffers. It tracks how many times the lock has been + * locked, and which 128 byte chunks of the buffer are dirty. 
+ */ +typedef struct xfs_buf_log_item { + xfs_log_item_t bli_item; /* common item structure */ + struct xfs_buf *bli_buf; /* real buffer pointer */ + unsigned int bli_flags; /* misc flags */ + unsigned int bli_recur; /* lock recursion count */ + atomic_t bli_refcount; /* cnt of tp refs */ +#ifdef DEBUG + struct ktrace *bli_trace; /* event trace buf */ +#endif +#ifdef XFS_TRANS_DEBUG + char *bli_orig; /* original buffer copy */ + char *bli_logged; /* bytes logged (bitmap) */ +#endif + xfs_buf_log_format_t bli_format; /* in-log header */ +} xfs_buf_log_item_t; + +/* + * This structure is used during recovery to record the buf log + * items which have been canceled and should not be replayed. + */ +typedef struct xfs_buf_cancel { + xfs_daddr_t bc_blkno; + uint bc_len; + int bc_refcount; + struct xfs_buf_cancel *bc_next; +} xfs_buf_cancel_t; + +#define XFS_BLI_TRACE_SIZE 32 + + +#if defined(XFS_ALL_TRACE) +#define XFS_BLI_TRACE +#endif + +#if !defined(DEBUG) +#undef XFS_BLI_TRACE +#endif + +#if defined(XFS_BLI_TRACE) +void xfs_buf_item_trace(char *, xfs_buf_log_item_t *); +#else +#define xfs_buf_item_trace(id, bip) +#endif + +void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); +void xfs_buf_item_relse(struct xfs_buf *); +void xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint); +uint xfs_buf_item_dirty(xfs_buf_log_item_t *); +int xfs_buf_item_bits(uint *, uint, uint); +int xfs_buf_item_contig_bits(uint *, uint, uint); +int xfs_buf_item_next_bit(uint *, uint, uint); +void xfs_buf_attach_iodone(struct xfs_buf *, + void(*)(struct xfs_buf *, xfs_log_item_t *), + xfs_log_item_t *); +void xfs_buf_iodone_callbacks(struct xfs_buf *); +void xfs_buf_iodone(struct xfs_buf *, xfs_buf_log_item_t *); + +#ifdef XFS_TRANS_DEBUG +void +xfs_buf_item_flush_log_debug( + struct xfs_buf *bp, + uint first, + uint last); +#else +#define xfs_buf_item_flush_log_debug(bp, first, last) +#endif + +#endif /* __KERNEL__ */ + +#endif /* __XFS_BUF_ITEM_H__ */ diff --git a/include/xfs_cred.h 
b/include/xfs_cred.h new file mode 100644 index 000000000..523dcddf6 --- /dev/null +++ b/include/xfs_cred.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#ifndef __XFS_CRED_H__ +#define __XFS_CRED_H__ + +#include /* For NGROUPS */ +#ifdef __KERNEL__ +#include +#include +#endif + +/* + * Capabilities + */ +typedef __uint64_t cap_value_t; + +typedef struct cap_set { + cap_value_t cap_effective; /* use in capability checks */ + cap_value_t cap_permitted; /* combined with file attrs */ + cap_value_t cap_inheritable;/* pass through exec */ +} cap_set_t; + + +/* + * Mandatory Access Control + * + * Layout of a composite MAC label: + * ml_list contains the list of categories (MSEN) followed by the list of + * divisions (MINT). This is actually a header for the data structure which + * will have an ml_list with more than one element. + * + * ------------------------------- + * | ml_msen_type | ml_mint_type | + * ------------------------------- + * | ml_level | ml_grade | + * ------------------------------- + * | ml_catcount | + * ------------------------------- + * | ml_divcount | + * ------------------------------- + * | category 1 | + * | . . . | + * | category N | (where N = ml_catcount) + * ------------------------------- + * | division 1 | + * | . . . 
| + * | division M | (where M = ml_divcount) + * ------------------------------- + */ +#define MAC_MAX_SETS 250 +typedef struct mac_label { + unsigned char ml_msen_type; /* MSEN label type */ + unsigned char ml_mint_type; /* MINT label type */ + unsigned char ml_level; /* Hierarchical level */ + unsigned char ml_grade; /* Hierarchical grade */ + unsigned short ml_catcount; /* Category count */ + unsigned short ml_divcount; /* Division count */ + /* Category set, then Division set */ + unsigned short ml_list[MAC_MAX_SETS]; +} mac_label; + +/* Data types required by POSIX P1003.1eD15 */ +typedef struct mac_label * mac_t; + +#ifdef __KERNEL__ +extern int mac_enabled; +extern mac_label *mac_high_low_lp; +static __inline void mac_never(void) {} +struct xfs_inode; +extern int mac_xfs_iaccess(struct xfs_inode *, mode_t); +#define _MAC_XFS_IACCESS(i,m) \ + (mac_enabled? (mac_never(), mac_xfs_iaccess(i,m)): 0) +#endif /* __KERNEL__ */ + +#define MACWRITE 00200 +#define SGI_MAC_FILE "/dev/null" +#define SGI_MAC_FILE_SIZE 10 +#define SGI_CAP_FILE "/dev/null" +#define SGI_CAP_FILE_SIZE 10 + +/* MSEN label type names. Choose an upper case ASCII character. */ +#define MSEN_ADMIN_LABEL 'A' /* Admin: lowm_da_node_ents) +#endif + +#define XFS_DA_MAXHASH ((xfs_dahash_t)-1) /* largest valid hash value */ + +/* + * Macros used by directory code to interface to the filesystem. 
+ */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LBSIZE) +int xfs_lbsize(struct xfs_mount *mp); +#define XFS_LBSIZE(mp) xfs_lbsize(mp) +#else +#define XFS_LBSIZE(mp) ((mp)->m_sb.sb_blocksize) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LBLOG) +int xfs_lblog(struct xfs_mount *mp); +#define XFS_LBLOG(mp) xfs_lblog(mp) +#else +#define XFS_LBLOG(mp) ((mp)->m_sb.sb_blocklog) +#endif + +/* + * Macros used by directory code to interface to the kernel + */ + +/* + * Macros used to manipulate directory off_t's + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_MAKE_BNOENTRY) +__uint32_t xfs_da_make_bnoentry(struct xfs_mount *mp, xfs_dablk_t bno, + int entry); +#define XFS_DA_MAKE_BNOENTRY(mp,bno,entry) \ + xfs_da_make_bnoentry(mp,bno,entry) +#else +#define XFS_DA_MAKE_BNOENTRY(mp,bno,entry) \ + (((bno) << (mp)->m_dircook_elog) | (entry)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_MAKE_COOKIE) +xfs_off_t xfs_da_make_cookie(struct xfs_mount *mp, xfs_dablk_t bno, int entry, + xfs_dahash_t hash); +#define XFS_DA_MAKE_COOKIE(mp,bno,entry,hash) \ + xfs_da_make_cookie(mp,bno,entry,hash) +#else +#define XFS_DA_MAKE_COOKIE(mp,bno,entry,hash) \ + (((xfs_off_t)XFS_DA_MAKE_BNOENTRY(mp, bno, entry) << 32) | (hash)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_COOKIE_HASH) +xfs_dahash_t xfs_da_cookie_hash(struct xfs_mount *mp, xfs_off_t cookie); +#define XFS_DA_COOKIE_HASH(mp,cookie) xfs_da_cookie_hash(mp,cookie) +#else +#define XFS_DA_COOKIE_HASH(mp,cookie) ((xfs_dahash_t)(cookie)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_COOKIE_BNO) +xfs_dablk_t xfs_da_cookie_bno(struct xfs_mount *mp, xfs_off_t cookie); +#define XFS_DA_COOKIE_BNO(mp,cookie) xfs_da_cookie_bno(mp,cookie) +#else +#define XFS_DA_COOKIE_BNO(mp,cookie) \ + (((xfs_off_t)(cookie) >> 31) == -1LL ? 
\ + (xfs_dablk_t)0 : \ + (xfs_dablk_t)((xfs_off_t)(cookie) >> ((mp)->m_dircook_elog + 32))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DA_COOKIE_ENTRY) +int xfs_da_cookie_entry(struct xfs_mount *mp, xfs_off_t cookie); +#define XFS_DA_COOKIE_ENTRY(mp,cookie) xfs_da_cookie_entry(mp,cookie) +#else +#define XFS_DA_COOKIE_ENTRY(mp,cookie) \ + (((xfs_off_t)(cookie) >> 31) == -1LL ? \ + (xfs_dablk_t)0 : \ + (xfs_dablk_t)(((xfs_off_t)(cookie) >> 32) & \ + ((1 << (mp)->m_dircook_elog) - 1))) +#endif + + +/*======================================================================== + * Btree searching and modification structure definitions. + *========================================================================*/ + +/* + * Structure to ease passing around component names. + */ +typedef struct xfs_da_args { + char *name; /* string (maybe not NULL terminated) */ + int namelen; /* length of string (maybe no NULL) */ + char *value; /* set of bytes (maybe contain NULLs) */ + int valuelen; /* length of value */ + int flags; /* argument flags (eg: ATTR_NOCREATE) */ + xfs_dahash_t hashval; /* hash value of name */ + xfs_ino_t inumber; /* input/output inode number */ + struct xfs_inode *dp; /* directory inode to manipulate */ + xfs_fsblock_t *firstblock; /* ptr to firstblock for bmap calls */ + struct xfs_bmap_free *flist; /* ptr to freelist for bmap_finish */ + struct xfs_trans *trans; /* current trans (changes over time) */ + xfs_extlen_t total; /* total blocks needed, for 1st bmap */ + int whichfork; /* data or attribute fork */ + xfs_dablk_t blkno; /* blkno of attr leaf of interest */ + int index; /* index of attr of interest in blk */ + xfs_dablk_t rmtblkno; /* remote attr value starting blkno */ + int rmtblkcnt; /* remote attr value block count */ + int rename; /* T/F: this is an atomic rename op */ + xfs_dablk_t blkno2; /* blkno of 2nd attr leaf of interest */ + int index2; /* index of 2nd attr in blk */ + xfs_dablk_t rmtblkno2; /* remote attr value starting 
blkno */ + int rmtblkcnt2; /* remote attr value block count */ + int justcheck; /* check for ok with no space */ + int addname; /* T/F: this is an add operation */ + int oknoent; /* T/F: ok to return ENOENT, else die */ +} xfs_da_args_t; + +/* + * Structure to describe buffer(s) for a block. + * This is needed in the directory version 2 format case, when + * multiple non-contiguous fsblocks might be needed to cover one + * logical directory block. + * If the buffer count is 1 then the data pointer points to the + * same place as the b_addr field for the buffer, else to kmem_alloced memory. + */ +typedef struct xfs_dabuf { + int nbuf; /* number of buffer pointers present */ + short dirty; /* data needs to be copied back */ + short bbcount; /* how large is data in bbs */ + void *data; /* pointer for buffers' data */ +#ifdef XFS_DABUF_DEBUG + inst_t *ra; /* return address of caller to make */ + struct xfs_dabuf *next; /* next in global chain */ + struct xfs_dabuf *prev; /* previous in global chain */ + dev_t dev; /* device for buffer */ + xfs_daddr_t blkno; /* daddr first in bps[0] */ +#endif + struct xfs_buf *bps[1]; /* actually nbuf of these */ +} xfs_dabuf_t; +#define XFS_DA_BUF_SIZE(n) \ + (sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1)) + +#ifdef XFS_DABUF_DEBUG +extern xfs_dabuf_t *xfs_dabuf_global_list; +#endif + +/* + * Storage for holding state during Btree searches and split/join ops. + * + * Only need space for 5 intermediate nodes. With a minimum of 62-way + * fanout to the Btree, we can support over 900 million directory blocks, + * which is slightly more than enough. 
+ */ +typedef struct xfs_da_state_blk { + xfs_dabuf_t *bp; /* buffer containing block */ + xfs_dablk_t blkno; /* filesystem blkno of buffer */ + xfs_daddr_t disk_blkno; /* on-disk blkno (in BBs) of buffer */ + int index; /* relevant index into block */ + xfs_dahash_t hashval; /* last hash value in block */ + int magic; /* blk's magic number, ie: blk type */ +} xfs_da_state_blk_t; + +typedef struct xfs_da_state_path { + int active; /* number of active levels */ + xfs_da_state_blk_t blk[XFS_DA_NODE_MAXDEPTH]; +} xfs_da_state_path_t; + +typedef struct xfs_da_state { + xfs_da_args_t *args; /* filename arguments */ + struct xfs_mount *mp; /* filesystem mount point */ + int blocksize; /* logical block size */ + int inleaf; /* insert into 1->lf, 0->splf */ + xfs_da_state_path_t path; /* search/split paths */ + xfs_da_state_path_t altpath; /* alternate path for join */ + int extravalid; /* T/F: extrablk is in use */ + int extraafter; /* T/F: extrablk is after new */ + xfs_da_state_blk_t extrablk; /* for double-splits on leafs */ + /* for dirv2 extrablk is data */ +} xfs_da_state_t; + +/* + * Utility macros to aid in logging changed structure fields. + */ +#define XFS_DA_LOGOFF(BASE, ADDR) ((char *)(ADDR) - (char *)(BASE)) +#define XFS_DA_LOGRANGE(BASE, ADDR, SIZE) \ + (uint)(XFS_DA_LOGOFF(BASE, ADDR)), \ + (uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1) + +/*======================================================================== + * Function prototypes for the kernel. + *========================================================================*/ + +/* + * Routines used for growing the Btree. + */ +int xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, + xfs_dabuf_t **bpp, int whichfork); +int xfs_da_split(xfs_da_state_t *state); + +/* + * Routines used for shrinking the Btree. 
+ */ +int xfs_da_join(xfs_da_state_t *state); +void xfs_da_fixhashpath(xfs_da_state_t *state, + xfs_da_state_path_t *path_to_to_fix); + +/* + * Routines used for finding things in the Btree. + */ +int xfs_da_node_lookup_int(xfs_da_state_t *state, int *result); +int xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, + int forward, int release, int *result); +/* + * Utility routines. + */ +int xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, + xfs_da_state_blk_t *save_blk); +int xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, + xfs_da_state_blk_t *new_blk); + +/* + * Utility routines. + */ +int xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno); +int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp, + xfs_dablk_t bno, xfs_daddr_t mappedbno, + xfs_dabuf_t **bp, int whichfork); +int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, + xfs_dablk_t bno, xfs_daddr_t mappedbno, + xfs_dabuf_t **bpp, int whichfork); +xfs_daddr_t xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp, + xfs_dablk_t bno, int whichfork); +int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, + xfs_dabuf_t *dead_buf); + +uint xfs_da_hashname(char *name_string, int name_length); +uint xfs_da_log2_roundup(uint i); +xfs_da_state_t *xfs_da_state_alloc(void); +void xfs_da_state_free(xfs_da_state_t *state); +void xfs_da_state_kill_altpath(xfs_da_state_t *state); + +void xfs_da_buf_done(xfs_dabuf_t *dabuf); +void xfs_da_log_buf(struct xfs_trans *tp, xfs_dabuf_t *dabuf, uint first, + uint last); +void xfs_da_brelse(struct xfs_trans *tp, xfs_dabuf_t *dabuf); +void xfs_da_binval(struct xfs_trans *tp, xfs_dabuf_t *dabuf); +xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf); + +extern struct xfs_zone *xfs_da_state_zone; + +#endif /* __XFS_DA_BTREE_H__ */ diff --git a/include/xfs_dfrag.h b/include/xfs_dfrag.h new file mode 100644 index 000000000..a6f1b0937 --- /dev/null +++ b/include/xfs_dfrag.h @@ 
-0,0 +1,67 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_DFRAG_H__ +#define __XFS_DFRAG_H__ + +/* + * Structure passed to xfs_swapext + */ + +typedef struct xfs_swapext +{ + __int64_t sx_version; /* version */ + __int64_t sx_fdtarget; /* fd of target file */ + __int64_t sx_fdtmp; /* fd of tmp file */ + xfs_off_t sx_offset; /* offset into file */ + xfs_off_t sx_length; /* length from offset */ + char sx_pad[16]; /* pad space, unused */ + xfs_bstat_t sx_stat; /* stat of target before copy */ +} xfs_swapext_t; + +/* + * Version flag + */ +#define XFS_SX_VERSION 0 + +#ifdef __KERNEL__ +/* + * Prototypes for visible xfs_dfrag.c routines. 
+ */ + +/* + * Syscall interface for xfs_swapext + */ +int xfs_swapext(struct xfs_swapext *sx); + +#endif /* __KERNEL__ */ + +#endif /* __XFS_DFRAG_H__ */ diff --git a/include/xfs_dinode.h b/include/xfs_dinode.h new file mode 100644 index 000000000..7bda53ebe --- /dev/null +++ b/include/xfs_dinode.h @@ -0,0 +1,476 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_DINODE_H__ +#define __XFS_DINODE_H__ + +struct xfs_buf; +struct xfs_mount; + +#define XFS_DINODE_VERSION_1 1 +#define XFS_DINODE_VERSION_2 2 +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DINODE_GOOD_VERSION) +int xfs_dinode_good_version(int v); +#define XFS_DINODE_GOOD_VERSION(v) xfs_dinode_good_version(v) +#else +#define XFS_DINODE_GOOD_VERSION(v) (((v) == XFS_DINODE_VERSION_1) || \ + ((v) == XFS_DINODE_VERSION_2)) +#endif +#define XFS_DINODE_MAGIC 0x494e /* 'IN' */ + +/* + * Disk inode structure. + * This is just the header; the inode is expanded to fill a variable size + * with the last field expanding. It is split into the core and "other" + * because we only need the core part in the in-core inode. + */ +typedef struct xfs_timestamp { + __int32_t t_sec; /* timestamp seconds */ + __int32_t t_nsec; /* timestamp nanoseconds */ +} xfs_timestamp_t; + +/* + * Note: Coordinate changes to this structure with the XFS_DI_* #defines + * below and the offsets table in xfs_ialloc_log_di(). 
+ */ +typedef struct xfs_dinode_core +{ + __uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */ + __uint16_t di_mode; /* mode and type of file */ + __int8_t di_version; /* inode version */ + __int8_t di_format; /* format of di_c data */ + __uint16_t di_onlink; /* old number of links to file */ + __uint32_t di_uid; /* owner's user id */ + __uint32_t di_gid; /* owner's group id */ + __uint32_t di_nlink; /* number of links to file */ + __uint16_t di_projid; /* owner's project id */ + __uint8_t di_pad[10]; /* unused, zeroed space */ + xfs_timestamp_t di_atime; /* time last accessed */ + xfs_timestamp_t di_mtime; /* time last modified */ + xfs_timestamp_t di_ctime; /* time created/inode modified */ + xfs_fsize_t di_size; /* number of bytes in file */ + xfs_drfsbno_t di_nblocks; /* # of direct & btree blocks used */ + xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ + xfs_extnum_t di_nextents; /* number of extents in data fork */ + xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/ + __uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */ + __int8_t di_aformat; /* format of attr fork's data */ + __uint32_t di_dmevmask; /* DMIG event mask */ + __uint16_t di_dmstate; /* DMIG state info */ + __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */ + __uint32_t di_gen; /* generation number */ +} xfs_dinode_core_t; + +typedef struct xfs_dinode +{ + xfs_dinode_core_t di_core; + /* + * In adding anything between the core and the union, be + * sure to update the macros like XFS_LITINO below and + * XFS_BMAP_RBLOCK_DSIZE in xfs_bmap_btree.h. 
+ */ + xfs_agino_t di_next_unlinked;/* agi unlinked list ptr */ + union { + xfs_bmdr_block_t di_bmbt; /* btree root block */ + xfs_bmbt_rec_32_t di_bmx[1]; /* extent list */ + xfs_dir_shortform_t di_dirsf; /* shortform directory */ + xfs_dir2_sf_t di_dir2sf; /* shortform directory v2 */ + char di_c[1]; /* local contents */ + xfs_dev_t di_dev; /* device for IFCHR/IFBLK */ + uuid_t di_muuid; /* mount point value */ + char di_symlink[1]; /* local symbolic link */ + } di_u; + union { + xfs_bmdr_block_t di_abmbt; /* btree root block */ + xfs_bmbt_rec_32_t di_abmx[1]; /* extent list */ + xfs_attr_shortform_t di_attrsf; /* shortform attribute list */ + } di_a; +} xfs_dinode_t; + +/* + * The 32 bit link count in the inode theoretically maxes out at UINT_MAX. + * Since the pathconf interface is signed, we use 2^31 - 1 instead. + * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX. + */ +#define XFS_MAXLINK ((1U << 31) - 1U) +#define XFS_MAXLINK_1 65535U + +/* + * Bit names for logging disk inodes only + */ +#define XFS_DI_MAGIC 0x0000001 +#define XFS_DI_MODE 0x0000002 +#define XFS_DI_VERSION 0x0000004 +#define XFS_DI_FORMAT 0x0000008 +#define XFS_DI_ONLINK 0x0000010 +#define XFS_DI_UID 0x0000020 +#define XFS_DI_GID 0x0000040 +#define XFS_DI_NLINK 0x0000080 +#define XFS_DI_PROJID 0x0000100 +#define XFS_DI_PAD 0x0000200 +#define XFS_DI_ATIME 0x0000400 +#define XFS_DI_MTIME 0x0000800 +#define XFS_DI_CTIME 0x0001000 +#define XFS_DI_SIZE 0x0002000 +#define XFS_DI_NBLOCKS 0x0004000 +#define XFS_DI_EXTSIZE 0x0008000 +#define XFS_DI_NEXTENTS 0x0010000 +#define XFS_DI_NAEXTENTS 0x0020000 +#define XFS_DI_FORKOFF 0x0040000 +#define XFS_DI_AFORMAT 0x0080000 +#define XFS_DI_DMEVMASK 0x0100000 +#define XFS_DI_DMSTATE 0x0200000 +#define XFS_DI_FLAGS 0x0400000 +#define XFS_DI_GEN 0x0800000 +#define XFS_DI_NEXT_UNLINKED 0x1000000 +#define XFS_DI_U 0x2000000 +#define XFS_DI_A 0x4000000 +#define XFS_DI_NUM_BITS 27 +#define XFS_DI_ALL_BITS ((1 << XFS_DI_NUM_BITS) - 1) 
+#define XFS_DI_CORE_BITS (XFS_DI_ALL_BITS & ~(XFS_DI_U|XFS_DI_A)) + +/* + * Values for di_format + */ +typedef enum xfs_dinode_fmt +{ + XFS_DINODE_FMT_DEV, /* CHR, BLK: di_dev */ + XFS_DINODE_FMT_LOCAL, /* DIR, REG: di_c */ + /* LNK: di_symlink */ + XFS_DINODE_FMT_EXTENTS, /* DIR, REG, LNK: di_bmx */ + XFS_DINODE_FMT_BTREE, /* DIR, REG, LNK: di_bmbt */ + XFS_DINODE_FMT_UUID /* MNT: di_muuid */ +} xfs_dinode_fmt_t; + +/* + * Inode minimum and maximum sizes. + */ +#define XFS_DINODE_MIN_LOG 8 +#define XFS_DINODE_MAX_LOG 11 +#define XFS_DINODE_MIN_SIZE (1 << XFS_DINODE_MIN_LOG) +#define XFS_DINODE_MAX_SIZE (1 << XFS_DINODE_MAX_LOG) + +/* + * Inode size for given fs. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LITINO) +int xfs_litino(struct xfs_mount *mp); +#define XFS_LITINO(mp) xfs_litino(mp) +#else +#define XFS_LITINO(mp) ((mp)->m_litino) +#endif +#define XFS_BROOT_SIZE_ADJ \ + (sizeof(xfs_bmbt_block_t) - sizeof(xfs_bmdr_block_t)) + +/* + * Fork identifiers. Here so utilities can use them without including + * xfs_inode.h. + */ +#define XFS_DATA_FORK 0 +#define XFS_ATTR_FORK 1 + +/* + * Inode data & attribute fork sizes, per inode. 
+ */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_Q) +int xfs_cfork_q_arch(xfs_dinode_core_t *dcp, xfs_arch_t arch); +int xfs_cfork_q(xfs_dinode_core_t *dcp); +#define XFS_CFORK_Q_ARCH(dcp,arch) xfs_cfork_q_arch(dcp,arch) +#define XFS_CFORK_Q(dcp) xfs_cfork_q(dcp) +#else +#define XFS_CFORK_Q_ARCH(dcp,arch) (INT_GET((dcp)->di_forkoff, arch) != 0) +#define XFS_CFORK_Q(dcp) XFS_CFORK_Q_ARCH(dcp,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_BOFF) +int xfs_cfork_boff_arch(xfs_dinode_core_t *dcp, xfs_arch_t arch); +int xfs_cfork_boff(xfs_dinode_core_t *dcp); +#define XFS_CFORK_BOFF_ARCH(dcp,arch) xfs_cfork_boff_arch(dcp,arch) +#define XFS_CFORK_BOFF(dcp) xfs_cfork_boff(dcp) +#else +#define XFS_CFORK_BOFF_ARCH(dcp,arch) ((int)(INT_GET((dcp)->di_forkoff, arch) << 3)) +#define XFS_CFORK_BOFF(dcp) XFS_CFORK_BOFF_ARCH(dcp,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_DSIZE) +int xfs_cfork_dsize_arch(xfs_dinode_core_t *dcp, struct xfs_mount *mp, xfs_arch_t arch); +int xfs_cfork_dsize(xfs_dinode_core_t *dcp, struct xfs_mount *mp); +#define XFS_CFORK_DSIZE_ARCH(dcp,mp,arch) xfs_cfork_dsize_arch(dcp,mp,arch) +#define XFS_CFORK_DSIZE(dcp,mp) xfs_cfork_dsize(dcp,mp) +#else +#define XFS_CFORK_DSIZE_ARCH(dcp,mp,arch) \ + (XFS_CFORK_Q_ARCH(dcp, arch) ? XFS_CFORK_BOFF_ARCH(dcp, arch) : XFS_LITINO(mp)) +#define XFS_CFORK_DSIZE(dcp,mp) XFS_CFORK_DSIZE_ARCH(dcp,mp,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_ASIZE) +int xfs_cfork_asize_arch(xfs_dinode_core_t *dcp, struct xfs_mount *mp, xfs_arch_t arch); +int xfs_cfork_asize(xfs_dinode_core_t *dcp, struct xfs_mount *mp); +#define XFS_CFORK_ASIZE_ARCH(dcp,mp,arch) xfs_cfork_asize_arch(dcp,mp,arch) +#define XFS_CFORK_ASIZE(dcp,mp) xfs_cfork_asize(dcp,mp) +#else +#define XFS_CFORK_ASIZE_ARCH(dcp,mp,arch) \ + (XFS_CFORK_Q_ARCH(dcp, arch) ? 
XFS_LITINO(mp) - XFS_CFORK_BOFF_ARCH(dcp, arch) : 0) +#define XFS_CFORK_ASIZE(dcp,mp) XFS_CFORK_ASIZE_ARCH(dcp,mp,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_SIZE) +int xfs_cfork_size_arch(xfs_dinode_core_t *dcp, struct xfs_mount *mp, int w, xfs_arch_t arch); +int xfs_cfork_size(xfs_dinode_core_t *dcp, struct xfs_mount *mp, int w); +#define XFS_CFORK_SIZE_ARCH(dcp,mp,w,arch) xfs_cfork_size_arch(dcp,mp,w,arch) +#define XFS_CFORK_SIZE(dcp,mp,w) xfs_cfork_size(dcp,mp,w) +#else +#define XFS_CFORK_SIZE_ARCH(dcp,mp,w,arch) \ + ((w) == XFS_DATA_FORK ? \ + XFS_CFORK_DSIZE_ARCH(dcp, mp, arch) : XFS_CFORK_ASIZE_ARCH(dcp, mp, arch)) +#define XFS_CFORK_SIZE(dcp,mp,w) XFS_CFORK_SIZE_ARCH(dcp,mp,w,ARCH_NOCONVERT) + +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_DSIZE) +int xfs_dfork_dsize_arch(xfs_dinode_t *dip, struct xfs_mount *mp, xfs_arch_t arch); +int xfs_dfork_dsize(xfs_dinode_t *dip, struct xfs_mount *mp); +#define XFS_DFORK_DSIZE_ARCH(dip,mp,arch) xfs_dfork_dsize_arch(dip,mp,arch) +#define XFS_DFORK_DSIZE(dip,mp) xfs_dfork_dsize(dip,mp) +#else +#define XFS_DFORK_DSIZE_ARCH(dip,mp,arch) XFS_CFORK_DSIZE_ARCH(&(dip)->di_core, mp, arch) +#define XFS_DFORK_DSIZE(dip,mp) XFS_DFORK_DSIZE_ARCH(dip,mp,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_ASIZE) +int xfs_dfork_asize_arch(xfs_dinode_t *dip, struct xfs_mount *mp, xfs_arch_t arch); +int xfs_dfork_asize(xfs_dinode_t *dip, struct xfs_mount *mp); +#define XFS_DFORK_ASIZE_ARCH(dip,mp,arch) xfs_dfork_asize_arch(dip,mp,arch) +#define XFS_DFORK_ASIZE(dip,mp) xfs_dfork_asize(dip,mp) +#else +#define XFS_DFORK_ASIZE_ARCH(dip,mp,arch) XFS_CFORK_ASIZE_ARCH(&(dip)->di_core, mp, arch) +#define XFS_DFORK_ASIZE(dip,mp) XFS_DFORK_ASIZE_ARCH(dip,mp,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_SIZE) +int xfs_dfork_size_arch(xfs_dinode_t *dip, struct xfs_mount *mp, int w, xfs_arch_t arch); +int 
xfs_dfork_size(xfs_dinode_t *dip, struct xfs_mount *mp, int w); +#define XFS_DFORK_SIZE_ARCH(dip,mp,w,arch) xfs_dfork_size_arch(dip,mp,w,arch) +#define XFS_DFORK_SIZE(dip,mp,w) xfs_dfork_size(dip,mp,w) +#else +#define XFS_DFORK_SIZE_ARCH(dip,mp,w,arch) XFS_CFORK_SIZE_ARCH(&(dip)->di_core, mp, w, arch) +#define XFS_DFORK_SIZE(dip,mp,w) XFS_DFORK_SIZE_ARCH(dip,mp,w,ARCH_NOCONVERT) + +#endif + +/* + * Macros for accessing per-fork disk inode information. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_Q) +int xfs_dfork_q_arch(xfs_dinode_t *dip, xfs_arch_t arch); +int xfs_dfork_q(xfs_dinode_t *dip); +#define XFS_DFORK_Q_ARCH(dip,arch) xfs_dfork_q_arch(dip,arch) +#define XFS_DFORK_Q(dip) xfs_dfork_q(dip) +#else +#define XFS_DFORK_Q_ARCH(dip,arch) XFS_CFORK_Q_ARCH(&(dip)->di_core, arch) +#define XFS_DFORK_Q(dip) XFS_DFORK_Q_ARCH(dip,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_BOFF) +int xfs_dfork_boff_arch(xfs_dinode_t *dip, xfs_arch_t arch); +int xfs_dfork_boff(xfs_dinode_t *dip); +#define XFS_DFORK_BOFF_ARCH(dip,arch) xfs_dfork_boff_arch(dip,arch) +#define XFS_DFORK_BOFF(dip) xfs_dfork_boff(dip) +#else +#define XFS_DFORK_BOFF_ARCH(dip,arch) XFS_CFORK_BOFF_ARCH(&(dip)->di_core, arch) +#define XFS_DFORK_BOFF(dip) XFS_DFORK_BOFF_ARCH(dip,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_DPTR) +char *xfs_dfork_dptr_arch(xfs_dinode_t *dip, xfs_arch_t arch); +char *xfs_dfork_dptr(xfs_dinode_t *dip); +#define XFS_DFORK_DPTR_ARCH(dip,arch) xfs_dfork_dptr_arch(dip,arch) +#define XFS_DFORK_DPTR(dip) xfs_dfork_dptr(dip) +#else +#define XFS_DFORK_DPTR_ARCH(dip,arch) ((dip)->di_u.di_c) +#define XFS_DFORK_DPTR(dip) XFS_DFORK_DPTR_ARCH(dip,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_APTR) +char *xfs_dfork_aptr_arch(xfs_dinode_t *dip, xfs_arch_t arch); +char *xfs_dfork_aptr(xfs_dinode_t *dip); +#define XFS_DFORK_APTR_ARCH(dip,arch) 
xfs_dfork_aptr_arch(dip,arch) +#define XFS_DFORK_APTR(dip) xfs_dfork_aptr(dip) +#else +#define XFS_DFORK_APTR_ARCH(dip,arch) ((dip)->di_u.di_c + XFS_DFORK_BOFF_ARCH(dip, arch)) +#define XFS_DFORK_APTR(dip) XFS_DFORK_APTR_ARCH(dip,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_PTR) +char *xfs_dfork_ptr_arch(xfs_dinode_t *dip, int w, xfs_arch_t arch); +char *xfs_dfork_ptr(xfs_dinode_t *dip, int w); +#define XFS_DFORK_PTR_ARCH(dip,w,arch) xfs_dfork_ptr_arch(dip,w,arch) +#define XFS_DFORK_PTR(dip,w) xfs_dfork_ptr(dip,w) +#else +#define XFS_DFORK_PTR_ARCH(dip,w,arch) \ + ((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR_ARCH(dip, arch) : XFS_DFORK_APTR_ARCH(dip, arch)) +#define XFS_DFORK_PTR(dip,w) XFS_DFORK_PTR_ARCH(dip,w,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_FORMAT) +int xfs_cfork_format_arch(xfs_dinode_core_t *dcp, int w, xfs_arch_t arch); +int xfs_cfork_format(xfs_dinode_core_t *dcp, int w); +#define XFS_CFORK_FORMAT_ARCH(dcp,w,arch) xfs_cfork_format_arch(dcp,w,arch) +#define XFS_CFORK_FORMAT(dcp,w) xfs_cfork_format(dcp,w) +#else +#define XFS_CFORK_FORMAT_ARCH(dcp,w,arch) \ + ((w) == XFS_DATA_FORK ? INT_GET((dcp)->di_format, arch) : INT_GET((dcp)->di_aformat, arch)) +#define XFS_CFORK_FORMAT(dcp,w) XFS_CFORK_FORMAT_ARCH(dcp,w,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_FMT_SET) +void xfs_cfork_fmt_set_arch(xfs_dinode_core_t *dcp, int w, int n, xfs_arch_t arch); +void xfs_cfork_fmt_set(xfs_dinode_core_t *dcp, int w, int n); +#define XFS_CFORK_FMT_SET_ARCH(dcp,w,n,arch) xfs_cfork_fmt_set_arch(dcp,w,n,arch) +#define XFS_CFORK_FMT_SET(dcp,w,n) xfs_cfork_fmt_set(dcp,w,n) +#else +#define XFS_CFORK_FMT_SET_ARCH(dcp,w,n,arch) \ + ((w) == XFS_DATA_FORK ? 
\ + (INT_SET((dcp)->di_format, arch, (n))) : \ + (INT_SET((dcp)->di_aformat, arch, (n)))) +#define XFS_CFORK_FMT_SET(dcp,w,n) XFS_CFORK_FMT_SET_ARCH(dcp,w,n,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_NEXTENTS) +int xfs_cfork_nextents_arch(xfs_dinode_core_t *dcp, int w, xfs_arch_t arch); +int xfs_cfork_nextents(xfs_dinode_core_t *dcp, int w); +#define XFS_CFORK_NEXTENTS_ARCH(dcp,w,arch) xfs_cfork_nextents_arch(dcp,w,arch) +#define XFS_CFORK_NEXTENTS(dcp,w) xfs_cfork_nextents(dcp,w) +#else +#define XFS_CFORK_NEXTENTS_ARCH(dcp,w,arch) \ + ((w) == XFS_DATA_FORK ? INT_GET((dcp)->di_nextents, arch) : INT_GET((dcp)->di_anextents, arch)) +#define XFS_CFORK_NEXTENTS(dcp,w) XFS_CFORK_NEXTENTS_ARCH(dcp,w,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_CFORK_NEXT_SET) +void xfs_cfork_next_set_arch(xfs_dinode_core_t *dcp, int w, int n, xfs_arch_t arch); +void xfs_cfork_next_set(xfs_dinode_core_t *dcp, int w, int n); +#define XFS_CFORK_NEXT_SET_ARCH(dcp,w,n,arch) xfs_cfork_next_set_arch(dcp,w,n,arch) +#define XFS_CFORK_NEXT_SET(dcp,w,n) xfs_cfork_next_set(dcp,w,n) +#else +#define XFS_CFORK_NEXT_SET_ARCH(dcp,w,n,arch) \ + ((w) == XFS_DATA_FORK ? 
\ + (INT_SET((dcp)->di_nextents, arch, (n))) : \ + (INT_SET((dcp)->di_anextents, arch, (n)))) +#define XFS_CFORK_NEXT_SET(dcp,w,n) XFS_CFORK_NEXT_SET_ARCH(dcp,w,n,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_FORMAT) +int xfs_dfork_format_arch(xfs_dinode_t *dip, int w, xfs_arch_t arch); +int xfs_dfork_format(xfs_dinode_t *dip, int w); +#define XFS_DFORK_FORMAT_ARCH(dip,w,arch) xfs_dfork_format_arch(dip,w,arch) +#define XFS_DFORK_FORMAT(dip,w) xfs_dfork_format(dip,w) +#else +#define XFS_DFORK_FORMAT_ARCH(dip,w,arch) XFS_CFORK_FORMAT_ARCH(&(dip)->di_core, w, arch) +#define XFS_DFORK_FORMAT(dip,w) XFS_DFORK_FORMAT_ARCH(dip,w,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_FMT_SET) +void xfs_dfork_fmt_set_arch(xfs_dinode_t *dip, int w, int n, xfs_arch_t arch); +void xfs_dfork_fmt_set(xfs_dinode_t *dip, int w, int n); +#define XFS_DFORK_FMT_SET_ARCH(dip,w,n,arch) xfs_dfork_fmt_set_arch(dip,w,n,arch) +#define XFS_DFORK_FMT_SET(dip,w,n) xfs_dfork_fmt_set(dip,w,n) +#else +#define XFS_DFORK_FMT_SET_ARCH(dip,w,n,arch) XFS_CFORK_FMT_SET_ARCH(&(dip)->di_core, w, n, arch) +#define XFS_DFORK_FMT_SET(dip,w,n) XFS_DFORK_FMT_SET_ARCH(dip,w,n,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_NEXTENTS) +int xfs_dfork_nextents_arch(xfs_dinode_t *dip, int w, xfs_arch_t arch); +int xfs_dfork_nextents(xfs_dinode_t *dip, int w); +#define XFS_DFORK_NEXTENTS_ARCH(dip,w,arch) xfs_dfork_nextents_arch(dip,w,arch) +#define XFS_DFORK_NEXTENTS(dip,w) xfs_dfork_nextents(dip,w) +#else +#define XFS_DFORK_NEXTENTS_ARCH(dip,w,arch) XFS_CFORK_NEXTENTS_ARCH(&(dip)->di_core, w, arch) +#define XFS_DFORK_NEXTENTS(dip,w) XFS_DFORK_NEXTENTS_ARCH(dip,w,ARCH_NOCONVERT) + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DFORK_NEXT_SET) +void xfs_dfork_next_set_arch(xfs_dinode_t *dip, int w, int n, xfs_arch_t arch); +void xfs_dfork_next_set(xfs_dinode_t *dip, int w, int n); +#define 
XFS_DFORK_NEXT_SET_ARCH(dip,w,n,arch) xfs_dfork_next_set_arch(dip,w,n,arch) +#define XFS_DFORK_NEXT_SET(dip,w,n) xfs_dfork_next_set(dip,w,n) +#else +#define XFS_DFORK_NEXT_SET_ARCH(dip,w,n,arch) XFS_CFORK_NEXT_SET_ARCH(&(dip)->di_core, w, n, arch) +#define XFS_DFORK_NEXT_SET(dip,w,n) XFS_DFORK_NEXT_SET_ARCH(dip,w,n,ARCH_NOCONVERT) + +#endif + +/* + * File types (mode field) + */ +#define IFMT 0170000 /* type of file */ +#define IFIFO 0010000 /* named pipe (fifo) */ +#define IFCHR 0020000 /* character special */ +#define IFDIR 0040000 /* directory */ +#define IFBLK 0060000 /* block special */ +#define IFREG 0100000 /* regular */ +#define IFLNK 0120000 /* symbolic link */ +#define IFSOCK 0140000 /* socket */ +#define IFMNT 0160000 /* mount point */ + +/* + * File execution and access modes. + */ +#define ISUID 04000 /* set user id on execution */ +#define ISGID 02000 /* set group id on execution */ +#define ISVTX 01000 /* sticky directory */ +#define IREAD 0400 /* read, write, execute permissions */ +#define IWRITE 0200 +#define IEXEC 0100 + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_DINODE) +xfs_dinode_t *xfs_buf_to_dinode(struct xfs_buf *bp); +#define XFS_BUF_TO_DINODE(bp) xfs_buf_to_dinode(bp) +#else +#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)(XFS_BUF_PTR(bp))) +#endif + +/* + * Values for di_flags + * There should be a one-to-one correspondence between these flags and the + * XFS_XFLAG_s. 
+ */ +#define XFS_DIFLAG_REALTIME_BIT 0 /* file's blocks come from rt area */ +#define XFS_DIFLAG_PREALLOC_BIT 1 /* file space has been preallocated */ +#define XFS_DIFLAG_NEWRTBM_BIT 2 /* for rtbitmap inode, new format */ +#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) +#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) +#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) +#define XFS_DIFLAG_ALL \ + (XFS_DIFLAG_REALTIME|XFS_DIFLAG_PREALLOC|XFS_DIFLAG_NEWRTBM) + +#endif /* __XFS_DINODE_H__ */ diff --git a/include/xfs_dir.h b/include/xfs_dir.h new file mode 100644 index 000000000..ead2621c4 --- /dev/null +++ b/include/xfs_dir.h @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_DIR_H__ +#define __XFS_DIR_H__ + +/* + * Large directories are structured around Btrees where all the data + * elements are in the leaf nodes. Filenames are hashed into an int, + * then that int is used as the index into the Btree. Since the hashval + * of a filename may not be unique, we may have duplicate keys. The + * internal links in the Btree are logical block offsets into the file. + * + * Small directories use a different format and are packed as tightly + * as possible so as to fit into the literal area of the inode. + */ + +#ifdef XFS_ALL_TRACE +#define XFS_DIR_TRACE +#endif + +#if !defined(DEBUG) +#undef XFS_DIR_TRACE +#endif + +/*======================================================================== + * Function prototypes for the kernel. + *========================================================================*/ + +struct uio; +struct xfs_bmap_free; +struct xfs_da_args; +struct xfs_dinode; +struct xfs_inode; +struct xfs_mount; +struct xfs_trans; + +/* + * Directory function types. + * Put in structures (xfs_dirops_t) for v1 and v2 directories. 
+ */ +typedef void (*xfs_dir_mount_t)(struct xfs_mount *mp); +typedef int (*xfs_dir_isempty_t)(struct xfs_inode *dp); +typedef int (*xfs_dir_init_t)(struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_inode *pdp); +typedef int (*xfs_dir_createname_t)(struct xfs_trans *tp, + struct xfs_inode *dp, + char *name, + int namelen, + xfs_ino_t inum, + xfs_fsblock_t *first, + struct xfs_bmap_free *flist, + xfs_extlen_t total); +typedef int (*xfs_dir_lookup_t)(struct xfs_trans *tp, + struct xfs_inode *dp, + char *name, + int namelen, + xfs_ino_t *inum); +typedef int (*xfs_dir_removename_t)(struct xfs_trans *tp, + struct xfs_inode *dp, + char *name, + int namelen, + xfs_ino_t ino, + xfs_fsblock_t *first, + struct xfs_bmap_free *flist, + xfs_extlen_t total); +typedef int (*xfs_dir_getdents_t)(struct xfs_trans *tp, + struct xfs_inode *dp, + struct uio *uio, + int *eofp); +typedef int (*xfs_dir_replace_t)(struct xfs_trans *tp, + struct xfs_inode *dp, + char *name, + int namelen, + xfs_ino_t inum, + xfs_fsblock_t *first, + struct xfs_bmap_free *flist, + xfs_extlen_t total); +typedef int (*xfs_dir_canenter_t)(struct xfs_trans *tp, + struct xfs_inode *dp, + char *name, + int namelen); +typedef int (*xfs_dir_shortform_validate_ondisk_t)(struct xfs_mount *mp, + struct xfs_dinode *dip); +typedef int (*xfs_dir_shortform_to_single_t)(struct xfs_da_args *args); + +typedef struct xfs_dirops { + xfs_dir_mount_t xd_mount; + xfs_dir_isempty_t xd_isempty; + xfs_dir_init_t xd_init; + xfs_dir_createname_t xd_createname; + xfs_dir_lookup_t xd_lookup; + xfs_dir_removename_t xd_removename; + xfs_dir_getdents_t xd_getdents; + xfs_dir_replace_t xd_replace; + xfs_dir_canenter_t xd_canenter; + xfs_dir_shortform_validate_ondisk_t xd_shortform_validate_ondisk; + xfs_dir_shortform_to_single_t xd_shortform_to_single; +} xfs_dirops_t; + +/* + * Overall external interface routines. 
+ */ +void xfs_dir_startup(void); /* called exactly once */ + +#define XFS_DIR_MOUNT(mp) \ + ((mp)->m_dirops.xd_mount(mp)) +#define XFS_DIR_ISEMPTY(mp,dp) \ + ((mp)->m_dirops.xd_isempty(dp)) +#define XFS_DIR_INIT(mp,tp,dp,pdp) \ + ((mp)->m_dirops.xd_init(tp,dp,pdp)) +#define XFS_DIR_CREATENAME(mp,tp,dp,name,namelen,inum,first,flist,total) \ + ((mp)->m_dirops.xd_createname(tp,dp,name,namelen,inum,first,flist,\ + total)) +#define XFS_DIR_LOOKUP(mp,tp,dp,name,namelen,inum) \ + ((mp)->m_dirops.xd_lookup(tp,dp,name,namelen,inum)) +#define XFS_DIR_REMOVENAME(mp,tp,dp,name,namelen,ino,first,flist,total) \ + ((mp)->m_dirops.xd_removename(tp,dp,name,namelen,ino,first,flist,total)) +#define XFS_DIR_GETDENTS(mp,tp,dp,uio,eofp) \ + ((mp)->m_dirops.xd_getdents(tp,dp,uio,eofp)) +#define XFS_DIR_REPLACE(mp,tp,dp,name,namelen,inum,first,flist,total) \ + ((mp)->m_dirops.xd_replace(tp,dp,name,namelen,inum,first,flist,total)) +#define XFS_DIR_CANENTER(mp,tp,dp,name,namelen) \ + ((mp)->m_dirops.xd_canenter(tp,dp,name,namelen)) +#define XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp,dip) \ + ((mp)->m_dirops.xd_shortform_validate_ondisk(mp,dip)) +#define XFS_DIR_SHORTFORM_TO_SINGLE(mp,args) \ + ((mp)->m_dirops.xd_shortform_to_single(args)) + +#define XFS_DIR_IS_V1(mp) ((mp)->m_dirversion == 1) +extern xfs_dirops_t xfsv1_dirops; + +#endif /* __XFS_DIR_H__ */ diff --git a/include/xfs_dir2.h b/include/xfs_dir2.h new file mode 100644 index 000000000..f723933e1 --- /dev/null +++ b/include/xfs_dir2.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_DIR2_H__ +#define __XFS_DIR2_H__ + +struct dirent; +struct uio; +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_dir2_put_args; +struct xfs_inode; +struct xfs_trans; + +/* + * Directory version 2. + * There are 4 possible formats: + * shortform + * single block - data with embedded leaf at the end + * multiple data blocks, single leaf+freeindex block + * data blocks, node&leaf blocks (btree), freeindex blocks + * + * The shortform format is in xfs_dir2_sf.h. + * The single block format is in xfs_dir2_block.h. + * The data block format is in xfs_dir2_data.h. + * The leaf and freeindex block formats are in xfs_dir2_leaf.h. + * Node blocks are the same as the other version, in xfs_da_btree.h. + */ + +/* + * Byte offset in data block and shortform entry. + */ +typedef __uint16_t xfs_dir2_data_off_t; +#define NULLDATAOFF 0xffffU +typedef uint xfs_dir2_data_aoff_t; /* argument form */ + +/* + * Directory block number (logical dirblk in file) + */ +typedef __uint32_t xfs_dir2_db_t; + +/* + * Byte offset in a directory. 
+ */ +typedef xfs_off_t xfs_dir2_off_t; + +/* + * For getdents, argument struct for put routines. + */ +typedef int (*xfs_dir2_put_t)(struct xfs_dir2_put_args *pa); +typedef struct xfs_dir2_put_args { + xfs_off_t cook; /* cookie of (next) entry */ + xfs_intino_t ino; /* inode number */ + struct dirent *dbp; /* buffer pointer */ + char *name; /* directory entry name */ + int namelen; /* length of name */ + int done; /* output: set if value was stored */ + xfs_dir2_put_t put; /* put function ptr (i/o) */ + struct uio *uio; /* uio control structure */ + unsigned char type; /* file type (see include/linux/fs.h) */ +} xfs_dir2_put_args_t; + +#define XFS_DIR_IS_V2(mp) ((mp)->m_dirversion == 2) +extern xfs_dirops_t xfsv2_dirops; + +/* + * Other interfaces used by the rest of the dir v2 code. + */ +extern int + xfs_dir2_grow_inode(struct xfs_da_args *args, int space, + xfs_dir2_db_t *dbp); + +extern int + xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *vp); + +extern int + xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *vp); + +extern int + xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, + struct xfs_dabuf *bp); + +#endif /* __XFS_DIR2_H__ */ diff --git a/include/xfs_dir2_block.h b/include/xfs_dir2_block.h new file mode 100644 index 000000000..049f598da --- /dev/null +++ b/include/xfs_dir2_block.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_DIR2_BLOCK_H__ +#define __XFS_DIR2_BLOCK_H__ + +/* + * xfs_dir2_block.h + * Directory version 2, single block format structures + */ + +struct dirent; +struct uio; +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_dir2_data_hdr; +struct xfs_dir2_leaf_entry; +struct xfs_inode; +struct xfs_mount; +struct xfs_trans; + +/* + * The single block format is as follows: + * xfs_dir2_data_hdr_t structure + * xfs_dir2_data_entry_t and xfs_dir2_data_unused_t structures + * xfs_dir2_leaf_entry_t structures + * xfs_dir2_block_tail_t structure + */ + +#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: for one block dirs */ + +typedef struct xfs_dir2_block_tail { + __uint32_t count; /* count of leaf entries */ + __uint32_t stale; /* count of stale lf entries */ +} xfs_dir2_block_tail_t; + +/* + * Generic single-block structure, for xfs_db. 
+ */ +typedef struct xfs_dir2_block { + xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_BLOCK_MAGIC */ + xfs_dir2_data_union_t u[1]; /* entry or unused-space records */ + xfs_dir2_leaf_entry_t leaf[1]; /* embedded leaf entries */ + xfs_dir2_block_tail_t tail; /* leaf entry counts (count, stale) */ +} xfs_dir2_block_t; + +/* + * Pointer to the block tail, which holds the counts for the embedded leaf entries and occupies the last sizeof(xfs_dir2_block_tail_t) bytes of the m_dirblksize-byte block (1-block format) + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BLOCK_TAIL_P) +xfs_dir2_block_tail_t * +xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block); +#define XFS_DIR2_BLOCK_TAIL_P(mp,block) xfs_dir2_block_tail_p(mp,block) +#else +#define XFS_DIR2_BLOCK_TAIL_P(mp,block) \ + (((xfs_dir2_block_tail_t *)((char *)(block) + (mp)->m_dirblksize)) - 1) +#endif + +/* + * Pointer to the leaf entries embedded in a data block (1-block format); the first entry lies btp->count entries before the tail + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BLOCK_LEAF_P) +struct xfs_dir2_leaf_entry *xfs_dir2_block_leaf_p_arch( + xfs_dir2_block_tail_t *btp, xfs_arch_t arch); +#define XFS_DIR2_BLOCK_LEAF_P_ARCH(btp,arch) \ + xfs_dir2_block_leaf_p_arch(btp,arch) +#else +#define XFS_DIR2_BLOCK_LEAF_P_ARCH(btp,arch) \ + (((struct xfs_dir2_leaf_entry *)(btp)) - INT_GET((btp)->count, arch)) +#endif + +/* + * Function declarations. 
+ */ + +extern int + xfs_dir2_block_addname(struct xfs_da_args *args); + +extern int + xfs_dir2_block_getdents(struct xfs_trans *tp, struct xfs_inode *dp, + struct uio *uio, int *eofp, struct dirent *dbp, + xfs_dir2_put_t put); + +extern int + xfs_dir2_block_lookup(struct xfs_da_args *args); + +extern int + xfs_dir2_block_removename(struct xfs_da_args *args); + +extern int + xfs_dir2_block_replace(struct xfs_da_args *args); + +extern int + xfs_dir2_leaf_to_block(struct xfs_da_args *args, struct xfs_dabuf *lbp, + struct xfs_dabuf *dbp); + +extern int + xfs_dir2_sf_to_block(struct xfs_da_args *args); + +#endif /* __XFS_DIR2_BLOCK_H__ */ diff --git a/include/xfs_dir2_data.h b/include/xfs_dir2_data.h new file mode 100644 index 000000000..96c850d66 --- /dev/null +++ b/include/xfs_dir2_data.h @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_DIR2_DATA_H__ +#define __XFS_DIR2_DATA_H__ + +/* + * Directory format 2, data block structures. + */ + +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_inode; +struct xfs_trans; + +/* + * Constants. + */ +#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: for multiblock dirs */ +#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */ +#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG) +#define XFS_DIR2_DATA_FREE_TAG 0xffff +#define XFS_DIR2_DATA_FD_COUNT 3 + +/* + * Directory address space divided into sections, + * spaces separated by 32gb. + */ +#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG)) +#define XFS_DIR2_DATA_SPACE 0 +#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) +#define XFS_DIR2_DATA_FIRSTDB(mp) \ + XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATA_OFFSET) + +/* + * Offsets of . and .. in data space (always block 0) + */ +#define XFS_DIR2_DATA_DOT_OFFSET \ + ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t)) +#define XFS_DIR2_DATA_DOTDOT_OFFSET \ + (XFS_DIR2_DATA_DOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(1)) +#define XFS_DIR2_DATA_FIRST_OFFSET \ + (XFS_DIR2_DATA_DOTDOT_OFFSET + XFS_DIR2_DATA_ENTSIZE(2)) + +/* + * Structures. + */ + +/* + * Describe a free area in the data block. + * The freespace will be formatted as a xfs_dir2_data_unused_t. + */ +typedef struct xfs_dir2_data_free { + xfs_dir2_data_off_t offset; /* start of freespace */ + xfs_dir2_data_off_t length; /* length of freespace */ +} xfs_dir2_data_free_t; + +/* + * Header for the data blocks. + * Always at the beginning of a directory-sized block. + * The code knows that XFS_DIR2_DATA_FD_COUNT is 3. 
+ */ +typedef struct xfs_dir2_data_hdr { + __uint32_t magic; /* XFS_DIR2_DATA_MAGIC */ + /* or XFS_DIR2_BLOCK_MAGIC */ + xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT]; /* free areas (offset, length) */ +} xfs_dir2_data_hdr_t; + +/* + * Active entry in a data block. Aligned to 8 bytes. + * Tag appears as the last 2 bytes (locate it with XFS_DIR2_DATA_ENTRY_TAG_P). + */ +typedef struct xfs_dir2_data_entry { + xfs_ino_t inumber; /* inode number */ + __uint8_t namelen; /* name length */ + __uint8_t name[1]; /* name bytes, no null */ + /* variable offset */ + xfs_dir2_data_off_t tag; /* starting offset of us */ +} xfs_dir2_data_entry_t; + +/* + * Unused entry in a data block. Aligned to 8 bytes. + * Tag appears as the last 2 bytes (locate it with XFS_DIR2_DATA_UNUSED_TAG_P_ARCH). + */ +typedef struct xfs_dir2_data_unused { + __uint16_t freetag; /* XFS_DIR2_DATA_FREE_TAG */ + xfs_dir2_data_off_t length; /* total free length */ + /* variable offset */ + xfs_dir2_data_off_t tag; /* starting offset of us */ +} xfs_dir2_data_unused_t; + +typedef union { + xfs_dir2_data_entry_t entry; /* active entry */ + xfs_dir2_data_unused_t unused; /* unused space */ +} xfs_dir2_data_union_t; + +/* + * Generic data block structure, for xfs_db. + */ +typedef struct xfs_dir2_data { + xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_DATA_MAGIC */ + xfs_dir2_data_union_t u[1]; /* entry or unused-space records */ +} xfs_dir2_data_t; + +/* + * Macros. + */ + +/* + * Size of a data entry with an n-byte name: the fields before name[], the name and the trailing tag, rounded up to XFS_DIR2_DATA_ALIGN. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATA_ENTSIZE) +int xfs_dir2_data_entsize(int n); +#define XFS_DIR2_DATA_ENTSIZE(n) xfs_dir2_data_entsize(n) +#else +#define XFS_DIR2_DATA_ENTSIZE(n) \ + ((int)(roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \ + (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN))) +#endif + +/* + * Pointer to an entry's tag word. 
+ */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATA_ENTRY_TAG_P) +xfs_dir2_data_off_t *xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep); +#define XFS_DIR2_DATA_ENTRY_TAG_P(dep) xfs_dir2_data_entry_tag_p(dep) +#else +#define XFS_DIR2_DATA_ENTRY_TAG_P(dep) \ + ((xfs_dir2_data_off_t *)\ + ((char *)(dep) + XFS_DIR2_DATA_ENTSIZE((dep)->namelen) - \ + (uint)sizeof(xfs_dir2_data_off_t))) +#endif + +/* + * Pointer to a freespace's tag word: the last sizeof(xfs_dir2_data_off_t) bytes of the region whose byte length is read from dup->length. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATA_UNUSED_TAG_P) +xfs_dir2_data_off_t *xfs_dir2_data_unused_tag_p_arch( + xfs_dir2_data_unused_t *dup, xfs_arch_t arch); +#define XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup,arch) \ + xfs_dir2_data_unused_tag_p_arch(dup,arch) +#else +#define XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup,arch) \ + ((xfs_dir2_data_off_t *)\ + ((char *)(dup) + INT_GET((dup)->length, arch) \ + - (uint)sizeof(xfs_dir2_data_off_t))) +#endif + +/* + * Function declarations. xfs_dir2_data_check is declared only when DEBUG is defined; otherwise the name is a macro that expands to nothing. + */ + +#ifdef DEBUG +extern void + xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp); +#else +#define xfs_dir2_data_check(dp,bp) +#endif + +extern xfs_dir2_data_free_t * + xfs_dir2_data_freefind(xfs_dir2_data_t *d, + xfs_dir2_data_unused_t *dup); + +extern xfs_dir2_data_free_t * + xfs_dir2_data_freeinsert(xfs_dir2_data_t *d, + xfs_dir2_data_unused_t *dup, int *loghead); + +extern void + xfs_dir2_data_freeremove(xfs_dir2_data_t *d, + xfs_dir2_data_free_t *dfp, int *loghead); + +extern void + xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d, + int *loghead, char *aendp); + +extern int + xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, + struct xfs_dabuf **bpp); + +extern void + xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp, + xfs_dir2_data_entry_t *dep); + +extern void + xfs_dir2_data_log_header(struct xfs_trans *tp, struct xfs_dabuf *bp); + +extern void + xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp, + xfs_dir2_data_unused_t *dup); +
+extern void + xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp, + xfs_dir2_data_aoff_t offset, + xfs_dir2_data_aoff_t len, int *needlogp, + int *needscanp); + +extern void + xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp, + xfs_dir2_data_unused_t *dup, + xfs_dir2_data_aoff_t offset, + xfs_dir2_data_aoff_t len, int *needlogp, + int *needscanp); + +#endif /* __XFS_DIR2_DATA_H__ */ diff --git a/include/xfs_dir2_leaf.h b/include/xfs_dir2_leaf.h new file mode 100644 index 000000000..f7ef39678 --- /dev/null +++ b/include/xfs_dir2_leaf.h @@ -0,0 +1,361 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_DIR2_LEAF_H__ +#define __XFS_DIR2_LEAF_H__ + +/* + * Directory version 2, leaf block structures. + */ + +struct dirent; +struct uio; +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_inode; +struct xfs_mount; +struct xfs_trans; + +/* + * Constants. + */ + +/* + * Offset of the leaf/node space. First block in this space + * is the btree root. + */ +#define XFS_DIR2_LEAF_SPACE 1 +#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) +#define XFS_DIR2_LEAF_FIRSTDB(mp) \ + XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_LEAF_OFFSET) + +/* + * Types. + */ + +/* + * Offset in data space of a data entry, in units of XFS_DIR2_DATA_ALIGN bytes (see XFS_DIR2_DATAPTR_TO_BYTE). + */ +typedef __uint32_t xfs_dir2_dataptr_t; +#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0x7fffffff) +#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0) + +/* + * Structures. + */ + +/* + * Leaf block header. + */ +typedef struct xfs_dir2_leaf_hdr { + xfs_da_blkinfo_t info; /* header for da routines */ + __uint16_t count; /* count of entries */ + __uint16_t stale; /* count of stale entries */ +} xfs_dir2_leaf_hdr_t; + +/* + * Leaf block entry. + */ +typedef struct xfs_dir2_leaf_entry { + xfs_dahash_t hashval; /* hash value of name */ + xfs_dir2_dataptr_t address; /* address of data entry */ +} xfs_dir2_leaf_entry_t; + +/* + * Leaf block tail. + */ +typedef struct xfs_dir2_leaf_tail { + __uint32_t bestcount; /* number of entries in the bests array */ +} xfs_dir2_leaf_tail_t; + +/* + * Leaf block. + * bests and tail are at the end of the block for single-leaf only + * (magic = XFS_DIR2_LEAF1_MAGIC not XFS_DIR2_LEAFN_MAGIC). + */ +typedef struct xfs_dir2_leaf { + xfs_dir2_leaf_hdr_t hdr; /* leaf header */ + xfs_dir2_leaf_entry_t ents[1]; /* entries */ + /* ... 
*/ + xfs_dir2_data_off_t bests[1]; /* best free counts */ + xfs_dir2_leaf_tail_t tail; /* leaf tail */ +} xfs_dir2_leaf_t; + +/* + * Macros. + * The DB blocks are logical directory block numbers, not filesystem blocks. XFS_DIR2_MAX_LEAF_ENTS is the number of leaf entries that fit in a directory block after the leaf header. + */ + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_MAX_LEAF_ENTS) +int +xfs_dir2_max_leaf_ents(struct xfs_mount *mp); +#define XFS_DIR2_MAX_LEAF_ENTS(mp) \ + xfs_dir2_max_leaf_ents(mp) +#else +#define XFS_DIR2_MAX_LEAF_ENTS(mp) \ + ((int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) / \ + (uint)sizeof(xfs_dir2_leaf_entry_t))) +#endif + +/* + * Get address of the leaf tail (which holds the bestcount field) in the single-leaf block; the tail occupies the last sizeof(xfs_dir2_leaf_tail_t) bytes of the m_dirblksize-byte block. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_LEAF_TAIL_P) +xfs_dir2_leaf_tail_t * +xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp); +#define XFS_DIR2_LEAF_TAIL_P(mp,lp) \ + xfs_dir2_leaf_tail_p(mp, lp) +#else +#define XFS_DIR2_LEAF_TAIL_P(mp,lp) \ + ((xfs_dir2_leaf_tail_t *)\ + ((char *)(lp) + (mp)->m_dirblksize - \ + (uint)sizeof(xfs_dir2_leaf_tail_t))) +#endif + +/* + * Get address of the bests array in the single-leaf block; the array has ltp->bestcount entries and ends where the tail begins. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_LEAF_BESTS_P) +xfs_dir2_data_off_t * +xfs_dir2_leaf_bests_p_arch(xfs_dir2_leaf_tail_t *ltp, xfs_arch_t arch); +#define XFS_DIR2_LEAF_BESTS_P_ARCH(ltp,arch) xfs_dir2_leaf_bests_p_arch(ltp,arch) +#else +#define XFS_DIR2_LEAF_BESTS_P_ARCH(ltp,arch) \ + ((xfs_dir2_data_off_t *)(ltp) - INT_GET((ltp)->bestcount, arch)) +#endif + +/* + * Convert dataptr to byte in file space (a dataptr counts XFS_DIR2_DATA_ALIGN-byte units; the macro form does not use mp) + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATAPTR_TO_BYTE) +xfs_dir2_off_t +xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp); +#define XFS_DIR2_DATAPTR_TO_BYTE(mp,dp) xfs_dir2_dataptr_to_byte(mp, dp) +#else +#define XFS_DIR2_DATAPTR_TO_BYTE(mp,dp) \ + ((xfs_dir2_off_t)(dp) << XFS_DIR2_DATA_ALIGN_LOG) +#endif + +/* + * Convert byte in file space to dataptr. It had better be aligned. 
+ */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_DATAPTR) +xfs_dir2_dataptr_t +xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by); +#define XFS_DIR2_BYTE_TO_DATAPTR(mp,by) xfs_dir2_byte_to_dataptr(mp,by) +#else +#define XFS_DIR2_BYTE_TO_DATAPTR(mp,by) \ + ((xfs_dir2_dataptr_t)((by) >> XFS_DIR2_DATA_ALIGN_LOG)) +#endif + +/* + * Convert dataptr to a block number + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATAPTR_TO_DB) +xfs_dir2_db_t +xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp); +#define XFS_DIR2_DATAPTR_TO_DB(mp,dp) xfs_dir2_dataptr_to_db(mp, dp) +#else +#define XFS_DIR2_DATAPTR_TO_DB(mp,dp) \ + XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp)) +#endif + +/* + * Convert dataptr to a byte offset in a block + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DATAPTR_TO_OFF) +xfs_dir2_data_aoff_t +xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp); +#define XFS_DIR2_DATAPTR_TO_OFF(mp,dp) xfs_dir2_dataptr_to_off(mp, dp) +#else +#define XFS_DIR2_DATAPTR_TO_OFF(mp,dp) \ + XFS_DIR2_BYTE_TO_OFF(mp, XFS_DIR2_DATAPTR_TO_BYTE(mp, dp)) +#endif + +/* + * Convert block and offset to byte in space + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_OFF_TO_BYTE) +xfs_dir2_off_t +xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o); +#define XFS_DIR2_DB_OFF_TO_BYTE(mp,db,o) \ + xfs_dir2_db_off_to_byte(mp, db, o) +#else +#define XFS_DIR2_DB_OFF_TO_BYTE(mp,db,o) \ + (((xfs_dir2_off_t)(db) << \ + ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) + (o)) +#endif + +/* + * Convert byte in space to (DB) block + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_DB) +xfs_dir2_db_t xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by); +#define XFS_DIR2_BYTE_TO_DB(mp,by) xfs_dir2_byte_to_db(mp, by) +#else +#define XFS_DIR2_BYTE_TO_DB(mp,by) \ + ((xfs_dir2_db_t)((by) >> \ + ((mp)->m_sb.sb_blocklog + 
(mp)->m_sb.sb_dirblklog))) +#endif + +/* + * Convert byte in space to (DA) block + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_DA) +xfs_dablk_t xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by); +#define XFS_DIR2_BYTE_TO_DA(mp,by) xfs_dir2_byte_to_da(mp, by) +#else +#define XFS_DIR2_BYTE_TO_DA(mp,by) \ + XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, by)) +#endif + +/* + * Convert byte in space to offset in a block + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_BYTE_TO_OFF) +xfs_dir2_data_aoff_t +xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by); +#define XFS_DIR2_BYTE_TO_OFF(mp,by) xfs_dir2_byte_to_off(mp, by) +#else +#define XFS_DIR2_BYTE_TO_OFF(mp,by) \ + ((xfs_dir2_data_aoff_t)((by) & \ + ((1 << ((mp)->m_sb.sb_blocklog + \ + (mp)->m_sb.sb_dirblklog)) - 1))) +#endif + +/* + * Convert block and offset to dataptr + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_OFF_TO_DATAPTR) +xfs_dir2_dataptr_t +xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, + xfs_dir2_data_aoff_t o); +#define XFS_DIR2_DB_OFF_TO_DATAPTR(mp,db,o) \ + xfs_dir2_db_off_to_dataptr(mp, db, o) +#else +#define XFS_DIR2_DB_OFF_TO_DATAPTR(mp,db,o) \ + XFS_DIR2_BYTE_TO_DATAPTR(mp, XFS_DIR2_DB_OFF_TO_BYTE(mp, db, o)) +#endif + +/* + * Convert block (DB) to block (dablk) + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_TO_DA) +xfs_dablk_t xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db); +#define XFS_DIR2_DB_TO_DA(mp,db) xfs_dir2_db_to_da(mp, db) +#else +#define XFS_DIR2_DB_TO_DA(mp,db) \ + ((xfs_dablk_t)((db) << (mp)->m_sb.sb_dirblklog)) +#endif + +/* + * Convert block (dablk) to block (DB) + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DA_TO_DB) +xfs_dir2_db_t xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da); +#define XFS_DIR2_DA_TO_DB(mp,da) xfs_dir2_da_to_db(mp, da) +#else +#define XFS_DIR2_DA_TO_DB(mp,da) \ + ((xfs_dir2_db_t)((da) >> (mp)->m_sb.sb_dirblklog)) 
+#endif + +/* + * Convert block (dablk) to byte offset in space + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DA_TO_BYTE) +xfs_dir2_off_t xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da); +#define XFS_DIR2_DA_TO_BYTE(mp,da) xfs_dir2_da_to_byte(mp, da) +#else +#define XFS_DIR2_DA_TO_BYTE(mp,da) \ + XFS_DIR2_DB_OFF_TO_BYTE(mp, XFS_DIR2_DA_TO_DB(mp, da), 0) +#endif + +/* + * Function declarations. + */ + +extern int + xfs_dir2_block_to_leaf(struct xfs_da_args *args, struct xfs_dabuf *dbp); + +extern int + xfs_dir2_leaf_addname(struct xfs_da_args *args); + +extern void + xfs_dir2_leaf_compact(struct xfs_da_args *args, struct xfs_dabuf *bp); + +extern void + xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp, + int *lowstalep, int *highstalep, int *lowlogp, + int *highlogp); + +extern int + xfs_dir2_leaf_getdents(struct xfs_trans *tp, struct xfs_inode *dp, + struct uio *uio, int *eofp, struct dirent *dbp, + xfs_dir2_put_t put); + +extern int + xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno, + struct xfs_dabuf **bpp, int magic); + +extern void + xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp, + int first, int last); + +extern void + xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp, + int first, int last); + +extern void + xfs_dir2_leaf_log_header(struct xfs_trans *tp, struct xfs_dabuf *bp); + +extern void + xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp); + +extern int + xfs_dir2_leaf_lookup(struct xfs_da_args *args); + +extern int + xfs_dir2_leaf_removename(struct xfs_da_args *args); + +extern int + xfs_dir2_leaf_replace(struct xfs_da_args *args); + +extern int + xfs_dir2_leaf_search_hash(struct xfs_da_args *args, + struct xfs_dabuf *lbp); +extern int + xfs_dir2_leaf_trim_data(struct xfs_da_args *args, struct xfs_dabuf *lbp, xfs_dir2_db_t db); + +extern int + xfs_dir2_node_to_leaf(struct xfs_da_state *state); + +#endif /* __XFS_DIR2_LEAF_H__ */ diff --git 
a/include/xfs_dir2_node.h b/include/xfs_dir2_node.h new file mode 100644 index 000000000..4ec4d1e11 --- /dev/null +++ b/include/xfs_dir2_node.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_DIR2_NODE_H__ +#define __XFS_DIR2_NODE_H__ + +/* + * Directory version 2, btree node format structures + */ + +struct dirent; +struct uio; +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_da_state; +struct xfs_da_state_blk; +struct xfs_inode; +struct xfs_trans; + +/* + * Constants. + */ + +/* + * Offset of the freespace index. 
+ */ +#define XFS_DIR2_FREE_SPACE 2 +#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) +#define XFS_DIR2_FREE_FIRSTDB(mp) \ + XFS_DIR2_BYTE_TO_DB(mp, XFS_DIR2_FREE_OFFSET) + +#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F */ + +/* + * Structures. + */ +typedef struct xfs_dir2_free_hdr { + __uint32_t magic; /* XFS_DIR2_FREE_MAGIC */ + __int32_t firstdb; /* db of first entry */ + __int32_t nvalid; /* count of valid entries */ + __int32_t nused; /* count of used entries */ +} xfs_dir2_free_hdr_t; + +typedef struct xfs_dir2_free { + xfs_dir2_free_hdr_t hdr; /* block header */ + xfs_dir2_data_off_t bests[1]; /* best free counts */ + /* unused entries are -1 */ +} xfs_dir2_free_t; +#define XFS_DIR2_MAX_FREE_BESTS(mp) \ + (((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_free_hdr_t)) / \ + (uint)sizeof(xfs_dir2_data_off_t)) + +/* + * Macros. + */ + +/* + * Convert data space db to the corresponding free db. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_TO_FDB) +xfs_dir2_db_t +xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db); +#define XFS_DIR2_DB_TO_FDB(mp,db) xfs_dir2_db_to_fdb(mp, db) +#else +#define XFS_DIR2_DB_TO_FDB(mp,db) \ + (XFS_DIR2_FREE_FIRSTDB(mp) + (db) / XFS_DIR2_MAX_FREE_BESTS(mp)) +#endif + +/* + * Convert data space db to the corresponding index in a free db. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_DB_TO_FDINDEX) +int +xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db); +#define XFS_DIR2_DB_TO_FDINDEX(mp,db) xfs_dir2_db_to_fdindex(mp, db) +#else +#define XFS_DIR2_DB_TO_FDINDEX(mp,db) ((db) % XFS_DIR2_MAX_FREE_BESTS(mp)) +#endif + +/* + * Functions. 
+ */ + +extern void + xfs_dir2_free_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp, + int first, int last); + +extern int + xfs_dir2_leaf_to_node(struct xfs_da_args *args, struct xfs_dabuf *lbp); + +extern xfs_dahash_t + xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count); + +extern int + xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp, + struct xfs_da_args *args, int *indexp, + struct xfs_da_state *state); + +extern int + xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp, + struct xfs_dabuf *leaf2_bp); + +extern int + xfs_dir2_leafn_split(struct xfs_da_state *state, + struct xfs_da_state_blk *oldblk, + struct xfs_da_state_blk *newblk); + +extern int + xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action); + +extern void + xfs_dir2_leafn_unbalance(struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk, + struct xfs_da_state_blk *save_blk); + +extern int + xfs_dir2_node_addname(struct xfs_da_args *args); + +extern int + xfs_dir2_node_lookup(struct xfs_da_args *args); + +extern int + xfs_dir2_node_removename(struct xfs_da_args *args); + +extern int + xfs_dir2_node_replace(struct xfs_da_args *args); + +extern int + xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo, + int *rvalp); + +#endif /* __XFS_DIR2_NODE_H__ */ diff --git a/include/xfs_dir2_sf.h b/include/xfs_dir2_sf.h new file mode 100644 index 000000000..b74dd752e --- /dev/null +++ b/include/xfs_dir2_sf.h @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_DIR2_SF_H__ +#define __XFS_DIR2_SF_H__ + +/* + * Directory layout when stored internal to an inode. + * + * Small directories are packed as tightly as possible so as to + * fit into the literal area of the inode. + */ + +struct dirent; +struct uio; +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_dir2_block; +struct xfs_inode; +struct xfs_mount; +struct xfs_trans; + +/* + * Maximum size of a shortform directory. + */ +#define XFS_DIR2_SF_MAX_SIZE \ + (XFS_DINODE_MAX_SIZE - (uint)sizeof(xfs_dinode_core_t) - \ + (uint)sizeof(xfs_agino_t)) + +/* + * Inode number stored as 8 8-bit values. + */ +typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t; + +#define XFS_DIR2_SF_GET_INO8_ARCH(di,arch) \ + (xfs_ino_t)(DIRINO_GET_ARCH(&di,arch)) +#define XFS_DIR2_SF_GET_INO8(di) \ + XFS_DIR2_SF_GET_INO8_ARCH(di,ARCH_NOCONVERT) + +/* + * Inode number stored as 4 8-bit values. + * Works a lot of the time, when all the inode numbers in a directory + * fit in 32 bits. 
+ */ +typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t; +#define XFS_DIR2_SF_GET_INO4_ARCH(di,arch) \ + (xfs_ino_t)(DIRINO4_GET_ARCH(&di,arch)) +#define XFS_DIR2_SF_GET_INO4(di) \ + XFS_DIR2_SF_GET_INO4_ARCH(di,ARCH_NOCONVERT) + +typedef union { + xfs_dir2_ino8_t i8; + xfs_dir2_ino4_t i4; +} xfs_dir2_inou_t; +#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL) + +/* + * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t. + * Only need 16 bits, this is the byte offset into the single block form. + */ +typedef struct { __uint8_t i[2]; } xfs_dir2_sf_off_t; + +/* + * The parent directory has a dedicated field, and the self-pointer must + * be calculated on the fly. + * + * Entries are packed toward the top as tightly as possible. The header + * and the elements must be bcopy()'d out into a work area to get correct + * alignment for the inode number fields. + */ +typedef struct xfs_dir2_sf_hdr { + __uint8_t count; /* count of entries */ + __uint8_t i8count; /* count of 8-byte inode #s */ + xfs_dir2_inou_t parent; /* parent dir inode number */ +} xfs_dir2_sf_hdr_t; + +typedef struct xfs_dir2_sf_entry { + __uint8_t namelen; /* actual name length */ + xfs_dir2_sf_off_t offset; /* saved offset */ + __uint8_t name[1]; /* name, variable size */ + xfs_dir2_inou_t inumber; /* inode number, var. 
offset */ +} xfs_dir2_sf_entry_t; + +typedef struct xfs_dir2_sf { + xfs_dir2_sf_hdr_t hdr; /* shortform header */ + xfs_dir2_sf_entry_t list[1]; /* shortform entries */ +} xfs_dir2_sf_t; + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_HDR_SIZE) +int xfs_dir2_sf_hdr_size(int i8count); +#define XFS_DIR2_SF_HDR_SIZE(i8count) xfs_dir2_sf_hdr_size(i8count) +#else +#define XFS_DIR2_SF_HDR_SIZE(i8count) \ + ((uint)sizeof(xfs_dir2_sf_hdr_t) - \ + ((i8count) == 0) * \ + ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_INUMBERP) +xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep); +#define XFS_DIR2_SF_INUMBERP(sfep) xfs_dir2_sf_inumberp(sfep) +#else +#define XFS_DIR2_SF_INUMBERP(sfep) \ + ((xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen]) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_GET_INUMBER) +xfs_intino_t xfs_dir2_sf_get_inumber_arch(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from, + xfs_arch_t arch); +#define XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, from, arch) \ + xfs_dir2_sf_get_inumber_arch(sfp, from, arch) + +#else +#define XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, from, arch) \ + ((sfp)->hdr.i8count == 0 ? 
\ + (xfs_intino_t)XFS_DIR2_SF_GET_INO4_ARCH(*(from), arch) : \ + (xfs_intino_t)XFS_DIR2_SF_GET_INO8_ARCH(*(from), arch)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_PUT_INUMBER) +void xfs_dir2_sf_put_inumber_arch(xfs_dir2_sf_t *sfp, xfs_ino_t *from, + xfs_dir2_inou_t *to, xfs_arch_t arch); +#define XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp,from,to,arch) \ + xfs_dir2_sf_put_inumber_arch(sfp,from,to,arch) +#else /* statement macro: do/while(0) keeps the inner if/else from pairing with a caller's else */ +#define XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp,from,to,arch) \ + do { if ((sfp)->hdr.i8count == 0) { \ + DIRINO4_COPY_ARCH(from,to,arch); \ + } else { \ + DIRINO_COPY_ARCH(from,to,arch); \ + } } while (0) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_GET_OFFSET) +xfs_dir2_data_aoff_t xfs_dir2_sf_get_offset_arch(xfs_dir2_sf_entry_t *sfep, + xfs_arch_t arch); +xfs_dir2_data_aoff_t xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep); +#define XFS_DIR2_SF_GET_OFFSET_ARCH(sfep,arch) \ + xfs_dir2_sf_get_offset_arch(sfep,arch) +#else +#define XFS_DIR2_SF_GET_OFFSET_ARCH(sfep,arch) \ + INT_GET_UNALIGNED_16_ARCH(&(sfep)->offset.i,arch) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_PUT_OFFSET) +void xfs_dir2_sf_put_offset_arch(xfs_dir2_sf_entry_t *sfep, + xfs_dir2_data_aoff_t off, xfs_arch_t arch); +#define XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep,off,arch) \ + xfs_dir2_sf_put_offset_arch(sfep,off,arch) +#else +#define XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep,off,arch) \ + INT_SET_UNALIGNED_16_ARCH(&(sfep)->offset.i,off,arch) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_ENTSIZE_BYNAME) +int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len); +#define XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,len) \ + xfs_dir2_sf_entsize_byname(sfp,len) +#else +#define XFS_DIR2_SF_ENTSIZE_BYNAME(sfp,len) /* space a name uses */ \ + ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \ + ((sfp)->hdr.i8count == 0) * \ + ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE &&
XFSSO_XFS_DIR2_SF_ENTSIZE_BYENTRY) +int xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep); +#define XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep) \ + xfs_dir2_sf_entsize_byentry(sfp,sfep) +#else +#define XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep) /* space an entry uses */ \ + ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (sfep)->namelen - \ + ((sfp)->hdr.i8count == 0) * \ + ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t))) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_FIRSTENTRY) +xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp); +#define XFS_DIR2_SF_FIRSTENTRY(sfp) xfs_dir2_sf_firstentry(sfp) +#else +#define XFS_DIR2_SF_FIRSTENTRY(sfp) /* first entry: header size past sfp */ \ + ((xfs_dir2_sf_entry_t *) \ + ((char *)(sfp) + XFS_DIR2_SF_HDR_SIZE((sfp)->hdr.i8count))) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR2_SF_NEXTENTRY) +xfs_dir2_sf_entry_t *xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, + xfs_dir2_sf_entry_t *sfep); +#define XFS_DIR2_SF_NEXTENTRY(sfp,sfep) xfs_dir2_sf_nextentry(sfp,sfep) +#else +#define XFS_DIR2_SF_NEXTENTRY(sfp,sfep) /* next entry in struct */ \ + ((xfs_dir2_sf_entry_t *) \ + ((char *)(sfep) + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp,sfep))) +#endif + +/* + * Functions.
+ */ + +extern int + xfs_dir2_block_sfsize(struct xfs_inode *dp, + struct xfs_dir2_block *block, + xfs_dir2_sf_hdr_t *sfhp); + +extern int + xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp, + int size, xfs_dir2_sf_hdr_t *sfhp); + +extern int + xfs_dir2_sf_addname(struct xfs_da_args *args); + +extern int + xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); + +extern int + xfs_dir2_sf_getdents(struct xfs_inode *dp, struct uio *uio, int *eofp, + struct dirent *dbp, xfs_dir2_put_t put); + +extern int + xfs_dir2_sf_lookup(struct xfs_da_args *args); + +extern int + xfs_dir2_sf_removename(struct xfs_da_args *args); + +extern int + xfs_dir2_sf_replace(struct xfs_da_args *args); + +#endif /* __XFS_DIR2_SF_H__ */ diff --git a/include/xfs_dir_leaf.h b/include/xfs_dir_leaf.h new file mode 100644 index 000000000..d330ff8ec --- /dev/null +++ b/include/xfs_dir_leaf.h @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_DIR_LEAF_H__ +#define __XFS_DIR_LEAF_H__ + +/* + * Directory layout, internal structure, access macros, etc. + * + * Large directories are structured around Btrees where all the data + * elements are in the leaf nodes. Filenames are hashed into an int, + * then that int is used as the index into the Btree. Since the hashval + * of a filename may not be unique, we may have duplicate keys. The + * internal links in the Btree are logical block offsets into the file. + */ + +struct dirent; +struct uio; +struct xfs_bmap_free; +struct xfs_dabuf; +struct xfs_da_args; +struct xfs_da_state; +struct xfs_da_state_blk; +struct xfs_dir_put_args; +struct xfs_inode; +struct xfs_mount; +struct xfs_trans; + +/*======================================================================== + * Directory Structure when equal to XFS_LBSIZE(mp) bytes. + *========================================================================*/ + +/* + * This is the structure of the leaf nodes in the Btree. + * + * Struct leaf_entry's are packed from the top. Names grow from the bottom + * but are not packed. The freemap contains run-length-encoded entries + * for the free bytes after the leaf_entry's, but only the N largest such, + * smaller runs are dropped. When the freemap doesn't show enough space + * for an allocation, we compact the namelist area and try again. If we + * still don't have enough space, then we have to split the block. 
+ * + * Since we have duplicate hash keys, for each key that matches, compare + * the actual string. The root and intermediate node search always takes + * the first-in-the-block key match found, so we should only have to work + * "forw"ard. If none matches, continue with the "forw"ard leaf nodes + * until the hash key changes or the filename is found. + * + * The parent directory and the self-pointer are explicitly represented + * (ie: there are entries for "." and ".."). + * + * Note that the count being a __uint16_t limits us to something like a + * blocksize of 1.3MB in the face of worst case (short) filenames. + */ +#define XFS_DIR_LEAF_MAPSIZE 3 /* how many freespace slots */ + +typedef struct xfs_dir_leafblock { + struct xfs_dir_leaf_hdr { /* constant-structure header block */ + xfs_da_blkinfo_t info; /* block type, links, etc. */ + __uint16_t count; /* count of active leaf_entry's */ + __uint16_t namebytes; /* num bytes of name strings stored */ + __uint16_t firstused; /* first used byte in name area */ + __uint8_t holes; /* != 0 if blk needs compaction */ + __uint8_t pad1; + struct xfs_dir_leaf_map {/* RLE map of free bytes */ + __uint16_t base; /* base of free region */ + __uint16_t size; /* run length of free region */ + } freemap[XFS_DIR_LEAF_MAPSIZE]; /* N largest free regions */ + } hdr; + struct xfs_dir_leaf_entry { /* sorted on key, not name */ + xfs_dahash_t hashval; /* hash value of name */ + __uint16_t nameidx; /* index into buffer of name */ + __uint8_t namelen; /* length of name string */ + __uint8_t pad2; + } entries[1]; /* var sized array */ + struct xfs_dir_leaf_name { + xfs_dir_ino_t inumber; /* inode number for this key */ + __uint8_t name[1]; /* name string itself */ + } namelist[1]; /* grows from bottom of buf */ +} xfs_dir_leafblock_t; +typedef struct xfs_dir_leaf_hdr xfs_dir_leaf_hdr_t; +typedef struct xfs_dir_leaf_map xfs_dir_leaf_map_t; +typedef struct xfs_dir_leaf_entry xfs_dir_leaf_entry_t; +typedef struct xfs_dir_leaf_name 
xfs_dir_leaf_name_t; + +/* + * Length of name for which a 512-byte block filesystem + * can get a double split. + */ +#define XFS_DIR_LEAF_CAN_DOUBLE_SPLIT_LEN \ + (512 - (uint)sizeof(xfs_dir_leaf_hdr_t) - \ + (uint)sizeof(xfs_dir_leaf_entry_t) * 2 - \ + (uint)sizeof(xfs_dir_leaf_name_t) * 2 - (MAXNAMELEN - 2) + 1 + 1) + +typedef int (*xfs_dir_put_t)(struct xfs_dir_put_args *pa); + +typedef union { + xfs_off_t o; /* offset (cookie) */ + /* + * Watch the order here (endian-ness dependent). + */ + struct { +#if __BYTE_ORDER == __LITTLE_ENDIAN + xfs_dahash_t h; /* hash value */ + __uint32_t be; /* block and entry */ +#else /* __BYTE_ORDER == __BIG_ENDIAN */ + __uint32_t be; /* block and entry */ + xfs_dahash_t h; /* hash value */ +#endif /* __BYTE_ORDER == __BIG_ENDIAN */ + } s; +} xfs_dircook_t; + +#define XFS_PUT_COOKIE(c,mp,bno,entry,hash) \ + ((c).s.be = XFS_DA_MAKE_BNOENTRY(mp, bno, entry), (c).s.h = (hash)) + +#define XFS_GET_DIR_INO_ARCH(mp,di,arch) \ + DIRINO_GET_ARCH(&(di),arch) +#define XFS_GET_DIR_INO(mp,di) \ + XFS_GET_DIR_INO_ARCH(mp,di,ARCH_NOCONVERT) + +typedef struct xfs_dir_put_args +{ + xfs_dircook_t cook; /* cookie of (next) entry */ + xfs_intino_t ino; /* inode number */ + struct dirent *dbp; /* buffer pointer */ + char *name; /* directory entry name */ + int namelen; /* length of name */ + int done; /* output: set if value was stored */ + xfs_dir_put_t put; /* put function ptr (i/o) */ + struct uio *uio; /* uio control structure */ + unsigned char type; /* file type (see include/linux/fs.h) */ +} xfs_dir_put_args_t; + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_LEAF_ENTSIZE_BYNAME) +int xfs_dir_leaf_entsize_byname(int len); +#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len) xfs_dir_leaf_entsize_byname(len) +#else +#define XFS_DIR_LEAF_ENTSIZE_BYNAME(len) /* space a name of length len will use */ \ + ((uint)sizeof(xfs_dir_leaf_name_t)-1 + (len)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_LEAF_ENTSIZE_BYENTRY) +int
xfs_dir_leaf_entsize_byentry(xfs_dir_leaf_entry_t *entry); +#define XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry) \ + xfs_dir_leaf_entsize_byentry(entry) +#else +#define XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry) /* space an entry will use */ \ + ((uint)sizeof(xfs_dir_leaf_name_t)-1 + (entry)->namelen) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_LEAF_NAMESTRUCT) +xfs_dir_leaf_name_t * +xfs_dir_leaf_namestruct(xfs_dir_leafblock_t *leafp, int offset); +#define XFS_DIR_LEAF_NAMESTRUCT(leafp,offset) \ + xfs_dir_leaf_namestruct(leafp,offset) +#else +#define XFS_DIR_LEAF_NAMESTRUCT(leafp,offset) /* point to name struct */ \ + ((xfs_dir_leaf_name_t *)&((char *)(leafp))[offset]) +#endif + +/*======================================================================== + * Function prototypes for the kernel. + *========================================================================*/ + +/* + * Internal routines when dirsize < XFS_LITINO(mp). + */ +int xfs_dir_shortform_create(struct xfs_da_args *args, xfs_ino_t parent); +int xfs_dir_shortform_addname(struct xfs_da_args *args); +int xfs_dir_shortform_lookup(struct xfs_da_args *args); +int xfs_dir_shortform_to_leaf(struct xfs_da_args *args); +int xfs_dir_shortform_removename(struct xfs_da_args *args); +int xfs_dir_shortform_getdents(struct xfs_inode *dp, struct uio *uio, int *eofp, + struct dirent *dbp, xfs_dir_put_t put); +int xfs_dir_shortform_replace(struct xfs_da_args *args); + +/* + * Internal routines when dirsize == XFS_LBSIZE(mp). + */ +int xfs_dir_leaf_to_node(struct xfs_da_args *args); +int xfs_dir_leaf_to_shortform(struct xfs_da_args *args); + +/* + * Routines used for growing the Btree. 
+ */ +int xfs_dir_leaf_create(struct xfs_da_args *args, xfs_dablk_t which_block, + struct xfs_dabuf **bpp); +int xfs_dir_leaf_split(struct xfs_da_state *state, + struct xfs_da_state_blk *oldblk, + struct xfs_da_state_blk *newblk); +int xfs_dir_leaf_add(struct xfs_dabuf *leaf_buffer, + struct xfs_da_args *args, int insertion_index); +int xfs_dir_leaf_addname(struct xfs_da_args *args); +int xfs_dir_leaf_lookup_int(struct xfs_dabuf *leaf_buffer, + struct xfs_da_args *args, + int *index_found_at); +int xfs_dir_leaf_remove(struct xfs_trans *trans, + struct xfs_dabuf *leaf_buffer, + int index_to_remove); +int xfs_dir_leaf_getdents_int(struct xfs_dabuf *bp, struct xfs_inode *dp, + xfs_dablk_t bno, struct uio *uio, + int *eobp, struct dirent *dbp, + xfs_dir_put_t put, xfs_daddr_t nextda); + +/* + * Routines used for shrinking the Btree. + */ +int xfs_dir_leaf_toosmall(struct xfs_da_state *state, int *retval); +void xfs_dir_leaf_unbalance(struct xfs_da_state *state, + struct xfs_da_state_blk *drop_blk, + struct xfs_da_state_blk *save_blk); + +/* + * Utility routines. + */ +uint xfs_dir_leaf_lasthash(struct xfs_dabuf *bp, int *count); +int xfs_dir_leaf_order(struct xfs_dabuf *leaf1_bp, + struct xfs_dabuf *leaf2_bp); +int xfs_dir_put_dirent32_direct(xfs_dir_put_args_t *pa); +int xfs_dir_put_dirent32_uio(xfs_dir_put_args_t *pa); +int xfs_dir_put_dirent64_direct(xfs_dir_put_args_t *pa); +int xfs_dir_put_dirent64_uio(xfs_dir_put_args_t *pa); +int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); + + +/* + * Global data. + */ +extern xfs_dahash_t xfs_dir_hash_dot, xfs_dir_hash_dotdot; + +#endif /* __XFS_DIR_LEAF_H__ */ diff --git a/include/xfs_dir_sf.h b/include/xfs_dir_sf.h new file mode 100644 index 000000000..d875da4b3 --- /dev/null +++ b/include/xfs_dir_sf.h @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_DIR_SF_H__ +#define __XFS_DIR_SF_H__ + +/* + * Directory layout when stored internal to an inode. + * + * Small directories are packed as tightly as possible so as to + * fit into the literal area of the inode. + */ + +typedef struct { __uint8_t i[sizeof(xfs_ino_t)]; } xfs_dir_ino_t; + +/* + * The parent directory has a dedicated field, and the self-pointer must + * be calculated on the fly. + * + * Entries are packed toward the top as tightly as possible. The header + * and the elements must be bcopy()'d out into a work area to get correct + * alignment for the inode number fields.
+ */ +typedef struct xfs_dir_shortform { + struct xfs_dir_sf_hdr { /* constant-structure header block */ + xfs_dir_ino_t parent; /* parent dir inode number */ + __uint8_t count; /* count of active entries */ + } hdr; + struct xfs_dir_sf_entry { + xfs_dir_ino_t inumber; /* referenced inode number */ + __uint8_t namelen; /* actual length of name (no NULL) */ + __uint8_t name[1]; /* name */ + } list[1]; /* variable sized array */ +} xfs_dir_shortform_t; +typedef struct xfs_dir_sf_hdr xfs_dir_sf_hdr_t; +typedef struct xfs_dir_sf_entry xfs_dir_sf_entry_t; + +/* + * We generate this then sort it, so that readdirs are returned in + * hash-order. Else seekdir won't work. + */ +typedef struct xfs_dir_sf_sort { + __uint8_t entno; /* .=0, ..=1, else entry# + 2 */ + __uint8_t seqno; /* sequence # with same hash value */ + __uint8_t namelen; /* length of name value (no null) */ + xfs_dahash_t hash; /* this entry's hash value */ + xfs_intino_t ino; /* this entry's inode number */ + char *name; /* name value, pointer into buffer */ +} xfs_dir_sf_sort_t; + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_GET_DIRINO) +void xfs_dir_sf_get_dirino_arch(xfs_dir_ino_t *from, xfs_ino_t *to, xfs_arch_t arch); +void xfs_dir_sf_get_dirino(xfs_dir_ino_t *from, xfs_ino_t *to); +#define XFS_DIR_SF_GET_DIRINO_ARCH(from,to,arch) xfs_dir_sf_get_dirino_arch(from, to, arch) +#define XFS_DIR_SF_GET_DIRINO(from,to) xfs_dir_sf_get_dirino(from, to) +#else +#define XFS_DIR_SF_GET_DIRINO_ARCH(from,to,arch) DIRINO_COPY_ARCH(from,to,arch) +#define XFS_DIR_SF_GET_DIRINO(from,to) DIRINO_COPY_ARCH(from,to,ARCH_NOCONVERT) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_PUT_DIRINO) +void xfs_dir_sf_put_dirino_arch(xfs_ino_t *from, xfs_dir_ino_t *to, xfs_arch_t arch); +void xfs_dir_sf_put_dirino(xfs_ino_t *from, xfs_dir_ino_t *to); +#define XFS_DIR_SF_PUT_DIRINO_ARCH(from,to,arch) xfs_dir_sf_put_dirino_arch(from, to, arch) +#define XFS_DIR_SF_PUT_DIRINO(from,to) 
xfs_dir_sf_put_dirino(from, to) +#else +#define XFS_DIR_SF_PUT_DIRINO_ARCH(from,to,arch) DIRINO_COPY_ARCH(from,to,arch) +#define XFS_DIR_SF_PUT_DIRINO(from,to) DIRINO_COPY_ARCH(from,to,ARCH_NOCONVERT) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_ENTSIZE_BYNAME) +int xfs_dir_sf_entsize_byname(int len); +#define XFS_DIR_SF_ENTSIZE_BYNAME(len) xfs_dir_sf_entsize_byname(len) +#else +#define XFS_DIR_SF_ENTSIZE_BYNAME(len) /* space a name uses */ \ + ((uint)sizeof(xfs_dir_sf_entry_t)-1 + (len)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_ENTSIZE_BYENTRY) +int xfs_dir_sf_entsize_byentry(xfs_dir_sf_entry_t *sfep); +#define XFS_DIR_SF_ENTSIZE_BYENTRY(sfep) xfs_dir_sf_entsize_byentry(sfep) +#else +#define XFS_DIR_SF_ENTSIZE_BYENTRY(sfep) /* space an entry uses */ \ + ((uint)sizeof(xfs_dir_sf_entry_t)-1 + (sfep)->namelen) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_NEXTENTRY) +xfs_dir_sf_entry_t *xfs_dir_sf_nextentry(xfs_dir_sf_entry_t *sfep); +#define XFS_DIR_SF_NEXTENTRY(sfep) xfs_dir_sf_nextentry(sfep) +#else +#define XFS_DIR_SF_NEXTENTRY(sfep) /* next entry in struct */ \ + ((xfs_dir_sf_entry_t *) \ + ((char *)(sfep) + XFS_DIR_SF_ENTSIZE_BYENTRY(sfep))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DIR_SF_ALLFIT) +int xfs_dir_sf_allfit(int count, int totallen); +#define XFS_DIR_SF_ALLFIT(count,totallen) \ + xfs_dir_sf_allfit(count,totallen) +#else +#define XFS_DIR_SF_ALLFIT(count,totallen) /* will all entries fit? */ \ + ((uint)sizeof(xfs_dir_sf_hdr_t) + \ + ((uint)sizeof(xfs_dir_sf_entry_t)-1)*(count) + (totallen)) +#endif + +#ifdef XFS_ALL_TRACE +#define XFS_DIR_TRACE +#endif + +#if !defined(DEBUG) +#undef XFS_DIR_TRACE +#endif + +/* + * Kernel tracing support for directories. 
+ */ +struct uio; +struct xfs_inode; +struct xfs_da_intnode; +struct xfs_dinode; +struct xfs_dir_leafblock; +struct xfs_dir_leaf_entry; + +#define XFS_DIR_TRACE_SIZE 4096 /* size of global trace buffer */ + +/* + * Trace record types. + */ +#define XFS_DIR_KTRACE_G_DU 1 /* dp, uio */ +#define XFS_DIR_KTRACE_G_DUB 2 /* dp, uio, bno */ +#define XFS_DIR_KTRACE_G_DUN 3 /* dp, uio, node */ +#define XFS_DIR_KTRACE_G_DUL 4 /* dp, uio, leaf */ +#define XFS_DIR_KTRACE_G_DUE 5 /* dp, uio, leaf entry */ +#define XFS_DIR_KTRACE_G_DUC 6 /* dp, uio, cookie */ + +#if defined(XFS_DIR_TRACE) + +void xfs_dir_trace_g_du(char *where, struct xfs_inode *dp, struct uio *uio); +void xfs_dir_trace_g_dub(char *where, struct xfs_inode *dp, struct uio *uio, + xfs_dablk_t bno); +void xfs_dir_trace_g_dun(char *where, struct xfs_inode *dp, struct uio *uio, + struct xfs_da_intnode *node); +void xfs_dir_trace_g_dul(char *where, struct xfs_inode *dp, struct uio *uio, + struct xfs_dir_leafblock *leaf); +void xfs_dir_trace_g_due(char *where, struct xfs_inode *dp, struct uio *uio, + struct xfs_dir_leaf_entry *entry); +void xfs_dir_trace_g_duc(char *where, struct xfs_inode *dp, struct uio *uio, + xfs_off_t cookie); +void xfs_dir_trace_enter(int type, char *where, + __psunsigned_t a0, __psunsigned_t a1, + __psunsigned_t a2, __psunsigned_t a3, + __psunsigned_t a4, __psunsigned_t a5, + __psunsigned_t a6, __psunsigned_t a7, + __psunsigned_t a8, __psunsigned_t a9, + __psunsigned_t a10, __psunsigned_t a11); +#else +#define xfs_dir_trace_g_du(w,d,u) +#define xfs_dir_trace_g_dub(w,d,u,b) +#define xfs_dir_trace_g_dun(w,d,u,n) +#define xfs_dir_trace_g_dul(w,d,u,l) +#define xfs_dir_trace_g_due(w,d,u,e) +#define xfs_dir_trace_g_duc(w,d,u,c) +#endif /* XFS_DIR_TRACE */ + +#endif /* __XFS_DIR_SF_H__ */ diff --git a/include/xfs_dqblk.h b/include/xfs_dqblk.h new file mode 100644 index 000000000..ec1704d86 --- /dev/null +++ b/include/xfs_dqblk.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc.
All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_DQBLK_H__ +#define __XFS_DQBLK_H__ + +/* + * The ondisk form of a dquot structure. + */ +#define XFS_DQUOT_MAGIC 0x4451 /* 'DQ' */ +#define XFS_DQUOT_VERSION (u_int8_t)0x01 /* latest version number */ + +/* + * This is the main portion of the on-disk representation of quota + * information for a user. This is the q_core of the xfs_dquot_t that + * is kept in kernel memory. We pad this with some more expansion room + * to construct the on disk structure. 
+ */ +typedef struct xfs_disk_dquot { +/*16*/ u_int16_t d_magic; /* dquot magic = XFS_DQUOT_MAGIC */ +/*8 */ u_int8_t d_version; /* dquot version */ +/*8 */ u_int8_t d_flags; /* XFS_DQ_USER/DQ_PROJ */ +/*32*/ xfs_dqid_t d_id; /* user id or proj id */ +/*64*/ xfs_qcnt_t d_blk_hardlimit;/* absolute limit on disk blks */ +/*64*/ xfs_qcnt_t d_blk_softlimit;/* preferred limit on disk blks */ +/*64*/ xfs_qcnt_t d_ino_hardlimit;/* maximum # allocated inodes */ +/*64*/ xfs_qcnt_t d_ino_softlimit;/* preferred inode limit */ +/*64*/ xfs_qcnt_t d_bcount; /* disk blocks owned by the user */ +/*64*/ xfs_qcnt_t d_icount; /* inodes owned by the user */ +/*32*/ __int32_t d_itimer; /* zero if within inode limits if not, + this is when we refuse service */ +/*32*/ __int32_t d_btimer; /* similar to above; for disk blocks */ +/*16*/ xfs_qwarncnt_t d_iwarns; /* warnings issued wrt num inodes */ +/*16*/ xfs_qwarncnt_t d_bwarns; /* warnings issued wrt disk blocks */ +/*32*/ __int32_t d_pad0; /* 64 bit align */ +/*64*/ xfs_qcnt_t d_rtb_hardlimit;/* absolute limit on realtime blks */ +/*64*/ xfs_qcnt_t d_rtb_softlimit;/* preferred limit on RT disk blks */ +/*64*/ xfs_qcnt_t d_rtbcount; /* realtime blocks owned */ +/*32*/ __int32_t d_rtbtimer; /* similar to above; for RT disk blocks */ +/*16*/ xfs_qwarncnt_t d_rtbwarns; /* warnings issued wrt RT disk blocks */ +/*16*/ __uint16_t d_pad; +} xfs_disk_dquot_t; + +/* + * This is what goes on disk. This is separated from the xfs_disk_dquot because + * carrying the unnecessary padding would be a waste of memory. + */ +typedef struct xfs_dqblk { + xfs_disk_dquot_t dd_diskdq; /* portion that lives incore as well */ + char dd_fill[32]; /* filling for posterity */ +} xfs_dqblk_t; + +/* + * flags for q_flags field in the dquot. 
+ */ +#define XFS_DQ_USER 0x0001 /* a user quota */ +#define XFS_DQ_PROJ 0x0002 /* a project quota */ + +#define XFS_DQ_FLOCKED 0x0008 /* flush lock taken */ +#define XFS_DQ_DIRTY 0x0010 /* dquot is dirty */ +#define XFS_DQ_WANT 0x0020 /* for lookup/reclaim race */ +#define XFS_DQ_INACTIVE 0x0040 /* dq off mplist & hashlist */ +#define XFS_DQ_MARKER 0x0080 /* sentinel */ + +/* + * In the worst case, when both user and proj quotas on, + * we can have a max of three dquots changing in a single transaction. + */ +#define XFS_DQUOT_LOGRES(mp) (sizeof(xfs_disk_dquot_t) * 3) + +#endif /* __XFS_DQBLK_H__ */ diff --git a/include/xfs_dquot_item.h b/include/xfs_dquot_item.h new file mode 100644 index 000000000..f8d9049a0 --- /dev/null +++ b/include/xfs_dquot_item.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_DQUOT_ITEM_H__ +#define __XFS_DQUOT_ITEM_H__ + +/* + * These are the structures used to lay out dquots and quotaoff + * records on the log. Quite similar to those of inodes. + */ + +/* + * log format struct for dquots. + * The first two fields must be the type and size fitting into + * 32 bits : log_recovery code assumes that. + */ +typedef struct xfs_dq_logformat { + __uint16_t qlf_type; /* dquot log item type */ + __uint16_t qlf_size; /* size of this item */ + xfs_dqid_t qlf_id; /* usr/proj id number : 32 bits */ + __int64_t qlf_blkno; /* blkno of dquot buffer */ + __int32_t qlf_len; /* len of dquot buffer */ + __uint32_t qlf_boffset; /* off of dquot in buffer */ +} xfs_dq_logformat_t; + +/* + * log format struct for QUOTAOFF records. + * The first two fields must be the type and size fitting into + * 32 bits : log_recovery code assumes that. + * We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer + * to the first and ensures that the first logitem is taken out of the AIL + * only when the last one is securely committed. 
+ */ +typedef struct xfs_qoff_logformat { + unsigned short qf_type; /* quotaoff log item type */ + unsigned short qf_size; /* size of this item */ + unsigned int qf_flags; /* USR and/or PRJ */ + char qf_pad[12]; /* padding for future */ +} xfs_qoff_logformat_t; + + +#ifdef __KERNEL__ + +struct xfs_dquot; +struct xfs_trans; +struct xfs_mount; +typedef struct xfs_dq_logitem { + xfs_log_item_t qli_item; /* common portion */ + struct xfs_dquot *qli_dquot; /* dquot ptr */ + xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ + unsigned short qli_pushbuf_flag; /* one bit used in push_ail */ +#ifdef DEBUG + uint64_t qli_push_owner; +#endif + xfs_dq_logformat_t qli_format; /* logged structure */ +} xfs_dq_logitem_t; + + +typedef struct xfs_qoff_logitem { + xfs_log_item_t qql_item; /* common portion */ + struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ + xfs_qoff_logformat_t qql_format; /* logged structure */ +} xfs_qoff_logitem_t; + + +extern void xfs_qm_dquot_logitem_init(struct xfs_dquot *); +extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *, + xfs_qoff_logitem_t *, uint); +extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *, + xfs_qoff_logitem_t *, uint); +extern void xfs_trans_log_quotaoff_item(struct xfs_trans *, + xfs_qoff_logitem_t *); + +#endif /* __KERNEL__ */ + +#endif /* __XFS_DQUOT_ITEM_H__ */ diff --git a/include/xfs_extfree_item.h b/include/xfs_extfree_item.h new file mode 100644 index 000000000..640f8e220 --- /dev/null +++ b/include/xfs_extfree_item.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_EXTFREE_ITEM_H__ +#define __XFS_EXTFREE_ITEM_H__ + +struct xfs_mount; +struct xfs_zone; + +typedef struct xfs_extent { + xfs_dfsbno_t ext_start; + xfs_extlen_t ext_len; +} xfs_extent_t; + +/* + * This is the structure used to lay out an efi log item in the + * log. The efi_extents field is a variable size array whose + * size is given by efi_nextents. + */ +typedef struct xfs_efi_log_format { + unsigned short efi_type; /* efi log item type */ + unsigned short efi_size; /* size of this item */ + uint efi_nextents; /* # extents to free */ + __uint64_t efi_id; /* efi identifier */ + xfs_extent_t efi_extents[1]; /* array of extents to free */ +} xfs_efi_log_format_t; + +/* + * This is the structure used to lay out an efd log item in the + * log. 
The efd_extents array is a variable size array whose + * size is given by efd_nextents. + */ +typedef struct xfs_efd_log_format { + unsigned short efd_type; /* efd log item type */ + unsigned short efd_size; /* size of this item */ + uint efd_nextents; /* # of extents freed */ + __uint64_t efd_efi_id; /* id of corresponding efi */ + xfs_extent_t efd_extents[1]; /* array of extents freed */ +} xfs_efd_log_format_t; + + +#ifdef __KERNEL__ + +/* + * Max number of extents in fast allocation path. + */ +#define XFS_EFI_MAX_FAST_EXTENTS 16 + +/* + * Define EFI flags. + */ +#define XFS_EFI_RECOVERED 0x1 +#define XFS_EFI_COMMITTED 0x2 +#define XFS_EFI_CANCELED 0x4 + +/* + * This is the "extent free intention" log item. It is used + * to log the fact that some extents need to be freed. It is + * used in conjunction with the "extent free done" log item + * described below. + */ +typedef struct xfs_efi_log_item { + xfs_log_item_t efi_item; + uint efi_flags; /* misc flags */ + uint efi_next_extent; + xfs_efi_log_format_t efi_format; +} xfs_efi_log_item_t; + +/* + * This is the "extent free done" log item. It is used to log + * the fact that some extents earlier mentioned in an efi item + * have been freed. + */ +typedef struct xfs_efd_log_item { + xfs_log_item_t efd_item; + xfs_efi_log_item_t *efd_efip; + uint efd_next_extent; + xfs_efd_log_format_t efd_format; +} xfs_efd_log_item_t; + +/* + * Max number of extents in fast allocation path. + */ +#define XFS_EFD_MAX_FAST_EXTENTS 16 + +extern struct xfs_zone *xfs_efi_zone; +extern struct xfs_zone *xfs_efd_zone; + +xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint); +xfs_efd_log_item_t *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *, + uint); + +#endif /* __KERNEL__ */ + +#endif /* __XFS_EXTFREE_ITEM_H__ */ diff --git a/include/xfs_fs.h b/include/xfs_fs.h new file mode 100644 index 000000000..81a8c3c98 --- /dev/null +++ b/include/xfs_fs.h @@ -0,0 +1,476 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc.
All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef _LINUX_XFS_FS_H +#define _LINUX_XFS_FS_H + +#include <linux/types.h> +#include <asm/ioctl.h> + + +/* + * SGI's XFS filesystem's major stuff (constants, structures) + */ + +#define XFS_SUPER_MAGIC 0x58465342 +#define XFS_NAME "xfs" + +struct biosize { + __u32 biosz_flags; + __s32 biosz_read; + __s32 biosz_write; + __s32 dfl_biosz_read; + __s32 dfl_biosz_write; +}; + +/* + * direct I/O attribute record used with F_DIOINFO + * d_miniosz is the min xfer size, xfer size multiple and file seek offset + * alignment.
+ */ +struct dioattr { + __u32 d_mem; /* data buffer memory alignment */ + __u32 d_miniosz; /* min xfer size */ + __u32 d_maxiosz; /* max xfer size */ +}; + +/* + * Structure for F_FSGETXATTR[A] and F_FSSETXATTR. + */ +struct fsxattr { + __u32 fsx_xflags; /* xflags field value (get/set) */ + __u32 fsx_extsize; /* extsize field value (get/set)*/ + __u32 fsx_nextents; /* nextents field value (get) */ + unsigned char fsx_pad[16]; +}; + +/* + * Flags for the bs_xflags/fsx_xflags field + * There should be a one-to-one correspondence between these flags and the + * XFS_DIFLAG_s. + */ +#define XFS_XFLAG_REALTIME 0x00000001 +#define XFS_XFLAG_PREALLOC 0x00000002 +#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ +#define XFS_XFLAG_ALL \ + ( XFS_XFLAG_REALTIME|XFS_XFLAG_PREALLOC|XFS_XFLAG_HASATTR ) + + +/* + * Structure for F_GETBMAP. + * On input, fill in bmv_offset and bmv_length of the first structure + * to indicate the area of interest in the file, and bmv_entry with the + * number of array elements given. The first structure is updated on + * return to give the offset and length for the next call. + */ +struct getbmap { + __s64 bmv_offset; /* file offset of segment in blocks */ + __s64 bmv_block; /* starting block (64-bit daddr_t) */ + __s64 bmv_length; /* length of segment, blocks */ + __s32 bmv_count; /* # of entries in array incl. 1st */ + __s32 bmv_entries; /* # of entries filled in (output) */ +}; + +/* + * Structure for F_GETBMAPX. The fields bmv_offset through bmv_entries + * are used exactly as in the getbmap structure. The getbmapx structure + * has additional bmv_iflags and bmv_oflags fields. The bmv_iflags field + * is only used for the first structure. It contains input flags + * specifying F_GETBMAPX actions. The bmv_oflags field is filled in + * by the F_GETBMAPX command for each returned structure after the first. 
+ */ +struct getbmapx { + __s64 bmv_offset; /* file offset of segment in blocks */ + __s64 bmv_block; /* starting block (64-bit daddr_t) */ + __s64 bmv_length; /* length of segment, blocks */ + __s32 bmv_count; /* # of entries in array incl. 1st */ + __s32 bmv_entries; /* # of entries filled in (output). */ + __s32 bmv_iflags; /* input flags (1st structure) */ + __s32 bmv_oflags; /* output flags (after 1st structure)*/ + __s32 bmv_unused1; /* future use */ + __s32 bmv_unused2; /* future use */ +}; + +/* bmv_iflags values - set by F_GETBMAPX caller. */ + +#define BMV_IF_ATTRFORK 0x1 /* return attr fork rather than data */ +#define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ +#define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ + +#define BMV_IF_VALID (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC) + +/* bmv_oflags values - returned from F_GETBMAPX for each non-header segment */ + +#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ + +/* Convert getbmap <-> getbmapx - move fields from p1 to p2. */ + +#define GETBMAP_CONVERT(p1,p2) { \ + p2.bmv_offset = p1.bmv_offset; \ + p2.bmv_block = p1.bmv_block; \ + p2.bmv_length = p1.bmv_length; \ + p2.bmv_count = p1.bmv_count; \ + p2.bmv_entries = p1.bmv_entries; } + +#ifdef __KERNEL__ + +/* Kernel only bmv_iflags value. */ +#define BMV_IF_EXTENDED 0x40000000 /* getbmapx if set */ + +#endif /* __KERNEL__ */ + +/* + * Structure for F_FSSETDM. + * For use by backup and restore programs to set the XFS on-disk inode + * fields di_dmevmask and di_dmstate. These must be set to exactly and + * only values previously obtained via xfs_bulkstat! (Specifically the + * xfs_bstat_t fields bs_dmevmask and bs_dmstate.) + */ +struct fsdmidata { + __s32 fsd_dmevmask; /* corresponds to di_dmevmask */ + __u16 fsd_padding; + __u16 fsd_dmstate; /* corresponds to di_dmstate */ +}; + +/* + * File segment locking set data type for 64 bit access. + * Also used for all the RESV/FREE interfaces.
+ */ +typedef struct xfs_flock64 { + __s16 l_type; + __s16 l_whence; + __s64 l_start; + __s64 l_len; /* len == 0 means until end of file */ + __s32 l_sysid; + pid_t l_pid; + __s32 l_pad[4]; /* reserve area */ +} xfs_flock64_t; + +/* + * Output for XFS_IOC_FSGEOMETRY + */ +typedef struct xfs_fsop_geom { + __u32 blocksize; /* filesystem (data) block size */ + __u32 rtextsize; /* realtime extent size */ + __u32 agblocks; /* fsblocks in an AG */ + __u32 agcount; /* number of allocation groups */ + __u32 logblocks; /* fsblocks in the log */ + __u32 sectsize; /* (data) sector size, bytes */ + __u32 inodesize; /* inode size in bytes */ + __u32 imaxpct; /* max allowed inode space(%) */ + __u64 datablocks; /* fsblocks in data subvolume */ + __u64 rtblocks; /* fsblocks in realtime subvol */ + __u64 rtextents; /* rt extents in realtime subvol*/ + __u64 logstart; /* starting fsblock of the log */ + unsigned char uuid[16]; /* unique id of the filesystem */ + __u32 sunit; /* stripe unit, fsblocks */ + __u32 swidth; /* stripe width, fsblocks */ + __s32 version; /* structure version */ + __u32 flags; /* superblock version flags */ + __u32 logsectsize; /* log sector size, bytes */ + __u32 rtsectsize; /* realtime sector size, bytes */ + __u32 dirblocksize; /* directory block size, bytes */ +} xfs_fsop_geom_t; + +/* Output for XFS_FS_COUNTS */ +typedef struct xfs_fsop_counts { + __u64 freedata; /* free data section blocks */ + __u64 freertx; /* free rt extents */ + __u64 freeino; /* free inodes */ + __u64 allocino; /* total allocated inodes */ +} xfs_fsop_counts_t; + +/* Input/Output for XFS_GET_RESBLKS and XFS_SET_RESBLKS */ +typedef struct xfs_fsop_resblks { + __u64 resblks; + __u64 resblks_avail; +} xfs_fsop_resblks_t; + +#define XFS_FSOP_GEOM_VERSION 0 + +#define XFS_FSOP_GEOM_FLAGS_ATTR 0x01 /* attributes in use */ +#define XFS_FSOP_GEOM_FLAGS_NLINK 0x02 /* 32-bit nlink values */ +#define XFS_FSOP_GEOM_FLAGS_QUOTA 0x04 /* quotas enabled */ +#define XFS_FSOP_GEOM_FLAGS_IALIGN 
0x08 /* inode alignment */ +#define XFS_FSOP_GEOM_FLAGS_DALIGN 0x10 /* large data alignment */ +#define XFS_FSOP_GEOM_FLAGS_SHARED 0x20 /* read-only shared */ +#define XFS_FSOP_GEOM_FLAGS_EXTFLG 0x40 /* special extent flag */ +#define XFS_FSOP_GEOM_FLAGS_DIRV2 0x80 /* directory version 2 */ + + +/* + * Minimum and maximum sizes needed for growth checks + */ +#define XFS_MIN_AG_BLOCKS 64 +#define XFS_MIN_LOG_BLOCKS 512 +#define XFS_MAX_LOG_BLOCKS (64 * 1024) +#define XFS_MIN_LOG_BYTES (256 * 1024) +#define XFS_MAX_LOG_BYTES (128 * 1024 * 1024) + +/* + * XFS_IOC_FSGROWFSDATA + */ +typedef struct xfs_growfs_data { + __u64 newblocks; /* new data subvol size, fsblocks */ + __u32 imaxpct; /* new inode space percentage limit */ +} xfs_growfs_data_t; + +/* + * XFS_IOC_FSGROWFSLOG + */ +typedef struct xfs_growfs_log { + __u32 newblocks; /* new log size, fsblocks */ + __u32 isint; /* 1 if new log is internal */ +} xfs_growfs_log_t; + +/* + * XFS_IOC_FSGROWFSRT + */ +typedef struct xfs_growfs_rt { + __u64 newblocks; /* new realtime size, fsblocks */ + __u32 extsize; /* new realtime extent size, fsblocks */ +} xfs_growfs_rt_t; + + +/* + * Structures returned from ioctl XFS_IOC_FSBULKSTAT & XFS_IOC_FSBULKSTAT_SINGLE + */ +typedef struct xfs_bstime { + time_t tv_sec; /* seconds */ + __s32 tv_nsec; /* and nanoseconds */ +} xfs_bstime_t; + +typedef struct xfs_bstat { + __u64 bs_ino; /* inode number */ + __u16 bs_mode; /* type and mode */ + __u16 bs_nlink; /* number of links */ + __u32 bs_uid; /* user id */ + __u32 bs_gid; /* group id */ + __u32 bs_rdev; /* device value */ + __s32 bs_blksize; /* block size */ + __s64 bs_size; /* file size */ + xfs_bstime_t bs_atime; /* access time */ + xfs_bstime_t bs_mtime; /* modify time */ + xfs_bstime_t bs_ctime; /* inode change time */ + int64_t bs_blocks; /* number of blocks */ + __u32 bs_xflags; /* extended flags */ + __s32 bs_extsize; /* extent size */ + __s32 bs_extents; /* number of extents */ + __u32 bs_gen; /* generation count */ + __u16
bs_projid; /* project id */ + unsigned char bs_pad[14]; /* pad space, unused */ + __u32 bs_dmevmask; /* DMIG event mask */ + __u16 bs_dmstate; /* DMIG state info */ + __u16 bs_aextents; /* attribute number of extents */ +} xfs_bstat_t; + +/* + * The user-level BulkStat Request interface structure. + */ +typedef struct xfs_fsop_bulkreq { + __u64 *lastip; /* last inode # pointer */ + __s32 icount; /* count of entries in buffer */ + void *ubuffer; /* user buffer for inode desc. */ + __s32 *ocount; /* output count pointer */ +} xfs_fsop_bulkreq_t; + + +/* + * Structures returned from xfs_inumbers syssgi routine. + */ +typedef struct xfs_inogrp { + __u64 xi_startino; /* starting inode number */ + __s32 xi_alloccount; /* # bits set in allocmask */ + __u64 xi_allocmask; /* mask of allocated inodes */ +} xfs_inogrp_t; + + +/* + * The user-level Handle Request interface structure. + */ +typedef struct xfs_fsop_handlereq { + __u32 fd; /* fd for FD_TO_HANDLE */ + void *path; /* user pathname */ + __u32 oflags; /* open flags */ + void *ihandle; /* user supplied handle */ + __u32 ihandlen; /* user supplied length */ + void *ohandle; /* user buffer for handle */ + __u32 *ohandlen; /* user buffer length */ +} xfs_fsop_handlereq_t; + +#if (defined(DEBUG) || defined(INDUCE_IO_ERROR)) +/* + * Error injection. + */ +typedef struct xfs_error_injection { + __s32 fd; + __s32 errtag; +} xfs_error_injection_t; +#endif /* DEBUG || INDUCE_IO_ERROR */ + +/* + * File system identifier. Should be unique (at least per machine). + */ +typedef struct { + __u32 val[2]; /* file system id type */ +} xfs_fsid_t; + +/* + * File identifier. Should be unique per filesystem on a single machine. + * This is typically called by a stateless file server in order to generate + * "file handles". 
+ */ +#define MAXFIDSZ 46 + +typedef struct fid { + __u16 fid_len; /* length of data in bytes */ + unsigned char fid_data[MAXFIDSZ]; /* data (variable length) */ +} fid_t; + +typedef struct xfs_fid { + __u16 xfs_fid_len; /* length of remainder */ + __u16 xfs_fid_pad; + __u32 xfs_fid_gen; /* generation number */ + __u64 xfs_fid_ino; /* 64 bits inode number */ +} xfs_fid_t; + +typedef struct xfs_fid2 { + __u16 fid_len; /* length of remainder */ + __u16 fid_pad; /* padding, must be zero */ + __u32 fid_gen; /* generation number */ + __u64 fid_ino; /* inode number */ +} xfs_fid2_t; + +typedef struct xfs_handle { + union { + __s64 align; /* force alignment of ha_fid */ + xfs_fsid_t _ha_fsid; /* unique file system identifier */ + } ha_u; + xfs_fid_t ha_fid; /* file system specific file ID */ +} xfs_handle_t; + +#define ha_fsid ha_u._ha_fsid + +#define XFS_HSIZE(handle) (((char *) &(handle).ha_fid.xfs_fid_pad \ + - (char *) &(handle)) \ + + (handle).ha_fid.xfs_fid_len) + +#define XFS_HANDLE_CMP(h1, h2) bcmp(h1, h2, sizeof (xfs_handle_t)) + +#define FSHSIZE sizeof (fsid_t) + + +/* + * ioctl commands that replace IRIX fcntl()'s + * For 'documentation' purposes more than anything else, + * the "cmd #" field reflects the IRIX fcntl number.
+ */ +#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64) +#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64) +#define XFS_IOC_DIOINFO _IOR ('X', 30, struct dioattr) +#define XFS_IOC_FSGETXATTR _IOR ('X', 31, struct fsxattr) +#define XFS_IOC_FSSETXATTR _IOW ('X', 32, struct fsxattr) +#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64) +#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64) +#define XFS_IOC_GETBMAP _IOWR('X', 38, struct getbmap) +#define XFS_IOC_FSSETDM _IOW ('X', 39, struct fsdmidata) +#define XFS_IOC_RESVSP _IOW ('X', 40, struct xfs_flock64) +#define XFS_IOC_UNRESVSP _IOW ('X', 41, struct xfs_flock64) +#define XFS_IOC_RESVSP64 _IOW ('X', 42, struct xfs_flock64) +#define XFS_IOC_UNRESVSP64 _IOW ('X', 43, struct xfs_flock64) +#define XFS_IOC_GETBMAPA _IOWR('X', 44, struct getbmap) +#define XFS_IOC_FSGETXATTRA _IOR ('X', 45, struct fsxattr) +#define XFS_IOC_SETBIOSIZE _IOW ('X', 46, struct biosize) +#define XFS_IOC_GETBIOSIZE _IOR ('X', 47, struct biosize) +#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) + +/* + * ioctl commands that replace IRIX syssgi()'s + */ +#define XFS_IOC_FSGEOMETRY _IOR ('X', 100, struct xfs_fsop_geom) +#define XFS_IOC_FSBULKSTAT _IOWR('X', 101, struct xfs_fsop_bulkreq) +#define XFS_IOC_FSBULKSTAT_SINGLE _IOWR('X', 102, struct xfs_fsop_bulkreq) +#define XFS_IOC_FSINUMBERS _IOWR('X', 103, struct xfs_fsop_bulkreq) +#define XFS_IOC_PATH_TO_FSHANDLE _IOWR('X', 104, struct xfs_fsop_handlereq) +#define XFS_IOC_PATH_TO_HANDLE _IOWR('X', 105, struct xfs_fsop_handlereq) +#define XFS_IOC_FD_TO_HANDLE _IOWR('X', 106, struct xfs_fsop_handlereq) +#define XFS_IOC_OPEN_BY_HANDLE _IOWR('X', 107, struct xfs_fsop_handlereq) +#define XFS_IOC_READLINK_BY_HANDLE _IOWR('X', 108, struct xfs_fsop_handlereq) +#define XFS_IOC_SWAPEXT _IOWR('X', 109, struct xfs_swapext) +#define XFS_IOC_FSGROWFSDATA _IOW('X', 110, struct xfs_growfs_data) +#define XFS_IOC_FSGROWFSLOG _IOW('X', 111, struct xfs_growfs_log) +#define 
XFS_IOC_FSGROWFSRT _IOW('X', 112, struct xfs_growfs_rt) +#define XFS_IOC_FSCOUNTS _IOR ('X', 113, struct xfs_fsop_counts) +#define XFS_IOC_SET_RESBLKS _IOR ('X', 114, struct xfs_fsop_resblks) +#define XFS_IOC_GET_RESBLKS _IOR ('X', 115, struct xfs_fsop_resblks) +#if (defined(DEBUG) || defined(INDUCE_IO_ERROR)) +#define XFS_IOC_ERROR_INJECTION _IOW('X', 116, struct xfs_error_injection) +#define XFS_IOC_ERROR_CLEARALL _IOW('X', 117, struct xfs_error_injection) +#endif /* DEBUG || INDUCE_IO_ERROR */ + +/* + * ioctl command to export information not in standard interfaces + * 140: IRIX statvfs.f_fstr field - UUID from the superblock + */ +#define XFS_IOC_GETFSUUID _IOR ('X', 140, unsigned char[16]) + + +/* + * Block I/O parameterization. A basic block (BB) is the lowest size of + * filesystem allocation, and must == NBPSCTR. Length units given to bio + * routines are in BB's. + */ +#define BBSHIFT 9 +#define BBSIZE (1<<BBSHIFT) +#define BBMASK (BBSIZE-1) +#define BTOBB(bytes) (((__u64)(bytes) + BBSIZE - 1) >> BBSHIFT) +#define BTOBBT(bytes) ((__u64)(bytes) >> BBSHIFT) +#define BBTOB(bbs) ((bbs) << BBSHIFT) +#define OFFTOBB(bytes) (((__u64)(bytes) + BBSIZE - 1) >> BBSHIFT) +#define OFFTOBBT(bytes) ((__u64)(bytes) >> BBSHIFT) +#define BBTOOFF(bbs) ((__u64)(bbs) << BBSHIFT) + +#define SEEKLIMIT32 0x7fffffff +#define BBSEEKLIMIT32 BTOBBT(SEEKLIMIT32) +#define SEEKLIMIT 0x7fffffffffffffffLL +#define BBSEEKLIMIT OFFTOBBT(SEEKLIMIT) + + +#ifdef __KERNEL__ + +/* + * Function prototypes + */ +extern int init_xfs_fs(void); + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_XFS_FS_H */ diff --git a/include/xfs_ialloc.h b/include/xfs_ialloc.h new file mode 100644 index 000000000..2693501ac --- /dev/null +++ b/include/xfs_ialloc.h @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_IALLOC_H__ +#define __XFS_IALLOC_H__ + +struct xfs_buf; +struct xfs_dinode; +struct xfs_mount; +struct xfs_trans; + +/* + * Allocation parameters for inode allocation. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IALLOC_INODES) +int xfs_ialloc_inodes(struct xfs_mount *mp); +#define XFS_IALLOC_INODES(mp) xfs_ialloc_inodes(mp) +#else +#define XFS_IALLOC_INODES(mp) ((mp)->m_ialloc_inos) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IALLOC_BLOCKS) +xfs_extlen_t xfs_ialloc_blocks(struct xfs_mount *mp); +#define XFS_IALLOC_BLOCKS(mp) xfs_ialloc_blocks(mp) +#else +#define XFS_IALLOC_BLOCKS(mp) ((mp)->m_ialloc_blks) +#endif + +/* + * For small block file systems, move inodes in clusters of this size. 
+ * When we don't have a lot of memory, however, we go a bit smaller + * to reduce the number of AGI and ialloc btree blocks we need to keep + * around for xfs_dilocate(). We choose which one to use in + * xfs_mount_int(). + */ +#define XFS_INODE_BIG_CLUSTER_SIZE 8192 +#define XFS_INODE_SMALL_CLUSTER_SIZE 4096 +#define XFS_INODE_CLUSTER_SIZE(mp) (mp)->m_inode_cluster_size + +/* + * Make an inode pointer out of the buffer/offset. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MAKE_IPTR) +struct xfs_dinode *xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o); +#define XFS_MAKE_IPTR(mp,b,o) xfs_make_iptr(mp,b,o) +#else +#define XFS_MAKE_IPTR(mp,b,o) \ + ((xfs_dinode_t *)(xfs_buf_offset(b, (o) << (mp)->m_sb.sb_inodelog))) +#endif + +/* + * Find a free (set) bit in the inode bitmask. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IALLOC_FIND_FREE) +int xfs_ialloc_find_free(xfs_inofree_t *fp); +#define XFS_IALLOC_FIND_FREE(fp) xfs_ialloc_find_free(fp) +#else +#define XFS_IALLOC_FIND_FREE(fp) xfs_lowbit64(*(fp)) +#endif + + +#ifdef __KERNEL__ + +/* + * Prototypes for visible xfs_ialloc.c routines. + */ + +/* + * Allocate an inode on disk. + * Mode is used to tell whether the new inode will need space, and whether + * it is a directory. + * + * To work within the constraint of one allocation per transaction, + * xfs_dialloc() is designed to be called twice if it has to do an + * allocation to make more free inodes. If an inode is + * available without an allocation, agbp would be set to the current + * agbp and alloc_done set to false. + * If an allocation needed to be done, agbp would be set to the + * inode header of the allocation group and alloc_done set to true. + * The caller should then commit the current transaction and allocate a new + * transaction. xfs_dialloc() should then be called again with + * the agbp value returned from the previous call. 
+ * + * Once we successfully pick an inode its number is returned and the + * on-disk data structures are updated. The inode itself is not read + * in, since doing so would break ordering constraints with xfs_reclaim. + * + * *agbp should be set to NULL on the first call, *alloc_done set to FALSE. + */ +int /* error */ +xfs_dialloc( + struct xfs_trans *tp, /* transaction pointer */ + xfs_ino_t parent, /* parent inode (directory) */ + mode_t mode, /* mode bits for new inode */ + int okalloc, /* ok to allocate more space */ + struct xfs_buf **agbp, /* buf for a.g. inode header */ + boolean_t *alloc_done, /* an allocation was done to replenish + the free inodes */ + xfs_ino_t *inop); /* inode number allocated */ + +/* + * Free disk inode. Carefully avoids touching the incore inode, all + * manipulations incore are the caller's responsibility. + * The on-disk inode is not changed by this operation, only the + * btree (free inode mask) is changed. + */ +int /* error */ +xfs_difree( + struct xfs_trans *tp, /* transaction pointer */ + xfs_ino_t inode); /* inode to be freed */ + +/* + * Return the location of the inode in bno/len/off, + * for mapping it into a buffer. + */ +int +xfs_dilocate( + struct xfs_mount *mp, /* file system mount structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_ino_t ino, /* inode to locate */ + xfs_fsblock_t *bno, /* output: block containing inode */ + int *len, /* output: num blocks in cluster*/ + int *off, /* output: index in block of inode */ + uint flags); /* flags for inode btree lookup */ + +/* + * Compute and fill in value of m_in_maxlevels. 
+ */ +void +xfs_ialloc_compute_maxlevels( + struct xfs_mount *mp); /* file system mount structure */ + +/* + * Log specified fields for the ag hdr (inode section) + */ +void +xfs_ialloc_log_agi( + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *bp, /* allocation group header buffer */ + int fields); /* bitmask of fields to log */ + +/* + * Read in the allocation group header (inode allocation section) + */ +int /* error */ +xfs_ialloc_read_agi( + struct xfs_mount *mp, /* file system mount structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + struct xfs_buf **bpp); /* allocation group hdr buf */ + +#endif /* __KERNEL__ */ + +#endif /* __XFS_IALLOC_H__ */ diff --git a/include/xfs_ialloc_btree.h b/include/xfs_ialloc_btree.h new file mode 100644 index 000000000..e49b2597b --- /dev/null +++ b/include/xfs_ialloc_btree.h @@ -0,0 +1,318 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_IALLOC_BTREE_H__ +#define __XFS_IALLOC_BTREE_H__ + +/* + * Inode map on-disk structures + */ + +struct xfs_buf; +struct xfs_btree_cur; +struct xfs_btree_sblock; +struct xfs_mount; + +/* + * There is a btree for the inode map per allocation group. + */ +#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ + +typedef __uint64_t xfs_inofree_t; +#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) +#define XFS_INODES_PER_CHUNK_LOG (XFS_NBBYLOG + 3) +#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1) + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_MASKN) +xfs_inofree_t xfs_inobt_maskn(int i, int n); +#define XFS_INOBT_MASKN(i,n) xfs_inobt_maskn(i,n) +#else +#define XFS_INOBT_MASKN(i,n) \ + ((((n) >= XFS_INODES_PER_CHUNK ? 
\ + (xfs_inofree_t)0 : ((xfs_inofree_t)1 << (n))) - 1) << (i)) +#endif + +/* + * Data record structure + */ +typedef struct xfs_inobt_rec +{ + xfs_agino_t ir_startino; /* starting inode number */ + __int32_t ir_freecount; /* count of free inodes (set bits) */ + xfs_inofree_t ir_free; /* free inode mask */ +} xfs_inobt_rec_t; + +/* + * Key structure + */ +typedef struct xfs_inobt_key +{ + xfs_agino_t ir_startino; /* starting inode number */ +} xfs_inobt_key_t; + +typedef xfs_agblock_t xfs_inobt_ptr_t; /* btree pointer type */ + /* btree block header type */ +typedef struct xfs_btree_sblock xfs_inobt_block_t; + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_INOBT_BLOCK) +xfs_inobt_block_t *xfs_buf_to_inobt_block(struct xfs_buf *bp); +#define XFS_BUF_TO_INOBT_BLOCK(bp) xfs_buf_to_inobt_block(bp) +#else +#define XFS_BUF_TO_INOBT_BLOCK(bp) ((xfs_inobt_block_t *)(XFS_BUF_PTR(bp))) +#endif + +/* + * Bit manipulations for ir_free. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_MASK) +xfs_inofree_t xfs_inobt_mask(int i); +#define XFS_INOBT_MASK(i) xfs_inobt_mask(i) +#else +#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_IS_FREE) +int xfs_inobt_is_free(xfs_inobt_rec_t *rp, int i, xfs_arch_t arch); +#define XFS_INOBT_IS_FREE(rp,i,arch) xfs_inobt_is_free(rp,i,arch) +#else +#define XFS_INOBT_IS_FREE(rp,i,arch) ((INT_GET((rp)->ir_free, arch) \ + & XFS_INOBT_MASK(i)) != 0) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_SET_FREE) +void xfs_inobt_set_free(xfs_inobt_rec_t *rp, int i, xfs_arch_t arch); +#define XFS_INOBT_SET_FREE(rp,i,arch) xfs_inobt_set_free(rp,i,arch) +#else +#define XFS_INOBT_SET_FREE(rp,i,arch) (INT_MOD_EXPR((rp)->ir_free, arch, |= XFS_INOBT_MASK(i))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_CLR_FREE) +void xfs_inobt_clr_free(xfs_inobt_rec_t *rp, int i, xfs_arch_t arch); +#define XFS_INOBT_CLR_FREE(rp,i,arch) 
xfs_inobt_clr_free(rp,i,arch) +#else +#define XFS_INOBT_CLR_FREE(rp,i,arch) (INT_MOD_EXPR((rp)->ir_free, arch, &= ~XFS_INOBT_MASK(i))) +#endif + +/* + * Real block structures have a size equal to the disk block size. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_BLOCK_SIZE) +int xfs_inobt_block_size(int lev, struct xfs_btree_cur *cur); +#define XFS_INOBT_BLOCK_SIZE(lev,cur) xfs_inobt_block_size(lev,cur) +#else +#define XFS_INOBT_BLOCK_SIZE(lev,cur) (1 << (cur)->bc_blocklog) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_BLOCK_MAXRECS) +int xfs_inobt_block_maxrecs(int lev, struct xfs_btree_cur *cur); +#define XFS_INOBT_BLOCK_MAXRECS(lev,cur) xfs_inobt_block_maxrecs(lev,cur) +#else +#define XFS_INOBT_BLOCK_MAXRECS(lev,cur) \ + ((cur)->bc_mp->m_inobt_mxr[(lev) != 0]) /* slot 0 when lev == 0, slot 1 otherwise */ +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_BLOCK_MINRECS) +int xfs_inobt_block_minrecs(int lev, struct xfs_btree_cur *cur); +#define XFS_INOBT_BLOCK_MINRECS(lev,cur) xfs_inobt_block_minrecs(lev,cur) +#else +#define XFS_INOBT_BLOCK_MINRECS(lev,cur) \ + ((cur)->bc_mp->m_inobt_mnr[(lev) != 0]) /* slot 0 when lev == 0, slot 1 otherwise */ +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_IS_LAST_REC) +int xfs_inobt_is_last_rec(struct xfs_btree_cur *cur); +#define XFS_INOBT_IS_LAST_REC(cur) xfs_inobt_is_last_rec(cur) +#else +#define XFS_INOBT_IS_LAST_REC(cur) \ + ((cur)->bc_ptrs[0] == \ + INT_GET(XFS_BUF_TO_INOBT_BLOCK((cur)->bc_bufs[0])->bb_numrecs, ARCH_CONVERT)) +#endif + +/* + * Maximum number of inode btree levels. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IN_MAXLEVELS) +int xfs_in_maxlevels(struct xfs_mount *mp); +#define XFS_IN_MAXLEVELS(mp) xfs_in_maxlevels(mp) +#else +#define XFS_IN_MAXLEVELS(mp) ((mp)->m_in_maxlevels) +#endif + +/* + * block numbers in the AG. 
+ */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IBT_BLOCK) +xfs_agblock_t xfs_ibt_block(struct xfs_mount *mp); +#define XFS_IBT_BLOCK(mp) xfs_ibt_block(mp) +#else +#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_PREALLOC_BLOCKS) +xfs_agblock_t xfs_prealloc_blocks(struct xfs_mount *mp); +#define XFS_PREALLOC_BLOCKS(mp) xfs_prealloc_blocks(mp) +#else +#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) +#endif + +/* + * Record, key, and pointer address macros for btree blocks. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_REC_ADDR) +xfs_inobt_rec_t * +xfs_inobt_rec_addr(xfs_inobt_block_t *bb, int i, struct xfs_btree_cur *cur); +#define XFS_INOBT_REC_ADDR(bb,i,cur) xfs_inobt_rec_addr(bb,i,cur) +#else +#define XFS_INOBT_REC_ADDR(bb,i,cur) \ + XFS_BTREE_REC_ADDR(XFS_INOBT_BLOCK_SIZE(0,cur), xfs_inobt, bb, i, \ + XFS_INOBT_BLOCK_MAXRECS(0, cur)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_KEY_ADDR) +xfs_inobt_key_t * +xfs_inobt_key_addr(xfs_inobt_block_t *bb, int i, struct xfs_btree_cur *cur); +#define XFS_INOBT_KEY_ADDR(bb,i,cur) xfs_inobt_key_addr(bb,i,cur) +#else +#define XFS_INOBT_KEY_ADDR(bb,i,cur) \ + XFS_BTREE_KEY_ADDR(XFS_INOBT_BLOCK_SIZE(1,cur), xfs_inobt, bb, i, \ + XFS_INOBT_BLOCK_MAXRECS(1, cur)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INOBT_PTR_ADDR) +xfs_inobt_ptr_t * +xfs_inobt_ptr_addr(xfs_inobt_block_t *bb, int i, struct xfs_btree_cur *cur); +#define XFS_INOBT_PTR_ADDR(bb,i,cur) xfs_inobt_ptr_addr(bb,i,cur) +#else +#define XFS_INOBT_PTR_ADDR(bb,i,cur) \ + XFS_BTREE_PTR_ADDR(XFS_INOBT_BLOCK_SIZE(1,cur), xfs_inobt, bb, i, \ + XFS_INOBT_BLOCK_MAXRECS(1, cur)) +#endif + +/* + * Prototypes for externally visible routines. + */ + +/* + * Decrement cursor by one record at the level. + * For nonzero levels the leaf-ward information is untouched. 
+ */ +int /* error */ +xfs_inobt_decrement( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* level in btree, 0 is leaf */ + int *stat); /* success/failure */ + +#ifdef _NOTYET_ +/* + * Delete the record pointed to by cur. + * The cursor refers to the place where the record was (could be inserted) + * when the operation returns. + */ +int /* error */ +xfs_inobt_delete( + struct xfs_btree_cur *cur, /* btree cursor */ + int *stat); /* success/failure */ +#endif /* _NOTYET_ */ + +/* + * Get the data from the pointed-to record. + */ +int /* error */ +xfs_inobt_get_rec( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t *ino, /* output: starting inode of chunk */ + __int32_t *fcnt, /* output: number of free inodes */ + xfs_inofree_t *free, /* output: free inode mask */ + int *stat, /* output: success/failure */ + xfs_arch_t arch); /* input: architecture */ + +/* + * Increment cursor by one record at the level. + * For nonzero levels the leaf-ward information is untouched. + */ +int /* error */ +xfs_inobt_increment( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* level in btree, 0 is leaf */ + int *stat); /* success/failure */ + +/* + * Insert the current record at the point referenced by cur. + * The cursor may be inconsistent on return if splits have been done. + */ +int /* error */ +xfs_inobt_insert( + struct xfs_btree_cur *cur, /* btree cursor */ + int *stat); /* success/failure */ + +/* + * Lookup the record equal to ino in the btree given by cur. + */ +int /* error */ +xfs_inobt_lookup_eq( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free, /* free inode mask */ + int *stat); /* success/failure */ + +/* + * Lookup the first record greater than or equal to ino + * in the btree given by cur. 
+ */ +int /* error */ +xfs_inobt_lookup_ge( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free, /* free inode mask */ + int *stat); /* success/failure */ + +/* + * Lookup the first record less than or equal to ino + * in the btree given by cur. + */ +int /* error */ +xfs_inobt_lookup_le( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free, /* free inode mask */ + int *stat); /* success/failure */ + +/* + * Update the record referred to by cur, to the value given + * by [ino, fcnt, free]. + * This either works (return 0) or gets an EFSCORRUPTED error. + */ +int /* error */ +xfs_inobt_update( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free); /* free inode mask */ + +#endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/include/xfs_imap.h b/include/xfs_imap.h new file mode 100644 index 000000000..54b58d6af --- /dev/null +++ b/include/xfs_imap.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_IMAP_H__ +#define __XFS_IMAP_H__ + +/* + * This is the structure passed to xfs_imap() to map + * an inode number to its on disk location. + */ +typedef struct xfs_imap { + xfs_daddr_t im_blkno; /* starting BB of inode chunk */ + uint im_len; /* length in BBs of inode chunk */ + xfs_agblock_t im_agblkno; /* logical block of inode chunk in ag */ + ushort im_ioffset; /* inode offset in block in "inodes" */ + ushort im_boffset; /* inode offset in block in bytes */ +} xfs_imap_t; + +#ifdef __KERNEL__ +struct xfs_mount; +struct xfs_trans; +int xfs_imap(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, + xfs_imap_t *, uint); +#endif + +#endif /* __XFS_IMAP_H__ */ diff --git a/include/xfs_inode.h b/include/xfs_inode.h new file mode 100644 index 000000000..742ca12d7 --- /dev/null +++ b/include/xfs_inode.h @@ -0,0 +1,615 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_INODE_H__ +#define __XFS_INODE_H__ + +/* + * File incore extent information, present for each of data & attr forks. + */ +#define XFS_INLINE_EXTS 2 +#define XFS_INLINE_DATA 32 +typedef struct xfs_ifork { + int if_bytes; /* bytes in if_u1 */ + int if_real_bytes; /* bytes allocated in if_u1 */ + xfs_bmbt_block_t *if_broot; /* file's incore btree root */ + short if_broot_bytes; /* bytes allocated for root */ + unsigned char if_flags; /* per-fork flags */ + unsigned char if_ext_max; /* max # of extent records */ + xfs_extnum_t if_lastex; /* last if_extents used */ + union { + xfs_bmbt_rec_t *if_extents; /* linear map file exts */ + char *if_data; /* inline file data */ + } if_u1; + union { + xfs_bmbt_rec_t if_inline_ext[XFS_INLINE_EXTS]; + /* very small file extents */ + char if_inline_data[XFS_INLINE_DATA]; + /* very small file data */ + xfs_dev_t if_rdev; /* dev number if special */ + uuid_t if_uuid; /* mount point value */ + } if_u2; +} xfs_ifork_t; + +/* + * Flags for xfs_ichgtime(). 
+ */ +#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ +#define XFS_ICHGTIME_ACC 0x2 /* data fork access timestamp */ +#define XFS_ICHGTIME_CHG 0x4 /* inode field change timestamp */ + +/* + * Per-fork incore inode flags. + */ +#define XFS_IFINLINE 0x0001 /* Inline data is read in */ +#define XFS_IFEXTENTS 0x0002 /* All extent pointers are read in */ +#define XFS_IFBROOT 0x0004 /* if_broot points to the bmap b-tree root */ + +/* + * Flags for xfs_imap() and xfs_dilocate(). + */ +#define XFS_IMAP_LOOKUP 0x1 + +/* + * Maximum number of extent pointers in if_u1.if_extents. + */ +#define XFS_MAX_INCORE_EXTENTS 32768 + + +#ifdef __KERNEL__ +struct bhv_desc; +struct cred; +struct ktrace; +struct vnode; +struct xfs_buf; +struct xfs_bmap_free; +struct xfs_bmbt_irec; +struct xfs_bmbt_block; +struct xfs_ext_attr; +struct xfs_inode; +struct xfs_inode_log_item; +struct xfs_mount; +struct xfs_trans; +struct xfs_dquot; +struct pm; + + +/* + * This structure is used to communicate which extents of a file + * were holes when a write started from xfs_write_file() to + * xfs_strat_read(). This is necessary so that we can know which + * blocks need to be zeroed when they are read in in xfs_strat_read() + * if they weren't allocated when the buffer given to xfs_strat_read() + * was mapped. + * + * We keep a list of these attached to the inode. The list is + * protected by the inode lock and the fact that the io lock is + * held exclusively by writers. + */ +typedef struct xfs_gap { + struct xfs_gap *xg_next; + xfs_fileoff_t xg_offset_fsb; + xfs_extlen_t xg_count_fsb; +} xfs_gap_t; + +/* + * This structure is used to hold common pieces of the buffer + * and file for xfs_dio_write and xfs_dio_read. 
+ */ +typedef struct xfs_dio { + struct xfs_buf *xd_bp; + bhv_desc_t *xd_bdp; + struct xfs_inode *xd_ip; + struct xfs_iocore *xd_io; + struct cred *xd_cr; + struct pm *xd_pmp; + int xd_blkalgn; + int xd_ioflag; + xfs_off_t xd_start; + size_t xd_length; +} xfs_dio_t; + + +typedef struct xfs_iocore { + void *io_obj; /* pointer to container + * inode or dcxvn structure */ + struct xfs_mount *io_mount; /* fs mount struct ptr */ + mrlock_t *io_lock; /* inode lock */ + mrlock_t *io_iolock; /* inode IO lock */ + sema_t *io_flock; /* inode flush lock */ + mutex_t io_rlock; /* inode readahead mutex */ + + /* I/O state */ + xfs_off_t io_offset; /* last buf offset */ + xfs_off_t io_next_offset; /* seq read detector */ + unsigned int io_last_req_sz; /* last read size */ + unsigned int io_size; /* file io buffer len */ + xfs_fsize_t io_new_size; /* sz when write completes */ + xfs_off_t io_write_offset; + /* start off of curr write */ + xfs_fileoff_t io_reada_blkno; /* next blk to start ra */ + xfs_gap_t *io_gap_list; /* hole list in write range */ + unsigned int io_readio_blocks; /* read buffer size */ + unsigned int io_writeio_blocks; /* write buffer size */ + uchar_t io_readio_log; /* log2 of read buffer size */ + uchar_t io_writeio_log; /* log2 of write buffer size */ + uchar_t io_max_io_log; /* max r/w io value */ + int io_queued_bufs; /* count of xfsd queued bufs*/ + + /* Miscellaneous state. */ + unsigned int io_flags; /* IO related flags */ + + /* DMAPI state */ + __uint32_t io_dmevmask; /* DMIG event mask */ + __uint16_t io_dmstate; /* DMIG state info */ +} xfs_iocore_t; + +#define XFS_IO_INODE(io) ((xfs_inode_t *) ((io)->io_obj)) +#define XFS_IO_DCXVN(io) ((dcxvn_t *) ((io)->io_obj)) + +/* + * Flags in the flags field + */ + +#define XFS_IOCORE_ISXFS 0x01 +#define XFS_IOCORE_ISCXFS 0x02 +#define XFS_IOCORE_RT 0x04 +#define XFS_IOCORE_UIOSZ 0x08 + +#define IO_IS_XFS(io) ((io)->io_flags & XFS_IOCORE_ISXFS) + +/* + * Clear out the read-ahead state in the in-core inode. 
+ * We actually only need to clear io_next_offset and + * io_last_req_sz to get the effect of making all the + * read ahead state unusable. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INODE_CLEAR_READ_AHEAD) +void xfs_inode_clear_read_ahead(xfs_iocore_t *io); +#define XFS_INODE_CLEAR_READ_AHEAD(io) xfs_inode_clear_read_ahead(io) +#else +#define XFS_INODE_CLEAR_READ_AHEAD(io) do { \ + mutex_lock(&((io)->io_rlock), PINOD); \ + (io)->io_next_offset = 0; \ + (io)->io_last_req_sz = 0; \ + mutex_unlock(&((io)->io_rlock)); } while (0) +#endif + + +/* + * xfs_iocore prototypes + */ + +extern void xfs_iocore_inode_init(struct xfs_inode *); +extern void xfs_iocore_inode_reinit(struct xfs_inode *); +extern void xfs_iocore_reset(xfs_iocore_t *); +extern void xfs_iocore_destroy(xfs_iocore_t *); + + +/* + * This is the type used in the xfs inode hash table. + * An array of these is allocated for each mounted + * file system to hash the inodes for that file system. + */ +typedef struct xfs_ihash { + struct xfs_inode *ih_next; + mrlock_t ih_lock; + uint ih_version; +} xfs_ihash_t; +#if defined(MP) +#pragma set type attribute xfs_ihash align=128 +#endif + +/* + * Inode hashing and hash bucket locking. + */ +#define XFS_BUCKETS(mp) (37*(mp)->m_sb.sb_agcount-1) +#define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)(ino)) % (mp)->m_ihsize)) + +/* + * This is the xfs inode cluster hash. This hash is used by xfs_iflush to + * find inodes that share a cluster and can be flushed to disk at the same + * time. + */ + +typedef struct xfs_chashlist { + struct xfs_chashlist *chl_next; + struct xfs_inode *chl_ip; + xfs_daddr_t chl_blkno; /* starting block number of + * the cluster */ +#ifdef DEBUG + struct xfs_buf *chl_buf; /* debug: the inode buffer */ +#endif +} xfs_chashlist_t; + +typedef struct xfs_chash { + xfs_chashlist_t *ch_list; + lock_t ch_lock; +} xfs_chash_t; + + +/* + * This is the xfs in-core inode structure. + * Most of the on-disk inode is embedded in the i_d field. 
+ * + * The extent pointers/inline file space, however, are managed + * separately. The memory for this information is pointed to by + * the if_u1 unions depending on the type of the data. + * This is used to linearize the array of extents for fast in-core + * access. This is used until the file's number of extents + * surpasses XFS_MAX_INCORE_EXTENTS, at which point all extent pointers + * are accessed through the buffer cache. + * + * Other state kept in the in-core inode is used for identification, + * locking, transactional updating, etc. of the inode. + * + * Generally, we do not want to hold the i_rlock while holding the + * i_ilock. Hierarchy is i_iolock followed by i_rlock. + * + * xfs_iptr_t mirrors the leading inode fields, i_hash through i_mount + * (which include i_mnext and i_mprev); it is used as a marker in the + * inode chain off the mount structure by xfs_sync calls. + */ + +typedef struct { + struct xfs_ihash *ip_hash; /* pointer to hash header */ + struct xfs_inode *ip_next; /* inode hash link forw */ + struct xfs_inode *ip_mnext; /* next inode in mount list */ + struct xfs_inode *ip_mprev; /* ptr to prev inode */ + struct xfs_inode **ip_prevp; /* ptr to prev i_next */ + struct xfs_mount *ip_mount; /* fs mount struct ptr */ +} xfs_iptr_t; + +typedef struct xfs_inode { + /* Inode linking and identification information. 
*/ + struct xfs_ihash *i_hash; /* pointer to hash header */ + struct xfs_inode *i_next; /* inode hash link forw */ + struct xfs_inode *i_mnext; /* next inode in mount list */ + struct xfs_inode *i_mprev; /* ptr to prev inode */ + struct xfs_inode **i_prevp; /* ptr to prev i_next */ + struct xfs_mount *i_mount; /* fs mount struct ptr */ + struct bhv_desc i_bhv_desc; /* inode behavior descriptor*/ + struct xfs_dquot *i_udquot; /* user dquot */ + struct xfs_dquot *i_pdquot; /* project dquot */ + + /* Inode location stuff */ + xfs_ino_t i_ino; /* inode number (agno/agino)*/ + xfs_daddr_t i_blkno; /* blkno of inode buffer */ + dev_t i_dev; /* dev for this inode */ + ushort i_len; /* len of inode buffer */ + ushort i_boffset; /* off of inode in buffer */ + + /* Extent information. */ + xfs_ifork_t *i_afp; /* attribute fork pointer */ + xfs_ifork_t i_df; /* data fork */ + + /* Transaction and locking information. */ + struct xfs_trans *i_transp; /* ptr to owning transaction*/ + struct xfs_inode_log_item *i_itemp; /* logging information */ + mrlock_t i_lock; /* inode lock */ + mrlock_t i_iolock; /* inode IO lock */ + sema_t i_flock; /* inode flush lock */ + unsigned int i_pincount; /* inode pin count */ + sv_t i_pinsema; /* inode pin sema */ + lock_t i_ipinlock; /* inode pinning mutex */ + struct xfs_inode *i_release; /* inode to unref */ + + /* I/O state */ + xfs_iocore_t i_iocore; /* I/O core */ + + /* Miscellaneous state. 
*/ + unsigned short i_flags; /* see defined flags below */ + unsigned short i_update_core; /* timestamps/size is dirty */ + unsigned short i_update_size; /* di_size field is dirty */ + unsigned int i_gen; /* generation count */ + unsigned int i_delayed_blks; /* count of delay alloc blks */ + struct xfs_ext_attr *i_ext_attr; /* Critical ext attributes */ + void *i_ilock_ra; /* current ilock ret addr */ + + xfs_dinode_core_t i_d; /* most of ondisk inode */ + xfs_chashlist_t *i_chash; /* cluster hash list header */ + struct xfs_inode *i_cnext; /* cluster hash link forward */ + struct xfs_inode *i_cprev; /* cluster hash link backward */ + +#ifdef DEBUG + /* Trace buffers per inode. */ + struct ktrace *i_xtrace; /* inode extent list trace */ + struct ktrace *i_btrace; /* inode bmap btree trace */ + struct ktrace *i_rwtrace; /* inode read/write trace */ + struct ktrace *i_strat_trace; /* inode strat_write trace */ + struct ktrace *i_lock_trace; /* inode lock/unlock trace */ + struct ktrace *i_dir_trace; /* inode directory trace */ +#endif /* DEBUG */ +} xfs_inode_t; + +#endif /* __KERNEL__ */ + + +/* + * Fork handling. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_PTR) +xfs_ifork_t *xfs_ifork_ptr(xfs_inode_t *ip, int w); +#define XFS_IFORK_PTR(ip,w) xfs_ifork_ptr(ip,w) +#else +#define XFS_IFORK_PTR(ip,w) ((w) == XFS_DATA_FORK ? 
&(ip)->i_df : (ip)->i_afp) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_Q) +int xfs_ifork_q(xfs_inode_t *ip); +#define XFS_IFORK_Q(ip) xfs_ifork_q(ip) +#else +#define XFS_IFORK_Q(ip) XFS_CFORK_Q(&(ip)->i_d) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_DSIZE) +int xfs_ifork_dsize(xfs_inode_t *ip); +#define XFS_IFORK_DSIZE(ip) xfs_ifork_dsize(ip) +#else +#define XFS_IFORK_DSIZE(ip) XFS_CFORK_DSIZE(&(ip)->i_d, (ip)->i_mount) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_ASIZE) +int xfs_ifork_asize(xfs_inode_t *ip); +#define XFS_IFORK_ASIZE(ip) xfs_ifork_asize(ip) +#else +#define XFS_IFORK_ASIZE(ip) XFS_CFORK_ASIZE(&(ip)->i_d, (ip)->i_mount) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_SIZE) +int xfs_ifork_size(xfs_inode_t *ip, int w); +#define XFS_IFORK_SIZE(ip,w) xfs_ifork_size(ip,w) +#else +#define XFS_IFORK_SIZE(ip,w) XFS_CFORK_SIZE(&(ip)->i_d, (ip)->i_mount, w) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_FORMAT) +int xfs_ifork_format(xfs_inode_t *ip, int w); +#define XFS_IFORK_FORMAT(ip,w) xfs_ifork_format(ip,w) +#else +#define XFS_IFORK_FORMAT(ip,w) XFS_CFORK_FORMAT(&(ip)->i_d, w) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_FMT_SET) +void xfs_ifork_fmt_set(xfs_inode_t *ip, int w, int n); +#define XFS_IFORK_FMT_SET(ip,w,n) xfs_ifork_fmt_set(ip,w,n) +#else +#define XFS_IFORK_FMT_SET(ip,w,n) XFS_CFORK_FMT_SET(&(ip)->i_d, w, n) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_NEXTENTS) +int xfs_ifork_nextents(xfs_inode_t *ip, int w); +#define XFS_IFORK_NEXTENTS(ip,w) xfs_ifork_nextents(ip,w) +#else +#define XFS_IFORK_NEXTENTS(ip,w) XFS_CFORK_NEXTENTS(&(ip)->i_d, w) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_IFORK_NEXT_SET) +void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n); +#define XFS_IFORK_NEXT_SET(ip,w,n) xfs_ifork_next_set(ip,w,n) +#else +#define XFS_IFORK_NEXT_SET(ip,w,n) XFS_CFORK_NEXT_SET(&(ip)->i_d, w, n) +#endif + 
/*
 * Flags for inode locking, one bit each.
 */
#define XFS_IOLOCK_EXCL		(1 << 0)
#define XFS_IOLOCK_SHARED	(1 << 1)
#define XFS_ILOCK_EXCL		(1 << 2)
#define XFS_ILOCK_SHARED	(1 << 3)
#define XFS_IUNLOCK_NONOTIFY	(1 << 4)
#define XFS_IOLOCK_NESTED	(1 << 5)
#define XFS_EXTENT_TOKEN_RD	(1 << 6)
#define XFS_SIZE_TOKEN_RD	(1 << 7)
#define XFS_WILLLEND		(1 << 8)	/* Always acquire tokens for lending */

/* Read-token combination, and the write forms (read token plus XFS_WILLLEND). */
#define XFS_EXTSIZE_RD		(XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD)
#define XFS_EXTENT_TOKEN_WR	(XFS_EXTENT_TOKEN_RD | XFS_WILLLEND)
#define XFS_SIZE_TOKEN_WR	(XFS_SIZE_TOKEN_RD | XFS_WILLLEND)
#define XFS_EXTSIZE_WR		(XFS_EXTSIZE_RD | XFS_WILLLEND)

/*
 * Every locking flag above except XFS_IUNLOCK_NONOTIFY.
 */
#define XFS_LOCK_MASK \
	(XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED | XFS_IOLOCK_NESTED | \
	 XFS_ILOCK_EXCL | XFS_ILOCK_SHARED | \
	 XFS_EXTSIZE_WR)
/*
 * Pick the inode cluster hash bucket for block number "blk"
 * (m_chash is the same size as m_ihash).
 *
 * "blk" is parenthesized before the cast so that the whole argument
 * expression, not just its first operand, is converted to uint before the
 * modulo by m_chsize.
 */
#define XFS_CHASH(mp,blk) ((mp)->m_chash + (((uint)(blk)) % (mp)->m_chsize))
+ */ +void xfs_ihash_init(struct xfs_mount *); +void xfs_ihash_free(struct xfs_mount *); +void xfs_chash_init(struct xfs_mount *); +void xfs_chash_free(struct xfs_mount *); +xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, + struct xfs_trans *); +void xfs_inode_lock_init(xfs_inode_t *, struct vnode *); +int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, + uint, xfs_inode_t **, xfs_daddr_t); +int xfs_vn_iget(struct vnode *, struct xfs_mount *, + struct xfs_trans *, xfs_ino_t, + uint, xfs_inode_t **, xfs_daddr_t); +void xfs_iput(xfs_inode_t *, uint); +void xfs_ilock(xfs_inode_t *, uint); +int xfs_ilock_nowait(xfs_inode_t *, uint); +void xfs_iunlock(xfs_inode_t *, uint); +void xfs_ilock_demote(xfs_inode_t *, uint); +void xfs_iflock(xfs_inode_t *); +int xfs_iflock_nowait(xfs_inode_t *); +uint xfs_ilock_map_shared(xfs_inode_t *); +void xfs_iunlock_map_shared(xfs_inode_t *, uint); +void xfs_ifunlock(xfs_inode_t *); +void xfs_ireclaim(xfs_inode_t *); +int xfs_finish_reclaim(xfs_inode_t *, int); + +/* + * xfs_inode.c prototypes. 
+ */ +int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, + xfs_dinode_t **, struct xfs_buf **, int *); +int xfs_itobp(struct xfs_mount *, struct xfs_trans *, + xfs_inode_t *, xfs_dinode_t **, struct xfs_buf **, + xfs_daddr_t); +int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, + xfs_inode_t **, xfs_daddr_t); +int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); +int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, nlink_t, + dev_t, struct cred *, xfs_prid_t, int, + struct xfs_buf **, boolean_t *, xfs_inode_t **); +void xfs_xlate_dinode_core(xfs_caddr_t, struct xfs_dinode_core *, int, + xfs_arch_t); +int xfs_ifree(struct xfs_trans *, xfs_inode_t *); +int xfs_atruncate_start(xfs_inode_t *); +void xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); +int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, + xfs_fsize_t, int, int); +int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); +int xfs_igrow_start(xfs_inode_t *, xfs_fsize_t, struct cred *); +void xfs_igrow_finish(struct xfs_trans *, xfs_inode_t *, + xfs_fsize_t, int); + +void xfs_idestroy_fork(xfs_inode_t *, int); +void xfs_idestroy(xfs_inode_t *); +void xfs_idata_realloc(xfs_inode_t *, int, int); +void xfs_iextract(xfs_inode_t *); +void xfs_iext_realloc(xfs_inode_t *, int, int); +void xfs_iroot_realloc(xfs_inode_t *, int, int); +void xfs_ipin(xfs_inode_t *); +void xfs_iunpin(xfs_inode_t *); +unsigned int xfs_ipincount(xfs_inode_t *); +int xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_32_t *, int); +int xfs_iflush(xfs_inode_t *, uint); +int xfs_iflush_all(struct xfs_mount *, int); +int xfs_ibusy_check(xfs_inode_t *, int); +int xfs_iaccess(xfs_inode_t *, mode_t); +uint xfs_iroundup(uint); +void xfs_ichgtime(xfs_inode_t *, int); +xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); +xfs_inode_t *xfs_get_inode(dev_t, xfs_ino_t); +void xfs_lock_inodes(xfs_inode_t **, int, int, uint); + + +#ifdef DEBUG +void xfs_isize_check(struct xfs_mount *, xfs_inode_t *, 
xfs_fsize_t); +#else /* DEBUG */ +#define xfs_isize_check(mp, ip, isize) +#endif /* DEBUG */ + +#if defined(DEBUG) +void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); +#else +#define xfs_inobp_check(mp, bp) +#endif /* DEBUG */ + +extern struct xfs_zone *xfs_chashlist_zone; +extern struct xfs_zone *xfs_ifork_zone; +extern struct xfs_zone *xfs_inode_zone; +extern struct xfs_zone *xfs_ili_zone; +extern struct vnodeops xfs_vnodeops; + +#ifdef XFS_ILOCK_TRACE +#define XFS_ILOCK_KTRACE_SIZE 32 +void xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, + inst_t *ra); +#endif + +#endif /* __KERNEL__ */ + +#endif /* __XFS_INODE_H__ */ diff --git a/include/xfs_inode_item.h b/include/xfs_inode_item.h new file mode 100644 index 000000000..d3433aaee --- /dev/null +++ b/include/xfs_inode_item.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_INODE_ITEM_H__ +#define __XFS_INODE_ITEM_H__ + +/* + * This is the structure used to lay out an inode log item in the + * log. The size of the inline data/extents/b-tree root to be logged + * (if any) is indicated in the ilf_dsize field. Changes to this structure + * must be added on to the end. + * + * Convention for naming inode log item versions : The current version + * is always named XFS_LI_INODE. When an inode log item gets superseded, + * add the latest version of IRIX that will generate logs with that item + * to the version name. + * + * -Version 1 of this structure (XFS_LI_5_3_INODE) included up to the first + * union (ilf_u) field. This was released with IRIX 5.3-XFS. + * -Version 2 of this structure (XFS_LI_6_1_INODE) is currently the entire + * structure. This was released with IRIX 6.0.1-XFS and IRIX 6.1. + * -Version 3 of this structure (XFS_LI_INODE) is the same as version 2 + * so a new structure definition wasn't necessary. However, we had + * to add a new type because the inode cluster size changed from 4K + * to 8K and the version number had to be rev'ved to keep older kernels + * from trying to recover logs with the 8K buffers in them. The logging + * code can handle recovery on different-sized clusters now so hopefully + * this'll be the last time we need to change the inode log item just + * for a change in the inode cluster size. This new version was + * released with IRIX 6.2. 
+ */ +typedef struct xfs_inode_log_format { + unsigned short ilf_type; /* inode log item type */ + unsigned short ilf_size; /* size of this item */ + uint ilf_fields; /* flags for fields logged */ + ushort ilf_asize; /* size of attr d/ext/root */ + ushort ilf_dsize; /* size of data/ext/root */ + xfs_ino_t ilf_ino; /* inode number */ + union { + xfs_dev_t ilfu_rdev; /* rdev value for dev inode*/ + uuid_t ilfu_uuid; /* mount point value */ + } ilf_u; + __int64_t ilf_blkno; /* blkno of inode buffer */ + int ilf_len; /* len of inode buffer */ + int ilf_boffset; /* off of inode in buffer */ +} xfs_inode_log_format_t; + +/* Initial version shipped with IRIX 5.3-XFS */ +typedef struct xfs_inode_log_format_v1 { + unsigned short ilf_type; /* inode log item type */ + unsigned short ilf_size; /* size of this item */ + uint ilf_fields; /* flags for fields logged */ + uint ilf_dsize; /* size of data/ext/root */ + xfs_ino_t ilf_ino; /* inode number */ + union { + xfs_dev_t ilfu_rdev; /* rdev value for dev inode*/ + uuid_t ilfu_uuid; /* mount point value */ + } ilf_u; +} xfs_inode_log_format_t_v1; + +/* + * Flags for xfs_trans_log_inode flags field. 
/* Individual "what to log" bits for an inode. */
#define	XFS_ILOG_CORE	0x001	/* log standard inode fields */
#define	XFS_ILOG_DDATA	0x002	/* log i_df.if_data */
#define	XFS_ILOG_DEXT	0x004	/* log i_df.if_extents */
#define	XFS_ILOG_DBROOT	0x008	/* log i_df.i_broot */
#define	XFS_ILOG_DEV	0x010	/* log the dev field */
#define	XFS_ILOG_UUID	0x020	/* log the uuid field */
#define	XFS_ILOG_ADATA	0x040	/* log i_af.if_data */
#define	XFS_ILOG_AEXT	0x080	/* log i_af.if_extents */
#define	XFS_ILOG_ABROOT	0x100	/* log i_af.i_broot */

/* All data-fork (i_df) bits. */
#define	XFS_ILOG_DFORK \
	(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DBROOT)

/* All attribute-fork (i_af) bits. */
#define	XFS_ILOG_AFORK \
	(XFS_ILOG_ADATA | XFS_ILOG_AEXT | XFS_ILOG_ABROOT)

/* Every bit except XFS_ILOG_CORE. */
#define	XFS_ILOG_NONCORE \
	(XFS_ILOG_DFORK | XFS_ILOG_AFORK | XFS_ILOG_DEV | XFS_ILOG_UUID)

/* Every bit defined above. */
#define	XFS_ILOG_ALL \
	(XFS_ILOG_CORE | XFS_ILOG_NONCORE)

#define	XFS_ILI_HOLD		0x1
#define	XFS_ILI_IOLOCKED_EXCL	0x2
#define	XFS_ILI_IOLOCKED_SHARED	0x4

/* Either flavour of XFS_ILI_IOLOCKED_*. */
#define	XFS_ILI_IOLOCKED_ANY \
	(XFS_ILI_IOLOCKED_SHARED | XFS_ILI_IOLOCKED_EXCL)
/*
 * Select the i_broot logging flag for fork "w": XFS_ILOG_DBROOT when w is
 * XFS_DATA_FORK, XFS_ILOG_ABROOT for any other value.
 */
#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ILOG_FBROOT)
int xfs_ilog_fbroot(int w);
#define	XFS_ILOG_FBROOT(w)	xfs_ilog_fbroot(w)
#else
#define	XFS_ILOG_FBROOT(w) \
	((w) != XFS_DATA_FORK ? XFS_ILOG_ABROOT : XFS_ILOG_DBROOT)
#endif

/*
 * Select the if_extents logging flag for fork "w": XFS_ILOG_DEXT when w is
 * XFS_DATA_FORK, XFS_ILOG_AEXT for any other value.
 */
#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_ILOG_FEXT)
int xfs_ilog_fext(int w);
#define	XFS_ILOG_FEXT(w)	xfs_ilog_fext(w)
#else
#define	XFS_ILOG_FEXT(w) \
	((w) != XFS_DATA_FORK ? XFS_ILOG_AEXT : XFS_ILOG_DEXT)
#endif
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_INUM_H__ +#define __XFS_INUM_H__ + +/* + * Inode number format: + * low inopblog bits - offset in block + * next agblklog bits - block number in ag + * next agno_log bits - ag number + * high agno_log-agblklog-inopblog bits - 0 + */ + +typedef __uint32_t xfs_agino_t; /* within allocation grp inode number */ + +/* + * Useful inode bits for this kernel. + * Used in some places where having 64-bits in the 32-bit kernels + * costs too much. 
+ */ +#if XFS_BIG_FILESYSTEMS +typedef xfs_ino_t xfs_intino_t; +#else +typedef __uint32_t xfs_intino_t; +#endif + +#define NULLFSINO ((xfs_ino_t)-1) +#define NULLAGINO ((xfs_agino_t)-1) + +struct xfs_mount; + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_MASK) +__uint32_t xfs_ino_mask(int k); +#define XFS_INO_MASK(k) xfs_ino_mask(k) +#else +#define XFS_INO_MASK(k) ((__uint32_t)((1ULL << (k)) - 1)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_OFFSET_BITS) +int xfs_ino_offset_bits(struct xfs_mount *mp); +#define XFS_INO_OFFSET_BITS(mp) xfs_ino_offset_bits(mp) +#else +#define XFS_INO_OFFSET_BITS(mp) ((mp)->m_sb.sb_inopblog) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_AGBNO_BITS) +int xfs_ino_agbno_bits(struct xfs_mount *mp); +#define XFS_INO_AGBNO_BITS(mp) xfs_ino_agbno_bits(mp) +#else +#define XFS_INO_AGBNO_BITS(mp) ((mp)->m_sb.sb_agblklog) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_AGINO_BITS) +int xfs_ino_agino_bits(struct xfs_mount *mp); +#define XFS_INO_AGINO_BITS(mp) xfs_ino_agino_bits(mp) +#else +#define XFS_INO_AGINO_BITS(mp) ((mp)->m_agino_log) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_AGNO_BITS) +int xfs_ino_agno_bits(struct xfs_mount *mp); +#define XFS_INO_AGNO_BITS(mp) xfs_ino_agno_bits(mp) +#else +#define XFS_INO_AGNO_BITS(mp) ((mp)->m_agno_log) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_BITS) +int xfs_ino_bits(struct xfs_mount *mp); +#define XFS_INO_BITS(mp) xfs_ino_bits(mp) +#else +#define XFS_INO_BITS(mp) (XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_AGNO) +xfs_agnumber_t xfs_ino_to_agno(struct xfs_mount *mp, xfs_ino_t i); +#define XFS_INO_TO_AGNO(mp,i) xfs_ino_to_agno(mp,i) +#else +#define XFS_INO_TO_AGNO(mp,i) \ + ((xfs_agnumber_t)((i) >> XFS_INO_AGINO_BITS(mp))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_AGINO) +xfs_agino_t 
xfs_ino_to_agino(struct xfs_mount *mp, xfs_ino_t i); +#define XFS_INO_TO_AGINO(mp,i) xfs_ino_to_agino(mp,i) +#else +#define XFS_INO_TO_AGINO(mp,i) \ + ((xfs_agino_t)(i) & XFS_INO_MASK(XFS_INO_AGINO_BITS(mp))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_AGBNO) +xfs_agblock_t xfs_ino_to_agbno(struct xfs_mount *mp, xfs_ino_t i); +#define XFS_INO_TO_AGBNO(mp,i) xfs_ino_to_agbno(mp,i) +#else +#define XFS_INO_TO_AGBNO(mp,i) \ + (((xfs_agblock_t)(i) >> XFS_INO_OFFSET_BITS(mp)) & \ + XFS_INO_MASK(XFS_INO_AGBNO_BITS(mp))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_OFFSET) +int xfs_ino_to_offset(struct xfs_mount *mp, xfs_ino_t i); +#define XFS_INO_TO_OFFSET(mp,i) xfs_ino_to_offset(mp,i) +#else +#define XFS_INO_TO_OFFSET(mp,i) \ + ((int)(i) & XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_INO_TO_FSB) +xfs_fsblock_t xfs_ino_to_fsb(struct xfs_mount *mp, xfs_ino_t i); +#define XFS_INO_TO_FSB(mp,i) xfs_ino_to_fsb(mp,i) +#else +#define XFS_INO_TO_FSB(mp,i) \ + XFS_AGB_TO_FSB(mp, XFS_INO_TO_AGNO(mp,i), XFS_INO_TO_AGBNO(mp,i)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGINO_TO_INO) +xfs_ino_t +xfs_agino_to_ino(struct xfs_mount *mp, xfs_agnumber_t a, xfs_agino_t i); +#define XFS_AGINO_TO_INO(mp,a,i) xfs_agino_to_ino(mp,a,i) +#else +#define XFS_AGINO_TO_INO(mp,a,i) \ + (((xfs_ino_t)(a) << XFS_INO_AGINO_BITS(mp)) | (i)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGINO_TO_AGBNO) +xfs_agblock_t xfs_agino_to_agbno(struct xfs_mount *mp, xfs_agino_t i); +#define XFS_AGINO_TO_AGBNO(mp,i) xfs_agino_to_agbno(mp,i) +#else +#define XFS_AGINO_TO_AGBNO(mp,i) ((i) >> XFS_INO_OFFSET_BITS(mp)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_AGINO_TO_OFFSET) +int xfs_agino_to_offset(struct xfs_mount *mp, xfs_agino_t i); +#define XFS_AGINO_TO_OFFSET(mp,i) xfs_agino_to_offset(mp,i) +#else +#define XFS_AGINO_TO_OFFSET(mp,i) \ + ((i) & 
XFS_INO_MASK(XFS_INO_OFFSET_BITS(mp))) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_OFFBNO_TO_AGINO) +xfs_agino_t xfs_offbno_to_agino(struct xfs_mount *mp, xfs_agblock_t b, int o); +#define XFS_OFFBNO_TO_AGINO(mp,b,o) xfs_offbno_to_agino(mp,b,o) +#else +#define XFS_OFFBNO_TO_AGINO(mp,b,o) \ + ((xfs_agino_t)(((b) << XFS_INO_OFFSET_BITS(mp)) | (o))) +#endif + +#if XFS_BIG_FILESYSTEMS +#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL)) +#define XFS_INO64_OFFSET ((xfs_ino_t)(1ULL << 32)) +#else +#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL)) +#endif +#define XFS_MAXINUMBER_32 ((xfs_ino_t)((1ULL << 32) - 1ULL)) + +#endif /* __XFS_INUM_H__ */ diff --git a/include/xfs_log.h b/include/xfs_log.h new file mode 100644 index 000000000..c333cefc4 --- /dev/null +++ b/include/xfs_log.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
/*
 * Midpoint of two block numbers, rounded down.
 *
 * Both arguments are parenthesized so that operators binding less tightly
 * than '+' inside an argument (|, &, comparisons, ?:) cannot re-associate
 * with the sum.  The sum is formed in the promoted type of the arguments,
 * so it can wrap or overflow if blk1 + blk2 exceeds that type's range.
 */
#define BLK_AVG(blk1, blk2)	(((blk1) + (blk2)) >> 1)
/*
 * Move a grant head backwards by "bytes".
 *
 * type 'w' selects the write head (l_grant_write_bytes/_cycle); any other
 * value selects the reserve head (l_grant_reserve_bytes/_cycle).  If the
 * byte count goes negative it is wrapped by adding l_logsize and the
 * matching cycle counter is decremented.
 *
 * "type" is parenthesized so that a conditional expression passed as the
 * argument is compared as a whole against 'w'.  The macro form evaluates
 * "log" several times and expands to a brace block, not an expression.
 */
#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XLOG_GRANT_SUB_SPACE)
void xlog_grant_sub_space(struct log *log, int bytes, int type);
#define XLOG_GRANT_SUB_SPACE(log,bytes,type)	\
	xlog_grant_sub_space(log,bytes,type)
#else
#define XLOG_GRANT_SUB_SPACE(log,bytes,type)				\
    {									\
	if ((type) == 'w') {						\
		(log)->l_grant_write_bytes -= (bytes);			\
		if ((log)->l_grant_write_bytes < 0) {			\
			(log)->l_grant_write_bytes += (log)->l_logsize;	\
			(log)->l_grant_write_cycle--;			\
		}							\
	} else {							\
		(log)->l_grant_reserve_bytes -= (bytes);		\
		if ((log)->l_grant_reserve_bytes < 0) {			\
			(log)->l_grant_reserve_bytes += (log)->l_logsize;\
			(log)->l_grant_reserve_cycle--;			\
		}							\
	}								\
    }
#endif

/*
 * Move a grant head forwards by "bytes".
 *
 * Head selection by "type" is the same as for XLOG_GRANT_SUB_SPACE.  If the
 * byte count becomes greater than l_logsize, l_logsize is subtracted and the
 * matching cycle counter is incremented.
 */
#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XLOG_GRANT_ADD_SPACE)
void xlog_grant_add_space(struct log *log, int bytes, int type);
#define XLOG_GRANT_ADD_SPACE(log,bytes,type)	\
	xlog_grant_add_space(log,bytes,type)
#else
#define XLOG_GRANT_ADD_SPACE(log,bytes,type)				\
    {									\
	if ((type) == 'w') {						\
		(log)->l_grant_write_bytes += (bytes);			\
		if ((log)->l_grant_write_bytes > (log)->l_logsize) {	\
			(log)->l_grant_write_bytes -= (log)->l_logsize;	\
			(log)->l_grant_write_cycle++;			\
		}							\
	} else {							\
		(log)->l_grant_reserve_bytes += (bytes);		\
		if ((log)->l_grant_reserve_bytes > (log)->l_logsize) {	\
			(log)->l_grant_reserve_bytes -= (log)->l_logsize;\
			(log)->l_grant_reserve_cycle++;			\
		}							\
	}								\
    }
#endif
/*
 * Unlink ticket "tic" from the circular, doubly linked ticket queue "q".
 *
 * If tic is the only element (its t_next points at itself) q becomes NULL.
 * Otherwise q is set to tic's successor and tic's neighbours are joined to
 * each other.  In both cases tic's link pointers are cleared and
 * XLOG_TIC_IN_Q is removed from its t_flags.
 *
 * Expands to a brace block; "q" and "tic" are evaluated more than once.
 */
#define XLOG_DEL_TICKETQ(q,tic)				\
    {							\
	if ((tic)->t_next != (tic)) {			\
		(q) = (tic)->t_next;			\
		(tic)->t_next->t_prev = (tic)->t_prev;	\
		(tic)->t_prev->t_next = (tic)->t_next;	\
	} else {					\
		(q) = NULL;				\
	}						\
	(tic)->t_prev = NULL;				\
	(tic)->t_next = NULL;				\
	(tic)->t_flags &= ~XLOG_TIC_IN_Q;		\
    }
/* Per-operation flag bits carried in the log operation header. */
#define XLOG_START_TRANS	0x01	/* Start a new transaction */
#define XLOG_COMMIT_TRANS	0x02	/* Commit this transaction */
#define XLOG_CONTINUE_TRANS	0x04	/* Cont this trans into new region */
/*
 * NOTE(review): the comment on XLOG_WAS_CONT_TRANS is word-for-word the one
 * on XLOG_CONTINUE_TRANS; presumably this bit marks a region that continues
 * an earlier one - confirm against the log writer.
 */
#define XLOG_WAS_CONT_TRANS	0x08	/* Cont this trans into new region */
#define XLOG_END_TRANS		0x10	/* End a continued transaction */
#define XLOG_UNMOUNT_TRANS	0x20	/* Unmount a filesystem transaction */

/* Every flag above except XLOG_START_TRANS. */
#define XLOG_SKIP_TRANS \
	(XLOG_UNMOUNT_TRANS | \
	 XLOG_END_TRANS | \
	 XLOG_WAS_CONT_TRANS | \
	 XLOG_CONTINUE_TRANS | \
	 XLOG_COMMIT_TRANS)
Allocations + * might include space beyond the EOF. So if we just push the EOF a + * little, the last transaction for the file could contain the wrong + * size. If there is no file system activity, after an allocation + * transaction, and the system crashes, the allocation transaction + * will get replayed and the file will be truncated. This could + * be hours/days/... after the allocation occurred. + * + * The fix for this is to do two dummy transactions when the + * system is idle. We need two dummy transaction because the h_tail_lsn + * in the log record header needs to point beyond the last possible + * non-dummy transaction. The first dummy changes the h_tail_lsn to + * the first transaction before the dummy. The second dummy causes + * h_tail_lsn to point to the first dummy. Recovery starts at h_tail_lsn. + * + * These dummy transactions get committed when everything + * is idle (after there has been some activity). + * + * There are 5 states used to control this. + * + * IDLE -- no logging has been done on the file system or + * we are done covering previous transactions. + * NEED -- logging has occurred and we need a dummy transaction + * when the log becomes idle. + * DONE -- we were in the NEED state and have committed a dummy + * transaction. + * NEED2 -- we detected that a dummy transaction has gone to the + * on disk log with no other transactions. + * DONE2 -- we committed a dummy transaction when in the NEED2 state. + * + * There are two places where we switch states: + * + * 1.) In xfs_sync, when we detect an idle log and are in NEED or NEED2. + * We commit the dummy transaction and switch to DONE or DONE2, + * respectively. In all other states, we don't do anything. + * + * 2.) When we finish writing the on-disk log (xlog_state_clean_log). + * + * No matter what state we are in, if this isn't the dummy + * transaction going out, the next state is NEED. + * So, if we aren't in the DONE or DONE2 states, the next state + * is NEED. 
/*
 * Log operation header.
 *
 * oh_flags takes the "Flags to log operation header" values (XLOG_*_TRANS).
 * oh_clientid, oh_flags and oh_res2 are the last four bytes of the
 * structure; the GET_CLIENT_ID() commentary describes those four bytes
 * being copied and read back as a single int.
 */
typedef struct xlog_op_header {
	xlog_tid_t oh_tid;	/* transaction id of operation	:  4 b */
	int	   oh_len;	/* bytes in data region; declared int, so 4 b
				 * where int is 32 bits (an earlier note here
				 * said 2 b, which does not match the type) */
	char	   oh_clientid;	/* who sent me this		:  1 b */
	char	   oh_flags;	/*				:  1 b */
	ushort	   oh_res2;	/* 32 bit align			:  2 b */
} xlog_op_header_t;
+typedef struct xlog_rec_header { + uint h_magicno; /* log record (LR) identifier : 4 */ + uint h_cycle; /* write cycle of log : 4 */ + int h_version; /* LR version : 4 */ + int h_len; /* len in bytes; should be 64-bit aligned: 4 */ + xfs_lsn_t h_lsn; /* lsn of this LR : 8 */ + xfs_lsn_t h_tail_lsn; /* lsn of 1st LR w/ buffers not committed: 8 */ + uint h_chksum; /* may not be used; non-zero if used : 4 */ + int h_prev_block; /* block number to previous LR : 4 */ + int h_num_logops; /* number of log operations in this LR : 4 */ + uint h_cycle_data[XLOG_MAX_RECORD_BSIZE / BBSIZE]; + /* new fields */ + int h_fmt; /* format of log record : 4 */ + uuid_t h_fs_uuid; /* uuid of FS : 16 */ +} xlog_rec_header_t; + +#ifdef __KERNEL__ +/* + * - A log record header is 512 bytes. There is plenty of room to grow the + * xlog_rec_header_t into the reserved space. + * - ic_data follows, so a write to disk can start at the beginning of + * the iclog. + * - ic_forcesema is used to implement synchronous forcing of the iclog to disk. + * - ic_next is the pointer to the next iclog in the ring. + * - ic_bp is a pointer to the buffer used to write this incore log to disk. + * - ic_log is a pointer back to the global log structure. + * - ic_callback is a linked list of callback function/argument pairs to be + * called after an iclog finishes writing. + * - ic_size is the full size of the header plus data. + * - ic_offset is the current number of bytes written to in this iclog. + * - ic_refcnt is bumped when someone is writing to the log. + * - ic_state is the state of the iclog. 
+ */ +typedef struct xlog_iclog_fields { + sv_t ic_forcesema; /* for synchronous forcing of the iclog to disk */ + struct xlog_in_core *ic_next; /* next iclog in the ring */ + struct xlog_in_core *ic_prev; /* presumably the previous iclog in the ring */ + struct xfs_buf *ic_bp; /* buffer used to write this iclog to disk */ + struct log *ic_log; /* back pointer to the global log structure */ + xfs_log_callback_t *ic_callback; /* callbacks to call after the iclog finishes writing */ + xfs_log_callback_t **ic_callback_tail; /* presumably where the next callback is appended -- TODO confirm */ +#ifdef DEBUG + struct ktrace *ic_trace; /* only present in DEBUG builds */ +#endif + int ic_size; /* full size of the header plus data */ + int ic_offset; /* bytes written to in this iclog so far */ + int ic_refcnt; /* bumped when someone is writing to the log */ + int ic_roundoff; + int ic_bwritecnt; + ushort_t ic_state; /* state of the iclog */ +} xlog_iclog_fields_t; + +typedef struct xlog_in_core { + union { + xlog_iclog_fields_t hic_fields; + char hic_pad[BBSIZE]; /* makes ic_h1 at least BBSIZE bytes */ + } ic_h1; + union { + xlog_rec_header_t hic_header; + char hic_sector[XLOG_HEADER_SIZE]; /* makes ic_h2 at least XLOG_HEADER_SIZE bytes */ + } ic_h2; + char ic_data[1]; /* log data follows the record header */ +} xlog_in_core_t; + +/* + * Defines to save our code from this glop: they let callers write + * iclog->ic_next and friends instead of spelling out the ic_h1/ic_h2 + * union member paths. + */ +#define ic_forcesema ic_h1.hic_fields.ic_forcesema +#define ic_next ic_h1.hic_fields.ic_next +#define ic_prev ic_h1.hic_fields.ic_prev +#define ic_bp ic_h1.hic_fields.ic_bp +#define ic_log ic_h1.hic_fields.ic_log +#define ic_callback ic_h1.hic_fields.ic_callback +#define ic_callback_tail ic_h1.hic_fields.ic_callback_tail +#define ic_trace ic_h1.hic_fields.ic_trace +#define ic_size ic_h1.hic_fields.ic_size +#define ic_offset ic_h1.hic_fields.ic_offset +#define ic_refcnt ic_h1.hic_fields.ic_refcnt +#define ic_roundoff ic_h1.hic_fields.ic_roundoff +#define ic_bwritecnt ic_h1.hic_fields.ic_bwritecnt +#define ic_state ic_h1.hic_fields.ic_state +#define ic_header ic_h2.hic_header + +/* + * The reservation head lsn is not made up of a cycle number and block number. + * Instead, it uses a cycle number and byte number. Logs don't expect to + * overflow 31 bits worth of byte offset, so using a byte number will mean + * that round off problems won't occur when releasing partial reservations. 
+ */ +typedef struct log { + /* The following block of fields are changed while holding icloglock */ + sema_t l_flushsema; /* iclog flushing semaphore */ + int l_flushcnt; /* # of procs waiting on this sema */ + int l_ticket_cnt; /* free ticket count */ + int l_ticket_tcnt; /* total ticket count */ + int l_covered_state;/* state of "covering disk log entries"; presumably one of the XLOG_STATE_COVER_xxx values */ + xlog_ticket_t *l_freelist; /* free list of tickets */ + xlog_ticket_t *l_unmount_free;/* kmem_free these addresses */ + xlog_ticket_t *l_tail; /* presumably the tail of the ticket free list (comment was a copy of l_freelist's) -- TODO confirm */ + xlog_in_core_t *l_iclog; /* head log queue */ + lock_t l_icloglock; /* grab to change iclog state */ + xfs_lsn_t l_tail_lsn; /* lsn of 1st LR w/ unflushed buffers */ + xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ + struct xfs_mount *l_mp; /* mount point */ + struct xfs_buf *l_xbuf; /* extra buffer for log wrapping */ + dev_t l_dev; /* dev_t of log */ + xfs_daddr_t l_logBBstart; /* start block of log */ + int l_logsize; /* size of log in bytes */ + int l_logBBsize; /* size of log in 512 byte chunks */ + int l_roundoff; /* round off error of all iclogs */ + int l_curr_cycle; /* Cycle number of log writes */ + int l_prev_cycle; /* Cycle # b4 last block increment */ + int l_curr_block; /* current logical block of log */ + int l_prev_block; /* previous logical block of log */ + int l_iclog_size; /* presumably the size of one iclog in bytes (comment said "log") -- TODO confirm */ + int l_iclog_size_log;/* presumably log2 of l_iclog_size -- TODO confirm */ + int l_iclog_bufs; /* number of iclog buffers */ + + /* The following field is used for debugging; need to hold icloglock */ + char *l_iclog_bak[XLOG_MAX_ICLOGS]; + + /* The following block of fields are changed while holding grant_lock */ + lock_t l_grant_lock; /* protects below fields */ + xlog_ticket_t *l_reserve_headq; /* */ + xlog_ticket_t *l_write_headq; /* */ + int l_grant_reserve_cycle; /* reservation head: cycle number */ + int l_grant_reserve_bytes; /* reservation head: byte number (not a block number) */ + int l_grant_write_cycle; /* presumably the write head's cycle number */ + int l_grant_write_bytes; /* presumably the write head's byte number */ + + /* The following fields don't need locking */ +#ifdef DEBUG + 
struct ktrace *l_trace; + struct ktrace *l_grant_trace; +#endif + uint l_flags; + uint l_quotaoffs_flag;/* XFS_DQ_*, if QUOTAOFFs found */ + struct xfs_buf_cancel **l_buf_cancel_table; +} xlog_t; + + +/* common routines */ +extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp, + xlog_in_core_t *iclog); +extern int xlog_find_head(xlog_t *log, xfs_daddr_t *head_blk); +extern int xlog_find_tail(xlog_t *log, + xfs_daddr_t *head_blk, + xfs_daddr_t *tail_blk, + int readonly); +extern int xlog_print_find_oldest(xlog_t *log, xfs_daddr_t *last_blk); +extern int xlog_recover(xlog_t *log, int readonly); +extern int xlog_recover_finish(xlog_t *log, int mfsi_flags); +extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog); +extern struct xfs_buf *xlog_get_bp(int,xfs_mount_t *); +extern void xlog_put_bp(struct xfs_buf *); +extern int xlog_bread(xlog_t *, xfs_daddr_t blkno, int bblks, struct xfs_buf *bp); +extern void xlog_recover_process_iunlinks(xlog_t *log); + +#define XLOG_TRACE_GRAB_FLUSH 1 +#define XLOG_TRACE_REL_FLUSH 2 +#define XLOG_TRACE_SLEEP_FLUSH 3 +#define XLOG_TRACE_WAKE_FLUSH 4 + +#endif /* __KERNEL__ */ + +#endif /* __XFS_LOG_PRIV_H__ */ diff --git a/include/xfs_log_recover.h b/include/xfs_log_recover.h new file mode 100644 index 000000000..233cb1635 --- /dev/null +++ b/include/xfs_log_recover.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. 
Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_LOG_RECOVER_H__ +#define __XFS_LOG_RECOVER_H__ + +/* + * Macros, structures, prototypes for internal log manager use. + */ + +#define XLOG_RHASH_BITS 4 +#define XLOG_RHASH_SIZE 16 /* 1 << XLOG_RHASH_BITS */ +#define XLOG_RHASH_SHIFT 2 +#define XLOG_RHASH(tid) \ + ((((__uint32_t)(tid))>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1)) /* bucket in [0, XLOG_RHASH_SIZE); tid is parenthesized so an expression argument is cast as a whole */ + +#define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK / 2 + 1) + + +/* + * item headers are in ri_buf[0]. Additional buffers follow. + */ +typedef struct xlog_recover_item { + struct xlog_recover_item *ri_next; + struct xlog_recover_item *ri_prev; + int ri_type; + int ri_cnt; /* count of regions found */ + int ri_total; /* total regions */ + xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */ +} xlog_recover_item_t; + +struct xlog_tid; +typedef struct xlog_recover { + struct xlog_recover *r_next; + xlog_tid_t r_log_tid; /* log's transaction id */ + xfs_trans_header_t r_theader; /* trans header for partial */ + int r_state; /* not needed */ + xfs_lsn_t r_lsn; /* xact lsn */ + xlog_recover_item_t *r_itemq; /* q for items */ +} xlog_recover_t; + +#define ITEM_TYPE(i) (*(ushort *)(i)->ri_buf[0].i_addr) /* reads the leading ushort of the item header in ri_buf[0] */ + +/* + * This is the number of entries in the l_buf_cancel_table used during + * recovery. 
+ */ +#define XLOG_BC_TABLE_SIZE 64 + +#define XLOG_RECOVER_PASS1 1 +#define XLOG_RECOVER_PASS2 2 + +#endif /* __XFS_LOG_RECOVER_H__ */ diff --git a/include/xfs_mount.h b/include/xfs_mount.h new file mode 100644 index 000000000..b026f2005 --- /dev/null +++ b/include/xfs_mount.h @@ -0,0 +1,490 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_MOUNT_H__ +#define __XFS_MOUNT_H__ + + +typedef struct xfs_trans_reservations { + uint tr_write; /* extent alloc trans */ + uint tr_itruncate; /* truncate trans */ + uint tr_rename; /* rename trans */ + uint tr_link; /* link trans */ + uint tr_remove; /* unlink trans */ + uint tr_symlink; /* symlink trans */ + uint tr_create; /* create trans */ + uint tr_mkdir; /* mkdir trans */ + uint tr_ifree; /* inode free trans */ + uint tr_ichange; /* inode update trans */ + uint tr_growdata; /* fs data section grow trans */ + uint tr_swrite; /* sync write inode trans */ + uint tr_addafork; /* cvt inode to attributed trans */ + uint tr_writeid; /* write setuid/setgid file */ + uint tr_attrinval; /* attr fork buffer invalidation */ + uint tr_attrset; /* set/create an attribute */ + uint tr_attrrm; /* remove an attribute */ + uint tr_clearagi; /* clear bad agi unlinked ino bucket */ + uint tr_growrtalloc; /* grow realtime allocations */ + uint tr_growrtzero; /* grow realtime zeroing */ + uint tr_growrtfree; /* grow realtime freeing */ +} xfs_trans_reservations_t; + + +#ifndef __KERNEL__ +/* + * Moved here from xfs_ag.h to avoid reordering header files + */ +#define XFS_DADDR_TO_AGNO(mp,d) \ + ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks)) +#define XFS_DADDR_TO_AGBNO(mp,d) \ + ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks)) +#else +struct cred; +struct mounta; +struct vfs; +struct vnode; +struct xfs_args; +struct xfs_ihash; +struct xfs_chash; +struct xfs_inode; +struct xfs_perag; +struct xfs_quotainfo; +struct xfs_iocore; +struct xfs_dio; +struct xfs_bmbt_irec; +struct xfs_bmap_free; + +#if defined(INTERRUPT_LATENCY_TESTING) +#define SPLDECL(s) +#define AIL_LOCK_T 
mutex_t +#define AIL_LOCKINIT(x,y) mutex_init(x,MUTEX_DEFAULT, y) +#define AIL_LOCK_DESTROY(x) mutex_destroy(x) +#define AIL_LOCK(mp,s) mutex_lock(&(mp)->m_ail_lock, PZERO) +#define AIL_UNLOCK(mp,s) mutex_unlock(&(mp)->m_ail_lock) +#else /* !INTERRUPT_LATENCY_TESTING */ +#define SPLDECL(s) int s +#define AIL_LOCK_T lock_t +#define AIL_LOCKINIT(x,y) spinlock_init(x,y) +#define AIL_LOCK_DESTROY(x) spinlock_destroy(x) +#define AIL_LOCK(mp,s) s=mutex_spinlock(&(mp)->m_ail_lock) +#define AIL_UNLOCK(mp,s) mutex_spinunlock(&(mp)->m_ail_lock, s) +#endif /* !INTERRUPT_LATENCY_TESTING */ + + +/* Prototypes and functions for I/O core modularization, a vector + * of functions is used to indirect from xfs/cxfs independent code + * to the xfs/cxfs dependent code. + * The vector is placed in the mount structure so that we can + * minimize the number of memory indirections involved. + */ + +typedef int (*xfs_dio_write_t)(struct xfs_dio *); +typedef int (*xfs_dio_read_t)(struct xfs_dio *); +typedef int (*xfs_strat_write_t)(struct xfs_iocore *, struct xfs_buf *); +typedef int (*xfs_bmapi_t)(struct xfs_trans *, void *, + xfs_fileoff_t, xfs_filblks_t, int, + xfs_fsblock_t *, xfs_extlen_t, + struct xfs_bmbt_irec *, int *, + struct xfs_bmap_free *); +typedef int (*xfs_bmap_eof_t)(void *, xfs_fileoff_t, int, int *); +typedef int (*xfs_rsync_t)(void *, int, xfs_off_t, xfs_off_t); +typedef uint (*xfs_lck_map_shared_t)(void *); +typedef void (*xfs_lock_t)(void *, uint); +typedef void (*xfs_lock_demote_t)(void *, uint); +typedef int (*xfs_lock_nowait_t)(void *, uint); +typedef void (*xfs_unlk_t)(void *, unsigned int); +typedef void (*xfs_chgtime_t)(void *, int); +typedef xfs_fsize_t (*xfs_size_t)(void *); +typedef xfs_fsize_t (*xfs_setsize_t)(void *, xfs_off_t); +typedef xfs_fsize_t (*xfs_lastbyte_t)(void *); + +#ifdef CELL_CAPABLE +typedef int (*xfs_checklock_t)(bhv_desc_t *, struct vnode *, + int, off_t, off_t, int, struct cred *, + struct flid *, vrwlock_t, int); +#endif + +typedef struct 
xfs_ioops { + xfs_dio_write_t xfs_dio_write_func; + xfs_dio_read_t xfs_dio_read_func; + xfs_strat_write_t xfs_strat_write_func; + xfs_bmapi_t xfs_bmapi_func; + xfs_bmap_eof_t xfs_bmap_eof_func; + xfs_rsync_t xfs_rsync_func; + xfs_lck_map_shared_t xfs_lck_map_shared; + xfs_lock_t xfs_ilock; + xfs_lock_demote_t xfs_ilock_demote; + xfs_lock_nowait_t xfs_ilock_nowait; + xfs_unlk_t xfs_unlock; + xfs_chgtime_t xfs_chgtime; + xfs_size_t xfs_size_func; + xfs_setsize_t xfs_setsize_func; + xfs_lastbyte_t xfs_lastbyte; +#ifdef CELL_CAPABLE + xfs_checklock_t xfs_checklock; +#endif +} xfs_ioops_t; + + +#define XFS_DIO_WRITE(mp, diop) \ + (*(mp)->m_io_ops.xfs_dio_write_func)(diop) + +#define XFS_DIO_READ(mp, diop) \ + (*(mp)->m_io_ops.xfs_dio_read_func)(diop) + +#define XFS_STRAT_WRITE(mp, io, bp) \ + (*(mp)->m_io_ops.xfs_strat_write_func)(io, bp) + +#define XFS_BMAPI(mp, trans,io,bno,len,f,first,tot,mval,nmap,flist) \ + (*(mp)->m_io_ops.xfs_bmapi_func) \ + (trans,(io)->io_obj,bno,len,f,first,tot,mval,nmap,flist) + +#define XFS_BMAP_EOF(mp, io, endoff, whichfork, eof) \ + (*(mp)->m_io_ops.xfs_bmap_eof_func) \ + ((io)->io_obj, endoff, whichfork, eof) + +#define XFS_RSYNC(mp, io, ioflag, start, end) \ + (*(mp)->m_io_ops.xfs_rsync_func)((io)->io_obj, ioflag, start, end) + +#define XFS_LCK_MAP_SHARED(mp, io) \ + (*(mp)->m_io_ops.xfs_lck_map_shared)((io)->io_obj) + +#define XFS_UNLK_MAP_SHARED(mp, io, mode) \ + (*(mp)->m_io_ops.xfs_unlock)((io)->io_obj, mode) + +#define XFS_ILOCK(mp, io, mode) \ + (*(mp)->m_io_ops.xfs_ilock)((io)->io_obj, mode) + +#define XFS_ILOCK_NOWAIT(mp, io, mode) \ + (*(mp)->m_io_ops.xfs_ilock_nowait)((io)->io_obj, mode) + +#define XFS_IUNLOCK(mp, io, mode) \ + (*(mp)->m_io_ops.xfs_unlock)((io)->io_obj, mode) + +#define XFS_ILOCK_DEMOTE(mp, io, mode) \ + (*(mp)->m_io_ops.xfs_ilock_demote)((io)->io_obj, mode) + +#define XFS_CHGTIME(mp, io, flags) \ + (*(mp)->m_io_ops.xfs_chgtime)((io)->io_obj, flags) + +#define XFS_SIZE(mp, io) \ + 
(*(mp)->m_io_ops.xfs_size_func)((io)->io_obj) + +#define XFS_SETSIZE(mp, io, newsize) \ + (*(mp)->m_io_ops.xfs_setsize_func)((io)->io_obj, newsize) + +#define XFS_LASTBYTE(mp, io) \ + (*(mp)->m_io_ops.xfs_lastbyte)((io)->io_obj) + + +typedef struct xfs_mount { + bhv_desc_t m_bhv; /* vfs xfs behavior */ + xfs_tid_t m_tid; /* next unused tid for fs */ + AIL_LOCK_T m_ail_lock; /* fs AIL mutex */ + xfs_ail_entry_t m_ail; /* fs active log item list */ + uint m_ail_gen; /* fs AIL generation count */ + xfs_sb_t m_sb; /* copy of fs superblock */ + lock_t m_sb_lock; /* sb counter mutex */ + struct xfs_buf *m_sb_bp; /* buffer for superblock */ + char *m_fsname; /* filesystem name */ + int m_fsname_len; /* strlen of fs name */ + int m_bsize; /* fs logical block size */ + xfs_agnumber_t m_agfrotor; /* last ag where space found */ + xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ + int m_ihsize; /* size of next field */ + struct xfs_ihash *m_ihash; /* fs private inode hash table*/ + struct xfs_inode *m_inodes; /* active inode list */ + mutex_t m_ilock; /* inode list mutex */ + uint m_ireclaims; /* count of calls to reclaim*/ + uint m_readio_log; /* min read size log bytes */ + uint m_readio_blocks; /* min read size blocks */ + uint m_writeio_log; /* min write size log bytes */ + uint m_writeio_blocks; /* min write size blocks */ + void *m_log; /* log specific stuff */ + int m_logbufs; /* number of log buffers */ + int m_logbsize; /* size of each log buffer */ + uint m_rsumlevels; /* rt summary levels */ + uint m_rsumsize; /* size of rt summary, bytes */ + struct xfs_inode *m_rbmip; /* pointer to bitmap inode */ + struct xfs_inode *m_rsumip; /* pointer to summary inode */ + struct xfs_inode *m_rootip; /* pointer to root directory */ + struct xfs_quotainfo *m_quotainfo; /* disk quota information */ + buftarg_t m_ddev_targ; /* ptr to data device */ + buftarg_t m_logdev_targ; /* ptr to log device */ + buftarg_t m_rtdev_targ; /* ptr to rt device */ + buftarg_t 
*m_ddev_targp; /* saves taking the address */ +#define m_dev m_ddev_targ.dev +#define m_logdev m_logdev_targ.dev +#define m_rtdev m_rtdev_targ.dev + __uint8_t m_dircook_elog; /* log d-cookie entry bits */ + __uint8_t m_blkbit_log; /* blocklog + NBBY */ + __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ + __uint8_t m_agno_log; /* log #ag's */ + __uint8_t m_agino_log; /* #bits for agino in inum */ + __uint8_t m_nreadaheads; /* #readahead buffers */ + __uint16_t m_inode_cluster_size;/* min inode buf size */ + uint m_blockmask; /* sb_blocksize-1 */ + uint m_blockwsize; /* sb_blocksize in words */ + uint m_blockwmask; /* blockwsize-1 */ + uint m_alloc_mxr[2]; /* XFS_ALLOC_BLOCK_MAXRECS */ + uint m_alloc_mnr[2]; /* XFS_ALLOC_BLOCK_MINRECS */ + uint m_bmap_dmxr[2]; /* XFS_BMAP_BLOCK_DMAXRECS */ + uint m_bmap_dmnr[2]; /* XFS_BMAP_BLOCK_DMINRECS */ + uint m_inobt_mxr[2]; /* XFS_INOBT_BLOCK_MAXRECS */ + uint m_inobt_mnr[2]; /* XFS_INOBT_BLOCK_MINRECS */ + uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ + uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ + uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */ + struct xfs_perag *m_perag; /* per-ag accounting info */ + mrlock_t m_peraglock; /* lock for m_perag (pointer) */ + sema_t m_growlock; /* growfs mutex */ + int m_fixedfsid[2]; /* unchanged for life of FS */ + uint m_dmevmask; /* DMI events for this FS */ + uint m_flags; /* global mount flags */ + uint m_attroffset; /* inode attribute offset */ + int m_da_node_ents; /* how many entries in danode */ + int m_ialloc_inos; /* inodes in inode allocation */ + int m_ialloc_blks; /* blocks in inode allocation */ + int m_litino; /* size of inode union area */ + int m_inoalign_mask;/* mask sb_inoalignmt if used */ + uint m_qflags; /* quota status flags */ + xfs_trans_reservations_t m_reservations;/* precomputed res values */ + __uint64_t m_maxicount; /* maximum inode count */ + __uint64_t m_resblks; /* total reserved blocks */ + __uint64_t m_resblks_avail;/* available reserved blocks */ +#if 
XFS_BIG_FILESYSTEMS + xfs_ino_t m_inoadd; /* add value for ino64_offset */ +#endif + int m_dalign; /* stripe unit */ + int m_swidth; /* stripe width */ + int m_sinoalign; /* stripe unit inode alignmnt */ + int m_attr_magicpct;/* 37% of the blocksize */ + int m_dir_magicpct; /* 37% of the dir blocksize */ + __uint8_t m_mk_sharedro; /* mark shared ro on unmount */ + __uint8_t m_inode_quiesce;/* call quiesce on new inodes. + field governed by m_ilock */ + __uint8_t m_dirversion; /* 1 or 2 */ + xfs_dirops_t m_dirops; /* table of dir funcs */ + int m_dirblksize; /* directory block sz--bytes */ + int m_dirblkfsbs; /* directory block sz--fsbs */ + xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */ + xfs_dablk_t m_dirleafblk; /* blockno of dir non-data v2 */ + xfs_dablk_t m_dirfreeblk; /* blockno of dirfreeindex v2 */ + int m_chsize; /* size of next field */ + struct xfs_chash *m_chash; /* fs private inode per-cluster + * hash table */ + struct xfs_ioops m_io_ops; /* vector of I/O ops */ + struct xfs_expinfo *m_expinfo; /* info to export to other + cells. */ + uint64_t m_shadow_pinmask; + /* which bits matter in rpc + log item pin masks */ + uint m_cxfstype; /* mounted shared, etc. */ +} xfs_mount_t; + +/* + * Flags for m_flags. 
+ */ +#define XFS_MOUNT_WSYNC 0x00000001 /* for nfs - all metadata ops + must be synchronous except + for space allocations */ +#if XFS_BIG_FILESYSTEMS +#define XFS_MOUNT_INO64 0x00000002 +#endif +#define XFS_MOUNT_ROOTQCHECK 0x00000004 + /* 0x00000008 -- currently unused */ +#define XFS_MOUNT_FS_SHUTDOWN 0x00000010 /* atomic stop of all filesystem + operations, typically for + disk errors in metadata */ +#define XFS_MOUNT_NOATIME 0x00000020 /* don't modify inode access + times on reads */ +#define XFS_MOUNT_RETERR 0x00000040 /* return alignment errors to + user */ +#define XFS_MOUNT_NOALIGN 0x00000080 /* turn off stripe alignment + allocations */ + /* 0x00000100 -- currently unused */ +#define XFS_MOUNT_REGISTERED 0x00000200 /* registered with cxfs master + cell logic */ +#define XFS_MOUNT_NORECOVERY 0x00000400 /* no recovery - dirty fs */ +#define XFS_MOUNT_SHARED 0x00000800 /* shared mount */ +#define XFS_MOUNT_DFLT_IOSIZE 0x00001000 /* set default i/o size */ +#define XFS_MOUNT_OSYNCISDSYNC 0x00002000 /* treat o_sync like o_dsync */ + +/* + * Flags for m_cxfstype + */ +#define XFS_CXFS_NOT 0x00000001 /* local mount */ +#define XFS_CXFS_SERVER 0x00000002 /* we're the CXFS server */ +#define XFS_CXFS_CLIENT 0x00000004 /* We're a CXFS client */ +#define XFS_CXFS_REC_ENABLED 0x00000008 /* recovery is enabled */ + +#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN) + +/* + * Default minimum read and write sizes. + */ +#define XFS_READIO_LOG_SMALL 15 /* <= 32MB memory */ +#define XFS_WRITEIO_LOG_SMALL 15 +#define XFS_READIO_LOG_LARGE 16 /* > 32MB memory */ +#define XFS_WRITEIO_LOG_LARGE 16 + +/* + * max and min values for UIO and mount-option defined I/O sizes + * min value can't be less than a page. Lower limit for 4K machines + * is 8K because that's what was tested. 
+ */ +#define XFS_MAX_IO_LOG 16 /* 64K */ + +#if (_PAGESZ == 16384) || (_PAGESZ == 8192) +#define XFS_MIN_IO_LOG 14 /* 16K */ +#elif _PAGESZ == 4096 +#define XFS_MIN_IO_LOG 13 /* 8K */ +#else +#error "Unknown page size" +#endif + + +/* + * Synchronous read and write sizes. This should be + * better for NFSv2 wsync filesystems. + */ +#define XFS_WSYNC_READIO_LOG 15 /* 32K */ +#define XFS_WSYNC_WRITEIO_LOG 14 /* 16K */ + +/* + * Flags sent to xfs_force_shutdown. Each value is a distinct bit. + */ +#define XFS_METADATA_IO_ERROR 0x1 +#define XFS_LOG_IO_ERROR 0x2 +#define XFS_FORCE_UMOUNT 0x4 +#define XFS_CORRUPT_INCORE 0x8 /* corrupt in-memory data structures */ +#if CELL_CAPABLE /* NOTE(review): the other CELL_CAPABLE tests in this header use #ifdef -- confirm #if is intended here */ +#define XFS_SHUTDOWN_REMOTE_REQ 0x10 /* shutdown req came from remote cell */ +#endif + +/* + * xflags for xfs_syncsub + */ +#define XFS_XSYNC_RELOC 0x01 + +/* + * Flags for xfs_mountfs. Note that 0x04 is not defined here. + */ +#define XFS_MFSI_SECOND 0x01 /* Is a cxfs secondary mount -- skip */ + /* stuff which should only be done */ + /* once. */ +#define XFS_MFSI_CLIENT 0x02 /* Is a client -- skip lots of stuff */ +#define XFS_MFSI_NOUNLINK 0x08 /* Skip unlinked inode processing in */ + /* log recovery */ + +/* + * Macros for getting from mount to vfs and back. 
+ */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_MTOVFS) +struct vfs *xfs_mtovfs(xfs_mount_t *mp); +#define XFS_MTOVFS(mp) xfs_mtovfs(mp) +#else +#define XFS_MTOVFS(mp) (bhvtovfs(&(mp)->m_bhv)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BHVTOM) +xfs_mount_t *xfs_bhvtom(bhv_desc_t *bdp); +#define XFS_BHVTOM(bdp) xfs_bhvtom(bdp) +#else +#define XFS_BHVTOM(bdp) ((xfs_mount_t *)BHV_PDATA(bdp)) +#endif + + +/* + * Moved here from xfs_ag.h to avoid reordering header files. + * + * These are the __KERNEL__ counterparts of the XFS_DADDR_TO_AGNO and + * XFS_DADDR_TO_AGBNO macros defined earlier in this header, which apply + * plain division and modulus by sb_agblocks to the XFS_BB_TO_FSBT() + * result. Here do_div() is used instead; it presumably divides d in + * place (leaving the quotient in d) and returns the remainder -- confirm + * against the do_div() definition. + */ + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DADDR_TO_AGNO) +xfs_agnumber_t xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d); +#define XFS_DADDR_TO_AGNO(mp,d) xfs_daddr_to_agno(mp,d) +#else + +static inline xfs_agnumber_t XFS_DADDR_TO_AGNO(xfs_mount_t *mp, xfs_daddr_t d) +{ + d = XFS_BB_TO_FSBT(mp, d); + do_div(d, mp->m_sb.sb_agblocks); /* d is now the quotient (see note above) */ + return (xfs_agnumber_t) d; +} + +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DADDR_TO_AGBNO) +xfs_agblock_t xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d); +#define XFS_DADDR_TO_AGBNO(mp,d) xfs_daddr_to_agbno(mp,d) +#else + +static inline xfs_agblock_t XFS_DADDR_TO_AGBNO(xfs_mount_t *mp, xfs_daddr_t d) +{ + d = XFS_BB_TO_FSBT(mp, d); + return (xfs_agblock_t) do_div(d, mp->m_sb.sb_agblocks); /* the value do_div() returns, i.e. the remainder (see note above) */ +} + +#endif + +/* + * This structure is for use by the xfs_mod_incore_sb_batch() routine. 
+ */ +typedef struct xfs_mod_sb { + xfs_sb_field_t msb_field; /* Field to modify, see below */ + int msb_delta; /* change to make to the specified field */ +} xfs_mod_sb_t; + +#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock), PINOD) +#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock)) +#define XFS_SB_LOCK(mp) mutex_spinlock(&(mp)->m_sb_lock) +#define XFS_SB_UNLOCK(mp,s) mutex_spinunlock(&(mp)->m_sb_lock,(s)) + +void xfs_mod_sb(xfs_trans_t *, __int64_t); +xfs_mount_t *xfs_mount_init(void); +void xfs_mount_free(xfs_mount_t *mp, int remove_bhv); +int xfs_mountfs(struct vfs *, xfs_mount_t *mp, dev_t, int); +int xfs_mountargs(struct mounta *, struct xfs_args *); + +int xfs_unmountfs(xfs_mount_t *, int, struct cred *); +void xfs_unmountfs_close(xfs_mount_t *, int, struct cred *); +int xfs_unmountfs_writesb(xfs_mount_t *); +int xfs_unmount_flush(xfs_mount_t *, int); +int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int, int); +int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, uint, int); +int xfs_readsb(xfs_mount_t *mp, dev_t); +struct xfs_buf *xfs_getsb(xfs_mount_t *, int); +void xfs_freesb(xfs_mount_t *); +void xfs_force_shutdown(struct xfs_mount *, int); +int xfs_syncsub(xfs_mount_t *, int, int, int *); +void xfs_xlatesb(void *, struct xfs_sb *, int, xfs_arch_t, __int64_t); +extern struct vfsops xfs_vfsops; + +#endif /* __KERNEL__ */ + +#endif /* __XFS_MOUNT_H__ */ diff --git a/include/xfs_quota.h b/include/xfs_quota.h new file mode 100644 index 000000000..794b90ee2 --- /dev/null +++ b/include/xfs_quota.h @@ -0,0 +1,320 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_QUOTA_H__ +#define __XFS_QUOTA_H__ + +/* + * We use only 16-bit prid's in the inode, not the 64-bit version in the proc. + * uid_t is hard-coded to 32 bits in the inode. Hence, an 'id' in a dquot is + * 32 bits. + */ +typedef __int32_t xfs_dqid_t; +/* + * Even though users may not have quota limits occupying all 64 bits, + * they may need 64-bit accounting. Hence, 64-bit quota counters + * and quota limits. This is a waste in the common case, but heh ... + */ +typedef __uint64_t xfs_qcnt_t; +typedef __uint16_t xfs_qwarncnt_t; /* presumably a count of quota warnings -- TODO confirm */ + +/* + * Disk quotas status in m_qflags, and also sb_qflags. 16 bits. 
+ */ +#define XFS_UQUOTA_ACCT 0x0001 /* user quota accounting ON */ +#define XFS_UQUOTA_ENFD 0x0002 /* user quota limits enforced */ +#define XFS_UQUOTA_CHKD 0x0004 /* quotacheck run on usr quotas */ +#define XFS_PQUOTA_ACCT 0x0008 /* project quota accounting ON */ +#define XFS_PQUOTA_ENFD 0x0010 /* proj quota limits enforced */ +#define XFS_PQUOTA_CHKD 0x0020 /* quotacheck run on prj quotas */ + +/* + * Incore only flags for quotaoff - these bits get cleared when quota(s) + * are in the process of getting turned off. These flags are in m_qflags but + * never in sb_qflags. + */ +#define XFS_UQUOTA_ACTIVE 0x0040 /* cleared while uquotas are being turned off */ +#define XFS_PQUOTA_ACTIVE 0x0080 /* cleared while pquotas are being turned off */ + +/* + * Typically, we turn quotas off if we weren't explicitly asked to + * mount quotas. This is the mount option not to do that. + * This option is handy in the miniroot, when trying to mount /root. + * We can't really know what's in /etc/fstab until /root is already mounted! + * This stops quotas getting turned off in the root filesystem every time + * the system boots up a miniroot. + */ +#define XFS_QUOTA_MAYBE 0x0100 /* Turn quotas on if SB has quotas on */ + +/* + * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees + * quota will not be switched off as long as that inode lock is held. + * These macros yield the masked m_qflags bits (zero or non-zero), + * not a normalized 0/1 value. + */ +#define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \ + XFS_PQUOTA_ACTIVE)) +#define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE) +#define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE) + +/* + * Flags to tell various functions what to do. Not all of these are meaningful + * to a single function. None of these XFS_QMOPT_* flags are meant to have + * persistent values (ie. 
their values can and will change between versions) + */ +#define XFS_QMOPT_DQLOCK 0x0000001 /* dqlock */ +#define XFS_QMOPT_DQALLOC 0x0000002 /* alloc dquot ondisk if needed */ +#define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */ +#define XFS_QMOPT_PQUOTA 0x0000008 /* proj dquot requested */ +#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ +#define XFS_QMOPT_DQSUSER 0x0000020 /* don't cache super users dquot */ +#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ +#define XFS_QMOPT_QUOTAOFF 0x0000080 /* quotas are being turned off */ +#define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */ +#define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */ +#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if necessary */ +#define XFS_QMOPT_ILOCKED 0x0000800 /* inode is already locked (excl) */ +#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot, if damaged. */ + +/* + * flags to xfs_trans_mod_dquot to indicate which field needs to be + * modified. + */ +#define XFS_QMOPT_RES_REGBLKS 0x0010000 +#define XFS_QMOPT_RES_RTBLKS 0x0020000 +#define XFS_QMOPT_BCOUNT 0x0040000 +#define XFS_QMOPT_ICOUNT 0x0080000 +#define XFS_QMOPT_RTBCOUNT 0x0100000 +#define XFS_QMOPT_DELBCOUNT 0x0200000 +#define XFS_QMOPT_DELRTBCOUNT 0x0400000 +#define XFS_QMOPT_RES_INOS 0x0800000 + +/* + * flags for dqflush and dqflush_all. + */ +#define XFS_QMOPT_SYNC 0x1000000 +#define XFS_QMOPT_ASYNC 0x2000000 +#define XFS_QMOPT_DELWRI 0x4000000 + +/* + * flags to xfs_trans_mod_dquot. 
+ */ +#define XFS_TRANS_DQ_RES_BLKS XFS_QMOPT_RES_REGBLKS +#define XFS_TRANS_DQ_RES_RTBLKS XFS_QMOPT_RES_RTBLKS +#define XFS_TRANS_DQ_RES_INOS XFS_QMOPT_RES_INOS +#define XFS_TRANS_DQ_BCOUNT XFS_QMOPT_BCOUNT +#define XFS_TRANS_DQ_DELBCOUNT XFS_QMOPT_DELBCOUNT +#define XFS_TRANS_DQ_ICOUNT XFS_QMOPT_ICOUNT +#define XFS_TRANS_DQ_RTBCOUNT XFS_QMOPT_RTBCOUNT +#define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT + + +#define XFS_QMOPT_QUOTALL (XFS_QMOPT_UQUOTA|XFS_QMOPT_PQUOTA) +#define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) + +/* + * This check is done typically without holding the inode lock; + * that may seem racy, but it is harmless in the context that it is used. + * The inode cannot go inactive as long as a reference is kept, and + * therefore if dquot(s) were attached, they'll stay consistent. + * If, for example, the ownership of the inode changes while + * we didn't have the inode locked, the appropriate dquot(s) will be + * attached atomically. + */ +#define XFS_NOT_DQATTACHED(mp, ip) ((XFS_IS_UQUOTA_ON(mp) &&\ + (ip)->i_udquot == NULL) || \ + (XFS_IS_PQUOTA_ON(mp) && \ + (ip)->i_pdquot == NULL)) +/* True if a quota type is on but its *_CHKD bit is clear in sb_qflags. */ +#define XFS_QM_NEED_QUOTACHECK(mp) ((XFS_IS_UQUOTA_ON(mp) && \ + ((mp)->m_sb.sb_qflags & \ + XFS_UQUOTA_CHKD) == 0) || \ + (XFS_IS_PQUOTA_ON(mp) && \ + ((mp)->m_sb.sb_qflags & \ + XFS_PQUOTA_CHKD) == 0)) + +#define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ + XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ + XFS_PQUOTA_ENFD|XFS_PQUOTA_CHKD) +#define XFS_MOUNT_QUOTA_MASK (XFS_MOUNT_QUOTA_ALL | XFS_UQUOTA_ACTIVE | \ + XFS_PQUOTA_ACTIVE) + +#define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) + + +#ifdef __KERNEL__ +/* + * External Interface to the XFS disk quota subsystem. + */ +struct bhv_desc; +struct vfs; +struct xfs_disk_dquot; +struct xfs_dqhash; +struct xfs_dquot; +struct xfs_inode; +struct xfs_mount; +struct xfs_trans; + +/* + * Quota Manager Interface.
+ */ +extern struct xfs_qm *xfs_qm_init(void); +extern void xfs_qm_destroy(struct xfs_qm *); +extern int xfs_qm_dqflush_all(struct xfs_mount *, int); +extern int xfs_qm_dqattach(struct xfs_inode *, uint); +extern int xfs_qm_dqpurge_all(struct xfs_mount *, uint); +extern void xfs_qm_mount_quotainit(struct xfs_mount *, uint); +extern void xfs_qm_unmount_quotadestroy(struct xfs_mount *); +extern int xfs_qm_mount_quotas(struct xfs_mount *); +extern int xfs_qm_unmount_quotas(struct xfs_mount *); +extern void xfs_qm_dqdettach_inode(struct xfs_inode *); +extern int xfs_qm_sync(struct xfs_mount *, short); + + +/* + * system call interface + */ +extern int xfs_quotactl(xfs_mount_t *, struct vfs *, int, int, + int, xfs_caddr_t); + +/* + * dquot interface. + */ +extern void xfs_dqlock(struct xfs_dquot *); +extern void xfs_dqunlock(struct xfs_dquot *); +extern void xfs_dqunlock_nonotify(struct xfs_dquot *); +extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); +extern void xfs_qm_dqput(struct xfs_dquot *); +extern void xfs_qm_dqrele(struct xfs_dquot *); +extern xfs_dqid_t xfs_qm_dqid(struct xfs_dquot *); +extern int xfs_qm_dqget(struct xfs_mount *, + struct xfs_inode *, xfs_dqid_t, + uint, uint, struct xfs_dquot **); +extern int xfs_qm_dqcheck(struct xfs_disk_dquot *, + xfs_dqid_t, uint, uint, char *); + +/* + * Vnodeops specific code that should actually be _in_ xfs_vnodeops.c, but + * is here because it's nicer to keep vnodeops (therefore, XFS) lean + * and clean. 
+ */ +extern struct xfs_dquot * xfs_qm_vop_chown(struct xfs_trans *, + struct xfs_inode *, + struct xfs_dquot **, + struct xfs_dquot *); +extern int xfs_qm_vop_dqalloc(struct xfs_mount *, + struct xfs_inode *, + uid_t, xfs_prid_t, uint, + struct xfs_dquot **, + struct xfs_dquot **); + +extern int xfs_qm_vop_chown_dqalloc(struct xfs_mount *, + struct xfs_inode *, + int, uid_t, xfs_prid_t, + struct xfs_dquot **, + struct xfs_dquot **); + +extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, + struct xfs_inode *, + struct xfs_dquot *, + struct xfs_dquot *, + uint); + +extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **); +extern void xfs_qm_vop_dqattach_and_dqmod_newinode( + struct xfs_trans *, + struct xfs_inode *, + struct xfs_dquot *, + struct xfs_dquot *); + + +/* + * Dquot Transaction interface + */ +extern void xfs_trans_alloc_dqinfo(struct xfs_trans *); +extern void xfs_trans_free_dqinfo(struct xfs_trans *); +extern void xfs_trans_dup_dqinfo(struct xfs_trans *, + struct xfs_trans *); +extern void xfs_trans_mod_dquot(struct xfs_trans *, + struct xfs_dquot *, + uint, long); +extern int xfs_trans_mod_dquot_byino(struct xfs_trans *, + struct xfs_inode *, + uint, long); +extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *); +extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *); + +extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *, + struct xfs_inode *, + long, long, uint); + + +extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, + struct xfs_dquot *, + struct xfs_dquot *, + long, long, uint); +extern void xfs_trans_log_dquot(struct xfs_trans *, + struct xfs_dquot *); +extern void xfs_trans_dqjoin(struct xfs_trans *, + struct xfs_dquot *); +extern void xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint); + +/* + * Regular disk block quota reservations + */ +#define xfs_trans_reserve_blkquota(tp, ip, nblks) \ +xfs_trans_reserve_quota_nblks(tp, ip, nblks, 0, XFS_QMOPT_RES_REGBLKS) + +#define 
xfs_trans_unreserve_blkquota(tp, ip, nblks) \ +xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), 0, XFS_QMOPT_RES_REGBLKS) + +#define xfs_trans_reserve_quota(tp, udq, pdq, nb, ni, f) \ +xfs_trans_reserve_quota_bydquots(tp, udq, pdq, nb, ni, (f)|XFS_QMOPT_RES_REGBLKS) + +#define xfs_trans_unreserve_quota(tp, ud, pd, b, i, f) \ +xfs_trans_reserve_quota_bydquots(tp, ud, pd, -(b), -(i), (f)|XFS_QMOPT_RES_REGBLKS) + +/* + * Realtime disk block quota reservations (mp is accepted but not passed on; no inodes are reserved) + */ +#define xfs_trans_reserve_rtblkquota(mp, tp, ip, nblks) \ +xfs_trans_reserve_quota_nblks(tp, ip, nblks, 0, XFS_QMOPT_RES_RTBLKS) + +#define xfs_trans_unreserve_rtblkquota(tp, ip, nblks) \ +xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), 0, XFS_QMOPT_RES_RTBLKS) + +#define xfs_trans_reserve_rtquota(mp, tp, uq, pq, blks, f) \ +xfs_trans_reserve_quota_bydquots(tp, uq, pq, blks, 0, (f)|XFS_QMOPT_RES_RTBLKS) + +#define xfs_trans_unreserve_rtquota(tp, uq, pq, blks) \ +xfs_trans_reserve_quota_bydquots(tp, uq, pq, -(blks), 0, XFS_QMOPT_RES_RTBLKS) + +#endif /* __KERNEL__ */ + +#endif /* __XFS_QUOTA_H__ */ diff --git a/include/xfs_rtalloc.h b/include/xfs_rtalloc.h new file mode 100644 index 000000000..be2b88a34 --- /dev/null +++ b/include/xfs_rtalloc.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file.
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_RTALLOC_H__ +#define __XFS_RTALLOC_H__ + +struct xfs_mount; +struct xfs_trans; + +/* Min and max rt extent sizes, specified in bytes */ +#define XFS_MAX_RTEXTSIZE (1024 * 1024 * 1024) /* 1GB */ +#define XFS_DFL_RTEXTSIZE (64 * 1024) /* 64KB */ +#define XFS_MIN_RTEXTSIZE (4 * 1024) /* 4KB */ + +/* + * Constants for bit manipulations. + */ +#define XFS_NBBYLOG 3 /* log2(NBBY) */ +#define XFS_WORDLOG 2 /* log2(sizeof(xfs_rtword_t)) */ +#define XFS_NBWORDLOG (XFS_NBBYLOG + XFS_WORDLOG) +#define XFS_NBWORD (1 << XFS_NBWORDLOG) +#define XFS_WORDMASK ((1 << XFS_WORDLOG) - 1) + +#define XFS_BLOCKSIZE(mp) ((mp)->m_sb.sb_blocksize) +#define XFS_BLOCKMASK(mp) ((mp)->m_blockmask) +#define XFS_BLOCKWSIZE(mp) ((mp)->m_blockwsize) +#define XFS_BLOCKWMASK(mp) ((mp)->m_blockwmask) + +/* + * Summary and bit manipulation macros. 
+ */ +#define XFS_SUMOFFS(mp,ls,bb) ((int)((ls) * (mp)->m_sb.sb_rbmblocks + (bb))) +#define XFS_SUMOFFSTOBLOCK(mp,s) \ + (((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog) +#define XFS_SUMPTR(mp,bp,so) \ + ((xfs_suminfo_t *)((char *)XFS_BUF_PTR(bp) + \ + (((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp)))) + +#define XFS_BITTOBLOCK(mp,bi) ((bi) >> (mp)->m_blkbit_log) +#define XFS_BLOCKTOBIT(mp,bb) ((bb) << (mp)->m_blkbit_log) +#define XFS_BITTOWORD(mp,bi) \ + ((int)(((bi) >> XFS_NBWORDLOG) & XFS_BLOCKWMASK(mp))) + +#define XFS_RTMIN(a,b) ((a) < (b) ? (a) : (b)) +#define XFS_RTMAX(a,b) ((a) > (b) ? (a) : (b)) + +#define XFS_RTLOBIT(w) xfs_lowbit32(w) +#define XFS_RTHIBIT(w) xfs_highbit32(w) + +#if XFS_BIG_FILESYSTEMS +#define XFS_RTBLOCKLOG(b) xfs_highbit64(b) +#else +#define XFS_RTBLOCKLOG(b) xfs_highbit32(b) +#endif + +/* + * Function prototypes for exported functions. + */ + +/* + * Allocate an extent in the realtime subvolume, with the usual allocation + * parameters. The length units are all in realtime extents, as is the + * result block number. + */ +int /* error */ +xfs_rtallocate_extent( + struct xfs_trans *tp, /* transaction pointer */ + xfs_rtblock_t bno, /* starting block number to allocate */ + xfs_extlen_t minlen, /* minimum length to allocate */ + xfs_extlen_t maxlen, /* maximum length to allocate */ + xfs_extlen_t *len, /* out: actual length allocated */ + xfs_alloctype_t type, /* allocation type XFS_ALLOCTYPE... */ + int wasdel, /* was a delayed allocation extent */ + xfs_extlen_t prod, /* extent product factor */ + xfs_rtblock_t *rtblock); /* out: start block allocated */ + +/* + * Free an extent in the realtime subvolume. Length is expressed in + * realtime extents, as is the block number. 
+ */ +int /* error */ +xfs_rtfree_extent( + struct xfs_trans *tp, /* transaction pointer */ + xfs_rtblock_t bno, /* starting block number to free */ + xfs_extlen_t len); /* length of extent freed */ + +/* + * Initialize realtime fields in the mount structure. + */ +int /* error */ +xfs_rtmount_init( + struct xfs_mount *mp); /* file system mount structure */ + +/* + * Get the bitmap and summary inodes into the mount structure + * at mount time. + */ +int /* error */ +xfs_rtmount_inodes( + struct xfs_mount *mp); /* file system mount structure */ + +/* + * Pick an extent for allocation at the start of a new realtime file. + * Use the sequence number stored in the atime field of the bitmap inode. + * Translate this to a fraction of the rtextents, and return the product + * of rtextents and the fraction. + * The fraction sequence is 0, 1/2, 1/4, 3/4, 1/8, ..., 7/8, 1/16, ... + */ +int /* error */ +xfs_rtpick_extent( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_extlen_t len, /* allocation length (rtextents) */ + xfs_rtblock_t *pick); /* result rt extent */ + +#ifdef XFSDEBUG +/* + * Debug code: print out the value of a range in the bitmap. + */ +void +xfs_rtprint_range( + struct xfs_mount *mp, /* file system mount structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to print */ + xfs_extlen_t len); /* length to print */ + +/* + * Debug code: print the summary file. + */ +void +xfs_rtprint_summary( + struct xfs_mount *mp, /* file system mount structure */ + struct xfs_trans *tp); /* transaction pointer */ +#endif /* XFSDEBUG */ + +#endif /* __XFS_RTALLOC_H__ */ diff --git a/include/xfs_sb.h b/include/xfs_sb.h new file mode 100644 index 000000000..6526d107e --- /dev/null +++ b/include/xfs_sb.h @@ -0,0 +1,490 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SB_H__ +#define __XFS_SB_H__ + +/* + * Super block + * Fits into a 512-byte buffer at daddr_t 0 of each allocation group. + * Only the first of these is ever updated except during growfs. 
+ */ + +struct xfs_buf; +struct xfs_mount; + +#define XFS_SB_MAGIC 0x58465342 /* 'XFSB' */ +#define XFS_SB_VERSION_1 1 /* 5.3, 6.0.1, 6.1 */ +#define XFS_SB_VERSION_2 2 /* 6.2 - attributes */ +#define XFS_SB_VERSION_3 3 /* 6.2 - new inode version */ +#define XFS_SB_VERSION_4 4 /* 6.2+ - bitmask version */ +#define XFS_SB_VERSION_NUMBITS 0x000f +#define XFS_SB_VERSION_ALLFBITS 0xfff0 +#define XFS_SB_VERSION_SASHFBITS 0xf000 +#define XFS_SB_VERSION_REALFBITS 0x0ff0 +#define XFS_SB_VERSION_ATTRBIT 0x0010 +#define XFS_SB_VERSION_NLINKBIT 0x0020 +#define XFS_SB_VERSION_QUOTABIT 0x0040 +#define XFS_SB_VERSION_ALIGNBIT 0x0080 +#define XFS_SB_VERSION_DALIGNBIT 0x0100 +#define XFS_SB_VERSION_SHAREDBIT 0x0200 +#define XFS_SB_VERSION_EXTFLGBIT 0x1000 +#define XFS_SB_VERSION_DIRV2BIT 0x2000 +#define XFS_SB_VERSION_OKSASHFBITS \ + (XFS_SB_VERSION_EXTFLGBIT | \ + XFS_SB_VERSION_DIRV2BIT) +#define XFS_SB_VERSION_OKREALFBITS \ + (XFS_SB_VERSION_ATTRBIT | \ + XFS_SB_VERSION_NLINKBIT | \ + XFS_SB_VERSION_QUOTABIT | \ + XFS_SB_VERSION_ALIGNBIT | \ + XFS_SB_VERSION_DALIGNBIT | \ + XFS_SB_VERSION_SHAREDBIT) +#define XFS_SB_VERSION_OKSASHBITS \ + (XFS_SB_VERSION_NUMBITS | \ + XFS_SB_VERSION_REALFBITS | \ + XFS_SB_VERSION_OKSASHFBITS) +#define XFS_SB_VERSION_OKREALBITS \ + (XFS_SB_VERSION_NUMBITS | \ + XFS_SB_VERSION_OKREALFBITS | \ + XFS_SB_VERSION_OKSASHFBITS) +#define XFS_SB_VERSION_MKFS(ia,dia,extflag,dirv2) \ + (((ia) || (dia) || (extflag) || (dirv2)) ? \ + (XFS_SB_VERSION_4 | \ + ((ia) ? XFS_SB_VERSION_ALIGNBIT : 0) | \ + ((dia) ? XFS_SB_VERSION_DALIGNBIT : 0) | \ + ((extflag) ? XFS_SB_VERSION_EXTFLGBIT : 0) | \ + ((dirv2) ? 
XFS_SB_VERSION_DIRV2BIT : 0)) : \ + XFS_SB_VERSION_1) + +typedef struct xfs_sb +{ + __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */ + __uint32_t sb_blocksize; /* logical block size, bytes */ + xfs_drfsbno_t sb_dblocks; /* number of data blocks */ + xfs_drfsbno_t sb_rblocks; /* number of realtime blocks */ + xfs_drtbno_t sb_rextents; /* number of realtime extents */ + uuid_t sb_uuid; /* file system unique id */ + xfs_dfsbno_t sb_logstart; /* starting block of log if internal */ + xfs_ino_t sb_rootino; /* root inode number */ + xfs_ino_t sb_rbmino; /* bitmap inode for realtime extents */ + xfs_ino_t sb_rsumino; /* summary inode for rt bitmap */ + xfs_agblock_t sb_rextsize; /* realtime extent size, blocks */ + xfs_agblock_t sb_agblocks; /* size of an allocation group */ + xfs_agnumber_t sb_agcount; /* number of allocation groups */ + xfs_extlen_t sb_rbmblocks; /* number of rt bitmap blocks */ + xfs_extlen_t sb_logblocks; /* number of log blocks */ + __uint16_t sb_versionnum; /* header version == XFS_SB_VERSION */ + __uint16_t sb_sectsize; /* volume sector size, bytes */ + __uint16_t sb_inodesize; /* inode size, bytes */ + __uint16_t sb_inopblock; /* inodes per block */ + char sb_fname[12]; /* file system name */ + __uint8_t sb_blocklog; /* log2 of sb_blocksize */ + __uint8_t sb_sectlog; /* log2 of sb_sectsize */ + __uint8_t sb_inodelog; /* log2 of sb_inodesize */ + __uint8_t sb_inopblog; /* log2 of sb_inopblock */ + __uint8_t sb_agblklog; /* log2 of sb_agblocks (rounded up) */ + __uint8_t sb_rextslog; /* log2 of sb_rextents */ + __uint8_t sb_inprogress; /* mkfs is in progress, don't mount */ + __uint8_t sb_imax_pct; /* max % of fs for inode space */ + /* statistics */ + /* + * These fields must remain contiguous. If you really + * want to change their layout, make sure you fix the + * code in xfs_trans_apply_sb_deltas(). 
+ */ + __uint64_t sb_icount; /* allocated inodes */ + __uint64_t sb_ifree; /* free inodes */ + __uint64_t sb_fdblocks; /* free data blocks */ + __uint64_t sb_frextents; /* free realtime extents */ + /* + * End contiguous fields. + */ + xfs_ino_t sb_uquotino; /* user quota inode */ + xfs_ino_t sb_pquotino; /* project quota inode */ + __uint16_t sb_qflags; /* quota flags */ + __uint8_t sb_flags; /* misc. flags */ + __uint8_t sb_shared_vn; /* shared version number */ + xfs_extlen_t sb_inoalignmt; /* inode chunk alignment, fsblocks */ + __uint32_t sb_unit; /* stripe or raid unit */ + __uint32_t sb_width; /* stripe or raid width */ + __uint8_t sb_dirblklog; /* log2 of dir block size (fsbs) */ + __uint8_t sb_dummy[7]; /* padding */ +} xfs_sb_t; + +/* + * Sequence number values for the fields. + */ +typedef enum { + XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS, + XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO, + XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS, + XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS, + XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE, + XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG, + XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG, + XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT, + XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO, + XFS_SBS_PQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN, + XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG, + XFS_SBS_DUMMY, + XFS_SBS_FIELDCOUNT +} xfs_sb_field_t; + +/* + * Mask values, defined based on the xfs_sb_field_t values. + * Only define the ones we're using. 
+ */ +#define XFS_SB_MVAL(x) (1LL << XFS_SBS_ ## x) +#define XFS_SB_UUID XFS_SB_MVAL(UUID) +#define XFS_SB_FNAME XFS_SB_MVAL(FNAME) +#define XFS_SB_ROOTINO XFS_SB_MVAL(ROOTINO) +#define XFS_SB_RBMINO XFS_SB_MVAL(RBMINO) +#define XFS_SB_RSUMINO XFS_SB_MVAL(RSUMINO) +#define XFS_SB_VERSIONNUM XFS_SB_MVAL(VERSIONNUM) +#define XFS_SB_UQUOTINO XFS_SB_MVAL(UQUOTINO) +#define XFS_SB_PQUOTINO XFS_SB_MVAL(PQUOTINO) +#define XFS_SB_QFLAGS XFS_SB_MVAL(QFLAGS) +#define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN) +#define XFS_SB_UNIT XFS_SB_MVAL(UNIT) +#define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH) +#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT) +#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1) +#define XFS_SB_MOD_BITS \ + (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \ + XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_PQUOTINO | \ + XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH) + +/* + * Misc. Flags - warning - these will be cleared by xfs_repair unless + * a feature bit is set when the flag is used. + */ +#define XFS_SBF_NOFLAGS 0x00 /* no flags set */ +#define XFS_SBF_READONLY 0x01 /* only read-only mounts allowed */ + +/* + * define max. 
shared version we can interoperate with + */ +#define XFS_SB_MAX_SHARED_VN 0 + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_NUM) +int xfs_sb_version_num(xfs_sb_t *sbp); +#define XFS_SB_VERSION_NUM(sbp) xfs_sb_version_num(sbp) +#else +#define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_GOOD_VERSION) +int xfs_sb_good_version(xfs_sb_t *sbp); +#define XFS_SB_GOOD_VERSION(sbp) xfs_sb_good_version(sbp) +#else +#define XFS_SB_GOOD_VERSION_INT(sbp) \ + ((((sbp)->sb_versionnum >= XFS_SB_VERSION_1) && \ + ((sbp)->sb_versionnum <= XFS_SB_VERSION_3)) || \ + ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ + !((sbp)->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) +#ifdef __KERNEL__ +#define XFS_SB_GOOD_VERSION(sbp) \ + (XFS_SB_GOOD_VERSION_INT(sbp) && \ + (sbp)->sb_shared_vn <= XFS_SB_MAX_SHARED_VN) )) +#else +/* + * extra 2 paren's here (( to unconfuse paren-matching editors + * like vi because XFS_SB_GOOD_VERSION_INT is a partial expression + * and the two XFS_SB_GOOD_VERSION's each 2 more close paren's to + * complete the expression. + */ +#define XFS_SB_GOOD_VERSION(sbp) \ + (XFS_SB_GOOD_VERSION_INT(sbp) && \ + (!((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT) || \ + (sbp)->sb_shared_vn <= XFS_SB_MAX_SHARED_VN)) )) +#endif /* __KERNEL__ */ +#endif + +#define XFS_SB_GOOD_SASH_VERSION(sbp) \ + ((((sbp)->sb_versionnum >= XFS_SB_VERSION_1) && \ + ((sbp)->sb_versionnum <= XFS_SB_VERSION_3)) || \ + ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ + !((sbp)->sb_versionnum & ~XFS_SB_VERSION_OKSASHBITS))) + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_TONEW) +unsigned xfs_sb_version_tonew(unsigned v); +#define XFS_SB_VERSION_TONEW(v) xfs_sb_version_tonew(v) +#else +#define XFS_SB_VERSION_TONEW(v) \ + ((((v) == XFS_SB_VERSION_1) ? \ + 0 : \ + (((v) == XFS_SB_VERSION_2) ? 
\ + XFS_SB_VERSION_ATTRBIT : \ + (XFS_SB_VERSION_ATTRBIT | XFS_SB_VERSION_NLINKBIT))) | \ + XFS_SB_VERSION_4) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_TOOLD) +unsigned xfs_sb_version_toold(unsigned v); +#define XFS_SB_VERSION_TOOLD(v) xfs_sb_version_toold(v) +#else +#define XFS_SB_VERSION_TOOLD(v) \ + (((v) & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) ? \ + 0 : \ + (((v) & XFS_SB_VERSION_NLINKBIT) ? \ + XFS_SB_VERSION_3 : \ + (((v) & XFS_SB_VERSION_ATTRBIT) ? \ + XFS_SB_VERSION_2 : \ + XFS_SB_VERSION_1))) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASATTR) +int xfs_sb_version_hasattr(xfs_sb_t *sbp); +#define XFS_SB_VERSION_HASATTR(sbp) xfs_sb_version_hasattr(sbp) +#else +#define XFS_SB_VERSION_HASATTR(sbp) \ + (((sbp)->sb_versionnum == XFS_SB_VERSION_2) || \ + ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \ + ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ + ((sbp)->sb_versionnum & XFS_SB_VERSION_ATTRBIT))) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDATTR) +void xfs_sb_version_addattr(xfs_sb_t *sbp); +#define XFS_SB_VERSION_ADDATTR(sbp) xfs_sb_version_addattr(sbp) +#else +#define XFS_SB_VERSION_ADDATTR(sbp) \ + ((sbp)->sb_versionnum = \ + (((sbp)->sb_versionnum == XFS_SB_VERSION_1) ? \ + XFS_SB_VERSION_2 : \ + ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) ? 
\ + ((sbp)->sb_versionnum | XFS_SB_VERSION_ATTRBIT) : \ + (XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT)))) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASNLINK) +int xfs_sb_version_hasnlink(xfs_sb_t *sbp); +#define XFS_SB_VERSION_HASNLINK(sbp) xfs_sb_version_hasnlink(sbp) +#else +#define XFS_SB_VERSION_HASNLINK(sbp) \ + (((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \ + ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ + ((sbp)->sb_versionnum & XFS_SB_VERSION_NLINKBIT))) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDNLINK) +void xfs_sb_version_addnlink(xfs_sb_t *sbp); +#define XFS_SB_VERSION_ADDNLINK(sbp) xfs_sb_version_addnlink(sbp) +#else +#define XFS_SB_VERSION_ADDNLINK(sbp) \ + ((sbp)->sb_versionnum = \ + ((sbp)->sb_versionnum <= XFS_SB_VERSION_2 ? \ + XFS_SB_VERSION_3 : \ + ((sbp)->sb_versionnum | XFS_SB_VERSION_NLINKBIT))) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASQUOTA) +int xfs_sb_version_hasquota(xfs_sb_t *sbp); +#define XFS_SB_VERSION_HASQUOTA(sbp) xfs_sb_version_hasquota(sbp) +#else +#define XFS_SB_VERSION_HASQUOTA(sbp) \ + ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ + ((sbp)->sb_versionnum & XFS_SB_VERSION_QUOTABIT)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDQUOTA) +void xfs_sb_version_addquota(xfs_sb_t *sbp); +#define XFS_SB_VERSION_ADDQUOTA(sbp) xfs_sb_version_addquota(sbp) +#else +#define XFS_SB_VERSION_ADDQUOTA(sbp) \ + ((sbp)->sb_versionnum = \ + (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 ? 
\ + ((sbp)->sb_versionnum | XFS_SB_VERSION_QUOTABIT) : \ + (XFS_SB_VERSION_TONEW((sbp)->sb_versionnum) | \ + XFS_SB_VERSION_QUOTABIT))) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASALIGN) +int xfs_sb_version_hasalign(xfs_sb_t *sbp); +#define XFS_SB_VERSION_HASALIGN(sbp) xfs_sb_version_hasalign(sbp) +#else +#define XFS_SB_VERSION_HASALIGN(sbp) \ + ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ + ((sbp)->sb_versionnum & XFS_SB_VERSION_ALIGNBIT)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_SUBALIGN) +void xfs_sb_version_subalign(xfs_sb_t *sbp); +#define XFS_SB_VERSION_SUBALIGN(sbp) xfs_sb_version_subalign(sbp) +#else +#define XFS_SB_VERSION_SUBALIGN(sbp) \ + ((sbp)->sb_versionnum = \ + XFS_SB_VERSION_TOOLD((sbp)->sb_versionnum & ~XFS_SB_VERSION_ALIGNBIT)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASDALIGN) +int xfs_sb_version_hasdalign(xfs_sb_t *sbp); +#define XFS_SB_VERSION_HASDALIGN(sbp) xfs_sb_version_hasdalign(sbp) +#else +#define XFS_SB_VERSION_HASDALIGN(sbp) \ + ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ + ((sbp)->sb_versionnum & XFS_SB_VERSION_DALIGNBIT)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDDALIGN) +int xfs_sb_version_adddalign(xfs_sb_t *sbp); +#define XFS_SB_VERSION_ADDDALIGN(sbp) xfs_sb_version_adddalign(sbp) +#else +#define XFS_SB_VERSION_ADDDALIGN(sbp) \ + ((sbp)->sb_versionnum = \ + ((sbp)->sb_versionnum | XFS_SB_VERSION_DALIGNBIT)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASSHARED) +int xfs_sb_version_hasshared(xfs_sb_t *sbp); +#define XFS_SB_VERSION_HASSHARED(sbp) xfs_sb_version_hasshared(sbp) +#else +#define XFS_SB_VERSION_HASSHARED(sbp) \ + ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ + ((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDSHARED) +int xfs_sb_version_addshared(xfs_sb_t *sbp); 
+#define XFS_SB_VERSION_ADDSHARED(sbp) xfs_sb_version_addshared(sbp) +#else +#define XFS_SB_VERSION_ADDSHARED(sbp) \ + ((sbp)->sb_versionnum = \ + ((sbp)->sb_versionnum | XFS_SB_VERSION_SHAREDBIT)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_SUBSHARED) +int xfs_sb_version_subshared(xfs_sb_t *sbp); +#define XFS_SB_VERSION_SUBSHARED(sbp) xfs_sb_version_subshared(sbp) +#else +#define XFS_SB_VERSION_SUBSHARED(sbp) \ + ((sbp)->sb_versionnum = \ + ((sbp)->sb_versionnum & ~XFS_SB_VERSION_SHAREDBIT)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASDIRV2) +int xfs_sb_version_hasdirv2(xfs_sb_t *sbp); +#define XFS_SB_VERSION_HASDIRV2(sbp) xfs_sb_version_hasdirv2(sbp) +#else +#define XFS_SB_VERSION_HASDIRV2(sbp) \ + ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ + ((sbp)->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_HASEXTFLGBIT) +int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp); +#define XFS_SB_VERSION_HASEXTFLGBIT(sbp) xfs_sb_version_hasextflgbit(sbp) +#else +#define XFS_SB_VERSION_HASEXTFLGBIT(sbp) \ + ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ + ((sbp)->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_ADDEXTFLGBIT) +int xfs_sb_version_addextflgbit(xfs_sb_t *sbp); +#define XFS_SB_VERSION_ADDEXTFLGBIT(sbp) xfs_sb_version_addextflgbit(sbp) +#else +#define XFS_SB_VERSION_ADDEXTFLGBIT(sbp) \ + ((sbp)->sb_versionnum = \ + ((sbp)->sb_versionnum | XFS_SB_VERSION_EXTFLGBIT)) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_VERSION_SUBEXTFLGBIT) +int xfs_sb_version_subextflgbit(xfs_sb_t *sbp); +#define XFS_SB_VERSION_SUBEXTFLGBIT(sbp) xfs_sb_version_subextflgbit(sbp) +#else +#define XFS_SB_VERSION_SUBEXTFLGBIT(sbp) \ + ((sbp)->sb_versionnum = \ + ((sbp)->sb_versionnum & ~XFS_SB_VERSION_EXTFLGBIT)) +#endif + +/* + * end of superblock version macros + */ + +#define 
XFS_SB_DADDR ((xfs_daddr_t)0) /* daddr in filesystem/ag */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_SB_BLOCK) +xfs_agblock_t xfs_sb_block(struct xfs_mount *mp); +#define XFS_SB_BLOCK(mp) xfs_sb_block(mp) +#else +#define XFS_SB_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_SB_DADDR) +#endif + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_HDR_BLOCK) +xfs_agblock_t xfs_hdr_block(struct xfs_mount *mp, xfs_daddr_t d); +#define XFS_HDR_BLOCK(mp,d) xfs_hdr_block(mp,d) +#else +#define XFS_HDR_BLOCK(mp,d) ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp,d))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_DADDR_TO_FSB) +xfs_fsblock_t xfs_daddr_to_fsb(struct xfs_mount *mp, xfs_daddr_t d); +#define XFS_DADDR_TO_FSB(mp,d) xfs_daddr_to_fsb(mp,d) +#else +#define XFS_DADDR_TO_FSB(mp,d) \ + XFS_AGB_TO_FSB(mp, XFS_DADDR_TO_AGNO(mp,d), XFS_DADDR_TO_AGBNO(mp,d)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_FSB_TO_DADDR) +xfs_daddr_t xfs_fsb_to_daddr(struct xfs_mount *mp, xfs_fsblock_t fsbno); +#define XFS_FSB_TO_DADDR(mp,fsbno) xfs_fsb_to_daddr(mp,fsbno) +#else +#define XFS_FSB_TO_DADDR(mp,fsbno) \ + XFS_AGB_TO_DADDR(mp, XFS_FSB_TO_AGNO(mp,fsbno), \ + XFS_FSB_TO_AGBNO(mp,fsbno)) +#endif + +/* + * File system block to basic block conversions. + */ +#define XFS_FSB_TO_BB(mp,fsbno) ((fsbno) << (mp)->m_blkbb_log) +#define XFS_BB_TO_FSB(mp,bb) \ + (((bb) + (XFS_FSB_TO_BB(mp,1) - 1)) >> (mp)->m_blkbb_log) +#define XFS_BB_TO_FSBT(mp,bb) ((bb) >> (mp)->m_blkbb_log) +#define XFS_BB_FSB_OFFSET(mp,bb) ((bb) & ((mp)->m_bsize - 1)) + +/* + * File system block to byte conversions. 
+ */ +#define XFS_FSB_TO_B(mp,fsbno) ((xfs_fsize_t)(fsbno) << \ + (mp)->m_sb.sb_blocklog) +#define XFS_B_TO_FSB(mp,b) \ + ((((__uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog) +#define XFS_B_TO_FSBT(mp,b) (((__uint64_t)(b)) >> (mp)->m_sb.sb_blocklog) +#define XFS_B_FSB_OFFSET(mp,b) ((b) & (mp)->m_blockmask) + +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_BUF_TO_SBP) +xfs_sb_t *xfs_buf_to_sbp(struct xfs_buf *bp); +#define XFS_BUF_TO_SBP(bp) xfs_buf_to_sbp(bp) +#else +#define XFS_BUF_TO_SBP(bp) ((xfs_sb_t *)XFS_BUF_PTR(bp)) +#endif + +#endif /* __XFS_SB_H__ */ diff --git a/include/xfs_trans.h b/include/xfs_trans.h new file mode 100644 index 000000000..49fbc0adf --- /dev/null +++ b/include/xfs_trans.h @@ -0,0 +1,1000 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_TRANS_H__ +#define __XFS_TRANS_H__ + +/* + * This is the structure written in the log at the head of + * every transaction. It identifies the type and id of the + * transaction, and contains the number of items logged by + * the transaction so we know how many to expect during recovery. + * + * Do not change the below structure without redoing the code in + * xlog_recover_add_to_trans() and xlog_recover_add_to_cont_trans(). + */ +typedef struct xfs_trans_header { + uint th_magic; /* magic number */ + uint th_type; /* transaction type */ + __int32_t th_tid; /* transaction id (unused) */ + uint th_num_items; /* num items logged by trans */ +} xfs_trans_header_t; + +#define XFS_TRANS_HEADER_MAGIC 0x5452414e /* TRAN */ + +/* + * Log item types. + */ +#define XFS_LI_5_3_BUF 0x1234 /* v1 bufs, 1-block inode buffers */ +#define XFS_LI_5_3_INODE 0x1235 /* 1-block inode buffers */ +#define XFS_LI_EFI 0x1236 +#define XFS_LI_EFD 0x1237 +#define XFS_LI_IUNLINK 0x1238 +#define XFS_LI_6_1_INODE 0x1239 /* 4K non-aligned inode bufs */ +#define XFS_LI_6_1_BUF 0x123a /* v1, 4K inode buffers */ +#define XFS_LI_INODE 0x123b /* aligned ino chunks, var-size ibufs */ +#define XFS_LI_BUF 0x123c /* v2 bufs, variable sized inode bufs */ +#define XFS_LI_DQUOT 0x123d +#define XFS_LI_QUOTAOFF 0x123e +#define XFS_LI_RPC 0x123f /* CXFS RPC return info */ + +/* + * Transaction types. Used to distinguish types of buffers. 
+ */ +#define XFS_TRANS_SETATTR_NOT_SIZE 1 +#define XFS_TRANS_SETATTR_SIZE 2 +#define XFS_TRANS_INACTIVE 3 +#define XFS_TRANS_CREATE 4 +#define XFS_TRANS_CREATE_TRUNC 5 +#define XFS_TRANS_TRUNCATE_FILE 6 +#define XFS_TRANS_REMOVE 7 +#define XFS_TRANS_LINK 8 +#define XFS_TRANS_RENAME 9 +#define XFS_TRANS_MKDIR 10 +#define XFS_TRANS_RMDIR 11 +#define XFS_TRANS_SYMLINK 12 +#define XFS_TRANS_SET_DMATTRS 13 +#define XFS_TRANS_GROWFS 14 +#define XFS_TRANS_STRAT_WRITE 15 +#define XFS_TRANS_DIOSTRAT 16 +#define XFS_TRANS_WRITE_SYNC 17 +#define XFS_TRANS_WRITEID 18 +#define XFS_TRANS_ADDAFORK 19 +#define XFS_TRANS_ATTRINVAL 20 +#define XFS_TRANS_ATRUNCATE 21 +#define XFS_TRANS_ATTR_SET 22 +#define XFS_TRANS_ATTR_RM 23 +#define XFS_TRANS_ATTR_FLAG 24 +#define XFS_TRANS_CLEAR_AGI_BUCKET 25 +#define XFS_TRANS_QM_SBCHANGE 26 +/* + * Dummy entries since we use the transaction type to index into the + * trans_type[] in xlog_recover_print_trans_head() + */ +#define XFS_TRANS_DUMMY1 27 +#define XFS_TRANS_DUMMY2 28 +#define XFS_TRANS_QM_QUOTAOFF 29 +#define XFS_TRANS_QM_DQALLOC 30 +#define XFS_TRANS_QM_SETQLIM 31 +#define XFS_TRANS_QM_DQCLUSTER 32 +#define XFS_TRANS_QM_QINOCREATE 33 +#define XFS_TRANS_QM_QUOTAOFF_END 34 +#define XFS_TRANS_SB_UNIT 35 +#define XFS_TRANS_FSYNC_TS 36 +#define XFS_TRANS_GROWFSRT_ALLOC 37 +#define XFS_TRANS_GROWFSRT_ZERO 38 +#define XFS_TRANS_GROWFSRT_FREE 39 +#define XFS_TRANS_SWAPEXT 40 +/* new transaction types need to be reflected in xfs_logprint(8) */ + + +#ifdef __KERNEL__ +struct xfs_buf; +struct buftarg; +struct xfs_efd_log_item; +struct xfs_efi_log_item; +struct xfs_inode; +struct xfs_item_ops; +struct xfs_log_iovec; +struct xfs_log_item; +struct xfs_log_item_desc; +struct xfs_mount; +struct xfs_trans; +struct xfs_dquot_acct; + +typedef struct xfs_ail_entry { + struct xfs_log_item *ail_forw; /* AIL forw pointer */ + struct xfs_log_item *ail_back; /* AIL back pointer */ +} xfs_ail_entry_t; + +/* + * This structure is passed as a parameter to 
xfs_trans_push_ail() + * and is used to track what LSN the waiting processes are + * waiting to become unused. + */ +typedef struct xfs_ail_ticket { + xfs_lsn_t at_lsn; /* lsn waiting for */ + struct xfs_ail_ticket *at_forw; /* wait list ptr */ + struct xfs_ail_ticket *at_back; /* wait list ptr */ + sv_t at_sema; /* wait sema */ +} xfs_ail_ticket_t; + + +typedef struct xfs_log_item { + xfs_ail_entry_t li_ail; /* AIL pointers */ + xfs_lsn_t li_lsn; /* last on-disk lsn */ + struct xfs_log_item_desc *li_desc; /* ptr to current desc*/ + struct xfs_mount *li_mountp; /* ptr to fs mount */ + uint li_type; /* item type */ + uint li_flags; /* misc flags */ + struct xfs_log_item *li_bio_list; /* buffer item list */ + void (*li_cb)(struct xfs_buf *, + struct xfs_log_item *); + /* buffer item iodone */ + /* callback func */ + struct xfs_item_ops *li_ops; /* function list */ +} xfs_log_item_t; + +#define XFS_LI_IN_AIL 0x1 +#define XFS_LI_ABORTED 0x2 + +typedef struct xfs_item_ops { + uint (*iop_size)(xfs_log_item_t *); + void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); + void (*iop_pin)(xfs_log_item_t *); + void (*iop_unpin)(xfs_log_item_t *); + void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *); + uint (*iop_trylock)(xfs_log_item_t *); + void (*iop_unlock)(xfs_log_item_t *); + xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); + void (*iop_push)(xfs_log_item_t *); + void (*iop_abort)(xfs_log_item_t *); + void (*iop_pushbuf)(xfs_log_item_t *); + void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); +} xfs_item_ops_t; + +#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip) +#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) +#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) +#define IOP_UNPIN(ip) (*(ip)->li_ops->iop_unpin)(ip) +#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp) +#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) +#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) +#define IOP_COMMITTED(ip,
lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) +#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip) +#define IOP_ABORT(ip) (*(ip)->li_ops->iop_abort)(ip) +#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip) +#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn) + +/* + * Return values for the IOP_TRYLOCK() routines. + */ +#define XFS_ITEM_SUCCESS 0 +#define XFS_ITEM_PINNED 1 +#define XFS_ITEM_LOCKED 2 +#define XFS_ITEM_FLUSHING 3 +#define XFS_ITEM_PUSHBUF 4 + +#endif /* __KERNEL__ */ + +/* + * This structure is used to track log items associated with + * a transaction. It points to the log item and keeps some + * flags to track the state of the log item. It also tracks + * the amount of space needed to log the item it describes + * once we get to commit processing (see xfs_trans_commit()). + */ +typedef struct xfs_log_item_desc { + xfs_log_item_t *lid_item; + ushort lid_size; + unsigned char lid_flags; + unsigned char lid_index; +} xfs_log_item_desc_t; + +#define XFS_LID_DIRTY 0x1 +#define XFS_LID_PINNED 0x2 +#define XFS_LID_SYNC_UNLOCK 0x4 + +/* + * This structure is used to maintain a chunk list of log_item_desc + * structures. The free field is a bitmask indicating which descriptors + * in this chunk's array are free. The unused field is the first value + * not used since this chunk was allocated. + */ +#define XFS_LIC_NUM_SLOTS 15 +typedef struct xfs_log_item_chunk { + struct xfs_log_item_chunk *lic_next; + ushort lic_free; + ushort lic_unused; + xfs_log_item_desc_t lic_descs[XFS_LIC_NUM_SLOTS]; +} xfs_log_item_chunk_t; + +#define XFS_LIC_MAX_SLOT (XFS_LIC_NUM_SLOTS - 1) +#define XFS_LIC_FREEMASK ((1 << XFS_LIC_NUM_SLOTS) - 1) + + +/* + * Initialize the given chunk. Set the chunk's free descriptor mask + * to indicate that all descriptors are free. The caller gets to set + * lic_unused to the right value (0 matches all free). The + * lic_descs.lid_index values are set up as each desc is allocated. 
+ */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_INIT) +void xfs_lic_init(xfs_log_item_chunk_t *cp); +#define XFS_LIC_INIT(cp) xfs_lic_init(cp) +#else +#define XFS_LIC_INIT(cp) ((cp)->lic_free = XFS_LIC_FREEMASK) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_INIT_SLOT) +void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot); +#define XFS_LIC_INIT_SLOT(cp,slot) xfs_lic_init_slot(cp, slot) +#else +#define XFS_LIC_INIT_SLOT(cp,slot) \ + ((cp)->lic_descs[slot].lid_index = (unsigned char)(slot)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_VACANCY) +int xfs_lic_vacancy(xfs_log_item_chunk_t *cp); +#define XFS_LIC_VACANCY(cp) xfs_lic_vacancy(cp) +#else +#define XFS_LIC_VACANCY(cp) (((cp)->lic_free) & XFS_LIC_FREEMASK) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_ALL_FREE) +void xfs_lic_all_free(xfs_log_item_chunk_t *cp); +#define XFS_LIC_ALL_FREE(cp) xfs_lic_all_free(cp) +#else +#define XFS_LIC_ALL_FREE(cp) ((cp)->lic_free = XFS_LIC_FREEMASK) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_ARE_ALL_FREE) +int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp); +#define XFS_LIC_ARE_ALL_FREE(cp) xfs_lic_are_all_free(cp) +#else +#define XFS_LIC_ARE_ALL_FREE(cp) (((cp)->lic_free & XFS_LIC_FREEMASK) ==\ + XFS_LIC_FREEMASK) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_ISFREE) +int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot); +#define XFS_LIC_ISFREE(cp,slot) xfs_lic_isfree(cp,slot) +#else +#define XFS_LIC_ISFREE(cp,slot) ((cp)->lic_free & (1 << (slot))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_CLAIM) +void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot); +#define XFS_LIC_CLAIM(cp,slot) xfs_lic_claim(cp,slot) +#else +#define XFS_LIC_CLAIM(cp,slot) ((cp)->lic_free &= ~(1 << (slot))) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_RELSE) +void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot); +#define XFS_LIC_RELSE(cp,slot) 
xfs_lic_relse(cp,slot) +#else +#define XFS_LIC_RELSE(cp,slot) ((cp)->lic_free |= 1 << (slot)) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_SLOT) +xfs_log_item_desc_t *xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot); +#define XFS_LIC_SLOT(cp,slot) xfs_lic_slot(cp,slot) +#else +#define XFS_LIC_SLOT(cp,slot) (&((cp)->lic_descs[slot])) +#endif +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_DESC_TO_SLOT) +int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp); +#define XFS_LIC_DESC_TO_SLOT(dp) xfs_lic_desc_to_slot(dp) +#else +#define XFS_LIC_DESC_TO_SLOT(dp) ((uint)((dp)->lid_index)) +#endif +/* + * Calculate the address of a chunk given a descriptor pointer: + * dp - dp->lid_index give the address of the start of the lic_descs array. + * From this we subtract the offset of the lic_descs field in a chunk. + * All of this yields the address of the chunk, which is + * cast to a chunk pointer. + */ +#if XFS_WANT_FUNCS || (XFS_WANT_SPACE && XFSSO_XFS_LIC_DESC_TO_CHUNK) +xfs_log_item_chunk_t *xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp); +#define XFS_LIC_DESC_TO_CHUNK(dp) xfs_lic_desc_to_chunk(dp) +#else +#define XFS_LIC_DESC_TO_CHUNK(dp) ((xfs_log_item_chunk_t*) \ + (((xfs_caddr_t)((dp) - (dp)->lid_index)) -\ + (xfs_caddr_t)(((xfs_log_item_chunk_t*) \ + 0)->lic_descs))) +#endif + +#ifdef __KERNEL__ +/* + * This is the type of function which can be given to xfs_trans_callback() + * to be called upon the transaction's commit to disk. + */ +typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *); + +/* + * This is the structure maintained for every active transaction. 
+ */ +typedef struct xfs_trans { + unsigned int t_magic; /* magic number */ + xfs_log_callback_t t_logcb; /* log callback struct */ + struct xfs_trans *t_forw; /* async list pointers */ + struct xfs_trans *t_back; /* async list pointers */ + unsigned int t_type; /* transaction type */ + unsigned int t_log_res; /* amt of log space resvd */ + unsigned int t_log_count; /* count for perm log res */ + unsigned int t_blk_res; /* # of blocks resvd */ + unsigned int t_blk_res_used; /* # of resvd blocks used */ + unsigned int t_rtx_res; /* # of rt extents resvd */ + unsigned int t_rtx_res_used; /* # of resvd rt extents used */ + xfs_log_ticket_t t_ticket; /* log mgr ticket */ + sema_t t_sema; /* sema for commit completion */ + xfs_lsn_t t_lsn; /* log seq num of trans commit*/ + struct xfs_mount *t_mountp; /* ptr to fs mount struct */ + struct xfs_dquot_acct *t_dqinfo; /* accting info for dquots */ + xfs_trans_callback_t t_callback; /* transaction callback */ + void *t_callarg; /* callback arg */ + unsigned int t_flags; /* misc flags */ + long t_icount_delta; /* superblock icount change */ + long t_ifree_delta; /* superblock ifree change */ + long t_fdblocks_delta; /* superblock fdblocks chg */ + long t_res_fdblocks_delta; /* on-disk only chg */ + long t_frextents_delta;/* superblock freextents chg*/ + long t_res_frextents_delta; /* on-disk only chg */ + long t_ag_freeblks_delta; /* debugging counter */ + long t_ag_flist_delta; /* debugging counter */ + long t_ag_btree_delta; /* debugging counter */ + long t_dblocks_delta;/* superblock dblocks change */ + long t_agcount_delta;/* superblock agcount change */ + long t_imaxpct_delta;/* superblock imaxpct change */ + long t_rextsize_delta;/* superblock rextsize chg */ + long t_rbmblocks_delta;/* superblock rbmblocks chg */ + long t_rblocks_delta;/* superblock rblocks change */ + long t_rextents_delta;/* superblocks rextents chg */ + long t_rextslog_delta;/* superblocks rextslog chg */ + unsigned int t_items_free; /* log item 
descs free */ + xfs_log_item_chunk_t t_items; /* first log item desc chunk */ + xfs_trans_header_t t_header; /* header for in-log trans */ +} xfs_trans_t; + +#endif /* __KERNEL__ */ + + +#define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */ +/* + * Values for t_flags. + */ +#define XFS_TRANS_DIRTY 0x01 /* something needs to be logged */ +#define XFS_TRANS_SB_DIRTY 0x02 /* superblock is modified */ +#define XFS_TRANS_PERM_LOG_RES 0x04 /* xact took a permanent log res */ +#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ +#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ +#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ + +/* + * Values for call flags parameter. + */ +#define XFS_TRANS_NOSLEEP 0x1 +#define XFS_TRANS_WAIT 0x2 +#define XFS_TRANS_RELEASE_LOG_RES 0x4 +#define XFS_TRANS_ABORT 0x8 + +/* + * Field values for xfs_trans_mod_sb. + */ +#define XFS_TRANS_SB_ICOUNT 0x00000001 +#define XFS_TRANS_SB_IFREE 0x00000002 +#define XFS_TRANS_SB_FDBLOCKS 0x00000004 +#define XFS_TRANS_SB_RES_FDBLOCKS 0x00000008 +#define XFS_TRANS_SB_FREXTENTS 0x00000010 +#define XFS_TRANS_SB_RES_FREXTENTS 0x00000020 +#define XFS_TRANS_SB_DBLOCKS 0x00000040 +#define XFS_TRANS_SB_AGCOUNT 0x00000080 +#define XFS_TRANS_SB_IMAXPCT 0x00000100 +#define XFS_TRANS_SB_REXTSIZE 0x00000200 +#define XFS_TRANS_SB_RBMBLOCKS 0x00000400 +#define XFS_TRANS_SB_RBLOCKS 0x00000800 +#define XFS_TRANS_SB_REXTENTS 0x00001000 +#define XFS_TRANS_SB_REXTSLOG 0x00002000 + + +/* + * Various log reservation values. + * These are based on the size of the file system block + * because that is what most transactions manipulate. + * Each adds in an additional 128 bytes per item logged to + * try to account for the overhead of the transaction mechanism. + * + * Note: + * Most of the reservations underestimate the number of allocation + * groups into which they could free extents in the xfs_bmap_finish() + * call. 
This is because the number in the worst case is quite high + * and quite unusual. In order to fix this we need to change + * xfs_bmap_finish() to free extents in only a single AG at a time. + * This will require changes to the EFI code as well, however, so that + * the EFI for the extents not freed is logged again in each transaction. + * See bug 261917. + */ + +/* + * Per-extent log reservation for the allocation btree changes + * involved in freeing or allocating an extent. + * 2 trees * (2 blocks/level * max depth - 1) * block size + */ +#define XFS_ALLOCFREE_LOG_RES(mp,nx) \ + ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1))) +#define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \ + ((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1))) + +/* + * Per-directory log reservation for any directory change. + * dir blocks: (1 btree block per level + data block + free block) * dblock size + * bmap btree: (levels + 2) * max depth * block size + * v2 directory blocks can be fragmented below the dirblksize down to the fsb + * size, so account for that in the DAENTER macros. + */ +#define XFS_DIROP_LOG_RES(mp) \ + (XFS_FSB_TO_B(mp, XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK)) + \ + (XFS_FSB_TO_B(mp, XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1))) +#define XFS_DIROP_LOG_COUNT(mp) \ + (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \ + XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1) + +/* + * In a write transaction we can allocate a maximum of 2 + * extents. 
This gives: + * the inode getting the new extents: inode size + * the inode\'s bmap btree: max depth * block size + * the agfs of the ags from which the extents are allocated: 2 * sector + * the superblock free block counter: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size + * And the bmap_finish transaction can free bmap blocks in a join: + * the agfs of the ags containing the blocks: 2 * sector size + * the agfls of the ags containing the blocks: 2 * sector size + * the super block free block counter: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size + */ +#define XFS_CALC_WRITE_LOG_RES(mp) \ + (MAX( \ + ((mp)->m_sb.sb_inodesize + \ + XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \ + (2 * (mp)->m_sb.sb_sectsize) + \ + (mp)->m_sb.sb_sectsize + \ + XFS_ALLOCFREE_LOG_RES(mp, 2) + \ + (128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))),\ + ((2 * (mp)->m_sb.sb_sectsize) + \ + (2 * (mp)->m_sb.sb_sectsize) + \ + (mp)->m_sb.sb_sectsize + \ + XFS_ALLOCFREE_LOG_RES(mp, 2) + \ + (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))))) + +#define XFS_WRITE_LOG_RES(mp) ((mp)->m_reservations.tr_write) + +/* + * In truncating a file we free up to two extents at once. 
We can modify: + * the inode being truncated: inode size + * the inode\'s bmap btree: (max depth + 1) * block size + * And the bmap_finish transaction can free the blocks and bmap blocks: + * the agf for each of the ags: 4 * sector size + * the agfl for each of the ags: 4 * sector size + * the super block to reflect the freed blocks: sector size + * worst case split in allocation btrees per extent assuming 4 extents: + * 4 exts * 2 trees * (2 * max depth - 1) * block size + */ +#define XFS_CALC_ITRUNCATE_LOG_RES(mp) \ + (MAX( \ + ((mp)->m_sb.sb_inodesize + \ + XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + \ + (128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \ + ((4 * (mp)->m_sb.sb_sectsize) + \ + (4 * (mp)->m_sb.sb_sectsize) + \ + (mp)->m_sb.sb_sectsize + \ + XFS_ALLOCFREE_LOG_RES(mp, 4) + \ + (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)))))) + +#define XFS_ITRUNCATE_LOG_RES(mp) ((mp)->m_reservations.tr_itruncate) + +/* + * In renaming a file we can modify: + * the four inodes involved: 4 * inode size + * the two directory btrees: 2 * (max depth + v2) * dir block size + * the two directory bmap btrees: 2 * max depth * block size + * And the bmap_finish transaction can free dir and bmap blocks (two sets + * of bmap blocks) giving: + * the agf for the ags in which the blocks live: 3 * sector size + * the agfl for the ags in which the blocks live: 3 * sector size + * the superblock for the free block count: sector size + * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size + */ +#define XFS_CALC_RENAME_LOG_RES(mp) \ + (MAX( \ + ((4 * (mp)->m_sb.sb_inodesize) + \ + (2 * XFS_DIROP_LOG_RES(mp)) + \ + (128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp)))), \ + ((3 * (mp)->m_sb.sb_sectsize) + \ + (3 * (mp)->m_sb.sb_sectsize) + \ + (mp)->m_sb.sb_sectsize + \ + XFS_ALLOCFREE_LOG_RES(mp, 3) + \ + (128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3)))))) + +#define XFS_RENAME_LOG_RES(mp) ((mp)->m_reservations.tr_rename) + +/* + * For creating a link to an
inode: + * the parent directory inode: inode size + * the linked inode: inode size + * the directory btree could split: (max depth + v2) * dir block size + * the directory bmap btree could join or split: (max depth + v2) * blocksize + * And the bmap_finish transaction can free some bmap blocks giving: + * the agf for the ag in which the blocks live: sector size + * the agfl for the ag in which the blocks live: sector size + * the superblock for the free block count: sector size + * the allocation btrees: 2 trees * (2 * max depth - 1) * block size + */ +#define XFS_CALC_LINK_LOG_RES(mp) \ + (MAX( \ + ((mp)->m_sb.sb_inodesize + \ + (mp)->m_sb.sb_inodesize + \ + XFS_DIROP_LOG_RES(mp) + \ + (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \ + ((mp)->m_sb.sb_sectsize + \ + (mp)->m_sb.sb_sectsize + \ + (mp)->m_sb.sb_sectsize + \ + XFS_ALLOCFREE_LOG_RES(mp, 1) + \ + (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) + +#define XFS_LINK_LOG_RES(mp) ((mp)->m_reservations.tr_link) + +/* + * For removing a directory entry we can modify: + * the parent directory inode: inode size + * the removed inode: inode size + * the directory btree could join: (max depth + v2) * dir block size + * the directory bmap btree could join or split: (max depth + v2) * blocksize + * And the bmap_finish transaction can free the dir and bmap blocks giving: + * the agf for the ag in which the blocks live: 2 * sector size + * the agfl for the ag in which the blocks live: 2 * sector size + * the superblock for the free block count: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size + */ +#define XFS_CALC_REMOVE_LOG_RES(mp) \ + (MAX( \ + ((mp)->m_sb.sb_inodesize + \ + (mp)->m_sb.sb_inodesize + \ + XFS_DIROP_LOG_RES(mp) + \ + (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \ + ((2 * (mp)->m_sb.sb_sectsize) + \ + (2 * (mp)->m_sb.sb_sectsize) + \ + (mp)->m_sb.sb_sectsize + \ + XFS_ALLOCFREE_LOG_RES(mp, 2) + \ + (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))))) + +#define 
XFS_REMOVE_LOG_RES(mp) ((mp)->m_reservations.tr_remove) + +/* + * For symlink we can modify: + * the parent directory inode: inode size + * the new inode: inode size + * the inode btree entry: 1 block + * the directory btree: (max depth + v2) * dir block size + * the directory inode\'s bmap btree: (max depth + v2) * block size + * the blocks for the symlink: 1 KB + * Or in the first xact we allocate some inodes giving: + * the agi and agf of the ag getting the new inodes: 2 * sectorsize + * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (2 * max depth - 1) * block size + */ +#define XFS_CALC_SYMLINK_LOG_RES(mp) \ + (MAX( \ + ((mp)->m_sb.sb_inodesize + \ + (mp)->m_sb.sb_inodesize + \ + XFS_FSB_TO_B(mp, 1) + \ + XFS_DIROP_LOG_RES(mp) + \ + 1024 + \ + (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \ + (2 * (mp)->m_sb.sb_sectsize + \ + XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \ + XFS_FSB_TO_B((mp), XFS_IN_MAXLEVELS(mp)) + \ + XFS_ALLOCFREE_LOG_RES(mp, 1) + \ + (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \ + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) + +#define XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink) + +/* + * For create we can modify: + * the parent directory inode: inode size + * the new inode: inode size + * the inode btree entry: block size + * the superblock for the nlink flag: sector size + * the directory btree: (max depth + v2) * dir block size + * the directory inode\'s bmap btree: (max depth + v2) * block size + * Or in the first xact we allocate some inodes giving: + * the agi and agf of the ag getting the new inodes: 2 * sectorsize + * the superblock for the nlink flag: sector size + * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize + * the inode btree: max depth * blocksize + * the allocation btrees: 2 trees * (2 * max depth - 1) * block size + */ +#define XFS_CALC_CREATE_LOG_RES(mp) \ + (MAX( \ + ((mp)->m_sb.sb_inodesize + \ +
(mp)->m_sb.sb_inodesize + \ + (mp)->m_sb.sb_sectsize + \ + XFS_FSB_TO_B(mp, 1) + \ + XFS_DIROP_LOG_RES(mp) + \ + (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \ + (3 * (mp)->m_sb.sb_sectsize + \ + XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \ + XFS_FSB_TO_B((mp), XFS_IN_MAXLEVELS(mp)) + \ + XFS_ALLOCFREE_LOG_RES(mp, 1) + \ + (128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \ + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))))) + +#define XFS_CREATE_LOG_RES(mp) ((mp)->m_reservations.tr_create) + +/* + * Making a new directory is the same as creating a new file. + */ +#define XFS_CALC_MKDIR_LOG_RES(mp) XFS_CALC_CREATE_LOG_RES(mp) + +#define XFS_MKDIR_LOG_RES(mp) ((mp)->m_reservations.tr_mkdir) + +/* + * In freeing an inode we can modify: + * the inode being freed: inode size + * the super block free inode counter: sector size + * the agi hash list and counters: sector size + * the inode btree entry: block size + * the on disk inode before ours in the agi hash list: inode cluster size + */ +#define XFS_CALC_IFREE_LOG_RES(mp) \ + ((mp)->m_sb.sb_inodesize + \ + (mp)->m_sb.sb_sectsize + \ + (mp)->m_sb.sb_sectsize + \ + XFS_FSB_TO_B((mp), 1) + \ + MAX(XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \ + (128 * 5)) + +#define XFS_IFREE_LOG_RES(mp) ((mp)->m_reservations.tr_ifree) + +/* + * When only changing the inode we log the inode and possibly the superblock + * We also add a bit of slop for the transaction stuff. + */ +#define XFS_CALC_ICHANGE_LOG_RES(mp) ((mp)->m_sb.sb_inodesize + \ + (mp)->m_sb.sb_sectsize + 512) + +#define XFS_ICHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_ichange) + +/* + * Growing the data section of the filesystem. + * superblock + * agi and agf + * allocation btrees + */ +#define XFS_CALC_GROWDATA_LOG_RES(mp) \ + ((mp)->m_sb.sb_sectsize * 3 + \ + XFS_ALLOCFREE_LOG_RES(mp, 1) + \ + (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) + +#define XFS_GROWDATA_LOG_RES(mp) ((mp)->m_reservations.tr_growdata) + +/* + * Growing the rt section of the filesystem. 
+ * In the first set of transactions (ALLOC) we allocate space to the + * bitmap or summary files. + * superblock: sector size + * agf of the ag from which the extent is allocated: sector size + * bmap btree for bitmap/summary inode: max depth * blocksize + * bitmap/summary inode: inode size + * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize + */ +#define XFS_CALC_GROWRTALLOC_LOG_RES(mp) \ + (2 * (mp)->m_sb.sb_sectsize + \ + XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \ + (mp)->m_sb.sb_inodesize + \ + XFS_ALLOCFREE_LOG_RES(mp, 1) + \ + (128 * \ + (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + \ + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) + +#define XFS_GROWRTALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_growrtalloc) + +/* + * Growing the rt section of the filesystem. + * In the second set of transactions (ZERO) we zero the new metadata blocks. + * one bitmap/summary block: blocksize + */ +#define XFS_CALC_GROWRTZERO_LOG_RES(mp) \ + ((mp)->m_sb.sb_blocksize + 128) + +#define XFS_GROWRTZERO_LOG_RES(mp) ((mp)->m_reservations.tr_growrtzero) + +/* + * Growing the rt section of the filesystem. + * In the third set of transactions (FREE) we update metadata without + * allocating any new blocks. + * superblock: sector size + * bitmap inode: inode size + * summary inode: inode size + * one bitmap block: blocksize + * summary blocks: new summary size + */ +#define XFS_CALC_GROWRTFREE_LOG_RES(mp) \ + ((mp)->m_sb.sb_sectsize + \ + 2 * (mp)->m_sb.sb_inodesize + \ + (mp)->m_sb.sb_blocksize + \ + (mp)->m_rsumsize + \ + (128 * 5)) + +#define XFS_GROWRTFREE_LOG_RES(mp) ((mp)->m_reservations.tr_growrtfree) + +/* + * Logging the inode modification timestamp on a synchronous write. 
+ * inode + */ +#define XFS_CALC_SWRITE_LOG_RES(mp) \ + ((mp)->m_sb.sb_inodesize + 128) + +#define XFS_SWRITE_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) + +/* + * Logging the inode timestamps on an fsync -- same as SWRITE + * as long as SWRITE logs the entire inode core + */ +#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) + +/* + * Logging the inode mode bits when writing a setuid/setgid file + * inode + */ +#define XFS_CALC_WRITEID_LOG_RES(mp) \ + ((mp)->m_sb.sb_inodesize + 128) + +#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) + +/* + * Converting the inode from non-attributed to attributed. + * the inode being converted: inode size + * agf block and superblock (for block allocation) + * the new block (directory sized) + * bmap blocks for the new directory block + * allocation btrees + */ +#define XFS_CALC_ADDAFORK_LOG_RES(mp) \ + ((mp)->m_sb.sb_inodesize + \ + (mp)->m_sb.sb_sectsize * 2 + \ + (mp)->m_dirblksize + \ + (XFS_DIR_IS_V1(mp) ? 0 : \ + XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1))) + \ + XFS_ALLOCFREE_LOG_RES(mp, 1) + \ + (128 * (4 + \ + (XFS_DIR_IS_V1(mp) ? 
0 : \ + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \ + XFS_ALLOCFREE_LOG_COUNT(mp, 1)))) + +#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork) + +/* + * Removing the attribute fork of a file + * the inode being truncated: inode size + * the inode\'s bmap btree: max depth * block size + * And the bmap_finish transaction can free the blocks and bmap blocks: + * the agf for each of the ags: 4 * sector size + * the agfl for each of the ags: 4 * sector size + * the super block to reflect the freed blocks: sector size + * worst case split in allocation btrees per extent assuming 4 extents: + * 4 exts * 2 trees * (2 * max depth - 1) * block size + */ +#define XFS_CALC_ATTRINVAL_LOG_RES(mp) \ + (MAX( \ + ((mp)->m_sb.sb_inodesize + \ + XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \ + (128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))), \ + ((4 * (mp)->m_sb.sb_sectsize) + \ + (4 * (mp)->m_sb.sb_sectsize) + \ + (mp)->m_sb.sb_sectsize + \ + XFS_ALLOCFREE_LOG_RES(mp, 4) + \ + (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)))))) + +#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval) + +/* + * Setting an attribute. + * the inode getting the attribute + * the superblock for allocations + * the agfs extents are allocated from + * the attribute btree * max depth + * the inode allocation btree + * Since attribute transaction space is dependent on the size of the attribute, + * the calculation is done partially at mount time and partially at runtime (XFS_ATTRSET_LOG_RES adds the terms scaled by ext). + */ +#define XFS_CALC_ATTRSET_LOG_RES(mp) \ + ((mp)->m_sb.sb_inodesize + \ + (mp)->m_sb.sb_sectsize + \ + XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \ + (128 * (2 + XFS_DA_NODE_MAXDEPTH))) + +#define XFS_ATTRSET_LOG_RES(mp, ext) \ + ((mp)->m_reservations.tr_attrset + \ + ((ext) * (mp)->m_sb.sb_sectsize) + \ + ((ext) * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \ + (128 * ((ext) + ((ext) * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))))) + +/* + * Removing an attribute.
+ * the inode: inode size + * the attribute btree could join: max depth * block size + * the inode bmap btree could join or split: max depth * block size + * And the bmap_finish transaction can free the attr blocks freed giving: + * the agf for the ag in which the blocks live: 2 * sector size + * the agfl for the ag in which the blocks live: 2 * sector size + * the superblock for the free block count: sector size + * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size + */ +#define XFS_CALC_ATTRRM_LOG_RES(mp) \ + (MAX( \ + ((mp)->m_sb.sb_inodesize + \ + XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \ + XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \ + (128 * (1 + XFS_DA_NODE_MAXDEPTH + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \ + ((2 * (mp)->m_sb.sb_sectsize) + \ + (2 * (mp)->m_sb.sb_sectsize) + \ + (mp)->m_sb.sb_sectsize + \ + XFS_ALLOCFREE_LOG_RES(mp, 2) + \ + (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))))) + +#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm) + +/* + * Clearing a bad agino number in an agi hash bucket. + */ +#define XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp) \ + ((mp)->m_sb.sb_sectsize + 128) + +#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi) + + +/* + * Various log count values. + */ +#define XFS_DEFAULT_LOG_COUNT 1 +#define XFS_DEFAULT_PERM_LOG_COUNT 2 +#define XFS_ITRUNCATE_LOG_COUNT 2 +#define XFS_CREATE_LOG_COUNT 2 +#define XFS_MKDIR_LOG_COUNT 3 +#define XFS_SYMLINK_LOG_COUNT 3 +#define XFS_REMOVE_LOG_COUNT 2 +#define XFS_LINK_LOG_COUNT 2 +#define XFS_RENAME_LOG_COUNT 2 +#define XFS_WRITE_LOG_COUNT 2 +#define XFS_ADDAFORK_LOG_COUNT 2 +#define XFS_ATTRINVAL_LOG_COUNT 1 +#define XFS_ATTRSET_LOG_COUNT 3 +#define XFS_ATTRRM_LOG_COUNT 3 + +/* + * Here we centralize the specification of XFS meta-data buffer + * reference count values. This determine how hard the buffer + * cache tries to hold onto the buffer. 
+ */ +#define XFS_AGF_REF 4 +#define XFS_AGI_REF 4 +#define XFS_AGFL_REF 3 +#define XFS_INO_BTREE_REF 3 +#define XFS_ALLOC_BTREE_REF 2 +#define XFS_BMAP_BTREE_REF 2 +#define XFS_DIR_BTREE_REF 2 +#define XFS_ATTR_BTREE_REF 1 +#define XFS_INO_REF 1 +#define XFS_DQUOT_REF 1 + +#ifdef __KERNEL__ +/* + * XFS transaction mechanism exported interfaces that are + * actually macros. + */ +#define xfs_trans_get_log_res(tp) ((tp)->t_log_res) +#define xfs_trans_get_log_count(tp) ((tp)->t_log_count) +#define xfs_trans_get_block_res(tp) ((tp)->t_blk_res) +#define xfs_trans_set_sync(tp) ((tp)->t_flags |= XFS_TRANS_SYNC) + +#ifdef DEBUG +#define xfs_trans_agblocks_delta(tp, d) ((tp)->t_ag_freeblks_delta += (long)d) +#define xfs_trans_agflist_delta(tp, d) ((tp)->t_ag_flist_delta += (long)d) +#define xfs_trans_agbtree_delta(tp, d) ((tp)->t_ag_btree_delta += (long)d) +#else +#define xfs_trans_agblocks_delta(tp, d) +#define xfs_trans_agflist_delta(tp, d) +#define xfs_trans_agbtree_delta(tp, d) +#endif + +/* + * XFS transaction mechanism exported interfaces. 
+ */ +void xfs_trans_init(struct xfs_mount *); +xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); +xfs_trans_t *xfs_trans_dup(xfs_trans_t *); +int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, + uint, uint); +void xfs_trans_callback(xfs_trans_t *, + void (*)(xfs_trans_t *, void *), void *); +void xfs_trans_mod_sb(xfs_trans_t *, uint, long); +struct xfs_buf *xfs_trans_get_buf(xfs_trans_t *, struct buftarg *, xfs_daddr_t, + int, uint); +int xfs_trans_read_buf(struct xfs_mount *, xfs_trans_t *, + struct buftarg *, xfs_daddr_t, int, uint, + struct xfs_buf **); +struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int); + +void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); +void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *); +void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *); +void xfs_trans_bhold_until_committed(xfs_trans_t *, struct xfs_buf *); +void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); +void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); +void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); +void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); +int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *, + xfs_ino_t , uint, struct xfs_inode **); +void xfs_trans_iput(xfs_trans_t *, struct xfs_inode *, uint); +void xfs_trans_ijoin(xfs_trans_t *, struct xfs_inode *, uint); +void xfs_trans_ihold(xfs_trans_t *, struct xfs_inode *); +void xfs_trans_ihold_release(xfs_trans_t *, struct xfs_inode *); +void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); +void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); +struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint); +void xfs_efi_release(struct xfs_efi_log_item *, uint); +void xfs_trans_log_efi_extent(xfs_trans_t *, + struct xfs_efi_log_item *, + xfs_fsblock_t, + xfs_extlen_t); +struct xfs_efd_log_item *xfs_trans_get_efd(xfs_trans_t *, + struct xfs_efi_log_item *, + uint); +void xfs_trans_log_efd_extent(xfs_trans_t *, 
+ struct xfs_efd_log_item *, + xfs_fsblock_t, + xfs_extlen_t); +void xfs_trans_log_create_rpc(xfs_trans_t *, int, xfs_ino_t); +void xfs_trans_log_setattr_rpc(xfs_trans_t *, int); +int xfs_trans_commit(xfs_trans_t *, uint flags, xfs_lsn_t *); +void xfs_trans_commit_async(struct xfs_mount *); +void xfs_trans_cancel(xfs_trans_t *, int); +void xfs_trans_ail_init(struct xfs_mount *); +xfs_lsn_t xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); +xfs_lsn_t xfs_trans_tail_ail(struct xfs_mount *); +void xfs_trans_unlocked_item(struct xfs_mount *, + xfs_log_item_t *); + +/* + * Not necessarily exported, but used outside a single file. + */ +int xfs_trans_lsn_danger(struct xfs_mount *, xfs_lsn_t); + +#endif /* __KERNEL__ */ + +#endif /* __XFS_TRANS_H__ */ diff --git a/include/xfs_trans_space.h b/include/xfs_trans_space.h new file mode 100644 index 000000000..c377a4476 --- /dev/null +++ b/include/xfs_trans_space.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_TRANS_SPACE_H__ +#define __XFS_TRANS_SPACE_H__ + +/* + * Components of space reservations. + */ +#define XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) \ + (((mp)->m_alloc_mxr[0]) - ((mp)->m_alloc_mnr[0])) +#define XFS_EXTENTADD_SPACE_RES(mp,w) (XFS_BM_MAXLEVELS(mp,w) - 1) +#define XFS_NEXTENTADD_SPACE_RES(mp,b,w)\ + (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \ + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \ + XFS_EXTENTADD_SPACE_RES(mp,w)) +#define XFS_DAENTER_1B(mp,w) ((w) == XFS_DATA_FORK ? (mp)->m_dirblkfsbs : 1) +#define XFS_DAENTER_DBS(mp,w) \ + (XFS_DA_NODE_MAXDEPTH + \ + ((XFS_DIR_IS_V2(mp) && (w) == XFS_DATA_FORK) ? 2 : 0)) +#define XFS_DAENTER_BLOCKS(mp,w) \ + (XFS_DAENTER_1B(mp,w) * XFS_DAENTER_DBS(mp,w)) +#define XFS_DAENTER_BMAP1B(mp,w) \ + XFS_NEXTENTADD_SPACE_RES(mp, XFS_DAENTER_1B(mp, w), w) +#define XFS_DAENTER_BMAPS(mp,w) \ + (XFS_DAENTER_DBS(mp,w) * XFS_DAENTER_BMAP1B(mp,w)) +#define XFS_DAENTER_SPACE_RES(mp,w) \ + (XFS_DAENTER_BLOCKS(mp,w) + XFS_DAENTER_BMAPS(mp,w)) +#define XFS_DAREMOVE_SPACE_RES(mp,w) XFS_DAENTER_BMAPS(mp,w) +#define XFS_DIRENTER_MAX_SPLIT(mp,nl) \ + (((mp)->m_sb.sb_blocksize == 512 && \ + XFS_DIR_IS_V1(mp) && \ + (nl) >= XFS_DIR_LEAF_CAN_DOUBLE_SPLIT_LEN) ? 
2 : 1) +#define XFS_DIRENTER_SPACE_RES(mp,nl) \ + (XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK) * \ + XFS_DIRENTER_MAX_SPLIT(mp,nl)) +#define XFS_DIRREMOVE_SPACE_RES(mp) \ + XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) +#define XFS_IALLOC_SPACE_RES(mp) \ + (XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp)-1) + +/* + * Space reservation values for various transactions. + */ +#define XFS_ADDAFORK_SPACE_RES(mp) \ + ((mp)->m_dirblkfsbs + \ + (XFS_DIR_IS_V1(mp) ? 0 : XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK))) +#define XFS_ATTRRM_SPACE_RES(mp) \ + XFS_DAREMOVE_SPACE_RES(mp, XFS_ATTR_FORK) +/* This macro is not used - see inline code in xfs_attr_set */ +#define XFS_ATTRSET_SPACE_RES(mp, v) \ + (XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK) + XFS_B_TO_FSB(mp, v)) +#define XFS_CREATE_SPACE_RES(mp,nl) \ + (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) +#define XFS_DIOSTRAT_SPACE_RES(mp, v) \ + (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v)) +#define XFS_GROWFS_SPACE_RES(mp) \ + (2 * XFS_AG_MAXLEVELS(mp)) +#define XFS_GROWFSRT_SPACE_RES(mp,b) \ + ((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK)) +#define XFS_LINK_SPACE_RES(mp,nl) \ + XFS_DIRENTER_SPACE_RES(mp,nl) +#define XFS_MKDIR_SPACE_RES(mp,nl) \ + (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) +#define XFS_QM_DQALLOC_SPACE_RES(mp) \ + (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + \ + XFS_DQUOT_CLUSTER_SIZE_FSB) +#define XFS_QM_QINOCREATE_SPACE_RES(mp) \ + XFS_IALLOC_SPACE_RES(mp) +#define XFS_REMOVE_SPACE_RES(mp) \ + XFS_DIRREMOVE_SPACE_RES(mp) +#define XFS_RENAME_SPACE_RES(mp,nl) \ + (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl)) +#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ + (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) + +#endif /* __XFS_TRANS_SPACE_H__ */ diff --git a/include/xfs_types.h b/include/xfs_types.h new file mode 100644 index 000000000..83d07197a --- /dev/null +++ b/include/xfs_types.h @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. 
All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_TYPES_H__ +#define __XFS_TYPES_H__ + +/* + * Some types are conditional based on the selected configuration. + * Set XFS_BIG_FILES=1 or 0 and XFS_BIG_FILESYSTEMS=1 or 0 depending + * on the desired configuration. + * XFS_BIG_FILES needs pgno_t to be 64 bits (64-bit kernels). + * XFS_BIG_FILESYSTEMS needs daddr_t to be 64 bits (N32 and 64-bit kernels). + * + * Expect these to be set from klocaldefs, or from the machine-type + * defs files for the normal case. + */ + +#define XFS_BIG_FILES 1 +#define XFS_BIG_FILESYSTEMS 1 + +typedef __uint32_t xfs_agblock_t; /* blockno in alloc. 
group */ +typedef __uint32_t xfs_extlen_t; /* extent length in blocks */ +typedef __uint32_t xfs_agnumber_t; /* allocation group number */ +typedef __int32_t xfs_extnum_t; /* # of extents in a file */ +typedef __int16_t xfs_aextnum_t; /* # extents in an attribute fork */ +typedef __int64_t xfs_fsize_t; /* bytes in a file */ +typedef __uint64_t xfs_ufsize_t; /* unsigned bytes in a file */ + +typedef __int32_t xfs_suminfo_t; /* type of bitmap summary info */ +typedef __int32_t xfs_rtword_t; /* word type for bitmap manipulations */ + +typedef __int64_t xfs_lsn_t; /* log sequence number */ +typedef __int32_t xfs_tid_t; /* transaction identifier */ + +typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ +typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ + +typedef __uint16_t xfs_prid_t; /* prid_t truncated to 16bits in XFS */ + +/* + * These types are 64 bits on disk but are either 32 or 64 bits in memory. + * Disk based types: + */ +typedef __uint64_t xfs_dfsbno_t; /* blockno in filesystem (agno|agbno) */ +typedef __uint64_t xfs_drfsbno_t; /* blockno in filesystem (raw) */ +typedef __uint64_t xfs_drtbno_t; /* extent (block) in realtime area */ +typedef __uint64_t xfs_dfiloff_t; /* block number in a file */ +typedef __uint64_t xfs_dfilblks_t; /* number of blocks in a file */ + +/* + * Memory based types are conditional. 
+ */ +#if XFS_BIG_FILESYSTEMS /* in-core block numbers: 64-bit here, 32-bit in the #else arm */ +typedef __uint64_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */ +typedef __uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */ +typedef __uint64_t xfs_rtblock_t; /* extent (block) in realtime area */ +typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ +#else +typedef __uint32_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */ +typedef __uint32_t xfs_rfsblock_t; /* blockno in filesystem (raw) */ +typedef __uint32_t xfs_rtblock_t; /* extent (block) in realtime area */ +typedef __int32_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ +#endif +#if XFS_BIG_FILES /* in-core file offsets/lengths: 64-bit here, 32-bit in the #else arm */ +typedef __uint64_t xfs_fileoff_t; /* block number in a file */ +typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ +typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ +#else +typedef __uint32_t xfs_fileoff_t; /* block number in a file */ +typedef __int32_t xfs_sfiloff_t; /* signed block number in a file */ +typedef __uint32_t xfs_filblks_t; /* number of blocks in a file */ +#endif + +typedef __uint8_t xfs_arch_t; /* architecture of an xfs fs */ + +/* + * Null values for the types. + */ +#define NULLDFSBNO ((xfs_dfsbno_t)-1) +#define NULLDRFSBNO ((xfs_drfsbno_t)-1) +#define NULLDRTBNO ((xfs_drtbno_t)-1) +#define NULLDFILOFF ((xfs_dfiloff_t)-1) + +#define NULLFSBLOCK ((xfs_fsblock_t)-1) +#define NULLRFSBLOCK ((xfs_rfsblock_t)-1) +#define NULLRTBLOCK ((xfs_rtblock_t)-1) +#define NULLFILEOFF ((xfs_fileoff_t)-1) + +#define NULLAGBLOCK ((xfs_agblock_t)-1) +#define NULLAGNUMBER ((xfs_agnumber_t)-1) +#define NULLEXTNUM ((xfs_extnum_t)-1) + +#define NULLCOMMITLSN ((xfs_lsn_t)-1) + +/* + * Max values for extlen, extnum, aextnum. 
+ */ +#define MAXEXTLEN ((xfs_extlen_t)0x001fffff) /* 21 bits */ +#define MAXEXTNUM ((xfs_extnum_t)0x7fffffff) /* signed int */ +#define MAXAEXTNUM ((xfs_aextnum_t)0x7fff) /* signed short */ + +/* + * MAXNAMELEN is the length (including the terminating null) of + * the longest permissible file (component) name. + */ +#define MAXNAMELEN 256 + +typedef enum { + XFS_LOOKUP_EQi, XFS_LOOKUP_LEi, XFS_LOOKUP_GEi +} xfs_lookup_t; + +typedef enum { + XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi, + XFS_BTNUM_MAX +} xfs_btnum_t; + + +#ifdef CONFIG_PROC_FS +/* + * XFS global statistics + */ +struct xfsstats { +# define XFSSTAT_END_EXTENT_ALLOC 4 + __uint32_t xs_allocx; + __uint32_t xs_allocb; + __uint32_t xs_freex; + __uint32_t xs_freeb; +# define XFSSTAT_END_ALLOC_BTREE (XFSSTAT_END_EXTENT_ALLOC+4) + __uint32_t xs_abt_lookup; + __uint32_t xs_abt_compare; + __uint32_t xs_abt_insrec; + __uint32_t xs_abt_delrec; +# define XFSSTAT_END_BLOCK_MAPPING (XFSSTAT_END_ALLOC_BTREE+7) + __uint32_t xs_blk_mapr; + __uint32_t xs_blk_mapw; + __uint32_t xs_blk_unmap; + __uint32_t xs_add_exlist; + __uint32_t xs_del_exlist; + __uint32_t xs_look_exlist; + __uint32_t xs_cmp_exlist; +# define XFSSTAT_END_BLOCK_MAP_BTREE (XFSSTAT_END_BLOCK_MAPPING+4) + __uint32_t xs_bmbt_lookup; + __uint32_t xs_bmbt_compare; + __uint32_t xs_bmbt_insrec; + __uint32_t xs_bmbt_delrec; +# define XFSSTAT_END_DIRECTORY_OPS (XFSSTAT_END_BLOCK_MAP_BTREE+4) + __uint32_t xs_dir_lookup; + __uint32_t xs_dir_create; + __uint32_t xs_dir_remove; + __uint32_t xs_dir_getdents; +# define XFSSTAT_END_TRANSACTIONS (XFSSTAT_END_DIRECTORY_OPS+3) + __uint32_t xs_trans_sync; + __uint32_t xs_trans_async; + __uint32_t xs_trans_empty; +# define XFSSTAT_END_INODE_OPS (XFSSTAT_END_TRANSACTIONS+7) + __uint32_t xs_ig_attempts; + __uint32_t xs_ig_found; + __uint32_t xs_ig_frecycle; + __uint32_t xs_ig_missed; + __uint32_t xs_ig_dup; + __uint32_t xs_ig_reclaims; + __uint32_t xs_ig_attrchg; +# define XFSSTAT_END_LOG_OPS 
(XFSSTAT_END_INODE_OPS+5) + __uint32_t xs_log_writes; + __uint32_t xs_log_blocks; + __uint32_t xs_log_noiclogs; + __uint32_t xs_log_force; + __uint32_t xs_log_force_sleep; +# define XFSSTAT_END_TAIL_PUSHING (XFSSTAT_END_LOG_OPS+10) + __uint32_t xs_try_logspace; + __uint32_t xs_sleep_logspace; + __uint32_t xs_push_ail; + __uint32_t xs_push_ail_success; + __uint32_t xs_push_ail_pushbuf; + __uint32_t xs_push_ail_pinned; + __uint32_t xs_push_ail_locked; + __uint32_t xs_push_ail_flushing; + __uint32_t xs_push_ail_restarts; + __uint32_t xs_push_ail_flush; +# define XFSSTAT_END_WRITE_CONVERT (XFSSTAT_END_TAIL_PUSHING+2) + __uint32_t xs_xstrat_quick; + __uint32_t xs_xstrat_split; +# define XFSSTAT_END_READ_WRITE_OPS (XFSSTAT_END_WRITE_CONVERT+2) + __uint32_t xs_write_calls; + __uint32_t xs_read_calls; +# define XFSSTAT_END_ATTRIBUTE_OPS (XFSSTAT_END_READ_WRITE_OPS+4) + __uint32_t xs_attr_get; + __uint32_t xs_attr_set; + __uint32_t xs_attr_remove; + __uint32_t xs_attr_list; +# define XFSSTAT_END_QUOTA_OPS (XFSSTAT_END_ATTRIBUTE_OPS+8) + __uint32_t xs_qm_dqreclaims; + __uint32_t xs_qm_dqreclaim_misses; + __uint32_t xs_qm_dquot_dups; + __uint32_t xs_qm_dqcachemisses; + __uint32_t xs_qm_dqcachehits; + __uint32_t xs_qm_dqwants; + __uint32_t xs_qm_dqshake_reclaims; + __uint32_t xs_qm_dqinact_reclaims; +# define XFSSTAT_END_INODE_CLUSTER (XFSSTAT_END_QUOTA_OPS+3) + __uint32_t xs_iflush_count; + __uint32_t xs_icluster_flushcnt; + __uint32_t xs_icluster_flushinode; +# define XFSSTAT_END_VNODE_OPS (XFSSTAT_END_INODE_CLUSTER+8) + __uint32_t vn_active; /* # vnodes not on free lists */ + __uint32_t vn_alloc; /* # times vn_alloc called */ + __uint32_t vn_get; /* # times vn_get called */ + __uint32_t vn_hold; /* # times vn_hold called */ + __uint32_t vn_rele; /* # times vn_rele called */ + __uint32_t vn_reclaim; /* # times vn_reclaim called */ + __uint32_t vn_remove; /* # times vn_remove called */ + __uint32_t vn_free; /* # times vn_free called */ + struct xfsstats_xpc { + __uint64_t 
xs_xstrat_bytes; + __uint64_t xs_write_bytes; + __uint64_t xs_read_bytes; + } xpc; +} xfsstats; + +/* + * Counter update helpers: each expands to a direct update of the named + * member of the global xfsstats (or of its xpc sub-structure, which holds + * the 64-bit byte counters), and to nothing when CONFIG_PROC_FS is not + * defined. Plain member access is used; pasting "." onto the member name + * with ## does not form a valid preprocessing token. + */ +# define XFS_STATS_INC(count) ( xfsstats.count ++ ) +# define XFS_STATS_DEC(count) ( xfsstats.count -- ) +# define XFS_STATS_ADD(count, inc) ( xfsstats.count += (inc) ) +# define XFS_STATS64_INC(count) ( xfsstats.xpc.count ++ ) +# define XFS_STATS64_ADD(count, inc) ( xfsstats.xpc.count += (inc) ) +#else /* !CONFIG_PROC_FS */ +# define XFS_STATS_INC(count) +# define XFS_STATS_DEC(count) +# define XFS_STATS_ADD(count, inc) +# define XFS_STATS64_INC(count) +# define XFS_STATS64_ADD(count, inc) +#endif /* !CONFIG_PROC_FS */ + + +#ifdef __KERNEL__ + +/* juggle IRIX device numbers - still used in ondisk structures */ + +#define IRIX_DEV_BITSMAJOR 14 +#define IRIX_DEV_BITSMINOR 18 +#define IRIX_DEV_MAXMAJ 0x1ff +#define IRIX_DEV_MAXMIN 0x3ffff +#define IRIX_DEV_MAJOR(dev) ((int)(((unsigned)(dev)>>IRIX_DEV_BITSMINOR) \ + & IRIX_DEV_MAXMAJ)) +#define IRIX_DEV_MINOR(dev) ((int)((dev)&IRIX_DEV_MAXMIN)) +#define IRIX_MKDEV(major,minor) ((xfs_dev_t)(((major)< + +#define XQM_CMD(cmd) ( ('X'<<8)+(cmd) ) +#define IS_XQM_CMD(cmd) ( ((int)(cmd)>>8) == 'X' ) + +/* + * Disk quota - quotactl(2) commands for XFS Quota Manager (XQM). + */ +#define Q_XQUOTAON XQM_CMD(0x1) /* enable quota accounting/enforcement */ +#define Q_XQUOTAOFF XQM_CMD(0x2) /* disable quota accounting/enforcement */ +#define Q_XGETQUOTA XQM_CMD(0x3) /* get disk limits & usage */ +#define Q_XSETQLIM XQM_CMD(0x4) /* set disk limits only */ +#define Q_XGETQSTAT XQM_CMD(0x5) /* returns fs_quota_stat_t struct */ +#define Q_XQUOTARM XQM_CMD(0x6) /* free quota files' space */ + +/* + * fs_disk_quota structure: + * + * This contains the current quota information regarding a user/proj/group. + * It is 64-bit aligned, and all the blk units are in BBs (Basic Blocks) of + * 512 bytes. 
+ */ +#define FS_DQUOT_VERSION 1 /* fs_disk_quota.d_version */ +typedef struct fs_disk_quota { + __s8 d_version; /* version of this structure */ + __s8 d_flags; /* XFS_{USER,PROJ,GROUP}_QUOTA */ + __u16 d_fieldmask; /* field specifier */ + __u32 d_id; /* user, project, or group ID */ + __u64 d_blk_hardlimit;/* absolute limit on disk blks */ + __u64 d_blk_softlimit;/* preferred limit on disk blks */ + __u64 d_ino_hardlimit;/* maximum # allocated inodes */ + __u64 d_ino_softlimit;/* preferred inode limit */ + __u64 d_bcount; /* # disk blocks owned by the user */ + __u64 d_icount; /* # inodes owned by the user */ + __s32 d_itimer; /* zero if within inode limits */ + /* if not, we refuse service */ + __s32 d_btimer; /* similar to above; for disk blocks */ + __u16 d_iwarns; /* # warnings issued wrt num inodes */ + __u16 d_bwarns; /* # warnings issued wrt disk blocks */ + __s32 d_padding2; /* padding2 - for future use */ + __u64 d_rtb_hardlimit;/* absolute limit on realtime blks */ + __u64 d_rtb_softlimit;/* preferred limit on RT disk blks */ + __u64 d_rtbcount; /* # realtime blocks owned */ + __s32 d_rtbtimer; /* similar to above; for RT disk blks */ + __u16 d_rtbwarns; /* # warnings issued wrt RT disk blks */ + __s16 d_padding3; /* padding3 - for future use */ + char d_padding4[8]; /* yet more padding */ +} fs_disk_quota_t; + +/* + * These fields are sent to Q_XSETQLIM to specify fields that need to change. + */ +#define FS_DQ_ISOFT (1<<0) +#define FS_DQ_IHARD (1<<1) +#define FS_DQ_BSOFT (1<<2) +#define FS_DQ_BHARD (1<<3) +#define FS_DQ_RTBSOFT (1<<4) +#define FS_DQ_RTBHARD (1<<5) +#define FS_DQ_LIMIT_MASK (FS_DQ_ISOFT | FS_DQ_IHARD | FS_DQ_BSOFT | \ + FS_DQ_BHARD | FS_DQ_RTBSOFT | FS_DQ_RTBHARD) +/* + * These timers can only be set in super user's dquot. For others, timers are + * automatically started and stopped. Superusers timer values set the limits + * for the rest. In case these values are zero, the DQ_{F,B}TIMELIMIT values + * defined below are used. 
+ * These values also apply only to the d_fieldmask field for Q_XSETQLIM. + */ +#define FS_DQ_BTIMER (1<<6) +#define FS_DQ_ITIMER (1<<7) +#define FS_DQ_RTBTIMER (1<<8) +#define FS_DQ_TIMER_MASK (FS_DQ_BTIMER | FS_DQ_ITIMER | FS_DQ_RTBTIMER) + +/* + * The following constants define the default amount of time given a user + * before the soft limits are treated as hard limits (usually resulting + * in an allocation failure). These may be modified by the quotactl(2) + * system call with the Q_XSETQLIM command. + */ +#define DQ_FTIMELIMIT (7 * 24*60*60) /* 1 week */ +#define DQ_BTIMELIMIT (7 * 24*60*60) /* 1 week */ + +/* + * Various flags related to quotactl(2). Only relevant to XFS filesystems. + */ +#define XFS_QUOTA_UDQ_ACCT (1<<0) /* user quota accounting */ +#define XFS_QUOTA_UDQ_ENFD (1<<1) /* user quota limits enforcement */ +#define XFS_QUOTA_PDQ_ACCT (1<<2) /* project quota accounting */ +#define XFS_QUOTA_PDQ_ENFD (1<<3) /* project quota limits enforcement */ +#define XFS_QUOTA_GDQ_ACCT (1<<4) /* group quota accounting */ +#define XFS_QUOTA_GDQ_ENFD (1<<5) /* group quota limits enforcement */ + +#define XFS_USER_QUOTA (1<<0) /* user quota type */ +#define XFS_PROJ_QUOTA (1<<1) /* project quota type */ +#define XFS_GROUP_QUOTA (1<<2) /* group quota type */ + +/* + * fs_quota_stat is the struct returned in Q_XGETQSTAT for a given file system. + * Provides a centralized way to get meta infomation about the quota subsystem. + * eg. space taken up for user and aggregate quotas, number of dquots currently + * incore. + */ +#define FS_QSTAT_VERSION 1 /* fs_quota_stat.qs_version */ + +/* + * Some basic infomation about 'quota files'. 
+ */ +typedef struct fs_qfilestat { + __u64 qfs_ino; /* inode number */ + __u64 qfs_nblks; /* number of BBs 512-byte-blks */ + __u32 qfs_nextents; /* number of extents */ +} fs_qfilestat_t; + +typedef struct fs_quota_stat { + __s8 qs_version; /* version number for future changes */ + __u16 qs_flags; /* XFS_QUOTA_{U,P,G}DQ_{ACCT,ENFD} */ + __s8 qs_pad; /* unused */ + fs_qfilestat_t qs_uquota; /* user quota storage information */ + fs_qfilestat_t qs_aquota; /* aggr quota storage information */ + __u32 qs_incoredqs; /* number of dquots incore */ + __s32 qs_btimelimit; /* limit for blks timer */ + __s32 qs_itimelimit; /* limit for inodes timer */ + __s32 qs_rtbtimelimit;/* limit for rt blks timer */ + __u16 qs_bwarnlimit; /* limit for num warnings */ + __u16 qs_iwarnlimit; /* limit for num warnings */ +} fs_quota_stat_t; + + +#ifdef __KERNEL__ +extern int xqm_quotactl(int, const char *, int, caddr_t); +#endif + +#endif /* __XQM_H__ */ diff --git a/libxfs/Makefile b/libxfs/Makefile new file mode 100644 index 000000000..ce45344de --- /dev/null +++ b/libxfs/Makefile @@ -0,0 +1,62 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = .. +include $(TOPDIR)/include/builddefs + +STATICLIBTARGET = libxfs.a +HFILES = xfs.h +CFILES = arch.c init.c logitem.c rdwr.c trans.c util.c \ + xfs_bit.c xfs_rtbit.c xfs_alloc.c xfs_ialloc.c xfs_rtalloc.c \ + xfs_inode.c xfs_btree.c xfs_alloc_btree.c xfs_ialloc_btree.c \ + xfs_bmap_btree.c xfs_da_btree.c xfs_dir.c xfs_dir_leaf.c \ + xfs_dir2.c xfs_dir2_leaf.c xfs_attr_leaf.c xfs_dir2_block.c \ + xfs_dir2_node.c xfs_dir2_data.c xfs_dir2_sf.c xfs_bmap.c \ + xfs_mount.c xfs_trans.c + +# xfs_repair is braindead, don't try linking it with a debug libxfs yet. +DEBUG = -DNDEBUG + +# +# Tracing flags: +# -DIO_DEBUG reads and writes of buffers +# -DMEM_DEBUG all zone memory use +# -DLI_DEBUG log item (ino/buf) manipulation +# -DXACT_DEBUG transaction state changes +# +LCFLAGS += -Wno-unknown-pragmas -Wno-unused -Wno-uninitialized -I. + +default: $(STATICLIBTARGET) + +include $(BUILDRULES) + +install: default diff --git a/libxfs/init.c b/libxfs/init.c new file mode 100644 index 000000000..e3142dd47 --- /dev/null +++ b/libxfs/init.c @@ -0,0 +1,764 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#define ustat __kernel_ustat +#include +#include +#include +#include +#include +#undef ustat +#include +#include +#include + +#ifndef BLKSETSIZE /* Baaad m'kay, but it's not in libc yet */ +#define BLKSETSIZE _IO(0x12,108) /* set device block size */ +#endif + +#define findrawpath(x) x +#define findblockpath(x) x + +char *progname = "libxfs"; /* default, changed by each tool */ + +/* + * dev_map - map open devices to fd. 
+ */ +#define MAX_DEVS 10 /* arbitary maximum */ +int nextfakedev = -1; /* device number to give to next fake device */ +static struct dev_to_fd { + dev_t dev; + int fd; +} dev_map[MAX_DEVS]={{0}}; + +static int +check_ismounted(char *name, char *block, int verbose) +{ + struct ustat ust; + struct stat64 st; + + if (stat64(block, &st) < 0) + return 0; + if ((st.st_mode & S_IFMT) != S_IFBLK) + return 0; + if (ustat(st.st_rdev, &ust) >= 0) { + if (verbose) + fprintf(stderr, + "%s: %s contains a mounted filesystem\n", + progname, name); + return 1; + } + return 0; +} + +/* + * Checks whether a given device has a mounted, writable + * filesystem, returns 1 if it does & fatal (just warns + * if not fatal, but allows us to proceed). + * + * Useful to tools which will produce uncertain results + * if the filesystem is active - repair, check, logprint. + */ +static int +check_isactive(char *name, char *block, int fatal) +{ + int sts = 0; + FILE *f; + struct mntent *mnt; + + if (check_ismounted(name, block, 0)) { + if ((f = setmntent(MOUNTED, "r")) == NULL) { + fprintf(stderr, + "%s: %s contains a possibly writable, mounted " + "filesystem\n", progname, name); + return fatal; + } + while ((mnt = getmntent(f)) != NULL) { + if (hasmntopt(mnt, MNTOPT_RO) != NULL) + break; + } + if (mnt == NULL) { + fprintf(stderr, + "%s: %s contains a writable mounted " + "filesystem\n", progname, name); + sts = fatal; + } + endmntent(f); + } + return sts; +} + +static __int64_t +findsize(char *path) +{ + int fd; + int error; + long size; + struct stat64 st; + + /* Test to see if we are dealing with a regular file rather than a + * block device, if we are just use the size returned by stat64 + */ + if (stat64(path, &st) < 0) { + fprintf(stderr, "%s: " + "cannot stat the device special file \"%s\": %s\n", + progname, path, strerror(errno)); + exit(1); + } + if ((st.st_mode & S_IFMT) == S_IFREG) { + return (__int64_t)(st.st_size >> 9); + } + + if ((fd = open(path, 0)) < 0) { + fprintf(stderr, 
"%s: " + "error opening the device special file \"%s\": %s\n", + progname, path, strerror(errno)); + exit(1); + } + error = ioctl(fd, BLKGETSIZE, &size); + if (error < 0) { + fprintf(stderr, "%s: can't determine device size\n", progname); + exit(1); + } + + close(fd); + + return (__int64_t)size; +} + + +/* libxfs_device_to_fd: + * lookup a device number in the device map + * return the associated fd + */ +int +libxfs_device_to_fd(dev_t device) +{ + int d; + + for (d=0;ddname; + logname = a->logname; + rtname = a->rtname; + a->ddev = a->logdev = a->rtdev = 0; + a->dfd = a->logfd = a->rtfd = -1; + a->dsize = a->logBBsize = a->logBBstart = a->rtsize = 0; + + (void)getcwd(curdir,MAXPATHLEN); + needcd = 0; + fd = -1; + readonly = (a->isreadonly & LIBXFS_ISREADONLY); + inactive = (a->isreadonly & LIBXFS_ISINACTIVE); + if (a->volname) { + if (stat64(a->volname, &stbuf) < 0) { + perror(a->volname); + goto done; + } + if (!(rawfile = findrawpath(a->volname))) { + fprintf(stderr, "%s: " + "can't find a character device matching %s\n", + progname, a->volname); + goto done; + } + if (!(blockfile = findblockpath(a->volname))) { + fprintf(stderr, "%s: " + "can't find a block device matching %s\n", + progname, a->volname); + goto done; + } + if (!readonly && !inactive && check_ismounted( + a->volname, blockfile, 1)) + goto done; + if (inactive && check_isactive( + a->volname, blockfile, readonly)) + goto done; + needcd = 1; + fd = open(rawfile, O_RDONLY); +#ifdef HAVE_VOLUME_MANAGER + xlv_getdev_t getdev; + if (ioctl(fd, DIOCGETVOLDEV, &getdev) < 0) +#else + if (1) +#endif + { + if (a->notvolok) { + dname = a->dname = a->volname; + a->volname = NULL; + goto voldone; + } + fprintf(stderr, "%s: " + "%s is not a volume device name\n", + progname, a->volname); + if (a->notvolmsg) + fprintf(stderr, a->notvolmsg, a->volname); + goto done; + } +#ifdef HAVE_VOLUME_MANAGER + if (getdev.data_subvol_dev && dname) { + fprintf(stderr, "%s: " + "%s has a data subvolume, cannot specify %s\n", + 
progname, a->volname, dname); + goto done; + } + if (getdev.log_subvol_dev && logname) { + fprintf(stderr, "%s: " + "%s has a log subvolume, cannot specify %s\n", + progname, a->volname, logname); + goto done; + } + if (getdev.rt_subvol_dev && rtname) { + fprintf(stderr, "%s: %s has a realtime subvolume, " + "cannot specify %s\n", + progname, a->volname, rtname); + goto done; + } + if (!dname && getdev.data_subvol_dev) { + strcpy(dpath, "/tmp/libxfsdXXXXXX"); + (void)mktemp(dpath); + if (mknod(dpath, S_IFCHR | 0600, + getdev.data_subvol_dev) < 0) { + fprintf(stderr, "%s: mknod failed: %s\n", + progname, strerror(errno)); + goto done; + } + dname = dpath; + } + if (!logname && getdev.log_subvol_dev) { + strcpy(logpath, "/tmp/libxfslXXXXXX"); + (void)mktemp(logpath); + if (mknod(logpath, S_IFCHR | 0600, + getdev.log_subvol_dev) < 0) { + fprintf(stderr, "%s: mknod failed: %s\n", + progname, strerror(errno)); + goto done; + } + logname = logpath; + } + if (!rtname && getdev.rt_subvol_dev) { + strcpy(rtpath, "/tmp/libxfsrXXXXXX"); + (void)mktemp(rtpath); + if (mknod(rtpath, S_IFCHR | 0600, + getdev.rt_subvol_dev) < 0) { + fprintf(stderr, "%s: mknod failed: %s\n", + progname, strerror(errno)); + goto done; + } + rtname = rtpath; + } +#endif + } +voldone: + if (dname) { + if (dname[0] != '/' && needcd) + chdir(curdir); + if (a->disfile) { + a->ddev= libxfs_device_open(dname, a->dcreat, readonly); + a->dfd = libxfs_device_to_fd(a->ddev); + } else { + if (stat64(dname, &stbuf) < 0) { + fprintf(stderr, "%s: stat64 failed on %s: %s\n", + progname, dname, strerror(errno)); + goto done; + } + if (!(rawfile = findrawpath(dname))) { + fprintf(stderr, "%s: can't find a char device " + "matching %s\n", progname, dname); + goto done; + } + if (!(blockfile = findblockpath(dname))) { + fprintf(stderr, "%s: can't find a block device " + "matching %s\n", progname, dname); + goto done; + } + if (!readonly && !inactive && check_ismounted( + dname, blockfile, 1)) + goto done; + if 
(inactive && check_isactive( + dname, blockfile, readonly)) + goto done; + a->ddev = libxfs_device_open(rawfile, + a->dcreat, readonly); + a->dfd = libxfs_device_to_fd(a->ddev); + a->dsize = findsize(rawfile); + } + needcd = 1; + } else + a->dsize = 0; + if (logname) { + if (logname[0] != '/' && needcd) + chdir(curdir); + if (a->lisfile) { + a->logdev = libxfs_device_open(logname, + a->lcreat, readonly); + a->logfd = libxfs_device_to_fd(a->logdev); + } else { + if (stat64(logname, &stbuf) < 0) { + fprintf(stderr, "%s: stat64 failed on %s: %s\n", + progname, logname, strerror(errno)); + goto done; + } + if (!(rawfile = findrawpath(logname))) { + fprintf(stderr, "%s: can't find a char device " + "matching %s\n", progname, logname); + goto done; + } + if (!(blockfile = findblockpath(logname))) { + fprintf(stderr, "%s: can't find a block device " + "matching %s\n", progname, logname); + goto done; + } + if (!readonly && !inactive && check_ismounted( + logname, blockfile, 1)) + goto done; + else if (inactive && check_isactive( + logname, blockfile, readonly)) + goto done; + a->logdev = libxfs_device_open(rawfile, + a->lcreat, readonly); + a->logfd = libxfs_device_to_fd(a->logdev); + a->logBBsize = findsize(rawfile); + } + needcd = 1; + } else + a->logBBsize = 0; + if (rtname) { + if (rtname[0] != '/' && needcd) + chdir(curdir); + if (a->risfile) { + a->rtdev = libxfs_device_open(rtname, + a->rcreat, readonly); + a->rtfd = libxfs_device_to_fd(a->rtdev); + } else { + if (stat64(rtname, &stbuf) < 0) { + fprintf(stderr, "%s: stat64 failed on %s: %s\n", + progname, rtname, strerror(errno)); + goto done; + } + if (!(rawfile = findrawpath(rtname))) { + fprintf(stderr, "%s: can't find a char device " + "matching %s\n", progname, rtname); + goto done; + } + if (!(blockfile = findblockpath(rtname))) { + fprintf(stderr, "%s: can't find a block device " + "matching %s\n", progname, rtname); + goto done; + } + if (!readonly && !inactive && check_ismounted( + rtname, blockfile, 1)) + 
goto done; + if (inactive && check_isactive( + rtname, blockfile, readonly)) + goto done; + a->rtdev = libxfs_device_open(rawfile, + a->rcreat, readonly); + a->rtfd = libxfs_device_to_fd(a->rtdev); + a->rtsize = findsize(rawfile); + } + needcd = 1; + } else + a->rtsize = 0; + if (a->dsize < 0) { + fprintf(stderr, "%s: can't get size for data subvolume\n", + progname); + goto done; + } + if (a->logBBsize < 0) { + fprintf(stderr, "%s: can't get size for log subvolume\n", + progname); + goto done; + } + if (a->rtsize < 0) { + fprintf(stderr, "%s: can't get size for realtime subvolume\n", + progname); + goto done; + } + if (needcd) + chdir(curdir); + rval = 1; +done: + if (dpath[0]) + unlink(dpath); + if (logpath[0]) + unlink(logpath); + if (rtpath[0]) + unlink(rtpath); + if (fd >= 0) + close(fd); + if (!rval && a->ddev) + libxfs_device_close(a->ddev); + if (!rval && a->logdev) + libxfs_device_close(a->logdev); + if (!rval && a->rtdev) + libxfs_device_close(a->rtdev); + return rval; +} + + +/* + * Initialize/destroy all of the zone allocators we use. 
+ */ +static void +manage_zones(int release) +{ + extern xfs_zone_t *xfs_ili_zone; + extern xfs_zone_t *xfs_inode_zone; + extern xfs_zone_t *xfs_ifork_zone; + extern xfs_zone_t *xfs_dabuf_zone; + extern xfs_zone_t *xfs_buf_item_zone; + extern xfs_zone_t *xfs_da_state_zone; + extern xfs_zone_t *xfs_btree_cur_zone; + extern xfs_zone_t *xfs_bmap_free_item_zone; + extern void xfs_dir_startup(); + + if (release) { /* free zone allocation */ + libxfs_free(xfs_inode_zone); + libxfs_free(xfs_ifork_zone); + libxfs_free(xfs_dabuf_zone); + libxfs_free(xfs_buf_item_zone); + libxfs_free(xfs_da_state_zone); + libxfs_free(xfs_btree_cur_zone); + libxfs_free(xfs_bmap_free_item_zone); + return; + } + /* otherwise initialise zone allocation */ + xfs_inode_zone = libxfs_zone_init(sizeof(xfs_inode_t), "xfs_inode"); + xfs_ifork_zone = libxfs_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); + xfs_dabuf_zone = libxfs_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); + xfs_ili_zone = libxfs_zone_init( + sizeof(xfs_inode_log_item_t), "xfs_inode_log_item"); + xfs_buf_item_zone = libxfs_zone_init( + sizeof(xfs_buf_log_item_t), "xfs_buf_log_item"); + xfs_da_state_zone = libxfs_zone_init( + sizeof(xfs_da_state_t), "xfs_da_state"); + xfs_btree_cur_zone = libxfs_zone_init( + sizeof(xfs_btree_cur_t), "xfs_btree_cur"); + xfs_bmap_free_item_zone = libxfs_zone_init( + sizeof(xfs_bmap_free_item_t), "xfs_bmap_free_item"); + xfs_dir_startup(); +} + +/* + * Get the bitmap and summary inodes into the mount structure + * at mount time. 
+ */ +static int +rtmount_inodes(xfs_mount_t *mp) +{ + int error; + xfs_sb_t *sbp; + + sbp = &mp->m_sb; + if (sbp->sb_rbmino == NULLFSINO) + return 0; + error = libxfs_iread(mp, NULL, sbp->sb_rbmino, &mp->m_rbmip, 0); + if (error) { + fprintf(stderr, "%s: cannot read realtime bitmap inode (%d)\n", + progname, error); + return error; + } + ASSERT(mp->m_rbmip != NULL); + ASSERT(sbp->sb_rsumino != NULLFSINO); + error = libxfs_iread(mp, NULL, sbp->sb_rsumino, &mp->m_rsumip, 0); + if (error) { + fprintf(stderr, "%s: cannot read realtime summary inode (%d)\n", + progname, error); + return error; + } + ASSERT(mp->m_rsumip != NULL); + return 0; +} + +/* + * Mount structure initialization, provides a filled-in xfs_mount_t + * such that the numerous XFS_* macros can be used. If dev is zero, + * no IO will be performed (no size checks, read root inodes). + */ +xfs_mount_t * +libxfs_mount( + xfs_mount_t *mp, + xfs_sb_t *sb, + dev_t dev, + dev_t logdev, + dev_t rtdev, + int rrootinos) +{ + xfs_daddr_t d; + xfs_buf_t *bp; + xfs_sb_t *sbp; + size_t size; + int error; + + mp->m_dev = dev; + mp->m_rtdev = rtdev; + mp->m_logdev = logdev; + mp->m_sb = *sb; + sbp = &(mp->m_sb); + manage_zones(0); + + libxfs_mount_common(mp, sb); + + libxfs_alloc_compute_maxlevels(mp); + libxfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); + libxfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); + libxfs_ialloc_compute_maxlevels(mp); + + if (sbp->sb_imax_pct) { + /* Make sure the maximum inode count is a multiple of the + * units we allocate inodes in. + */ + mp->m_maxicount = (sbp->sb_dblocks * sbp->sb_imax_pct) / 100; + mp->m_maxicount = ((mp->m_maxicount / mp->m_ialloc_blks) * + mp->m_ialloc_blks) << sbp->sb_inopblog; + } else + mp->m_maxicount = 0; + + mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; + + /* + * Set whether we're using inode alignment. 
+ */ + if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) && + mp->m_sb.sb_inoalignmt >= + XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) + mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; + else + mp->m_inoalign_mask = 0; + /* + * If we are using stripe alignment, check whether + * the stripe unit is a multiple of the inode alignment + */ + if ( mp->m_dalign + && mp->m_inoalign_mask && !(mp->m_dalign & mp->m_inoalign_mask)) + mp->m_sinoalign = mp->m_dalign; + else + mp->m_sinoalign = 0; + + /* + * Check that the data (and log if separate) are an ok size. + */ + d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); + if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { + fprintf(stderr, "%s: size check failed\n", progname); + return NULL; + } + + /* Initialize the appropriate directory manager */ + if (XFS_SB_VERSION_HASDIRV2(sbp)) + libxfs_dir2_mount(mp); + else + libxfs_dir_mount(mp); + + /* Initialize the precomputed transaction reservations values */ + libxfs_trans_init(mp); + + if (dev == 0) /* maxtrres, we have no device so leave now */ + return mp; + + bp = libxfs_readbuf(mp->m_dev, d - 1, 1, 0); + if (bp == NULL) { + fprintf(stderr, "%s: data size check failed\n", progname); + return NULL; + } + libxfs_putbuf(bp); + + if (mp->m_logdev && mp->m_logdev != mp->m_dev) { + d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); + if ( (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) || + (!(bp = libxfs_readbuf(mp->m_logdev, d - 1, 1, 1)))) { + fprintf(stderr, "%s: log size checks failed\n", + progname); + return NULL; + } + libxfs_putbuf(bp); + } + + /* Initialize realtime fields in the mount structure */ + if (libxfs_rtmount_init(mp)) { + fprintf(stderr, "%s: real-time device init failed\n", progname); + return NULL; + } + + /* Allocate and initialize the per-ag data */ + size = sbp->sb_agcount * sizeof(xfs_perag_t); + if ((mp->m_perag = calloc(size, 1)) == NULL) { + fprintf(stderr, "%s: failed to alloc %d bytes: %s\n", + progname, size, strerror(errno)); + exit(1); + } 
+ + /* + * mkfs calls mount before the root inode is allocated. + */ + if (rrootinos && sbp->sb_rootino != NULLFSINO) { + error = libxfs_iread(mp, NULL, sbp->sb_rootino, + &mp->m_rootip, 0); + if (error) { + fprintf(stderr, "%s: cannot read root inode (%d)\n", + progname, error); + return NULL; + } + ASSERT(mp->m_rootip != NULL); + } + if (rrootinos && rtmount_inodes(mp)) + return NULL; + return mp; +} + +/* + * Release any resourse obtained during a mount. + */ +void +libxfs_umount(xfs_mount_t *mp) +{ + manage_zones(1); + free(mp->m_perag); +} diff --git a/libxfs/logitem.c b/libxfs/logitem.c new file mode 100644 index 000000000..b26106088 --- /dev/null +++ b/libxfs/logitem.c @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include + +xfs_zone_t *xfs_buf_item_zone; +xfs_zone_t *xfs_ili_zone; /* inode log item zone */ + + +/* + * This is called to add the given log item to the transaction's + * list of log items. It must find a free log item descriptor + * or allocate a new one and add the item to that descriptor. + * The function returns a pointer to item descriptor used to point + * to the new item. The log item will now point to its new descriptor + * with its li_desc field. + */ +xfs_log_item_desc_t * +xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) +{ + xfs_log_item_desc_t *lidp; + xfs_log_item_chunk_t *licp; + int i; + + /* + * If there are no free descriptors, allocate a new chunk + * of them and put it at the front of the chunk list. + */ + if (tp->t_items_free == 0) { + licp = (xfs_log_item_chunk_t*) + kmem_alloc(sizeof(xfs_log_item_chunk_t), KM_SLEEP); + ASSERT(licp != NULL); + /* + * Initialize the chunk, and then + * claim the first slot in the newly allocated chunk. + */ + XFS_LIC_INIT(licp); + XFS_LIC_CLAIM(licp, 0); + licp->lic_unused = 1; + XFS_LIC_INIT_SLOT(licp, 0); + lidp = XFS_LIC_SLOT(licp, 0); + + /* + * Link in the new chunk and update the free count. + */ + licp->lic_next = tp->t_items.lic_next; + tp->t_items.lic_next = licp; + tp->t_items_free = XFS_LIC_NUM_SLOTS - 1; + + /* + * Initialize the descriptor and the generic portion + * of the log item. + * + * Point the new slot at this item and return it. + * Also point the log item at its currently active + * descriptor and set the item's mount pointer. + */ + lidp->lid_item = lip; + lidp->lid_flags = 0; + lidp->lid_size = 0; + lip->li_desc = lidp; + lip->li_mountp = tp->t_mountp; + return (lidp); + } + + /* + * Find the free descriptor. 
It is somewhere in the chunklist + * of descriptors. + */ + licp = &tp->t_items; + while (licp != NULL) { + if (XFS_LIC_VACANCY(licp)) { + if (licp->lic_unused <= XFS_LIC_MAX_SLOT) { + i = licp->lic_unused; + ASSERT(XFS_LIC_ISFREE(licp, i)); + break; + } + for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) { + if (XFS_LIC_ISFREE(licp, i)) + break; + } + ASSERT(i <= XFS_LIC_MAX_SLOT); + break; + } + licp = licp->lic_next; + } + ASSERT(licp != NULL); + /* + * If we find a free descriptor, claim it, + * initialize it, and return it. + */ + XFS_LIC_CLAIM(licp, i); + if (licp->lic_unused <= i) { + licp->lic_unused = i + 1; + XFS_LIC_INIT_SLOT(licp, i); + } + lidp = XFS_LIC_SLOT(licp, i); + tp->t_items_free--; + lidp->lid_item = lip; + lidp->lid_flags = 0; + lidp->lid_size = 0; + lip->li_desc = lidp; + lip->li_mountp = tp->t_mountp; + return (lidp); +} + +/* + * Free the given descriptor. + * + * This requires setting the bit in the chunk's free mask corresponding + * to the given slot. + */ +void +xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) +{ + uint slot; + xfs_log_item_chunk_t *licp; + xfs_log_item_chunk_t **licpp; + + slot = XFS_LIC_DESC_TO_SLOT(lidp); + licp = XFS_LIC_DESC_TO_CHUNK(lidp); + XFS_LIC_RELSE(licp, slot); + lidp->lid_item->li_desc = NULL; + tp->t_items_free++; + + /* + * If there are no more used items in the chunk and this is not + * the chunk embedded in the transaction structure, then free + * the chunk. First pull it from the chunk list and then + * free it back to the heap. We didn't bother with a doubly + * linked list here because the lists should be very short + * and this is not a performance path. It's better to save + * the memory of the extra pointer. + * + * Also decrement the transaction structure's count of free items + * by the number in a chunk since we are freeing an empty chunk. 
+ */ + if (XFS_LIC_ARE_ALL_FREE(licp) && (licp != &(tp->t_items))) { + licpp = &(tp->t_items.lic_next); + while (*licpp != licp) { + ASSERT(*licpp != NULL); + licpp = &((*licpp)->lic_next); + } + *licpp = licp->lic_next; + kmem_free(licp, sizeof(xfs_log_item_chunk_t)); + tp->t_items_free -= XFS_LIC_NUM_SLOTS; + } +} + +/* + * This is called to find the descriptor corresponding to the given + * log item. It returns a pointer to the descriptor. + * The log item MUST have a corresponding descriptor in the given + * transaction. This routine does not return NULL, it panics. + * + * The descriptor pointer is kept in the log item's li_desc field. + * Just return it. + */ +xfs_log_item_desc_t * +xfs_trans_find_item(xfs_trans_t *tp, xfs_log_item_t *lip) +{ + ASSERT(lip->li_desc != NULL); + + return (lip->li_desc); +} + +/* + * This is called to unlock all of the items of a transaction and to free + * all the descriptors of that transaction. + * + * It walks the list of descriptors and unlocks each item. It frees + * each chunk except that embedded in the transaction as it goes along. + */ +void +xfs_trans_free_items( + xfs_trans_t *tp, + int flags) +{ + xfs_log_item_chunk_t *licp; + xfs_log_item_chunk_t *next_licp; + int abort; + + abort = flags & XFS_TRANS_ABORT; + licp = &tp->t_items; + /* + * Special case the embedded chunk so we don't free it below. + */ + if (!XFS_LIC_ARE_ALL_FREE(licp)) { + (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); + XFS_LIC_ALL_FREE(licp); + licp->lic_unused = 0; + } + licp = licp->lic_next; + + /* + * Unlock each item in each chunk and free the chunks. + */ + while (licp != NULL) { + ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); + (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); + next_licp = licp->lic_next; + kmem_free(licp, sizeof(xfs_log_item_chunk_t)); + licp = next_licp; + } + + /* + * Reset the transaction structure's free item count. 
+ */ + tp->t_items_free = XFS_LIC_NUM_SLOTS; + tp->t_items.lic_next = NULL; +} + +/* + * Check to see if a buffer matching the given parameters is already + * a part of the given transaction. Only check the first, embedded + * chunk, since we don't want to spend all day scanning large transactions. + */ +STATIC xfs_buf_t * +xfs_trans_buf_item_match( + xfs_trans_t *tp, + buftarg_t *target, + xfs_daddr_t blkno, + int len) +{ + xfs_log_item_chunk_t *licp; + xfs_log_item_desc_t *lidp; + xfs_buf_log_item_t *blip; + xfs_buf_t *bp; + int i; + +#ifdef LI_DEBUG + fprintf(stderr, "buf_item_match (fast) log items for xact %p\n", tp); +#endif + + bp = NULL; + len = BBTOB(len); + licp = &tp->t_items; + if (!XFS_LIC_ARE_ALL_FREE(licp)) { + for (i = 0; i < licp->lic_unused; i++) { + /* + * Skip unoccupied slots. + */ + if (XFS_LIC_ISFREE(licp, i)) { + continue; + } + + lidp = XFS_LIC_SLOT(licp, i); + blip = (xfs_buf_log_item_t *)lidp->lid_item; +#ifdef LI_DEBUG + fprintf(stderr, + "\tfound log item, xact %p, blip=%p (%d/%d)\n", + tp, blip, i, licp->lic_unused); +#endif + if (blip->bli_item.li_type != XFS_LI_BUF) { + continue; + } + + bp = blip->bli_buf; +#ifdef LI_DEBUG + fprintf(stderr, + "\tfound buf %p log item, xact %p, blip=%p (%d)\n", + bp, tp, blip, i); +#endif + if ((XFS_BUF_TARGET(bp) == target->dev) && + (XFS_BUF_ADDR(bp) == blkno) && + (XFS_BUF_COUNT(bp) == len)) { + /* + * We found it. Break out and + * return the pointer to the buffer. + */ +#ifdef LI_DEBUG + fprintf(stderr, + "\tfound REAL buf log item, bp=%p\n", + bp); +#endif + break; + } else { + bp = NULL; + } + } + } +#ifdef LI_DEBUG + if (!bp) fprintf(stderr, "\tfast search - got nothing\n"); +#endif + return bp; +} + +/* + * Check to see if a buffer matching the given parameters is already + * a part of the given transaction. Check all the chunks, we + * want to be thorough. 
+ */ +STATIC xfs_buf_t * +xfs_trans_buf_item_match_all( + xfs_trans_t *tp, + buftarg_t *target, + xfs_daddr_t blkno, + int len) +{ + xfs_log_item_chunk_t *licp; + xfs_log_item_desc_t *lidp; + xfs_buf_log_item_t *blip; + xfs_buf_t *bp; + int i; + +#ifdef LI_DEBUG + fprintf(stderr, "buf_item_match_all (slow) log items for xact %p\n", + tp); +#endif + + bp = NULL; + len = BBTOB(len); + for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) { + if (XFS_LIC_ARE_ALL_FREE(licp)) { + ASSERT(licp == &tp->t_items); + ASSERT(licp->lic_next == NULL); + return NULL; + } + for (i = 0; i < licp->lic_unused; i++) { + /* + * Skip unoccupied slots. + */ + if (XFS_LIC_ISFREE(licp, i)) { + continue; + } + + lidp = XFS_LIC_SLOT(licp, i); + blip = (xfs_buf_log_item_t *)lidp->lid_item; +#ifdef LI_DEBUG + fprintf(stderr, + "\tfound log item, xact %p, blip=%p (%d/%d)\n", + tp, blip, i, licp->lic_unused); +#endif + if (blip->bli_item.li_type != XFS_LI_BUF) { + continue; + } + + bp = blip->bli_buf; + ASSERT(bp); + ASSERT(XFS_BUF_ADDR(bp)); +#ifdef LI_DEBUG + fprintf(stderr, + "\tfound buf %p log item, xact %p, blip=%p (%d)\n", + bp, tp, blip, i); +#endif + if ((XFS_BUF_TARGET(bp) == target->dev) && + (XFS_BUF_ADDR(bp) == blkno) && + (XFS_BUF_COUNT(bp) == len)) { + /* + * We found it. Break out and + * return the pointer to the buffer. + */ +#ifdef LI_DEBUG + fprintf(stderr, + "\tfound REAL buf log item, bp=%p\n", + bp); +#endif + return bp; + } + } + } +#ifdef LI_DEBUG + if (!bp) fprintf(stderr, "slow search - got nothing\n"); +#endif + return NULL; +} + +/* + * Allocate a new buf log item to go with the given buffer. + * Set the buffer's b_fsprivate field to point to the new + * buf log item. If there are other item's attached to the + * buffer (see xfs_buf_attach_iodone() below), then put the + * buf log item at the front. 
+ */ +void +xfs_buf_item_init( + xfs_buf_t *bp, + xfs_mount_t *mp) +{ + xfs_log_item_t *lip; + xfs_buf_log_item_t *bip; + +#ifdef LI_DEBUG + fprintf(stderr, "buf_item_init for buffer %p\n", bp); +#endif + + /* + * Check to see if there is already a buf log item for + * this buffer. If there is, it is guaranteed to be + * the first. If we do already have one, there is + * nothing to do here so return. + */ + if (XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *) != mp) + XFS_BUF_SET_FSPRIVATE3(bp, mp); + XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); + if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { + lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); + if (lip->li_type == XFS_LI_BUF) { +#ifdef LI_DEBUG + fprintf(stderr, + "reused buf item %p for pre-logged buffer %p\n", + lip, bp); +#endif + return; + } + } + + bip = (xfs_buf_log_item_t *)kmem_zone_zalloc(xfs_buf_item_zone, + KM_SLEEP); +#ifdef LI_DEBUG + fprintf(stderr, "adding buf item %p for not-logged buffer %p\n", + bip, bp); +#endif + bip->bli_item.li_type = XFS_LI_BUF; + bip->bli_item.li_mountp = mp; + bip->bli_buf = bp; + bip->bli_format.blf_type = XFS_LI_BUF; + bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); + bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); + XFS_BUF_SET_FSPRIVATE(bp, bip); +} + + +/* + * Mark bytes first through last inclusive as dirty in the buf + * item's bitmap. + */ +void +xfs_buf_item_log( + xfs_buf_log_item_t *bip, + uint first, + uint last) +{ + /* + * Mark the item as having some dirty data for + * quick reference in xfs_buf_item_dirty. + */ + bip->bli_flags |= XFS_BLI_DIRTY; +} + +/* + * Initialize the inode log item for a newly allocated (in-core) inode. 
+ */ +void +xfs_inode_item_init( + xfs_inode_t *ip, + xfs_mount_t *mp) +{ + xfs_inode_log_item_t *iip; + + ASSERT(ip->i_itemp == NULL); + iip = ip->i_itemp = (xfs_inode_log_item_t *) + kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP); +#ifdef LI_DEBUG + fprintf(stderr, "inode_item_init for inode %llu, iip=%p\n", + ip->i_ino, iip); +#endif + + iip->ili_item.li_type = XFS_LI_INODE; + iip->ili_item.li_mountp = mp; + iip->ili_inode = ip; + iip->ili_format.ilf_type = XFS_LI_INODE; + iip->ili_format.ilf_ino = ip->i_ino; + iip->ili_format.ilf_blkno = ip->i_blkno; + iip->ili_format.ilf_len = ip->i_len; + iip->ili_format.ilf_boffset = ip->i_boffset; +} diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c new file mode 100644 index 000000000..06fb1a5e2 --- /dev/null +++ b/libxfs/rdwr.c @@ -0,0 +1,468 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include +#include + +#include +#include + +#define BBTOOFF64(bbs) (((xfs_off_t)(bbs)) << BBSHIFT) +#define BDSTRAT_SIZE (256 * 1024) + +void +libxfs_device_zero(dev_t dev, xfs_daddr_t start, uint len) +{ + xfs_daddr_t bno; + uint nblks; + int size; + int fd; + char *z; + + size = BDSTRAT_SIZE <= BBTOB(len) ? BDSTRAT_SIZE : BBTOB(len); + if ((z = memalign(getpagesize(), size)) == NULL) { + fprintf(stderr, "%s: device_zero can't memalign %d bytes: %s\n", + progname, size, strerror(errno)); + exit(1); + } + bzero(z, size); + fd = libxfs_device_to_fd(dev); + for (bno = start; bno < start + len; ) { + nblks = (uint)BTOBB(size); + if (bno + nblks > start + len) + nblks = (uint)(start + len - bno); + if (lseek64(fd, BBTOOFF64(bno), SEEK_SET) < 0) { + fprintf(stderr, "%s: device_zero lseek64 failed: %s\n", + progname, strerror(errno)); + exit(1); + } + if (write(fd, z, BBTOB(nblks)) < BBTOB(nblks)) { + fprintf(stderr, "%s: device_zero write failed: %s\n", + progname, strerror(errno)); + exit(1); + } + bno += nblks; + } + free(z); +} + +int +libxfs_log_clear( + dev_t device, + xfs_daddr_t start, + uint length, + uuid_t *fs_uuid, + int fmt) +{ + xfs_buf_t *buf; + xlog_rec_header_t *head; + xlog_op_header_t *op; + /* the data section must be 32 bit size aligned */ + struct { + __uint16_t magic; + __uint16_t pad1; + __uint32_t pad2; /* may as well make it 64 bits */ + } magic = { XLOG_UNMOUNT_TYPE, 0, 0 }; + + if (!device || !fs_uuid) + return -EINVAL; + + /* first zero the log */ + libxfs_device_zero(device, start, length); + + /* then write a log record header */ + buf = libxfs_getbuf(device, start, 1); + if (!buf) + return -1; + + memset(XFS_BUF_PTR(buf), 0, BBSIZE); + head = 
(xlog_rec_header_t *)XFS_BUF_PTR(buf); + + /* note that oh_tid actually contains the cycle number + * and the tid is stored in h_cycle_data[0] - that's the + * way things end up on disk. + */ + + INT_SET(head->h_magicno, ARCH_CONVERT, XLOG_HEADER_MAGIC_NUM); + INT_SET(head->h_cycle, ARCH_CONVERT, 1); + INT_SET(head->h_version, ARCH_CONVERT, 1); + INT_SET(head->h_len, ARCH_CONVERT, 20); + INT_SET(head->h_chksum, ARCH_CONVERT, 0); + INT_SET(head->h_prev_block, ARCH_CONVERT, -1); + INT_SET(head->h_num_logops, ARCH_CONVERT, 1); + INT_SET(head->h_cycle_data[0], ARCH_CONVERT, 0xb0c0d0d0); + INT_SET(head->h_fmt, ARCH_CONVERT, fmt); + + ASSIGN_ANY_LSN(head->h_lsn, 1, 0, ARCH_CONVERT); + ASSIGN_ANY_LSN(head->h_tail_lsn, 1, 0, ARCH_CONVERT); + + memcpy(head->h_fs_uuid, fs_uuid, sizeof(uuid_t)); + + if (libxfs_writebuf(buf, 0)) + return -1; + + buf = libxfs_getbuf(device, start + 1, 1); + if (!buf) + return -1; + + /* now a log unmount op */ + memset(XFS_BUF_PTR(buf), 0, BBSIZE); + op = (xlog_op_header_t *)XFS_BUF_PTR(buf); + INT_SET(op->oh_tid, ARCH_CONVERT, 1); + INT_SET(op->oh_len, ARCH_CONVERT, sizeof(magic)); + INT_SET(op->oh_clientid, ARCH_CONVERT, XFS_LOG); + INT_SET(op->oh_flags, ARCH_CONVERT, XLOG_UNMOUNT_TRANS); + INT_SET(op->oh_res2, ARCH_CONVERT, 0); + + /* and the data for this op */ + + memcpy(XFS_BUF_PTR(buf) + sizeof(xlog_op_header_t), + &magic, + sizeof(magic)); + + if (libxfs_writebuf(buf, 0)) + return -1; + + return 0; +} + +/* + * Simple I/O interface + */ + +xfs_buf_t * +libxfs_getbuf(dev_t device, xfs_daddr_t blkno, int len) +{ + xfs_buf_t *buf; + size_t total; + + total = sizeof(xfs_buf_t) + BBTOB(len); + if ((buf = calloc(total, 1)) == NULL) { + fprintf(stderr, "%s: buf calloc failed (%d bytes): %s\n", + progname, total, strerror(errno)); + exit(1); + } + /* by default, we allocate buffer directly after the header */ + buf->b_blkno = blkno; + buf->b_bcount = BBTOB(len); + buf->b_dev = device; + buf->b_addr = (char *)(&buf->b_addr + 1); /* must be last 
field */ +#ifdef IO_DEBUG + fprintf(stderr, "getbuf allocated %ubytes, blkno=%llu(%llu), %p\n", + BBTOB(len), BBTOOFF64(blkno), blkno, buf); +#endif + + return(buf); +} + +int +libxfs_readbufr(dev_t dev, xfs_daddr_t blkno, xfs_buf_t *buf, int len, int die) +{ + int fd = libxfs_device_to_fd(dev); + + buf->b_dev = dev; + buf->b_blkno = blkno; + ASSERT(BBTOB(len) <= buf->b_bcount); + + if (lseek64(fd, BBTOOFF64(blkno), SEEK_SET) < 0) { + fprintf(stderr, "%s: lseek64 to %llu failed: %s\n", + progname, BBTOOFF64(blkno), strerror(errno)); + ASSERT(0); + if (die) + exit(1); + return errno; + } + if (read(fd, buf->b_addr, BBTOB(len)) < 0) { + fprintf(stderr, "%s: read failed: %s\n", + progname, strerror(errno)); + if (die) + exit(1); + return errno; + } +#ifdef IO_DEBUG + fprintf(stderr, "readbufr read %ubytes, blkno=%llu(%llu), %p\n", + BBTOB(len), BBTOOFF64(blkno), blkno, buf); +#endif + return 0; +} + +xfs_buf_t * +libxfs_readbuf(dev_t dev, xfs_daddr_t blkno, int len, int die) +{ + xfs_buf_t *buf; + int error; + + buf = libxfs_getbuf(dev, blkno, len); + error = libxfs_readbufr(dev, blkno, buf, len, die); + if (error) { + libxfs_putbuf(buf); + return NULL; + } + return buf; +} + +xfs_buf_t * +libxfs_getsb(xfs_mount_t *mp, int die) +{ + return libxfs_readbuf(mp->m_dev, XFS_SB_DADDR, + XFS_FSB_TO_BB(mp, 1), die); +} + +int +libxfs_writebuf_int(xfs_buf_t *buf, int die) +{ + int sts; + int fd = libxfs_device_to_fd(buf->b_dev); + + if (lseek64(fd, BBTOOFF64(buf->b_blkno), SEEK_SET) < 0) { + fprintf(stderr, "%s: lseek64 to %llu failed: %s\n", + progname, BBTOOFF64(buf->b_blkno), strerror(errno)); + ASSERT(0); + if (die) + exit(1); + return errno; + } +#ifdef IO_DEBUG + fprintf(stderr, "writing %ubytes at blkno=%llu(%llu), %p\n", + buf->b_bcount, BBTOOFF64(buf->b_blkno), buf->b_blkno, buf); +#endif + sts = write(fd, buf->b_addr, buf->b_bcount); + if (sts < 0) { + fprintf(stderr, "%s: write failed: %s\n", + progname, strerror(errno)); + ASSERT(0); + if (die) + exit(1); + return 
errno; + } + else if (sts != buf->b_bcount) { + fprintf(stderr, "%s: error - wrote only %d of %d bytes\n", + progname, sts, buf->b_bcount); + if (die) + exit(1); + return EIO; + } + return 0; +} + +int +libxfs_writebuf(xfs_buf_t *buf, int die) +{ + int error = libxfs_writebuf_int(buf, die); + libxfs_putbuf(buf); + return error; +} + +void +libxfs_putbuf(xfs_buf_t *buf) +{ + if (buf != NULL) { + xfs_buf_log_item_t *bip; + extern xfs_zone_t *xfs_buf_item_zone; + + bip = XFS_BUF_FSPRIVATE(buf, xfs_buf_log_item_t *); + + if (bip) + libxfs_zone_free(xfs_buf_item_zone, bip); +#ifdef IO_DEBUG + fprintf(stderr, "putbuf released %ubytes, %p\n", + buf->b_bcount, buf); +#endif + free(buf); + buf = NULL; + } +} + + +/* + * Simple memory interface + */ + +xfs_zone_t * +libxfs_zone_init(int size, char *name) +{ + xfs_zone_t *ptr; + + if ((ptr = malloc(sizeof(xfs_zone_t))) == NULL) { + fprintf(stderr, "%s: zone init failed (%s, %d bytes): %s\n", + progname, name, sizeof(xfs_zone_t), strerror(errno)); + exit(1); + } + ptr->zone_unitsize = size; + ptr->zone_name = name; +#ifdef MEM_DEBUG + ptr->allocated = 0; + fprintf(stderr, "new zone %p for \"%s\", size=%d\n", ptr, name, size); +#endif + return ptr; +} + +void * +libxfs_zone_zalloc(xfs_zone_t *z) +{ + void *ptr; + + ASSERT(z != NULL); + if ((ptr = calloc(z->zone_unitsize, 1)) == NULL) { + fprintf(stderr, "%s: zone calloc failed (%s, %d bytes): %s\n", + progname, z->zone_name, z->zone_unitsize, + strerror(errno)); + exit(1); + } +#ifdef MEM_DEBUG + z->allocated++; + fprintf(stderr, "## zone alloc'd item %p from %s (%d bytes) (%d active)\n", + ptr, z->zone_name, z->zone_unitsize, + z->allocated); +#endif + return ptr; +} + +void +libxfs_zone_free(xfs_zone_t *z, void *ptr) +{ +#ifdef MEM_DEBUG + z->allocated--; + fprintf(stderr, "## zone freed item %p from %s (%d bytes) (%d active)\n", + ptr, z->zone_name, z->zone_unitsize, + z->allocated); +#endif + if (ptr != NULL) { + free(ptr); + ptr = NULL; + } +} + +void * 
+libxfs_malloc(size_t size)
+{
+	void	*ptr;
+
+	/* malloc wrapper that never returns NULL: exits on failure */
+	if ((ptr = malloc(size)) == NULL) {
+		/* size is a size_t; print it as unsigned long, not %d */
+		fprintf(stderr, "%s: malloc failed (%lu bytes): %s\n",
+			progname, (unsigned long)size, strerror(errno));
+		exit(1);
+	}
+#ifdef MEM_DEBUG
+	fprintf(stderr, "## malloc'd item %p size %lu bytes\n",
+		ptr, (unsigned long)size);
+#endif
+	return ptr;
+}
+
+/*
+ * Free memory obtained from libxfs_malloc/libxfs_realloc.
+ * A NULL ptr is ignored.
+ */
+void
+libxfs_free(void *ptr)
+{
+#ifdef MEM_DEBUG
+	fprintf(stderr, "## freed item %p\n",
+		ptr);
+#endif
+	if (ptr != NULL) {
+		free(ptr);
+		ptr = NULL;
+	}
+}
+
+/*
+ * realloc wrapper: resize ptr to size bytes and return the new
+ * address.  Exits the program if realloc returns NULL.
+ */
+void *
+libxfs_realloc(void *ptr, size_t size)
+{
+#ifdef MEM_DEBUG
+	void *optr=ptr;
+#endif
+	if ((ptr = realloc(ptr, size)) == NULL) {
+		/* size is a size_t; print it as unsigned long, not %d */
+		fprintf(stderr, "%s: realloc failed (%lu bytes): %s\n",
+			progname, (unsigned long)size, strerror(errno));
+		exit(1);
+	}
+#ifdef MEM_DEBUG
+	fprintf(stderr, "## realloc'd item %p now %p size %lu bytes\n",
+		optr, ptr, (unsigned long)size);
+#endif
+	return ptr;
+}
+
+
+/*
+ * Read inode number ino with libxfs_iread and hand it back through
+ * ipp.  Returns 0 on success or the error from libxfs_iread, in which
+ * case *ipp is left untouched.  lock_flags is accepted but not used.
+ */
+int
+libxfs_iget(xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, uint lock_flags,
+		xfs_inode_t **ipp, xfs_daddr_t bno)
+{
+	xfs_inode_t	*ip;
+	int		error;
+
+	error = libxfs_iread(mp, tp, ino, &ip, bno);
+	if (error)
+		return error;
+	*ipp = ip;
+	return 0;
+}
+
+/*
+ * Release an in-core inode: free its attached inode log item (if any)
+ * to xfs_ili_zone, then free the inode to xfs_inode_zone.  A NULL ip
+ * is ignored.  lock_flags is accepted but not used.
+ */
+void
+libxfs_iput(xfs_inode_t *ip, uint lock_flags)
+{
+	extern xfs_zone_t	*xfs_ili_zone;
+	extern xfs_zone_t	*xfs_inode_zone;
+
+	if (ip != NULL) {
+
+		/* free attached inode log item */
+		if (ip->i_itemp)
+			libxfs_zone_free(xfs_ili_zone, ip->i_itemp);
+		ip->i_itemp = NULL;
+
+		libxfs_zone_free(xfs_inode_zone, ip);
+		ip = NULL;
+	}
+}
+
+/*
+ * libxfs_mod_sb can be used to copy arbitrary changes to the
+ * in-core superblock into the superblock buffer to be logged.
+ *
+ * In user-space, we simply convert to big-endian, and write
+ * the whole superblock - the in-core changes have all been made
+ * already.
+ */ +void +libxfs_mod_sb(xfs_trans_t *tp, __int64_t fields) +{ + int fd; + xfs_buf_t *bp; + xfs_mount_t *mp; + + mp = tp->t_mountp; + bp = libxfs_getbuf(mp->m_dev, XFS_SB_DADDR, 1); + libxfs_xlate_sb(XFS_BUF_PTR(bp), &mp->m_sb, -1, ARCH_CONVERT, + XFS_SB_ALL_BITS); + libxfs_writebuf(bp, 1); +} diff --git a/libxfs/trans.c b/libxfs/trans.c new file mode 100644 index 000000000..980d69a09 --- /dev/null +++ b/libxfs/trans.c @@ -0,0 +1,754 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include
+
+/*
+ * Simple transaction interface
+ */
+
+/*
+ * Allocate a zero-filled transaction for mount mp with the given
+ * type, with all XFS_LIC_NUM_SLOTS item slots of the embedded chunk
+ * marked free.  Exits the program if the allocation fails.
+ */
+xfs_trans_t *
+libxfs_trans_alloc(xfs_mount_t *mp, int type)
+{
+	xfs_trans_t	*ptr;
+
+	if ((ptr = calloc(sizeof(xfs_trans_t), 1)) == NULL) {
+		/* sizeof yields a size_t; cast so the argument matches %d */
+		fprintf(stderr, "%s: xact calloc failed (%d bytes): %s\n",
+			progname, (int)sizeof(xfs_trans_t), strerror(errno));
+		exit(1);
+	}
+	ptr->t_mountp = mp;
+	ptr->t_type = type;
+	ptr->t_items_free = XFS_LIC_NUM_SLOTS;
+	XFS_LIC_INIT(&(ptr->t_items));
+#ifdef XACT_DEBUG
+	fprintf(stderr, "allocated new transaction %p\n", ptr);
+#endif
+	return ptr;
+}
+
+/*
+ * Create a new, empty transaction with the same mount and type as tp.
+ * Nothing else is carried over from tp.
+ */
+xfs_trans_t *
+libxfs_trans_dup(xfs_trans_t *tp)
+{
+	xfs_trans_t	*ptr;
+
+	ptr = libxfs_trans_alloc(tp->t_mountp, tp->t_type);
+#ifdef XACT_DEBUG
+	fprintf(stderr, "duplicated transaction %p (new=%p)\n", tp, ptr);
+#endif
+	return ptr;
+}
+
+int
+libxfs_trans_reserve(xfs_trans_t *tp,
+	uint blocks, uint logspace, uint rtextents, uint flags, uint logcount)
+{
+	xfs_sb_t	*mpsb = &tp->t_mountp->m_sb;
+
+	/*
+	 * Attempt to reserve the needed disk blocks by decrementing
+	 * the number needed from the number available. This will
+	 * fail if the count would go below zero.
+ */ + if (blocks > 0) { + if (mpsb->sb_fdblocks < blocks) + return ENOSPC; + } + /* user space, don't need log/RT stuff (preserve the API though) */ + return 0; +} + +void +libxfs_trans_cancel(xfs_trans_t *tp, int flags) +{ +#ifdef XACT_DEBUG + xfs_trans_t *otp = tp; +#endif + if (tp != NULL) { + xfs_trans_free_items(tp, flags); + free(tp); + tp = NULL; + } +#ifdef XACT_DEBUG + fprintf(stderr, "## cancelled transaction %p\n", otp); +#endif +} + +int +libxfs_trans_iget(xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, + uint lock_flags, xfs_inode_t **ipp) +{ + int error; + xfs_inode_t *ip; + xfs_inode_log_item_t *iip; + + if (tp == NULL) + return libxfs_iread(mp, tp, ino, ipp, 0); + + error = libxfs_iread(mp, tp, ino, &ip, 0); + if (error) + return error; + ASSERT(ip != NULL); + + if (ip->i_itemp == NULL) + xfs_inode_item_init(ip, mp); + iip = ip->i_itemp; + xfs_trans_add_item(tp, (xfs_log_item_t *)(iip)); + + /* initialize i_transp so we can find it incore */ + ip->i_transp = tp; + + *ipp = ip; + return 0; +} + +void +libxfs_trans_iput(xfs_trans_t *tp, xfs_inode_t *ip, uint lock_flags) +{ + xfs_inode_log_item_t *iip; + xfs_log_item_desc_t *lidp; + + if (tp == NULL) { + libxfs_iput(ip, lock_flags); + return; + } + + ASSERT(ip->i_transp == tp); + iip = ip->i_itemp; + ASSERT(iip != NULL); + + lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)iip); + ASSERT(lidp != NULL); + ASSERT(lidp->lid_item == (xfs_log_item_t *)iip); + ASSERT(!(lidp->lid_flags & XFS_LID_DIRTY)); + xfs_trans_free_item(tp, lidp); + + libxfs_iput(ip, lock_flags); +} + +void +libxfs_trans_ijoin(xfs_trans_t *tp, xfs_inode_t *ip, uint lock_flags) +{ + xfs_inode_log_item_t *iip; + + ASSERT(ip->i_transp == NULL); + if (ip->i_itemp == NULL) + xfs_inode_item_init(ip, ip->i_mount); + iip = ip->i_itemp; + ASSERT(iip->ili_flags == 0); + ASSERT(iip->ili_inode != NULL); + + xfs_trans_add_item(tp, (xfs_log_item_t *)(iip)); + + ip->i_transp = tp; +#ifdef XACT_DEBUG + fprintf(stderr, "ijoin'd inode %llu, transaction 
%p\n", ip->i_ino, tp); +#endif +} + +void +libxfs_trans_ihold(xfs_trans_t *tp, xfs_inode_t *ip) +{ + ASSERT(ip->i_transp == tp); + ASSERT(ip->i_itemp != NULL); + + ip->i_itemp->ili_flags |= XFS_ILI_HOLD; +#ifdef XACT_DEBUG + fprintf(stderr, "ihold'd inode %llu, transaction %p\n", ip->i_ino, tp); +#endif +} + +void +libxfs_trans_inode_alloc_buf(xfs_trans_t *tp, xfs_buf_t *bp) +{ + xfs_buf_log_item_t *bip; + + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; +} + +/* + * This is called to mark the fields indicated in fieldmask as needing + * to be logged when the transaction is committed. The inode must + * already be associated with the given transaction. + * + * The values for fieldmask are defined in xfs_inode_item.h. We always + * log all of the core inode if any of it has changed, and we always log + * all of the inline data/extents/b-tree root if any of them has changed. + */ +void +xfs_trans_log_inode( + xfs_trans_t *tp, + xfs_inode_t *ip, + uint flags) +{ + xfs_log_item_desc_t *lidp; + + ASSERT(ip->i_transp == tp); + ASSERT(ip->i_itemp != NULL); +#ifdef XACT_DEBUG + fprintf(stderr, "dirtied inode %llu, transaction %p\n", ip->i_ino, tp); +#endif + + lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp)); + ASSERT(lidp != NULL); + + tp->t_flags |= XFS_TRANS_DIRTY; + lidp->lid_flags |= XFS_LID_DIRTY; + + /* + * Always OR in the bits from the ili_last_fields field. + * This is to coordinate with the xfs_iflush() and xfs_iflush_done() + * routines in the eventual clearing of the ilf_fields bits. + * See the big comment in xfs_iflush() for an explanation of + * this coordination mechanism. 
+ */ + flags |= ip->i_itemp->ili_last_fields; + ip->i_itemp->ili_format.ilf_fields |= flags; +} + +/* + * This is called to mark bytes first through last inclusive of the given + * buffer as needing to be logged when the transaction is committed. + * The buffer must already be associated with the given transaction. + * + * First and last are numbers relative to the beginning of this buffer, + * so the first byte in the buffer is numbered 0 regardless of the + * value of b_blkno. + */ +void +libxfs_trans_log_buf(xfs_trans_t *tp, xfs_buf_t *bp, uint first, uint last) +{ + xfs_buf_log_item_t *bip; + xfs_log_item_desc_t *lidp; + + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp))); +#ifdef XACT_DEBUG + fprintf(stderr, "dirtied buffer %p, transaction %p\n", bp, tp); +#endif + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + + lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)bip); + ASSERT(lidp != NULL); + + tp->t_flags |= XFS_TRANS_DIRTY; + lidp->lid_flags |= XFS_LID_DIRTY; + xfs_buf_item_log(bip, first, last); +} + +void +libxfs_trans_brelse(xfs_trans_t *tp, xfs_buf_t *bp) +{ + xfs_buf_log_item_t *bip; + xfs_log_item_desc_t *lidp; +#ifdef XACT_DEBUG + fprintf(stderr, "released buffer %p, transaction %p\n", bp, tp); +#endif + + if (tp == NULL) { + ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); + libxfs_putbuf(bp); + return; + } + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + ASSERT(bip->bli_item.li_type == XFS_LI_BUF); + lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); + ASSERT(lidp != NULL); + if (bip->bli_recur > 0) { + bip->bli_recur--; + return; + } + /* If dirty, can't release till transaction committed */ + if (lidp->lid_flags & XFS_LID_DIRTY) { + return; + } + xfs_trans_free_item(tp, lidp); + if (bip->bli_flags & XFS_BLI_HOLD) { + bip->bli_flags &= ~XFS_BLI_HOLD; + } + 
XFS_BUF_SET_FSPRIVATE2(bp, NULL); + libxfs_putbuf(bp); +} + +void +libxfs_trans_binval(xfs_trans_t *tp, xfs_buf_t *bp) +{ + xfs_log_item_desc_t *lidp; + xfs_buf_log_item_t *bip; +#ifdef XACT_DEBUG + fprintf(stderr, "binval'd buffer %p, transaction %p\n", bp, tp); +#endif + + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip); + ASSERT(lidp != NULL); + bip->bli_flags &= ~(XFS_BLI_DIRTY); + bip->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF; + bip->bli_format.blf_flags |= XFS_BLI_CANCEL; + lidp->lid_flags |= XFS_LID_DIRTY; + tp->t_flags |= XFS_TRANS_DIRTY; +} + +void +libxfs_trans_bjoin(xfs_trans_t *tp, xfs_buf_t *bp) +{ + xfs_buf_log_item_t *bip; + + ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); +#ifdef XACT_DEBUG + fprintf(stderr, "bjoin'd buffer %p, transaction %p\n", bp, tp); +#endif + + xfs_buf_item_init(bp, tp->t_mountp); + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + xfs_trans_add_item(tp, (xfs_log_item_t *)bip); + XFS_BUF_SET_FSPRIVATE2(bp, tp); +} + +void +libxfs_trans_bhold(xfs_trans_t *tp, xfs_buf_t *bp) +{ + xfs_buf_log_item_t *bip; + + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); +#ifdef XACT_DEBUG + fprintf(stderr, "bhold'd buffer %p, transaction %p\n", bp, tp); +#endif + + bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); + bip->bli_flags |= XFS_BLI_HOLD; +} + +xfs_buf_t * +libxfs_trans_get_buf(xfs_trans_t *tp, dev_t dev, xfs_daddr_t d, int len, uint f) +{ + xfs_buf_t *bp; + xfs_buf_log_item_t *bip; + buftarg_t bdev = { dev }; + + if (tp == NULL) + return libxfs_getbuf(dev, d, len); + + if (tp->t_items.lic_next == NULL) + bp = xfs_trans_buf_item_match(tp, &bdev, d, len); + else + bp = xfs_trans_buf_item_match_all(tp, &bdev, d, len); + if (bp != NULL) { + ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); + bip = 
XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+		ASSERT(bip != NULL);
+		bip->bli_recur++;
+		return bp;
+	}
+
+	bp = libxfs_getbuf(dev, d, len);
+	if (bp == NULL)
+		return NULL;
+#ifdef XACT_DEBUG
+	fprintf(stderr, "trans_get_buf buffer %p, transaction %p\n", bp, tp);
+#endif
+
+	xfs_buf_item_init(bp, tp->t_mountp);
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+	bip->bli_recur = 0;
+	xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
+
+	/* initialize b_fsprivate2 so we can find it incore */
+	XFS_BUF_SET_FSPRIVATE2(bp, tp);
+	return bp;
+}
+
+/*
+ * Read len blocks at blkno on dev and return the buffer through bpp.
+ *
+ * With no transaction (tp == NULL) the buffer is simply read; it is
+ * handed back through bpp together with the read status, even when
+ * that status is an error.
+ *
+ * With a transaction, a buffer already attached to tp that matches
+ * (dev, blkno, len) is reused and its recursion count bumped.
+ * Otherwise a new buffer is read, given a buffer log item, added to
+ * tp and tagged with tp.  If that read fails the new buffer is
+ * released, *bpp is set to NULL and the error is returned.
+ *
+ * The f argument is accepted but not used.
+ */
+int
+libxfs_trans_read_buf(xfs_mount_t *mp, xfs_trans_t *tp, dev_t dev,
+	xfs_daddr_t blkno, int len, uint f, xfs_buf_t **bpp)
+{
+	xfs_buf_t		*bp;
+	xfs_buf_log_item_t	*bip;
+	int			error;
+	buftarg_t		bdev = { dev };
+
+	if (tp == NULL) {
+		bp = libxfs_getbuf(mp->m_dev, blkno, len);
+		error = libxfs_readbufr(dev, blkno, bp, len, 0);
+		*bpp = bp;
+		return error;
+	}
+
+	if (tp->t_items.lic_next == NULL)
+		bp = xfs_trans_buf_item_match(tp, &bdev, blkno, len);
+	else
+		bp = xfs_trans_buf_item_match_all(tp, &bdev, blkno, len);
+	if (bp != NULL) {
+		ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+		ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+		bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+		bip->bli_recur++;
+		*bpp = bp;
+		return 0;
+	}
+
+	bp = libxfs_getbuf(mp->m_dev, blkno, len);
+	error = libxfs_readbufr(dev, blkno, bp, len, 0);
+	if (error) {
+		/*
+		 * The caller only sees NULL, so nobody else can release
+		 * this buffer; drop it here rather than leak it.
+		 */
+		libxfs_putbuf(bp);
+		*bpp = NULL;
+		return error;
+	}
+#ifdef XACT_DEBUG
+	fprintf(stderr, "trans_read_buf buffer %p, transaction %p\n", bp, tp);
+#endif
+
+	xfs_buf_item_init(bp, tp->t_mountp);
+	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+	bip->bli_recur = 0;
+	xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
+
+	/* initialise b_fsprivate2 so we can find it incore */
+	XFS_BUF_SET_FSPRIVATE2(bp, tp);
+	*bpp = bp;
+	return 0;
+}
+
+/*
+ * Record the indicated change to the given field for application
+ * to the file system's superblock when the transaction commits.
+ * For now, just store the change in the transaction structure. + * Mark the transaction structure to indicate that the superblock + * needs to be updated before committing. + * + * Originally derived from xfs_trans_mod_sb(). + */ +void +libxfs_trans_mod_sb(xfs_trans_t *tp, uint field, long delta) +{ + switch (field) { + case XFS_TRANS_SB_RES_FDBLOCKS: + return; + case XFS_TRANS_SB_FDBLOCKS: + tp->t_fdblocks_delta += delta; + break; + case XFS_TRANS_SB_ICOUNT: + ASSERT(delta > 0); + tp->t_icount_delta += delta; + break; + case XFS_TRANS_SB_IFREE: + tp->t_ifree_delta += delta; + break; + case XFS_TRANS_SB_FREXTENTS: + tp->t_frextents_delta += delta; + break; + default: + ASSERT(0); + return; + } + tp->t_flags |= (XFS_TRANS_SB_DIRTY | XFS_TRANS_DIRTY); +} + + +/* + * Transaction commital code follows (i.e. write to disk in libxfs) + */ + +STATIC void +inode_item_done(xfs_inode_log_item_t *iip) +{ + xfs_dinode_t *dip; + xfs_inode_t *ip; + xfs_mount_t *mp; + xfs_buf_t *bp; + int hold; + int error; + extern xfs_zone_t *xfs_ili_zone; + + ip = iip->ili_inode; + mp = iip->ili_item.li_mountp; + hold = iip->ili_flags & XFS_ILI_HOLD; + ASSERT(ip != NULL); + + if (!(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) { + ip->i_transp = NULL; /* disassociate from transaction */ + iip->ili_flags = 0; /* reset all flags */ + if (!hold) + goto ili_done; + return; + } + + /* + * Get the buffer containing the on-disk inode. 
+ */ + error = libxfs_itobp(mp, NULL, ip, &dip, &bp, 0); + if (error) { + fprintf(stderr, "%s: warning - itobp failed (%d)\n", + progname, error); + goto ili_done; + } + + XFS_BUF_SET_FSPRIVATE(bp, iip); + error = libxfs_iflush_int(ip, bp); + if (error) { + fprintf(stderr, "%s: warning - iflush_int failed (%d)\n", + progname, error); + goto ili_done; + } + + ip->i_transp = NULL; /* disassociate from transaction */ + XFS_BUF_SET_FSPRIVATE(bp, NULL); /* remove log item */ + XFS_BUF_SET_FSPRIVATE2(bp, NULL); /* remove xact ptr */ + libxfs_writebuf_int(bp, 0); +#ifdef XACT_DEBUG + fprintf(stderr, "flushing dirty inode %llu, buffer %p (hold=%u)\n", + ip->i_ino, bp, hold); +#endif + if (hold) { + iip->ili_flags &= ~XFS_ILI_HOLD; + return; + } + else { + /*libxfs_iput(iip->ili_inode, 0); - nathans TODO? */ + libxfs_putbuf(bp); + } + +ili_done: + if (ip->i_itemp) + kmem_zone_free(xfs_ili_zone, ip->i_itemp); + else + ASSERT(0); + ip->i_itemp = NULL; +} + +STATIC void +buf_item_done(xfs_buf_log_item_t *bip) +{ + extern xfs_zone_t *xfs_buf_item_zone; + xfs_buf_t *bp; + int hold; + + bp = bip->bli_buf; + ASSERT(bp != NULL); + XFS_BUF_SET_FSPRIVATE(bp, NULL); /* remove log item */ + XFS_BUF_SET_FSPRIVATE2(bp, NULL); /* remove xact ptr */ + + hold = (bip->bli_flags & XFS_BLI_HOLD); + if (bip->bli_flags & XFS_BLI_DIRTY) { +#ifdef XACT_DEBUG + fprintf(stderr, "flushing dirty buffer %p (hold=%d)\n", + bp, hold); +#endif + libxfs_writebuf_int(bp, 0); + if (hold) + bip->bli_flags &= ~XFS_BLI_HOLD; + else + libxfs_putbuf(bp); + } + /* release the buf item */ + kmem_zone_free(xfs_buf_item_zone, bip); +} + +/* + * This is called to perform the commit processing for each + * item described by the given chunk. 
+ */ +static void +trans_chunk_committed(xfs_log_item_chunk_t *licp) +{ + xfs_log_item_desc_t *lidp; + xfs_log_item_t *lip; + int i; + + lidp = licp->lic_descs; + for (i = 0; i < licp->lic_unused; i++, lidp++) { + if (XFS_LIC_ISFREE(licp, i)) + continue; + lip = lidp->lid_item; + if (lip->li_type == XFS_LI_BUF) + buf_item_done((xfs_buf_log_item_t *)lidp->lid_item); + else if (lip->li_type == XFS_LI_INODE) + inode_item_done((xfs_inode_log_item_t *)lidp->lid_item); + else { + fprintf(stderr, "%s: unrecognised log item type\n", + progname); + ASSERT(0); + } + } +} + +/* + * Calls trans_chunk_committed() to process the items in each chunk. + */ +static void +trans_committed(xfs_trans_t *tp) +{ + xfs_log_item_chunk_t *licp; + xfs_log_item_chunk_t *next_licp; + + /* + * Special case the chunk embedded in the transaction. + */ + licp = &(tp->t_items); + if (!(XFS_LIC_ARE_ALL_FREE(licp))) { + trans_chunk_committed(licp); + } + + /* + * Process the items in each chunk in turn. + */ + licp = licp->lic_next; + while (licp != NULL) { + trans_chunk_committed(licp); + next_licp = licp->lic_next; + kmem_free(licp, sizeof(xfs_log_item_chunk_t)); + licp = next_licp; + } +} + +/* + * Unlock each item pointed to by a descriptor in the given chunk. + * Free descriptors pointing to items which are not dirty if freeing_chunk + * is zero. If freeing_chunk is non-zero, then we need to unlock all + * items in the chunk. Return the number of descriptors freed. + * Originally based on xfs_trans_unlock_chunk() - adapted for libxfs + * transactions though. 
+ */ +int +xfs_trans_unlock_chunk( + xfs_log_item_chunk_t *licp, + int freeing_chunk, + int abort, + xfs_lsn_t commit_lsn) /* nb: unused */ +{ + xfs_log_item_desc_t *lidp; + xfs_log_item_t *lip; + int i; + int freed; + + freed = 0; + lidp = licp->lic_descs; + for (i = 0; i < licp->lic_unused; i++, lidp++) { + if (XFS_LIC_ISFREE(licp, i)) { + continue; + } + lip = lidp->lid_item; + lip->li_desc = NULL; + + /* + * Disassociate the logged item from this transaction + */ + if (lip->li_type == XFS_LI_BUF) { + xfs_buf_log_item_t *bip; + + bip = (xfs_buf_log_item_t *)lidp->lid_item; + XFS_BUF_SET_FSPRIVATE2(bip->bli_buf, NULL); + bip->bli_flags &= ~XFS_BLI_HOLD; + } + else if (lip->li_type == XFS_LI_INODE) { + xfs_inode_log_item_t *iip; + + iip = (xfs_inode_log_item_t*)lidp->lid_item; + iip->ili_inode->i_transp = NULL; + iip->ili_flags &= ~XFS_ILI_HOLD; + } + else { + fprintf(stderr, "%s: unrecognised log item type\n", + progname); + ASSERT(0); + } + + /* + * Free the descriptor if the item is not dirty + * within this transaction and the caller is not + * going to just free the entire thing regardless. 
+ */ + if (!(freeing_chunk) && + (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) { + XFS_LIC_RELSE(licp, i); + freed++; + } + } + + return (freed); +} + + +/* + * Commit the changes represented by this transaction + */ +int +libxfs_trans_commit(xfs_trans_t *tp, uint flags, xfs_lsn_t *commit_lsn_p) +{ + xfs_sb_t *sbp; + int error; + + if (tp == NULL) + return 0; + + if (!(tp->t_flags & XFS_TRANS_DIRTY)) { +#ifdef XACT_DEBUG + fprintf(stderr, "committed clean transaction %p\n", tp); +#endif + xfs_trans_free_items(tp, flags); + free(tp); + tp = NULL; + return 0; + } + + if (tp->t_flags & XFS_TRANS_SB_DIRTY) { + sbp = &(tp->t_mountp->m_sb); + if (tp->t_icount_delta) + sbp->sb_icount += tp->t_icount_delta; + if (tp->t_ifree_delta) + sbp->sb_ifree += tp->t_ifree_delta; + if (tp->t_fdblocks_delta) + sbp->sb_fdblocks += tp->t_fdblocks_delta; + if (tp->t_frextents_delta) + sbp->sb_frextents += tp->t_frextents_delta; + libxfs_mod_sb(tp, XFS_SB_ALL_BITS); + } + +#ifdef XACT_DEBUG + fprintf(stderr, "committing dirty transaction %p\n", tp); +#endif + trans_committed(tp); + + /* That's it for the transaction structure. Free it. */ + free(tp); + tp = NULL; + return 0; +} diff --git a/libxfs/util.c b/libxfs/util.c new file mode 100644 index 000000000..44222c30e --- /dev/null +++ b/libxfs/util.c @@ -0,0 +1,735 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. 
Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include
+#include
+
+/*
+ * Wrapper around call to libxfs_ialloc. Takes care of committing and
+ * allocating a new transaction as needed.
+ *
+ * Originally there were two copies of this code - one in mkfs, the
+ * other in repair - now there is just the one.
+ *
+ * If the first libxfs_ialloc call asks to be called again, the
+ * allocation buffer is held, the current transaction is committed and
+ * replaced (through *tp) by a duplicate, the buffer is joined to the
+ * new transaction and libxfs_ialloc is retried.
+ *
+ * Returns 0 with the new inode in *ipp, or the error from
+ * libxfs_ialloc (in which case *ipp is not written).  Exits the
+ * program if the replacement transaction cannot reserve space.
+ */
+int
+libxfs_inode_alloc(
+	xfs_trans_t	**tp,
+	xfs_inode_t	*pip,
+	mode_t		mode,
+	ushort		nlink,
+	dev_t		rdev,
+	cred_t		*cr,
+	xfs_inode_t	**ipp)
+{
+	boolean_t	call_again;
+	int		i;
+	xfs_buf_t	*ialloc_context;
+	xfs_inode_t	*ip;
+	xfs_trans_t	*ntp;
+	int		error;
+
+	call_again = B_FALSE;
+	ialloc_context = (xfs_buf_t *)0;
+	error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr, (xfs_prid_t) 0,
+			   1, &ialloc_context, &call_again, &ip);
+	if (error) {
+		return error;
+	}
+	if (call_again) {
+		xfs_trans_bhold(*tp, ialloc_context);
+		ntp = xfs_trans_dup(*tp);
+		xfs_trans_commit(*tp, 0, NULL);
+		*tp = ntp;
+		if ((i = xfs_trans_reserve(*tp, 0, 0, 0, 0, 0))) {
+			/*
+			 * The failure code is the return value held in i;
+			 * errno is not known to be set by the reserve call
+			 * (NOTE(review): confirm xfs_trans_reserve maps to
+			 * libxfs_trans_reserve, which returns ENOSPC).
+			 */
+			fprintf(stderr, "%s: cannot reserve space: %s\n",
+				progname, strerror(i));
+			exit(1);
+		}
+		xfs_trans_bjoin(*tp, ialloc_context);
+		error = libxfs_ialloc(*tp, pip, mode, nlink, rdev, cr,
+				   (xfs_prid_t) 0, 1, &ialloc_context,
+				   &call_again, &ip);
+		if (error) {
+			return error;
+		}
+	}
+	*ipp = ip;
+	ASSERT(ip);
+	return error;
+}
+
+/* + * Change the requested timestamp in the given inode. + * + * This was once shared with the kernel, but has diverged to the point + * where its no longer worth the hassle of maintaining common code. + */ +void +libxfs_ichgtime(xfs_inode_t *ip, int flags) +{ + struct timespec tv; + struct timeval stv; + + gettimeofday(&stv, (struct timezone *)0); + tv.tv_sec = stv.tv_sec; + tv.tv_nsec = stv.tv_usec * 1000; + if (flags & XFS_ICHGTIME_MOD) { + ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; + ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; + } + if (flags & XFS_ICHGTIME_ACC) { + ip->i_d.di_atime.t_sec = (__int32_t)tv.tv_sec; + ip->i_d.di_atime.t_nsec = (__int32_t)tv.tv_nsec; + } + if (flags & XFS_ICHGTIME_CHG) { + ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec; + ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec; + } +} + +/* + * Allocate an inode on disk and return a copy of it's in-core version. + * Set mode, nlink, and rdev appropriately within the inode. + * The uid and gid for the inode are set according to the contents of + * the given cred structure. + * + * This was once shared with the kernel, but has diverged to the point + * where its no longer worth the hassle of maintaining common code. + */ +int +libxfs_ialloc( + xfs_trans_t *tp, + xfs_inode_t *pip, + mode_t mode, + nlink_t nlink, + dev_t rdev, + cred_t *cr, + xfs_prid_t prid, + int okalloc, + xfs_buf_t **ialloc_context, + boolean_t *call_again, + xfs_inode_t **ipp) +{ + xfs_ino_t ino; + xfs_inode_t *ip; + uint flags; + int error; + + /* + * Call the space management code to pick + * the on-disk inode to be allocated. + */ + error = xfs_dialloc(tp, pip ? 
pip->i_ino : 0, mode, okalloc, + ialloc_context, call_again, &ino); + if (error != 0) + return error; + if (*call_again || ino == NULLFSINO) { + *ipp = NULL; + return 0; + } + ASSERT(*ialloc_context == NULL); + + error = xfs_trans_iget(tp->t_mountp, tp, ino, 0, &ip); + if (error != 0) + return error; + ASSERT(ip != NULL); + + ip->i_d.di_mode = (__uint16_t)mode; + ip->i_d.di_onlink = 0; + ip->i_d.di_nlink = nlink; + ASSERT(ip->i_d.di_nlink == nlink); + ip->i_d.di_uid = cr->cr_uid; + ip->i_d.di_gid = cr->cr_gid; + ip->i_d.di_projid = prid; + bzero(&(ip->i_d.di_pad[0]), sizeof(ip->i_d.di_pad)); + + /* + * If the superblock version is up to where we support new format + * inodes and this is currently an old format inode, then change + * the inode version number now. This way we only do the conversion + * here rather than here and in the flush/logging code. + */ + if (XFS_SB_VERSION_HASNLINK(&tp->t_mountp->m_sb) && + ip->i_d.di_version == XFS_DINODE_VERSION_1) { + ip->i_d.di_version = XFS_DINODE_VERSION_2; + /* old link count, projid field, pad field already zeroed */ + } + + ip->i_d.di_size = 0; + ip->i_d.di_nextents = 0; + ASSERT(ip->i_d.di_nblocks == 0); + xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD); + /* + * di_gen will have been taken care of in xfs_iread. + */ + ip->i_d.di_extsize = 0; + ip->i_d.di_dmevmask = 0; + ip->i_d.di_dmstate = 0; + ip->i_d.di_flags = 0; + flags = XFS_ILOG_CORE; + switch (mode & IFMT) { + case IFIFO: + case IFCHR: + case IFBLK: + case IFSOCK: + ip->i_d.di_format = XFS_DINODE_FMT_DEV; + ip->i_df.if_u2.if_rdev = makedev(major(rdev), minor(rdev)); ip->i_df.if_flags = 0; + flags |= XFS_ILOG_DEV; + break; + case IFREG: + case IFDIR: + case IFLNK: + ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; + ip->i_df.if_flags = XFS_IFEXTENTS; + ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; + ip->i_df.if_u1.if_extents = NULL; + break; + default: + ASSERT(0); + } + /* Attribute fork settings for new inode. 
*/ + ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; + ip->i_d.di_anextents = 0; + + /* + * Log the new values stuffed into the inode. + */ + xfs_trans_log_inode(tp, ip, flags); + *ipp = ip; + return 0; +} + +void +libxfs_iprint(xfs_inode_t *ip) +{ + xfs_dinode_core_t *dip; + xfs_bmbt_rec_t *ep; + xfs_extnum_t i; + xfs_extnum_t nextents; + + printf("Inode %p\n", ip); + printf(" i_dev %x\n", (uint)ip->i_dev); + printf(" i_ino %Lx\n", ip->i_ino); + + if (ip->i_df.if_flags & XFS_IFEXTENTS) + printf("EXTENTS "); + printf("\n"); + printf(" i_df.if_bytes %d\n", ip->i_df.if_bytes); + printf(" i_df.if_u1.if_extents/if_data %p\n", ip->i_df.if_u1.if_extents); + if (ip->i_df.if_flags & XFS_IFEXTENTS) { + nextents = ip->i_df.if_bytes / (uint)sizeof(*ep); + for (ep = ip->i_df.if_u1.if_extents, i = 0; i < nextents; i++, ep++) { + xfs_bmbt_irec_t rec; + + xfs_bmbt_get_all(ep, &rec); + printf("\t%d: startoff %Lu, startblock 0x%Lx," + " blockcount %Lu, state %d\n", + i, (xfs_dfiloff_t)rec.br_startoff, + (xfs_dfsbno_t)rec.br_startblock, + (xfs_dfilblks_t)rec.br_blockcount, + (int)rec.br_state); + } + } + printf(" i_df.if_broot %p\n", ip->i_df.if_broot); + printf(" i_df.if_broot_bytes %x\n", ip->i_df.if_broot_bytes); + + dip = &(ip->i_d); + printf("\nOn disk portion\n"); + printf(" di_magic %x\n", dip->di_magic); + printf(" di_mode %o\n", dip->di_mode); + printf(" di_version %x\n", (uint)dip->di_version); + switch (ip->i_d.di_format) { + case XFS_DINODE_FMT_LOCAL: + printf(" Inline inode\n"); + break; + case XFS_DINODE_FMT_EXTENTS: + printf(" Extents inode\n"); + break; + case XFS_DINODE_FMT_BTREE: + printf(" B-tree inode\n"); + break; + default: + printf(" Other inode\n"); + break; + } + printf(" di_nlink %x\n", dip->di_nlink); + printf(" di_uid %d\n", dip->di_uid); + printf(" di_gid %d\n", dip->di_gid); + printf(" di_nextents %d\n", dip->di_nextents); + printf(" di_size %Ld\n", dip->di_size); + printf(" di_gen %x\n", dip->di_gen); + printf(" di_extsize %d\n", dip->di_extsize); + 
printf(" di_flags %x\n", dip->di_flags); + printf(" di_nblocks %Ld\n", dip->di_nblocks); +} + +/* + * Writes a modified inode's changes out to the inode's on disk home. + * Originally based on xfs_iflush_int() from xfs_inode.c in the kernel. + */ +int +libxfs_iflush_int(xfs_inode_t *ip, xfs_buf_t *bp) +{ + xfs_inode_log_item_t *iip; + xfs_dinode_t *dip; + xfs_mount_t *mp; + + ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); + ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || + ip->i_d.di_nextents > ip->i_df.if_ext_max); + + iip = ip->i_itemp; + mp = ip->i_mount; + + /* set *dip = inode's place in the buffer */ + dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_boffset); + +#ifdef DEBUG + ASSERT(ip->i_d.di_magic == XFS_DINODE_MAGIC); + if ((ip->i_d.di_mode & IFMT) == IFREG) { + ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) || + (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) ); + } + else if ((ip->i_d.di_mode & IFMT) == IFDIR) { + ASSERT( (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS) || + (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) || + (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) ); + } + ASSERT(ip->i_d.di_nextents+ip->i_d.di_anextents <= ip->i_d.di_nblocks); + ASSERT(ip->i_d.di_forkoff <= mp->m_sb.sb_inodesize); +#endif + + /* + * Copy the dirty parts of the inode into the on-disk + * inode. We always copy out the core of the inode, + * because if the inode is dirty at all the core must + * be. + */ + xfs_xlate_dinode_core((xfs_caddr_t)&(dip->di_core), &(ip->i_d), -1, + ARCH_CONVERT); + /* + * If this is really an old format inode and the superblock version + * has not been updated to support only new format inodes, then + * convert back to the old inode format. If the superblock version + * has been updated, then make the conversion permanent. + */ + ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || + XFS_SB_VERSION_HASNLINK(&mp->m_sb)); + if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { + if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { + /* + * Convert it back. 
+ */ + ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); + INT_SET(dip->di_core.di_onlink, ARCH_CONVERT, + ip->i_d.di_nlink); + } else { + /* + * The superblock version has already been bumped, + * so just make the conversion to the new inode + * format permanent. + */ + ip->i_d.di_version = XFS_DINODE_VERSION_2; + INT_SET(dip->di_core.di_version, ARCH_CONVERT, + XFS_DINODE_VERSION_2); + ip->i_d.di_onlink = 0; + INT_ZERO(dip->di_core.di_onlink, ARCH_CONVERT); + bzero(&(ip->i_d.di_pad[0]), sizeof(ip->i_d.di_pad)); + bzero(&(dip->di_core.di_pad[0]), + sizeof(dip->di_core.di_pad)); + ASSERT(ip->i_d.di_projid == 0); + } + } + + if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED) + return EFSCORRUPTED; + if (XFS_IFORK_Q(ip)) { + /* The only error from xfs_iflush_fork is on the data fork. */ + xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); + } + + return 0; +} + +/* + * Given a block number in a fork, return the next valid block number + * (not a hole). + * If this is the last block number then NULLFILEOFF is returned. + * + * This was originally in the kernel, but only used in xfs_repair. 
+ */
+int
+libxfs_bmap_next_offset(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*ip,		/* incore inode */
+	xfs_fileoff_t	*bnop,		/* in: current block, out: next block */
+	int		whichfork)	/* data or attr fork */
+{
+	xfs_fileoff_t	bno;		/* current block */
+	int		eof;		/* hit end of file */
+	int		error;		/* error return value */
+	xfs_bmbt_irec_t	got;		/* current extent value */
+	xfs_ifork_t	*ifp;		/* inode fork pointer */
+	xfs_extnum_t	lastx;		/* last extent used */
+	xfs_bmbt_irec_t	prev;		/* previous extent value */
+
+	/* Only btree, extents and local format forks are understood. */
+	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
+	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
+		return XFS_ERROR(EIO);
+	/* A local format fork never yields a next block. */
+	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+		*bnop = NULLFILEOFF;
+		return 0;
+	}
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	/* Read the extent list in first if it is not yet in core. */
+	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
+	    (error = xfs_iread_extents(tp, ip, whichfork)))
+		return error;
+	bno = *bnop + 1;
+	xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
+	if (eof)
+		*bnop = NULLFILEOFF;
+	else
+		/* bno itself if it is mapped, else the start of the next extent */
+		*bnop = got.br_startoff < bno ? bno : got.br_startoff;
+	return 0;
+}
+
+/*
+ * Like xfs_dir_removename, but only for removing entries with
+ * (name, hashvalue) pairs that may not be consistent (hashvalue
+ * may not be correctly set for the name).
+ *
+ * The caller supplies the hash value to use (hashval) instead of it
+ * being computed from the name.
+ *
+ * Returns EINVAL when namelen >= MAXNAMELEN, otherwise the return value
+ * of the shortform/leaf/node work routine that was selected.
+ *
+ * This was originally in the kernel, but only used in xfs_repair.
+ */
+int
+xfs_dir_bogus_removename(xfs_trans_t *trans, xfs_inode_t *dp, char *name,
+		xfs_fsblock_t *firstblock, xfs_bmap_free_t *flist,
+		xfs_extlen_t total, xfs_dahash_t hashval, int namelen)
+{
+	xfs_da_args_t	args;
+	int		count, totallen, newsize, retval;
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if (namelen >= MAXNAMELEN) {
+		return EINVAL;
+	}
+
+	/*
+	 * Fill in the arg structure for this request.
+	 * NOTE(review): args is not zeroed first, so any field not assigned
+	 * below is indeterminate -- confirm the work routines never read one.
+	 */
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = hashval;
+	args.inumber = 0;
+	args.dp = dp;
+	args.firstblock = firstblock;
+	args.flist = flist;
+	args.total = total;
+	args.whichfork = XFS_DATA_FORK;
+	args.trans = trans;
+	args.justcheck = args.addname = 0;
+	args.oknoent = 1;
+
+	/*
+	 * Decide on what work routines to call based on the directory's
+	 * format: shortform (local), single leaf block, or node.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+		retval = xfs_dir_shortform_removename(&args);
+	} else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
+		retval = xfs_dir_leaf_removename(&args, &count, &totallen);
+		if (retval == 0) {
+			/* Convert to shortform if the remainder fits. */
+			newsize = XFS_DIR_SF_ALLFIT(count, totallen);
+			if (newsize <= XFS_IFORK_DSIZE(dp)) {
+				retval = xfs_dir_leaf_to_shortform(&args);
+			}
+		}
+	} else {
+		retval = xfs_dir_node_removename(&args);
+	}
+	return(retval);
+}
+
+/*
+ * Like xfs_dir2_removename, but only for removing entries with
+ * (name, hashvalue) pairs that may not be consistent (hashvalue
+ * may not be correctly set for the name).
+ *
+ * Returns EINVAL when namelen >= MAXNAMELEN, any error reported by
+ * xfs_dir2_isblock()/xfs_dir2_isleaf(), or otherwise the return value
+ * of the sf/block/leaf/node work routine that was selected.
+ *
+ * This was originally in the kernel, but only used in xfs_repair.
+ */
+int
+xfs_dir2_bogus_removename(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*dp,		/* incore directory inode */
+	char		*name,		/* name of entry to remove */
+	xfs_fsblock_t	*first,		/* bmap's firstblock */
+	xfs_bmap_free_t	*flist,		/* bmap's freeblock list */
+	xfs_extlen_t	total,		/* bmap's total block count */
+	xfs_dahash_t	hash,		/* name's real hash value */
+	int		namelen)	/* entry's name length */
+{
+	xfs_da_args_t	args;		/* operation arguments */
+	int		rval;		/* return value */
+	int		v;		/* type-checking value */
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if (namelen >= MAXNAMELEN)
+		return EINVAL;
+
+	/*
+	 * Fill in the arg structure for this request.
+	 * NOTE(review): args is not zeroed first, so any field not assigned
+	 * below is indeterminate -- confirm the work routines never read one.
+	 */
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = hash;
+	args.inumber = 0;
+	args.dp = dp;
+	args.firstblock = first;
+	args.flist = flist;
+	args.total = total;
+	args.whichfork = XFS_DATA_FORK;
+	args.trans = tp;
+	args.justcheck = args.addname = 0;
+	args.oknoent = 1;
+
+	/*
+	 * Decide on what work routines to call based on the directory's
+	 * format.  The assignments inside the conditions are intentional;
+	 * they are compared against 0 explicitly so that they cannot be
+	 * mistaken for equality tests.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		rval = xfs_dir2_sf_removename(&args);
+	else if ((rval = xfs_dir2_isblock(tp, dp, &v)) != 0)
+		return rval;
+	else if (v)
+		rval = xfs_dir2_block_removename(&args);
+	else if ((rval = xfs_dir2_isleaf(tp, dp, &v)) != 0)
+		return rval;
+	else if (v)
+		rval = xfs_dir2_leaf_removename(&args);
+	else
+		rval = xfs_dir2_node_removename(&args);
+	return rval;
+}
+
+/*
+ * Utility routine commonly used to apply a delta to a field in the
+ * in-core superblock.
+ * Switch on the field indicated and apply the delta to that field.
+ * Fields are not allowed to dip below zero, so if the delta would
+ * do this do not apply it and return ENOSPC.
+ *
+ * Originally derived from xfs_mod_incore_sb().
+ */
+int
+libxfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
+{
+	long long	lcounter;	/* long counter for 64 bit fields */
+
+	/* rsvd is accepted for interface compatibility and is not used. */
+	switch (field) {
+	case XFS_SBS_FDBLOCKS:
+		lcounter = (long long)mp->m_sb.sb_fdblocks;
+		lcounter += delta;
+		if (lcounter < 0)
+			return (XFS_ERROR(ENOSPC));
+		mp->m_sb.sb_fdblocks = lcounter;
+		break;
+	default:
+		/* Only XFS_SBS_FDBLOCKS is handled by this implementation. */
+		ASSERT(0);
+	}
+	return 0;
+}
+
+/*
+ * Free every extent queued on the bmap free list flist, using the
+ * transaction *tp, and remove each item from the list as it is freed.
+ *
+ * *committed is always set to 0: this implementation never commits or
+ * replaces *tp.  firstblock is not used.
+ *
+ * Returns 0 on success or the first error from xfs_free_extent().
+ */
+int
+libxfs_bmap_finish(
+	xfs_trans_t	**tp,
+	xfs_bmap_free_t	*flist,
+	xfs_fsblock_t	firstblock,
+	int		*committed)
+{
+	xfs_bmap_free_item_t	*free;	/* free extent list item */
+	xfs_bmap_free_item_t	*next;	/* next item on free list */
+	int			error;
+
+	/*
+	 * Set the out parameter on every path.  Previously it was only
+	 * written when the list was empty, leaving callers that test
+	 * "committed" after freeing extents with an indeterminate value.
+	 */
+	*committed = 0;
+	if (flist->xbf_count == 0)
+		return 0;
+
+	for (free = flist->xbf_first; free != NULL; free = next) {
+		/* saved first: free is handed to xfs_bmap_del_free() below */
+		next = free->xbfi_next;
+		error = xfs_free_extent(*tp, free->xbfi_startblock,
+				free->xbfi_blockcount);
+		if (error)
+			return error;
+		xfs_bmap_del_free(flist, NULL, free);
+	}
+	return 0;
+}
+
+/*
+ * This routine allocates disk space for the given file.
+ * Originally derived from xfs_alloc_file_space().
+ *
+ * offset and len are converted to filesystem blocks with
+ * XFS_B_TO_FSBT()/XFS_B_TO_FSB().  A non-zero alloc_type adds
+ * XFS_BMAPI_PREALLOC to the xfs_bmapi() flags.  attr_flags is not used.
+ *
+ * Returns EINVAL when len <= 0, ENOSPC when xfs_bmapi() maps no extent,
+ * otherwise 0 or the first error from the transaction/bmap calls.
+ *
+ * NOTE(review): on the error paths the transaction allocated for the
+ * failing iteration is not released here -- confirm whether it has to
+ * be cancelled.
+ */
+int
+libxfs_alloc_file_space(
+	xfs_inode_t	*ip,
+	xfs_off_t	offset,
+	xfs_off_t	len,
+	int		alloc_type,
+	int		attr_flags)
+{
+	xfs_mount_t	*mp;
+	xfs_off_t	count;
+	xfs_filblks_t	datablocks;
+	xfs_filblks_t	allocated_fsb;
+	xfs_filblks_t	allocatesize_fsb;
+	xfs_fsblock_t	firstfsb;
+	xfs_bmap_free_t	free_list;
+	xfs_bmbt_irec_t	*imapp;
+	xfs_bmbt_irec_t	imaps[1];
+	int		reccount;
+	uint		resblks;
+	xfs_fileoff_t	startoffset_fsb;
+	xfs_trans_t	*tp;
+	int		xfs_bmapi_flags;
+	int		committed;
+	int		error;
+
+	if (len <= 0)
+		return EINVAL;
+
+	count = len;
+	error = 0;
+	imapp = &imaps[0];
+	reccount = 1;
+	xfs_bmapi_flags = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
+	mp = ip->i_mount;
+	startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
+	allocatesize_fsb = XFS_B_TO_FSB(mp, count);
+
+	/* allocate file space until done or until there is an error */
+	while (allocatesize_fsb && !error) {
+		datablocks = allocatesize_fsb;
+
+		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+		resblks = (uint)XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
+		error = xfs_trans_reserve(tp, resblks, 0, 0, 0, 0);
+		if (error)
+			break;
+		xfs_trans_ijoin(tp, ip, 0);
+		xfs_trans_ihold(tp, ip);
+
+		XFS_BMAP_INIT(&free_list, &firstfsb);
+		error = xfs_bmapi(tp, ip, startoffset_fsb, allocatesize_fsb,
+				xfs_bmapi_flags, &firstfsb, 0, imapp,
+				&reccount, &free_list);
+		if (error)
+			break;
+
+		/* complete the transaction */
+		error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
+		if (error)
+			break;
+
+		error = xfs_trans_commit(tp, 0, NULL);
+		if (error)
+			break;
+
+		/*
+		 * Check the record count before looking at the mapping:
+		 * with no record returned imaps[0] holds nothing valid.
+		 */
+		if (reccount == 0)
+			return ENOSPC;
+		allocated_fsb = imapp->br_blockcount;
+
+		startoffset_fsb += allocated_fsb;
+		allocatesize_fsb -= allocated_fsb;
+	}
+	return error;
+}
+
+/*
+ * Return the smallest rval for which (1 << rval) >= i, i.e. log2(i)
+ * rounded up (0 for i == 0 and i == 1).  When no power of two that is
+ * representable in an unsigned int is large enough, the number of bits
+ * in an unsigned int (NBBY * sizeof(i)) is returned.
+ */
+unsigned int
+libxfs_log2_roundup(unsigned int i)
+{
+	unsigned int	rval;
+
+	for (rval = 0; rval < NBBY * sizeof(i); rval++) {
+		/*
+		 * 1U, not 1: shifting a signed 1 into the sign bit is
+		 * undefined behaviour, and the comparison is unsigned.
+		 */
+		if ((1U << rval) >= i)
+			break;
+	}
+	return rval;
+}
+
+/*
+ * Get a buffer for the dir/attr block, fill in the contents.
+ * Don't check magic number, the caller will (it's xfs_repair).
+ *
+ * Thin wrapper around libxfs_da_do_buf(); mappedbno is passed by
+ * address, so the callee works on this function's local copy.
+ * NOTE(review): the literal 2 presumably selects the "read, no magic
+ * check" mode of libxfs_da_do_buf() -- confirm against that routine.
+ *
+ * Originally from xfs_da_btree.c in the kernel, but only used
+ * in userspace so it now resides here.
+ */
+int
+libxfs_da_read_bufr(
+	xfs_trans_t	*trans,
+	xfs_inode_t	*dp,
+	xfs_dablk_t	bno,
+	xfs_daddr_t	mappedbno,
+	xfs_dabuf_t	**bpp,
+	int		whichfork)
+{
+	return libxfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 2,
+		(inst_t *)__return_address);
+}
+
+/*
+ * Hold dabuf at transaction commit.
+ *
+ * Originally from xfs_da_btree.c in the kernel, but only used
+ * in userspace so it now resides here.
+ */
+void
+libxfs_da_bhold(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
+{
+	int	idx = 0;
+
+	/* Apply xfs_trans_bhold() to each buffer making up the dabuf. */
+	while (idx < dabuf->nbuf) {
+		xfs_trans_bhold(tp, dabuf->bps[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Join dabuf to transaction: every buffer making up the dabuf is
+ * passed to xfs_trans_bjoin() in turn.
+ *
+ * Originally from xfs_da_btree.c in the kernel, but only used
+ * in userspace so it now resides here.
+ */
+void
+libxfs_da_bjoin(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
+{
+	int	idx = 0;
+
+	while (idx < dabuf->nbuf) {
+		xfs_trans_bjoin(tp, dabuf->bps[idx]);
+		idx++;
+	}
+}
diff --git a/libxfs/xfs.h b/libxfs/xfs.h
new file mode 100644
index 000000000..d702a385e
--- /dev/null
+++ b/libxfs/xfs.h
@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * This header is effectively a "namespace multiplexor" for the + * user level XFS code. It provides all of the necessary stuff + * such that we can build some parts of the XFS kernel code in + * user space in a controlled fashion, and translates the names + * used in the kernel into the names which libxfs is going to + * make available to user tools. + * + * It should only ever be #include'd by XFS "kernel" code being + * compiled in user space. + * + * Our goals here are to... + * o "share" large amounts of complex code between user and + * kernel space; + * o shield the user tools from changes in the bleeding + * edge kernel code, merging source changes when + * convenient and not immediately (no symlinks); + * o i.e. be able to merge changes to the kernel source back + * into the affected user tools in a controlled fashion; + * o provide a _minimalist_ life-support system for kernel + * code in user land, not the "everything + the kitchen + * sink" model which libsim had mutated into; + * o allow the kernel code to be completely free of code + * specifically there to support the user level build. 
+ */ + +#include +#include +#include +#include +#include + +/* + * Map XFS kernel routine names to libxfs.h names + */ + +#define xfs_xlatesb libxfs_xlate_sb +#define xfs_xlate_dinode_core libxfs_xlate_dinode_core +#define xfs_bmbt_get_all libxfs_bmbt_get_all +#define xfs_bmbt_get_blockcount libxfs_bmbt_get_blockcount +#define xfs_bmbt_get_startoff libxfs_bmbt_get_startoff +#define xfs_da_hashname libxfs_da_hashname +#define xfs_da_log2_roundup libxfs_da_log2_roundup +#define xfs_highbit32 libxfs_highbit32 +#define xfs_highbit64 libxfs_highbit64 +#define xfs_attr_leaf_newentsize libxfs_attr_leaf_newentsize +#define xfs_alloc_compute_maxlevels libxfs_alloc_compute_maxlevels +#define xfs_bmap_compute_maxlevels libxfs_bmap_compute_maxlevels +#define xfs_ialloc_compute_maxlevels libxfs_ialloc_compute_maxlevels + +#define xfs_dir_init libxfs_dir_init +#define xfs_dir2_init libxfs_dir2_init +#define xfs_dir_mount libxfs_dir_mount +#define xfs_dir2_mount libxfs_dir2_mount +#define xfs_dir_createname libxfs_dir_createname +#define xfs_dir2_createname libxfs_dir2_createname +#define xfs_dir_lookup libxfs_dir_lookup +#define xfs_dir2_lookup libxfs_dir2_lookup +#define xfs_dir_replace libxfs_dir_replace +#define xfs_dir2_replace libxfs_dir2_replace +#define xfs_dir_removename libxfs_dir_removename +#define xfs_dir2_removename libxfs_dir2_removename +#define xfs_dir_bogus_removename libxfs_dir_bogus_removename +#define xfs_dir2_bogus_removename libxfs_dir2_bogus_removename + +#define xfs_mount_common libxfs_mount_common +#define xfs_rtmount_init libxfs_rtmount_init +#define xfs_alloc_fix_freelist libxfs_alloc_fix_freelist +#define xfs_iread libxfs_iread +#define xfs_ialloc libxfs_ialloc +#define xfs_idata_realloc libxfs_idata_realloc +#define xfs_itobp libxfs_itobp +#define xfs_ichgtime libxfs_ichgtime +#define xfs_bmapi libxfs_bmapi +#define xfs_bmap_finish libxfs_bmap_finish +#define xfs_bmap_del_free libxfs_bmap_del_free +#define xfs_bunmapi libxfs_bunmapi +#define 
xfs_free_extent libxfs_free_extent +#define xfs_rtfree_extent libxfs_rtfree_extent +#define xfs_mod_sb libxfs_mod_sb +#define xfs_mod_incore_sb libxfs_mod_incore_sb + +#define xfs_trans_init libxfs_trans_init +#define xfs_trans_dup libxfs_trans_dup +#define xfs_trans_iget libxfs_trans_iget +#define xfs_trans_ijoin libxfs_trans_ijoin +#define xfs_trans_ihold libxfs_trans_ihold +#define xfs_trans_bjoin libxfs_trans_bjoin +#define xfs_trans_bhold libxfs_trans_bhold +#define xfs_trans_alloc libxfs_trans_alloc +#define xfs_trans_commit libxfs_trans_commit +#define xfs_trans_mod_sb libxfs_trans_mod_sb +#define xfs_trans_reserve libxfs_trans_reserve +#define xfs_trans_get_buf libxfs_trans_get_buf +#define xfs_trans_log_buf libxfs_trans_log_buf +#define xfs_trans_read_buf libxfs_trans_read_buf +#define xfs_trans_log_inode libxfs_trans_log_inode +#define xfs_trans_inode_alloc_buf libxfs_trans_inode_alloc_buf +#define xfs_trans_brelse libxfs_trans_brelse +#define xfs_trans_binval libxfs_trans_binval + +#define xfs_da_shrink_inode libxfs_da_shrink_inode +#define xfs_da_grow_inode libxfs_da_grow_inode +#define xfs_da_brelse libxfs_da_brelse +#define xfs_da_read_buf libxfs_da_read_buf +#define xfs_da_get_buf libxfs_da_get_buf +#define xfs_da_log_buf libxfs_da_log_buf +#define xfs_da_do_buf libxfs_da_do_buf +#define xfs_dir2_shrink_inode libxfs_dir2_shrink_inode +#define xfs_dir2_grow_inode libxfs_dir2_grow_inode +#define xfs_dir2_isleaf libxfs_dir2_isleaf +#define xfs_dir2_isblock libxfs_dir2_isblock +#define xfs_dir2_data_use_free libxfs_dir2_data_use_free +#define xfs_dir2_data_make_free libxfs_dir2_data_make_free +#define xfs_dir2_data_log_entry libxfs_dir2_data_log_entry +#define xfs_dir2_data_log_header libxfs_dir2_data_log_header +#define xfs_dir2_data_freescan libxfs_dir2_data_freescan +#define xfs_dir2_free_log_bests libxfs_dir2_free_log_bests + + +/* + * Infrastructure to support building kernel XFS code in user space + */ + +/* buffer management */ +#define 
XFS_BUF_LOCK 0 +#define XFS_BUF_MAPPED 0 +#define XFS_BUF_TRYLOCK 0 +#define XFS_BUF_ISDONE(bp) 0 +#define XFS_BUF_GETERROR(bp) 0 +#define XFS_BUF_DONE(bp) ((void) 0) +#define XFS_BUF_SET_REF(a,b) ((void) 0) +#define XFS_BUF_SET_VTYPE(a,b) ((void) 0) +#define XFS_BUF_SET_VTYPE_REF(a,b,c) ((void) 0) +#define XFS_BUF_SET_BDSTRAT_FUNC(a,b) ((void) 0) +#define xfs_baread(a,b,c) ((void) 0) /* no readahead */ +#define xfs_buftrace(x,y) ((void) 0) /* debug only */ +#define xfs_buf_item_log_debug(bip,a,b) ((void) 0) /* debug only */ +#define xfs_validate_extents(e,n,f) ((void) 0) /* debug only */ +#define xfs_buf_relse(bp) libxfs_putbuf(bp) +#define xfs_read_buf(mp,x,blkno,len,f,bpp) \ + ( *(bpp) = libxfs_readbuf( (mp)->m_dev, (blkno), (len), 1), 0 ) + + +/* transaction management */ +#define xfs_trans_set_sync(tp) ((void) 0) +#define xfs_trans_agblocks_delta(tp, d) ((void) 0) /* debug only */ +#define xfs_trans_agflist_delta(tp, d) ((void) 0) /* debug only */ +#define xfs_trans_agbtree_delta(tp, d) ((void) 0) /* debug only */ +#define xfs_trans_mod_dquot_byino(tp,ip,f,d) ((void) 0) +#define xfs_trans_get_block_res(tp) 1 +#define xfs_trans_reserve_blkquota(tp,i,n) 0 +#define xfs_trans_unreserve_blkquota(tp,i,n) ((void) 0) +#define xfs_trans_unreserve_rtblkquota(tp,i,n) ((void) 0) + + +/* memory management */ +#define kmem_zone_init(a, b) libxfs_zone_init(a, b) +#define kmem_zone_alloc(z, f) libxfs_zone_zalloc(z) +#define kmem_zone_zalloc(z, f) libxfs_zone_zalloc(z) +#define kmem_zone_free(z, p) libxfs_zone_free(z, p) +#define kmem_realloc(p,sz,u,f) libxfs_realloc(p,sz) +#define kmem_alloc(size, f) libxfs_malloc(size) +#define kmem_free(p, size) libxfs_free(p) + +/* directory management */ +#define xfs_dir2_trace_args(where, args) ((void) 0) +#define xfs_dir2_trace_args_b(where, args, bp) ((void) 0) +#define xfs_dir2_trace_args_bb(where, args, lbp, dbp) ((void) 0) +#define xfs_dir2_trace_args_bibii(where, args, bs, ss, bd, sd, c) ((void) 0) +#define 
xfs_dir2_trace_args_db(where, args, db, bp) ((void) 0) +#define xfs_dir2_trace_args_i(where, args, i) ((void) 0) +#define xfs_dir2_trace_args_s(where, args, s) ((void) 0) +#define xfs_dir2_trace_args_sb(where, args, s, bp) ((void) 0) +#define xfs_dir_shortform_validate_ondisk(a,b) ((void) 0) + + +/* block management */ +#define xfs_bmap_check_extents(ip,w) ((void) 0) +#define xfs_bmap_trace_delete(f,d,ip,i,c,w) ((void) 0) +#define xfs_bmap_trace_exlist(f,ip,i,w) ((void) 0) +#define xfs_bmap_trace_insert(f,d,ip,i,c,r1,r2,w) ((void) 0) +#define xfs_bmap_trace_post_update(f,d,ip,i,w) ((void) 0) +#define xfs_bmap_trace_pre_update(f,d,ip,i,w) ((void) 0) +#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) ((void) 0) +#define xfs_bunmap_trace(ip, bno, len, flags, ra) ((void) 0) +#define XFS_BMBT_TRACE_ARGBI(c,b,i) ((void) 0) +#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) ((void) 0) +#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) ((void) 0) +#define XFS_BMBT_TRACE_ARGI(c,i) ((void) 0) +#define XFS_BMBT_TRACE_ARGIFK(c,i,f,k) ((void) 0) +#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) ((void) 0) +#define XFS_BMBT_TRACE_ARGIK(c,i,k) ((void) 0) +#define XFS_BMBT_TRACE_CURSOR(c,s) ((void) 0) + + +/* anything else */ +typedef __uint32_t inst_t; /* an instruction */ +typedef enum { B_FALSE, B_TRUE } boolean_t; +typedef struct { dev_t dev; } buftarg_t; +#define STATIC +#define ENOATTR 1009 /* Attribute not found */ +#define EFSCORRUPTED 1010 /* Filesystem is corrupted */ +#define ktrace_t void +#define m_ddev_targp m_dev +#define KERN_WARNING +#define XFS_ERROR(e) (e) +#define xfs_fs_cmn_err(a,b,msg,args...) ( fprintf(stderr, msg, ## args) ) +#define printk(msg,args...) 
( fprintf(stderr, msg, ## args) )
+#define XFS_TEST_ERROR(expr,a,b,c)	( expr )
+#define TRACE_FREE(s,a,b,x,f)	((void) 0)
+#define TRACE_ALLOC(s,a)	((void) 0)
+#define TRACE_MODAGF(a,b,c)	((void) 0)
+#define XFS_FORCED_SHUTDOWN(mp)	0
+#define XFS_MOUNT_WSYNC	0
+#define XFS_MOUNT_NOALIGN	0
+#define XFS_ILOCK_EXCL	0
+#define mrlock(a,b,c)	((void) 0)
+#define mraccunlock(a)	((void) 0)
+#define mrunlock(a)	((void) 0)
+#define mraccess(a)	((void) 0)
+#define ismrlocked(a,b)	1
+#define ovbcopy(from,to,count)	memmove(to,from,count)
+#define __return_address	__builtin_return_address(0)
+#define xfs_btree_reada_bufl(m,fsb,c)	((void) 0)
+#define xfs_btree_reada_bufs(m,fsb,c,x)	((void) 0)
+#undef XFS_DIR_SHORTFORM_VALIDATE_ONDISK
+#define XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp,dip)	0
+
+#define do_mod(a, b)	((a) % (b))
+/*
+ * do_div(n, base): divide the lvalue n by base in place and evaluate to
+ * the remainder.
+ *
+ * The dividend is widened to unsigned long long rather than unsigned
+ * long, so that 64 bit values are not truncated where long is 32 bits
+ * wide.  Both arguments are parenthesized so that expression arguments
+ * are cast as a whole.  n is still evaluated more than once.
+ */
+#define do_div(n,base)	({ \
+	int __res; \
+	__res = ((unsigned long long) (n)) % (unsigned) (base); \
+	(n) = ((unsigned long long) (n)) / (unsigned) (base); \
+	__res; })
+
+#include
+#define NBPP	PAGE_SIZE
+
+/*
+ * Increment *a, wrapping it to 0 once it reaches b, and return the
+ * value *a held before the increment.
+ * Despite the name this is a plain read-modify-write with no
+ * synchronization.
+ */
+static inline int atomicIncWithWrap(int *a, int b)
+{
+	int r = *a;
+	(*a)++;
+	if (*a == b)
+		*a = 0;
+	return r;
+}
+
+
+/*
+ * Prototypes needed for a clean build
+ */
+
+/* xfs_alloc.c */
+int xfs_alloc_get_freelist (xfs_trans_t *, xfs_buf_t *, xfs_agblock_t *);
+void xfs_alloc_log_agf (xfs_trans_t *, xfs_buf_t *, int);
+int xfs_alloc_put_freelist (xfs_trans_t *, xfs_buf_t *, xfs_buf_t *,
+		xfs_agblock_t);
+int xfs_alloc_read_agf (xfs_mount_t *, xfs_trans_t *, xfs_agnumber_t,
+		int, xfs_buf_t **);
+int xfs_alloc_vextent (xfs_alloc_arg_t *);
+int xfs_alloc_pagf_init (xfs_mount_t *, xfs_trans_t *, xfs_agnumber_t, int);
+int xfs_alloc_ag_vextent_size (xfs_alloc_arg_t *);
+int xfs_alloc_ag_vextent_near (xfs_alloc_arg_t *);
+int xfs_alloc_ag_vextent_exact (xfs_alloc_arg_t *);
+int xfs_alloc_ag_vextent_small (xfs_alloc_arg_t *, xfs_btree_cur_t *,
+		xfs_agblock_t *, xfs_extlen_t *, int *);
+
+/* xfs_ialloc.c */
+int xfs_dialloc (xfs_trans_t *, xfs_ino_t, mode_t, int, xfs_buf_t
**, + boolean_t *, xfs_ino_t *); +void xfs_ialloc_log_agi (xfs_trans_t *, xfs_buf_t *, int); +int xfs_ialloc_read_agi (xfs_mount_t *, xfs_trans_t *, xfs_agnumber_t, + xfs_buf_t **); +int xfs_dilocate (xfs_mount_t *, xfs_trans_t *, xfs_ino_t, xfs_fsblock_t *, + int *, int *, uint); + +/* xfs_rtalloc.c */ +int xfs_rtfree_extent (xfs_trans_t *, xfs_rtblock_t, xfs_extlen_t); +int xfs_rtmodify_range (xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, + xfs_extlen_t, int); +int xfs_rtmodify_summary (xfs_mount_t *, xfs_trans_t *, int, + xfs_rtblock_t, int, xfs_buf_t **, xfs_fsblock_t *); + +/* xfs_btree.c */ +extern xfs_zone_t *xfs_btree_cur_zone; +void xfs_btree_check_key (xfs_btnum_t, void *, void *); +void xfs_btree_check_rec (xfs_btnum_t, void *, void *); +int xfs_btree_check_lblock (xfs_btree_cur_t *, xfs_btree_lblock_t *, + int, xfs_buf_t *); +int xfs_btree_check_sblock (xfs_btree_cur_t *, xfs_btree_sblock_t *, + int, xfs_buf_t *); +int xfs_btree_check_sptr (xfs_btree_cur_t *, xfs_agblock_t, int); +int xfs_btree_check_lptr (xfs_btree_cur_t *, xfs_dfsbno_t, int); +void xfs_btree_del_cursor (xfs_btree_cur_t *, int); +int xfs_btree_dup_cursor (xfs_btree_cur_t *, xfs_btree_cur_t **); +int xfs_btree_firstrec (xfs_btree_cur_t *, int); +xfs_btree_block_t *xfs_btree_get_block (xfs_btree_cur_t *, int, xfs_buf_t **); +xfs_buf_t *xfs_btree_get_bufs (xfs_mount_t *, xfs_trans_t *, xfs_agnumber_t, + xfs_agblock_t, uint); +xfs_buf_t *xfs_btree_get_bufl (xfs_mount_t *, xfs_trans_t *tp, + xfs_fsblock_t, uint); +xfs_btree_cur_t *xfs_btree_init_cursor (xfs_mount_t *, xfs_trans_t *, + xfs_buf_t *, xfs_agnumber_t, xfs_btnum_t, + xfs_inode_t *, int); +int xfs_btree_islastblock (xfs_btree_cur_t *, int); +int xfs_btree_lastrec (xfs_btree_cur_t *, int); +void xfs_btree_offsets (__int64_t, const short *, int, int *, int *); +int xfs_btree_readahead (xfs_btree_cur_t *, int, int); +void xfs_btree_setbuf (xfs_btree_cur_t *, int, xfs_buf_t *); +int xfs_btree_read_bufs (xfs_mount_t *, xfs_trans_t *, 
xfs_agnumber_t, + xfs_agblock_t, uint, xfs_buf_t **, int); +int xfs_btree_read_bufl (xfs_mount_t *, xfs_trans_t *, xfs_fsblock_t, + uint, xfs_buf_t **, int); + +/* xfs_inode.c */ +int xfs_ialloc (xfs_trans_t *, xfs_inode_t *, mode_t, nlink_t, dev_t, cred_t *, + xfs_prid_t, int, xfs_buf_t **, boolean_t *, xfs_inode_t **); +int xfs_iread_extents (xfs_trans_t *, xfs_inode_t *, int); +int xfs_imap (xfs_mount_t *, xfs_trans_t *, xfs_ino_t, xfs_imap_t *, uint); +int xfs_iextents_copy (xfs_inode_t *, xfs_bmbt_rec_32_t *, int); +int xfs_iflush_int (xfs_inode_t *, xfs_buf_t *); +int xfs_iflush_fork (xfs_inode_t *, xfs_dinode_t *, xfs_inode_log_item_t *, + int, xfs_buf_t *); +int xfs_iformat_local (xfs_inode_t *, xfs_dinode_t *, int, int); +int xfs_iformat_extents (xfs_inode_t *, xfs_dinode_t *, int); +int xfs_iformat_btree (xfs_inode_t *, xfs_dinode_t *, int); +void xfs_iroot_realloc (xfs_inode_t *, int, int); +void xfs_idata_realloc (xfs_inode_t *, int, int); +void xfs_iext_realloc (xfs_inode_t *, int, int); +void xfs_idestroy_fork (xfs_inode_t *, int); +uint xfs_iroundup (uint); + +/* xfs_bmap.c */ +xfs_bmbt_rec_t *xfs_bmap_search_extents (xfs_inode_t *ip, + xfs_fileoff_t, int, int *, xfs_extnum_t *, + xfs_bmbt_irec_t *, xfs_bmbt_irec_t *); +int xfs_bmap_read_extents (xfs_trans_t *, xfs_inode_t *, int); +void xfs_bmap_add_free (xfs_fsblock_t, xfs_filblks_t, xfs_bmap_free_t *, + xfs_mount_t *); +int xfs_bmap_first_unused (xfs_trans_t *, xfs_inode_t *, xfs_extlen_t, + xfs_fileoff_t *, int); +int xfs_bmap_last_offset (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t *, int); +int xfs_bmap_last_before (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t *, int); +int xfs_bmap_one_block (xfs_inode_t *, int); +int xfs_bmapi_single (xfs_trans_t *, xfs_inode_t *, int, xfs_fsblock_t *, + xfs_fileoff_t); +int xfs_bmapi (xfs_trans_t *, xfs_inode_t *, xfs_fileoff_t, + xfs_filblks_t, int, xfs_fsblock_t *, xfs_extlen_t, + xfs_bmbt_irec_t *, int *, xfs_bmap_free_t *); +int xfs_bunmapi (xfs_trans_t 
*, xfs_inode_t *, xfs_fileoff_t, + xfs_filblks_t, int, xfs_extnum_t, xfs_fsblock_t *, + xfs_bmap_free_t *, int *); +int xfs_bmap_add_extent_hole_delay (xfs_inode_t *ip, xfs_extnum_t, + xfs_btree_cur_t *, xfs_bmbt_irec_t *, int *, int); +int xfs_bmap_add_extent_hole_real (xfs_inode_t *, xfs_extnum_t, + xfs_btree_cur_t *, xfs_bmbt_irec_t *, int *, int); +int xfs_bmap_add_extent_unwritten_real (xfs_inode_t *, xfs_extnum_t, + xfs_btree_cur_t **, xfs_bmbt_irec_t *, int *); +int xfs_bmap_add_extent_delay_real (xfs_inode_t *, xfs_extnum_t, + xfs_btree_cur_t **, xfs_bmbt_irec_t *, xfs_filblks_t *, + xfs_fsblock_t *, xfs_bmap_free_t *, int *, int); +int xfs_bmap_extents_to_btree (xfs_trans_t *, xfs_inode_t *, xfs_fsblock_t *, + xfs_bmap_free_t *, xfs_btree_cur_t **, int, int *, int); +void xfs_bmap_delete_exlist (xfs_inode_t *, xfs_extnum_t, xfs_extnum_t, int); +xfs_filblks_t xfs_bmap_worst_indlen (xfs_inode_t *, xfs_filblks_t); +int xfs_bmap_isaeof (xfs_inode_t *, xfs_fileoff_t, int, int *); +void xfs_bmap_insert_exlist (xfs_inode_t *, xfs_extnum_t, xfs_extnum_t, + xfs_bmbt_irec_t *, int); + +/* xfs_bmap_btree.c */ +int xfs_check_nostate_extents (xfs_bmbt_rec_t *, xfs_extnum_t); +void xfs_bmbt_log_ptrs (xfs_btree_cur_t *, xfs_buf_t *, int, int); +void xfs_bmbt_log_keys (xfs_btree_cur_t *, xfs_buf_t *, int, int); +int xfs_bmbt_killroot (xfs_btree_cur_t *, int); +int xfs_bmbt_updkey (xfs_btree_cur_t *, xfs_bmbt_key_t *, int); +int xfs_bmbt_lshift (xfs_btree_cur_t *, int, int *); +int xfs_bmbt_rshift (xfs_btree_cur_t *, int, int *); +int xfs_bmbt_split (xfs_btree_cur_t *, int, xfs_fsblock_t *, + xfs_bmbt_key_t *, xfs_btree_cur_t **, int *); + +/* xfs_ialloc_btree.c */ +int xfs_inobt_newroot (xfs_btree_cur_t *, int *); +int xfs_inobt_rshift (xfs_btree_cur_t *, int, int *); +int xfs_inobt_lshift (xfs_btree_cur_t *, int, int *); +int xfs_inobt_split (xfs_btree_cur_t *, int, xfs_agblock_t *, + xfs_inobt_key_t *, xfs_btree_cur_t **, int *); +void xfs_inobt_log_keys 
(xfs_btree_cur_t *, xfs_buf_t *, int, int); +void xfs_inobt_log_ptrs (xfs_btree_cur_t *, xfs_buf_t *, int, int); +void xfs_inobt_log_recs (xfs_btree_cur_t *, xfs_buf_t *, int, int); +void xfs_inobt_log_block (xfs_trans_t *, xfs_buf_t *, int); +int xfs_inobt_updkey (xfs_btree_cur_t *, xfs_inobt_key_t *, int); + +/* xfs_alloc_btree.c */ +void xfs_alloc_log_ptrs (xfs_btree_cur_t *, xfs_buf_t *, int, int); +void xfs_alloc_log_keys (xfs_btree_cur_t *, xfs_buf_t *, int, int); +void xfs_alloc_log_recs (xfs_btree_cur_t *, xfs_buf_t *, int, int); +void xfs_alloc_log_block (xfs_trans_t *, xfs_buf_t *, int); +int xfs_alloc_updkey (xfs_btree_cur_t *, xfs_alloc_key_t *, int); +int xfs_alloc_lshift (xfs_btree_cur_t *, int, int *); +int xfs_alloc_rshift (xfs_btree_cur_t *, int, int *); +int xfs_alloc_newroot (xfs_btree_cur_t *, int *); +int xfs_alloc_split (xfs_btree_cur_t *, int, xfs_agblock_t *, + xfs_alloc_key_t *, xfs_btree_cur_t **, int *); + +/* xfs_da_btree.c */ +xfs_dabuf_t *xfs_da_buf_make (int, xfs_buf_t **, inst_t *); +int xfs_da_root_join (xfs_da_state_t *, xfs_da_state_blk_t *); +int xfs_da_root_split (xfs_da_state_t *, xfs_da_state_blk_t *, + xfs_da_state_blk_t *); +void xfs_da_node_add (xfs_da_state_t *, xfs_da_state_blk_t *, + xfs_da_state_blk_t *); +int xfs_da_node_split (xfs_da_state_t *, xfs_da_state_blk_t *, + xfs_da_state_blk_t *, xfs_da_state_blk_t *, int, int *); +void xfs_da_node_rebalance (xfs_da_state_t *, xfs_da_state_blk_t *, + xfs_da_state_blk_t *); +void xfs_da_node_remove (xfs_da_state_t *, xfs_da_state_blk_t *); +void xfs_da_node_unbalance (xfs_da_state_t *, xfs_da_state_blk_t *, + xfs_da_state_blk_t *); +int xfs_da_node_order (xfs_dabuf_t *, xfs_dabuf_t *); +int xfs_da_node_toosmall (xfs_da_state_t *, int *); +uint xfs_da_node_lasthash (xfs_dabuf_t *, int *); +int xfs_da_do_buf (xfs_trans_t *, xfs_inode_t *, xfs_dablk_t, xfs_daddr_t *, + xfs_dabuf_t **, int, int, inst_t *); + +/* xfs_dir.c */ +int xfs_dir_node_addname (xfs_da_args_t *); +int 
xfs_dir_leaf_lookup (xfs_da_args_t *); +int xfs_dir_node_lookup (xfs_da_args_t *); +int xfs_dir_leaf_replace (xfs_da_args_t *); +int xfs_dir_node_replace (xfs_da_args_t *); +int xfs_dir_node_removename (xfs_da_args_t *); +int xfs_dir_leaf_removename (xfs_da_args_t *, int *, int *); + +/* xfs_dir_leaf.c */ +void xfs_dir_leaf_rebalance (xfs_da_state_t *, xfs_da_state_blk_t *, + xfs_da_state_blk_t *); +void xfs_dir_leaf_add_work (xfs_dabuf_t *, xfs_da_args_t *, int, int); +int xfs_dir_leaf_compact (xfs_trans_t *, xfs_dabuf_t *, int, int); +int xfs_dir_leaf_figure_balance (xfs_da_state_t *, xfs_da_state_blk_t *, + xfs_da_state_blk_t *, int *, int *); +void xfs_dir_leaf_moveents (xfs_dir_leafblock_t *, int, + xfs_dir_leafblock_t *, int, int, xfs_mount_t *); + +/* xfs_dir2_leaf.c */ +void xfs_dir2_leaf_check (xfs_inode_t *, xfs_dabuf_t *); +int xfs_dir2_leaf_lookup_int (xfs_da_args_t *, xfs_dabuf_t **, + int *, xfs_dabuf_t **); + +/* xfs_dir2_block.c */ +void xfs_dir2_block_log_tail (xfs_trans_t *, xfs_dabuf_t *); +void xfs_dir2_block_log_leaf (xfs_trans_t *, xfs_dabuf_t *, int, int); +int xfs_dir2_block_lookup_int (xfs_da_args_t *, xfs_dabuf_t **, int *); + +/* xfs_dir2_node.c */ +void xfs_dir2_leafn_check (xfs_inode_t *, xfs_dabuf_t *); +int xfs_dir2_leafn_remove (xfs_da_args_t *, xfs_dabuf_t *, int, + xfs_da_state_blk_t *, int *); +int xfs_dir2_node_addname_int (xfs_da_args_t *, xfs_da_state_blk_t *); + +/* xfs_dir2_sf.c */ +void xfs_dir2_sf_check (xfs_da_args_t *); +int xfs_dir2_sf_addname_pick (xfs_da_args_t *, int, + xfs_dir2_sf_entry_t **, xfs_dir2_data_aoff_t *); +void xfs_dir2_sf_addname_easy (xfs_da_args_t *, xfs_dir2_sf_entry_t *, + xfs_dir2_data_aoff_t, int); +void xfs_dir2_sf_addname_hard (xfs_da_args_t *, int, int); +void xfs_dir2_sf_toino8 (xfs_da_args_t *); +void xfs_dir2_sf_toino4 (xfs_da_args_t *); + +/* xfs_attr_leaf.c */ +void xfs_attr_leaf_rebalance (xfs_da_state_t *, xfs_da_state_blk_t *, + xfs_da_state_blk_t *); +int xfs_attr_leaf_add_work 
(xfs_dabuf_t *, xfs_da_args_t *, int); +void xfs_attr_leaf_compact (xfs_trans_t *, xfs_dabuf_t *); +void xfs_attr_leaf_moveents (xfs_attr_leafblock_t *, int, + xfs_attr_leafblock_t *, int, int, xfs_mount_t *); +int xfs_attr_leaf_figure_balance (xfs_da_state_t *, xfs_da_state_blk_t *, + xfs_da_state_blk_t *, int *, int *); + +/* xfs_trans_item.c */ +xfs_log_item_desc_t *xfs_trans_add_item (xfs_trans_t *, xfs_log_item_t *); +xfs_log_item_desc_t *xfs_trans_find_item (xfs_trans_t *, xfs_log_item_t *); +void xfs_trans_free_item (xfs_trans_t *, xfs_log_item_desc_t *); +void xfs_trans_free_items (xfs_trans_t *, int); + +/* xfs_trans_buf.c */ +xfs_buf_t *xfs_trans_buf_item_match (xfs_trans_t *, buftarg_t *, + xfs_daddr_t, int); +xfs_buf_t *xfs_trans_buf_item_match_all (xfs_trans_t *, buftarg_t *, + xfs_daddr_t, int); + +/* xfs_inode_item.c */ +void xfs_inode_item_init (xfs_inode_t *, xfs_mount_t *); + +/* xfs_buf_item.c */ +void xfs_buf_item_init (xfs_buf_t *, xfs_mount_t *); +void xfs_buf_item_log (xfs_buf_log_item_t *, uint, uint); + +/* local source files */ +int xfs_mod_incore_sb (xfs_mount_t *, xfs_sb_field_t, int, int); +void xfs_trans_mod_sb (xfs_trans_t *, uint, long); +int xfs_trans_unlock_chunk (xfs_log_item_chunk_t *, int, int, xfs_lsn_t); + + +#ifndef DEBUG +#define xfs_inobp_check(mp,bp) ((void) 0) +#define xfs_btree_check_key(a,b,c) ((void) 0) +#define xfs_btree_check_rec(a,b,c) ((void) 0) +#define xfs_btree_check_block(a,b,c,d) ((void) 0) +#define xfs_dir2_sf_check(args) ((void) 0) +#define xfs_dir2_leaf_check(dp,bp) ((void) 0) +#define xfs_dir2_leafn_check(dp,bp) ((void) 0) +#undef xfs_dir2_data_check +#define xfs_dir2_data_check(dp,bp) ((void) 0) +#endif diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c new file mode 100644 index 000000000..9792416be --- /dev/null +++ b/libxfs/xfs_alloc.c @@ -0,0 +1,2355 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <xfs.h>
+
+/*
+ * Absolute difference of two values.  Each argument is expanded more than
+ * once, so it must not have side effects.
+ */
+#define	XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
+
+/*
+ * Flag bits tested by xfs_alloc_fixup_trees(): when a bit is set the
+ * corresponding lookup is skipped there, i.e. the caller has already
+ * positioned that cursor on the free extent's record.
+ */
+#define	XFSA_FIXUP_BNO_OK	1
+#define	XFSA_FIXUP_CNT_OK	2
+
+/*
+ * Compute aligned version of the found extent.
+ * Takes alignment and min length into account.
+ */
+STATIC int				/* success (>= minlen) */
+xfs_alloc_compute_aligned(
+	xfs_agblock_t	foundbno,	/* starting block in found extent */
+	xfs_extlen_t	foundlen,	/* length in found extent */
+	xfs_extlen_t	alignment,	/* alignment for allocation */
+	xfs_extlen_t	minlen,		/* minimum length for allocation */
+	xfs_agblock_t	*resbno,	/* result block number */
+	xfs_extlen_t	*reslen)	/* result length */
+{
+	xfs_agblock_t	bno;
+	xfs_extlen_t	diff;
+	xfs_extlen_t	len;
+
+	if (alignment > 1 && foundlen >= minlen) {
+		/*
+		 * Round the start up to the alignment; the blocks skipped
+		 * over (diff) are lost from the usable length, which is
+		 * clamped at zero if the whole extent is skipped.
+		 */
+		bno = roundup(foundbno, alignment);
+		diff = bno - foundbno;
+		len = diff >= foundlen ? 0 : foundlen - diff;
+	} else {
+		/*
+		 * No alignment wanted, or already too short: pass the
+		 * extent through unchanged.
+		 */
+		bno = foundbno;
+		len = foundlen;
+	}
+	*resbno = bno;
+	*reslen = len;
+	return len >= minlen;
+}
+
+/*
+ * Compute best start block and diff for "near" allocations.
+ * freelen >= wantlen already checked by caller.
+ *
+ * The chosen start block is stored in *newbnop, or NULLAGBLOCK if no
+ * suitably aligned start lies inside the free extent.  The return value
+ * is the absolute distance between that start and wantbno, and 0 when
+ * *newbnop is NULLAGBLOCK, so callers must check *newbnop as well.
+ */
+STATIC xfs_extlen_t			/* difference value (absolute) */
+xfs_alloc_compute_diff(
+	xfs_agblock_t	wantbno,	/* target starting block */
+	xfs_extlen_t	wantlen,	/* target length */
+	xfs_extlen_t	alignment,	/* target alignment */
+	xfs_agblock_t	freebno,	/* freespace's starting block */
+	xfs_extlen_t	freelen,	/* freespace's length */
+	xfs_agblock_t	*newbnop)	/* result: best start block from free */
+{
+	xfs_agblock_t	freeend;	/* end of freespace extent */
+	xfs_agblock_t	newbno1;	/* return block number */
+	xfs_agblock_t	newbno2;	/* other new block number */
+	xfs_extlen_t	newlen1;	/* length with newbno1 */
+	xfs_extlen_t	newlen2;	/* length with newbno2 */
+	xfs_agblock_t	wantend;	/* end of target extent */
+
+	ASSERT(freelen >= wantlen);
+	freeend = freebno + freelen;
+	wantend = wantbno + wantlen;
+	if (freebno >= wantbno) {
+		/*
+		 * Free extent starts at or after the target: take its
+		 * first aligned block.
+		 */
+		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
+			newbno1 = NULLAGBLOCK;
+	} else if (freeend >= wantend && alignment > 1) {
+		/*
+		 * Free extent covers the whole target and alignment
+		 * matters: consider the aligned block at or above wantbno
+		 * (newbno1) and the one just below it (newbno2).  Prefer
+		 * the candidate giving the longer extent, then the one
+		 * closer to wantbno.
+		 */
+		newbno1 = roundup(wantbno, alignment);
+		newbno2 = newbno1 - alignment;
+		if (newbno1 >= freeend)
+			newbno1 = NULLAGBLOCK;
+		else
+			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
+		if (newbno2 < freebno)
+			newbno2 = NULLAGBLOCK;
+		else
+			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
+		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
+			if (newlen1 < newlen2 ||
+			    (newlen1 == newlen2 &&
+			     XFS_ABSDIFF(newbno1, wantbno) >
+			     XFS_ABSDIFF(newbno2, wantbno)))
+				newbno1 = newbno2;
+		} else if (newbno2 != NULLAGBLOCK)
+			newbno1 = newbno2;
+	} else if (freeend >= wantend) {
+		/*
+		 * Free extent covers the whole target, no alignment.
+		 */
+		newbno1 = wantbno;
+	} else if (alignment > 1) {
+		/*
+		 * Free extent ends before the target does: aim for the
+		 * last wantlen blocks of it, stepping back one alignment
+		 * unit when rounding up overshot and there is room.
+		 */
+		newbno1 = roundup(freeend - wantlen, alignment);
+		if (newbno1 > freeend - wantlen &&
+		    newbno1 - alignment >= freebno)
+			newbno1 -= alignment;
+		else if (newbno1 >= freeend)
+			newbno1 = NULLAGBLOCK;
+	} else
+		newbno1 = freeend - wantlen;
+	*newbnop = newbno1;
+	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
+}
+
+/*
+ * Fix up the length, based on mod and prod.
+ * len should be k * prod + mod for some k.
+ * If len is too small it is returned unchanged.
+ * If len hits maxlen it is left alone.
+ *
+ * The length is only ever trimmed downward, to the nearest smaller value
+ * of the form k * prod + mod; args->len is left untouched if that value
+ * would drop below args->minlen.
+ */
+STATIC void
+xfs_alloc_fix_len(
+	xfs_alloc_arg_t	*args)		/* allocation argument structure */
+{
+	xfs_extlen_t	k;
+	xfs_extlen_t	rlen;
+
+	ASSERT(args->mod < args->prod);
+	rlen = args->len;
+	ASSERT(rlen >= args->minlen);
+	ASSERT(rlen <= args->maxlen);
+	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
+	    (args->mod == 0 && rlen < args->prod))
+		return;
+	k = rlen % args->prod;
+	if (k == args->mod)
+		return;
+	/*
+	 * Drop just enough blocks that rlen % prod == mod.  When the
+	 * remainder is above mod the excess (k - mod) goes; when it is
+	 * below, step back one whole prod and add mod back in.
+	 */
+	if (k > args->mod)
+		rlen = rlen - (k - args->mod);
+	else
+		rlen = rlen - args->prod + (args->mod - k);
+	/* casts to (int) catch length underflows */
+	if ((int)rlen < (int)args->minlen)
+		return;
+	ASSERT(rlen >= args->minlen);
+	ASSERT(rlen <= args->maxlen);
+	args->len = rlen;
+}
+
+/*
+ * Fix up length if there is too little space left in the a.g.
+ * Return 1 if ok, 0 if too little, should give up.
+ *
+ * On the give-up path args->agbno is set to NULLAGBLOCK.  Note that
+ * args->len has already been reduced by then.
+ */
+STATIC int
+xfs_alloc_fix_minleft(
+	xfs_alloc_arg_t	*args)		/* allocation argument structure */
+{
+	xfs_agf_t	*agf;		/* a.g. freelist header */
+	int		diff;		/* free space difference */
+
+	if (args->minleft == 0)
+		return 1;
+	agf = XFS_BUF_TO_AGF(args->agbp);
+	/*
+	 * Blocks that would remain (free blocks plus freelist blocks)
+	 * beyond this allocation and the minleft reserve; negative means
+	 * a shortfall.
+	 */
+	diff = INT_GET(agf->agf_freeblks, ARCH_CONVERT)
+		+ INT_GET(agf->agf_flcount, ARCH_CONVERT)
+		- args->len - args->minleft;
+	if (diff >= 0)
+		return 1;
+	args->len += diff;		/* shrink the allocated space */
+	/*
+	 * Compare as signed, as xfs_alloc_fix_len() does: if the shortfall
+	 * exceeds len, the addition above wraps and an unsigned compare
+	 * would wrongly accept it.
+	 */
+	if ((int)args->len >= (int)args->minlen)
+		return 1;
+	args->agbno = NULLAGBLOCK;
+	return 0;
+}
+
+/*
+ * Update the two btrees, logically removing from freespace the extent
+ * starting at rbno, rlen blocks.  The extent is contained within the
+ * actual (current) free extent fbno for flen blocks.
+ * Flags are passed in indicating whether the cursors are set to the
+ * relevant records.
+ *
+ * Returns 0 on success, or the error from the first btree operation
+ * that fails.  The XFS_WANT_CORRUPTED_RETURN checks guard each
+ * lookup/insert/delete result.
+ */
+STATIC int				/* error code */
+xfs_alloc_fixup_trees(
+	xfs_btree_cur_t	*cnt_cur,	/* cursor for by-size btree */
+	xfs_btree_cur_t	*bno_cur,	/* cursor for by-block btree */
+	xfs_agblock_t	fbno,		/* starting block of free extent */
+	xfs_extlen_t	flen,		/* length of free extent */
+	xfs_agblock_t	rbno,		/* starting block of returned extent */
+	xfs_extlen_t	rlen,		/* length of returned extent */
+	int		flags)		/* flags, XFSA_FIXUP_... */
+{
+	int		error;		/* error code */
+	int		i;		/* operation results */
+	xfs_agblock_t	nfbno1;		/* first new free startblock */
+	xfs_agblock_t	nfbno2;		/* second new free startblock */
+	xfs_extlen_t	nflen1;		/* first new free length */
+	xfs_extlen_t	nflen2;		/* second new free length */
+
+	/*
+	 * Look up the record in the by-size tree if necessary.
+	 */
+	if (flags & XFSA_FIXUP_CNT_OK) {
+#ifdef DEBUG
+		if (error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(
+			i == 1 && nfbno1 == fbno && nflen1 == flen);
+#endif
+	} else {
+		if (error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 1);
+	}
+	/*
+	 * Look up the record in the by-block tree if necessary.
+	 */
+	if (flags & XFSA_FIXUP_BNO_OK) {
+#ifdef DEBUG
+		if (error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(
+			i == 1 && nfbno1 == fbno && nflen1 == flen);
+#endif
+	} else {
+		if (error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 1);
+	}
+#ifdef DEBUG
+	{
+		xfs_alloc_block_t	*bnoblock;
+		xfs_alloc_block_t	*cntblock;
+
+		/*
+		 * When both trees are a single level, their blocks must
+		 * hold the same number of records.
+		 */
+		if (bno_cur->bc_nlevels == 1 &&
+		    cnt_cur->bc_nlevels == 1) {
+			bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]);
+			cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]);
+			XFS_WANT_CORRUPTED_RETURN(
+				INT_GET(bnoblock->bb_numrecs, ARCH_CONVERT) == INT_GET(cntblock->bb_numrecs, ARCH_CONVERT));
+		}
+	}
+#endif
+	/*
+	 * Deal with all four cases: the allocated record is contained
+	 * within the freespace record, so we can have new freespace
+	 * at either (or both) end, or no freespace remaining.
+	 */
+	if (rbno == fbno && rlen == flen)
+		nfbno1 = nfbno2 = NULLAGBLOCK;
+	else if (rbno == fbno) {
+		nfbno1 = rbno + rlen;
+		nflen1 = flen - rlen;
+		nfbno2 = NULLAGBLOCK;
+	} else if (rbno + rlen == fbno + flen) {
+		nfbno1 = fbno;
+		nflen1 = flen - rlen;
+		nfbno2 = NULLAGBLOCK;
+	} else {
+		nfbno1 = fbno;
+		nflen1 = rbno - fbno;
+		nfbno2 = rbno + rlen;
+		nflen2 = (fbno + flen) - nfbno2;
+	}
+	/*
+	 * Delete the entry from the by-size btree.
+	 */
+	if (error = xfs_alloc_delete(cnt_cur, &i))
+		return error;
+	XFS_WANT_CORRUPTED_RETURN(i == 1);
+	/*
+	 * Add new by-size btree entry(s).
+	 */
+	if (nfbno1 != NULLAGBLOCK) {
+		if (error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 0);
+		if (error = xfs_alloc_insert(cnt_cur, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 1);
+	}
+	if (nfbno2 != NULLAGBLOCK) {
+		if (error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 0);
+		if (error = xfs_alloc_insert(cnt_cur, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 1);
+	}
+	/*
+	 * Fix up the by-block btree entry(s).
+	 */
+	if (nfbno1 == NULLAGBLOCK) {
+		/*
+		 * No remaining freespace, just delete the by-block tree entry.
+		 */
+		if (error = xfs_alloc_delete(bno_cur, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 1);
+	} else {
+		/*
+		 * Update the by-block entry to start later|be shorter.
+		 */
+		if (error = xfs_alloc_update(bno_cur, nfbno1, nflen1))
+			return error;
+	}
+	if (nfbno2 != NULLAGBLOCK) {
+		/*
+		 * 2 resulting free entries, need to add one.
+		 */
+		if (error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 0);
+		if (error = xfs_alloc_insert(bno_cur, &i))
+			return error;
+		XFS_WANT_CORRUPTED_RETURN(i == 1);
+	}
+	return 0;
+}
+
+/*
+ * Read in the allocation group free block array.
+ */
+STATIC int				/* error */
+xfs_alloc_read_agfl(
+	xfs_mount_t	*mp,		/* mount point structure */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_agnumber_t	agno,		/* allocation group number */
+	xfs_buf_t	**bpp)		/* buffer for the ag free block array */
+{
+	xfs_buf_t	*agflbp;	/* buffer handed back through bpp */
+	xfs_daddr_t	agfl_daddr;	/* disk address of the AGFL */
+	int		error;
+
+	ASSERT(agno != NULLAGNUMBER);
+	agfl_daddr = XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR);
+	/*
+	 * Read the block through the transaction; on failure *bpp is
+	 * left untouched.
+	 */
+	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agfl_daddr,
+			1, 0, &agflbp);
+	if (error)
+		return error;
+	ASSERT(agflbp);
+	ASSERT(!XFS_BUF_GETERROR(agflbp));
+	XFS_BUF_SET_VTYPE_REF(agflbp, B_FS_AGFL, XFS_AGFL_REF);
+	*bpp = agflbp;
+	return 0;
+}
+
+#if defined(XFS_ALLOC_TRACE)
+/*
+ * Record one trace entry describing an allocation request: the caller's
+ * tag strings, the mount, and the fields of the argument structure.
+ */
+STATIC void
+xfs_alloc_trace_alloc(
+	char		*name,		/* function tag string */
+	char		*str,		/* additional string */
+	xfs_alloc_arg_t	*args,		/* allocation argument structure */
+	int		line)		/* source line number */
+{
+	__psint_t	tag;		/* entry kind, source line above bit 16 */
+	__psint_t	types;		/* type above bit 16, otype below */
+	__psint_t	bits;		/* wasdel/wasfromfl/isfl/userdata, bits 3..0 */
+
+	tag = XFS_ALLOC_KTRACE_ALLOC | (line << 16);
+	types = (((__psint_t)args->type) << 16) | (__psint_t)args->otype;
+	bits = (args->wasdel << 3) |
+	       (args->wasfromfl << 2) |
+	       (args->isfl << 1) |
+	       (args->userdata << 0);
+
+	ktrace_enter(xfs_alloc_trace_buf,
+		(void *)tag,
+		(void *)name,
+		(void *)str,
+		(void *)args->mp,
+		(void *)(__psunsigned_t)args->agno,
+		(void *)(__psunsigned_t)args->agbno,
+		(void *)(__psunsigned_t)args->minlen,
+		(void *)(__psunsigned_t)args->maxlen,
+		(void *)(__psunsigned_t)args->mod,
+		(void *)(__psunsigned_t)args->prod,
+		(void *)(__psunsigned_t)args->minleft,
+		(void *)(__psunsigned_t)args->total,
+		(void *)(__psunsigned_t)args->alignment,
+		(void *)(__psunsigned_t)args->len,
+		(void *)types,
+		(void *)bits);
+}
+
+/*
+ * Add an allocation trace entry for a free call.
+ */
+STATIC void
+xfs_alloc_trace_free(
+	char		*name,		/* function tag string */
+	char		*str,		/* additional string */
+	xfs_mount_t	*mp,		/* file system mount point */
+	xfs_agnumber_t	agno,		/* allocation group number */
+	xfs_agblock_t	agbno,		/* a.g. relative block number */
+	xfs_extlen_t	len,		/* length of extent */
+	int		isfl,		/* set if is freelist allocation/free */
+	int		line)		/* source line number */
+{
+	/*
+	 * Same number of values as the other trace entries; the unused
+	 * trailing slots are NULL.
+	 */
+	ktrace_enter(xfs_alloc_trace_buf,
+		(void *)(__psint_t)(XFS_ALLOC_KTRACE_FREE | (line << 16)),
+		(void *)name,
+		(void *)str,
+		(void *)mp,
+		(void *)(__psunsigned_t)agno,
+		(void *)(__psunsigned_t)agbno,
+		(void *)(__psunsigned_t)len,
+		(void *)(__psint_t)isfl,
+		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+}
+
+/*
+ * Add an allocation trace entry for modifying an agf.
+ * Records the logging flags followed by the agf fields read below.
+ */
+STATIC void
+xfs_alloc_trace_modagf(
+	char		*name,		/* function tag string */
+	char		*str,		/* additional string */
+	xfs_mount_t	*mp,		/* file system mount point */
+	xfs_agf_t	*agf,		/* new agf value */
+	int		flags,		/* logging flags for agf */
+	int		line)		/* source line number */
+{
+	/*
+	 * All values are arguments of the single ktrace_enter() call, so
+	 * each one must end in a comma.
+	 */
+	ktrace_enter(xfs_alloc_trace_buf,
+		(void *)(__psint_t)(XFS_ALLOC_KTRACE_MODAGF | (line << 16)),
+		(void *)name,
+		(void *)str,
+		(void *)mp,
+		(void *)(__psint_t)flags,
+		(void *)(__psunsigned_t)INT_GET(agf->agf_seqno, ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_length, ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_BNO],
+						ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_CNT],
+						ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_BNO],
+						ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_CNT],
+						ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_flfirst, ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_fllast, ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_flcount, ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_freeblks, ARCH_CONVERT),
+		(void *)(__psunsigned_t)INT_GET(agf->agf_longest, ARCH_CONVERT));
+}
+#endif	/* XFS_ALLOC_TRACE */
+
+/*
+ * Allocation group level functions.
+ */
+
+/*
+ * Allocate a variable extent in the allocation group agno.
+ * Type and bno are used to determine where in the allocation group the
+ * extent will start.
+ * Extent's length (returned in *len) will be between minlen and maxlen,
+ * and of the form k * prod + mod unless there's nothing that large.
+ * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
+ *
+ * The function's return value is an error code; the allocation result
+ * is reported through args->agbno and args->len.
+ */
+STATIC int			/* error */
+xfs_alloc_ag_vextent(
+	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
+{
+	int		error;
+#ifdef XFS_ALLOC_TRACE
+	static char	fname[] = "xfs_alloc_ag_vextent";
+#endif
+
+	ASSERT(args->minlen > 0);
+	ASSERT(args->maxlen > 0);
+	ASSERT(args->minlen <= args->maxlen);
+	ASSERT(args->mod < args->prod);
+	ASSERT(args->alignment > 0);
+	/*
+	 * Branch to correct routine based on the type.
+	 */
+	args->wasfromfl = 0;
+	switch (args->type) {
+	case XFS_ALLOCTYPE_THIS_AG:
+		error = xfs_alloc_ag_vextent_size(args);
+		break;
+	case XFS_ALLOCTYPE_NEAR_BNO:
+		error = xfs_alloc_ag_vextent_near(args);
+		break;
+	case XFS_ALLOCTYPE_THIS_BNO:
+		error = xfs_alloc_ag_vextent_exact(args);
+		break;
+	default:
+		/*
+		 * NOTE(review): error is not assigned on this path; if
+		 * ASSERT can compile to nothing in some builds, the test
+		 * below reads it uninitialized -- confirm.
+		 */
+		ASSERT(0);
+		/* NOTREACHED */
+	}
+	if (error)
+		return error;
+	/*
+	 * If the allocation worked, need to change the agf structure
+	 * (and log it), and the superblock.
+	 */
+	if (args->agbno != NULLAGBLOCK) {
+		xfs_agf_t	*agf;	/* allocation group freelist header */
+#ifdef XFS_ALLOC_TRACE
+		xfs_mount_t	*mp = args->mp;
+#endif
+		long		slen = (long)args->len;
+
+		ASSERT(args->len >= args->minlen && args->len <= args->maxlen);
+		ASSERT(!(args->wasfromfl) || !args->isfl);
+		ASSERT(args->agbno % args->alignment == 0);
+		/*
+		 * The free-block counts are adjusted here only when the
+		 * callee did not set wasfromfl.
+		 */
+		if (!(args->wasfromfl)) {
+
+			agf = XFS_BUF_TO_AGF(args->agbp);
+			INT_MOD(agf->agf_freeblks, ARCH_CONVERT, -(args->len));
+			xfs_trans_agblocks_delta(args->tp,
+						 -((long)(args->len)));
+			args->pag->pagf_freeblks -= args->len;
+			ASSERT(INT_GET(agf->agf_freeblks, ARCH_CONVERT)
+				<= INT_GET(agf->agf_length, ARCH_CONVERT));
+			TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
+			xfs_alloc_log_agf(args->tp, args->agbp,
+						XFS_AGF_FREEBLKS);
+		}
+		/*
+		 * The superblock counter is adjusted unless isfl is set;
+		 * wasdel selects which counter field is charged.
+		 */
+		if (!args->isfl)
+			xfs_trans_mod_sb(args->tp,
+				args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
+					XFS_TRANS_SB_FDBLOCKS, -slen);
+		XFS_STATS_INC(xs_allocx);
+		XFS_STATS_ADD(xs_allocb, args->len);
+	}
+	return 0;
+}
+
+/*
+ * Allocate a variable extent at exactly agno/bno.
+ * Extent's length (returned in *len) will be between minlen and maxlen,
+ * and of the form k * prod + mod unless there's nothing that large.
+ * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
+ */
+STATIC int			/* error */
+xfs_alloc_ag_vextent_exact(
+	xfs_alloc_arg_t	*args)	/* allocation argument structure */
+{
+	xfs_btree_cur_t	*bno_cur;/* by block-number btree cursor */
+	xfs_btree_cur_t	*cnt_cur;/* by count btree cursor */
+	xfs_agblock_t	end;	/* end of allocated extent */
+	int		error;
+	xfs_agblock_t	fbno;	/* start block of found extent */
+	xfs_agblock_t	fend;	/* end block of found extent */
+	xfs_extlen_t	flen;	/* length of found extent */
+#ifdef XFS_ALLOC_TRACE
+	static char	fname[] = "xfs_alloc_ag_vextent_exact";
+#endif
+	int		i;	/* success/failure of operation */
+	xfs_agblock_t	maxend;	/* end of maximal extent */
+	xfs_agblock_t	minend;	/* end of minimal extent */
+	xfs_extlen_t	rlen;	/* length of returned extent */
+
+	ASSERT(args->alignment == 1);
+	/*
+	 * Allocate/initialize a cursor for the by-number freespace btree.
+	 */
+	bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+		args->agno, XFS_BTNUM_BNO, 0, 0);
+	/*
+	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
+	 * Look for the closest free block <= bno, it must contain bno
+	 * if any free block does.
+	 */
+	if (error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen,
+			&i))
+		goto error0;
+	if (!i) {
+		/*
+		 * Didn't find it, return null.
+		 */
+		xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+		args->agbno = NULLAGBLOCK;
+		return 0;
+	}
+	/*
+	 * Grab the freespace record.
+	 */
+	if (error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i))
+		goto error0;
+	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+	ASSERT(fbno <= args->agbno);
+	minend = args->agbno + args->minlen;
+	maxend = args->agbno + args->maxlen;
+	fend = fbno + flen;
+	/*
+	 * Give up if the freespace isn't long enough for the minimum request.
+	 */
+	if (fend < minend) {
+		xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+		args->agbno = NULLAGBLOCK;
+		return 0;
+	}
+	/*
+	 * End of extent will be smaller of the freespace end and the
+	 * maximal requested end.
+	 */
+	end = XFS_AGBLOCK_MIN(fend, maxend);
+	/*
+	 * Fix the length according to mod and prod if given.
+	 */
+	args->len = end - args->agbno;
+	xfs_alloc_fix_len(args);
+	if (!xfs_alloc_fix_minleft(args)) {
+		/*
+		 * xfs_alloc_fix_minleft() has already set agbno to
+		 * NULLAGBLOCK on this path.
+		 */
+		xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+		return 0;
+	}
+	rlen = args->len;
+	ASSERT(args->agbno + rlen <= fend);
+	end = args->agbno + rlen;
+	/*
+	 * We are allocating agbno for rlen [agbno .. end]
+	 * Allocate/initialize a cursor for the by-size btree.
+	 */
+	cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+		args->agno, XFS_BTNUM_CNT, 0, 0);
+	ASSERT(args->agbno + args->len <=
+		INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
+			ARCH_CONVERT));
+	if (error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
+			args->agbno, args->len, XFSA_FIXUP_BNO_OK)) {
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
+		goto error0;
+	}
+	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+	TRACE_ALLOC("normal", args);
+	args->wasfromfl = 0;
+	return 0;
+
+error0:
+	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
+	TRACE_ALLOC("error", args);
+	return error;
+}
+
+/*
+ * Allocate a variable extent near bno in the allocation group agno.
+ * Extent's length (returned in len) will be between minlen and maxlen,
+ * and of the form k * prod + mod unless there's nothing that large.
+ * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
+ *
+ * The function's return value is an error code; the result is reported
+ * through args->agbno and args->len.  Every exit path deletes whichever
+ * of the three btree cursors it still holds.
+ */
+STATIC int				/* error */
+xfs_alloc_ag_vextent_near(
+	xfs_alloc_arg_t	*args)		/* allocation argument structure */
+{
+	xfs_btree_cur_t	*bno_cur_gt;	/* cursor for bno btree, right side */
+	xfs_btree_cur_t	*bno_cur_lt;	/* cursor for bno btree, left side */
+	xfs_btree_cur_t	*cnt_cur;	/* cursor for count btree */
+#ifdef XFS_ALLOC_TRACE
+	static char	fname[] = "xfs_alloc_ag_vextent_near";
+#endif
+	xfs_agblock_t	gtbno;		/* start bno of right side entry */
+	xfs_agblock_t	gtbnoa;		/* aligned ... */
+	xfs_extlen_t	gtdiff;		/* difference to right side entry */
+	xfs_extlen_t	gtlen;		/* length of right side entry */
+	xfs_extlen_t	gtlena;		/* aligned ... */
+	xfs_agblock_t	gtnew;		/* useful start bno of right side */
+	int		error;		/* error code */
+	int		i;		/* result code, temporary */
+	int		j;		/* result code, temporary */
+	xfs_agblock_t	ltbno;		/* start bno of left side entry */
+	xfs_agblock_t	ltbnoa;		/* aligned ... */
+	xfs_extlen_t	ltdiff;		/* difference to left side entry */
+	/*REFERENCED*/
+	xfs_agblock_t	ltend;		/* end bno of left side entry */
+	xfs_extlen_t	ltlen;		/* length of left side entry */
+	xfs_extlen_t	ltlena;		/* aligned ... */
+	xfs_agblock_t	ltnew;		/* useful start bno of left side */
+	xfs_extlen_t	rlen;		/* length of returned extent */
+#if defined(DEBUG) && defined(__KERNEL__)
+	/*
+	 * Randomly don't execute the first algorithm.
+	 */
+	static int	seed;		/* randomizing seed value */
+	int		dofirst;	/* set to do first algorithm */
+	timespec_t	now;		/* current time */
+
+	if (!seed) {
+		nanotime(&now);
+		seed = (int)now.tv_sec ^ (int)now.tv_nsec;
+	}
+	dofirst = random() & 1;
+#endif
+	/*
+	 * Get a cursor for the by-size btree.
+	 */
+	cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+		args->agno, XFS_BTNUM_CNT, 0, 0);
+	ltlen = 0;
+	bno_cur_lt = bno_cur_gt = NULL;
+	/*
+	 * See if there are any free extents as big as maxlen.
+	 */
+	if (error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i))
+		goto error0;
+	/*
+	 * If none, then pick up the last entry in the tree unless the
+	 * tree is empty.
+	 */
+	if (!i) {
+		if (error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
+				&ltlen, &i))
+			goto error0;
+		if (i == 0 || ltlen == 0) {
+			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+			return 0;
+		}
+		ASSERT(i == 1);
+	}
+	args->wasfromfl = 0;
+	/*
+	 * First algorithm.
+	 * If the requested extent is large wrt the freespaces available
+	 * in this a.g., then the cursor will be pointing to a btree entry
+	 * near the right edge of the tree.  If it's in the last btree leaf
+	 * block, then we just examine all the entries in that block
+	 * that are big enough, and pick the best one.
+	 * This is written as a while loop so we can break out of it,
+	 * but we never loop back to the top.
+	 */
+	while (xfs_btree_islastblock(cnt_cur, 0)) {
+		xfs_extlen_t	bdiff;
+		int		besti;
+		xfs_extlen_t	blen;
+		xfs_agblock_t	bnew;
+
+#if defined(DEBUG) && defined(__KERNEL__)
+		if (!dofirst)
+			break;
+#endif
+		/*
+		 * Start from the entry that lookup found, sequence through
+		 * all larger free blocks.  If we're actually pointing at a
+		 * record smaller than maxlen, go to the start of this block,
+		 * and skip all those smaller than minlen.
+		 */
+		if (ltlen || args->alignment > 1) {
+			cnt_cur->bc_ptrs[0] = 1;
+			do {
+				if (error = xfs_alloc_get_rec(cnt_cur, &ltbno,
+						&ltlen, &i))
+					goto error0;
+				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+				if (ltlen >= args->minlen)
+					break;
+				if (error = xfs_alloc_increment(cnt_cur, 0, &i))
+					goto error0;
+			} while (i);
+			ASSERT(ltlen >= args->minlen);
+			if (!i)
+				break;
+		}
+		i = cnt_cur->bc_ptrs[0];
+		for (j = 1, blen = 0, bdiff = 0;
+		     !error && j && (blen < args->maxlen || bdiff > 0);
+		     error = xfs_alloc_increment(cnt_cur, 0, &j)) {
+			/*
+			 * For each entry, decide if it's better than
+			 * the previous best entry.
+			 */
+			if (error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen,
+					&i))
+				goto error0;
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			if (!xfs_alloc_compute_aligned(ltbno, ltlen,
+					args->alignment, args->minlen,
+					&ltbnoa, &ltlena))
+				continue;
+			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
+			xfs_alloc_fix_len(args);
+			ASSERT(args->len >= args->minlen);
+			if (args->len < blen)
+				continue;
+			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
+				args->alignment, ltbno, ltlen, &ltnew);
+			if (ltnew != NULLAGBLOCK &&
+			    (args->len > blen || ltdiff < bdiff)) {
+				bdiff = ltdiff;
+				bnew = ltnew;
+				blen = args->len;
+				besti = cnt_cur->bc_ptrs[0];
+			}
+		}
+		/*
+		 * It didn't work.  We COULD be in a case where
+		 * there's a good record somewhere, so try again.
+		 */
+		if (blen == 0)
+			break;
+		/*
+		 * Point at the best entry, and retrieve it again.
+		 */
+		cnt_cur->bc_ptrs[0] = besti;
+		if (error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		ltend = ltbno + ltlen;
+		ASSERT(ltend <= INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
+				ARCH_CONVERT));
+		args->len = blen;
+		if (!xfs_alloc_fix_minleft(args)) {
+			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+			TRACE_ALLOC("nominleft", args);
+			return 0;
+		}
+		blen = args->len;
+		/*
+		 * We are allocating starting at bnew for blen blocks.
+		 */
+		args->agbno = bnew;
+		ASSERT(bnew >= ltbno);
+		ASSERT(bnew + blen <= ltend);
+		/*
+		 * Set up a cursor for the by-bno tree.
+		 */
+		bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp,
+			args->agbp, args->agno, XFS_BTNUM_BNO, 0, 0);
+		/*
+		 * Fix up the btree entries.
+		 */
+		if (error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
+				ltlen, bnew, blen, XFSA_FIXUP_CNT_OK))
+			goto error0;
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
+		TRACE_ALLOC("first", args);
+		return 0;
+	}
+	/*
+	 * Second algorithm.
+	 * Search in the by-bno tree to the left and to the right
+	 * simultaneously, until in each case we find a space big enough,
+	 * or run into the edge of the tree.  When we run into the edge,
+	 * we deallocate that cursor.
+	 * If both searches succeed, we compare the two spaces and pick
+	 * the better one.
+	 * With alignment, it's possible for both to fail; the upper
+	 * level algorithm that picks allocation groups for allocations
+	 * is not supposed to do this.
+	 */
+	/*
+	 * Allocate and initialize the cursor for the leftward search.
+	 */
+	bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
+		args->agno, XFS_BTNUM_BNO, 0, 0);
+	/*
+	 * Lookup <= bno to find the leftward search's starting point.
+	 */
+	if (error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen,
+			&i))
+		goto error0;
+	if (!i) {
+		/*
+		 * Didn't find anything; use this cursor for the rightward
+		 * search.
+		 */
+		bno_cur_gt = bno_cur_lt;
+		bno_cur_lt = 0;
+	}
+	/*
+	 * Found something.  Duplicate the cursor for the rightward search.
+	 */
+	else if (error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt))
+		goto error0;
+	/*
+	 * Increment the cursor, so we will point at the entry just right
+	 * of the leftward entry if any, or to the leftmost entry.
+	 */
+	if (error = xfs_alloc_increment(bno_cur_gt, 0, &i))
+		goto error0;
+	if (!i) {
+		/*
+		 * It failed, there are no rightward entries.
+		 */
+		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
+		bno_cur_gt = NULL;
+	}
+	/*
+	 * Loop going left with the leftward cursor, right with the
+	 * rightward cursor, until either both directions give up or
+	 * we find an entry at least as big as minlen.
+	 */
+	do {
+		if (bno_cur_lt) {
+			if (error = xfs_alloc_get_rec(bno_cur_lt, &ltbno,
+					&ltlen, &i))
+				goto error0;
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			if (xfs_alloc_compute_aligned(ltbno, ltlen,
+					args->alignment, args->minlen,
+					&ltbnoa, &ltlena))
+				break;
+			if (error = xfs_alloc_decrement(bno_cur_lt, 0, &i))
+				goto error0;
+			if (!i) {
+				xfs_btree_del_cursor(bno_cur_lt,
+						     XFS_BTREE_NOERROR);
+				bno_cur_lt = NULL;
+			}
+		}
+		if (bno_cur_gt) {
+			if (error = xfs_alloc_get_rec(bno_cur_gt, &gtbno,
+					&gtlen, &i))
+				goto error0;
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			if (xfs_alloc_compute_aligned(gtbno, gtlen,
+					args->alignment, args->minlen,
+					&gtbnoa, &gtlena))
+				break;
+			if (error = xfs_alloc_increment(bno_cur_gt, 0, &i))
+				goto error0;
+			if (!i) {
+				xfs_btree_del_cursor(bno_cur_gt,
+						     XFS_BTREE_NOERROR);
+				bno_cur_gt = NULL;
+			}
+		}
+	} while (bno_cur_lt || bno_cur_gt);
+	/*
+	 * Got both cursors still active, need to find better entry.
+	 */
+	if (bno_cur_lt && bno_cur_gt) {
+		/*
+		 * Left side is long enough, look for a right side entry.
+		 */
+		if (ltlena >= args->minlen) {
+			/*
+			 * Fix up the length.
+			 */
+			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
+			xfs_alloc_fix_len(args);
+			rlen = args->len;
+			ltdiff = xfs_alloc_compute_diff(args->agbno, rlen,
+				args->alignment, ltbno, ltlen, &ltnew);
+			/*
+			 * Not perfect.
+			 */
+			if (ltdiff) {
+				/*
+				 * Look until we find a better one, run out of
+				 * space, or run off the end.
+				 */
+				while (bno_cur_lt && bno_cur_gt) {
+					if (error = xfs_alloc_get_rec(
+							bno_cur_gt, &gtbno,
+							&gtlen, &i))
+						goto error0;
+					XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+					xfs_alloc_compute_aligned(gtbno, gtlen,
+						args->alignment, args->minlen,
+						&gtbnoa, &gtlena);
+					/*
+					 * The left one is clearly better.
+					 */
+					if (gtbnoa >= args->agbno + ltdiff) {
+						xfs_btree_del_cursor(
+							bno_cur_gt,
+							XFS_BTREE_NOERROR);
+						bno_cur_gt = NULL;
+						break;
+					}
+					/*
+					 * If we reach a big enough entry,
+					 * compare the two and pick the best.
+					 */
+					if (gtlena >= args->minlen) {
+						args->len =
+							XFS_EXTLEN_MIN(gtlena,
+								args->maxlen);
+						xfs_alloc_fix_len(args);
+						rlen = args->len;
+						gtdiff = xfs_alloc_compute_diff(
+							args->agbno, rlen,
+							args->alignment,
+							gtbno, gtlen, &gtnew);
+						/*
+						 * Right side is better.
+						 */
+						if (gtdiff < ltdiff) {
+							xfs_btree_del_cursor(
+								bno_cur_lt,
+								XFS_BTREE_NOERROR);
+							bno_cur_lt = NULL;
+						}
+						/*
+						 * Left side is better.
+						 */
+						else {
+							xfs_btree_del_cursor(
+								bno_cur_gt,
+								XFS_BTREE_NOERROR);
+							bno_cur_gt = NULL;
+						}
+						break;
+					}
+					/*
+					 * Fell off the right end.
+					 */
+					if (error = xfs_alloc_increment(
+							bno_cur_gt, 0, &i))
+						goto error0;
+					if (!i) {
+						xfs_btree_del_cursor(
+							bno_cur_gt,
+							XFS_BTREE_NOERROR);
+						bno_cur_gt = NULL;
+						break;
+					}
+				}
+			}
+			/*
+			 * The left side is perfect, trash the right side.
+			 */
+			else {
+				xfs_btree_del_cursor(bno_cur_gt,
+						     XFS_BTREE_NOERROR);
+				bno_cur_gt = NULL;
+			}
+		}
+		/*
+		 * It's the right side that was found first, look left.
+		 */
+		else {
+			/*
+			 * Fix up the length.
+			 */
+			args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
+			xfs_alloc_fix_len(args);
+			rlen = args->len;
+			gtdiff = xfs_alloc_compute_diff(args->agbno, rlen,
+				args->alignment, gtbno, gtlen, &gtnew);
+			/*
+			 * Right side entry isn't perfect.
+			 */
+			if (gtdiff) {
+				/*
+				 * Look until we find a better one, run out of
+				 * space, or run off the end.
+				 */
+				while (bno_cur_lt && bno_cur_gt) {
+					if (error = xfs_alloc_get_rec(
+							bno_cur_lt, &ltbno,
+							&ltlen, &i))
+						goto error0;
+					XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+					xfs_alloc_compute_aligned(ltbno, ltlen,
+						args->alignment, args->minlen,
+						&ltbnoa, &ltlena);
+					/*
+					 * The right one is clearly better.
+					 */
+					if (ltbnoa <= args->agbno - gtdiff) {
+						xfs_btree_del_cursor(
+							bno_cur_lt,
+							XFS_BTREE_NOERROR);
+						bno_cur_lt = NULL;
+						break;
+					}
+					/*
+					 * If we reach a big enough entry,
+					 * compare the two and pick the best.
+					 */
+					if (ltlena >= args->minlen) {
+						args->len = XFS_EXTLEN_MIN(
+							ltlena, args->maxlen);
+						xfs_alloc_fix_len(args);
+						rlen = args->len;
+						ltdiff = xfs_alloc_compute_diff(
+							args->agbno, rlen,
+							args->alignment,
+							ltbno, ltlen, &ltnew);
+						/*
+						 * Left side is better.
+						 */
+						if (ltdiff < gtdiff) {
+							xfs_btree_del_cursor(
+								bno_cur_gt,
+								XFS_BTREE_NOERROR);
+							bno_cur_gt = NULL;
+						}
+						/*
+						 * Right side is better.
+						 */
+						else {
+							xfs_btree_del_cursor(
+								bno_cur_lt,
+								XFS_BTREE_NOERROR);
+							bno_cur_lt = NULL;
+						}
+						break;
+					}
+					/*
+					 * Fell off the left end.
+					 */
+					if (error = xfs_alloc_decrement(
+							bno_cur_lt, 0, &i))
+						goto error0;
+					if (!i) {
+						xfs_btree_del_cursor(bno_cur_lt,
+							XFS_BTREE_NOERROR);
+						bno_cur_lt = NULL;
+						break;
+					}
+				}
+			}
+			/*
+			 * The right side is perfect, trash the left side.
+			 */
+			else {
+				xfs_btree_del_cursor(bno_cur_lt,
+					XFS_BTREE_NOERROR);
+				bno_cur_lt = NULL;
+			}
+		}
+	}
+	/*
+	 * If we couldn't get anything, give up.
+	 */
+	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
+		TRACE_ALLOC("neither", args);
+		/*
+		 * The by-size cursor is still live here and must be
+		 * released like on every other exit.
+		 */
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+		args->agbno = NULLAGBLOCK;
+		return 0;
+	}
+	/*
+	 * At this point we have selected a freespace entry, either to the
+	 * left or to the right.  If it's on the right, copy all the
+	 * useful variables to the "left" set so we only have one
+	 * copy of this code.
+	 */
+	if (bno_cur_gt) {
+		bno_cur_lt = bno_cur_gt;
+		bno_cur_gt = NULL;
+		ltbno = gtbno;
+		ltbnoa = gtbnoa;
+		ltlen = gtlen;
+		ltlena = gtlena;
+		j = 1;
+	} else
+		j = 0;
+	/*
+	 * Fix up the length and compute the useful address.
+	 */
+	ltend = ltbno + ltlen;
+	args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
+	xfs_alloc_fix_len(args);
+	if (!xfs_alloc_fix_minleft(args)) {
+		TRACE_ALLOC("nominleft", args);
+		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+		return 0;
+	}
+	rlen = args->len;
+	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
+		ltlen, &ltnew);
+	ASSERT(ltnew >= ltbno);
+	ASSERT(ltnew + rlen <= ltend);
+	ASSERT(ltnew + rlen <= INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
+		ARCH_CONVERT));
+	args->agbno = ltnew;
+	if (error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
+			ltnew, rlen, XFSA_FIXUP_BNO_OK))
+		goto error0;
+	TRACE_ALLOC(j ? "gt" : "lt", args);
+	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+	xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
+	return 0;
+
+ error0:
+	TRACE_ALLOC("error", args);
+	if (cnt_cur != NULL)
+		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
+	if (bno_cur_lt != NULL)
+		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
+	if (bno_cur_gt != NULL)
+		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
+	return error;
+}
+
+/*
+ * Allocate a variable extent anywhere in the allocation group agno.
+ * Extent's length (returned in len) will be between minlen and maxlen,
+ * and of the form k * prod + mod unless there's nothing that large.
+ * Return the starting a.g.
block, or NULLAGBLOCK if we can't do it. + */ +STATIC int /* error */ +xfs_alloc_ag_vextent_size( + xfs_alloc_arg_t *args) /* allocation argument structure */ +{ + xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ + xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ + int error; /* error result */ + xfs_agblock_t fbno; /* start of found freespace */ + xfs_extlen_t flen; /* length of found freespace */ +#ifdef XFS_ALLOC_TRACE + static char fname[] = "xfs_alloc_ag_vextent_size"; +#endif + int i; /* temp status variable */ + xfs_agblock_t rbno; /* returned block number */ + xfs_extlen_t rlen; /* length of returned extent */ + + /* + * Allocate and initialize a cursor for the by-size btree. + */ + cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT, 0, 0); + bno_cur = NULL; + /* + * Look for an entry >= maxlen+alignment-1 blocks. + */ + if (error = xfs_alloc_lookup_ge(cnt_cur, 0, + args->maxlen + args->alignment - 1, &i)) + goto error0; + /* + * If none, then pick up the last entry in the tree unless the + * tree is empty. + */ + if (!i) { + if (error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, + &flen, &i)) + goto error0; + if (i == 0 || flen == 0) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + TRACE_ALLOC("noentry", args); + return 0; + } + ASSERT(i == 1); + } + /* + * There's a freespace as big as maxlen+alignment-1, get it. + */ + else { + if (error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + /* + * In the first case above, we got the last entry in the + * by-size btree. Now we check to see if the space hits maxlen + * once aligned; if not, we search left for something better. + * This can't happen in the second case above. 
+ */ + xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen, + &rbno, &rlen); + rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); + XFS_WANT_CORRUPTED_GOTO(rlen == 0 || + (rlen <= flen && rbno + rlen <= fbno + flen), error0); + if (rlen < args->maxlen) { + xfs_agblock_t bestfbno; + xfs_extlen_t bestflen; + xfs_agblock_t bestrbno; + xfs_extlen_t bestrlen; + + bestrlen = rlen; + bestrbno = rbno; + bestflen = flen; + bestfbno = fbno; + for (;;) { + if (error = xfs_alloc_decrement(cnt_cur, 0, &i)) + goto error0; + if (i == 0) + break; + if (error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, + &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (flen < bestrlen) + break; + xfs_alloc_compute_aligned(fbno, flen, args->alignment, + args->minlen, &rbno, &rlen); + rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); + XFS_WANT_CORRUPTED_GOTO(rlen == 0 || + (rlen <= flen && rbno + rlen <= fbno + flen), + error0); + if (rlen > bestrlen) { + bestrlen = rlen; + bestrbno = rbno; + bestflen = flen; + bestfbno = fbno; + if (rlen == args->maxlen) + break; + } + } + if (error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, + &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + rlen = bestrlen; + rbno = bestrbno; + flen = bestflen; + fbno = bestfbno; + } + args->wasfromfl = 0; + /* + * Fix up the length. + */ + args->len = rlen; + xfs_alloc_fix_len(args); + if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + TRACE_ALLOC("nominleft", args); + args->agbno = NULLAGBLOCK; + return 0; + } + rlen = args->len; + XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); + /* + * Allocate and initialize a cursor for the by-block tree. 
+ */ + bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO, 0, 0); + if (error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, + rbno, rlen, XFSA_FIXUP_CNT_OK)) + goto error0; + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + cnt_cur = bno_cur = NULL; + args->len = rlen; + args->agbno = rbno; + XFS_WANT_CORRUPTED_GOTO( + args->agbno + args->len <= + INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length, + ARCH_CONVERT), + error0); + TRACE_ALLOC("normal", args); + return 0; + +error0: + TRACE_ALLOC("error", args); + if (cnt_cur) + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); + if (bno_cur) + xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); + return error; +} + +/* + * Deal with the case where only small freespaces remain. + * Either return the contents of the last freespace record, + * or allocate space from the freelist if there is nothing in the tree. + */ +STATIC int /* error */ +xfs_alloc_ag_vextent_small( + xfs_alloc_arg_t *args, /* allocation argument structure */ + xfs_btree_cur_t *ccur, /* by-size cursor */ + xfs_agblock_t *fbnop, /* result block number */ + xfs_extlen_t *flenp, /* result length */ + int *stat) /* status: 0-freelist, 1-normal/none */ +{ + int error; + xfs_agblock_t fbno; + xfs_extlen_t flen; +#ifdef XFS_ALLOC_TRACE + static char fname[] = "xfs_alloc_ag_vextent_small"; +#endif + int i; + + if (error = xfs_alloc_decrement(ccur, 0, &i)) + goto error0; + if (i) { + if (error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + /* + * Nothing in the btree, try the freelist. Make sure + * to respect minleft even when pulling from the + * freelist. 
+ */ + else if (args->minlen == 1 && args->alignment == 1 && !args->isfl && + (INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_flcount, + ARCH_CONVERT) > args->minleft)) { + if (error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno)) + goto error0; + if (fbno != NULLAGBLOCK) { + if (args->userdata) { + xfs_buf_t *bp; + + bp = xfs_btree_get_bufs(args->mp, args->tp, + args->agno, fbno, 0); + xfs_trans_binval(args->tp, bp); + /* + * Since blocks move to the free list without + * the coordination used in xfs_bmap_finish, + * we can't allow the user to write to the + * block until we know that the transaction + * that moved it to the free list is + * permanently on disk. The only way to + * ensure that is to make this transaction + * synchronous. + */ + xfs_trans_set_sync(args->tp); + } + args->len = 1; + args->agbno = fbno; + XFS_WANT_CORRUPTED_GOTO( + args->agbno + args->len <= + INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length, + ARCH_CONVERT), + error0); + args->wasfromfl = 1; + TRACE_ALLOC("freelist", args); + *stat = 0; + return 0; + } + /* + * Nothing in the freelist. + */ + else + flen = 0; + } + /* + * Can't allocate from the freelist for some reason. + */ + else + flen = 0; + /* + * Can't do the allocation, give up. + */ + if (flen < args->minlen) { + args->agbno = NULLAGBLOCK; + TRACE_ALLOC("notenough", args); + flen = 0; + } + *fbnop = fbno; + *flenp = flen; + *stat = 1; + TRACE_ALLOC("normal", args); + return 0; + +error0: + TRACE_ALLOC("error", args); + return error; +} + +/* + * Free the extent starting at agno/bno for length. + */ +STATIC int /* error */ +xfs_free_ag_extent( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *agbp, /* buffer for a.g. 
freelist header */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_agblock_t bno, /* starting block number */ + xfs_extlen_t len, /* length of extent */ + int isfl) /* set if is freelist blocks - no sb acctg */ +{ + xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ + xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ + int error; /* error return value */ +#ifdef XFS_ALLOC_TRACE + static char fname[] = "xfs_free_ag_extent"; +#endif + xfs_agblock_t gtbno; /* start of right neighbor block */ + xfs_extlen_t gtlen; /* length of right neighbor block */ + int haveleft; /* have a left neighbor block */ + int haveright; /* have a right neighbor block */ + int i; /* temp, result code */ + xfs_agblock_t ltbno; /* start of left neighbor block */ + xfs_extlen_t ltlen; /* length of left neighbor block */ + xfs_mount_t *mp; /* mount point struct for filesystem */ + xfs_agblock_t nbno; /* new starting block of freespace */ + xfs_extlen_t nlen; /* new length of freespace */ + + mp = tp->t_mountp; + /* + * Allocate and initialize a cursor for the by-block btree. + */ + bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, 0, + 0); + cnt_cur = NULL; + /* + * Look for a neighboring block on the left (lower block numbers) + * that is contiguous with this space. + */ + if (error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)) + goto error0; + if (haveleft) { + /* + * There is a block to our left. + */ + if (error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * It's not contiguous, though. + */ + if (ltbno + ltlen < bno) + haveleft = 0; + else { + /* + * If this failure happens the request to free this + * space was invalid, it's (partly) already free. + * Very bad. + */ + XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0); + } + } + /* + * Look for a neighboring block on the right (higher block numbers) + * that is contiguous with this space. 
+ */ + if (error = xfs_alloc_increment(bno_cur, 0, &haveright)) + goto error0; + if (haveright) { + /* + * There is a block to our right. + */ + if (error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * It's not contiguous, though. + */ + if (bno + len < gtbno) + haveright = 0; + else { + /* + * If this failure happens the request to free this + * space was invalid, it's (partly) already free. + * Very bad. + */ + XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0); + } + } + /* + * Now allocate and initialize a cursor for the by-size tree. + */ + cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, 0, + 0); + /* + * Have both left and right contiguous neighbors. + * Merge all three into a single free block. + */ + if (haveleft && haveright) { + /* + * Delete the old by-size entry on the left. + */ + if (error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (error = xfs_alloc_delete(cnt_cur, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Delete the old by-size entry on the right. + */ + if (error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (error = xfs_alloc_delete(cnt_cur, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Delete the old by-block entry for the right block. + */ + if (error = xfs_alloc_delete(bno_cur, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Move the by-block cursor back to the left neighbor. + */ + if (error = xfs_alloc_decrement(bno_cur, 0, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); +#ifdef DEBUG + /* + * Check that this is the right record: delete didn't + * mangle the cursor. 
+ */ + { + xfs_agblock_t xxbno; + xfs_extlen_t xxlen; + + if (error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, + &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO( + i == 1 && xxbno == ltbno && xxlen == ltlen, + error0); + } +#endif + /* + * Update remaining by-block entry to the new, joined block. + */ + nbno = ltbno; + nlen = len + ltlen + gtlen; + if (error = xfs_alloc_update(bno_cur, nbno, nlen)) + goto error0; + } + /* + * Have only a left contiguous neighbor. + * Merge it together with the new freespace. + */ + else if (haveleft) { + /* + * Delete the old by-size entry on the left. + */ + if (error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (error = xfs_alloc_delete(cnt_cur, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Back up the by-block cursor to the left neighbor, and + * update its length. + */ + if (error = xfs_alloc_decrement(bno_cur, 0, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + nbno = ltbno; + nlen = len + ltlen; + if (error = xfs_alloc_update(bno_cur, nbno, nlen)) + goto error0; + } + /* + * Have only a right contiguous neighbor. + * Merge it together with the new freespace. + */ + else if (haveright) { + /* + * Delete the old by-size entry on the right. + */ + if (error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (error = xfs_alloc_delete(cnt_cur, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Update the starting block and length of the right + * neighbor in the by-block tree. + */ + nbno = bno; + nlen = len + gtlen; + if (error = xfs_alloc_update(bno_cur, nbno, nlen)) + goto error0; + } + /* + * No contiguous neighbors. + * Insert the new freespace into the by-block tree. 
+ */ + else { + nbno = bno; + nlen = len; + if (error = xfs_alloc_insert(bno_cur, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + bno_cur = NULL; + /* + * In all cases we need to insert the new freespace in the by-size tree. + */ + if (error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 0, error0); + if (error = xfs_alloc_insert(cnt_cur, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + cnt_cur = NULL; + /* + * Update the freespace totals in the ag and superblock. + */ + { + xfs_agf_t *agf; + xfs_perag_t *pag; /* per allocation group data */ + + agf = XFS_BUF_TO_AGF(agbp); + pag = &mp->m_perag[agno]; + INT_MOD(agf->agf_freeblks, ARCH_CONVERT, len); + xfs_trans_agblocks_delta(tp, len); + pag->pagf_freeblks += len; + XFS_WANT_CORRUPTED_GOTO( + INT_GET(agf->agf_freeblks, ARCH_CONVERT) + <= INT_GET(agf->agf_length, ARCH_CONVERT), + error0); + TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS); + xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); + if (!isfl) + xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); + XFS_STATS_INC(xs_freex); + XFS_STATS_ADD(xs_freeb, len); + } + TRACE_FREE(haveleft ? + (haveright ? "both" : "left") : + (haveright ? "right" : "none"), + agno, bno, len, isfl); + return 0; + + error0: + TRACE_FREE("error", agno, bno, len, isfl); + if (bno_cur) + xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); + if (cnt_cur) + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); + return error; +} + +/* + * Visible (exported) allocation/free functions. + * Some of these are used just by xfs_alloc_btree.c and this file. + */ + +/* + * Compute and fill in value of m_ag_maxlevels. 
+ */ +void +xfs_alloc_compute_maxlevels( + xfs_mount_t *mp) /* file system mount structure */ +{ + int level; + uint maxblocks; + uint maxleafents; + int minleafrecs; + int minnoderecs; + + maxleafents = (mp->m_sb.sb_agblocks + 1) / 2; + minleafrecs = mp->m_alloc_mnr[0]; + minnoderecs = mp->m_alloc_mnr[1]; + maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; + for (level = 1; maxblocks > 1; level++) + maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; + mp->m_ag_maxlevels = level; +} + +/* + * Decide whether to use this allocation group for this allocation. + * If so, fix up the btree freelist's size. + * This is external so mkfs can call it, too. + */ +int /* error */ +xfs_alloc_fix_freelist( + xfs_alloc_arg_t *args, /* allocation argument structure */ + int flags) /* XFS_ALLOC_FLAG_... */ +{ + xfs_buf_t *agbp; /* agf buffer pointer */ + xfs_agf_t *agf; /* a.g. freespace structure pointer */ + xfs_buf_t *agflbp;/* agfl buffer pointer */ + xfs_agblock_t bno; /* freelist block */ + xfs_extlen_t delta; /* new blocks needed in freelist */ + int error; /* error result code */ + xfs_extlen_t longest;/* longest extent in allocation group */ + xfs_mount_t *mp; /* file system mount point structure */ + xfs_extlen_t need; /* total blocks needed in freelist */ + xfs_perag_t *pag; /* per-ag information structure */ + xfs_alloc_arg_t targs; /* local allocation arguments */ + xfs_trans_t *tp; /* transaction pointer */ + + mp = args->mp; + + pag = args->pag; + tp = args->tp; + if (!pag->pagf_init) { + if (error = xfs_alloc_read_agf(mp, tp, args->agno, flags, + &agbp)) + return error; + if (!pag->pagf_init) { + args->agbp = NULL; + return 0; + } + } else + agbp = NULL; + need = XFS_MIN_FREELIST_PAG(pag, mp); + delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0; + /* + * If it looks like there isn't a long enough extent, or enough + * total blocks, reject it. + */ + longest = (pag->pagf_longest > delta) ? 
+ (pag->pagf_longest - delta) : + (pag->pagf_flcount > 0 || pag->pagf_longest > 0); + if (args->minlen + args->alignment + args->minalignslop - 1 > longest || + (args->minleft && + (int)(pag->pagf_freeblks + pag->pagf_flcount - + need - args->total) < + (int)args->minleft)) { + if (agbp) + xfs_trans_brelse(tp, agbp); + args->agbp = NULL; + return 0; + } + /* + * Get the a.g. freespace buffer. + * Can fail if we're not blocking on locks, and it's held. + */ + if (agbp == NULL) { + if (error = xfs_alloc_read_agf(mp, tp, args->agno, flags, + &agbp)) + return error; + if (agbp == NULL) { + args->agbp = NULL; + return 0; + } + } + /* + * Figure out how many blocks we should have in the freelist. + */ + agf = XFS_BUF_TO_AGF(agbp); + need = XFS_MIN_FREELIST(agf, mp); + delta = need > INT_GET(agf->agf_flcount, ARCH_CONVERT) ? + (need - INT_GET(agf->agf_flcount, ARCH_CONVERT)) : 0; + /* + * If there isn't enough total or single-extent, reject it. + */ + longest = INT_GET(agf->agf_longest, ARCH_CONVERT); + longest = (longest > delta) ? (longest - delta) : + (INT_GET(agf->agf_flcount, ARCH_CONVERT) > 0 || longest > 0); + if (args->minlen + args->alignment + args->minalignslop - 1 > longest || + (args->minleft && + (int)(INT_GET(agf->agf_freeblks, ARCH_CONVERT) + + INT_GET(agf->agf_flcount, ARCH_CONVERT) - need - args->total) < + (int)args->minleft)) { + xfs_trans_brelse(tp, agbp); + args->agbp = NULL; + return 0; + } + /* + * Make the freelist shorter if it's too long. 
+ */ + while (INT_GET(agf->agf_flcount, ARCH_CONVERT) > need) { + xfs_buf_t *bp; + + if (error = xfs_alloc_get_freelist(tp, agbp, &bno)) + return error; + if (error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)) + return error; + bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); + xfs_trans_binval(tp, bp); + /* + * Since blocks move to the free list without + * the coordination used in xfs_bmap_finish, + * we can't allow block to be available for reallocation + * and non-transaction writing (user data) + * until we know that the transaction + * that moved it to the free list is + * permanently on disk. The only way to + * ensure that is to make this transaction + * synchronous. The one exception to this + * is in the case of wsync-mounted filesystem + * where we know that any block that made it + * onto the freelist won't be seen again in + * the file from which it came since the transactions + * that free metadata blocks or shrink inodes in + * wsync filesystems are all themselves synchronous. + */ + if (!(mp->m_flags & XFS_MOUNT_WSYNC)) + xfs_trans_set_sync(tp); + } + /* + * Initialize the args structure. + */ + targs.tp = tp; + targs.mp = mp; + targs.agbp = agbp; + targs.agno = args->agno; + targs.mod = targs.minleft = targs.wasdel = targs.userdata = + targs.minalignslop = 0; + targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; + targs.type = XFS_ALLOCTYPE_THIS_AG; + targs.pag = pag; + if (error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)) + return error; + /* + * Make the freelist longer if it's too short. + */ + while (INT_GET(agf->agf_flcount, ARCH_CONVERT) < need) { + targs.agbno = 0; + targs.maxlen = need - INT_GET(agf->agf_flcount, ARCH_CONVERT); + /* + * Allocate as many blocks as possible at once. + */ + if (error = xfs_alloc_ag_vextent(&targs)) + return error; + /* + * Stop if we run out. Won't happen if callers are obeying + * the restrictions correctly. Can happen for free calls + * on a completely full ag. 
+ */ + if (targs.agbno == NULLAGBLOCK) + break; + /* + * Put each allocated block on the list. + */ + for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { + if (error = xfs_alloc_put_freelist(tp, agbp, agflbp, + bno)) + return error; + } + } + args->agbp = agbp; + return 0; +} + +/* + * Get a block from the freelist. + * Returns with the buffer for the block gotten. + */ +int /* error */ +xfs_alloc_get_freelist( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *agbp, /* buffer containing the agf structure */ + xfs_agblock_t *bnop) /* block address retrieved from freelist */ +{ + xfs_agf_t *agf; /* a.g. freespace structure */ + xfs_agfl_t *agfl; /* a.g. freelist structure */ + xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ + xfs_agblock_t bno; /* block number returned */ + int error; +#ifdef XFS_ALLOC_TRACE + static char fname[] = "xfs_alloc_get_freelist"; +#endif + xfs_mount_t *mp; /* mount structure */ + xfs_perag_t *pag; /* per allocation group data */ + + agf = XFS_BUF_TO_AGF(agbp); + /* + * Freelist is empty, give up. + */ + if (INT_GET(agf->agf_flcount, ARCH_CONVERT) == 0) { + *bnop = NULLAGBLOCK; + return 0; + } + /* + * Read the array of free blocks. + */ + mp = tp->t_mountp; + if (error = xfs_alloc_read_agfl(mp, tp, + INT_GET(agf->agf_seqno, ARCH_CONVERT), &agflbp)) + return error; + agfl = XFS_BUF_TO_AGFL(agflbp); + /* + * Get the block number and update the data structures. 
+ */ + bno = INT_GET(agfl->agfl_bno[INT_GET(agf->agf_flfirst, ARCH_CONVERT)], ARCH_CONVERT); + INT_MOD(agf->agf_flfirst, ARCH_CONVERT, 1); + xfs_trans_brelse(tp, agflbp); + if (INT_GET(agf->agf_flfirst, ARCH_CONVERT) == XFS_AGFL_SIZE) + INT_ZERO(agf->agf_flfirst, ARCH_CONVERT); + pag = &mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)]; + INT_MOD(agf->agf_flcount, ARCH_CONVERT, -1); + xfs_trans_agflist_delta(tp, -1); + pag->pagf_flcount--; + TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT); + xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT); + *bnop = bno; + return 0; +} + +/* + * Log the given fields from the agf structure. + */ +void +xfs_alloc_log_agf( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *bp, /* buffer for a.g. freelist header */ + int fields) /* mask of fields to be logged (XFS_AGF_...) */ +{ + int first; /* first byte offset */ + int last; /* last byte offset */ + static const short offsets[] = { + offsetof(xfs_agf_t, agf_magicnum), + offsetof(xfs_agf_t, agf_versionnum), + offsetof(xfs_agf_t, agf_seqno), + offsetof(xfs_agf_t, agf_length), + offsetof(xfs_agf_t, agf_roots[0]), + offsetof(xfs_agf_t, agf_levels[0]), + offsetof(xfs_agf_t, agf_flfirst), + offsetof(xfs_agf_t, agf_fllast), + offsetof(xfs_agf_t, agf_flcount), + offsetof(xfs_agf_t, agf_freeblks), + offsetof(xfs_agf_t, agf_longest), + sizeof(xfs_agf_t) + }; + + xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); + xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); +} + +/* + * Interface for inode allocation to force the pag data to be initialized. + */ +int /* error */ +xfs_alloc_pagf_init( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags) /* XFS_ALLOC_FLAGS_... 
*/ +{ + xfs_buf_t *bp; + int error; + + if (error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)) + return error; + if (bp) + xfs_trans_brelse(tp, bp); + return 0; +} + +/* + * Put the block on the freelist for the allocation group. + */ +int /* error */ +xfs_alloc_put_freelist( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *agbp, /* buffer for a.g. freelist header */ + xfs_buf_t *agflbp,/* buffer for a.g. free block array */ + xfs_agblock_t bno) /* block being freed */ +{ + xfs_agf_t *agf; /* a.g. freespace structure */ + xfs_agfl_t *agfl; /* a.g. free block array */ + xfs_agblock_t *blockp;/* pointer to array entry */ + int error; +#ifdef XFS_ALLOC_TRACE + static char fname[] = "xfs_alloc_put_freelist"; +#endif + xfs_mount_t *mp; /* mount structure */ + xfs_perag_t *pag; /* per allocation group data */ + + agf = XFS_BUF_TO_AGF(agbp); + mp = tp->t_mountp; + + if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, + INT_GET(agf->agf_seqno, ARCH_CONVERT), &agflbp))) + return error; + agfl = XFS_BUF_TO_AGFL(agflbp); + INT_MOD(agf->agf_fllast, ARCH_CONVERT, 1); + if (INT_GET(agf->agf_fllast, ARCH_CONVERT) == XFS_AGFL_SIZE) + INT_ZERO(agf->agf_fllast, ARCH_CONVERT); + pag = &mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)]; + INT_MOD(agf->agf_flcount, ARCH_CONVERT, 1); + xfs_trans_agflist_delta(tp, 1); + pag->pagf_flcount++; + ASSERT(INT_GET(agf->agf_flcount, ARCH_CONVERT) <= XFS_AGFL_SIZE); + blockp = &agfl->agfl_bno[INT_GET(agf->agf_fllast, ARCH_CONVERT)]; + INT_SET(*blockp, ARCH_CONVERT, bno); + TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); + xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT); + xfs_trans_log_buf(tp, agflbp, + (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl), + (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl + + sizeof(xfs_agblock_t) - 1)); + return 0; +} + +/* + * Read in the allocation group header (free/alloc section). 
+ */ +int /* error */ +xfs_alloc_read_agf( + xfs_mount_t *mp, /* mount point structure */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags, /* XFS_ALLOC_FLAG_... */ + xfs_buf_t **bpp) /* buffer for the ag freelist header */ +{ + xfs_agf_t *agf; /* ag freelist header */ + int agf_ok; /* set if agf is consistent */ + xfs_buf_t *bp; /* return value */ + xfs_daddr_t d; /* disk block address */ + int error; + xfs_perag_t *pag; /* per allocation group data */ + + ASSERT(agno != NULLAGNUMBER); + d = XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR); + if (error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, 1, + (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U, + &bp)) + return error; + ASSERT(!bp || !XFS_BUF_GETERROR(bp)); + if (!bp) { + *bpp = NULL; + return 0; + } + /* + * Validate the magic number of the agf block. + */ + agf = XFS_BUF_TO_AGF(bp); + agf_ok = + INT_GET(agf->agf_magicnum, ARCH_CONVERT) == XFS_AGF_MAGIC && + XFS_AGF_GOOD_VERSION(INT_GET(agf->agf_versionnum, ARCH_CONVERT)) && + INT_GET(agf->agf_freeblks, ARCH_CONVERT) <= + INT_GET(agf->agf_length, ARCH_CONVERT) && + INT_GET(agf->agf_flfirst, ARCH_CONVERT) < XFS_AGFL_SIZE && + INT_GET(agf->agf_fllast, ARCH_CONVERT) < XFS_AGFL_SIZE && + INT_GET(agf->agf_flcount, ARCH_CONVERT) <= XFS_AGFL_SIZE; + if (XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, + XFS_RANDOM_ALLOC_READ_AGF)) { + xfs_trans_brelse(tp, bp); + return XFS_ERROR(EFSCORRUPTED); + } + pag = &mp->m_perag[agno]; + if (!pag->pagf_init) { + pag->pagf_freeblks = INT_GET(agf->agf_freeblks, ARCH_CONVERT); + pag->pagf_flcount = INT_GET(agf->agf_flcount, ARCH_CONVERT); + pag->pagf_longest = INT_GET(agf->agf_longest, ARCH_CONVERT); + pag->pagf_levels[XFS_BTNUM_BNOi] = + INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT); + pag->pagf_levels[XFS_BTNUM_CNTi] = + INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT); + pag->pagf_init = 1; + } +#ifdef DEBUG + else if 
(!XFS_FORCED_SHUTDOWN(mp)) { + ASSERT(pag->pagf_freeblks == INT_GET(agf->agf_freeblks, ARCH_CONVERT)); + ASSERT(pag->pagf_flcount == INT_GET(agf->agf_flcount, ARCH_CONVERT)); + ASSERT(pag->pagf_longest == INT_GET(agf->agf_longest, ARCH_CONVERT)); + ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == + INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT)); + ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] == + INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT)); + } +#endif + XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF); + *bpp = bp; + return 0; +} + +/* + * Allocate an extent (variable-size). + * Depending on the allocation type, we either look in a single allocation + * group or loop over the allocation groups to find the result. + */ +int /* error */ +xfs_alloc_vextent( + xfs_alloc_arg_t *args) /* allocation argument structure */ +{ + xfs_agblock_t agsize; /* allocation group size */ + int error; + int flags; /* XFS_ALLOC_FLAG_... locking flags */ +#ifdef XFS_ALLOC_TRACE + static char fname[] = "xfs_alloc_vextent"; +#endif + xfs_extlen_t minleft;/* minimum left value, temp copy */ + xfs_mount_t *mp; /* mount structure pointer */ + xfs_agnumber_t sagno; /* starting allocation group number */ + xfs_alloctype_t type; /* input allocation type */ + + mp = args->mp; + type = args->otype = args->type; + args->agbno = NULLAGBLOCK; + /* + * Just fix this up, for the case where the last a.g. is shorter + * (or there's only one a.g.) and the caller couldn't easily figure + * that out (xfs_bmap_alloc). 
+ */ + agsize = mp->m_sb.sb_agblocks; + if (args->maxlen > agsize) + args->maxlen = agsize; + if (args->alignment == 0) + args->alignment = 1; + ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount); + ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize); + ASSERT(args->minlen <= args->maxlen); + ASSERT(args->minlen <= agsize); + ASSERT(args->mod < args->prod); + if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount || + XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize || + args->minlen > args->maxlen || args->minlen > agsize || + args->mod >= args->prod) { + args->fsbno = NULLFSBLOCK; + TRACE_ALLOC("badargs", args); + return 0; + } + switch (type) { + case XFS_ALLOCTYPE_THIS_AG: + case XFS_ALLOCTYPE_NEAR_BNO: + case XFS_ALLOCTYPE_THIS_BNO: + /* + * These three force us into a single a.g. + */ + args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); + mrlock(&mp->m_peraglock, MR_ACCESS, PINOD); + args->pag = &mp->m_perag[args->agno]; + minleft = args->minleft; + args->minleft = 0; + error = xfs_alloc_fix_freelist(args, 0); + args->minleft = minleft; + if (error) { + TRACE_ALLOC("nofix", args); + goto error0; + } + if (!args->agbp) { + mrunlock(&mp->m_peraglock); + TRACE_ALLOC("noagbp", args); + break; + } + args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); + if (error = xfs_alloc_ag_vextent(args)) + goto error0; + mrunlock(&mp->m_peraglock); + break; + case XFS_ALLOCTYPE_START_BNO: + /* + * Try near allocation first, then anywhere-in-ag after + * the first a.g. fails. + */ + args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); + args->type = XFS_ALLOCTYPE_NEAR_BNO; + /* FALLTHROUGH */ + case XFS_ALLOCTYPE_ANY_AG: + case XFS_ALLOCTYPE_START_AG: + case XFS_ALLOCTYPE_FIRST_AG: + /* + * Rotate through the allocation groups looking for a winner. + */ + if (type == XFS_ALLOCTYPE_ANY_AG) { + /* + * Start with the last place we left off. 
+ */ + args->agno = sagno = mp->m_agfrotor; + args->type = XFS_ALLOCTYPE_THIS_AG; + flags = XFS_ALLOC_FLAG_TRYLOCK; + } else if (type == XFS_ALLOCTYPE_FIRST_AG) { + /* + * Start with allocation group given by bno. + */ + args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); + args->type = XFS_ALLOCTYPE_THIS_AG; + sagno = 0; + flags = 0; + } else { + if (type == XFS_ALLOCTYPE_START_AG) + args->type = XFS_ALLOCTYPE_THIS_AG; + /* + * Start with the given allocation group. + */ + args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno); + flags = XFS_ALLOC_FLAG_TRYLOCK; + } + /* + * Loop over allocation groups twice; first time with + * trylock set, second time without. + */ + for (;;) { + mrlock(&mp->m_peraglock, MR_ACCESS, PINOD); + args->pag = &mp->m_perag[args->agno]; + if (error = xfs_alloc_fix_freelist(args, flags)) { + TRACE_ALLOC("nofix", args); + goto error0; + } + /* + * If we get a buffer back then the allocation will fly. + */ + if (args->agbp) { + if (error = xfs_alloc_ag_vextent(args)) + goto error0; + mrunlock(&mp->m_peraglock); + break; + } + mrunlock(&mp->m_peraglock); + TRACE_ALLOC("loopfailed", args); + /* + * Didn't work, figure out the next iteration. + */ + if (args->agno == sagno && + type == XFS_ALLOCTYPE_START_BNO) + args->type = XFS_ALLOCTYPE_THIS_AG; + if (++(args->agno) == mp->m_sb.sb_agcount) + args->agno = 0; + /* + * Reached the starting a.g., must either be done + * or switch to non-trylock mode. 
+ */ + if (args->agno == sagno) { + if (flags == 0) { + args->agbno = NULLAGBLOCK; + TRACE_ALLOC("allfailed", args); + break; + } + flags = 0; + if (type == XFS_ALLOCTYPE_START_BNO) { + args->agbno = XFS_FSB_TO_AGBNO(mp, + args->fsbno); + args->type = XFS_ALLOCTYPE_NEAR_BNO; + } + } + } + mp->m_agfrotor = (args->agno + 1) % mp->m_sb.sb_agcount; + break; + default: + ASSERT(0); + /* NOTREACHED */ + } + if (args->agbno == NULLAGBLOCK) + args->fsbno = NULLFSBLOCK; + else { + args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno); +#ifdef DEBUG + ASSERT(args->len >= args->minlen); + ASSERT(args->len <= args->maxlen); + ASSERT(args->agbno % args->alignment == 0); + XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), + args->len); +#endif + } + return 0; +error0: + mrunlock(&mp->m_peraglock); + return error; +} + +/* + * Free an extent. + * Just break up the extent address and hand off to xfs_free_ag_extent + * after fixing up the freelist. + */ +int /* error */ +xfs_free_extent( + xfs_trans_t *tp, /* transaction pointer */ + xfs_fsblock_t bno, /* starting block number of extent */ + xfs_extlen_t len) /* length of extent */ +{ +#ifdef DEBUG + xfs_agf_t *agf; /* a.g. 
freespace header */ +#endif + xfs_alloc_arg_t args; /* allocation argument structure; only the fields assigned below are set */ + int error; /* error return value */ + + ASSERT(len != 0); + args.tp = tp; + args.mp = tp->t_mountp; + args.agno = XFS_FSB_TO_AGNO(args.mp, bno); + ASSERT(args.agno < args.mp->m_sb.sb_agcount); + args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); + args.alignment = 1; + args.minlen = args.minleft = args.minalignslop = 0; + mrlock(&args.mp->m_peraglock, MR_ACCESS, PINOD); + args.pag = &args.mp->m_perag[args.agno]; + if (error = xfs_alloc_fix_freelist(&args, 0)) + goto error0; +#ifdef DEBUG + ASSERT(args.agbp != NULL); + agf = XFS_BUF_TO_AGF(args.agbp); + ASSERT(args.agbno + len <= INT_GET(agf->agf_length, ARCH_CONVERT)); +#endif + error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, + len, 0); +error0: /* also the normal exit path: m_peraglock is dropped here on success and on failure */ + mrunlock(&args.mp->m_peraglock); + return error; +} diff --git a/libxfs/xfs_alloc_btree.c b/libxfs/xfs_alloc_btree.c new file mode 100644 index 000000000..c6d0e0fdd --- /dev/null +++ b/libxfs/xfs_alloc_btree.c @@ -0,0 +1,2136 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * Free space allocation for XFS. + */ + +#include + +/* + * Single level of the xfs_alloc_delete record deletion routine. + * Delete record pointed to by cur/level. + * Remove the record from its block then rebalance the tree. + * The return value is an error code; *stat is set to 0 for failure (no record at the cursor), 1 for done, 2 to go on to the next level. + */ +STATIC int /* error */ +xfs_alloc_delrec( + xfs_btree_cur_t *cur, /* btree cursor */ + int level, /* level removing record from */ + int *stat) /* fail/done/go-on */ +{ + xfs_agf_t *agf; /* allocation group freespace header */ + xfs_alloc_block_t *block; /* btree block record/key lives in */ + xfs_agblock_t bno; /* btree block number */ + xfs_buf_t *bp; /* buffer for block */ + int error; /* error return value */ + int i; /* loop index, also status from cursor calls */ + xfs_alloc_key_t key; /* lkp points here if block is level 0 */ + xfs_agblock_t lbno; /* left block's block number */ + xfs_buf_t *lbp; /* left block's buffer pointer */ + xfs_alloc_block_t *left; /* left btree block */ + xfs_alloc_key_t *lkp; /* left block key pointer */ + xfs_alloc_ptr_t *lpp; /* left block address pointer */ + int lrecs; /* number of records in left block */ + xfs_alloc_rec_t *lrp; /* left block record pointer */ + xfs_mount_t *mp; /* mount structure */ + int ptr; /* index in btree block for this rec */ + xfs_agblock_t rbno; /* right block's block number */ + xfs_buf_t *rbp; /* right block's buffer pointer */ + xfs_alloc_block_t *right; /* right btree block */ + xfs_alloc_key_t *rkp; /* right block key pointer */ + xfs_alloc_ptr_t *rpp; /* 
right block address pointer */ + int rrecs; /* number of records in right block */ + xfs_alloc_rec_t *rrp; /* right block record pointer */ + xfs_btree_cur_t *tcur; /* temporary btree cursor */ + + /* + * Get the index of the entry being deleted, check for nothing there. + */ + ptr = cur->bc_ptrs[level]; + if (ptr == 0) { + *stat = 0; + return 0; + } + /* + * Get the buffer & block containing the record or key/ptr. + */ + bp = cur->bc_bufs[level]; + block = XFS_BUF_TO_ALLOC_BLOCK(bp); +#ifdef DEBUG + if (error = xfs_btree_check_sblock(cur, block, level, bp)) + return error; +#endif + /* + * Fail if we're off the end of the block. + */ + if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + *stat = 0; + return 0; + } + XFS_STATS_INC(xs_abt_delrec); + /* + * It's a nonleaf. Excise the key and ptr being deleted, by + * sliding the entries past them down one. + * Log the changed areas of the block. + */ + if (level > 0) { + lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur); + lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur); +#ifdef DEBUG + for (i = ptr; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) { + if (error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level)) + return error; + } +#endif + if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + ovbcopy(&lkp[ptr], &lkp[ptr - 1], + (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lkp)); /* INT_: mem copy */ + ovbcopy(&lpp[ptr], &lpp[ptr - 1], + (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lpp)); /* INT_: mem copy */ + xfs_alloc_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1); + xfs_alloc_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1); + } + } + /* + * It's a leaf. Excise the record being deleted, by sliding the + * entries past it down one. Log the changed areas of the block. 
+ */ + else { + lrp = XFS_ALLOC_REC_ADDR(block, 1, cur); + if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + ovbcopy(&lrp[ptr], &lrp[ptr - 1], + (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lrp)); + xfs_alloc_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1); + } + /* + * If it's the first record in the block, we'll need a key + * structure to pass up to the next level (updkey). + */ + if (ptr == 1) { + key.ar_startblock = lrp->ar_startblock; /* INT_: direct copy */ + key.ar_blockcount = lrp->ar_blockcount; /* INT_: direct copy */ + lkp = &key; + } + } + /* + * Decrement and log the number of entries in the block. + */ + INT_MOD(block->bb_numrecs, ARCH_CONVERT, -1); + xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS); + /* + * See if the longest free extent in the allocation group was + * changed by this operation. True if it's the by-size btree, and + * this is the leaf level, and there is no right sibling block, + * and this was the last record. + */ + agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + mp = cur->bc_mp; + + if (level == 0 && + cur->bc_btnum == XFS_BTNUM_CNT && + INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK && + ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + ASSERT(ptr == INT_GET(block->bb_numrecs, ARCH_CONVERT) + 1); + /* + * There are still records in the block. Grab the size + * from the last one. + */ + if (INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + rrp = XFS_ALLOC_REC_ADDR(block, INT_GET(block->bb_numrecs, ARCH_CONVERT), cur); + INT_COPY(agf->agf_longest, rrp->ar_blockcount, ARCH_CONVERT); + } + /* + * No free extents left. + */ + else + INT_ZERO(agf->agf_longest, ARCH_CONVERT); + mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_longest = + INT_GET(agf->agf_longest, ARCH_CONVERT); + xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, + XFS_AGF_LONGEST); + } + /* + * Is this the root level? If so, we're almost done. 
+ */ + if (level == cur->bc_nlevels - 1) { + /* + * If this is the root level, + * and there's only one entry left, + * and it's NOT the leaf level, + * then we can get rid of this level. + */ + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == 1 && level > 0) { + /* + * lpp is still set to the first pointer in the block. + * Make it the new root of the btree. + */ + bno = INT_GET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT); + INT_COPY(agf->agf_roots[cur->bc_btnum], *lpp, ARCH_CONVERT); + INT_MOD(agf->agf_levels[cur->bc_btnum], ARCH_CONVERT, -1); + mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_levels[cur->bc_btnum]--; + /* + * Put this buffer/block on the ag's freelist. + */ + if (error = xfs_alloc_put_freelist(cur->bc_tp, + cur->bc_private.a.agbp, NULL, bno)) + return error; + xfs_trans_agbtree_delta(cur->bc_tp, -1); + xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, + XFS_AGF_ROOTS | XFS_AGF_LEVELS); + /* + * Update the cursor so there's one fewer level. + */ + xfs_btree_setbuf(cur, level, 0); + cur->bc_nlevels--; + } else if (level > 0 && + (error = xfs_alloc_decrement(cur, level, &i))) + return error; + *stat = 1; + return 0; + } + /* + * If we deleted the leftmost entry in the block, update the + * key values above us in the tree. + */ + if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1))) + return error; + /* + * If the number of records remaining in the block is at least + * the minimum, we're done. + */ + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) { + if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i))) + return error; + *stat = 1; + return 0; + } + /* + * Otherwise, we have to move some records around to keep the + * tree balanced. Look at the left and right sibling blocks to + * see if we can re-balance by moving only one record. 
+ */ + rbno = INT_GET(block->bb_rightsib, ARCH_CONVERT); + lbno = INT_GET(block->bb_leftsib, ARCH_CONVERT); + bno = NULLAGBLOCK; + ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK); + /* + * Duplicate the cursor so our btree manipulations here won't + * disrupt the next level up. + */ + if (error = xfs_btree_dup_cursor(cur, &tcur)) + return error; + /* + * If there's a right sibling, see if it's ok to shift an entry + * out of it. + */ + if (rbno != NULLAGBLOCK) { + /* + * Move the temp cursor to the last entry in the next block. + * Actually any entry but the first would suffice. + */ + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (error = xfs_alloc_increment(tcur, level, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * Grab a pointer to the block. + */ + rbp = tcur->bc_bufs[level]; + right = XFS_BUF_TO_ALLOC_BLOCK(rbp); +#ifdef DEBUG + if (error = xfs_btree_check_sblock(cur, right, level, rbp)) + goto error0; +#endif + /* + * Grab the current block number, for future use. + */ + bno = INT_GET(right->bb_leftsib, ARCH_CONVERT); + /* + * If right block is full enough so that removing one entry + * won't make it too empty, and left-shifting an entry out + * of right to us works, we're done. + */ + if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >= + XFS_ALLOC_BLOCK_MINRECS(level, cur)) { + if (error = xfs_alloc_lshift(tcur, level, &i)) + goto error0; + if (i) { + ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >= + XFS_ALLOC_BLOCK_MINRECS(level, cur)); + xfs_btree_del_cursor(tcur, + XFS_BTREE_NOERROR); + if (level > 0 && + (error = xfs_alloc_decrement(cur, level, + &i))) + return error; + *stat = 1; + return 0; + } + } + /* + * Otherwise, grab the number of records in right for + * future reference, and fix up the temp cursor to point + * to our block again (last record). 
+ */ + rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT); + if (lbno != NULLAGBLOCK) { + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (error = xfs_alloc_decrement(tcur, level, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + } + /* + * If there's a left sibling, see if it's ok to shift an entry + * out of it. + */ + if (lbno != NULLAGBLOCK) { + /* + * Move the temp cursor to the first entry in the + * previous block. + */ + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (error = xfs_alloc_decrement(tcur, level, &i)) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + xfs_btree_firstrec(tcur, level); + /* + * Grab a pointer to the block. + */ + lbp = tcur->bc_bufs[level]; + left = XFS_BUF_TO_ALLOC_BLOCK(lbp); +#ifdef DEBUG + if (error = xfs_btree_check_sblock(cur, left, level, lbp)) + goto error0; +#endif + /* + * Grab the current block number (left's right-sibling pointer), for future use. + */ + bno = INT_GET(left->bb_rightsib, ARCH_CONVERT); + /* + * If left block is full enough so that removing one entry + * won't make it too empty, and right-shifting an entry out + * of left to us works, we're done. + */ + if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >= + XFS_ALLOC_BLOCK_MINRECS(level, cur)) { + if (error = xfs_alloc_rshift(tcur, level, &i)) + goto error0; + if (i) { + ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >= + XFS_ALLOC_BLOCK_MINRECS(level, cur)); + xfs_btree_del_cursor(tcur, + XFS_BTREE_NOERROR); + if (level == 0) + cur->bc_ptrs[0]++; + *stat = 1; + return 0; + } + } + /* + * Otherwise, grab the number of records in left for + * future reference. + */ + lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT); + } + /* + * Delete the temp cursor, we're done with it. + */ + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + /* + * If here, we need to do a join to keep the tree balanced. + */ + ASSERT(bno != NULLAGBLOCK); + /* + * See if we can join with the left neighbor block. 
+ */ + if (lbno != NULLAGBLOCK && + lrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { + /* + * Set "right" to be the starting block, + * "left" to be the left neighbor. + */ + rbno = bno; + right = block; + rbp = bp; + if (error = xfs_btree_read_bufs(mp, cur->bc_tp, + cur->bc_private.a.agno, lbno, 0, &lbp, + XFS_ALLOC_BTREE_REF)) + return error; + left = XFS_BUF_TO_ALLOC_BLOCK(lbp); + if (error = xfs_btree_check_sblock(cur, left, level, lbp)) + return error; + } + /* + * If that won't work, see if we can join with the right neighbor block. + */ + else if (rbno != NULLAGBLOCK && + rrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= + XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { + /* + * Set "left" to be the starting block, + * "right" to be the right neighbor. + */ + lbno = bno; + left = block; + lbp = bp; + if (error = xfs_btree_read_bufs(mp, cur->bc_tp, + cur->bc_private.a.agno, rbno, 0, &rbp, + XFS_ALLOC_BTREE_REF)) + return error; + right = XFS_BUF_TO_ALLOC_BLOCK(rbp); + if (error = xfs_btree_check_sblock(cur, right, level, rbp)) + return error; + } + /* + * Otherwise, we can't fix the imbalance. + * Just return. This is probably a logic error, but it's not fatal. + */ + else { + if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i))) + return error; + *stat = 1; + return 0; + } + /* + * We're now going to join "left" and "right" by moving all the stuff + * in "right" to "left" and deleting "right". + */ + if (level > 0) { + /* + * It's a non-leaf. Move keys and pointers. 
+ */ + lkp = XFS_ALLOC_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur); + lpp = XFS_ALLOC_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur); + rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); + rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur); +#ifdef DEBUG + for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) { + if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)) + return error; + } +#endif + bcopy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp)); /* INT_: structure copy */ + bcopy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp)); /* INT_: structure copy */ + xfs_alloc_log_keys(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, + INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT)); + xfs_alloc_log_ptrs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, + INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT)); + } else { + /* + * It's a leaf. Move records. + */ + lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur); + rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); + bcopy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp)); + xfs_alloc_log_recs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, + INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT)); + } + /* + * If we joined with the left neighbor, set the buffer in the + * cursor to the left block, and fix up the index. + */ + if (bp != lbp) { + xfs_btree_setbuf(cur, level, lbp); + cur->bc_ptrs[level] += INT_GET(left->bb_numrecs, ARCH_CONVERT); + } + /* + * If we joined with the right neighbor and there's a level above + * us, increment the cursor at that level. + */ + else if (level + 1 < cur->bc_nlevels && + (error = xfs_alloc_increment(cur, level + 1, &i))) + return error; + /* + * Fix up the number of records in the surviving block. 
+ */ + INT_MOD(left->bb_numrecs, ARCH_CONVERT, INT_GET(right->bb_numrecs, ARCH_CONVERT)); + /* + * Fix up the right block pointer in the surviving block, and log it. + */ + left->bb_rightsib = right->bb_rightsib; /* INT_: direct copy */ + xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); + /* + * If there is a right sibling now, make it point to the + * remaining block. + */ + if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) { + xfs_alloc_block_t *rrblock; + xfs_buf_t *rrbp; + + if (error = xfs_btree_read_bufs(mp, cur->bc_tp, + cur->bc_private.a.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, + &rrbp, XFS_ALLOC_BTREE_REF)) + return error; + rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp); + if (error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)) + return error; + INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno); + xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB); + } + /* + * Free the deleting block by putting it on the freelist. + */ + if (error = xfs_alloc_put_freelist(cur->bc_tp, cur->bc_private.a.agbp, + NULL, rbno)) + return error; + xfs_trans_agbtree_delta(cur->bc_tp, -1); + /* + * Adjust the current level's cursor so that we're left referring + * to the right node, after we're done. + * If this leaves the ptr value 0 our caller will fix it up. + */ + if (level > 0) + cur->bc_ptrs[level]--; + /* + * Return value means the next level up has something to do. + */ + *stat = 2; + return 0; + +error0: + xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); + return error; +} + +/* + * Insert one record/level. Return information to the caller + * allowing the next level up to proceed if necessary. 
+ */ +STATIC int /* error */ +xfs_alloc_insrec( + xfs_btree_cur_t *cur, /* btree cursor */ + int level, /* level to insert record at */ + xfs_agblock_t *bnop, /* i/o: block number inserted */ + xfs_alloc_rec_t *recp, /* i/o: record data inserted */ + xfs_btree_cur_t **curp, /* output: new cursor replacing cur */ + int *stat) /* output: success/failure */ +{ + xfs_agf_t *agf; /* allocation group freelist header */ + xfs_alloc_block_t *block; /* btree block record/key lives in */ + xfs_buf_t *bp; /* buffer for block */ + int error; /* error return value */ + int i; /* loop index */ + xfs_alloc_key_t key; /* key value being inserted */ + xfs_alloc_key_t *kp; /* pointer to btree keys */ + xfs_agblock_t nbno; /* block number of allocated block */ + xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */ + xfs_alloc_key_t nkey; /* new key value, from split */ + xfs_alloc_rec_t nrec; /* new record value, for caller */ + int optr; /* old ptr value */ + xfs_alloc_ptr_t *pp; /* pointer to btree addresses */ + int ptr; /* index in btree block for this rec */ + xfs_alloc_rec_t *rp; /* pointer to btree records */ + + ASSERT(INT_GET(recp->ar_blockcount, ARCH_CONVERT) > 0); + /* + * If we made it to the root level, allocate a new root block + * and we're done. + */ + if (level >= cur->bc_nlevels) { + XFS_STATS_INC(xs_abt_insrec); + if (error = xfs_alloc_newroot(cur, &i)) + return error; + *bnop = NULLAGBLOCK; + *stat = i; + return 0; + } + /* + * Make a key out of the record data to be inserted, and save it. + */ + key.ar_startblock = recp->ar_startblock; /* INT_: direct copy */ + key.ar_blockcount = recp->ar_blockcount; /* INT_: direct copy */ + optr = ptr = cur->bc_ptrs[level]; + /* + * If we're off the left edge, return failure. + */ + if (ptr == 0) { + *stat = 0; + return 0; + } + XFS_STATS_INC(xs_abt_insrec); + /* + * Get pointers to the btree buffer and block. 
+ */ + bp = cur->bc_bufs[level]; + block = XFS_BUF_TO_ALLOC_BLOCK(bp); +#ifdef DEBUG + if (error = xfs_btree_check_sblock(cur, block, level, bp)) + return error; + /* + * Check that the new entry is being inserted in the right place. + */ + if (ptr <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + if (level == 0) { + rp = XFS_ALLOC_REC_ADDR(block, ptr, cur); + xfs_btree_check_rec(cur->bc_btnum, recp, rp); + } else { + kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur); + xfs_btree_check_key(cur->bc_btnum, &key, kp); + } + } +#endif + nbno = NULLAGBLOCK; + ncur = (xfs_btree_cur_t *)0; + /* + * If the block is full, we can't insert the new entry until we + * make the block un-full. + */ + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { + /* + * First, try shifting an entry to the right neighbor. + */ + if (error = xfs_alloc_rshift(cur, level, &i)) + return error; + if (i) { + /* nothing */ + } + /* + * Next, try shifting an entry to the left neighbor. + */ + else { + if (error = xfs_alloc_lshift(cur, level, &i)) + return error; + if (i) + optr = ptr = cur->bc_ptrs[level]; + else { + /* + * Next, try splitting the current block in + * half. If this works we have to re-set our + * variables because we could be in a + * different block now. + */ + if (error = xfs_alloc_split(cur, level, &nbno, + &nkey, &ncur, &i)) + return error; + if (i) { + bp = cur->bc_bufs[level]; + block = XFS_BUF_TO_ALLOC_BLOCK(bp); +#ifdef DEBUG + if (error = + xfs_btree_check_sblock(cur, + block, level, bp)) + return error; +#endif + ptr = cur->bc_ptrs[level]; + nrec.ar_startblock = nkey.ar_startblock; /* INT_: direct copy */ + nrec.ar_blockcount = nkey.ar_blockcount; /* INT_: direct copy */ + } + /* + * Otherwise the insert fails. + */ + else { + *stat = 0; + return 0; + } + } + } + } + /* + * At this point we know there's room for our new entry in the block + * we're pointing at. + */ + if (level > 0) { + /* + * It's a non-leaf entry. 
Make a hole for the new data + * in the key and ptr regions of the block. + */ + kp = XFS_ALLOC_KEY_ADDR(block, 1, cur); + pp = XFS_ALLOC_PTR_ADDR(block, 1, cur); +#ifdef DEBUG + for (i = INT_GET(block->bb_numrecs, ARCH_CONVERT); i >= ptr; i--) { + if (error = xfs_btree_check_sptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), level)) + return error; + } +#endif + ovbcopy(&kp[ptr - 1], &kp[ptr], + (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp)); /* INT_: copy */ + ovbcopy(&pp[ptr - 1], &pp[ptr], + (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp)); /* INT_: copy */ +#ifdef DEBUG + if (error = xfs_btree_check_sptr(cur, *bnop, level)) + return error; +#endif + /* + * Now stuff the new data in, bump numrecs and log the new data. + */ + kp[ptr - 1] = key; + INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop); + INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1); + xfs_alloc_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT)); + xfs_alloc_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT)); +#ifdef DEBUG + if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) + xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1, + kp + ptr); +#endif + } else { + /* + * It's a leaf entry. Make a hole for the new record. + */ + rp = XFS_ALLOC_REC_ADDR(block, 1, cur); + ovbcopy(&rp[ptr - 1], &rp[ptr], + (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp)); + /* + * Now stuff the new record in, bump numrecs + * and log the new data. + */ + rp[ptr - 1] = *recp; /* INT_: struct copy */ + INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1); + xfs_alloc_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT)); +#ifdef DEBUG + if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) + xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1, + rp + ptr); +#endif + } + /* + * Log the new number of records in the btree header. + */ + xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS); + /* + * If we inserted at the start of a block, update the parents' keys. 
+ */ + if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1))) + return error; + /* + * Look to see if the longest extent in the allocation group + * needs to be updated. + */ + + agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + if (level == 0 && + cur->bc_btnum == XFS_BTNUM_CNT && + INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK && + INT_GET(recp->ar_blockcount, ARCH_CONVERT) > INT_GET(agf->agf_longest, ARCH_CONVERT)) { + /* + * If this is a leaf in the by-size btree and there + * is no right sibling block and this record's extent is longer + * than the previous longest extent, update it. + */ + INT_COPY(agf->agf_longest, recp->ar_blockcount, ARCH_CONVERT); + cur->bc_mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_longest + = INT_GET(recp->ar_blockcount, ARCH_CONVERT); + xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, + XFS_AGF_LONGEST); + } + /* + * Return the new block number, if any. + * If there is one, give back a record value and a cursor too. + */ + *bnop = nbno; + if (nbno != NULLAGBLOCK) { + *recp = nrec; /* INT_: struct copy */ + *curp = ncur; /* INT_: pointer copy */ + } + *stat = 1; + return 0; +} + +/* + * Log header fields from a btree block. The fields mask is turned into a first/last byte range by xfs_btree_offsets and that range is handed to xfs_trans_log_buf. + */ +STATIC void +xfs_alloc_log_block( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *bp, /* buffer containing btree block */ + int fields) /* mask of fields: XFS_BB_... */ +{ + int first; /* first byte offset logged */ + int last; /* last byte offset logged */ + static const short offsets[] = { /* offset of each header field, then the header size */ + offsetof(xfs_alloc_block_t, bb_magic), + offsetof(xfs_alloc_block_t, bb_level), + offsetof(xfs_alloc_block_t, bb_numrecs), + offsetof(xfs_alloc_block_t, bb_leftsib), + offsetof(xfs_alloc_block_t, bb_rightsib), + sizeof(xfs_alloc_block_t) + }; + + xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last); + xfs_trans_log_buf(tp, bp, first, last); +} + +/* + * Log keys from a btree block (nonleaf). kfirst and klast are 1-based, inclusive key indices. 
+ */ +STATIC void +xfs_alloc_log_keys( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_buf_t *bp, /* buffer containing btree block */ + int kfirst, /* 1-based index of first key to log */ + int klast) /* 1-based index of last key to log (inclusive) */ +{ + xfs_alloc_block_t *block; /* btree block to log from */ + int first; /* first byte offset logged */ + xfs_alloc_key_t *kp; /* address of key 1 in btree block */ + int last; /* last byte offset logged */ + + block = XFS_BUF_TO_ALLOC_BLOCK(bp); + kp = XFS_ALLOC_KEY_ADDR(block, 1, cur); + first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block); /* offset of the start of key kfirst */ + last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block); /* offset just before the key after klast */ + xfs_trans_log_buf(cur->bc_tp, bp, first, last); +} + +/* + * Log block pointer fields from a btree block (nonleaf). pfirst and plast are 1-based, inclusive pointer indices. + */ +STATIC void +xfs_alloc_log_ptrs( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_buf_t *bp, /* buffer containing btree block */ + int pfirst, /* 1-based index of first pointer to log */ + int plast) /* 1-based index of last pointer to log (inclusive) */ +{ + xfs_alloc_block_t *block; /* btree block to log from */ + int first; /* first byte offset logged */ + int last; /* last byte offset logged */ + xfs_alloc_ptr_t *pp; /* address of pointer 1 in btree blk */ + + block = XFS_BUF_TO_ALLOC_BLOCK(bp); + pp = XFS_ALLOC_PTR_ADDR(block, 1, cur); + first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block); /* offset of the start of pointer pfirst */ + last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block); /* offset just before the pointer after plast */ + xfs_trans_log_buf(cur->bc_tp, bp, first, last); +} + +/* + * Log records from a btree block (leaf). rfirst and rlast are 1-based, inclusive record indices. 
+ */ +STATIC void +xfs_alloc_log_recs( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_buf_t *bp, /* buffer containing btree block */ + int rfirst, /* 1-based index of first record to log */ + int rlast) /* 1-based index of last record to log (inclusive) */ +{ + xfs_alloc_block_t *block; /* btree block to log from */ + int first; /* first byte offset logged */ + int last; /* last byte offset logged */ + xfs_alloc_rec_t *rp; /* address of record 1 in btree block */ + + + block = XFS_BUF_TO_ALLOC_BLOCK(bp); + rp = XFS_ALLOC_REC_ADDR(block, 1, cur); +#ifdef DEBUG + { /* sanity check: each record about to be logged must end within the a.g. length */ + xfs_agf_t *agf; /* a.g. freespace header */ + xfs_alloc_rec_t *p; /* record being checked */ + + agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++) + ASSERT(INT_GET(p->ar_startblock, ARCH_CONVERT) + INT_GET(p->ar_blockcount, ARCH_CONVERT) <= + INT_GET(agf->agf_length, ARCH_CONVERT)); + } +#endif + first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block); /* offset of the start of record rfirst */ + last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block); /* offset just before the record after rlast */ + xfs_trans_log_buf(cur->bc_tp, bp, first, last); +} + +/* + * Lookup the record. The cursor is made to point to it, based on dir. + * Return 0 if can't find any such record, 1 for success. + */ +STATIC int /* error */ +xfs_alloc_lookup( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_lookup_t dir, /* <=, ==, or >= */ + int *stat) /* success/failure */ +{ + xfs_agblock_t agbno; /* a.g. relative btree block number */ + xfs_agnumber_t agno; /* allocation group number */ + xfs_alloc_block_t *block; /* current btree block */ + int diff; /* difference for the current key */ + int error; /* error return value */ + int keyno; /* current key number */ + int level; /* level in the btree */ + xfs_mount_t *mp; /* file system mount point */ + + XFS_STATS_INC(xs_abt_lookup); + /* + * Get the allocation group header, and the root block number. + */ + mp = cur->bc_mp; + + { + xfs_agf_t *agf; /* a.g. 
freespace header */ + + agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + agno = INT_GET(agf->agf_seqno, ARCH_CONVERT); + agbno = INT_GET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT); + } + /* + * Iterate over each level in the btree, starting at the root. + * For each level above the leaves, find the key we need, based + * on the lookup record, then follow the corresponding block + * pointer down to the next level. + */ + for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { + xfs_buf_t *bp; /* buffer pointer for btree block */ + xfs_daddr_t d; /* disk address of btree block */ + + /* + * Get the disk address we're looking for. + */ + d = XFS_AGB_TO_DADDR(mp, agno, agbno); + /* + * If the old buffer at this level is for a different block, + * throw it away, otherwise just use it. + */ + bp = cur->bc_bufs[level]; + if (bp && XFS_BUF_ADDR(bp) != d) + bp = (xfs_buf_t *)0; + if (!bp) { + /* + * Need to get a new buffer. Read it, then + * set it in the cursor, releasing the old one. + */ + if (error = xfs_btree_read_bufs(mp, cur->bc_tp, agno, + agbno, 0, &bp, XFS_ALLOC_BTREE_REF)) + return error; + xfs_btree_setbuf(cur, level, bp); + /* + * Point to the btree block, now that we have the buffer + */ + block = XFS_BUF_TO_ALLOC_BLOCK(bp); + if (error = xfs_btree_check_sblock(cur, block, level, + bp)) + return error; + } else + block = XFS_BUF_TO_ALLOC_BLOCK(bp); + /* + * If we already had a key match at a higher level, we know + * we need to use the first entry in this block. + */ + if (diff == 0) + keyno = 1; + /* + * Otherwise we need to search this block. Do a binary search. + */ + else { + int high; /* high entry number */ + xfs_alloc_key_t *kkbase;/* base of keys in block */ + xfs_alloc_rec_t *krbase;/* base of records in block */ + int low; /* low entry number */ + + /* + * Get a pointer to keys or records. 
+ */ + if (level > 0) + kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur); + else + krbase = XFS_ALLOC_REC_ADDR(block, 1, cur); + /* + * Set low and high entry numbers, 1-based. + */ + low = 1; + if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) { + /* + * If the block is empty, the tree must + * be an empty leaf. + */ + ASSERT(level == 0 && cur->bc_nlevels == 1); + cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; + *stat = 0; + return 0; + } + /* + * Binary search the block. + */ + while (low <= high) { + xfs_extlen_t blockcount; /* key value */ + xfs_agblock_t startblock; /* key value */ + + XFS_STATS_INC(xs_abt_compare); + /* + * keyno is average of low and high. + */ + keyno = (low + high) >> 1; + /* + * Get startblock & blockcount. + */ + if (level > 0) { + xfs_alloc_key_t *kkp; + + kkp = kkbase + keyno - 1; + startblock = INT_GET(kkp->ar_startblock, ARCH_CONVERT); + blockcount = INT_GET(kkp->ar_blockcount, ARCH_CONVERT); + } else { + xfs_alloc_rec_t *krp; + + krp = krbase + keyno - 1; + startblock = INT_GET(krp->ar_startblock, ARCH_CONVERT); + blockcount = INT_GET(krp->ar_blockcount, ARCH_CONVERT); + } + /* + * Compute difference to get next direction. + */ + if (cur->bc_btnum == XFS_BTNUM_BNO) + diff = (int)startblock - + (int)cur->bc_rec.a.ar_startblock; + else if (!(diff = (int)blockcount - + (int)cur->bc_rec.a.ar_blockcount)) + diff = (int)startblock - + (int)cur->bc_rec.a.ar_startblock; + /* + * Less than, move right. + */ + if (diff < 0) + low = keyno + 1; + /* + * Greater than, move left. + */ + else if (diff > 0) + high = keyno - 1; + /* + * Equal, we're done. + */ + else + break; + } + } + /* + * If there are more levels, set up for the next level + * by getting the block number and filling in the cursor. + */ + if (level > 0) { + /* + * If we moved left, need the previous key number, + * unless there isn't one. 
+ */ + if (diff > 0 && --keyno < 1) + keyno = 1; + agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, keyno, cur), ARCH_CONVERT); +#ifdef DEBUG + if (error = xfs_btree_check_sptr(cur, agbno, level)) + return error; +#endif + cur->bc_ptrs[level] = keyno; + } + } + /* + * Done with the search. + * See if we need to adjust the results. + */ + if (dir != XFS_LOOKUP_LE && diff < 0) { + keyno++; + /* + * If ge search and we went off the end of the block, but it's + * not the last block, we're in the wrong block. + */ + if (dir == XFS_LOOKUP_GE && + keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) && + INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) { + int i; + + cur->bc_ptrs[0] = keyno; + if (error = xfs_alloc_increment(cur, 0, &i)) + return error; + XFS_WANT_CORRUPTED_RETURN(i == 1); + *stat = 1; + return 0; + } + } + else if (dir == XFS_LOOKUP_LE && diff > 0) + keyno--; + cur->bc_ptrs[0] = keyno; + /* + * Return if we succeeded or not. + */ + if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT)) + *stat = 0; + else + *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0)); + return 0; +} + +/* + * Move 1 record left from cur/level if possible. + * Update cur to reflect the new path. 
+ */
+STATIC int				/* error */
+xfs_alloc_lshift(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	int		level,		/* level to shift record on */
+	int		*stat)		/* success/failure */
+{
+	int		error;		/* error return value */
+#ifdef DEBUG
+	int		i;		/* loop index */
+#endif
+	xfs_alloc_key_t	key;		/* key value for leaf level upward */
+	xfs_buf_t	*lbp;		/* buffer for left neighbor block */
+	xfs_alloc_block_t *left;	/* left neighbor btree block */
+	int		nrec;		/* new number of left block entries */
+	xfs_buf_t	*rbp;		/* buffer for right (current) block */
+	xfs_alloc_block_t *right;	/* right (current) btree block */
+	xfs_alloc_key_t	*rkp;		/* key pointer for right block */
+	xfs_alloc_ptr_t	*rpp;		/* address pointer for right block */
+	xfs_alloc_rec_t	*rrp;		/* record pointer for right block */
+	/*
+	 * *stat is set to 0 when nothing is moved (no left sibling, the
+	 * cursor index at this level is 1 or less, or the left sibling is
+	 * full) and to 1 after a successful shift.  The return value is
+	 * an error code, 0 when no helper failed.
+	 */
+	/*
+	 * Set up variables for this block as "right".
+	 */
+	rbp = cur->bc_bufs[level];
+	right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+		return error;
+#endif
+	/*
+	 * If we've got no left sibling then we can't shift an entry left.
+	 */
+	if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * If the cursor entry is the one that would be moved, don't
+	 * do it... it's too complicated.
+	 */
+	if (cur->bc_ptrs[level] <= 1) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Set up the left neighbor as "left".
+	 */
+	if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+			cur->bc_private.a.agno, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0, &lbp,
+			XFS_ALLOC_BTREE_REF))
+		return error;
+	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+	if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+		return error;
+	/*
+	 * If it's full, it can't take another entry.
+	 */
+	if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+		*stat = 0;
+		return 0;
+	}
+	nrec = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1;
+	/*
+	 * If non-leaf, copy a key and a ptr to the left block.
+	 * The first entry of right is appended to left as entry nrec
+	 * and the new slot is logged.
+	 */
+	if (level > 0) {
+		xfs_alloc_key_t	*lkp;	/* key pointer for left block */
+		xfs_alloc_ptr_t	*lpp;	/* address pointer for left block */
+
+		lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur);
+		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
+		*lkp = *rkp;
+		xfs_alloc_log_keys(cur, lbp, nrec, nrec);
+		lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur);
+		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+		if (error = xfs_btree_check_sptr(cur, INT_GET(*rpp, ARCH_CONVERT), level))
+			return error;
+#endif
+		*lpp = *rpp; /* INT_: copy */
+		xfs_alloc_log_ptrs(cur, lbp, nrec, nrec);
+		xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
+	}
+	/*
+	 * If leaf, copy a record to the left block.
+	 */
+	else {
+		xfs_alloc_rec_t	*lrp;	/* record pointer for left block */
+
+		lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur);
+		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
+		*lrp = *rrp;
+		xfs_alloc_log_recs(cur, lbp, nrec, nrec);
+		xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
+	}
+	/*
+	 * Bump and log left's numrecs, decrement and log right's numrecs.
+	 */
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, +1);
+	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
+	INT_MOD(right->bb_numrecs, ARCH_CONVERT, -1);
+	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
+	/*
+	 * Slide the contents of right down one entry.
+	 * right->bb_numrecs has already been decremented, so it is the
+	 * number of entries that remain and must be moved.  After the
+	 * slide rkp refers to the new first key of right: the first key
+	 * slot itself for non-leaf blocks, or the local "key" built from
+	 * the first record for leaf blocks.
+	 */
+	if (level > 0) {
+#ifdef DEBUG
+		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT),
+					level))
+				return error;
+		}
+#endif
+		ovbcopy(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		ovbcopy(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+	} else {
+		ovbcopy(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
+		key.ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
+		rkp = &key;
+	}
+	/*
+	 * Update the parent key values of right.
+	 */
+	if (error = xfs_alloc_updkey(cur, rkp, level + 1))
+		return error;
+	/*
+	 * Slide the cursor value left one.
+	 */
+	cur->bc_ptrs[level]--;
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Allocate a new root block, fill it in.
+ *
+ * The block comes from xfs_alloc_get_freelist(); if that yields
+ * NULLAGBLOCK nothing further is changed and *stat is set to 0.
+ * Otherwise the AGF root and level count for this btree (and the
+ * in-core pagf_levels counter for the a.g.) are updated, the new root
+ * is given two entries referring to the cursor's current top-level
+ * block and that block's sibling, the cursor gains one level, and
+ * *stat is set to 1.  The return value is an error code, 0 when no
+ * helper failed.
+ */
+STATIC int				/* error */
+xfs_alloc_newroot(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	int		*stat)		/* success/failure */
+{
+	int		error;		/* error return value */
+	xfs_agblock_t	lbno;		/* left block number */
+	xfs_buf_t	*lbp;		/* left btree buffer */
+	xfs_alloc_block_t *left;	/* left btree block */
+	xfs_mount_t	*mp;		/* mount structure */
+	xfs_agblock_t	nbno;		/* new block number */
+	xfs_buf_t	*nbp;		/* new (root) buffer */
+	xfs_alloc_block_t *new;		/* new (root) btree block */
+	int		nptr;		/* new value for key index, 1 or 2 */
+	xfs_agblock_t	rbno;		/* right block number */
+	xfs_buf_t	*rbp;		/* right btree buffer */
+	xfs_alloc_block_t *right;	/* right btree block */
+
+	mp = cur->bc_mp;
+
+	ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp));
+	/*
+	 * Get a buffer from the freelist blocks, for the new root.
+	 */
+	if (error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+			&nbno))
+		return error;
+	/*
+	 * None available, we fail.
+	 */
+	if (nbno == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	xfs_trans_agbtree_delta(cur->bc_tp, 1);
+	nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno,
+		0);
+	new = XFS_BUF_TO_ALLOC_BLOCK(nbp);
+	/*
+	 * Set the root data in the a.g. freespace structure.
+	 * Both the on-disk AGF fields and the in-core per-a.g. level
+	 * count are bumped; only the AGF roots/levels fields are logged.
+	 */
+	{
+		xfs_agf_t	*agf;	/* a.g. freespace header */
+		xfs_agnumber_t	seqno;
+
+		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+		INT_SET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT, nbno);
+		INT_MOD(agf->agf_levels[cur->bc_btnum], ARCH_CONVERT, 1);
+		seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
+		mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++;
+		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
+			XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+	}
+	/*
+	 * At the previous root level there are now two blocks: the old
+	 * root, and the new block generated when it was split.
+	 * We don't know which one the cursor is pointing at, so we
+	 * set up variables "left" and "right" for each case.
+	 * The cursor's block is taken to be the left one when it has a
+	 * right sibling, and the right one otherwise.
+	 */
+	lbp = cur->bc_bufs[cur->bc_nlevels - 1];
+	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp))
+		return error;
+#endif
+	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+		/*
+		 * Our block is left, pick up the right block.
+		 */
+		lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp));
+		rbno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
+		if (error = xfs_btree_read_bufs(mp, cur->bc_tp,
+				cur->bc_private.a.agno, rbno, 0, &rbp,
+				XFS_ALLOC_BTREE_REF))
+			return error;
+		right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+		if (error = xfs_btree_check_sblock(cur, right,
+				cur->bc_nlevels - 1, rbp))
+			return error;
+		nptr = 1;
+	} else {
+		/*
+		 * Our block is right, pick up the left block.
+		 */
+		rbp = lbp;
+		right = left;
+		rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp));
+		lbno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
+		if (error = xfs_btree_read_bufs(mp, cur->bc_tp,
+				cur->bc_private.a.agno, lbno, 0, &lbp,
+				XFS_ALLOC_BTREE_REF))
+			return error;
+		left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+		if (error = xfs_btree_check_sblock(cur, left,
+				cur->bc_nlevels - 1, lbp))
+			return error;
+		nptr = 2;
+	}
+	/*
+	 * Fill in the new block's btree header and log it.
+	 * The new root's level is the current level count (levels are
+	 * numbered from 0), it holds two entries and has no siblings.
+	 */
+	INT_SET(new->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
+	INT_SET(new->bb_level, ARCH_CONVERT, (__uint16_t)cur->bc_nlevels);
+	INT_SET(new->bb_numrecs, ARCH_CONVERT, 2);
+	INT_SET(new->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
+	INT_SET(new->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+	xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS);
+	ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
+	/*
+	 * Fill in the key data in the new root.
+	 * The keys are the first keys of left and right when those are
+	 * non-leaf blocks, or are built from their first records when
+	 * they are leaves.
+	 */
+	{
+		xfs_alloc_key_t	*kp;	/* btree key pointer */
+
+		kp = XFS_ALLOC_KEY_ADDR(new, 1, cur);
+		if (INT_GET(left->bb_level, ARCH_CONVERT) > 0) {
+			kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur); /* INT_: structure copy */
+			kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);/* INT_: structure copy */
+		} else {
+			xfs_alloc_rec_t	*rp;	/* btree record pointer */
+
+			rp = XFS_ALLOC_REC_ADDR(left, 1, cur);
+			kp[0].ar_startblock = rp->ar_startblock; /* INT_: direct copy */
+			kp[0].ar_blockcount = rp->ar_blockcount; /* INT_: direct copy */
+			rp = XFS_ALLOC_REC_ADDR(right, 1, cur);
+			kp[1].ar_startblock = rp->ar_startblock; /* INT_: direct copy */
+			kp[1].ar_blockcount = rp->ar_blockcount; /* INT_: direct copy */
+		}
+	}
+	xfs_alloc_log_keys(cur, nbp, 1, 2);
+	/*
+	 * Fill in the pointer data in the new root.
+	 */
+	{
+		xfs_alloc_ptr_t	*pp;	/* btree address pointer */
+
+		pp = XFS_ALLOC_PTR_ADDR(new, 1, cur);
+		INT_SET(pp[0], ARCH_CONVERT, lbno);
+		INT_SET(pp[1], ARCH_CONVERT, rbno);
+	}
+	xfs_alloc_log_ptrs(cur, nbp, 1, 2);
+	/*
+	 * Fix up the cursor.
+	 * The new root becomes the cursor's top level, with index nptr
+	 * (1 if the cursor's old block is the left child, 2 if it is the
+	 * right child).
+	 */
+	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
+	cur->bc_ptrs[cur->bc_nlevels] = nptr;
+	cur->bc_nlevels++;
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Move 1 record right from cur/level if possible.
+ * Update cur to reflect the new path.
+ *
+ * *stat is set to 0 when nothing is moved (no right sibling, the
+ * cursor index at this level is at or beyond the last entry of the
+ * block, or the right sibling is full) and to 1 after a successful
+ * shift.  The return value is an error code, 0 when no helper failed.
+ */
+STATIC int				/* error */
+xfs_alloc_rshift(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	int		level,		/* level to shift record on */
+	int		*stat)		/* success/failure */
+{
+	int		error;		/* error return value */
+	int		i;		/* loop index */
+	xfs_alloc_key_t	key;		/* key value for leaf level upward */
+	xfs_buf_t	*lbp;		/* buffer for left (current) block */
+	xfs_alloc_block_t *left;	/* left (current) btree block */
+	xfs_buf_t	*rbp;		/* buffer for right neighbor block */
+	xfs_alloc_block_t *right;	/* right neighbor btree block */
+	xfs_alloc_key_t	*rkp;		/* key pointer for right block */
+	xfs_btree_cur_t	*tcur;		/* temporary cursor */
+
+	/*
+	 * Set up variables for this block as "left".
+	 */
+	lbp = cur->bc_bufs[level];
+	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+		return error;
+#endif
+	/*
+	 * If we've got no right sibling then we can't shift an entry right.
+	 */
+	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * If the cursor entry is the one that would be moved, don't
+	 * do it... it's too complicated.
+	 */
+	if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Set up the right neighbor as "right".
+	 */
+	if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+			cur->bc_private.a.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rbp,
+			XFS_ALLOC_BTREE_REF))
+		return error;
+	right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+	if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+		return error;
+	/*
+	 * If it's full, it can't take another entry.
+	 */
+	if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Make a hole at the start of the right neighbor block, then
+	 * copy the last left block entry to the hole.
+	 * right->bb_numrecs is still the old count here, so the logged
+	 * range 1..numrecs + 1 covers the inserted entry plus every
+	 * entry that was moved.  rkp is left referring to the new first
+	 * key of right (the local "key" in the leaf case).
+	 */
+	if (level > 0) {
+		xfs_alloc_key_t	*lkp;	/* key pointer for left block */
+		xfs_alloc_ptr_t	*lpp;	/* address pointer for left block */
+		xfs_alloc_ptr_t	*rpp;	/* address pointer for right block */
+
+		lkp = XFS_ALLOC_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		lpp = XFS_ALLOC_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
+		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+		for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))
+				return error;
+		}
+#endif
+		ovbcopy(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		ovbcopy(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+#ifdef DEBUG
+		if (error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level))
+			return error;
+#endif
+		*rkp = *lkp; /* INT_: copy */
+		*rpp = *lpp; /* INT_: copy */
+		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
+	} else {
+		xfs_alloc_rec_t	*lrp;	/* record pointer for left block */
+		xfs_alloc_rec_t	*rrp;	/* record pointer for right block */
+
+		lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
+		ovbcopy(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		*rrp = *lrp;
+		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
+		key.ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
+		rkp = &key;
+		xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
+	}
+	/*
+	 * Decrement and log left's numrecs, bump and log right's numrecs.
+	 */
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
+	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
+	INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
+	/*
+	 * Using a temporary cursor, update the parent key values of the
+	 * block on the right.
+	 * The duplicate is moved to the last record of the left block
+	 * and then incremented, and the key update is done through it
+	 * so that the caller's cursor is not repositioned.  The
+	 * temporary cursor is deleted on both the success and the
+	 * error0 paths.
+	 */
+	if (error = xfs_btree_dup_cursor(cur, &tcur))
+		return error;
+	i = xfs_btree_lastrec(tcur, level);
+	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+	if ((error = xfs_alloc_increment(tcur, level, &i)) ||
+	    (error = xfs_alloc_updkey(tcur, rkp, level + 1)))
+		goto error0;
+	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+	*stat = 1;
+	return 0;
+error0:
+	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+	return error;
+}
+
+/*
+ * Split cur/level block in half.
+ * Return new block number and its first record (to be inserted into parent).
+ *
+ * The new (right) block is taken from the freelist; if none is
+ * available *stat is set to 0 and nothing else is done.  On success
+ * *bnop and *keyp describe the new block and *stat is 1.  *curp is
+ * written only when a level above "level" exists in the cursor, in
+ * which case it receives a duplicate of cur whose index at level + 1
+ * has been advanced by one.  The return value is an error code, 0
+ * when no helper failed.
+ */
+STATIC int				/* error */
+xfs_alloc_split(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	int		level,		/* level to split */
+	xfs_agblock_t	*bnop,		/* output: block number allocated */
+	xfs_alloc_key_t	*keyp,		/* output: first key of new block */
+	xfs_btree_cur_t	**curp,		/* output: new cursor */
+	int		*stat)		/* success/failure */
+{
+	int		error;		/* error return value */
+	int		i;		/* loop index/record number */
+	xfs_agblock_t	lbno;		/* left (current) block number */
+	xfs_buf_t	*lbp;		/* buffer for left block */
+	xfs_alloc_block_t *left;	/* left (current) btree block */
+	xfs_agblock_t	rbno;		/* right (new) block number */
+	xfs_buf_t	*rbp;		/* buffer for right block */
+	xfs_alloc_block_t *right;	/* right (new) btree block */
+
+	/*
+	 * Allocate the new block from the freelist.
+	 * If we can't do it, we're toast.  Give up.
+	 */
+	if (error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+			&rbno))
+		return error;
+	if (rbno == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	xfs_trans_agbtree_delta(cur->bc_tp, 1);
+	rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno,
+		rbno, 0);
+	/*
+	 * Set up the new block as "right".
+	 */
+	right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
+	/*
+	 * "Left" is the current (according to the cursor) block.
+	 */
+	lbp = cur->bc_bufs[level];
+	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+		return error;
+#endif
+	/*
+	 * Fill in the btree header for the new block.
+	 * It starts with half (rounded down) of left's entries.
+	 */
+	INT_SET(right->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
+	right->bb_level = left->bb_level; /* INT_: direct copy */
+	INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2));
+	/*
+	 * Make sure that if there's an odd number of entries now, that
+	 * each new block will have the same number of entries.
+	 * That is, when the count is odd and the cursor index lies in the
+	 * part that stays in left, right takes the extra entry.
+	 * Afterwards i is the index in left of the first entry that
+	 * moves to right.
+	 */
+	if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) &&
+	    cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1)
+		INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+	i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1;
+	/*
+	 * For non-leaf blocks, copy keys and addresses over to the new block.
+	 * The first key copied is also returned through keyp.
+	 */
+	if (level > 0) {
+		xfs_alloc_key_t	*lkp;	/* left btree key pointer */
+		xfs_alloc_ptr_t	*lpp;	/* left btree address pointer */
+		xfs_alloc_key_t	*rkp;	/* right btree key pointer */
+		xfs_alloc_ptr_t	*rpp;	/* right btree address pointer */
+
+		lkp = XFS_ALLOC_KEY_ADDR(left, i, cur);
+		lpp = XFS_ALLOC_PTR_ADDR(left, i, cur);
+		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
+		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level))
+				return error;
+		}
+#endif
+		bcopy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp)); /* INT_: copy */
+		bcopy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));/* INT_: copy */
+		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		*keyp = *rkp;
+	}
+	/*
+	 * For leaf blocks, copy records over to the new block.
+	 * keyp is built from the first record copied.
+	 */
+	else {
+		xfs_alloc_rec_t	*lrp;	/* left btree record pointer */
+		xfs_alloc_rec_t	*rrp;	/* right btree record pointer */
+
+		lrp = XFS_ALLOC_REC_ADDR(left, i, cur);
+		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
+		bcopy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		keyp->ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
+		keyp->ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
+	}
+	/*
+	 * Find the left block number by looking in the buffer.
+	 * Adjust numrecs, sibling pointers.
+	 * right is linked in between left and left's old right sibling;
+	 * the whole header of right and the changed fields of left are
+	 * logged.
+	 */
+	lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp));
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
+	right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
+	INT_SET(left->bb_rightsib, ARCH_CONVERT, rbno);
+	INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno);
+	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS);
+	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+	/*
+	 * If there's a block to the new block's right, make that block
+	 * point back to right instead of to left.
+	 */
+	if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+		xfs_alloc_block_t *rrblock;	/* rr btree block */
+		xfs_buf_t	*rrbp;		/* buffer for rrblock */
+
+		if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+				cur->bc_private.a.agno, INT_GET(right->bb_rightsib, ARCH_CONVERT), 0,
+				&rrbp, XFS_ALLOC_BTREE_REF))
+			return error;
+		rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
+		if (error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))
+			return error;
+		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, rbno);
+		xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
+	}
+	/*
+	 * If the cursor is really in the right block, move it there.
+	 * If it's just pointing past the last entry in left, then we'll
+	 * insert there, so don't change anything in that case.
+	 */
+	if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) {
+		xfs_btree_setbuf(cur, level, rbp);
+		cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT);
+	}
+	/*
+	 * If there are more levels, we'll need another cursor which refers to
+	 * the right block, no matter where this cursor was.
+	 */
+	if (level + 1 < cur->bc_nlevels) {
+		if (error = xfs_btree_dup_cursor(cur, curp))
+			return error;
+		(*curp)->bc_ptrs[level + 1]++;
+	}
+	*bnop = rbno;
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Update keys at all levels from here to the root along the cursor's path. 
+ */
+STATIC int				/* error */
+xfs_alloc_updkey(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	xfs_alloc_key_t	*keyp,		/* new key value to update to */
+	int		level)		/* starting level for update */
+{
+	int		ptr;		/* index of key in block */
+
+	/*
+	 * Go up the tree from this level toward the root.
+	 * At each level, update the key value to the value input.
+	 * Stop when we reach a level where the cursor isn't pointing
+	 * at the first entry in the block.
+	 * The key at that stopping level is itself still overwritten
+	 * and logged: ptr is only tested at the top of the next
+	 * iteration.  Nothing is done if level is already at or beyond
+	 * cur->bc_nlevels.  Outside DEBUG builds the function always
+	 * returns 0.
+	 */
+	for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
+		xfs_alloc_block_t *block;	/* btree block */
+		xfs_buf_t	*bp;		/* buffer for block */
+#ifdef DEBUG
+		int		error;		/* error return value */
+#endif
+		xfs_alloc_key_t	*kp;		/* ptr to btree block keys */
+
+		bp = cur->bc_bufs[level];
+		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+		if (error = xfs_btree_check_sblock(cur, block, level, bp))
+			return error;
+#endif
+		ptr = cur->bc_ptrs[level];
+		kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
+		*kp = *keyp;
+		xfs_alloc_log_keys(cur, bp, ptr, ptr);
+	}
+	return 0;
+}
+
+/*
+ * Externally visible routines.
+ */
+
+/*
+ * Decrement cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ *
+ * *stat is set to 1 when the cursor was moved to the previous record
+ * and to 0 when the block at this level has no left sibling, i.e. the
+ * cursor was already at the left edge of the tree.  Note that
+ * cur->bc_ptrs[level] has been decremented before that test, so it is
+ * left at 0 or below in the *stat == 0 case.  The return value is an
+ * error code, 0 when no helper failed.
+ */
+int					/* error */
+xfs_alloc_decrement(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	int		level,		/* level in btree, 0 is leaf */
+	int		*stat)		/* success/failure */
+{
+	xfs_alloc_block_t *block;	/* btree block */
+	int		error;		/* error return value */
+	int		lev;		/* btree level */
+
+	ASSERT(level < cur->bc_nlevels);
+	/*
+	 * Read-ahead to the left at this level.
+	 */
+	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
+	/*
+	 * Decrement the ptr at this level.  If we're still in the block
+	 * then we're done.
+	 */
+	if (--cur->bc_ptrs[level] > 0) {
+		*stat = 1;
+		return 0;
+	}
+	/*
+	 * Get a pointer to the btree block.
+	 */
+	block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, level,
+			cur->bc_bufs[level]))
+		return error;
+#endif
+	/*
+	 * If we just went off the left edge of the tree, return failure.
+	 */
+	if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * March up the tree decrementing pointers.
+	 * Stop when we don't go off the left edge of a block.
+	 */
+	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+		if (--cur->bc_ptrs[lev] > 0)
+			break;
+		/*
+		 * Read-ahead the left block, we're going to read it
+		 * in the next loop.
+		 */
+		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
+	}
+	/*
+	 * If we went off the root then we are seriously confused.
+	 */
+	ASSERT(lev < cur->bc_nlevels);
+	/*
+	 * Now walk back down the tree, fixing up the cursor's buffer
+	 * pointers and key numbers.
+	 * At each step the child named by the current index is read and
+	 * installed in the cursor one level down, and the index there is
+	 * set to that block's last entry (bb_numrecs).
+	 */
+	for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
+		xfs_agblock_t	agbno;	/* block number of btree block */
+		xfs_buf_t	*bp;	/* buffer pointer for block */
+
+		agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+				cur->bc_private.a.agno, agbno, 0, &bp,
+				XFS_ALLOC_BTREE_REF))
+			return error;
+		lev--;
+		xfs_btree_setbuf(cur, lev, bp);
+		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+		if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+			return error;
+		cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+	}
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Delete the record pointed to by cur.
+ * The cursor refers to the place where the record was (could be inserted)
+ * when the operation returns.
+ *
+ * The work is done by xfs_alloc_delrec(), called level by level.
+ * *stat receives the final value of the result code i; when the last
+ * xfs_alloc_delrec() result was 0 and xfs_alloc_decrement() is then
+ * called, i (and therefore *stat) holds that call's result instead.
+ * The return value is an error code, 0 when no helper failed.
+ */
+int					/* error */
+xfs_alloc_delete(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	int		*stat)		/* success/failure */
+{
+	int		error;		/* error return value */
+	int		i;		/* result code */
+	int		level;		/* btree level */
+
+	/*
+	 * Go up the tree, starting at leaf level.
+	 * If 2 is returned then a join was done; go to the next level.
+	 * Otherwise we are done.
+	 */
+	for (level = 0, i = 2; i == 2; level++) {
+		if (error = xfs_alloc_delrec(cur, level, &i))
+			return error;
+	}
+	/*
+	 * On a 0 result, find the lowest level above the leaf whose
+	 * cursor index is 0 and decrement the cursor there; only the
+	 * first such level is handled.
+	 */
+	if (i == 0) {
+		for (level = 1; level < cur->bc_nlevels; level++) {
+			if (cur->bc_ptrs[level] == 0) {
+				if (error = xfs_alloc_decrement(cur, level, &i))
+					return error;
+				break;
+			}
+		}
+	}
+	*stat = i;
+	return 0;
+}
+
+/*
+ * Get the data from the pointed-to record.
+ *
+ * The record is the one at the cursor's leaf index (cur->bc_ptrs[0])
+ * in the cursor's leaf block.  If that index is <= 0 or greater than
+ * the block's bb_numrecs, *stat is set to 0 and *bno and *len are not
+ * written; otherwise *bno and *len receive the record's startblock and
+ * blockcount and *stat is set to 1.  Outside DEBUG builds the function
+ * always returns 0.
+ */
+int					/* error */
+xfs_alloc_get_rec(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	xfs_agblock_t	*bno,		/* output: starting block of extent */
+	xfs_extlen_t	*len,		/* output: length of extent */
+	int		*stat)		/* output: success/failure */
+{
+	xfs_alloc_block_t *block;	/* btree block */
+#ifdef DEBUG
+	int		error;		/* error return value */
+#endif
+	int		ptr;		/* record number */
+
+	ptr = cur->bc_ptrs[0];
+	block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0]))
+		return error;
+#endif
+	/*
+	 * Off the right end or left end, return failure.
+	 */
+	if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT) || ptr <= 0) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Point to the record and extract its data.
+	 */
+	{
+		xfs_alloc_rec_t		*rec;	/* record data */
+
+		rec = XFS_ALLOC_REC_ADDR(block, ptr, cur);
+		*bno = INT_GET(rec->ar_startblock, ARCH_CONVERT);
+		*len = INT_GET(rec->ar_blockcount, ARCH_CONVERT);
+	}
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Increment cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched. 
+ */
+int					/* error */
+xfs_alloc_increment(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	int		level,		/* level in btree, 0 is leaf */
+	int		*stat)		/* success/failure */
+{
+	xfs_alloc_block_t *block;	/* btree block */
+	xfs_buf_t	*bp;		/* tree block buffer */
+	int		error;		/* error return value */
+	int		lev;		/* btree level */
+	/*
+	 * *stat is set to 1 when the cursor was moved to the next record
+	 * and to 0 when the block at this level has no right sibling.
+	 * The index at this level has already been incremented when that
+	 * test is made, so it is left past bb_numrecs in the *stat == 0
+	 * case.  The return value is an error code, 0 when no helper
+	 * failed.
+	 */
+	ASSERT(level < cur->bc_nlevels);
+	/*
+	 * Read-ahead to the right at this level.
+	 */
+	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+	/*
+	 * Get a pointer to the btree block.
+	 */
+	bp = cur->bc_bufs[level];
+	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, level, bp))
+		return error;
+#endif
+	/*
+	 * Increment the ptr at this level.  If we're still in the block
+	 * then we're done.
+	 */
+	if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		*stat = 1;
+		return 0;
+	}
+	/*
+	 * If we just went off the right edge of the tree, return failure.
+	 */
+	if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * March up the tree incrementing pointers.
+	 * Stop when we don't go off the right edge of a block.
+	 */
+	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+		bp = cur->bc_bufs[lev];
+		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+#ifdef DEBUG
+		if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+			return error;
+#endif
+		if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT))
+			break;
+		/*
+		 * Read-ahead the right block, we're going to read it
+		 * in the next loop.
+		 */
+		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
+	}
+	/*
+	 * If we went off the root then we are seriously confused.
+	 */
+	ASSERT(lev < cur->bc_nlevels);
+	/*
+	 * Now walk back down the tree, fixing up the cursor's buffer
+	 * pointers and key numbers.
+	 * At each step the child named by the current index is read and
+	 * installed in the cursor one level down, and the index there is
+	 * set to 1 (the block's first entry).
+	 */
+	for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+	     lev > level; ) {
+		xfs_agblock_t	agbno;	/* block number of btree block */
+
+		agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+				cur->bc_private.a.agno, agbno, 0, &bp,
+				XFS_ALLOC_BTREE_REF))
+			return error;
+		lev--;
+		xfs_btree_setbuf(cur, lev, bp);
+		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
+		if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+			return error;
+		cur->bc_ptrs[lev] = 1;
+	}
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Insert the current record at the point referenced by cur.
+ * The cursor may be inconsistent on return if splits have been done.
+ *
+ * The record inserted is built from cur->bc_rec.a (ar_startblock and
+ * ar_blockcount).  The work is done by xfs_alloc_insrec(), called once
+ * per level for as long as it hands back a new block number in nbno.
+ * *stat receives the result value i of the last xfs_alloc_insrec()
+ * call.  The return value is an error code, 0 when no helper failed.
+ */
+int					/* error */
+xfs_alloc_insert(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	int		*stat)		/* success/failure */
+{
+	int		error;		/* error return value */
+	int		i;		/* result value, 0 for failure */
+	int		level;		/* current level number in btree */
+	xfs_agblock_t	nbno;		/* new block number (split result) */
+	xfs_btree_cur_t	*ncur;		/* new cursor (split result) */
+	xfs_alloc_rec_t	nrec;		/* record being inserted this level */
+	xfs_btree_cur_t	*pcur;		/* previous level's cursor */
+
+	level = 0;
+	nbno = NULLAGBLOCK;
+	INT_SET(nrec.ar_startblock, ARCH_CONVERT, cur->bc_rec.a.ar_startblock);
+	INT_SET(nrec.ar_blockcount, ARCH_CONVERT, cur->bc_rec.a.ar_blockcount);
+	ncur = (xfs_btree_cur_t *)0;
+	pcur = cur;
+	/*
+	 * Loop going up the tree, starting at the leaf level.
+	 * Stop when we don't get a split block, that must mean that
+	 * the insert is finished with this level.
+	 */
+	do {
+		/*
+		 * Insert nrec/nbno into this level of the tree.
+		 * Note if we fail, nbno will be null.
+		 * On error, a cursor other than the caller's is deleted
+		 * before returning.
+		 */
+		if (error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur,
+				&i)) {
+			if (pcur != cur)
+				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
+			return error;
+		}
+		/*
+		 * See if the cursor we just used is trash.
+		 * Can't trash the caller's cursor, but otherwise we should
+		 * if ncur is a new cursor or we're about to be done.
+		 * Its level count is copied back into the caller's cursor
+		 * before it is deleted.
+		 */
+		if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
+			cur->bc_nlevels = pcur->bc_nlevels;
+			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
+		}
+		/*
+		 * If we got a new cursor, switch to it.
+		 */
+		if (ncur) {
+			pcur = ncur;
+			ncur = (xfs_btree_cur_t *)0;
+		}
+	} while (nbno != NULLAGBLOCK);
+	*stat = i;
+	return 0;
+}
+
+/*
+ * Lookup the record equal to [bno, len] in the btree given by cur.
+ *
+ * Stores bno and len in cur->bc_rec.a and calls xfs_alloc_lookup()
+ * with XFS_LOOKUP_EQ; the return value and *stat are those of
+ * xfs_alloc_lookup().
+ */
+int					/* error */
+xfs_alloc_lookup_eq(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	xfs_agblock_t	bno,		/* starting block of extent */
+	xfs_extlen_t	len,		/* length of extent */
+	int		*stat)		/* success/failure */
+{
+	cur->bc_rec.a.ar_startblock = bno;
+	cur->bc_rec.a.ar_blockcount = len;
+	return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
+ * Lookup the first record greater than or equal to [bno, len]
+ * in the btree given by cur.
+ *
+ * Stores bno and len in cur->bc_rec.a and calls xfs_alloc_lookup()
+ * with XFS_LOOKUP_GE; the return value and *stat are those of
+ * xfs_alloc_lookup().
+ */
+int					/* error */
+xfs_alloc_lookup_ge(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	xfs_agblock_t	bno,		/* starting block of extent */
+	xfs_extlen_t	len,		/* length of extent */
+	int		*stat)		/* success/failure */
+{
+	cur->bc_rec.a.ar_startblock = bno;
+	cur->bc_rec.a.ar_blockcount = len;
+	return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Lookup the first record less than or equal to [bno, len]
+ * in the btree given by cur.
+ *
+ * Stores bno and len in cur->bc_rec.a and calls xfs_alloc_lookup()
+ * with XFS_LOOKUP_LE; the return value and *stat are those of
+ * xfs_alloc_lookup().
+ */
+int					/* error */
+xfs_alloc_lookup_le(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	xfs_agblock_t	bno,		/* starting block of extent */
+	xfs_extlen_t	len,		/* length of extent */
+	int		*stat)		/* success/failure */
+{
+	cur->bc_rec.a.ar_startblock = bno;
+	cur->bc_rec.a.ar_blockcount = len;
+	return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Update the record referred to by cur, to the value given by [bno, len].
+ * This either works (return 0) or gets an EFSCORRUPTED error. 
+ */ +int /* error */ +xfs_alloc_update( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len) /* length of extent */ +{ + xfs_alloc_block_t *block; /* btree block to update */ + int error; /* error return value */ + int ptr; /* current record number (updating) */ + + ASSERT(len > 0); + /* + * Pick up the leaf block the cursor currently points at. + */ + block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]); +#ifdef DEBUG + if (error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])) + return error; +#endif + /* + * Get the address of the rec to be updated. + */ + ptr = cur->bc_ptrs[0]; + { + xfs_alloc_rec_t *rp; /* pointer to updated record */ + + rp = XFS_ALLOC_REC_ADDR(block, ptr, cur); + /* + * Fill in the new contents and log them. + */ + INT_SET(rp->ar_startblock, ARCH_CONVERT, bno); + INT_SET(rp->ar_blockcount, ARCH_CONVERT, len); + xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr); + } + /* + * If it's the by-size btree and it's the last leaf block and + * it's the last record... then update the size of the longest + * extent, cached in the a.g. freespace header and in-core per-a.g. data. + */ + if (cur->bc_btnum == XFS_BTNUM_CNT && + INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK && + ptr == INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + xfs_agf_t *agf; /* a.g. freespace header */ + xfs_agnumber_t seqno; /* a.g. number read from the AGF */ + + agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT); + cur->bc_mp->m_perag[seqno].pagf_longest = len; + INT_SET(agf->agf_longest, ARCH_CONVERT, len); + xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, + XFS_AGF_LONGEST); + } + /* + * Updating first record in leaf. Pass new key value up to our parent.
+ */ + if (ptr == 1) { + xfs_alloc_key_t key; /* key containing [bno, len] */ + + INT_SET(key.ar_startblock, ARCH_CONVERT, bno); + INT_SET(key.ar_blockcount, ARCH_CONVERT, len); + if (error = xfs_alloc_updkey(cur, &key, 1)) + return error; + } + return 0; +} diff --git a/libxfs/xfs_attr_leaf.c b/libxfs/xfs_attr_leaf.c new file mode 100644 index 000000000..f3b02e0b8 --- /dev/null +++ b/libxfs/xfs_attr_leaf.c @@ -0,0 +1,1169 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include + +/* + * xfs_attr_leaf.c + * + * Routines to implement leaf blocks of attributes as Btrees of hashed names.
+ */ + +/*======================================================================== + * Routines used for growing the Btree. + *========================================================================*/ + +/* + * Create the initial contents of a leaf attribute list + * or a leaf in a node attribute list. + */ +int +xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp) +{ + xfs_attr_leafblock_t *leaf; + xfs_attr_leaf_hdr_t *hdr; + xfs_inode_t *dp; + xfs_dabuf_t *bp; + int error; + + dp = args->dp; + ASSERT(dp != NULL); + error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp, + XFS_ATTR_FORK); + if (error) + return(error); + ASSERT(bp != NULL); + leaf = bp->data; + bzero((char *)leaf, XFS_LBSIZE(dp->i_mount)); + hdr = &leaf->hdr; + INT_SET(hdr->info.magic, ARCH_CONVERT, XFS_ATTR_LEAF_MAGIC); + INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount)); + if (INT_GET(hdr->firstused, ARCH_CONVERT) == 0) { /* stored value read back as 0: back off one name-alignment unit */ + INT_SET(hdr->firstused, ARCH_CONVERT, + XFS_LBSIZE(dp->i_mount) - XFS_ATTR_LEAF_NAME_ALIGN); + } + + /* single free region: from the end of the header up to firstused */ INT_SET(hdr->freemap[0].base, ARCH_CONVERT, + sizeof(xfs_attr_leaf_hdr_t)); + INT_SET(hdr->freemap[0].size, ARCH_CONVERT, + INT_GET(hdr->firstused, ARCH_CONVERT) + - INT_GET(hdr->freemap[0].base, + ARCH_CONVERT)); + + xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1); + + *bpp = bp; /* hand the new buffer back to the caller */ + return(0); +} + +/* + * Split the leaf node, rebalance, then add the new entry. + */ +int +xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, + xfs_da_state_blk_t *newblk) +{ + xfs_dablk_t blkno; + int error; + + /* + * Allocate space for a new leaf node. + */ + ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC); + error = xfs_da_grow_inode(state->args, &blkno); + if (error) + return(error); + error = xfs_attr_leaf_create(state->args, blkno, &newblk->bp); + if (error) + return(error); + newblk->blkno = blkno; + newblk->magic = XFS_ATTR_LEAF_MAGIC; + + /* + * Rebalance the entries across the two leaves.
+ * NOTE: rebalance() currently depends on the 2nd block being empty. + */ + xfs_attr_leaf_rebalance(state, oldblk, newblk); + error = xfs_da_blk_link(state, oldblk, newblk); + if (error) + return(error); + + /* + * Save info on "old" attribute for "atomic rename" ops, leaf_add() + * modifies the index/blkno/rmtblk/rmtblkcnt fields to show the + * "new" attrs info. Will need the "old" info to remove it later. + * + * Insert the "new" entry in the correct block. + */ + if (state->inleaf) + error = xfs_attr_leaf_add(oldblk->bp, state->args); + else + error = xfs_attr_leaf_add(newblk->bp, state->args); + + /* + * Update last hashval in both blocks, even if the add failed. + */ + oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL); + newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL); + return(error); +} + +/* + * Add a name to the leaf attribute list structure; ENOSPC if it cannot fit. + */ +int +xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args) +{ + xfs_attr_leafblock_t *leaf; + xfs_attr_leaf_hdr_t *hdr; + xfs_attr_leaf_map_t *map; + int tablesize, entsize, sum, tmp, i; + + leaf = bp->data; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) + == XFS_ATTR_LEAF_MAGIC); + ASSERT((args->index >= 0) + && (args->index <= INT_GET(leaf->hdr.count, ARCH_CONVERT))); + hdr = &leaf->hdr; + entsize = xfs_attr_leaf_newentsize(args, + args->trans->t_mountp->m_sb.sb_blocksize, NULL); + + /* + * Search through freemap for first-fit on new name length.
+ * (may need to figure in size of entry struct too) + */ + tablesize = (INT_GET(hdr->count, ARCH_CONVERT) + 1) + * sizeof(xfs_attr_leaf_entry_t) + + sizeof(xfs_attr_leaf_hdr_t); + map = &hdr->freemap[XFS_ATTR_LEAF_MAPSIZE-1]; + for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE-1; i >= 0; map--, i--) { + if (tablesize > INT_GET(hdr->firstused, ARCH_CONVERT)) { /* grown entry table would pass firstused: just total the free space */ + sum += INT_GET(map->size, ARCH_CONVERT); + continue; + } + if (INT_GET(map->size, ARCH_CONVERT) == 0) + continue; /* no space in this map */ + tmp = entsize; + if (INT_GET(map->base, ARCH_CONVERT) + < INT_GET(hdr->firstused, ARCH_CONVERT)) + tmp += sizeof(xfs_attr_leaf_entry_t); + if (INT_GET(map->size, ARCH_CONVERT) >= tmp) { + tmp = xfs_attr_leaf_add_work(bp, args, i); + return(tmp); + } + sum += INT_GET(map->size, ARCH_CONVERT); + } + + /* + * If there are no holes in the address space of the block, + * and we don't have enough freespace, then compaction will do us + * no good and we should just give up. + */ + if (!hdr->holes && (sum < entsize)) + return(XFS_ERROR(ENOSPC)); + + /* + * Compact the entries to coalesce free space. + * May change hdr->count by dropping INCOMPLETE entries (that code in moveents is under #ifdef GROT). + */ + xfs_attr_leaf_compact(args->trans, bp); + + /* + * After compaction, the block is guaranteed to have only one + * free region, in freemap[0]. If it is not big enough, give up. + */ + if (INT_GET(hdr->freemap[0].size, ARCH_CONVERT) + < (entsize + sizeof(xfs_attr_leaf_entry_t))) + return(XFS_ERROR(ENOSPC)); + + return(xfs_attr_leaf_add_work(bp, args, 0)); +} + +/* + * Add a name to a leaf attribute list structure.
+ */ +STATIC int +xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) +{ + xfs_attr_leafblock_t *leaf; + xfs_attr_leaf_hdr_t *hdr; + xfs_attr_leaf_entry_t *entry; + xfs_attr_leaf_name_local_t *name_loc; + xfs_attr_leaf_name_remote_t *name_rmt; + xfs_attr_leaf_map_t *map; + xfs_mount_t *mp; + int tmp, i; + + leaf = bp->data; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) + == XFS_ATTR_LEAF_MAGIC); + hdr = &leaf->hdr; + ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE)); + ASSERT((args->index >= 0) + && (args->index <= INT_GET(hdr->count, ARCH_CONVERT))); + + /* + * Force open some space in the entry array and fill it in. + */ + entry = &leaf->entries[args->index]; + if (args->index < INT_GET(hdr->count, ARCH_CONVERT)) { + tmp = INT_GET(hdr->count, ARCH_CONVERT) - args->index; + tmp *= sizeof(xfs_attr_leaf_entry_t); + ovbcopy((char *)entry, (char *)(entry+1), tmp); + xfs_da_log_buf(args->trans, bp, + XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); + } + INT_MOD(hdr->count, ARCH_CONVERT, 1); + + /* + * Carve space for the new name/value off the end of the free run. + */ + map = &hdr->freemap[mapindex]; + mp = args->trans->t_mountp; + ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp)); + ASSERT((INT_GET(map->base, ARCH_CONVERT) & 0x3) == 0); + ASSERT(INT_GET(map->size, ARCH_CONVERT) + >= xfs_attr_leaf_newentsize(args, + mp->m_sb.sb_blocksize, NULL)); + ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp)); + ASSERT((INT_GET(map->size, ARCH_CONVERT) & 0x3) == 0); + INT_MOD(map->size, ARCH_CONVERT, + -xfs_attr_leaf_newentsize(args, mp->m_sb.sb_blocksize, &tmp)); + INT_SET(entry->nameidx, ARCH_CONVERT, + INT_GET(map->base, ARCH_CONVERT) + + INT_GET(map->size, ARCH_CONVERT)); + INT_SET(entry->hashval, ARCH_CONVERT, args->hashval); + entry->flags = tmp ? XFS_ATTR_LOCAL : 0; /* tmp was filled in by the newentsize() call above */ + entry->flags |= (args->flags & ATTR_ROOT) ?
XFS_ATTR_ROOT : 0; + if (args->rename) { + entry->flags |= XFS_ATTR_INCOMPLETE; + if ((args->blkno2 == args->blkno) && + (args->index2 <= args->index)) { + args->index2++; + } + } + xfs_da_log_buf(args->trans, bp, + XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); + ASSERT((args->index == 0) || (INT_GET(entry->hashval, ARCH_CONVERT) + >= INT_GET((entry-1)->hashval, + ARCH_CONVERT))); + ASSERT((args->index == INT_GET(hdr->count, ARCH_CONVERT)-1) || + (INT_GET(entry->hashval, ARCH_CONVERT) + <= (INT_GET((entry+1)->hashval, ARCH_CONVERT)))); + + /* + * Copy the attribute name and value into the new space. + * + * For "remote" attribute values, simply note that we need to + * allocate space for the "remote" value. We can't actually + * allocate the extents in this transaction, and we can't decide + * which blocks they should be as we might allocate more blocks + * as part of this transaction (a split operation for example). + */ + if (entry->flags & XFS_ATTR_LOCAL) { + name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, args->index); + name_loc->namelen = args->namelen; + INT_SET(name_loc->valuelen, ARCH_CONVERT, args->valuelen); + bcopy(args->name, (char *)name_loc->nameval, args->namelen); + bcopy(args->value, (char *)&name_loc->nameval[args->namelen], + INT_GET(name_loc->valuelen, ARCH_CONVERT)); + } else { + name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index); + name_rmt->namelen = args->namelen; + bcopy(args->name, (char *)name_rmt->name, args->namelen); + entry->flags |= XFS_ATTR_INCOMPLETE; + /* zero the remote value fields, just in case */ + INT_SET(name_rmt->valuelen, ARCH_CONVERT, 0); + INT_SET(name_rmt->valueblk, ARCH_CONVERT, 0); + args->rmtblkno = 1; + args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen); + } + xfs_da_log_buf(args->trans, bp, + XFS_DA_LOGRANGE(leaf, XFS_ATTR_LEAF_NAME(leaf, args->index), + xfs_attr_leaf_entsize(leaf, args->index))); + + /* + * Update the control info for this leaf node + */ + if (INT_GET(entry->nameidx, ARCH_CONVERT) + < INT_GET(hdr->firstused, ARCH_CONVERT)) {
+ INT_SET(hdr->firstused, ARCH_CONVERT, + INT_GET(entry->nameidx, ARCH_CONVERT)); + } + ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) + >= ((INT_GET(hdr->count, ARCH_CONVERT) + * sizeof(*entry))+sizeof(*hdr))); + tmp = (INT_GET(hdr->count, ARCH_CONVERT)-1) + * sizeof(xfs_attr_leaf_entry_t) + + sizeof(xfs_attr_leaf_hdr_t); + map = &hdr->freemap[0]; + for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; map++, i++) { + if (INT_GET(map->base, ARCH_CONVERT) == tmp) { /* free run began right after the old entry table */ + INT_MOD(map->base, ARCH_CONVERT, + sizeof(xfs_attr_leaf_entry_t)); + INT_MOD(map->size, ARCH_CONVERT, + -sizeof(xfs_attr_leaf_entry_t)); + } + } + INT_MOD(hdr->usedbytes, ARCH_CONVERT, + xfs_attr_leaf_entsize(leaf, args->index)); + xfs_da_log_buf(args->trans, bp, + XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr))); + return(0); +} + +/* + * Garbage collect a leaf block: copy it aside, then repack it in place. + */ +STATIC void +xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp) +{ + xfs_attr_leafblock_t *leaf_s, *leaf_d; + xfs_attr_leaf_hdr_t *hdr_s, *hdr_d; + xfs_mount_t *mp; + char *tmpbuffer; + + mp = trans->t_mountp; + tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP); + ASSERT(tmpbuffer != NULL); + bcopy(bp->data, tmpbuffer, XFS_LBSIZE(mp)); + bzero(bp->data, XFS_LBSIZE(mp)); + + /* + * Copy basic information + */ + leaf_s = (xfs_attr_leafblock_t *)tmpbuffer; + leaf_d = bp->data; + hdr_s = &leaf_s->hdr; + hdr_d = &leaf_d->hdr; + hdr_d->info = hdr_s->info; /* struct copy */ + INT_SET(hdr_d->firstused, ARCH_CONVERT, XFS_LBSIZE(mp)); + /* handle truncation gracefully */ + if (INT_GET(hdr_d->firstused, ARCH_CONVERT) == 0) { + INT_SET(hdr_d->firstused, ARCH_CONVERT, + XFS_LBSIZE(mp) - XFS_ATTR_LEAF_NAME_ALIGN); + } + INT_SET(hdr_d->usedbytes, ARCH_CONVERT, 0); + INT_SET(hdr_d->count, ARCH_CONVERT, 0); + hdr_d->holes = 0; + INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, + sizeof(xfs_attr_leaf_hdr_t)); + INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, + INT_GET(hdr_d->firstused, ARCH_CONVERT) + -
INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT)); + + /* + * Copy all entries in the same (sorted) order, + * but allocate name/value pairs packed and in sequence. + */ + xfs_attr_leaf_moveents(leaf_s, 0, leaf_d, 0, + (int)INT_GET(hdr_s->count, ARCH_CONVERT), mp); + + xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); + + kmem_free(tmpbuffer, XFS_LBSIZE(mp)); +} + +/* + * Redistribute the attribute list entries between two leaf nodes, + * taking into account the size of the new entry. + * + * NOTE: if new block is empty, then it will get the upper half of the + * old block. At present, all (one) callers pass in an empty second block. + * + * This code adjusts the args->index/blkno and args->index2/blkno2 fields + * to match what it is doing in splitting the attribute leaf block. Those + * values are used in "atomic rename" operations on attributes. Note that + * the "new" and "old" values can end up in different blocks. + */ +STATIC void +xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, + xfs_da_state_blk_t *blk2) +{ + xfs_da_args_t *args; + xfs_da_state_blk_t *tmp_blk; + xfs_attr_leafblock_t *leaf1, *leaf2; + xfs_attr_leaf_hdr_t *hdr1, *hdr2; + int count, totallen, max, space, swap; + + /* + * Set up environment. + */ + ASSERT(blk1->magic == XFS_ATTR_LEAF_MAGIC); + ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); + leaf1 = blk1->bp->data; + leaf2 = blk2->bp->data; + ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) + == XFS_ATTR_LEAF_MAGIC); + ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) + == XFS_ATTR_LEAF_MAGIC); + args = state->args; + + /* + * Check ordering of blocks, reverse if it makes things simpler. + * + * NOTE: Given that all (current) callers pass in an empty + * second block, this code should never set "swap".
+ */ + swap = 0; + if (xfs_attr_leaf_order(blk1->bp, blk2->bp)) { + tmp_blk = blk1; + blk1 = blk2; + blk2 = tmp_blk; + leaf1 = blk1->bp->data; + leaf2 = blk2->bp->data; + swap = 1; + } + hdr1 = &leaf1->hdr; + hdr2 = &leaf2->hdr; + + /* + * Examine entries until we reduce the absolute difference in + * byte usage between the two blocks to a minimum. Then get + * the direction to copy and the number of elements to move. + * + * "inleaf" is true if the new entry should be inserted into blk1. + * If "swap" is also true, then reverse the sense of "inleaf". + */ + state->inleaf = xfs_attr_leaf_figure_balance(state, blk1, blk2, + &count, &totallen); + if (swap) + state->inleaf = !state->inleaf; + + /* + * Move any entries required from leaf to leaf: + */ + if (count < INT_GET(hdr1->count, ARCH_CONVERT)) { + /* + * Figure the total bytes to be added to the destination leaf. + */ + /* number of entries being moved */ + count = INT_GET(hdr1->count, ARCH_CONVERT) - count; + space = INT_GET(hdr1->usedbytes, ARCH_CONVERT) - totallen; + space += count * sizeof(xfs_attr_leaf_entry_t); + + /* + * leaf2 is the destination, compact it if it looks tight. + */ + max = INT_GET(hdr2->firstused, ARCH_CONVERT) + - sizeof(xfs_attr_leaf_hdr_t); + max -= INT_GET(hdr2->count, ARCH_CONVERT) + * sizeof(xfs_attr_leaf_entry_t); + if (space > max) { + xfs_attr_leaf_compact(args->trans, blk2->bp); + } + + /* + * Move high entries from leaf1 to low end of leaf2. + */ + xfs_attr_leaf_moveents(leaf1, + INT_GET(hdr1->count, ARCH_CONVERT)-count, + leaf2, 0, count, state->mp); + + xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); + xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1); + } else if (count > INT_GET(hdr1->count, ARCH_CONVERT)) { + /* + * I assert that since all callers pass in an empty + * second buffer, this code should never execute. + */ + + /* + * Figure the total bytes to be added to the destination leaf.
+ */ + /* number of entries being moved */ + count -= INT_GET(hdr1->count, ARCH_CONVERT); + space = totallen - INT_GET(hdr1->usedbytes, ARCH_CONVERT); + space += count * sizeof(xfs_attr_leaf_entry_t); + + /* + * leaf1 is the destination, compact it if it looks tight. + */ + max = INT_GET(hdr1->firstused, ARCH_CONVERT) + - sizeof(xfs_attr_leaf_hdr_t); + max -= INT_GET(hdr1->count, ARCH_CONVERT) + * sizeof(xfs_attr_leaf_entry_t); + if (space > max) { + xfs_attr_leaf_compact(args->trans, blk1->bp); + } + + /* + * Move low entries from leaf2 to high end of leaf1. + */ + xfs_attr_leaf_moveents(leaf2, 0, leaf1, + (int)INT_GET(hdr1->count, ARCH_CONVERT), count, + state->mp); + + xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); + xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1); + } + + /* + * Copy out last hashval in each block for B-tree code. + */ + blk1->hashval = + INT_GET(leaf1->entries[INT_GET(leaf1->hdr.count, + ARCH_CONVERT)-1].hashval, ARCH_CONVERT); + blk2->hashval = + INT_GET(leaf2->entries[INT_GET(leaf2->hdr.count, + ARCH_CONVERT)-1].hashval, ARCH_CONVERT); + + /* + * Adjust the expected index for insertion. + * NOTE: this code depends on the (current) situation that the + * second block was originally empty. + * + * If the insertion point moved to the 2nd block, we must adjust + * the index. We must also track the entry just following the + * new entry for use in an "atomic rename" operation, that entry + * is always the "old" entry and the "new" entry is what we are + * inserting. The index/blkno fields refer to the "old" entry, + * while the index2/blkno2 fields refer to the "new" entry.
+ */ + if (blk1->index > INT_GET(leaf1->hdr.count, ARCH_CONVERT)) { + ASSERT(state->inleaf == 0); + blk2->index = blk1->index + - INT_GET(leaf1->hdr.count, ARCH_CONVERT); + args->index = args->index2 = blk2->index; + args->blkno = args->blkno2 = blk2->blkno; + } else if (blk1->index == INT_GET(leaf1->hdr.count, ARCH_CONVERT)) { + if (state->inleaf) { + args->index = blk1->index; + args->blkno = blk1->blkno; + args->index2 = 0; /* first entry of blk2 */ + args->blkno2 = blk2->blkno; + } else { + blk2->index = blk1->index + - INT_GET(leaf1->hdr.count, ARCH_CONVERT); + args->index = args->index2 = blk2->index; + args->blkno = args->blkno2 = blk2->blkno; + } + } else { + ASSERT(state->inleaf == 1); + args->index = args->index2 = blk1->index; + args->blkno = args->blkno2 = blk1->blkno; + } +} + +/* + * Examine entries until we reduce the absolute difference in + * byte usage between the two blocks to a minimum. + * GROT: Is this really necessary? With other than a 512 byte blocksize, + * GROT: there will always be enough room in either block for a new entry. + * GROT: Do a double-split for this case? + */ +STATIC int +xfs_attr_leaf_figure_balance(xfs_da_state_t *state, + xfs_da_state_blk_t *blk1, + xfs_da_state_blk_t *blk2, + int *countarg, int *usedbytesarg) /* both are outputs */ +{ + xfs_attr_leafblock_t *leaf1, *leaf2; + xfs_attr_leaf_hdr_t *hdr1, *hdr2; + xfs_attr_leaf_entry_t *entry; + int count, max, index, totallen, half; + int lastdelta, foundit, tmp; + + /* + * Set up environment. + */ + leaf1 = blk1->bp->data; + leaf2 = blk2->bp->data; + hdr1 = &leaf1->hdr; + hdr2 = &leaf2->hdr; + foundit = 0; + totallen = 0; + + /* + * Examine entries until we reduce the absolute difference in + * byte usage between the two blocks to a minimum.
+ */ + max = INT_GET(hdr1->count, ARCH_CONVERT) + + INT_GET(hdr2->count, ARCH_CONVERT); + half = (max+1) * sizeof(*entry); + half += INT_GET(hdr1->usedbytes, ARCH_CONVERT) + + INT_GET(hdr2->usedbytes, ARCH_CONVERT) + + xfs_attr_leaf_newentsize(state->args, + state->blocksize, NULL); + half /= 2; + lastdelta = state->blocksize; + entry = &leaf1->entries[0]; + for (count = index = 0; count < max; entry++, index++, count++) { + +#define XFS_ATTR_ABS(A) (((A) < 0) ? -(A) : (A)) + /* + * The new entry is in the first block, account for it. + */ + if (count == blk1->index) { + tmp = totallen + sizeof(*entry) + + xfs_attr_leaf_newentsize(state->args, + state->blocksize, + NULL); + if (XFS_ATTR_ABS(half - tmp) > lastdelta) + break; + lastdelta = XFS_ATTR_ABS(half - tmp); + totallen = tmp; + foundit = 1; + } + + /* + * Wrap around into the second block if necessary. + */ + if (count == INT_GET(hdr1->count, ARCH_CONVERT)) { + leaf1 = leaf2; + entry = &leaf1->entries[0]; + index = 0; + } + + /* + * Figure out if next leaf entry would be too much. + */ + tmp = totallen + sizeof(*entry) + xfs_attr_leaf_entsize(leaf1, + index); + if (XFS_ATTR_ABS(half - tmp) > lastdelta) + break; + lastdelta = XFS_ATTR_ABS(half - tmp); + totallen = tmp; +#undef XFS_ATTR_ABS + } + + /* + * Calculate the number of usedbytes that will end up in lower block. + * If the new entry was counted in the lower block, back it out again. + */ + totallen -= count * sizeof(*entry); + if (foundit) { + totallen -= sizeof(*entry) + + xfs_attr_leaf_newentsize(state->args, + state->blocksize, + NULL); + } + + *countarg = count; + *usedbytesarg = totallen; + return(foundit); +} + +/*======================================================================== + * Routines used for shrinking the Btree. + *========================================================================*/ + +/* + * Check a leaf block and its neighbors to see if the block should be + * collapsed into one or the other neighbor.
Always keep the block + * with the smaller block number. + * If the current block is over 50% full, don't try to join it, return 0. + * If the block is empty, fill in the state structure and return 2. + * If it can be collapsed, fill in the state structure and return 1. + * If nothing can be done, return 0. + * + * GROT: allow for INCOMPLETE entries in calculation. + */ +int +xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) +{ + xfs_attr_leafblock_t *leaf; + xfs_da_state_blk_t *blk; + xfs_da_blkinfo_t *info; + int count, bytes, forward, error, retval, i; + xfs_dablk_t blkno; + xfs_dabuf_t *bp; + + /* + * Check for the degenerate case of the block being over 50% full. + * If so, it's not worth even looking to see if we might be able + * to coalesce with a sibling. + */ + blk = &state->path.blk[ state->path.active-1 ]; + info = blk->bp->data; + ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC); + leaf = (xfs_attr_leafblock_t *)info; + count = INT_GET(leaf->hdr.count, ARCH_CONVERT); + bytes = sizeof(xfs_attr_leaf_hdr_t) + + count * sizeof(xfs_attr_leaf_entry_t) + + INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT); + if (bytes > (state->blocksize >> 1)) { + *action = 0; /* blk over 50%, dont try to join; the 0/1/2 result goes in *action, the return value is an error code */ + return(0); + } + + /* + * Check for the degenerate case of the block being empty. + * If the block is empty, we'll simply delete it, no need to + * coalesce it with a sibling block. We choose (arbitrarily) + * to merge with the forward block unless it is NULL. + */ + if (count == 0) { + /* + * Make altpath point to the block we want to keep and + * path point to the block we want to drop (this one).
+ */ + forward = (INT_GET(info->forw, ARCH_CONVERT) != 0); + bcopy(&state->path, &state->altpath, sizeof(state->path)); + error = xfs_da_path_shift(state, &state->altpath, forward, + 0, &retval); + if (error) + return(error); + if (retval) { + *action = 0; + } else { + *action = 2; + } + return(0); + } + + /* + * Examine each sibling block to see if we can coalesce with + * at least 25% free space to spare. We need to figure out + * whether to merge with the forward or the backward block. + * We prefer coalescing with the lower numbered sibling so as + * to shrink an attribute list over time. + */ + /* start with smaller blk num */ + forward = (INT_GET(info->forw, ARCH_CONVERT) + < INT_GET(info->back, ARCH_CONVERT)); + for (i = 0; i < 2; forward = !forward, i++) { + if (forward) + blkno = INT_GET(info->forw, ARCH_CONVERT); + else + blkno = INT_GET(info->back, ARCH_CONVERT); + if (blkno == 0) + continue; /* no sibling on this side */ + error = xfs_da_read_buf(state->args->trans, state->args->dp, + blkno, -1, &bp, XFS_ATTR_FORK); + if (error) + return(error); + ASSERT(bp != NULL); + + leaf = (xfs_attr_leafblock_t *)info; + count = INT_GET(leaf->hdr.count, ARCH_CONVERT); + bytes = state->blocksize - (state->blocksize>>2); /* start from 75% of the block size */ + bytes -= INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT); + leaf = bp->data; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) + == XFS_ATTR_LEAF_MAGIC); + count += INT_GET(leaf->hdr.count, ARCH_CONVERT); + bytes -= INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT); + bytes -= count * sizeof(xfs_attr_leaf_entry_t); + bytes -= sizeof(xfs_attr_leaf_hdr_t); + xfs_da_brelse(state->args->trans, bp); + if (bytes >= 0) + break; /* fits with at least 25% to spare */ + } + if (i >= 2) { + *action = 0; + return(0); + } + + /* + * Make altpath point to the block we want to keep (the lower + * numbered block) and path point to the block we want to drop.
+ */ + bcopy(&state->path, &state->altpath, sizeof(state->path)); + if (blkno < blk->blkno) { + error = xfs_da_path_shift(state, &state->altpath, forward, + 0, &retval); + } else { + error = xfs_da_path_shift(state, &state->path, forward, + 0, &retval); + } + if (error) + return(error); + if (retval) { + *action = 0; + } else { + *action = 1; + } + return(0); +} + +/* + * Move all the attribute list entries from drop_leaf into save_leaf. + */ +void +xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, + xfs_da_state_blk_t *save_blk) +{ + xfs_attr_leafblock_t *drop_leaf, *save_leaf, *tmp_leaf; + xfs_attr_leaf_hdr_t *drop_hdr, *save_hdr, *tmp_hdr; + xfs_mount_t *mp; + char *tmpbuffer; + + /* + * Set up environment. + */ + mp = state->mp; + ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC); + ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC); + drop_leaf = drop_blk->bp->data; + save_leaf = save_blk->bp->data; + ASSERT(INT_GET(drop_leaf->hdr.info.magic, ARCH_CONVERT) + == XFS_ATTR_LEAF_MAGIC); + ASSERT(INT_GET(save_leaf->hdr.info.magic, ARCH_CONVERT) + == XFS_ATTR_LEAF_MAGIC); + drop_hdr = &drop_leaf->hdr; + save_hdr = &save_leaf->hdr; + + /* + * Save last hashval from dying block for later Btree fixup. + */ + drop_blk->hashval = + INT_GET(drop_leaf->entries[INT_GET(drop_leaf->hdr.count, + ARCH_CONVERT)-1].hashval, + ARCH_CONVERT); /* NOTE(review): entries[count-1] assumes count > 0 -- confirm against callers */ + + /* + * Check if we need a temp buffer, or can we do it in place. + * Note that we don't check "drop_leaf" for holes because we will + * always be dropping it, toosmall() decided that for us already. + */ + if (save_hdr->holes == 0) { + /* + * dest leaf has no holes, so we add there. May need + * to make some room in the entry array.
+ */ + if (xfs_attr_leaf_order(save_blk->bp, drop_blk->bp)) { + xfs_attr_leaf_moveents(drop_leaf, 0, save_leaf, 0, + (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp); + } else { + xfs_attr_leaf_moveents(drop_leaf, 0, save_leaf, + INT_GET(save_hdr->count, ARCH_CONVERT), + (int)INT_GET(drop_hdr->count, ARCH_CONVERT), + mp); + } + } else { + /* + * Destination has holes, so we build a temporary leaf, move + * both leaves into it, and copy the result over save_leaf. + */ + tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP); + ASSERT(tmpbuffer != NULL); + bzero(tmpbuffer, state->blocksize); + tmp_leaf = (xfs_attr_leafblock_t *)tmpbuffer; + tmp_hdr = &tmp_leaf->hdr; + tmp_hdr->info = save_hdr->info; /* struct copy */ + INT_SET(tmp_hdr->count, ARCH_CONVERT, 0); + INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize); + if (INT_GET(tmp_hdr->firstused, ARCH_CONVERT) == 0) { + INT_SET(tmp_hdr->firstused, ARCH_CONVERT, + state->blocksize - XFS_ATTR_LEAF_NAME_ALIGN); + } + INT_SET(tmp_hdr->usedbytes, ARCH_CONVERT, 0); + if (xfs_attr_leaf_order(save_blk->bp, drop_blk->bp)) { + xfs_attr_leaf_moveents(drop_leaf, 0, tmp_leaf, 0, + (int)INT_GET(drop_hdr->count, ARCH_CONVERT), + mp); + xfs_attr_leaf_moveents(save_leaf, 0, tmp_leaf, + INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT), + (int)INT_GET(save_hdr->count, ARCH_CONVERT), + mp); + } else { + xfs_attr_leaf_moveents(save_leaf, 0, tmp_leaf, 0, + (int)INT_GET(save_hdr->count, ARCH_CONVERT), + mp); + xfs_attr_leaf_moveents(drop_leaf, 0, tmp_leaf, + INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT), + (int)INT_GET(drop_hdr->count, ARCH_CONVERT), + mp); + } + bcopy((char *)tmp_leaf, (char *)save_leaf, state->blocksize); + kmem_free(tmpbuffer, state->blocksize); + } + + xfs_da_log_buf(state->args->trans, save_blk->bp, 0, + state->blocksize - 1); + + /* + * Copy out last hashval of the surviving block for B-tree code.
+ */ + save_blk->hashval = + INT_GET(save_leaf->entries[INT_GET(save_leaf->hdr.count, + ARCH_CONVERT)-1].hashval, + ARCH_CONVERT); +} + + +/*======================================================================== + * Utility routines. + *========================================================================*/ + +/* + * Move the indicated entries from one leaf to another. + * NOTE: this routine modifies both source and destination leaves. + */ +/*ARGSUSED*/ +STATIC void +xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, + xfs_attr_leafblock_t *leaf_d, int start_d, + int count, xfs_mount_t *mp) +{ + xfs_attr_leaf_hdr_t *hdr_s, *hdr_d; + xfs_attr_leaf_entry_t *entry_s, *entry_d; + int desti, tmp, i; + + /* + * Check for nothing to do. + */ + if (count == 0) + return; + + /* + * Set up environment. + */ + ASSERT(INT_GET(leaf_s->hdr.info.magic, ARCH_CONVERT) + == XFS_ATTR_LEAF_MAGIC); + ASSERT(INT_GET(leaf_d->hdr.info.magic, ARCH_CONVERT) + == XFS_ATTR_LEAF_MAGIC); + hdr_s = &leaf_s->hdr; + hdr_d = &leaf_d->hdr; + ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0) + && (INT_GET(hdr_s->count, ARCH_CONVERT) + < (XFS_LBSIZE(mp)/8))); + ASSERT(INT_GET(hdr_s->firstused, ARCH_CONVERT) >= + ((INT_GET(hdr_s->count, ARCH_CONVERT) + * sizeof(*entry_s))+sizeof(*hdr_s))); + ASSERT(INT_GET(hdr_d->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)); + ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= + ((INT_GET(hdr_d->count, ARCH_CONVERT) + * sizeof(*entry_d))+sizeof(*hdr_d))); + + ASSERT(start_s < INT_GET(hdr_s->count, ARCH_CONVERT)); + ASSERT(start_d <= INT_GET(hdr_d->count, ARCH_CONVERT)); + ASSERT(count <= INT_GET(hdr_s->count, ARCH_CONVERT)); + + /* + * Move the entries in the destination leaf up to make a hole?
+ */ + if (start_d < INT_GET(hdr_d->count, ARCH_CONVERT)) { + tmp = INT_GET(hdr_d->count, ARCH_CONVERT) - start_d; + tmp *= sizeof(xfs_attr_leaf_entry_t); + entry_s = &leaf_d->entries[start_d]; + entry_d = &leaf_d->entries[start_d + count]; + ovbcopy((char *)entry_s, (char *)entry_d, tmp); + } + + /* + * Copy all entry's in the same (sorted) order, + * but allocate attribute info packed and in sequence. + */ + entry_s = &leaf_s->entries[start_s]; + entry_d = &leaf_d->entries[start_d]; + desti = start_d; + for (i = 0; i < count; entry_s++, entry_d++, desti++, i++) { + ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + >= INT_GET(hdr_s->firstused, ARCH_CONVERT)); + tmp = xfs_attr_leaf_entsize(leaf_s, start_s + i); +#ifdef GROT + /* + * Code to drop INCOMPLETE entries. Difficult to use as we + * may also need to change the insertion index. Code turned + * off for 6.2, should be revisited later. + */ + if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */ + bzero(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), tmp); + INT_MOD(hdr_s->usedbytes, ARCH_CONVERT, -tmp); + INT_MOD(hdr_s->count, ARCH_CONVERT, -1); + entry_d--; /* to compensate for ++ in loop hdr */ + desti--; + if ((start_s + i) < offset) + result++; /* insertion index adjustment */ + } else { +#endif /* GROT */ + INT_MOD(hdr_d->firstused, ARCH_CONVERT, -tmp); + INT_SET(entry_d->hashval, ARCH_CONVERT, + INT_GET(entry_s->hashval, ARCH_CONVERT)); + INT_SET(entry_d->nameidx, ARCH_CONVERT, + INT_GET(hdr_d->firstused, + ARCH_CONVERT)); + entry_d->flags = entry_s->flags; + ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp + <= XFS_LBSIZE(mp)); + ovbcopy(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), + XFS_ATTR_LEAF_NAME(leaf_d, desti), tmp); + ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp + <= XFS_LBSIZE(mp)); + bzero(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), tmp); + INT_MOD(hdr_s->usedbytes, ARCH_CONVERT, -tmp); + INT_MOD(hdr_d->usedbytes, ARCH_CONVERT, tmp); + INT_MOD(hdr_s->count, ARCH_CONVERT, -1); + 
INT_MOD(hdr_d->count, ARCH_CONVERT, 1); + tmp = INT_GET(hdr_d->count, ARCH_CONVERT) + * sizeof(xfs_attr_leaf_entry_t) + + sizeof(xfs_attr_leaf_hdr_t); + ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= tmp); +#ifdef GROT + } +#endif /* GROT */ + } + + /* + * Zero out the entries we just copied. + */ + if (start_s == INT_GET(hdr_s->count, ARCH_CONVERT)) { + tmp = count * sizeof(xfs_attr_leaf_entry_t); + entry_s = &leaf_s->entries[start_s]; + ASSERT(((char *)entry_s + tmp) <= + ((char *)leaf_s + XFS_LBSIZE(mp))); + bzero((char *)entry_s, tmp); + } else { + /* + * Move the remaining entries down to fill the hole, + * then zero the entries at the top. + */ + tmp = INT_GET(hdr_s->count, ARCH_CONVERT) - count; + tmp *= sizeof(xfs_attr_leaf_entry_t); + entry_s = &leaf_s->entries[start_s + count]; + entry_d = &leaf_s->entries[start_s]; + ovbcopy((char *)entry_s, (char *)entry_d, tmp); + + tmp = count * sizeof(xfs_attr_leaf_entry_t); + entry_s = &leaf_s->entries[INT_GET(hdr_s->count, + ARCH_CONVERT)]; + ASSERT(((char *)entry_s + tmp) <= + ((char *)leaf_s + XFS_LBSIZE(mp))); + bzero((char *)entry_s, tmp); + } + + /* + * Fill in the freemap information + */ + INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, + sizeof(xfs_attr_leaf_hdr_t)); + INT_MOD(hdr_d->freemap[0].base, ARCH_CONVERT, + INT_GET(hdr_d->count, ARCH_CONVERT) + * sizeof(xfs_attr_leaf_entry_t)); + INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, + INT_GET(hdr_d->firstused, ARCH_CONVERT) + - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT)); + INT_SET(hdr_d->freemap[1].base, ARCH_CONVERT, 0); + INT_SET(hdr_d->freemap[2].base, ARCH_CONVERT, 0); + INT_SET(hdr_d->freemap[1].size, ARCH_CONVERT, 0); + INT_SET(hdr_d->freemap[2].size, ARCH_CONVERT, 0); + hdr_s->holes = 1; /* leaf may not be compact */ +} + +/* + * Compare two leaf blocks "order". + * Return 0 unless leaf2 should go before leaf1. 
+ */ +int +xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp) +{ + xfs_attr_leafblock_t *leaf1, *leaf2; + + leaf1 = leaf1_bp->data; + leaf2 = leaf2_bp->data; + ASSERT((INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) + == XFS_ATTR_LEAF_MAGIC) && + (INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) + == XFS_ATTR_LEAF_MAGIC)); + if ( (INT_GET(leaf1->hdr.count, ARCH_CONVERT) > 0) + && (INT_GET(leaf2->hdr.count, ARCH_CONVERT) > 0) + && ( (INT_GET(leaf2->entries[ 0 ].hashval, ARCH_CONVERT) < + INT_GET(leaf1->entries[ 0 ].hashval, ARCH_CONVERT)) + || (INT_GET(leaf2->entries[INT_GET(leaf2->hdr.count, + ARCH_CONVERT)-1].hashval, ARCH_CONVERT) < + INT_GET(leaf1->entries[INT_GET(leaf1->hdr.count, + ARCH_CONVERT)-1].hashval, ARCH_CONVERT))) ) { + return(1); /* both leaves non-empty and leaf2's first or last hashval is lower */ + } + return(0); /* also the answer whenever either leaf is empty */ +} + +/* + * Pick up the last hashvalue from a leaf block. + * If "count" is non-NULL the leaf's entry count is stored through it. + * An empty leaf yields a hashval of 0. + */ +xfs_dahash_t +xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count) +{ + xfs_attr_leafblock_t *leaf; + + leaf = bp->data; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) + == XFS_ATTR_LEAF_MAGIC); + if (count) + *count = INT_GET(leaf->hdr.count, ARCH_CONVERT); + if (INT_GET(leaf->hdr.count, ARCH_CONVERT) == 0) + return(0); + return(INT_GET(leaf->entries[INT_GET(leaf->hdr.count, + ARCH_CONVERT)-1].hashval, ARCH_CONVERT)); +} + +/* + * Calculate the number of bytes used to store the indicated attribute + * (whether local or remote only calculate bytes in this block). 
+ */ +int +xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) +{ + xfs_attr_leaf_name_local_t *name_loc; + xfs_attr_leaf_name_remote_t *name_rmt; + int size; + + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) + == XFS_ATTR_LEAF_MAGIC); + if (leaf->entries[index].flags & XFS_ATTR_LOCAL) { /* local format: size depends on name and value lengths */ + name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, index); + size = XFS_ATTR_LEAF_ENTSIZE_LOCAL(name_loc->namelen, + INT_GET(name_loc->valuelen, + ARCH_CONVERT)); + } else { /* remote format: size depends on the name length only */ + name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, index); + size = XFS_ATTR_LEAF_ENTSIZE_REMOTE(name_rmt->namelen); + } + return(size); +} + +/* + * Calculate the number of bytes that would be required to store the new + * attribute (whether local or remote only calculate bytes in this block). + * This routine decides as a side effect whether the attribute will be + * a "local" or a "remote" attribute. + * + * The local-format size is used when it is strictly below + * XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(blocksize); otherwise the remote-format + * size for args->namelen is returned. If "local" is non-NULL it is set + * to 1 or 0 accordingly. + */ +int +xfs_attr_leaf_newentsize(xfs_da_args_t *args, int blocksize, int *local) +{ + int size; + + size = XFS_ATTR_LEAF_ENTSIZE_LOCAL(args->namelen, args->valuelen); + if (size < XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(blocksize)) { + if (local) { + *local = 1; + } + } else { + size = XFS_ATTR_LEAF_ENTSIZE_REMOTE(args->namelen); + if (local) { + *local = 0; + } + } + return(size); +} diff --git a/libxfs/xfs_bit.c b/libxfs/xfs_bit.c new file mode 100644 index 000000000..52ab69a51 --- /dev/null +++ b/libxfs/xfs_bit.c @@ -0,0 +1,307 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * XFS bit manipulation routines, used in non-realtime code. + */ + +#include + +/* + * Index of low bit number in byte, -1 for none set, 0..7 otherwise. + * Indexed by the byte value; only entry 0 holds -1. + * NOTE(review): the element type is plain char, whose signedness is + * implementation-defined, so the -1 entry reads back as 255 where char + * is unsigned. The lookup routines in this file only index with a byte + * already known to be non-zero. + */ +const char xfs_lowbit[256] = { + -1, 0, 1, 0, 2, 0, 1, 0, /* 00 .. 07 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* 08 .. 0f */ + 4, 0, 1, 0, 2, 0, 1, 0, /* 10 .. 17 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* 18 .. 1f */ + 5, 0, 1, 0, 2, 0, 1, 0, /* 20 .. 27 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* 28 .. 2f */ + 4, 0, 1, 0, 2, 0, 1, 0, /* 30 .. 37 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* 38 .. 3f */ + 6, 0, 1, 0, 2, 0, 1, 0, /* 40 .. 47 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* 48 .. 4f */ + 4, 0, 1, 0, 2, 0, 1, 0, /* 50 .. 57 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* 58 .. 5f */ + 5, 0, 1, 0, 2, 0, 1, 0, /* 60 .. 67 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* 68 .. 6f */ + 4, 0, 1, 0, 2, 0, 1, 0, /* 70 .. 77 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* 78 .. 7f */ + 7, 0, 1, 0, 2, 0, 1, 0, /* 80 .. 87 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* 88 .. 8f */ + 4, 0, 1, 0, 2, 0, 1, 0, /* 90 .. 97 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* 98 .. 9f */ + 5, 0, 1, 0, 2, 0, 1, 0, /* a0 .. a7 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* a8 .. 
af */ + 4, 0, 1, 0, 2, 0, 1, 0, /* b0 .. b7 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* b8 .. bf */ + 6, 0, 1, 0, 2, 0, 1, 0, /* c0 .. c7 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* c8 .. cf */ + 4, 0, 1, 0, 2, 0, 1, 0, /* d0 .. d7 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* d8 .. df */ + 5, 0, 1, 0, 2, 0, 1, 0, /* e0 .. e7 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* e8 .. ef */ + 4, 0, 1, 0, 2, 0, 1, 0, /* f0 .. f7 */ + 3, 0, 1, 0, 2, 0, 1, 0, /* f8 .. ff */ +}; + +/* + * Index of high bit number in byte, -1 for none set, 0..7 otherwise. + * Indexed by the byte value; only entry 0 holds -1. + */ +const char xfs_highbit[256] = { + -1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */ + 3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */ + 4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 17 */ + 4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 1f */ + 5, 5, 5, 5, 5, 5, 5, 5, /* 20 .. 27 */ + 5, 5, 5, 5, 5, 5, 5, 5, /* 28 .. 2f */ + 5, 5, 5, 5, 5, 5, 5, 5, /* 30 .. 37 */ + 5, 5, 5, 5, 5, 5, 5, 5, /* 38 .. 3f */ + 6, 6, 6, 6, 6, 6, 6, 6, /* 40 .. 47 */ + 6, 6, 6, 6, 6, 6, 6, 6, /* 48 .. 4f */ + 6, 6, 6, 6, 6, 6, 6, 6, /* 50 .. 57 */ + 6, 6, 6, 6, 6, 6, 6, 6, /* 58 .. 5f */ + 6, 6, 6, 6, 6, 6, 6, 6, /* 60 .. 67 */ + 6, 6, 6, 6, 6, 6, 6, 6, /* 68 .. 6f */ + 6, 6, 6, 6, 6, 6, 6, 6, /* 70 .. 77 */ + 6, 6, 6, 6, 6, 6, 6, 6, /* 78 .. 7f */ + 7, 7, 7, 7, 7, 7, 7, 7, /* 80 .. 87 */ + 7, 7, 7, 7, 7, 7, 7, 7, /* 88 .. 8f */ + 7, 7, 7, 7, 7, 7, 7, 7, /* 90 .. 97 */ + 7, 7, 7, 7, 7, 7, 7, 7, /* 98 .. 9f */ + 7, 7, 7, 7, 7, 7, 7, 7, /* a0 .. a7 */ + 7, 7, 7, 7, 7, 7, 7, 7, /* a8 .. af */ + 7, 7, 7, 7, 7, 7, 7, 7, /* b0 .. b7 */ + 7, 7, 7, 7, 7, 7, 7, 7, /* b8 .. bf */ + 7, 7, 7, 7, 7, 7, 7, 7, /* c0 .. c7 */ + 7, 7, 7, 7, 7, 7, 7, 7, /* c8 .. cf */ + 7, 7, 7, 7, 7, 7, 7, 7, /* d0 .. d7 */ + 7, 7, 7, 7, 7, 7, 7, 7, /* d8 .. df */ + 7, 7, 7, 7, 7, 7, 7, 7, /* e0 .. e7 */ + 7, 7, 7, 7, 7, 7, 7, 7, /* e8 .. ef */ + 7, 7, 7, 7, 7, 7, 7, 7, /* f0 .. f7 */ + 7, 7, 7, 7, 7, 7, 7, 7, /* f8 .. ff */ +}; + +/* + * Count of bits set in byte, 0..8. + * Indexed by the byte value. + */ +const char xfs_countbit[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, /* 00 .. 
07 */ + 1, 2, 2, 3, 2, 3, 3, 4, /* 08 .. 0f */ + 1, 2, 2, 3, 2, 3, 3, 4, /* 10 .. 17 */ + 2, 3, 3, 4, 3, 4, 4, 5, /* 18 .. 1f */ + 1, 2, 2, 3, 2, 3, 3, 4, /* 20 .. 27 */ + 2, 3, 3, 4, 3, 4, 4, 5, /* 28 .. 2f */ + 2, 3, 3, 4, 3, 4, 4, 5, /* 30 .. 37 */ + 3, 4, 4, 5, 4, 5, 5, 6, /* 38 .. 3f */ + 1, 2, 2, 3, 2, 3, 3, 4, /* 40 .. 47 */ + 2, 3, 3, 4, 3, 4, 4, 5, /* 48 .. 4f */ + 2, 3, 3, 4, 3, 4, 4, 5, /* 50 .. 57 */ + 3, 4, 4, 5, 4, 5, 5, 6, /* 58 .. 5f */ + 2, 3, 3, 4, 3, 4, 4, 5, /* 60 .. 67 */ + 3, 4, 4, 5, 4, 5, 5, 6, /* 68 .. 6f */ + 3, 4, 4, 5, 4, 5, 5, 6, /* 70 .. 77 */ + 4, 5, 5, 6, 5, 6, 6, 7, /* 78 .. 7f */ + 1, 2, 2, 3, 2, 3, 3, 4, /* 80 .. 87 */ + 2, 3, 3, 4, 3, 4, 4, 5, /* 88 .. 8f */ + 2, 3, 3, 4, 3, 4, 4, 5, /* 90 .. 97 */ + 3, 4, 4, 5, 4, 5, 5, 6, /* 98 .. 9f */ + 2, 3, 3, 4, 3, 4, 4, 5, /* a0 .. a7 */ + 3, 4, 4, 5, 4, 5, 5, 6, /* a8 .. af */ + 3, 4, 4, 5, 4, 5, 5, 6, /* b0 .. b7 */ + 4, 5, 5, 6, 5, 6, 6, 7, /* b8 .. bf */ + 2, 3, 3, 4, 3, 4, 4, 5, /* c0 .. c7 */ + 3, 4, 4, 5, 4, 5, 5, 6, /* c8 .. cf */ + 3, 4, 4, 5, 4, 5, 5, 6, /* d0 .. d7 */ + 4, 5, 5, 6, 5, 6, 6, 7, /* d8 .. df */ + 3, 4, 4, 5, 4, 5, 5, 6, /* e0 .. e7 */ + 4, 5, 5, 6, 5, 6, 6, 7, /* e8 .. ef */ + 4, 5, 5, 6, 5, 6, 6, 7, /* f0 .. f7 */ + 5, 6, 6, 7, 6, 7, 7, 8, /* f8 .. ff */ +}; + +/* + * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set. + * Selects the most significant non-zero byte of v (shift i = 24, 16, 8 + * or 0) and adds that byte's xfs_highbit[] entry to the shift. The + * unbraced if/else nesting relies on each else pairing with the nearest + * unmatched if. + */ +int +xfs_highbit32( + __uint32_t v) +{ + int i; + + if (v & 0xffff0000) + if (v & 0xff000000) + i = 24; + else + i = 16; + else if (v & 0x0000ffff) + if (v & 0x0000ff00) + i = 8; + else + i = 0; + else + return -1; + return i + xfs_highbit[(v >> i) & 0xff]; +} + +/* + * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set. 
+ */ +int +xfs_lowbit64( + __uint64_t v) +{ + int i; /* bit offset of the least significant non-zero byte of v */ +#if XFS_64 /* binary search over the full 64-bit value for its lowest non-zero byte */ + if (v & 0x00000000ffffffff) + if (v & 0x000000000000ffff) + if (v & 0x00000000000000ff) + i = 0; + else + i = 8; + else + if (v & 0x0000000000ff0000) + i = 16; + else + i = 24; + else if (v & 0xffffffff00000000) + if (v & 0x0000ffff00000000) + if (v & 0x000000ff00000000) + i = 32; + else + i = 40; + else + if (v & 0x00ff000000000000) + i = 48; + else + i = 56; + else + return -1; + return i + xfs_lowbit[(v >> i) & 0xff]; +#else /* otherwise: same search done on one 32-bit half at a time, low half first */ + __uint32_t vw; + + if (vw = v) { /* assignment intended: vw takes the low half of v and is tested for non-zero */ + if (vw & 0x0000ffff) + if (vw & 0x000000ff) + i = 0; + else + i = 8; + else + if (vw & 0x00ff0000) + i = 16; + else + i = 24; + return i + xfs_lowbit[(vw >> i) & 0xff]; + } else if (vw = v >> 32) { /* assignment intended: low half was zero, try the high half */ + if (vw & 0x0000ffff) + if (vw & 0x000000ff) + i = 32; + else + i = 40; + else + if (vw & 0x00ff0000) + i = 48; + else + i = 56; + return i + xfs_lowbit[(vw >> (i - 32)) & 0xff]; /* i counts from bit 0 of v, vw from bit 32 */ + } else + return -1; +#endif +} + +/* + * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set. 
+ */ +int +xfs_highbit64( + __uint64_t v) +{ + int i; /* bit offset of the most significant non-zero byte of v */ +#if XFS_64 /* binary search over the full 64-bit value for its highest non-zero byte */ + if (v & 0xffffffff00000000) + if (v & 0xffff000000000000) + if (v & 0xff00000000000000) + i = 56; + else + i = 48; + else + if (v & 0x0000ff0000000000) + i = 40; + else + i = 32; + else if (v & 0x00000000ffffffff) + if (v & 0x00000000ffff0000) + if (v & 0x00000000ff000000) + i = 24; + else + i = 16; + else + if (v & 0x000000000000ff00) + i = 8; + else + i = 0; + else + return -1; + return i + xfs_highbit[(v >> i) & 0xff]; +#else /* otherwise: same search done on one 32-bit half at a time, high half first */ + __uint32_t vw; + + if (vw = v >> 32) { /* assignment intended: vw takes the high half of v and is tested for non-zero */ + if (vw & 0xffff0000) + if (vw & 0xff000000) + i = 56; + else + i = 48; + else + if (vw & 0x0000ff00) + i = 40; + else + i = 32; + return i + xfs_highbit[(vw >> (i - 32)) & 0xff]; /* i counts from bit 0 of v, vw from bit 32 */ + } else if (vw = v) { /* assignment intended: high half was zero, try the low half */ + if (vw & 0xffff0000) + if (vw & 0xff000000) + i = 24; + else + i = 16; + else + if (vw & 0x0000ff00) + i = 8; + else + i = 0; + return i + xfs_highbit[(vw >> i) & 0xff]; + } else + return -1; +#endif +} diff --git a/libxfs/xfs_bmap.c b/libxfs/xfs_bmap.c new file mode 100644 index 000000000..88e597e72 --- /dev/null +++ b/libxfs/xfs_bmap.c @@ -0,0 +1,4511 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include + +xfs_zone_t *xfs_bmap_free_item_zone; + +/* + * Called by xfs_bmapi to update extent list structure and the btree + * after allocating space (or doing a delayed allocation). + * + * Dispatches on the shape of the change: the first extent of an empty + * fork, a new delayed allocation, a real allocation past the last extent, + * a real extent overlapping the record at idx (delayed-to-real or an + * unwritten/real state conversion), or a real allocation into a hole. + * Afterwards the fork is converted from extents to btree format if it is + * in extents format and its extent count exceeds if_ext_max, and any part + * of the old delayed-allocation indirect reservation (da_old) that is not + * covered by da_new plus the cursor's allocated count is given back via + * XFS_SBS_FDBLOCKS. + * + * Returns the error value; the inode logging flags are stored through + * logflagsp on every exit path, and *curp is updated on success when a + * cursor exists. + */ +STATIC int /* error */ +xfs_bmap_add_extent( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_extnum_t idx, /* extent number to update/insert */ + xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ + xfs_bmbt_irec_t *new, /* new data to put in extent list */ + xfs_fsblock_t *first, /* pointer to firstblock variable */ + xfs_bmap_free_t *flist, /* list of extents to be freed */ + int *logflagsp, /* inode logging flags */ + int whichfork, /* data or attr fork */ + int rsvd) /* OK to use reserved data blocks */ +{ + xfs_btree_cur_t *cur; /* btree cursor or null */ + xfs_filblks_t da_new; /* new count del alloc blocks used */ + xfs_filblks_t da_old; /* old count del alloc blocks used */ + int error; /* error return value */ +#ifdef XFS_BMAP_TRACE + static char fname[] = "xfs_bmap_add_extent"; +#endif + xfs_ifork_t *ifp; /* inode fork ptr */ + int logflags; /* returned value */ + xfs_extnum_t nextents; /* number of extents in file now */ + + XFS_STATS_INC(xs_add_exlist); + cur = *curp; + ifp = XFS_IFORK_PTR(ip, whichfork); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(idx <= nextents); + da_old = da_new = 0; + error = 0; + /* + * This is the first extent added to a new/empty file. 
+ * Special case this one, so other routines get to assume there are + * already extents in the list. + */ + if (nextents == 0) { + xfs_bmap_trace_insert(fname, "insert empty", ip, 0, 1, new, + NULL, whichfork); + xfs_bmap_insert_exlist(ip, 0, 1, new, whichfork); + ASSERT(cur == NULL); + ifp->if_lastex = 0; + if (!ISNULLSTARTBLOCK(new->br_startblock)) { + XFS_IFORK_NEXT_SET(ip, whichfork, 1); + logflags = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork); + } else + logflags = 0; + } + /* + * Any kind of new delayed allocation goes here. + */ + else if (ISNULLSTARTBLOCK(new->br_startblock)) { + if (cur) + ASSERT((cur->bc_private.b.flags & + XFS_BTCUR_BPRV_WASDEL) == 0); + if (error = xfs_bmap_add_extent_hole_delay(ip, idx, cur, new, + &logflags, rsvd)) + goto done; + } + /* + * Real allocation off the end of the file. + */ + else if (idx == nextents) { + if (cur) + ASSERT((cur->bc_private.b.flags & + XFS_BTCUR_BPRV_WASDEL) == 0); + if (error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, + &logflags, whichfork)) + goto done; + } else { + xfs_bmbt_irec_t prev; /* old extent at offset idx */ + + /* + * Get the record referred to by idx. + */ + xfs_bmbt_get_all(&ifp->if_u1.if_extents[idx], &prev); + /* + * If it's a real allocation record, and the new allocation ends + * after the start of the referred to record, then we're filling + * in a delayed or unwritten allocation with a real one, or + * converting real back to unwritten. 
+ * Both state-conversion branches below make the same call; they + * differ only in which br_state the ASSERT expects. + */ + if (!ISNULLSTARTBLOCK(new->br_startblock) && + new->br_startoff + new->br_blockcount > prev.br_startoff) { + if (prev.br_state != XFS_EXT_UNWRITTEN && + ISNULLSTARTBLOCK(prev.br_startblock)) { + da_old = STARTBLOCKVAL(prev.br_startblock); + if (cur) + ASSERT(cur->bc_private.b.flags & + XFS_BTCUR_BPRV_WASDEL); + if (error = xfs_bmap_add_extent_delay_real(ip, + idx, &cur, new, &da_new, first, flist, + &logflags, rsvd)) + goto done; + } else if (new->br_state == XFS_EXT_NORM) { + ASSERT(new->br_state == XFS_EXT_NORM); + if (error = xfs_bmap_add_extent_unwritten_real( + ip, idx, &cur, new, &logflags)) + goto done; + } else { + ASSERT(new->br_state == XFS_EXT_UNWRITTEN); + if (error = xfs_bmap_add_extent_unwritten_real( + ip, idx, &cur, new, &logflags)) + goto done; + } + ASSERT(*curp == cur || *curp == NULL); + } + /* + * Otherwise we're filling in a hole with an allocation. + */ + else { + if (cur) + ASSERT((cur->bc_private.b.flags & + XFS_BTCUR_BPRV_WASDEL) == 0); + if (error = xfs_bmap_add_extent_hole_real(ip, idx, cur, + new, &logflags, whichfork)) + goto done; + } + } + + ASSERT(*curp == cur || *curp == NULL); + /* + * Convert to a btree if necessary. + */ + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) { + int tmp_logflags; /* partial log flag return val */ + + ASSERT(cur == NULL); + error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first, + flist, &cur, da_old > 0, &tmp_logflags, whichfork); + logflags |= tmp_logflags; + if (error) + goto done; + } + /* + * Adjust for changes in reserved delayed indirect blocks. + * Nothing to do for disk quotas here. + */ + if (da_old || da_new) { + xfs_filblks_t nblks; + + nblks = da_new; + if (cur) + nblks += cur->bc_private.b.allocated; + ASSERT(nblks <= da_old); + if (nblks < da_old) + xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, + (int)(da_old - nblks), rsvd); + } + /* + * Clear out the allocated field, done with it now in any case. 
+ */ + if (cur) { + cur->bc_private.b.allocated = 0; + *curp = cur; + } +done: +#ifdef XFSDEBUG + if (!error) + xfs_bmap_check_leaf_extents(*curp, ip, whichfork); +#endif + *logflagsp = logflags; + return error; +} + +/* + * Called by xfs_bmap_add_extent to handle cases converting a delayed + * allocation to a real allocation. + */ +STATIC int /* error */ +xfs_bmap_add_extent_delay_real( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_extnum_t idx, /* extent number to update/insert */ + xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ + xfs_bmbt_irec_t *new, /* new data to put in extent list */ + xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ + xfs_fsblock_t *first, /* pointer to firstblock variable */ + xfs_bmap_free_t *flist, /* list of extents to be freed */ + int *logflagsp, /* inode logging flags */ + int rsvd) /* OK to use reserved data block allocation */ +{ + xfs_bmbt_rec_t *base; /* base of extent entry list */ + xfs_btree_cur_t *cur; /* btree cursor */ + int diff; /* temp value */ + xfs_bmbt_rec_t *ep; /* extent entry for idx */ + int error; /* error return value */ +#ifdef XFS_BMAP_TRACE + static char fname[] = "xfs_bmap_add_extent_delay_real"; +#endif + int i; /* temp state */ + xfs_fileoff_t new_endoff; /* end offset of new entry */ + xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ + /* left is 0, right is 1, prev is 2 */ + int rval; /* return value (logging flags) */ + int state = 0;/* state bits, accessed thru macros */ + xfs_filblks_t temp; /* value for dnew calculations */ + xfs_filblks_t temp2; /* value for dnew calculations */ + int tmp_rval; /* partial logging flags */ + enum { /* bit number definitions for state */ + LEFT_CONTIG, RIGHT_CONTIG, + LEFT_FILLING, RIGHT_FILLING, + LEFT_DELAY, RIGHT_DELAY, + LEFT_VALID, RIGHT_VALID + }; + +#define LEFT r[0] +#define RIGHT r[1] +#define PREV r[2] +#define MASK(b) (1 << (b)) +#define MASK2(a,b) (MASK(a) | MASK(b)) +#define MASK3(a,b,c) (MASK2(a,b) | MASK(c)) +#define 
MASK4(a,b,c,d) (MASK3(a,b,c) | MASK(d)) +#define STATE_SET(b,v) ((v) ? (state |= MASK(b)) : (state &= ~MASK(b))) +#define STATE_TEST(b) (state & MASK(b)) +#define STATE_SET_TEST(b,v) ((v) ? ((state |= MASK(b)), 1) : \ + ((state &= ~MASK(b)), 0)) +#define SWITCH_STATE \ + (state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG)) + + /* + * Set up a bunch of variables to make the tests simpler. + */ + cur = *curp; + base = ip->i_df.if_u1.if_extents; + ep = &base[idx]; + xfs_bmbt_get_all(ep, &PREV); + new_endoff = new->br_startoff + new->br_blockcount; + ASSERT(PREV.br_startoff <= new->br_startoff); + ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); + /* + * Set flags determining what part of the previous delayed allocation + * extent is being replaced by a real allocation. + */ + STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff); + STATE_SET(RIGHT_FILLING, + PREV.br_startoff + PREV.br_blockcount == new_endoff); + /* + * Check and set flags if this segment has a left neighbor. + * Don't set contiguous if the combined extent would be too large. + */ + if (STATE_SET_TEST(LEFT_VALID, idx > 0)) { + xfs_bmbt_get_all(ep - 1, &LEFT); + STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(LEFT.br_startblock)); + } + STATE_SET(LEFT_CONTIG, + STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) && + LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && + LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && + LEFT.br_state == new->br_state && + LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN); + /* + * Check and set flags if this segment has a right neighbor. + * Don't set contiguous if the combined extent would be too large. + * Also check for all-three-contiguous being too large. 
+ */ + if (STATE_SET_TEST(RIGHT_VALID, + idx < + ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) { + xfs_bmbt_get_all(ep + 1, &RIGHT); + STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(RIGHT.br_startblock)); + } + STATE_SET(RIGHT_CONTIG, + STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) && + new_endoff == RIGHT.br_startoff && + new->br_startblock + new->br_blockcount == + RIGHT.br_startblock && + new->br_state == RIGHT.br_state && + new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && + ((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) != + MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) || + LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount + <= MAXEXTLEN)); + error = 0; + /* + * Switch out based on the FILLING and CONTIG state bits. + */ + switch (SWITCH_STATE) { + + case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): + /* + * Filling in all of a previously delayed allocation extent. + * The left and right neighbors are both contiguous with new. 
+ */ + xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1, + XFS_DATA_FORK); + xfs_bmbt_set_blockcount(ep - 1, + LEFT.br_blockcount + PREV.br_blockcount + + RIGHT.br_blockcount); + xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1, + XFS_DATA_FORK); + xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2, + XFS_DATA_FORK); + xfs_bmap_delete_exlist(ip, idx, 2, XFS_DATA_FORK); + ip->i_df.if_lastex = idx - 1; + ip->i_d.di_nextents--; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if (error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, + RIGHT.br_startblock, + RIGHT.br_blockcount, &i)) + goto done; + ASSERT(i == 1); + if (error = xfs_bmbt_delete(cur, 0, &i)) + goto done; + ASSERT(i == 1); + if (error = xfs_bmbt_decrement(cur, 0, &i)) + goto done; + ASSERT(i == 1); + if (error = xfs_bmbt_update(cur, LEFT.br_startoff, + LEFT.br_startblock, + LEFT.br_blockcount + + PREV.br_blockcount + + RIGHT.br_blockcount, LEFT.br_state)) + goto done; + } + *dnew = 0; + break; + + case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG): + /* + * Filling in all of a previously delayed allocation extent. + * The left neighbor is contiguous, the right is not. 
+ */ + xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1, + XFS_DATA_FORK); + xfs_bmbt_set_blockcount(ep - 1, + LEFT.br_blockcount + PREV.br_blockcount); + xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1, + XFS_DATA_FORK); + ip->i_df.if_lastex = idx - 1; + xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1, + XFS_DATA_FORK); + xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK); + if (cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + if (error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff, + LEFT.br_startblock, LEFT.br_blockcount, + &i)) + goto done; + ASSERT(i == 1); + if (error = xfs_bmbt_update(cur, LEFT.br_startoff, + LEFT.br_startblock, + LEFT.br_blockcount + + PREV.br_blockcount, LEFT.br_state)) + goto done; + } + *dnew = 0; + break; + + case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG): + /* + * Filling in all of a previously delayed allocation extent. + * The right neighbor is contiguous, the left is not. + */ + xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx, + XFS_DATA_FORK); + xfs_bmbt_set_startblock(ep, new->br_startblock); + xfs_bmbt_set_blockcount(ep, + PREV.br_blockcount + RIGHT.br_blockcount); + xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx, + XFS_DATA_FORK); + ip->i_df.if_lastex = idx; + xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1, + XFS_DATA_FORK); + xfs_bmap_delete_exlist(ip, idx + 1, 1, XFS_DATA_FORK); + if (cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + if (error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, + RIGHT.br_startblock, + RIGHT.br_blockcount, &i)) + goto done; + ASSERT(i == 1); + if (error = xfs_bmbt_update(cur, PREV.br_startoff, + new->br_startblock, + PREV.br_blockcount + + RIGHT.br_blockcount, PREV.br_state)) + goto done; + } + *dnew = 0; + break; + + case MASK2(LEFT_FILLING, RIGHT_FILLING): + /* + * Filling in all of a previously delayed allocation extent. + * Neither the left nor right neighbors are contiguous with + * the new one. 
+ */ + xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx, + XFS_DATA_FORK); + xfs_bmbt_set_startblock(ep, new->br_startblock); + xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx, + XFS_DATA_FORK); + ip->i_df.if_lastex = idx; + ip->i_d.di_nextents++; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i)) + goto done; + ASSERT(i == 0); + cur->bc_rec.b.br_state = XFS_EXT_NORM; + if (error = xfs_bmbt_insert(cur, &i)) + goto done; + ASSERT(i == 1); + } + *dnew = 0; + break; + + case MASK2(LEFT_FILLING, LEFT_CONTIG): + /* + * Filling in the first part of a previous delayed allocation. + * The left neighbor is contiguous. + */ + xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1, + XFS_DATA_FORK); + xfs_bmbt_set_blockcount(ep - 1, + LEFT.br_blockcount + new->br_blockcount); + xfs_bmbt_set_startoff(ep, + PREV.br_startoff + new->br_blockcount); + xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1, + XFS_DATA_FORK); + temp = PREV.br_blockcount - new->br_blockcount; + xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx, + XFS_DATA_FORK); + xfs_bmbt_set_blockcount(ep, temp); + ip->i_df.if_lastex = idx - 1; + if (cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + if (error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff, + LEFT.br_startblock, LEFT.br_blockcount, + &i)) + goto done; + ASSERT(i == 1); + if (error = xfs_bmbt_update(cur, LEFT.br_startoff, + LEFT.br_startblock, + LEFT.br_blockcount + + new->br_blockcount, + LEFT.br_state)) + goto done; + } + temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + STARTBLOCKVAL(PREV.br_startblock)); + xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); + xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx, + XFS_DATA_FORK); + *dnew = temp; + break; + + case MASK(LEFT_FILLING): + /* + * Filling in the first part of a previous delayed allocation. 
+ * The left neighbor is not contiguous. + */ + xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK); + xfs_bmbt_set_startoff(ep, new_endoff); + temp = PREV.br_blockcount - new->br_blockcount; + xfs_bmbt_set_blockcount(ep, temp); + xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL, + XFS_DATA_FORK); + xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK); + ip->i_df.if_lastex = idx; + ip->i_d.di_nextents++; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i)) + goto done; + ASSERT(i == 0); + cur->bc_rec.b.br_state = XFS_EXT_NORM; + if (error = xfs_bmbt_insert(cur, &i)) + goto done; + ASSERT(i == 1); + } + if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && + ip->i_d.di_nextents > ip->i_df.if_ext_max) { + error = xfs_bmap_extents_to_btree(ip->i_transp, ip, + first, flist, &cur, 1, &tmp_rval, + XFS_DATA_FORK); + rval |= tmp_rval; + if (error) + goto done; + } + temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + STARTBLOCKVAL(PREV.br_startblock) - + (cur ? cur->bc_private.b.allocated : 0)); + base = ip->i_df.if_u1.if_extents; + ep = &base[idx + 1]; + xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); + xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1, + XFS_DATA_FORK); + *dnew = temp; + break; + + case MASK2(RIGHT_FILLING, RIGHT_CONTIG): + /* + * Filling in the last part of a previous delayed allocation. + * The right neighbor is contiguous with the new allocation. 
+ */ + temp = PREV.br_blockcount - new->br_blockcount; + xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx, + XFS_DATA_FORK); + xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1, + XFS_DATA_FORK); + xfs_bmbt_set_blockcount(ep, temp); + xfs_bmbt_set_allf(ep + 1, new->br_startoff, new->br_startblock, + new->br_blockcount + RIGHT.br_blockcount, + RIGHT.br_state); + xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1, + XFS_DATA_FORK); + ip->i_df.if_lastex = idx + 1; + if (cur == NULL) + rval = XFS_ILOG_DEXT; + else { + rval = 0; + if (error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff, + RIGHT.br_startblock, + RIGHT.br_blockcount, &i)) + goto done; + ASSERT(i == 1); + if (error = xfs_bmbt_update(cur, new->br_startoff, + new->br_startblock, + new->br_blockcount + + RIGHT.br_blockcount, + RIGHT.br_state)) + goto done; + } + temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + STARTBLOCKVAL(PREV.br_startblock)); + xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); + xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx, + XFS_DATA_FORK); + *dnew = temp; + break; + + case MASK(RIGHT_FILLING): + /* + * Filling in the last part of a previous delayed allocation. + * The right neighbor is not contiguous. 
+ */ + temp = PREV.br_blockcount - new->br_blockcount; + xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK); + xfs_bmbt_set_blockcount(ep, temp); + xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1, + new, NULL, XFS_DATA_FORK); + xfs_bmap_insert_exlist(ip, idx + 1, 1, new, XFS_DATA_FORK); + ip->i_df.if_lastex = idx + 1; + ip->i_d.di_nextents++; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i)) + goto done; + ASSERT(i == 0); + cur->bc_rec.b.br_state = XFS_EXT_NORM; + if (error = xfs_bmbt_insert(cur, &i)) + goto done; + ASSERT(i == 1); + } + if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && + ip->i_d.di_nextents > ip->i_df.if_ext_max) { + error = xfs_bmap_extents_to_btree(ip->i_transp, ip, + first, flist, &cur, 1, &tmp_rval, + XFS_DATA_FORK); + rval |= tmp_rval; + if (error) + goto done; + } + temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + STARTBLOCKVAL(PREV.br_startblock) - + (cur ? cur->bc_private.b.allocated : 0)); + base = ip->i_df.if_u1.if_extents; + ep = &base[idx]; + xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); + xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK); + *dnew = temp; + break; + + case 0: + /* + * Filling in the middle part of a previous delayed allocation. + * Contiguity is impossible here. + * This case is avoided almost all the time. 
+ */ + temp = new->br_startoff - PREV.br_startoff; + xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK); + xfs_bmbt_set_blockcount(ep, temp); + r[0] = *new; + r[1].br_startoff = new_endoff; + temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; + r[1].br_blockcount = temp2; + xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1], + XFS_DATA_FORK); + xfs_bmap_insert_exlist(ip, idx + 1, 2, &r[0], XFS_DATA_FORK); + ip->i_df.if_lastex = idx + 1; + ip->i_d.di_nextents++; + if (cur == NULL) + rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; + else { + rval = XFS_ILOG_CORE; + if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i)) + goto done; + ASSERT(i == 0); + cur->bc_rec.b.br_state = XFS_EXT_NORM; + if (error = xfs_bmbt_insert(cur, &i)) + goto done; + ASSERT(i == 1); + } + if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && + ip->i_d.di_nextents > ip->i_df.if_ext_max) { + error = xfs_bmap_extents_to_btree(ip->i_transp, ip, + first, flist, &cur, 1, &tmp_rval, + XFS_DATA_FORK); + rval |= tmp_rval; + if (error) + goto done; + } + temp = xfs_bmap_worst_indlen(ip, temp); + temp2 = xfs_bmap_worst_indlen(ip, temp2); + diff = (int)(temp + temp2 - STARTBLOCKVAL(PREV.br_startblock) - + (cur ? cur->bc_private.b.allocated : 0)); + if (diff > 0 && + xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -diff, rsvd)) { + /* + * Ick gross gag me with a spoon. + */ + ASSERT(0); /* want to see if this ever happens! 
*/ + while (diff > 0) { + if (temp) { + temp--; + diff--; + if (!diff || + !xfs_mod_incore_sb(ip->i_mount, + XFS_SBS_FDBLOCKS, -diff, rsvd)) + break; + } + if (temp2) { + temp2--; + diff--; + if (!diff || + !xfs_mod_incore_sb(ip->i_mount, + XFS_SBS_FDBLOCKS, -diff, rsvd)) + break; + } + } + } + base = ip->i_df.if_u1.if_extents; + ep = &base[idx]; + xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); + xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK); + xfs_bmap_trace_pre_update(fname, "0", ip, idx + 2, + XFS_DATA_FORK); + xfs_bmbt_set_startblock(ep + 2, NULLSTARTBLOCK((int)temp2)); + xfs_bmap_trace_post_update(fname, "0", ip, idx + 2, + XFS_DATA_FORK); + *dnew = temp + temp2; + break; + + case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): + case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG): + case MASK2(LEFT_FILLING, RIGHT_CONTIG): + case MASK2(RIGHT_FILLING, LEFT_CONTIG): + case MASK2(LEFT_CONTIG, RIGHT_CONTIG): + case MASK(LEFT_CONTIG): + case MASK(RIGHT_CONTIG): + /* + * These cases are all impossible. + */ + ASSERT(0); + } + *curp = cur; +done: + *logflagsp = rval; + return error; +#undef LEFT +#undef RIGHT +#undef PREV +#undef MASK +#undef MASK2 +#undef MASK3 +#undef MASK4 +#undef STATE_SET +#undef STATE_TEST +#undef STATE_SET_TEST +#undef SWITCH_STATE +} + +/* + * Called by xfs_bmap_add_extent to handle cases converting an unwritten + * allocation to a real allocation or vice versa. 
+ *
+ * The extent list entry at idx (PREV) is expected to be in the opposite
+ * state from new->br_state and to cover the whole range described by new;
+ * both conditions are ASSERTed.  Depending on which edges of PREV the new
+ * range reaches (the FILLING bits) and on whether the entries at idx - 1
+ * and idx + 1 can be merged with it (the CONTIG bits), the in-core extent
+ * list is updated in place, shrunk or grown.  When *curp is non-null the
+ * matching bmap btree records are changed through that cursor as well.
+ *
+ * The inode logging flags are stored in *logflagsp on every return path.
+ * Returns 0, or the first error reported by a btree operation.
+ */
+STATIC int				/* error */
+xfs_bmap_add_extent_unwritten_real(
+	xfs_inode_t		*ip,	/* incore inode pointer */
+	xfs_extnum_t		idx,	/* extent number to update/insert */
+	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
+	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	int			*logflagsp) /* inode logging flags */
+{
+	xfs_bmbt_rec_t		*base;	/* base of extent entry list */
+	xfs_btree_cur_t		*cur;	/* btree cursor */
+	xfs_bmbt_rec_t		*ep;	/* extent entry for idx */
+	int			error;	/* error return value */
+#ifdef XFS_BMAP_TRACE
+	static char		fname[] = "xfs_bmap_add_extent_unwritten_real";
+#endif
+	int			i;	/* temp state */
+	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
+	xfs_exntst_t		newext;	/* new extent state */
+	xfs_exntst_t		oldext;	/* old extent state */
+	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
+					/* left is 0, right is 1, prev is 2 */
+	int			rval = 0; /* return value (logging flags) */
+					/* preset so the "impossible" cases */
+					/* never store garbage in *logflagsp */
+	int			state = 0;/* state bits, accessed thru macros */
+	enum {				/* bit number definitions for state */
+		LEFT_CONTIG,	RIGHT_CONTIG,
+		LEFT_FILLING,	RIGHT_FILLING,
+		LEFT_DELAY,	RIGHT_DELAY,
+		LEFT_VALID,	RIGHT_VALID
+	};
+
+#define	LEFT		r[0]
+#define	RIGHT		r[1]
+#define	PREV		r[2]
+#define	MASK(b)		(1 << (b))
+#define	MASK2(a,b)	(MASK(a) | MASK(b))
+#define	MASK3(a,b,c)	(MASK2(a,b) | MASK(c))
+#define	MASK4(a,b,c,d)	(MASK3(a,b,c) | MASK(d))
+#define	STATE_SET(b,v)	((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
+#define	STATE_TEST(b)	(state & MASK(b))
+#define	STATE_SET_TEST(b,v)	((v) ? ((state |= MASK(b)), 1) : \
+				       ((state &= ~MASK(b)), 0))
+#define	SWITCH_STATE		\
+	(state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG))
+
+	/*
+	 * Set up a bunch of variables to make the tests simpler.
+	 */
+	error = 0;
+	cur = *curp;
+	base = ip->i_df.if_u1.if_extents;
+	ep = &base[idx];
+	xfs_bmbt_get_all(ep, &PREV);
+	newext = new->br_state;
+	oldext = (newext == XFS_EXT_UNWRITTEN) ?
+		XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
+	ASSERT(PREV.br_state == oldext);
+	new_endoff = new->br_startoff + new->br_blockcount;
+	ASSERT(PREV.br_startoff <= new->br_startoff);
+	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
+	/*
+	 * Set flags determining what part of the previous oldext allocation
+	 * extent is being replaced by a newext allocation.
+	 */
+	STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff);
+	STATE_SET(RIGHT_FILLING,
+		PREV.br_startoff + PREV.br_blockcount == new_endoff);
+	/*
+	 * Check and set flags if this segment has a left neighbor.
+	 * Don't set contiguous if the combined extent would be too large.
+	 */
+	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
+		xfs_bmbt_get_all(ep - 1, &LEFT);
+		STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(LEFT.br_startblock));
+	}
+	STATE_SET(LEFT_CONTIG,
+		STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) &&
+		LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
+		LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
+		LEFT.br_state == newext &&
+		LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN);
+	/*
+	 * Check and set flags if this segment has a right neighbor.
+	 * Don't set contiguous if the combined extent would be too large.
+	 * Also check for all-three-contiguous being too large.
+	 */
+	if (STATE_SET_TEST(RIGHT_VALID,
+			idx <
+			ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) {
+		xfs_bmbt_get_all(ep + 1, &RIGHT);
+		STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(RIGHT.br_startblock));
+	}
+	STATE_SET(RIGHT_CONTIG,
+		STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) &&
+		new_endoff == RIGHT.br_startoff &&
+		new->br_startblock + new->br_blockcount ==
+		    RIGHT.br_startblock &&
+		newext == RIGHT.br_state &&
+		new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
+		((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) !=
+		  MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) ||
+		 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
+		     <= MAXEXTLEN));
+	/*
+	 * Switch out based on the FILLING and CONTIG state bits.
+	 */
+	switch (SWITCH_STATE) {
+
+	case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+		/*
+		 * Setting all of a previous oldext extent to newext.
+		 * The left and right neighbors are both contiguous with new.
+		 * Three list entries collapse into the left one.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep - 1,
+			LEFT.br_blockcount + PREV.br_blockcount +
+			RIGHT.br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
+			XFS_DATA_FORK);
+		xfs_bmap_delete_exlist(ip, idx, 2, XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx - 1;
+		ip->i_d.di_nextents -= 2;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+					RIGHT.br_startblock,
+					RIGHT.br_blockcount, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_delete(cur, 0, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_delete(cur, 0, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
+				LEFT.br_startblock,
+				LEFT.br_blockcount + PREV.br_blockcount +
+				RIGHT.br_blockcount, LEFT.br_state)))
+				goto done;
+		}
+		break;
+
+	case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG):
+		/*
+		 * Setting all of a previous oldext extent to newext.
+		 * The left neighbor is contiguous, the right is not.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep - 1,
+			LEFT.br_blockcount + PREV.br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx - 1;
+		xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1,
+			XFS_DATA_FORK);
+		xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK);
+		ip->i_d.di_nextents--;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+					PREV.br_startblock, PREV.br_blockcount,
+					&i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_delete(cur, 0, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
+				LEFT.br_startblock,
+				LEFT.br_blockcount + PREV.br_blockcount,
+				LEFT.br_state)))
+				goto done;
+		}
+		break;
+
+	case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG):
+		/*
+		 * Setting all of a previous oldext extent to newext.
+		 * The right neighbor is contiguous, the left is not.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep,
+			PREV.br_blockcount + RIGHT.br_blockcount);
+		xfs_bmbt_set_state(ep, newext);
+		xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx;
+		xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1,
+			XFS_DATA_FORK);
+		xfs_bmap_delete_exlist(ip, idx + 1, 1, XFS_DATA_FORK);
+		ip->i_d.di_nextents--;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+					RIGHT.br_startblock,
+					RIGHT.br_blockcount, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_delete(cur, 0, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_update(cur, new->br_startoff,
+				new->br_startblock,
+				new->br_blockcount + RIGHT.br_blockcount,
+				newext)))
+				goto done;
+		}
+		break;
+
+	case MASK2(LEFT_FILLING, RIGHT_FILLING):
+		/*
+		 * Setting all of a previous oldext extent to newext.
+		 * Neither the left nor right neighbors are contiguous with
+		 * the new one.  Only the state of the entry changes.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_state(ep, newext);
+		xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx;
+		if (cur == NULL)
+			rval = XFS_ILOG_DEXT;
+		else {
+			rval = 0;
+			if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+					new->br_startblock, new->br_blockcount,
+					&i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_update(cur, new->br_startoff,
+				new->br_startblock, new->br_blockcount,
+				newext)))
+				goto done;
+		}
+		break;
+
+	case MASK2(LEFT_FILLING, LEFT_CONTIG):
+		/*
+		 * Setting the first part of a previous oldext extent to newext.
+		 * The left neighbor is contiguous.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep - 1,
+			LEFT.br_blockcount + new->br_blockcount);
+		xfs_bmbt_set_startoff(ep,
+			PREV.br_startoff + new->br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_startblock(ep,
+			new->br_startblock + new->br_blockcount);
+		xfs_bmbt_set_blockcount(ep,
+			PREV.br_blockcount - new->br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx - 1;
+		if (cur == NULL)
+			rval = XFS_ILOG_DEXT;
+		else {
+			rval = 0;
+			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+					PREV.br_startblock, PREV.br_blockcount,
+					&i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_update(cur,
+				PREV.br_startoff + new->br_blockcount,
+				PREV.br_startblock + new->br_blockcount,
+				PREV.br_blockcount - new->br_blockcount,
+				oldext)))
+				goto done;
+			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+				goto done;
+			/*
+			 * Capture the result in error: testing the call
+			 * without the assignment jumped to done with
+			 * error still 0, reporting success for a failed
+			 * update of the left record.
+			 */
+			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
+				LEFT.br_startblock,
+				LEFT.br_blockcount + new->br_blockcount,
+				LEFT.br_state)))
+				goto done;
+		}
+		break;
+
+	case MASK(LEFT_FILLING):
+		/*
+		 * Setting the first part of a previous oldext extent to newext.
+		 * The left neighbor is not contiguous.
+		 * The old entry is trimmed and new is inserted in front of it.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK);
+		ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
+		xfs_bmbt_set_startoff(ep, new_endoff);
+		xfs_bmbt_set_blockcount(ep,
+			PREV.br_blockcount - new->br_blockcount);
+		xfs_bmbt_set_startblock(ep,
+			new->br_startblock + new->br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LF", ip, idx, XFS_DATA_FORK);
+		xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL,
+			XFS_DATA_FORK);
+		xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx;
+		ip->i_d.di_nextents++;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+					PREV.br_startblock, PREV.br_blockcount,
+					&i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_update(cur,
+				PREV.br_startoff + new->br_blockcount,
+				PREV.br_startblock + new->br_blockcount,
+				PREV.br_blockcount - new->br_blockcount,
+				oldext)))
+				goto done;
+			cur->bc_rec.b = *new;
+			if ((error = xfs_bmbt_insert(cur, &i)))
+				goto done;
+			ASSERT(i == 1);
+		}
+		break;
+
+	case MASK2(RIGHT_FILLING, RIGHT_CONTIG):
+		/*
+		 * Setting the last part of a previous oldext extent to newext.
+		 * The right neighbor is contiguous with the new allocation.
+		 */
+		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx,
+			XFS_DATA_FORK);
+		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep,
+			PREV.br_blockcount - new->br_blockcount);
+		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_allf(ep + 1, new->br_startoff, new->br_startblock,
+			new->br_blockcount + RIGHT.br_blockcount, newext);
+		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx + 1;
+		if (cur == NULL)
+			rval = XFS_ILOG_DEXT;
+		else {
+			rval = 0;
+			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+					PREV.br_startblock,
+					PREV.br_blockcount, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
+				PREV.br_startblock,
+				PREV.br_blockcount - new->br_blockcount,
+				oldext)))
+				goto done;
+			if ((error = xfs_bmbt_increment(cur, 0, &i)))
+				goto done;
+			if ((error = xfs_bmbt_update(cur, new->br_startoff,
+				new->br_startblock,
+				new->br_blockcount + RIGHT.br_blockcount,
+				newext)))
+				goto done;
+		}
+		break;
+
+	case MASK(RIGHT_FILLING):
+		/*
+		 * Setting the last part of a previous oldext extent to newext.
+		 * The right neighbor is not contiguous.
+		 * The old entry is trimmed and new is inserted after it.
+		 */
+		xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep,
+			PREV.br_blockcount - new->br_blockcount);
+		xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
+		xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1,
+			new, NULL, XFS_DATA_FORK);
+		xfs_bmap_insert_exlist(ip, idx + 1, 1, new, XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx + 1;
+		ip->i_d.di_nextents++;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+					PREV.br_startblock, PREV.br_blockcount,
+					&i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
+				PREV.br_startblock,
+				PREV.br_blockcount - new->br_blockcount,
+				oldext)))
+				goto done;
+			if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+					new->br_startblock, new->br_blockcount,
+					&i)))
+				goto done;
+			ASSERT(i == 0);
+			/*
+			 * The inserted record describes "new", so it must
+			 * carry the new extent state.  Forcing XFS_EXT_NORM
+			 * left the btree record disagreeing with the in-core
+			 * entry whenever new is XFS_EXT_UNWRITTEN.
+			 */
+			cur->bc_rec.b.br_state = new->br_state;
+			if ((error = xfs_bmbt_insert(cur, &i)))
+				goto done;
+			ASSERT(i == 1);
+		}
+		break;
+
+	case 0:
+		/*
+		 * Setting the middle part of a previous oldext extent to
+		 * newext.  Contiguity is impossible here.
+		 * One extent becomes three extents.
+		 * r[0] and r[1] (LEFT and RIGHT) are reused below to hold
+		 * the two entries being inserted.
+		 */
+		xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep,
+			new->br_startoff - PREV.br_startoff);
+		xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK);
+		r[0] = *new;
+		r[1].br_startoff = new_endoff;
+		r[1].br_blockcount =
+			PREV.br_startoff + PREV.br_blockcount - new_endoff;
+		r[1].br_startblock = new->br_startblock + new->br_blockcount;
+		r[1].br_state = oldext;
+		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1],
+			XFS_DATA_FORK);
+		xfs_bmap_insert_exlist(ip, idx + 1, 2, &r[0], XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx + 1;
+		ip->i_d.di_nextents += 2;
+		if (cur == NULL)
+			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
+		else {
+			rval = XFS_ILOG_CORE;
+			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
+					PREV.br_startblock, PREV.br_blockcount,
+					&i)))
+				goto done;
+			ASSERT(i == 1);
+			/* new right extent - oldext */
+			if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
+				r[1].br_startblock, r[1].br_blockcount,
+				r[1].br_state)))
+				goto done;
+			/* new left extent - oldext */
+			PREV.br_blockcount =
+				new->br_startoff - PREV.br_startoff;
+			cur->bc_rec.b = PREV;
+			if ((error = xfs_bmbt_insert(cur, &i)))
+				goto done;
+			ASSERT(i == 1);
+			if ((error = xfs_bmbt_increment(cur, 0, &i)))
+				goto done;
+			ASSERT(i == 1);
+			/* new middle extent - newext */
+			cur->bc_rec.b = *new;
+			if ((error = xfs_bmbt_insert(cur, &i)))
+				goto done;
+			ASSERT(i == 1);
+		}
+		break;
+
+	case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+	case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
+	case MASK2(LEFT_FILLING, RIGHT_CONTIG):
+	case MASK2(RIGHT_FILLING, LEFT_CONTIG):
+	case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
+	case MASK(LEFT_CONTIG):
+	case MASK(RIGHT_CONTIG):
+		/*
+		 * These cases are all impossible.
+		 */
+		ASSERT(0);
+	}
+	*curp = cur;
+done:
+	*logflagsp = rval;
+	return error;
+#undef	LEFT
+#undef	RIGHT
+#undef	PREV
+#undef	MASK
+#undef	MASK2
+#undef	MASK3
+#undef	MASK4
+#undef	STATE_SET
+#undef	STATE_TEST
+#undef	STATE_SET_TEST
+#undef	SWITCH_STATE
+}
+
+/*
+ * Called by xfs_bmap_add_extent to handle cases converting a hole
+ * to a delayed allocation.
+ *
+ * The new delayed extent is merged with delayed neighbors where the
+ * contiguity and MAXEXTLEN tests allow it, otherwise it is inserted at idx.
+ * Only the in-core data fork extent list is touched; cur is not referenced
+ * (hence ARGSUSED).  Always stores 0 in *logflagsp and returns 0.
+ */
+/*ARGSUSED*/
+STATIC int				/* error */
+xfs_bmap_add_extent_hole_delay(
+	xfs_inode_t		*ip,	/* incore inode pointer */
+	xfs_extnum_t		idx,	/* extent number to update/insert */
+	xfs_btree_cur_t		*cur,	/* if null, not a btree */
+	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	int			*logflagsp, /* inode logging flags */
+	int			rsvd)	/* OK to allocate reserved blocks */
+{
+	xfs_bmbt_rec_t		*base;	/* base of extent entry list */
+	xfs_bmbt_rec_t		*ep;	/* extent list entry for idx */
+#ifdef XFS_BMAP_TRACE
+	static char		fname[] = "xfs_bmap_add_extent_hole_delay";
+#endif
+	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
+	xfs_filblks_t		newlen;	/* new indirect size */
+	xfs_filblks_t		oldlen;	/* old indirect size */
+	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
+	int			state;  /* state bits, accessed thru macros */
+	xfs_filblks_t		temp;	/* temp for indirect calculations */
+	enum {				/* bit number definitions for state */
+		LEFT_CONTIG,	RIGHT_CONTIG,
+		LEFT_DELAY,	RIGHT_DELAY,
+		LEFT_VALID,	RIGHT_VALID
+	};
+
+#define	MASK(b)			(1 << (b))
+#define	MASK2(a,b)		(MASK(a) | MASK(b))
+#define	STATE_SET(b,v)		((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
+#define	STATE_TEST(b)		(state & MASK(b))
+#define	STATE_SET_TEST(b,v)	((v) ? 
((state |= MASK(b)), 1) : \
+				       ((state &= ~MASK(b)), 0))
+#define	SWITCH_STATE		(state & MASK2(LEFT_CONTIG, RIGHT_CONTIG))
+
+	base = ip->i_df.if_u1.if_extents;
+	ep = &base[idx];	/* entry currently at the insertion point */
+	state = 0;
+	ASSERT(ISNULLSTARTBLOCK(new->br_startblock));
+	/*
+	 * Check and set flags if this segment has a left neighbor
+	 */
+	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
+		xfs_bmbt_get_all(ep - 1, &left);
+		STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(left.br_startblock));
+	}
+	/*
+	 * Check and set flags if the current (right) segment exists.
+	 * If it doesn't exist, we're converting the hole at end-of-file.
+	 */
+	if (STATE_SET_TEST(RIGHT_VALID,
+			idx <
+			ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
+		xfs_bmbt_get_all(ep, &right);
+		STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(right.br_startblock));
+	}
+	/*
+	 * Set contiguity flags on the left and right neighbors.
+	 * Don't let extents get too large, even if the pieces are contiguous.
+	 * A neighbor only counts as contiguous here if it is itself a
+	 * delayed allocation (its DELAY bit is set); RIGHT_CONTIG is also
+	 * refused when merging all three pieces would exceed MAXEXTLEN.
+	 */
+	STATE_SET(LEFT_CONTIG,
+		STATE_TEST(LEFT_VALID) && STATE_TEST(LEFT_DELAY) &&
+		left.br_startoff + left.br_blockcount == new->br_startoff &&
+		left.br_blockcount + new->br_blockcount <= MAXEXTLEN);
+	STATE_SET(RIGHT_CONTIG,
+		STATE_TEST(RIGHT_VALID) && STATE_TEST(RIGHT_DELAY) &&
+		new->br_startoff + new->br_blockcount == right.br_startoff &&
+		new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
+		(!STATE_TEST(LEFT_CONTIG) ||
+		 (left.br_blockcount + new->br_blockcount +
+		     right.br_blockcount <= MAXEXTLEN)));
+	/*
+	 * Switch out based on the contiguity flags.
+	 * Every case sets oldlen and newlen, which are compared after
+	 * the switch.
+	 */
+	switch (SWITCH_STATE) {
+
+	case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
+		/*
+		 * New allocation is contiguous with delayed allocations
+		 * on the left and on the right.
+		 * Merge all three into a single extent list entry.
+		 * The left entry (idx - 1) absorbs everything and the
+		 * right entry (idx) is deleted from the list.
+		 */
+		temp = left.br_blockcount + new->br_blockcount +
+			right.br_blockcount;
+		xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep - 1, temp);
+		oldlen = STARTBLOCKVAL(left.br_startblock) +
+			STARTBLOCKVAL(new->br_startblock) +
+			STARTBLOCKVAL(right.br_startblock);
+		newlen = xfs_bmap_worst_indlen(ip, temp);
+		xfs_bmbt_set_startblock(ep - 1, NULLSTARTBLOCK((int)newlen));
+		xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmap_trace_delete(fname, "LC|RC", ip, idx, 1,
+			XFS_DATA_FORK);
+		xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx - 1;
+		break;
+
+	case MASK(LEFT_CONTIG):
+		/*
+		 * New allocation is contiguous with a delayed allocation
+		 * on the left.
+		 * Merge the new allocation with the left neighbor.
+		 */
+		temp = left.br_blockcount + new->br_blockcount;
+		xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		xfs_bmbt_set_blockcount(ep - 1, temp);
+		oldlen = STARTBLOCKVAL(left.br_startblock) +
+			STARTBLOCKVAL(new->br_startblock);
+		newlen = xfs_bmap_worst_indlen(ip, temp);
+		xfs_bmbt_set_startblock(ep - 1, NULLSTARTBLOCK((int)newlen));
+		xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1,
+			XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx - 1;
+		break;
+
+	case MASK(RIGHT_CONTIG):
+		/*
+		 * New allocation is contiguous with a delayed allocation
+		 * on the right.
+		 * Merge the new allocation with the right neighbor.
+		 * The entry at idx is rewritten to start at new's offset.
+		 */
+		xfs_bmap_trace_pre_update(fname, "RC", ip, idx, XFS_DATA_FORK);
+		temp = new->br_blockcount + right.br_blockcount;
+		oldlen = STARTBLOCKVAL(new->br_startblock) +
+			STARTBLOCKVAL(right.br_startblock);
+		newlen = xfs_bmap_worst_indlen(ip, temp);
+		xfs_bmbt_set_allf(ep, new->br_startoff,
+			NULLSTARTBLOCK((int)newlen), temp, right.br_state);
+		xfs_bmap_trace_post_update(fname, "RC", ip, idx, XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx;
+		break;
+
+	case 0:
+		/*
+		 * New allocation is not contiguous with another
+		 * delayed allocation.
+		 * Insert a new entry.
+		 * Nothing is merged, so no indirect-length adjustment
+		 * is made below (oldlen == newlen).
+		 */
+		oldlen = newlen = 0;
+		xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
+			XFS_DATA_FORK);
+		xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK);
+		ip->i_df.if_lastex = idx;
+		break;
+	}
+	if (oldlen != newlen) {
+		ASSERT(oldlen > newlen);
+		xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS,
+			(int)(oldlen - newlen), rsvd);
+		/*
+		 * Nothing to do for disk quota accounting here.
+		 * The positive difference between the summed old indirect
+		 * lengths and the new worst-case length is applied to the
+		 * XFS_SBS_FDBLOCKS counter; the return value of
+		 * xfs_mod_incore_sb is not checked.
+		 */
+	}
+	*logflagsp = 0;
+	return 0;
+#undef	MASK
+#undef	MASK2
+#undef	STATE_SET
+#undef	STATE_TEST
+#undef	STATE_SET_TEST
+#undef	SWITCH_STATE
+}
+
+/*
+ * Called by xfs_bmap_add_extent to handle cases converting a hole
+ * to a real allocation.
+ *
+ * Operates on the fork selected by whichfork.  The new extent is merged
+ * with real (non-delayed) neighbors of the same state when they are
+ * contiguous in both file offset and start block and the result stays
+ * within MAXEXTLEN; otherwise it is inserted at idx.  When cur is
+ * non-null the bmap btree is updated to match.  *logflagsp is set before
+ * every return; the return value is 0 or an error from a btree operation.
+ */
+STATIC int				/* error */
+xfs_bmap_add_extent_hole_real(
+	xfs_inode_t		*ip,	/* incore inode pointer */
+	xfs_extnum_t		idx,	/* extent number to update/insert */
+	xfs_btree_cur_t		*cur,	/* if null, not a btree */
+	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
+	int			*logflagsp, /* inode logging flags */
+	int			whichfork) /* data or attr fork */
+{
+	xfs_bmbt_rec_t		*ep;	/* pointer to extent entry ins. 
point */
+	int			error;	/* error return value */
+#ifdef XFS_BMAP_TRACE
+	static char		fname[] = "xfs_bmap_add_extent_hole_real";
+#endif
+	int			i;	/* temp state */
+	xfs_ifork_t		*ifp;	/* inode fork pointer */
+	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
+	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
+	int			state;  /* state bits, accessed thru macros */
+	enum {				/* bit number definitions for state */
+		LEFT_CONTIG,	RIGHT_CONTIG,
+		LEFT_DELAY,	RIGHT_DELAY,
+		LEFT_VALID,	RIGHT_VALID
+	};
+
+#define	MASK(b)			(1 << (b))
+#define	MASK2(a,b)		(MASK(a) | MASK(b))
+#define	STATE_SET(b,v)		((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
+#define	STATE_TEST(b)		(state & MASK(b))
+#define	STATE_SET_TEST(b,v)	((v) ? ((state |= MASK(b)), 1) : \
+				       ((state &= ~MASK(b)), 0))
+#define	SWITCH_STATE		(state & MASK2(LEFT_CONTIG, RIGHT_CONTIG))
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
+	ep = &ifp->if_u1.if_extents[idx];
+	state = 0;
+	/*
+	 * Check and set flags if this segment has a left neighbor.
+	 */
+	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
+		xfs_bmbt_get_all(ep - 1, &left);
+		STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(left.br_startblock));
+	}
+	/*
+	 * Check and set flags if this segment has a current value.
+	 * Not true if we're inserting into the "hole" at eof.
+	 */
+	if (STATE_SET_TEST(RIGHT_VALID,
+			idx <
+			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
+		xfs_bmbt_get_all(ep, &right);
+		STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(right.br_startblock));
+	}
+	/*
+	 * We're inserting a real allocation between "left" and "right".
+	 * Set the contiguity flags.  Don't let extents get too large.
+	 * A neighbor must be valid, not delayed, adjacent in both file
+	 * offset and start block, and in the same state as new.
+	 */
+	STATE_SET(LEFT_CONTIG,
+		STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) &&
+		left.br_startoff + left.br_blockcount == new->br_startoff &&
+		left.br_startblock + left.br_blockcount == new->br_startblock &&
+		left.br_state == new->br_state &&
+		left.br_blockcount + new->br_blockcount <= MAXEXTLEN);
+	STATE_SET(RIGHT_CONTIG,
+		STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) &&
+		new->br_startoff + new->br_blockcount == right.br_startoff &&
+		new->br_startblock + new->br_blockcount ==
+		    right.br_startblock &&
+		new->br_state == right.br_state &&
+		new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
+		(!STATE_TEST(LEFT_CONTIG) ||
+		 left.br_blockcount + new->br_blockcount +
+		     right.br_blockcount <= MAXEXTLEN));
+
+	/*
+	 * Select which case we're in here, and implement it.
+	 * Every case returns directly from inside the switch.
+	 */
+	switch (SWITCH_STATE) {
+
+	case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
+		/*
+		 * New allocation is contiguous with real allocations on the
+		 * left and on the right.
+		 * Merge all three into a single extent list entry.
+		 * The left entry grows, the right entry (idx) is deleted
+		 * and the fork's extent count drops by one.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
+			whichfork);
+		xfs_bmbt_set_blockcount(ep - 1,
+			left.br_blockcount + new->br_blockcount +
+			right.br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
+			whichfork);
+		xfs_bmap_trace_delete(fname, "LC|RC", ip,
+			idx, 1, whichfork);
+		xfs_bmap_delete_exlist(ip, idx, 1, whichfork);
+		ifp->if_lastex = idx - 1;
+		XFS_IFORK_NEXT_SET(ip, whichfork,
+			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+		if (cur == NULL) {
+			*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
+			return 0;
+		}
+		*logflagsp = XFS_ILOG_CORE;
+		if (error = xfs_bmbt_lookup_eq(cur, right.br_startoff,
+				right.br_startblock, right.br_blockcount, &i))
+			return error;
+		ASSERT(i == 1);
+		if (error = xfs_bmbt_delete(cur, 0, &i))
+			return error;
+		ASSERT(i == 1);
+		if (error = xfs_bmbt_decrement(cur, 0, &i))
+			return error;
+		ASSERT(i == 1);
+		error = xfs_bmbt_update(cur, left.br_startoff,
+				left.br_startblock,
+				left.br_blockcount + new->br_blockcount +
+				right.br_blockcount, left.br_state);
+		return error;
+
+	case MASK(LEFT_CONTIG):
+		/*
+		 * New allocation is contiguous with a real allocation
+		 * on the left.
+		 * Merge the new allocation with the left neighbor.
+		 * The extent count is unchanged, so the inode core is
+		 * not flagged for logging.
+		 */
+		xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, whichfork);
+		xfs_bmbt_set_blockcount(ep - 1,
+			left.br_blockcount + new->br_blockcount);
+		xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork);
+		ifp->if_lastex = idx - 1;
+		if (cur == NULL) {
+			*logflagsp = XFS_ILOG_FEXT(whichfork);
+			return 0;
+		}
+		*logflagsp = 0;
+		if (error = xfs_bmbt_lookup_eq(cur, left.br_startoff,
+				left.br_startblock, left.br_blockcount, &i))
+			return error;
+		ASSERT(i == 1);
+		error = xfs_bmbt_update(cur, left.br_startoff,
+				left.br_startblock,
+				left.br_blockcount + new->br_blockcount,
+				left.br_state);
+		return error;
+
+	case MASK(RIGHT_CONTIG):
+		/*
+		 * New allocation is contiguous with a real allocation
+		 * on the right.
+		 * Merge the new allocation with the right neighbor.
+		 * The entry at idx is rewritten to begin at new's offset
+		 * and start block.
+		 */
+		xfs_bmap_trace_pre_update(fname, "RC", ip, idx, whichfork);
+		xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock,
+			new->br_blockcount + right.br_blockcount,
+			right.br_state);
+		xfs_bmap_trace_post_update(fname, "RC", ip, idx, whichfork);
+		ifp->if_lastex = idx;
+		if (cur == NULL) {
+			*logflagsp = XFS_ILOG_FEXT(whichfork);
+			return 0;
+		}
+		*logflagsp = 0;
+		if (error = xfs_bmbt_lookup_eq(cur, right.br_startoff,
+				right.br_startblock, right.br_blockcount, &i))
+			return error;
+		ASSERT(i == 1);
+		error = xfs_bmbt_update(cur, new->br_startoff,
+				new->br_startblock,
+				new->br_blockcount + right.br_blockcount,
+				right.br_state);
+		return error;
+
+	case 0:
+		/*
+		 * New allocation is not contiguous with another
+		 * real allocation.
+		 * Insert a new entry.
+		 * The fork's extent count grows by one.
+		 */
+		xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
+			whichfork);
+		xfs_bmap_insert_exlist(ip, idx, 1, new, whichfork);
+		ifp->if_lastex = idx;
+		XFS_IFORK_NEXT_SET(ip, whichfork,
+			XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+		if (cur == NULL) {
+			*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
+			return 0;
+		}
+		*logflagsp = XFS_ILOG_CORE;
+		if (error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+				new->br_startblock, new->br_blockcount, &i))
+			return error;
+		ASSERT(i == 0);		/* the record must not exist yet */
+		cur->bc_rec.b.br_state = new->br_state;
+		if (error = xfs_bmbt_insert(cur, &i))
+			return error;
+		ASSERT(i == 1);
+		return 0;
+	}
+#undef	MASK
+#undef	MASK2
+#undef	STATE_SET
+#undef	STATE_TEST
+#undef	STATE_SET_TEST
+#undef	SWITCH_STATE
+	/* NOTREACHED */
+	ASSERT(0);
+	return 0; /* keep gcc quiet */
+}
+
+#define	XFS_ALLOC_GAP_UNITS	4
+
+/*
+ * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
+ * It figures out where to ask the underlying allocator to put the new extent.
+ */ +STATIC int /* error */ +xfs_bmap_alloc( + xfs_bmalloca_t *ap) /* bmap alloc argument struct */ +{ + xfs_fsblock_t adjust; /* adjustment to block numbers */ + xfs_alloctype_t atype; /* type for allocation routines */ + int error; /* error return value */ + xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ + xfs_mount_t *mp; /* mount point structure */ + int nullfb; /* true if ap->firstblock isn't set */ + int rt; /* true if inode is realtime */ +#ifdef __KERNEL__ + xfs_extlen_t prod; /* product factor for allocators */ + xfs_extlen_t ralen; /* realtime allocation length */ +#endif + +#define ISLEGAL(x,y) \ + (rt ? \ + (x) < mp->m_sb.sb_rblocks : \ + XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \ + XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \ + XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) + + /* + * Set up variables. + */ + mp = ap->ip->i_mount; + nullfb = ap->firstblock == NULLFSBLOCK; + rt = (ap->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && ap->userdata; + fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock); +#ifdef __KERNEL__ + if (rt) { + xfs_extlen_t extsz; /* file extent size for rt */ + xfs_fileoff_t nexto; /* next file offset */ + xfs_extlen_t orig_alen; /* original ap->alen */ + xfs_fileoff_t orig_end; /* original off+len */ + xfs_fileoff_t orig_off; /* original ap->off */ + xfs_extlen_t mod_off; /* modulus calculations */ + xfs_fileoff_t prevo; /* previous file offset */ + xfs_rtblock_t rtx; /* realtime extent number */ + xfs_extlen_t temp; /* temp for rt calculations */ + + /* + * Set prod to match the realtime extent size. + */ + if (!(extsz = ap->ip->i_d.di_extsize)) + extsz = mp->m_sb.sb_rextsize; + prod = extsz / mp->m_sb.sb_rextsize; + orig_off = ap->off; + orig_alen = ap->alen; + orig_end = orig_off + orig_alen; + /* + * If the file offset is unaligned vs. the extent size + * we need to align it. 
This will be possible unless + * the file was previously written with a kernel that didn't + * perform this alignment. + */ + mod_off = do_mod(orig_off, extsz); + if (mod_off) { + ap->alen += mod_off; + ap->off -= mod_off; + } + /* + * Same adjustment for the end of the requested area. + */ + if (temp = (ap->alen % extsz)) + ap->alen += extsz - temp; + /* + * If the previous block overlaps with this proposed allocation + * then move the start forward without adjusting the length. + */ + prevo = + ap->prevp->br_startoff == NULLFILEOFF ? + 0 : + (ap->prevp->br_startoff + + ap->prevp->br_blockcount); + if (ap->off != orig_off && ap->off < prevo) + ap->off = prevo; + /* + * If the next block overlaps with this proposed allocation + * then move the start back without adjusting the length, + * but not before offset 0. + * This may of course make the start overlap previous block, + * and if we hit the offset 0 limit then the next block + * can still overlap too. + */ + nexto = (ap->eof || ap->gotp->br_startoff == NULLFILEOFF) ? + NULLFILEOFF : ap->gotp->br_startoff; + if (!ap->eof && + ap->off + ap->alen != orig_end && + ap->off + ap->alen > nexto) + ap->off = nexto > ap->alen ? nexto - ap->alen : 0; + /* + * If we're now overlapping the next or previous extent that + * means we can't fit an extsz piece in this hole. Just move + * the start forward to the first legal spot and set + * the length so we hit the end. + */ + if ((ap->off != orig_off && ap->off < prevo) || + (ap->off + ap->alen != orig_end && + ap->off + ap->alen > nexto)) { + ap->off = prevo; + ap->alen = nexto - prevo; + } + /* + * If the result isn't a multiple of rtextents we need to + * remove blocks until it is. + */ + if (temp = (ap->alen % mp->m_sb.sb_rextsize)) { + /* + * We're not covering the original request, or + * we won't be able to once we fix the length. 
+ */ + if (orig_off < ap->off || + orig_end > ap->off + ap->alen || + ap->alen - temp < orig_alen) + return XFS_ERROR(EINVAL); + /* + * Try to fix it by moving the start up. + */ + if (ap->off + temp <= orig_off) { + ap->alen -= temp; + ap->off += temp; + } + /* + * Try to fix it by moving the end in. + */ + else if (ap->off + ap->alen - temp >= orig_end) + ap->alen -= temp; + /* + * Set the start to the minimum then trim the length. + */ + else { + ap->alen -= orig_off - ap->off; + ap->off = orig_off; + ap->alen -= ap->alen % mp->m_sb.sb_rextsize; + } + /* + * Result doesn't cover the request, fail it. + */ + if (orig_off < ap->off || orig_end > ap->off + ap->alen) + return XFS_ERROR(EINVAL); + } + ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0); + /* + * If the offset & length are not perfectly aligned + * then kill prod, it will just get us in trouble. + */ + if (do_mod(ap->off, extsz) || ap->alen % extsz) + prod = 1; + /* + * Set ralen to be the actual requested length in rtextents. + */ + ralen = ap->alen / mp->m_sb.sb_rextsize; + /* + * If the old value was close enough to MAXEXTLEN that + * we rounded up to it, cut it back so it's legal again. + * Note that if it's a really large request (bigger than + * MAXEXTLEN), we don't hear about that number, and can't + * adjust the starting point to match it. + */ + if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) + ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; + /* + * If it's an allocation to an empty file at offset 0, + * pick an extent that will space things out in the rt area. 
+ */ + if (ap->eof && ap->off == 0) { + error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx); + if (error) + return error; + ap->rval = rtx * mp->m_sb.sb_rextsize; + } else + ap->rval = 0; + } +#else + if (rt) + ap->rval = 0; +#endif /* __KERNEL__ */ + else if (nullfb) + ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino); + else + ap->rval = ap->firstblock; + /* + * If allocating at eof, and there's a previous real block, + * try to use it's last block as our starting point. + */ + if (ap->eof && ap->prevp->br_startoff != NULLFILEOFF && + !ISNULLSTARTBLOCK(ap->prevp->br_startblock) && + ISLEGAL(ap->prevp->br_startblock + ap->prevp->br_blockcount, + ap->prevp->br_startblock)) { + ap->rval = ap->prevp->br_startblock + ap->prevp->br_blockcount; + /* + * Adjust for the gap between prevp and us. + */ + adjust = ap->off - + (ap->prevp->br_startoff + ap->prevp->br_blockcount); + if (adjust && + ISLEGAL(ap->rval + adjust, ap->prevp->br_startblock)) + ap->rval += adjust; + } + /* + * If not at eof, then compare the two neighbor blocks. + * Figure out whether either one gives us a good starting point, + * and pick the better one. + */ + else if (!ap->eof) { + xfs_fsblock_t gotbno; /* right side block number */ + xfs_fsblock_t gotdiff; /* right side difference */ + xfs_fsblock_t prevbno; /* left side block number */ + xfs_fsblock_t prevdiff; /* left side difference */ + + /* + * If there's a previous (left) block, select a requested + * start block based on it. + */ + if (ap->prevp->br_startoff != NULLFILEOFF && + !ISNULLSTARTBLOCK(ap->prevp->br_startblock) && + (prevbno = ap->prevp->br_startblock + + ap->prevp->br_blockcount) && + ISLEGAL(prevbno, ap->prevp->br_startblock)) { + /* + * Calculate gap to end of previous block. + */ + adjust = prevdiff = ap->off - + (ap->prevp->br_startoff + + ap->prevp->br_blockcount); + /* + * Figure the startblock based on the previous block's + * end and the gap size. + * Heuristic! 
+ * If the gap is large relative to the piece we're + * allocating, or using it gives us an illegal block + * number, then just use the end of the previous block. + */ + if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->alen && + ISLEGAL(prevbno + prevdiff, + ap->prevp->br_startblock)) + prevbno += adjust; + else + prevdiff += adjust; + /* + * If the firstblock forbids it, can't use it, + * must use default. + */ + if (!rt && !nullfb && + XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno) + prevbno = NULLFSBLOCK; + } + /* + * No previous block or can't follow it, just default. + */ + else + prevbno = NULLFSBLOCK; + /* + * If there's a following (right) block, select a requested + * start block based on it. + */ + if (!ISNULLSTARTBLOCK(ap->gotp->br_startblock)) { + /* + * Calculate gap to start of next block. + */ + adjust = gotdiff = ap->gotp->br_startoff - ap->off; + /* + * Figure the startblock based on the next block's + * start and the gap size. + */ + gotbno = ap->gotp->br_startblock; + /* + * Heuristic! + * If the gap is large relative to the piece we're + * allocating, or using it gives us an illegal block + * number, then just use the start of the next block + * offset by our length. + */ + if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->alen && + ISLEGAL(gotbno - gotdiff, gotbno)) + gotbno -= adjust; + else if (ISLEGAL(gotbno - ap->alen, gotbno)) { + gotbno -= ap->alen; + gotdiff += adjust - ap->alen; + } else + gotdiff += adjust; + /* + * If the firstblock forbids it, can't use it, + * must use default. + */ + if (!rt && !nullfb && + XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno) + gotbno = NULLFSBLOCK; + } + /* + * No next block, just default. + */ + else + gotbno = NULLFSBLOCK; + /* + * If both valid, pick the better one, else the only good + * one, else ap->rval is already set (to 0 or the inode block). + */ + if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) + ap->rval = prevdiff <= gotdiff ? 
prevbno : gotbno; + else if (prevbno != NULLFSBLOCK) + ap->rval = prevbno; + else if (gotbno != NULLFSBLOCK) + ap->rval = gotbno; + } + /* + * If allowed, use ap->rval; otherwise must use firstblock since + * it's in the right allocation group. + */ + if (nullfb || rt || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno) + ; + else + ap->rval = ap->firstblock; + /* + * Realtime allocation, done through xfs_rtallocate_extent. + */ + if (rt) { +#ifndef __KERNEL__ + ASSERT(0); +#else + xfs_rtblock_t rtb; + + atype = ap->rval == 0 ? + XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO; + do_div(ap->rval, mp->m_sb.sb_rextsize); + rtb = ap->rval; + ap->alen = ralen; + if (error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, ap->alen, + &ralen, atype, ap->wasdel, prod, &rtb)) + return error; + if (rtb == NULLFSBLOCK && prod > 1 && + (error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, + ap->alen, &ralen, atype, + ap->wasdel, 1, &rtb))) + return error; + ap->rval = rtb; + if (ap->rval != NULLFSBLOCK) { + ap->rval *= mp->m_sb.sb_rextsize; + ralen *= mp->m_sb.sb_rextsize; + ap->alen = ralen; + ap->ip->i_d.di_nblocks += ralen; + xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); + if (ap->wasdel) + ap->ip->i_delayed_blks -= ralen; + /* + * Adjust the disk quota also. This was reserved + * earlier. + */ + if (XFS_IS_QUOTA_ON(mp) && + ap->ip->i_ino != mp->m_sb.sb_uquotino && + ap->ip->i_ino != mp->m_sb.sb_pquotino) + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, + ap->wasdel ? + XFS_TRANS_DQ_DELRTBCOUNT : + XFS_TRANS_DQ_RTBCOUNT, + (long)ralen); + } else + ap->alen = 0; +#endif /* __KERNEL__ */ + } + /* + * Normal allocation, done through xfs_alloc_vextent. 
+ */ + else { + xfs_agnumber_t ag; + xfs_alloc_arg_t args; + xfs_extlen_t blen; + xfs_extlen_t delta; + int isaligned; + xfs_extlen_t longest; + xfs_extlen_t need; + xfs_extlen_t nextminlen; + int notinit; + xfs_perag_t *pag; + xfs_agnumber_t startag; + int tryagain; + + tryagain = isaligned = 0; + args.tp = ap->tp; + args.mp = mp; + args.fsbno = ap->rval; + args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); + blen = 0; + if (nullfb) { + args.type = XFS_ALLOCTYPE_START_BNO; + args.total = ap->total; + /* + * Find the longest available space. + * We're going to try for the whole allocation at once. + */ + startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno); + notinit = 0; + mrlock(&mp->m_peraglock, MR_ACCESS, PINOD); + while (blen < ap->alen) { + pag = &mp->m_perag[ag]; + if (!pag->pagf_init && + (error = xfs_alloc_pagf_init(mp, args.tp, + ag, XFS_ALLOC_FLAG_TRYLOCK))) { + mrunlock(&mp->m_peraglock); + return error; + } + /* + * See xfs_alloc_fix_freelist... + */ + if (pag->pagf_init) { + need = XFS_MIN_FREELIST_PAG(pag, mp); + delta = need > pag->pagf_flcount ? + need - pag->pagf_flcount : 0; + longest = (pag->pagf_longest > delta) ? + (pag->pagf_longest - delta) : + (pag->pagf_flcount > 0 || + pag->pagf_longest > 0); + if (blen < longest) + blen = longest; + } else + notinit = 1; + if (++ag == mp->m_sb.sb_agcount) + ag = 0; + if (ag == startag) + break; + } + mrunlock(&mp->m_peraglock); + /* + * Since the above loop did a BUF_TRYLOCK, it is + * possible that there is space for this request. + */ + if (notinit || blen < ap->minlen) + args.minlen = ap->minlen; + /* + * If the best seen length is less than the request + * length, use the best as the minimum. + */ + else if (blen < ap->alen) + args.minlen = blen; + /* + * Otherwise we've seen an extent as big as alen, + * use that as the minimum. 
+ */ + else + args.minlen = ap->alen; + } else if (ap->low) { + args.type = XFS_ALLOCTYPE_FIRST_AG; + args.total = args.minlen = ap->minlen; + } else { + args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.total = ap->total; + args.minlen = ap->minlen; + } + if (ap->ip->i_d.di_extsize) { + args.prod = ap->ip->i_d.di_extsize; + if (args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)) + args.mod = (xfs_extlen_t)(args.prod - args.mod); + } else if (mp->m_sb.sb_blocksize >= NBPP) { + args.prod = 1; + args.mod = 0; + } else { + args.prod = NBPP >> mp->m_sb.sb_blocklog; + if (args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod))) + args.mod = (xfs_extlen_t)(args.prod - args.mod); + } + /* + * If we are not low on available data blocks, and the + * underlying logical volume manager is a stripe, and + * the file offset is zero then try to allocate data + * blocks on stripe unit boundary. + * NOTE: ap->aeof is only set if the allocation length + * is >= the stripe unit and the allocation offset is + * at the end of file. + */ + if (!ap->low && ap->aeof) { + if (!ap->off) { + args.alignment = mp->m_dalign; + atype = args.type; + isaligned = 1; + /* + * Adjust for alignment + */ + if (blen > args.alignment && blen <= ap->alen) + args.minlen = blen - args.alignment; + args.minalignslop = 0; + } else { + /* + * First try an exact bno allocation. + * If it fails then do a near or start bno + * allocation with alignment turned on. + */ + atype = args.type; + tryagain = 1; + args.type = XFS_ALLOCTYPE_THIS_BNO; + args.alignment = 1; + /* + * Compute the minlen+alignment for the + * next case. Set slop so that the value + * of minlen+alignment+slop doesn't go up + * between the calls. 
+ */ + if (blen > mp->m_dalign && blen <= ap->alen) + nextminlen = blen - mp->m_dalign; + else + nextminlen = args.minlen; + if (nextminlen + mp->m_dalign > args.minlen + 1) + args.minalignslop = + nextminlen + mp->m_dalign - + args.minlen - 1; + else + args.minalignslop = 0; + } + } else { + args.alignment = 1; + args.minalignslop = 0; + } + args.minleft = ap->minleft; + args.wasdel = ap->wasdel; + args.isfl = 0; + args.userdata = ap->userdata; + if (error = xfs_alloc_vextent(&args)) + return error; + if (tryagain && args.fsbno == NULLFSBLOCK) { + /* + * Exact allocation failed. Now try with alignment + * turned on. + */ + args.type = atype; + args.fsbno = ap->rval; + args.alignment = mp->m_dalign; + args.minlen = nextminlen; + args.minalignslop = 0; + isaligned = 1; + if (error = xfs_alloc_vextent(&args)) + return error; + } + if (isaligned && args.fsbno == NULLFSBLOCK) { + /* + * allocation failed, so turn off alignment and + * try again. + */ + args.type = atype; + args.fsbno = ap->rval; + args.alignment = 0; + if (error = xfs_alloc_vextent(&args)) + return error; + } + if (args.fsbno == NULLFSBLOCK && nullfb && + args.minlen > ap->minlen) { + args.minlen = ap->minlen; + args.type = XFS_ALLOCTYPE_START_BNO; + args.fsbno = ap->rval; + if (error = xfs_alloc_vextent(&args)) + return error; + } + if (args.fsbno == NULLFSBLOCK && nullfb) { + args.fsbno = 0; + args.type = XFS_ALLOCTYPE_FIRST_AG; + args.total = ap->minlen; + args.minleft = 0; + if (error = xfs_alloc_vextent(&args)) + return error; + ap->low = 1; + } + if (args.fsbno != NULLFSBLOCK) { + ap->firstblock = ap->rval = args.fsbno; + ASSERT(nullfb || fb_agno == args.agno || + (ap->low && fb_agno < args.agno)); + ap->alen = args.len; + ap->ip->i_d.di_nblocks += args.len; + xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); + if (ap->wasdel) + ap->ip->i_delayed_blks -= args.len; + /* + * Adjust the disk quota also. This was reserved + * earlier. 
+ */ + if (XFS_IS_QUOTA_ON(mp) && + ap->ip->i_ino != mp->m_sb.sb_uquotino && + ap->ip->i_ino != mp->m_sb.sb_pquotino) + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, + ap->wasdel ? + XFS_TRANS_DQ_DELBCOUNT : + XFS_TRANS_DQ_BCOUNT, + (long)args.len); + } else { + ap->rval = NULLFSBLOCK; + ap->alen = 0; + } + } + return 0; +#undef ISLEGAL +} + +/* + * Transform a btree format file with only one leaf node, where the + * extents list will fit in the inode, into an extents format file. + * Since the extent list is already in-core, all we have to do is + * give up the space for the btree root and pitch the leaf block. + */ +STATIC int /* error */ +xfs_bmap_btree_to_extents( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode pointer */ + xfs_btree_cur_t *cur, /* btree cursor */ + int *logflagsp, /* inode logging flags */ + int whichfork, /* data or attr fork */ + int async) /* xaction can be async */ +{ + /* REFERENCED */ + xfs_bmbt_block_t *cblock;/* child btree block */ + xfs_fsblock_t cbno; /* child block number */ + xfs_buf_t *cbp; /* child block's buffer */ + int error; /* error return value */ + xfs_ifork_t *ifp; /* inode fork data */ + xfs_mount_t *mp; /* mount point structure */ + xfs_bmbt_ptr_t *pp; /* ptr to block address */ + xfs_bmbt_block_t *rblock;/* root btree block */ + + ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); + rblock = ifp->if_broot; + ASSERT(INT_GET(rblock->bb_level, ARCH_CONVERT) == 1); + ASSERT(INT_GET(rblock->bb_numrecs, ARCH_CONVERT) == 1); + ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1); + mp = ip->i_mount; + pp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, ifp->if_broot_bytes); + *logflagsp = 0; +#ifdef DEBUG + if (error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), 1)) + return error; +#endif + cbno = INT_GET(*pp, ARCH_CONVERT); + if (error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, + XFS_BMAP_BTREE_REF)) 
+ return error; + cblock = XFS_BUF_TO_BMBT_BLOCK(cbp); + if (error = xfs_btree_check_lblock(cur, cblock, 0, cbp)) + return error; + xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); + if (!async) + xfs_trans_set_sync(tp); + ip->i_d.di_nblocks--; + if (XFS_IS_QUOTA_ON(mp) && + ip->i_ino != mp->m_sb.sb_uquotino && + ip->i_ino != mp->m_sb.sb_pquotino) + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + xfs_trans_binval(tp, cbp); + if (cur->bc_bufs[0] == cbp) + cur->bc_bufs[0] = NULL; + xfs_iroot_realloc(ip, -1, whichfork); + ASSERT(ifp->if_broot == NULL); + ASSERT((ifp->if_flags & XFS_IFBROOT) == 0); + XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); + *logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork); + return 0; +} + +/* + * Called by xfs_bmapi to update extent list structure and the btree + * after removing space (or undoing a delayed allocation). + */ +STATIC int /* error */ +xfs_bmap_del_extent( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_trans_t *tp, /* current transaction pointer */ + xfs_extnum_t idx, /* extent number to update/delete */ + xfs_bmap_free_t *flist, /* list of extents to be freed */ + xfs_btree_cur_t *cur, /* if null, not a btree */ + xfs_bmbt_irec_t *del, /* data to remove from extent list */ + int iflags, /* input flags */ + int *logflagsp, /* inode logging flags */ + int whichfork, /* data or attr fork */ + int rsvd) /* OK to allocate reserved blocks */ +{ + xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ + xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ + xfs_fsblock_t del_endblock; /* first block past del */ + xfs_fileoff_t del_endoff; /* first offset past del */ + int delay; /* current block is delayed allocated */ + int do_fx; /* free extent at end of routine */ + xfs_bmbt_rec_t *ep; /* current extent entry pointer */ + int error; /* error return value */ + int flags; /* inode logging flags */ +#ifdef XFS_BMAP_TRACE + static char fname[] = "xfs_bmap_del_extent"; +#endif + 
xfs_bmbt_irec_t got; /* current extent entry */ + xfs_fileoff_t got_endoff; /* first offset past got */ + int i; /* temp state */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_mount_t *mp; /* mount structure */ + xfs_filblks_t nblks; /* quota/sb block count */ + xfs_bmbt_irec_t new; /* new record to be inserted */ + /* REFERENCED */ + xfs_extnum_t nextents; /* number of extents in list */ + uint qfield; /* quota field to update */ + xfs_filblks_t temp; /* for indirect length calculations */ + xfs_filblks_t temp2; /* for indirect length calculations */ + + XFS_STATS_INC(xs_del_exlist); + mp = ip->i_mount; + ifp = XFS_IFORK_PTR(ip, whichfork); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + ASSERT(idx >= 0 && idx < nextents); + ASSERT(del->br_blockcount > 0); + ep = &ifp->if_u1.if_extents[idx]; + xfs_bmbt_get_all(ep, &got); + ASSERT(got.br_startoff <= del->br_startoff); + del_endoff = del->br_startoff + del->br_blockcount; + got_endoff = got.br_startoff + got.br_blockcount; + ASSERT(got_endoff >= del_endoff); + delay = ISNULLSTARTBLOCK(got.br_startblock); + ASSERT(ISNULLSTARTBLOCK(del->br_startblock) == delay); + flags = 0; + qfield = 0; + error = 0; + /* + * If deleting a real allocation, must free up the disk space. + */ + if (!delay) { + flags = XFS_ILOG_CORE; + /* + * Realtime allocation. Free it and record di_nblocks update. 
+ */ + if (whichfork == XFS_DATA_FORK && + (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { + xfs_fsblock_t bno; + xfs_filblks_t len; + + ASSERT(do_mod(del->br_blockcount, + mp->m_sb.sb_rextsize) == 0); + ASSERT(do_mod(del->br_startblock, + mp->m_sb.sb_rextsize) == 0); + bno = del->br_startblock; + do_div(bno, mp->m_sb.sb_rextsize); + len = del->br_blockcount; + do_div(len, mp->m_sb.sb_rextsize); + if (error = xfs_rtfree_extent(ip->i_transp, bno, + (xfs_extlen_t)len)) + goto done; + do_fx = 0; + nblks = len * mp->m_sb.sb_rextsize; + if (XFS_IS_QUOTA_ON(mp) && + ip->i_ino != mp->m_sb.sb_uquotino && + ip->i_ino != mp->m_sb.sb_pquotino) + qfield = XFS_TRANS_DQ_RTBCOUNT; + } + /* + * Ordinary allocation. + */ + else { + do_fx = 1; + nblks = del->br_blockcount; + if (XFS_IS_QUOTA_ON(mp) && + ip->i_ino != mp->m_sb.sb_uquotino && + ip->i_ino != mp->m_sb.sb_pquotino) + qfield = XFS_TRANS_DQ_BCOUNT; + /* + * If we're freeing meta-data, then the transaction + * that frees the blocks must be synchronous. This + * ensures that noone can reuse the blocks before + * they are permanently free. For regular data + * it is the callers responsibility to make the + * data permanently inaccessible before calling + * here to free it. + */ + if (iflags & XFS_BMAPI_METADATA) + xfs_trans_set_sync(tp); + } + /* + * Set up del_endblock and cur for later. + */ + del_endblock = del->br_startblock + del->br_blockcount; + if (cur) { + if (error = xfs_bmbt_lookup_eq(cur, got.br_startoff, + got.br_startblock, got.br_blockcount, + &i)) + goto done; + ASSERT(i == 1); + } + da_old = da_new = 0; + } else { + da_old = STARTBLOCKVAL(got.br_startblock); + da_new = 0; + nblks = 0; + do_fx = 0; + } + /* + * Set flag value to use in switch statement. + * Left-contig is 2, right-contig is 1. + */ + switch (((got.br_startoff == del->br_startoff) << 1) | + (got_endoff == del_endoff)) { + case 3: + /* + * Matches the whole extent. Delete the entry. 
+ */ + xfs_bmap_trace_delete(fname, "3", ip, idx, 1, whichfork); + xfs_bmap_delete_exlist(ip, idx, 1, whichfork); + ifp->if_lastex = idx; + if (delay) + break; + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) - 1); + flags |= XFS_ILOG_CORE; + if (!cur) { + flags |= XFS_ILOG_FEXT(whichfork); + break; + } + if (error = xfs_bmbt_delete(cur, iflags & XFS_BMAPI_ASYNC, &i)) + goto done; + ASSERT(i == 1); + break; + + case 2: + /* + * Deleting the first part of the extent. + */ + xfs_bmap_trace_pre_update(fname, "2", ip, idx, whichfork); + xfs_bmbt_set_startoff(ep, del_endoff); + temp = got.br_blockcount - del->br_blockcount; + xfs_bmbt_set_blockcount(ep, temp); + ifp->if_lastex = idx; + if (delay) { + temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + da_old); + xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); + xfs_bmap_trace_post_update(fname, "2", ip, idx, + whichfork); + da_new = temp; + break; + } + xfs_bmbt_set_startblock(ep, del_endblock); + xfs_bmap_trace_post_update(fname, "2", ip, idx, whichfork); + if (!cur) { + flags |= XFS_ILOG_FEXT(whichfork); + break; + } + if (error = xfs_bmbt_update(cur, del_endoff, del_endblock, + got.br_blockcount - del->br_blockcount, + got.br_state)) + goto done; + break; + + case 1: + /* + * Deleting the last part of the extent. 
+ */ + temp = got.br_blockcount - del->br_blockcount; + xfs_bmap_trace_pre_update(fname, "1", ip, idx, whichfork); + xfs_bmbt_set_blockcount(ep, temp); + ifp->if_lastex = idx; + if (delay) { + temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), + da_old); + xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); + xfs_bmap_trace_post_update(fname, "1", ip, idx, + whichfork); + da_new = temp; + break; + } + xfs_bmap_trace_post_update(fname, "1", ip, idx, whichfork); + if (!cur) { + flags |= XFS_ILOG_FEXT(whichfork); + break; + } + if (error = xfs_bmbt_update(cur, got.br_startoff, + got.br_startblock, + got.br_blockcount - del->br_blockcount, + got.br_state)) + goto done; + break; + + case 0: + /* + * Deleting the middle of the extent. + */ + temp = del->br_startoff - got.br_startoff; + xfs_bmap_trace_pre_update(fname, "0", ip, idx, whichfork); + xfs_bmbt_set_blockcount(ep, temp); + new.br_startoff = del_endoff; + temp2 = got_endoff - del_endoff; + new.br_blockcount = temp2; + new.br_state = got.br_state; + if (!delay) { + new.br_startblock = del_endblock; + flags |= XFS_ILOG_CORE; + if (cur) { + if (error = xfs_bmbt_update(cur, + got.br_startoff, + got.br_startblock, temp, + got.br_state)) + goto done; + if (error = xfs_bmbt_increment(cur, 0, &i)) + goto done; + cur->bc_rec.b = new; + error = xfs_bmbt_insert(cur, &i); + if (error && error != ENOSPC) + goto done; + /* + * If get no-space back from btree insert, + * it tried a split, and we have a zero + * block reservation. + * Fix up our state and return the error. + */ + if (error == ENOSPC) { + /* + * Reset the cursor, don't trust + * it after any insert operation. + */ + if (error = xfs_bmbt_lookup_eq(cur, + got.br_startoff, + got.br_startblock, + temp, &i)) + goto done; + ASSERT(i == 1); + /* + * Update the btree record back + * to the original value. 
+ */ + if (error = xfs_bmbt_update(cur, + got.br_startoff, + got.br_startblock, + got.br_blockcount, + got.br_state)) + goto done; + /* + * Reset the extent record back + * to the original value. + */ + xfs_bmbt_set_blockcount(ep, + got.br_blockcount); + flags = 0; + error = XFS_ERROR(ENOSPC); + goto done; + } + ASSERT(i == 1); + } else + flags |= XFS_ILOG_FEXT(whichfork); + XFS_IFORK_NEXT_SET(ip, whichfork, + XFS_IFORK_NEXTENTS(ip, whichfork) + 1); + } else { + ASSERT(whichfork == XFS_DATA_FORK); + temp = xfs_bmap_worst_indlen(ip, temp); + xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp)); + temp2 = xfs_bmap_worst_indlen(ip, temp2); + new.br_startblock = NULLSTARTBLOCK((int)temp2); + da_new = temp + temp2; + while (da_new > da_old) { + if (temp) { + temp--; + da_new--; + xfs_bmbt_set_startblock(ep, + NULLSTARTBLOCK((int)temp)); + } + if (da_new == da_old) + break; + if (temp2) { + temp2--; + da_new--; + new.br_startblock = + NULLSTARTBLOCK((int)temp2); + } + } + } + xfs_bmap_trace_post_update(fname, "0", ip, idx, whichfork); + xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 1, &new, NULL, + whichfork); + xfs_bmap_insert_exlist(ip, idx + 1, 1, &new, whichfork); + ifp->if_lastex = idx + 1; + break; + } + /* + * If we need to, add to list of extents to delete. + */ + if (do_fx) + xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist, + mp); + /* + * Adjust inode # blocks in the file. + */ + if (nblks) + ip->i_d.di_nblocks -= nblks; + /* + * Adjust quota data. + */ + if (qfield) + xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); + /* + * Account for change in delayed indirect blocks. + * Nothing to do for disk quota accounting here. + */ + ASSERT(da_old >= da_new); + if (da_old > da_new) + xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int)(da_old - da_new), + rsvd); +done: + *logflagsp = flags; + return error; +} + +/* + * Remove the entry "free" from the free item list. Prev points to the + * previous entry, unless "free" is the head of the list. 
+ */ +STATIC void +xfs_bmap_del_free( + xfs_bmap_free_t *flist, /* free item list header */ + xfs_bmap_free_item_t *prev, /* previous item on list, if any */ + xfs_bmap_free_item_t *free) /* list item to be freed */ +{ + if (prev) + prev->xbfi_next = free->xbfi_next; + else + flist->xbf_first = free->xbfi_next; + flist->xbf_count--; + kmem_zone_free(xfs_bmap_free_item_zone, free); +} + +/* + * Remove count entries from the extents array for inode "ip", starting + * at index "idx". Copies the remaining items down over the deleted ones, + * and gives back the excess memory. + */ +STATIC void +xfs_bmap_delete_exlist( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_extnum_t idx, /* starting delete index */ + xfs_extnum_t count, /* count of items to delete */ + int whichfork) /* data or attr fork */ +{ + xfs_bmbt_rec_t *base; /* base of extent list */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_extnum_t nextents; /* number of extents in list after */ + + ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT(ifp->if_flags & XFS_IFEXTENTS); + base = ifp->if_u1.if_extents; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - count; + ovbcopy(&base[idx + count], &base[idx], + (nextents - idx) * sizeof(*base)); + xfs_iext_realloc(ip, -count, whichfork); +} + +/* + * Convert an extents-format file into a btree-format file. + * The new file will have a root block (in the inode) and a single child block. 
+ */
+/*
+ * On success returns 0, stores the new cursor in *curp (which must be NULL
+ * on entry) and sets *logflagsp to XFS_ILOG_CORE | XFS_ILOG_FBROOT(whichfork).
+ * If the child block cannot be allocated, the error from xfs_alloc_vextent
+ * is returned with *logflagsp == 0; the incore root is released again and
+ * the fork format is put back to XFS_DINODE_FMT_EXTENTS so the inode is not
+ * left claiming btree format without a root.
+ */
+STATIC int					/* error */
+xfs_bmap_extents_to_btree(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_inode_t		*ip,		/* incore inode pointer */
+	xfs_fsblock_t		*firstblock,	/* first-block-allocated */
+	xfs_bmap_free_t		*flist,		/* blocks freed in xaction */
+	xfs_btree_cur_t		**curp,		/* cursor returned to caller */
+	int			wasdel,		/* converting a delayed alloc */
+	int			*logflagsp,	/* inode logging flags */
+	int			whichfork)	/* data or attr fork */
+{
+	xfs_bmbt_block_t	*ablock;	/* allocated (child) bt block */
+	xfs_buf_t		*abp;		/* buffer for ablock */
+	xfs_alloc_arg_t		args;		/* allocation arguments */
+	xfs_bmbt_rec_t		*arp;		/* child record pointer */
+	xfs_bmbt_block_t	*block;		/* btree root block */
+	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
+	xfs_bmbt_rec_t		*ep;		/* extent list pointer */
+	int			error;		/* error return value */
+	xfs_extnum_t		i;		/* extent list index */
+	xfs_ifork_t		*ifp;		/* inode fork pointer */
+	xfs_bmbt_key_t		*kp;		/* root block key pointer */
+	xfs_mount_t		*mp;		/* mount structure */
+	xfs_extnum_t		nextents;	/* extent list size */
+	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
+	ASSERT(ifp->if_ext_max ==
+	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+	/*
+	 * Make space in the inode incore.
+	 */
+	xfs_iroot_realloc(ip, 1, whichfork);
+	ifp->if_flags |= XFS_IFBROOT;
+	/*
+	 * Fill in the root.
+	 */
+	block = ifp->if_broot;
+	INT_SET(block->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
+	INT_SET(block->bb_level, ARCH_CONVERT, 1);
+	INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
+	INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);
+	INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
+	/*
+	 * Need a cursor.  Can't allocate until bb_level is filled in.
+	 */
+	mp = ip->i_mount;
+	cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
+		whichfork);
+	cur->bc_private.b.firstblock = *firstblock;
+	cur->bc_private.b.flist = flist;
+	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
+	/*
+	 * Convert to a btree with two levels, one record in root.
+	 */
+	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
+	args.tp = tp;
+	args.mp = mp;
+	if (*firstblock == NULLFSBLOCK) {
+		args.type = XFS_ALLOCTYPE_START_BNO;
+		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
+	} else if (flist->xbf_low) {
+		args.type = XFS_ALLOCTYPE_START_BNO;
+		args.fsbno = *firstblock;
+	} else {
+		args.type = XFS_ALLOCTYPE_NEAR_BNO;
+		args.fsbno = *firstblock;
+	}
+	args.minlen = args.maxlen = args.prod = 1;
+	args.total = args.minleft = args.alignment = args.mod = args.isfl =
+		args.minalignslop = 0;
+	args.wasdel = wasdel;
+	*logflagsp = 0;
+	if ((error = xfs_alloc_vextent(&args))) {
+		/*
+		 * Unwind everything done above: release the incore root and
+		 * put the fork format back to extents, otherwise the inode
+		 * would be left marked btree-format with no root block.
+		 */
+		xfs_iroot_realloc(ip, -1, whichfork);
+		XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+		return error;
+	}
+	/*
+	 * Allocation can't fail, the space was reserved.
+	 */
+	ASSERT(args.fsbno != NULLFSBLOCK);
+	ASSERT(*firstblock == NULLFSBLOCK ||
+	       args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
+	       (flist->xbf_low &&
+		args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
+	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
+	cur->bc_private.b.allocated++;
+	ip->i_d.di_nblocks++;
+	/* The quota inodes themselves are not charged for the new block. */
+	if (XFS_IS_QUOTA_ON(mp) &&
+	    ip->i_ino != mp->m_sb.sb_uquotino &&
+	    ip->i_ino != mp->m_sb.sb_pquotino)
+		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
+	abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
+	/*
+	 * Fill in the child block.
+	 */
+	ablock = XFS_BUF_TO_BMBT_BLOCK(abp);
+	INT_SET(ablock->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
+	INT_ZERO(ablock->bb_level, ARCH_CONVERT);
+	INT_ZERO(ablock->bb_numrecs, ARCH_CONVERT);
+	INT_SET(ablock->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);
+	INT_SET(ablock->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
+	arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	/*
+	 * Copy every incore record whose start block is not a
+	 * "null start block" encoding into the child, counting as we go.
+	 */
+	for (ep = ifp->if_u1.if_extents, i = 0; i < nextents; i++, ep++) {
+		if (!ISNULLSTARTBLOCK(xfs_bmbt_get_startblock(ep))) {
+			*arp++ = *ep;
+			INT_MOD(ablock->bb_numrecs, ARCH_CONVERT, +1);
+		}
+	}
+	ASSERT(INT_GET(ablock->bb_numrecs, ARCH_CONVERT) == XFS_IFORK_NEXTENTS(ip, whichfork));
+	/*
+	 * Fill in the root key and pointer.
+	 */
+	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
+	arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
+	INT_SET(kp->br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(arp));
+	pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+	INT_SET(*pp, ARCH_CONVERT, args.fsbno);
+	/*
+	 * Do all this logging at the end so that
+	 * the root is at the right level.
+	 */
+	xfs_bmbt_log_block(cur, abp, XFS_BB_ALL_BITS);
+	xfs_bmbt_log_recs(cur, abp, 1, INT_GET(ablock->bb_numrecs, ARCH_CONVERT));
+	ASSERT(*curp == NULL);
+	*curp = cur;
+	*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FBROOT(whichfork);
+	return 0;
+}
+
+/*
+ * Insert new item(s) in the extent list for inode "ip".
+ * Count new items are inserted at offset idx.
+ */
+STATIC void
+xfs_bmap_insert_exlist(
+	xfs_inode_t	*ip,		/* incore inode pointer */
+	xfs_extnum_t	idx,		/* starting index of new items */
+	xfs_extnum_t	count,		/* number of inserted items */
+	xfs_bmbt_irec_t	*new,		/* items to insert */
+	int		whichfork)	/* data or attr fork */
+{
+	xfs_bmbt_rec_t	*base;		/* extent list base */
+	xfs_ifork_t	*ifp;		/* inode fork pointer */
+	xfs_extnum_t	nextents;	/* extent list size */
+	xfs_extnum_t	to;		/* extent list index */
+	/*
+	 * The list is resized by +count before if_bytes is re-read, so
+	 * "nextents" below presumably already counts the new slots
+	 * (depends on xfs_iext_realloc, not visible here).
+	 */
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+	xfs_iext_realloc(ip, count, whichfork);
+	base = ifp->if_u1.if_extents;	/* re-read after the realloc call */
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	ovbcopy(&base[idx], &base[idx + count],
+		(nextents - (idx + count)) * sizeof(*base));	/* open a gap at idx */
+	for (to = idx; to < idx + count; to++, new++)
+		xfs_bmbt_set_all(&base[to], new);	/* encode each new record into the gap */
+}
+
+/*
+ * Convert a local file to an extents file.
+ * This code is out of bounds for data forks of regular files,
+ * since the file data needs to get logged so things will stay consistent.
+ * (The bmap-level manipulations are ok, though).
+ *
+ * If the fork holds inline bytes, one block is allocated, the inline data
+ * is copied into its buffer and logged, and the fork is given a single
+ * extent record for that block.  Returns 0 or the error from
+ * xfs_alloc_vextent; *logflagsp is stored on both paths.
+ */
+STATIC int				/* error */
+xfs_bmap_local_to_extents(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*ip,		/* incore inode pointer */
+	xfs_fsblock_t	*firstblock,	/* first block allocated in xaction */
+	xfs_extlen_t	total,		/* total blocks needed by transaction */
+	int		*logflagsp,	/* inode logging flags */
+	int		whichfork)	/* data or attr fork */
+{
+	int		error;		/* error return value */
+	int		flags;		/* logging flags returned */
+#ifdef XFS_BMAP_TRACE
+	static char	fname[] = "xfs_bmap_local_to_extents";
+#endif
+	xfs_ifork_t	*ifp;		/* inode fork pointer */
+
+	/*
+	 * We don't want to deal with the case of keeping inode data inline yet.
+	 * So sending the data fork of a regular inode is illegal.
+	 */
+	ASSERT(!((ip->i_d.di_mode & IFMT) == IFREG &&
+		 whichfork == XFS_DATA_FORK));
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
+	flags = 0;
+	error = 0;
+	if (ifp->if_bytes) {
+		xfs_alloc_arg_t	args;	/* allocation arguments */
+		xfs_buf_t	*bp;	/* buffer for extent list block */
+		xfs_bmbt_rec_t	*ep;	/* extent list pointer */
+
+		args.tp = tp;
+		args.mp = ip->i_mount;
+		ASSERT(ifp->if_flags & XFS_IFINLINE);
+		/*
+		 * Allocate a block.  We know we need only one, since the
+		 * file currently fits in an inode.
+		 */
+		if (*firstblock == NULLFSBLOCK) {
+			args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
+			args.type = XFS_ALLOCTYPE_START_BNO;
+		} else {
+			args.fsbno = *firstblock;
+			args.type = XFS_ALLOCTYPE_NEAR_BNO;
+		}
+		args.total = total;
+		args.mod = args.minleft = args.alignment = args.wasdel =
+			args.isfl = args.minalignslop = 0;
+		args.minlen = args.maxlen = args.prod = 1;
+		if (error = xfs_alloc_vextent(&args))
+			goto done;	/* flags is still 0 here */
+		/*
+		 * Can't fail, the space was reserved.
+		 */
+		ASSERT(args.fsbno != NULLFSBLOCK);
+		ASSERT(args.len == 1);
+		*firstblock = args.fsbno;
+		bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
+		bcopy(ifp->if_u1.if_data, (char *)XFS_BUF_PTR(bp),
+			ifp->if_bytes);	/* inline data -> new block */
+		xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
+		xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
+		xfs_iext_realloc(ip, 1, whichfork);
+		ep = ifp->if_u1.if_extents;
+		xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);	/* offset 0, length 1 */
+		xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork);
+		XFS_IFORK_NEXT_SET(ip, whichfork, 1);
+		ip->i_d.di_nblocks = 1;	/* NOTE(review): assigned, not incremented -- confirm no other fork can own blocks here */
+		if (XFS_IS_QUOTA_ON(args.mp) &&
+		    ip->i_ino != args.mp->m_sb.sb_uquotino &&
+		    ip->i_ino != args.mp->m_sb.sb_pquotino)
+			xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT,
+				1L);
+		flags |= XFS_ILOG_FEXT(whichfork);
+	} else
+		ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
+	ifp->if_flags &= ~XFS_IFINLINE;
+	ifp->if_flags |= XFS_IFEXTENTS;
+	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+	flags |= XFS_ILOG_CORE;
+done:
+	*logflagsp = flags;
+	return error;
+}
+/*
+ * Find the record covering file block "bno" in the "nextents" records
+ * starting at "base".
+ *
+ * Lookup order: the record at the cached index "lastx" (used only when
+ * lastx is not NULLEXTNUM and is below nextents), then the record right
+ * after it, then the empty-list case, then the first record when bno is 0
+ * and that record starts at offset 0, and finally a binary search.
+ *
+ * The return value is the final "ep": the record containing bno, or the
+ * record following bno when the binary search lands in a gap, or NULL when
+ * the binary search runs past the last record.  *eofp is set to 1 for an
+ * empty list and for the ran-past-the-end case, otherwise to 0.  *lastxp
+ * and *gotp are stored on every path.  prevp->br_startoff is preset to
+ * NULLFILEOFF; the rest of *prevp is written only on the binary-search
+ * miss paths.
+ *
+ * NOTE(review): with nextents == 0 and a NULL "ep", nothing is stored in
+ * the local "got" before it is copied to *gotp, so callers presumably
+ * must not read *gotp when *eofp is set -- confirm against callers.
+ */
+xfs_bmbt_rec_t *			/* pointer to found extent entry */
+xfs_bmap_do_search_extents(
+	xfs_bmbt_rec_t	*base,		/* base of extent list */
+	xfs_extnum_t	lastx,		/* last extent index used */
+	xfs_extnum_t	nextents,	/* extent list size */
+	xfs_fileoff_t	bno,		/* block number searched for */
+	int		*eofp,		/* out: end of file found */
+	xfs_extnum_t	*lastxp,	/* out: last extent index */
+	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
+	xfs_bmbt_irec_t	*prevp)		/* out: previous extent entry found */
+{
+	xfs_bmbt_rec_t	*ep;		/* extent list entry pointer */
+	xfs_bmbt_irec_t	got;		/* extent list entry, decoded */
+	int		high;		/* high index of binary search */
+	int		low;		/* low index of binary search */
+	/*
+	 * The conditions below decode startoff/blockcount into "got" as a
+	 * side effect of testing them; do not reorder the operands.
+	 */
+	if (lastx != NULLEXTNUM && lastx < nextents)
+		ep = base + lastx;
+	else
+		ep = NULL;
+	prevp->br_startoff = NULLFILEOFF;
+	if (ep && bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep)) &&
+	    bno < got.br_startoff +
+		  (got.br_blockcount = xfs_bmbt_get_blockcount(ep)))
+		*eofp = 0;	/* hit in the cached record */
+	else if (ep && lastx < nextents - 1 &&
+		 bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep + 1)) &&
+		 bno < got.br_startoff +
+		       (got.br_blockcount = xfs_bmbt_get_blockcount(ep + 1))) {
+		lastx++;	/* hit in the record after the cached one */
+		ep++;
+		*eofp = 0;
+	} else if (nextents == 0)
+		*eofp = 1;
+	else if (bno == 0 &&
+		 (got.br_startoff = xfs_bmbt_get_startoff(base)) == 0) {
+		ep = base;
+		lastx = 0;
+		got.br_blockcount = xfs_bmbt_get_blockcount(ep);
+		*eofp = 0;
+	} else {
+		/* binary search the extents array */
+		low = 0;
+		high = nextents - 1;
+		while (low <= high) {
+			XFS_STATS_INC(xs_cmp_exlist);
+			lastx = (low + high) >> 1;
+			ep = base + lastx;
+			got.br_startoff = xfs_bmbt_get_startoff(ep);
+			got.br_blockcount = xfs_bmbt_get_blockcount(ep);
+			if (bno < got.br_startoff)
+				high = lastx - 1;
+			else if (bno >= got.br_startoff + got.br_blockcount)
+				low = lastx + 1;
+			else {
+				/* exact hit: finish decoding and return directly */
+				got.br_startblock = xfs_bmbt_get_startblock(ep);
+				got.br_state = xfs_bmbt_get_state(ep);
+				*eofp = 0;
+				*lastxp = lastx;
+				*gotp = got;
+				return ep;
+			}
+		}
+		/* Miss: "ep"/"got" describe the last record probed. */
+		if (bno >= got.br_startoff + got.br_blockcount) {
+			lastx++;
+			if (lastx == nextents) {
+				/* bno is beyond the last record */
+				*eofp = 1;
+				got.br_startblock = xfs_bmbt_get_startblock(ep);
+				got.br_state = xfs_bmbt_get_state(ep);
+				*prevp = got;
+				ep = NULL;
+			} else {
+				/* bno is in the gap after the probed record */
+				*eofp = 0;
+				xfs_bmbt_get_all(ep, prevp);
+				ep++;
+				got.br_startoff = xfs_bmbt_get_startoff(ep);
+				got.br_blockcount = xfs_bmbt_get_blockcount(ep);
+			}
+		} else {
+			/* bno is in the gap before the probed record */
+			*eofp = 0;
+			if (ep > base)
+				xfs_bmbt_get_all(ep - 1, prevp);
+		}
+	}
+	if (ep) {
+		got.br_startblock = xfs_bmbt_get_startblock(ep);
+		got.br_state = xfs_bmbt_get_state(ep);
+	}
+	*lastxp = lastx;
+	*gotp = got;
+	return ep;
+}
+
+/*
+ * Search the extents list for the inode, for the extent containing bno.
+ * If bno lies in a hole, point to the next entry. If bno lies past eof,
+ * *eofp will be set, and *prevp will contain the last entry (null if none).
+ * Else, *lastxp will be set to the index of the found + * entry; *gotp will contain the entry. + */ +STATIC xfs_bmbt_rec_t * /* pointer to found extent entry */ +xfs_bmap_search_extents( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fileoff_t bno, /* block number searched for */ + int whichfork, /* data or attr fork */ + int *eofp, /* out: end of file found */ + xfs_extnum_t *lastxp, /* out: last extent index */ + xfs_bmbt_irec_t *gotp, /* out: extent entry found */ + xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */ +{ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_bmbt_rec_t *base; /* base of extent list */ + xfs_extnum_t lastx; /* last extent index used */ + xfs_extnum_t nextents; /* extent list size */ + + XFS_STATS_INC(xs_look_exlist); + ifp = XFS_IFORK_PTR(ip, whichfork); + lastx = ifp->if_lastex; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + base = &ifp->if_u1.if_extents[0]; + + return xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp, + lastxp, gotp, prevp); +} + +/* + * Compute the worst-case number of indirect blocks that will be used + * for ip's delayed extent of length "len". + */ +STATIC xfs_filblks_t +xfs_bmap_worst_indlen( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_filblks_t len) /* delayed extent length */ +{ + int level; /* btree level number */ + int maxrecs; /* maximum record count at this level */ + xfs_mount_t *mp; /* mount structure */ + xfs_filblks_t rval; /* return value */ + + mp = ip->i_mount; + maxrecs = mp->m_bmap_dmxr[0]; + for (level = 0, rval = 0; + level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); + level++) { + len += maxrecs - 1; + do_div(len, maxrecs); + rval += len; + if (len == 1) + return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - + level - 1; + if (level == 0) + maxrecs = mp->m_bmap_dmxr[1]; + } + return rval; +} + +/* + * Add the extent to the list of extents to be free at transaction end. + * The list is maintained sorted (by block number). 
+ */ +/* ARGSUSED */ +void +xfs_bmap_add_free( + xfs_fsblock_t bno, /* fs block number of extent */ + xfs_filblks_t len, /* length of extent */ + xfs_bmap_free_t *flist, /* list of extents */ + xfs_mount_t *mp) /* mount point structure */ +{ + xfs_bmap_free_item_t *cur; /* current (next) element */ + xfs_bmap_free_item_t *new; /* new element */ + xfs_bmap_free_item_t *prev; /* previous element */ +#ifdef DEBUG + xfs_agnumber_t agno; + xfs_agblock_t agbno; + + ASSERT(bno != NULLFSBLOCK); + ASSERT(len > 0); + ASSERT(len <= MAXEXTLEN); + ASSERT(!ISNULLSTARTBLOCK(bno)); + agno = XFS_FSB_TO_AGNO(mp, bno); + agbno = XFS_FSB_TO_AGBNO(mp, bno); + ASSERT(agno < mp->m_sb.sb_agcount); + ASSERT(agbno < mp->m_sb.sb_agblocks); + ASSERT(len < mp->m_sb.sb_agblocks); + ASSERT(agbno + len <= mp->m_sb.sb_agblocks); +#endif + ASSERT(xfs_bmap_free_item_zone != NULL); + new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP); + new->xbfi_startblock = bno; + new->xbfi_blockcount = (xfs_extlen_t)len; + for (prev = NULL, cur = flist->xbf_first; + cur != NULL; + prev = cur, cur = cur->xbfi_next) { + if (cur->xbfi_startblock >= bno) + break; + } + if (prev) + prev->xbfi_next = new; + else + flist->xbf_first = new; + new->xbfi_next = cur; + flist->xbf_count++; +} + +/* + * Compute and fill in the value of the maximum depth of a bmap btree + * in this filesystem. Done once, during mount. 
+ */
+void
+xfs_bmap_compute_maxlevels(
+	xfs_mount_t	*mp,		/* file system mount structure */
+	int		whichfork)	/* data or attr fork */
+{
+	int		depth;		/* btree level */
+	uint		nblocks;	/* max blocks at this level */
+	uint		leafents;	/* max leaf entries possible */
+	int		rootrecs;	/* max records in root block */
+	int		leafmin;	/* min records in leaf block */
+	int		nodemin;	/* min records in node block */
+	int		rootsz;		/* root block size */
+
+	/*
+	 * The extent-count ceiling comes from the width of the on-disk
+	 * counter: di_nextents (signed 32-bit, xfs_extnum_t) for the data
+	 * fork, di_anextents (signed 16-bit, xfs_aextnum_t) for the attr
+	 * fork.  The space available for the in-inode root also depends
+	 * on the fork.
+	 */
+	if (whichfork == XFS_DATA_FORK) {
+		leafents = MAXEXTNUM;
+		rootsz = mp->m_attroffset;
+	} else {
+		leafents = MAXAEXTNUM;
+		rootsz = mp->m_sb.sb_inodesize - mp->m_attroffset;
+	}
+	leafmin = mp->m_bmap_dmnr[0];
+	nodemin = mp->m_bmap_dmnr[1];
+	rootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(rootsz, xfs_bmdr, 0);
+	/* Leaf blocks needed when every leaf is minimally full. */
+	nblocks = (leafents + leafmin - 1) / leafmin;
+	depth = 1;
+	while (nblocks > 1) {
+		/*
+		 * Once this level's blocks fit in the root we are done;
+		 * otherwise add another level of minimally full nodes.
+		 */
+		if (nblocks > rootrecs)
+			nblocks = (nblocks + nodemin - 1) / nodemin;
+		else
+			nblocks = 1;
+		depth++;
+	}
+	mp->m_bm_maxlevels[whichfork] = depth;
+}
+
+/*
+ * Returns the file-relative block number of the first unused block(s)
+ * in the file with at least "len" logically contiguous blocks free.
+ * This is the lowest-address hole if the file has holes, else the first block
+ * past the end of file.
+ * Return 0 if the file is currently local (in-inode).
+ */
+int						/* error */
+xfs_bmap_first_unused(
+	xfs_trans_t	*tp,			/* transaction pointer */
+	xfs_inode_t	*ip,			/* incore inode */
+	xfs_extlen_t	len,			/* size of hole to find */
+	xfs_fileoff_t	*first_unused,		/* unused block */
+	int		whichfork)		/* data or attr fork */
+{
+	xfs_ifork_t	*fork;			/* inode fork pointer */
+	xfs_bmbt_rec_t	*recs;			/* base of extent array */
+	xfs_bmbt_rec_t	*rec;			/* pointer to an extent entry */
+	xfs_extnum_t	nrecs;			/* number of extent entries */
+	xfs_extnum_t	n;			/* index into the extent array */
+	xfs_fileoff_t	floor;			/* lowest useful block */
+	xfs_fileoff_t	candidate;		/* starting useful block */
+	xfs_fileoff_t	start;			/* offset for this block */
+	xfs_fileoff_t	extent_end;		/* last block number seen */
+	int		error;			/* error return value */
+
+	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
+	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
+	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
+	/* Local forks have no extent list; the answer is always 0. */
+	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+		*first_unused = 0;
+		return 0;
+	}
+	fork = XFS_IFORK_PTR(ip, whichfork);
+	/* Bring the extent list incore if that has not happened yet. */
+	if (!(fork->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(tp, ip, whichfork);
+		if (error)
+			return error;
+	}
+	/* The incoming value is the lowest offset the caller will accept. */
+	floor = *first_unused;
+	nrecs = fork->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	recs = fork->if_u1.if_extents;
+	candidate = floor;
+	for (n = 0; n < nrecs; n++) {
+		rec = recs + n;
+		start = xfs_bmbt_get_startoff(rec);
+		/*
+		 * Stop as soon as the hole in front of this extent is
+		 * big enough and lies at or above the floor.
+		 */
+		if (start >= floor + len && start - candidate >= len)
+			break;
+		extent_end = start + xfs_bmbt_get_blockcount(rec);
+		candidate = XFS_FILEOFF_MAX(extent_end, floor);
+	}
+	/* Either the hole found above, or the first block past the list. */
+	*first_unused = candidate;
+	return 0;
+}
+
+/*
+ * Returns the file-relative block number of the last block + 1 before
+ * last_block (input value) in the file.
+ * This is not based on i_size, it is based on the extent list.
+ * Returns 0 for local files, as they do not have an extent list.
+ */
+int						/* error */
+xfs_bmap_last_before(
+	xfs_trans_t	*tp,			/* transaction pointer */
+	xfs_inode_t	*ip,			/* incore inode */
+	xfs_fileoff_t	*last_block,		/* last block */
+	int		whichfork)		/* data or attr fork */
+{
+	xfs_ifork_t	*fork;			/* inode fork pointer */
+	xfs_bmbt_rec_t	*rec;			/* pointer to last extent */
+	xfs_bmbt_irec_t	cur_ext;		/* current extent value */
+	xfs_bmbt_irec_t	prev_ext;		/* previous extent value */
+	xfs_fileoff_t	target;			/* input file offset */
+	xfs_extnum_t	index;			/* last extent used */
+	int		at_eof;			/* hit end of file */
+	int		error;			/* error return value */
+
+	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+	case XFS_DINODE_FMT_LOCAL:
+		/* No extent list for local forks. */
+		*last_block = 0;
+		return 0;
+	case XFS_DINODE_FMT_BTREE:
+	case XFS_DINODE_FMT_EXTENTS:
+		break;
+	default:
+		return XFS_ERROR(EIO);
+	}
+	fork = XFS_IFORK_PTR(ip, whichfork);
+	/* Bring the extent list incore if that has not happened yet. */
+	if (!(fork->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(tp, ip, whichfork);
+		if (error)
+			return error;
+	}
+	/* Look up the block just in front of the caller's offset. */
+	target = *last_block - 1;
+	rec = xfs_bmap_search_extents(ip, target, whichfork, &at_eof, &index,
+		&cur_ext, &prev_ext);
+	/*
+	 * If that block is mapped, *last_block is already the right answer.
+	 * Otherwise fall back to the end of the previous extent, or to 0
+	 * when the search reported no previous extent.
+	 */
+	if (at_eof || xfs_bmbt_get_startoff(rec) > target)
+		*last_block = (prev_ext.br_startoff == NULLFILEOFF) ?
+			0 : prev_ext.br_startoff + prev_ext.br_blockcount;
+	return 0;
+}
+
+/*
+ * Returns the file-relative block number of the first block past eof in
+ * the file. This is not based on i_size, it is based on the extent list.
+ * Returns 0 for local files, as they do not have an extent list.
+ */
+int						/* error */
+xfs_bmap_last_offset(
+	xfs_trans_t	*tp,			/* transaction pointer */
+	xfs_inode_t	*ip,			/* incore inode */
+	xfs_fileoff_t	*last_block,		/* last block */
+	int		whichfork)		/* data or attr fork */
+{
+	xfs_ifork_t	*fork;			/* inode fork pointer */
+	xfs_bmbt_rec_t	*recs;			/* base of extent array */
+	xfs_bmbt_rec_t	*tail;			/* pointer to last extent */
+	xfs_extnum_t	nrecs;			/* number of extent entries */
+	int		error;			/* error return value */
+
+	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+	case XFS_DINODE_FMT_LOCAL:
+		/* No extent list for local forks. */
+		*last_block = 0;
+		return 0;
+	case XFS_DINODE_FMT_BTREE:
+	case XFS_DINODE_FMT_EXTENTS:
+		break;
+	default:
+		return XFS_ERROR(EIO);
+	}
+	fork = XFS_IFORK_PTR(ip, whichfork);
+	/* Bring the extent list incore if that has not happened yet. */
+	if (!(fork->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(tp, ip, whichfork);
+		if (error)
+			return error;
+	}
+	nrecs = fork->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	if (nrecs) {
+		/* The answer is the end of the final record in the list. */
+		recs = fork->if_u1.if_extents;
+		ASSERT(recs != NULL);
+		tail = recs + (nrecs - 1);
+		*last_block = xfs_bmbt_get_startoff(tail) +
+			      xfs_bmbt_get_blockcount(tail);
+	} else {
+		/* Empty extent list. */
+		*last_block = 0;
+	}
+	return 0;
+}
+
+/*
+ * Returns whether the selected fork of the inode has exactly one
+ * block or not. For the data fork we check this matches di_size,
+ * implying the file's range is 0..bsize-1.
+ */
+int					/* 1=>1 block, 0=>otherwise */
+xfs_bmap_one_block(
+	xfs_inode_t	*ip,		/* incore inode */
+	int		whichfork)	/* data or attr fork */
+{
+	xfs_ifork_t	*fork;		/* inode fork pointer */
+	xfs_bmbt_irec_t	only;		/* internal version of extent */
+	int		single;		/* return value */
+
+#ifndef DEBUG
+	/*
+	 * Non-debug builds answer for the data fork from di_size alone;
+	 * debug builds fall through and cross-check it against the extent.
+	 */
+	if (whichfork == XFS_DATA_FORK)
+		return ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize;
+#endif	/* !DEBUG */
+	/* Must be an extents-format fork holding exactly one record. */
+	if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1 ||
+	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+		return 0;
+	fork = XFS_IFORK_PTR(ip, whichfork);
+	ASSERT(fork->if_flags & XFS_IFEXTENTS);
+	xfs_bmbt_get_all(fork->if_u1.if_extents, &only);
+	/* One block means: starts at file offset 0 and is one block long. */
+	single = only.br_startoff == 0 && only.br_blockcount == 1;
+	ASSERT(!(single && whichfork == XFS_DATA_FORK) ||
+	       ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
+	return single;
+}
+
+/*
+ * Read in the extents to if_extents.
+ * All inode fields are set up by caller, we just traverse the btree
+ * and copy the records in. If the file system cannot contain unwritten
+ * extents, the records are checked for no "state" flags.
+ */
+int					/* error */
+xfs_bmap_read_extents(
+	xfs_trans_t		*tp,	/* transaction pointer */
+	xfs_inode_t		*ip,	/* incore inode */
+	int			whichfork) /* data or attr fork */
+{
+	xfs_bmbt_block_t	*block;	/* current btree block */
+	xfs_fsblock_t		bno;	/* block # of "block" */
+	xfs_buf_t		*bp;	/* buffer for "block" */
+	int			error;	/* error return value */
+	xfs_exntfmt_t		exntf;	/* XFS_EXTFMT_NOSTATE, if checking */
+#ifdef XFS_BMAP_TRACE
+	static char		fname[] = "xfs_bmap_read_extents";
+#endif
+	xfs_extnum_t		i;	/* index into the extents list */
+	xfs_ifork_t		*ifp;	/* fork structure */
+	int			level;	/* btree level, for checking */
+	xfs_mount_t		*mp;	/* file system mount structure */
+	xfs_bmbt_ptr_t		*pp;	/* pointer to block address */
+	/* REFERENCED */
+	xfs_extnum_t		room;	/* number of entries there's room for */
+	xfs_bmbt_rec_t		*trp;	/* target record pointer */
+	/*
+	 * Overview: take the first child pointer from the incore root,
+	 * walk down the leftmost path to the first leaf, then follow
+	 * bb_rightsib across the leaves, copying each leaf's records into
+	 * if_u1.if_extents.  Returns 0, the error from xfs_btree_read_bufl,
+	 * or EFSCORRUPTED (via error0, which also releases bp).
+	 */
+	bno = NULLFSBLOCK;
+	mp = ip->i_mount;
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
+					XFS_EXTFMT_INODE(ip);	/* attr fork records are always checked */
+	block = ifp->if_broot;
+	/*
+	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
+	 */
+	ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
+	level = INT_GET(block->bb_level, ARCH_CONVERT);
+	pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
+	ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO);
+	ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount);
+	ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks);
+	bno = INT_GET(*pp, ARCH_CONVERT);
+	/*
+	 * Go down the tree until leaf level is reached, following the first
+	 * pointer (leftmost) at each level.
+	 * "level" is decremented by the loop test before the body runs, so
+	 * inside the body it is the level of the block just read; the leaf
+	 * (level 0) breaks out with its buffer still held.
+	 */
+	while (level-- > 0) {
+		if (error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+				XFS_BMAP_BTREE_REF))
+			return error;	/* no buffer is held at this point */
+		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+		XFS_WANT_CORRUPTED_GOTO(
+			XFS_BMAP_SANITY_CHECK(mp, block, level),
+			error0);
+		if (level == 0)
+			break;
+		pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block,
+			1, mp->m_bmap_dmxr[1]);
+		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, INT_GET(*pp, ARCH_CONVERT)), error0);
+		bno = INT_GET(*pp, ARCH_CONVERT);
+		xfs_trans_brelse(tp, bp);
+	}
+	/*
+	 * Here with bp and block set to the leftmost leaf node in the tree.
+	 */
+	room = ifp->if_bytes / (uint)sizeof(*trp);
+	trp = ifp->if_u1.if_extents;
+	i = 0;
+	/*
+	 * Loop over all leaf nodes.  Copy information to the extent list.
+	 */
+	for (;;) {
+		xfs_bmbt_rec_t	*frp;
+		xfs_fsblock_t	nextbno;
+		xfs_extnum_t	num_recs;
+
+		/*
+		 * Refuse to copy more records than the incore list has
+		 * room for; that is reported as dinode corruption.
+		 */
+		num_recs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+		if (i + num_recs > room) {
+			ASSERT(i + num_recs <= room);
+			xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+				"corrupt dinode %Lu, (btree extents). Unmount and run xfs_repair.",
+				ip->i_ino);
+			goto error0;
+		}
+		XFS_WANT_CORRUPTED_GOTO(
+			XFS_BMAP_SANITY_CHECK(mp, block, 0),
+			error0);
+		/*
+		 * Read-ahead the next leaf block, if any.
+		 */
+		nextbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
+		if (nextbno != NULLFSBLOCK)
+			xfs_btree_reada_bufl(mp, nextbno, 1);
+		/*
+		 * Copy records into the extent list.
+		 */
+		frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
+			block, 1, mp->m_bmap_dmxr[0]);
+		bcopy(frp, trp, num_recs * sizeof(*frp));
+		if (exntf == XFS_EXTFMT_NOSTATE) {
+			/*
+			 * Check all attribute bmap btree records and
+			 * any "older" data bmap btree records for a
+			 * set bit in the "extent flag" position.
+			 */
+			if (xfs_check_nostate_extents(trp, num_recs)) {
+				goto error0;
+			}
+		}
+		trp += num_recs;
+		i += num_recs;
+		xfs_trans_brelse(tp, bp);	/* done with this leaf */
+		bno = nextbno;
+		/*
+		 * If we've reached the end, stop.
+		 */
+		if (bno == NULLFSBLOCK)
+			break;
+		if (error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+				XFS_BMAP_BTREE_REF))
+			return error;	/* previous leaf was already released above */
+		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+	}
+	ASSERT(i == ifp->if_bytes / (uint)sizeof(*trp));
+	ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
+	xfs_bmap_trace_exlist(fname, ip, i, whichfork);
+	return 0;
+error0:
+	xfs_trans_brelse(tp, bp);	/* every goto error0 is taken with bp held */
+	return XFS_ERROR(EFSCORRUPTED);
+}
+
+/*
+ * Map file blocks to filesystem blocks.
+ * File range is given by the bno/len pair.
+ * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set)
+ * into a hole or past eof.
+ * Only allocates blocks from a single allocation group,
+ * to avoid locking problems.
+ * The returned value in "firstblock" from the first call in a transaction
+ * must be remembered and presented to subsequent calls in "firstblock".
+ * An upper bound for the number of blocks to be allocated is supplied to
+ * the first call in "total"; if no allocation group has that many free
+ * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
+ */
+int					/* error */
+xfs_bmapi(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*ip,		/* incore inode */
+	xfs_fileoff_t	bno,		/* starting file offs. mapped */
+	xfs_filblks_t	len,		/* length to map in file */
+	int		flags,		/* XFS_BMAPI_... */
+	xfs_fsblock_t	*firstblock,	/* first allocated block
+					   controls a.g.
for allocs */ + xfs_extlen_t total, /* total blocks needed */ + xfs_bmbt_irec_t *mval, /* output: map values */ + int *nmap, /* i/o: mval size/count */ + xfs_bmap_free_t *flist) /* i/o: list extents to free */ +{ + xfs_fsblock_t abno; /* allocated block number */ + xfs_extlen_t alen; /* allocated extent length */ + xfs_fileoff_t aoff; /* allocated file offset */ + xfs_bmalloca_t bma; /* args for xfs_bmap_alloc */ + int contig; /* allocation must be one extent */ + xfs_btree_cur_t *cur; /* bmap btree cursor */ + char delay; /* this request is for delayed alloc */ + xfs_fileoff_t end; /* end of mapped file region */ + int eof; /* we've hit the end of extent list */ + xfs_bmbt_rec_t *ep; /* extent list entry pointer */ + int error; /* error return */ + char exact; /* don't do all of wasdelayed extent */ + xfs_bmbt_irec_t got; /* current extent list record */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_extlen_t indlen; /* indirect blocks length */ + char inhole; /* current location is hole in file */ + xfs_extnum_t lastx; /* last useful extent number */ + int logflags; /* flags for transaction logging */ + xfs_extlen_t minleft; /* min blocks left after allocation */ + xfs_extlen_t minlen; /* min allocation size */ + xfs_mount_t *mp; /* xfs mount structure */ + int n; /* current extent index */ + int nallocs; /* number of extents alloc\'d */ + xfs_extnum_t nextents; /* number of extents in file */ + xfs_fileoff_t obno; /* old block number (offset) */ + xfs_bmbt_irec_t prev; /* previous extent list record */ + int stateless; /* ignore state flag set */ + int tmp_logflags; /* temp flags holder */ + char trim; /* output trimmed to match range */ + char userdata; /* allocating non-metadata */ + char wasdelay; /* old extent was delayed */ + int whichfork; /* data or attr fork */ + char wr; /* this is a write request */ + int rsvd; /* OK to allocate reserved blocks */ +#ifdef DEBUG + xfs_fileoff_t orig_bno; /* original block number value */ + int orig_flags; /* 
original flags arg value */ + xfs_filblks_t orig_len; /* original value of len arg */ + xfs_bmbt_irec_t *orig_mval; /* original value of mval */ + int orig_nmap; /* original value of *nmap */ + + orig_bno = bno; + orig_len = len; + orig_flags = flags; + orig_mval = mval; + orig_nmap = *nmap; +#endif + ASSERT(*nmap >= 1); + ASSERT(*nmap <= XFS_BMAP_MAX_NMAP || !(flags & XFS_BMAPI_WRITE)); + whichfork = (flags & XFS_BMAPI_ATTRFORK) ? + XFS_ATTR_FORK : XFS_DATA_FORK; + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) + return XFS_ERROR(EFSCORRUPTED); + mp = ip->i_mount; + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT(ifp->if_ext_max == + XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); + if (wr = (flags & XFS_BMAPI_WRITE) != 0) + XFS_STATS_INC(xs_blk_mapw); + else + XFS_STATS_INC(xs_blk_mapr); + delay = (flags & XFS_BMAPI_DELAY) != 0; + trim = (flags & XFS_BMAPI_ENTIRE) == 0; + userdata = (flags & XFS_BMAPI_METADATA) == 0; + exact = (flags & XFS_BMAPI_EXACT) != 0; + rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0; + contig = (flags & XFS_BMAPI_CONTIG) != 0; + /* + * stateless is used to combine extents which + * differ only due to the state of the extents. + * This technique is used from xfs_getbmap() + * when the caller does not wish to see the + * separation (which is the default). + * + * This technique is also used when writing a + * buffer which has been partially written, + * (usually by being flushed during a chunkread), + * to ensure one write takes place. This also + * prevents a change in the xfs inode extents at + * this time, intentionally. This change occurs + * on completion of the write operation, in + * xfs_strat_comp(), where the xfs_bmapi() call + * is transactioned, and the extents combined. 
+ */ + stateless = (flags & XFS_BMAPI_IGSTATE) != 0; + if (stateless && wr) /* if writing unwritten space, no */ + wr = 0; /* allocations are allowed */ + ASSERT(wr || !delay); + logflags = 0; + nallocs = 0; + cur = NULL; + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { + ASSERT(wr && tp); + if (error = xfs_bmap_local_to_extents(tp, ip, firstblock, total, + &logflags, whichfork)) + goto error0; + } + if (wr && *firstblock == NULLFSBLOCK) { + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) + minleft = INT_GET(ifp->if_broot->bb_level, ARCH_CONVERT) + 1; + else + minleft = 1; + } else + minleft = 0; + if (!(ifp->if_flags & XFS_IFEXTENTS) && + (error = xfs_iread_extents(tp, ip, whichfork))) + goto error0; + ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, + &prev); + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + n = 0; + end = bno + len; + obno = bno; + bma.ip = NULL; + while (bno < end && n < *nmap) { + /* + * Reading past eof, act as though there's a hole + * up to end. + */ + if (eof && !wr) + got.br_startoff = end; + inhole = eof || got.br_startoff > bno; + wasdelay = wr && !inhole && !delay && + ISNULLSTARTBLOCK(got.br_startblock); + /* + * First, deal with the hole before the allocated space + * that we found, if any. + */ + if (wr && (inhole || wasdelay)) { + /* + * For the wasdelay case, we could also just + * allocate the stuff asked for in this bmap call + * but that wouldn't be as good. 
+ */ + if (wasdelay && !exact) { + alen = (xfs_extlen_t)got.br_blockcount; + aoff = got.br_startoff; + if (lastx != NULLEXTNUM && lastx) { + ep = &ifp->if_u1.if_extents[lastx - 1]; + xfs_bmbt_get_all(ep, &prev); + } + } else if (wasdelay) { + alen = (xfs_extlen_t) + XFS_FILBLKS_MIN(len, + (got.br_startoff + + got.br_blockcount) - bno); + aoff = bno; + } else { + alen = (xfs_extlen_t) + XFS_FILBLKS_MIN(len, MAXEXTLEN); + if (!eof) + alen = (xfs_extlen_t) + XFS_FILBLKS_MIN(alen, + got.br_startoff - bno); + aoff = bno; + } + minlen = contig ? alen : 1; + if (delay) { + indlen = (xfs_extlen_t) + xfs_bmap_worst_indlen(ip, alen); + ASSERT(indlen > 0); + /* + * Make a transaction-less quota reservation for + * delayed allocation blocks. This number gets + * adjusted later. + * We return EDQUOT if we haven't allocated + * blks already inside this loop; + */ + if (XFS_IS_QUOTA_ON(ip->i_mount) && + xfs_trans_reserve_blkquota(NULL, ip, + (long)alen)) { + if (n == 0) { + *nmap = 0; + ASSERT(cur == NULL); + return XFS_ERROR(EDQUOT); + } + break; + } + if (xfs_mod_incore_sb(ip->i_mount, + XFS_SBS_FDBLOCKS, + -(alen + indlen), rsvd)) { + if (XFS_IS_QUOTA_ON(ip->i_mount)) + xfs_trans_unreserve_blkquota( + NULL, ip, (long)alen); + break; + } + ip->i_delayed_blks += alen; + abno = NULLSTARTBLOCK(indlen); + } else { + /* + * If first time, allocate and fill in + * once-only bma fields. + */ + if (bma.ip == NULL) { + bma.tp = tp; + bma.ip = ip; + bma.prevp = &prev; + bma.gotp = &got; + bma.total = total; + bma.userdata = userdata; + } + /* + * Fill in changeable bma fields. + */ + bma.eof = eof; + bma.firstblock = *firstblock; + bma.alen = alen; + bma.off = aoff; + bma.wasdel = wasdelay; + bma.minlen = minlen; + bma.low = flist->xbf_low; + bma.minleft = minleft; + /* + * Only want to do the alignment at the + * eof if it is userdata and allocation length + * is larger than a stripe unit. 
+ */ + if (mp->m_dalign && alen >= mp->m_dalign && + userdata && whichfork == XFS_DATA_FORK) { + if (error = xfs_bmap_isaeof(ip, aoff, + whichfork, &bma.aeof)) + goto error0; + } else + bma.aeof = 0; + /* + * Call allocator. + */ + if (error = xfs_bmap_alloc(&bma)) + goto error0; + /* + * Copy out result fields. + */ + abno = bma.rval; + if (flist->xbf_low = bma.low) + minleft = 0; + alen = bma.alen; + aoff = bma.off; + ASSERT(*firstblock == NULLFSBLOCK || + XFS_FSB_TO_AGNO(ip->i_mount, + *firstblock) == + XFS_FSB_TO_AGNO(ip->i_mount, + bma.firstblock) || + (flist->xbf_low && + XFS_FSB_TO_AGNO(ip->i_mount, + *firstblock) < + XFS_FSB_TO_AGNO(ip->i_mount, + bma.firstblock))); + *firstblock = bma.firstblock; + if (cur) + cur->bc_private.b.firstblock = + *firstblock; + if (abno == NULLFSBLOCK) + break; + if ((ifp->if_flags & XFS_IFBROOT) && !cur) { + cur = xfs_btree_init_cursor(ip->i_mount, + tp, NULL, 0, XFS_BTNUM_BMAP, + ip, whichfork); + cur->bc_private.b.firstblock = + *firstblock; + cur->bc_private.b.flist = flist; + } + /* + * Bump the number of extents we've allocated + * in this call. + */ + nallocs++; + } + if (cur) + cur->bc_private.b.flags = + wasdelay ? XFS_BTCUR_BPRV_WASDEL : 0; + got.br_startoff = aoff; + got.br_startblock = abno; + got.br_blockcount = alen; + got.br_state = XFS_EXT_NORM; /* assume normal */ + /* + * Determine state of extent, and the filesystem. + * A wasdelay extent has been initialized, so + * shouldn't be flagged as unwritten. 
+ */ + if (wr && XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) { + if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) + got.br_state = XFS_EXT_UNWRITTEN; + } + error = xfs_bmap_add_extent(ip, lastx, &cur, &got, + firstblock, flist, &tmp_logflags, whichfork, + rsvd); + logflags |= tmp_logflags; + if (error) + goto error0; + lastx = ifp->if_lastex; + ep = &ifp->if_u1.if_extents[lastx]; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + xfs_bmbt_get_all(ep, &got); + ASSERT(got.br_startoff <= aoff); + ASSERT(got.br_startoff + got.br_blockcount >= + aoff + alen); +#ifdef DEBUG + if (delay) { + ASSERT(ISNULLSTARTBLOCK(got.br_startblock)); + ASSERT(STARTBLOCKVAL(got.br_startblock) > 0); + } + ASSERT(got.br_state == XFS_EXT_NORM || + got.br_state == XFS_EXT_UNWRITTEN); +#endif + /* + * Fall down into the found allocated space case. + */ + } else if (inhole) { + /* + * Reading in a hole. + */ + mval->br_startoff = bno; + mval->br_startblock = HOLESTARTBLOCK; + mval->br_blockcount = + XFS_FILBLKS_MIN(len, got.br_startoff - bno); + mval->br_state = XFS_EXT_NORM; + bno += mval->br_blockcount; + len -= mval->br_blockcount; + mval++; + n++; + continue; + } + /* + * Then deal with the allocated space we found. + */ + ASSERT(ep != NULL); + if (trim && (got.br_startoff + got.br_blockcount > obno)) { + if (obno > bno) + bno = obno; + ASSERT((bno >= obno) || (n == 0)); + ASSERT(bno < end); + mval->br_startoff = bno; + if (ISNULLSTARTBLOCK(got.br_startblock)) { + ASSERT(!wr || delay); + mval->br_startblock = DELAYSTARTBLOCK; + } else + mval->br_startblock = + got.br_startblock + + (bno - got.br_startoff); + /* + * Return the minimum of what we got and what we + * asked for for the length. We can use the len + * variable here because it is modified below + * and we could have been there before coming + * here if the first part of the allocation + * didn't overlap what was asked for. 
+ */ + mval->br_blockcount = + XFS_FILBLKS_MIN(end - bno, got.br_blockcount - + (bno - got.br_startoff)); + mval->br_state = got.br_state; + ASSERT(mval->br_blockcount <= len); + } else { + *mval = got; + if (ISNULLSTARTBLOCK(mval->br_startblock)) { + ASSERT(!wr || delay); + mval->br_startblock = DELAYSTARTBLOCK; + } + } + + /* + * Check if writing previously allocated but + * unwritten extents. + */ + if (wr && mval->br_state == XFS_EXT_UNWRITTEN && + ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) { + /* + * Modify (by adding) the state flag, if writing. + */ + ASSERT(mval->br_blockcount <= len); + if ((ifp->if_flags & XFS_IFBROOT) && !cur) { + cur = xfs_btree_init_cursor(ip->i_mount, + tp, NULL, 0, XFS_BTNUM_BMAP, + ip, whichfork); + cur->bc_private.b.firstblock = + *firstblock; + cur->bc_private.b.flist = flist; + } + mval->br_state = XFS_EXT_NORM; + error = xfs_bmap_add_extent(ip, lastx, &cur, mval, + firstblock, flist, &tmp_logflags, whichfork, + rsvd); + logflags |= tmp_logflags; + if (error) + goto error0; + lastx = ifp->if_lastex; + ep = &ifp->if_u1.if_extents[lastx]; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + xfs_bmbt_get_all(ep, &got); + /* + * We may have combined previously unwritten + * space with written space, so generate + * another request. 
+ */ + if (mval->br_blockcount < len) + continue; + } + + ASSERT(!trim || + ((mval->br_startoff + mval->br_blockcount) <= end)); + ASSERT(!trim || (mval->br_blockcount <= len) || + (mval->br_startoff < obno)); + bno = mval->br_startoff + mval->br_blockcount; + len = end - bno; + if (n > 0 && mval->br_startoff == mval[-1].br_startoff) { + ASSERT(mval->br_startblock == mval[-1].br_startblock); + ASSERT(mval->br_blockcount > mval[-1].br_blockcount); + ASSERT(mval->br_state == mval[-1].br_state); + mval[-1].br_blockcount = mval->br_blockcount; + mval[-1].br_state = mval->br_state; + } else if (n > 0 && mval->br_startblock != DELAYSTARTBLOCK && + mval[-1].br_startblock != DELAYSTARTBLOCK && + mval[-1].br_startblock != HOLESTARTBLOCK && + mval->br_startblock == + mval[-1].br_startblock + mval[-1].br_blockcount && + (stateless || mval[-1].br_state == mval->br_state)) { + ASSERT(mval->br_startoff == + mval[-1].br_startoff + mval[-1].br_blockcount); + mval[-1].br_blockcount += mval->br_blockcount; + } else if (n > 0 && + mval->br_startblock == DELAYSTARTBLOCK && + mval[-1].br_startblock == DELAYSTARTBLOCK && + mval->br_startoff == + mval[-1].br_startoff + mval[-1].br_blockcount) { + mval[-1].br_blockcount += mval->br_blockcount; + mval[-1].br_state = mval->br_state; + } else if (!((n == 0) && + ((mval->br_startoff + mval->br_blockcount) <= + obno))) { + mval++; + n++; + } + /* + * If we're done, stop now. Stop when we've allocated + * XFS_BMAP_MAX_NMAP extents no matter what. Otherwise + * the transaction may get too big. + */ + if (bno >= end || n >= *nmap || nallocs >= *nmap) + break; + /* + * Else go on to the next record. + */ + ep++; + lastx++; + if (lastx >= nextents) { + eof = 1; + prev = got; + } else + xfs_bmbt_get_all(ep, &got); + } + ifp->if_lastex = lastx; + *nmap = n; + /* + * Transform from btree to extents, give it cur. 
+ */ + if (tp && XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && + XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) { + ASSERT(wr && cur); + error = xfs_bmap_btree_to_extents(tp, ip, cur, + &tmp_logflags, whichfork, 0); + logflags |= tmp_logflags; + if (error) + goto error0; + } + ASSERT(ifp->if_ext_max == + XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || + XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max); + error = 0; + +error0: + /* + * Log everything. Do this after conversion, there's no point in + * logging the extent list if we've converted to btree format. + */ + if ((logflags & XFS_ILOG_FEXT(whichfork)) && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) + logflags &= ~XFS_ILOG_FEXT(whichfork); + else if ((logflags & XFS_ILOG_FBROOT(whichfork)) && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) + logflags &= ~XFS_ILOG_FBROOT(whichfork); + /* + * Log whatever the flags say, even if error. Otherwise we might miss + * detecting a case where the data is changed, there's an error, + * and it's not logged so we don't shutdown when we should. + */ + if (logflags) { + ASSERT(tp && wr); + xfs_trans_log_inode(tp, ip, logflags); + } + if (cur) { + if (!error) { + ASSERT(*firstblock == NULLFSBLOCK || + XFS_FSB_TO_AGNO(ip->i_mount, *firstblock) == + XFS_FSB_TO_AGNO(ip->i_mount, + cur->bc_private.b.firstblock) || + (flist->xbf_low && + XFS_FSB_TO_AGNO(ip->i_mount, *firstblock) < + XFS_FSB_TO_AGNO(ip->i_mount, + cur->bc_private.b.firstblock))); + *firstblock = cur->bc_private.b.firstblock; + } + xfs_btree_del_cursor(cur, + error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + } + if (!error) + xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval, + orig_nmap, *nmap); + return error; +} + +/* + * Map file blocks to filesystem blocks, simple version. + * One block (extent) only, read-only. 
+ * For flags, only the XFS_BMAPI_ATTRFORK flag is examined. + * For the other flag values, the effect is as if XFS_BMAPI_METADATA + * was set and all the others were clear. + */ +int /* error */ +xfs_bmapi_single( + xfs_trans_t *tp, /* transaction pointer */ + xfs_inode_t *ip, /* incore inode */ + int whichfork, /* data or attr fork */ + xfs_fsblock_t *fsb, /* output: mapped block */ + xfs_fileoff_t bno) /* starting file offs. mapped */ +{ + int eof; /* we've hit the end of extent list */ + int error; /* error return */ + xfs_bmbt_irec_t got; /* current extent list record */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_extnum_t lastx; /* last useful extent number */ + xfs_bmbt_irec_t prev; /* previous extent list record */ + + ifp = XFS_IFORK_PTR(ip, whichfork); + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) + return XFS_ERROR(EFSCORRUPTED); + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return XFS_ERROR(EIO); + XFS_STATS_INC(xs_blk_mapr); + if (!(ifp->if_flags & XFS_IFEXTENTS) && + (error = xfs_iread_extents(tp, ip, whichfork))) + return error; + (void)xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, + &prev); + /* + * Reading past eof, act as though there's a hole + * up to end. + */ + if (eof || got.br_startoff > bno) { + *fsb = NULLFSBLOCK; + return 0; + } + ASSERT(!ISNULLSTARTBLOCK(got.br_startblock)); + ASSERT(bno < got.br_startoff + got.br_blockcount); + *fsb = got.br_startblock + (bno - got.br_startoff); + ifp->if_lastex = lastx; + return 0; +} + +/* + * Unmap (remove) blocks from a file. + * If nexts is nonzero then the number of extents to remove is limited to + * that value. If not all extents in the block range can be removed then + * *done is set. 
+ *
+ * Note on *done as implemented below: it is written nonzero when the fork
+ * has no extents, or when the backward walk has moved bno below the start
+ * of the range (or past file offset 0) or has run out of extents.  It is
+ * written zero when the loop stopped early, e.g. on the nexts limit.
+ * Error returns taken before or inside the loop leave *done untouched.
+ */
+int						/* error */
+xfs_bunmapi(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	struct xfs_inode	*ip,		/* incore inode */
+	xfs_fileoff_t		bno,		/* starting offset to unmap */
+	xfs_filblks_t		len,		/* length to unmap in file */
+	int			flags,		/* misc flags */
+	xfs_extnum_t		nexts,		/* number of extents max */
+	xfs_fsblock_t		*firstblock,	/* first allocated block
+						   controls a.g. for allocs */
+	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
+	int			*done)		/* out: nonzero once the range
+						   has been fully processed */
+{
+	int			async;		/* xactions can be async */
+	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
+	xfs_bmbt_irec_t		del;		/* extent being deleted */
+	int			eof;		/* is deleting at eof */
+	xfs_bmbt_rec_t		*ep;		/* extent list entry pointer */
+	int			error;		/* error return value */
+	xfs_extnum_t		extno;		/* loop passes counted against
+						   the nexts limit */
+	xfs_bmbt_irec_t		got;		/* current extent list entry */
+	xfs_ifork_t		*ifp;		/* inode fork pointer */
+	int			isrt;		/* freeing in rt area */
+	xfs_extnum_t		lastx;		/* last extent index used */
+	int			logflags;	/* transaction logging flags */
+	xfs_extlen_t		mod;		/* rt extent offset */
+	xfs_mount_t		*mp;		/* mount structure */
+	xfs_extnum_t		nextents;	/* size of extent list */
+	xfs_bmbt_irec_t		prev;		/* previous extent list entry */
+	xfs_fileoff_t		start;		/* first file offset deleted */
+	int			tmp_logflags;	/* partial logging flags */
+	int			wasdel;		/* was a delayed alloc extent */
+	int			whichfork;	/* data or attribute fork */
+	int			rsvd;		/* OK to allocate reserved blocks */
+	xfs_fsblock_t		sum;		/* fs block just past the end
+						   of del */
+
+	xfs_bunmap_trace(ip, bno, len, flags, (inst_t *)__return_address);
+	whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+		XFS_ATTR_FORK : XFS_DATA_FORK;
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	/*
+	 * Only extent-list and btree format forks can be unmapped.
+	 */
+	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
+		return XFS_ERROR(EFSCORRUPTED);
+	mp = ip->i_mount;
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+	async = flags & XFS_BMAPI_ASYNC;
+	rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
+	ASSERT(len > 0);
+	ASSERT(nexts >= 0);
+	ASSERT(ifp->if_ext_max ==
+	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+	/*
+	 * Read the extent list into core if it is not there yet.
+	 */
+	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
+	    (error = xfs_iread_extents(tp, ip, whichfork)))
+		return error;
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	/*
+	 * No extents at all: nothing to unmap, so report completion.
+	 */
+	if (nextents == 0) {
+		*done = 1;
+		return 0;
+	}
+	XFS_STATS_INC(xs_blk_unmap);
+	isrt = (whichfork == XFS_DATA_FORK) &&
+	       (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
+	/*
+	 * The range is processed backwards: from here on bno names the
+	 * last file block still to be handled and start the first one.
+	 */
+	start = bno;
+	bno = start + len - 1;
+	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
+		&prev);
+	/*
+	 * Check to see if the given block number is past the end of the
+	 * file, back up to the last block if so...
+	 */
+	if (eof) {
+		ep = &ifp->if_u1.if_extents[--lastx];
+		xfs_bmbt_get_all(ep, &got);
+		bno = got.br_startoff + got.br_blockcount - 1;
+	}
+	logflags = 0;
+	/*
+	 * A btree cursor is set up only when the fork has a btree root
+	 * in core; otherwise cur stays NULL.
+	 */
+	if (ifp->if_flags & XFS_IFBROOT) {
+		ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
+		cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
+			whichfork);
+		cur->bc_private.b.firstblock = *firstblock;
+		cur->bc_private.b.flist = flist;
+		cur->bc_private.b.flags = 0;
+	} else
+		cur = NULL;
+	extno = 0;
+	/*
+	 * bno == (xfs_fileoff_t)-1 is what "del.br_startoff - 1" below
+	 * produces once an extent starting at file offset 0 was handled.
+	 */
+	while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
+	       (nexts == 0 || extno < nexts)) {
+		/*
+		 * Is the found extent after a hole in which bno lives?
+		 * Just back up to the previous extent, if so.
+		 */
+		if (got.br_startoff > bno) {
+			if (--lastx < 0)
+				break;
+			ep--;
+			xfs_bmbt_get_all(ep, &got);
+		}
+		/*
+		 * Is the last block of this extent before the range
+		 * we're supposed to delete?  If so, we're done.
+		 */
+		bno = XFS_FILEOFF_MIN(bno,
+			got.br_startoff + got.br_blockcount - 1);
+		if (bno < start)
+			break;
+		/*
+		 * Then deal with the (possibly delayed) allocated space
+		 * we found.
+		 */
+		ASSERT(ep != NULL);
+		del = got;
+		wasdel = ISNULLSTARTBLOCK(del.br_startblock);
+		/*
+		 * Trim del at the front so it does not reach below start.
+		 * The start block is only advanced for non-delayed extents.
+		 */
+		if (got.br_startoff < start) {
+			del.br_startoff = start;
+			del.br_blockcount -= start - got.br_startoff;
+			if (!wasdel)
+				del.br_startblock += start - got.br_startoff;
+		}
+		/*
+		 * Trim del at the back so it ends at bno.
+		 */
+		if (del.br_startoff + del.br_blockcount > bno + 1)
+			del.br_blockcount = bno + 1 - del.br_startoff;
+		sum = del.br_startblock + del.br_blockcount;
+		if (isrt &&
+		    (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
+			/*
+			 * Realtime extent not lined up at the end.
+			 * The extent could have been split into written
+			 * and unwritten pieces, or we could just be
+			 * unmapping part of it.  But we can't really
+			 * get rid of part of a realtime extent.
+			 */
+			if (del.br_state == XFS_EXT_UNWRITTEN ||
+			    !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+				/*
+				 * This piece is unwritten, or we're not
+				 * using unwritten extents.  Skip over it.
+				 */
+				ASSERT(bno >= mod);
+				bno -= mod > del.br_blockcount ?
+					del.br_blockcount : mod;
+				if (bno < got.br_startoff) {
+					if (--lastx >= 0)
+						xfs_bmbt_get_all(--ep, &got);
+				}
+				continue;
+			}
+			/*
+			 * It's written, turn it unwritten.
+			 * This is better than zeroing it.
+			 */
+			ASSERT(del.br_state == XFS_EXT_NORM);
+			ASSERT(xfs_trans_get_block_res(tp) > 0);
+			/*
+			 * If this spans a realtime extent boundary,
+			 * chop it back to the start of the one we end at.
+			 */
+			if (del.br_blockcount > mod) {
+				del.br_startoff += del.br_blockcount - mod;
+				del.br_startblock += del.br_blockcount - mod;
+				del.br_blockcount = mod;
+			}
+			del.br_state = XFS_EXT_UNWRITTEN;
+			/*
+			 * NOTE(review): logflags is handed to
+			 * xfs_bmap_add_extent directly here (and in the
+			 * two similar realtime calls below), whereas the
+			 * xfs_bmap_del_extent call uses tmp_logflags and
+			 * ORs it in.  If xfs_bmap_add_extent assigns
+			 * rather than accumulates, flags gathered on
+			 * earlier passes would be lost -- confirm.
+			 */
+			error = xfs_bmap_add_extent(ip, lastx, &cur, &del,
+				firstblock, flist, &logflags, XFS_DATA_FORK, 0);
+			if (error)
+				goto error0;
+			goto nodelete;
+		}
+		if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) {
+			/*
+			 * Realtime extent is lined up at the end but not
+			 * at the front.  We'll get rid of full extents if
+			 * we can.
+			 */
+			mod = mp->m_sb.sb_rextsize - mod;
+			if (del.br_blockcount > mod) {
+				del.br_blockcount -= mod;
+				del.br_startoff += mod;
+				del.br_startblock += mod;
+			} else if ((del.br_startoff == start &&
+				    (del.br_state == XFS_EXT_UNWRITTEN ||
+				     xfs_trans_get_block_res(tp) == 0)) ||
+				   !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+				/*
+				 * Can't make it unwritten.  There isn't
+				 * a full extent here so just skip it.
+				 */
+				ASSERT(bno >= del.br_blockcount);
+				bno -= del.br_blockcount;
+				if (bno < got.br_startoff) {
+					if (--lastx >= 0)
+						xfs_bmbt_get_all(--ep, &got);
+				}
+				continue;
+			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
+				/*
+				 * This one is already unwritten.
+				 * It must have a written left neighbor.
+				 * Unwrite the killed part of that one and
+				 * try again.
+				 */
+				ASSERT(lastx > 0);
+				xfs_bmbt_get_all(ep - 1, &prev);
+				ASSERT(prev.br_state == XFS_EXT_NORM);
+				ASSERT(!ISNULLSTARTBLOCK(prev.br_startblock));
+				ASSERT(del.br_startblock ==
+				       prev.br_startblock + prev.br_blockcount);
+				if (prev.br_startoff < start) {
+					mod = start - prev.br_startoff;
+					prev.br_blockcount -= mod;
+					prev.br_startblock += mod;
+					prev.br_startoff = start;
+				}
+				prev.br_state = XFS_EXT_UNWRITTEN;
+				error = xfs_bmap_add_extent(ip, lastx - 1, &cur,
+					&prev, firstblock, flist, &logflags,
+					XFS_DATA_FORK, 0);
+				if (error)
+					goto error0;
+				goto nodelete;
+			} else {
+				ASSERT(del.br_state == XFS_EXT_NORM);
+				del.br_state = XFS_EXT_UNWRITTEN;
+				error = xfs_bmap_add_extent(ip, lastx, &cur,
+					&del, firstblock, flist, &logflags,
+					XFS_DATA_FORK, 0);
+				if (error)
+					goto error0;
+				goto nodelete;
+			}
+		}
+		if (wasdel) {
+			/*
+			 * Delayed allocation extent: undo the in-core
+			 * accounting made for it -- the XFS_SBS_FDBLOCKS
+			 * counter (presumably free data blocks; confirm),
+			 * the quota reservation and the inode's delayed
+			 * block count.  The xfs_mod_incore_sb return
+			 * value is not checked here.
+			 */
+			ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);
+			xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
+				(int)del.br_blockcount, rsvd);
+			if (XFS_IS_QUOTA_ON(ip->i_mount)) {
+				ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
+				ASSERT(ip->i_ino != mp->m_sb.sb_pquotino);
+				if (!isrt)
+					xfs_trans_unreserve_blkquota(NULL, ip,
+						(long)del.br_blockcount);
+				else
+					xfs_trans_unreserve_rtblkquota(NULL, ip,
+						(long)del.br_blockcount);
+			}
+			ip->i_delayed_blks -= del.br_blockcount;
+			if (cur)
+				cur->bc_private.b.flags |=
+					XFS_BTCUR_BPRV_WASDEL;
+		} else if (cur)
+			cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
+		/*
+		 * If it's the case where the directory code is running
+		 * with no block reservation, and the deleted block is in
+		 * the middle of its extent, and the resulting insert
+		 * of an extent would cause transformation to btree format,
+		 * then reject it.  The calling code will then swap
+		 * blocks around instead.
+		 * We have to do this now, rather than waiting for the
+		 * conversion to btree format, since the transaction
+		 * will be dirty.
+		 */
+		if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
+		    XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+		    XFS_IFORK_NEXTENTS(ip, whichfork) >= ifp->if_ext_max &&
+		    del.br_startoff > got.br_startoff &&
+		    del.br_startoff + del.br_blockcount <
+		    got.br_startoff + got.br_blockcount) {
+			error = XFS_ERROR(ENOSPC);
+			goto error0;
+		}
+		error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del,
+			flags, &tmp_logflags, whichfork, rsvd);
+		logflags |= tmp_logflags;
+		if (error)
+			goto error0;
+		/*
+		 * Continue with the file block just before what was deleted.
+		 */
+		bno = del.br_startoff - 1;
+nodelete:
+		lastx = ifp->if_lastex;
+		/*
+		 * If not done go on to the next (previous) record.
+		 * Reset ep in case the extents array was re-alloced.
+		 */
+		ep = &ifp->if_u1.if_extents[lastx];
+		if (bno != (xfs_fileoff_t)-1 && bno >= start) {
+			if (lastx >= XFS_IFORK_NEXTENTS(ip, whichfork) ||
+			    xfs_bmbt_get_startoff(ep) > bno) {
+				lastx--;
+				ep--;
+			}
+			if (lastx >= 0)
+				xfs_bmbt_get_all(ep, &got);
+			extno++;
+		}
+	}
+	ifp->if_lastex = lastx;
+	/*
+	 * Done when bno has moved below the range (or wrapped past
+	 * offset 0) or there are no more extents to the left.
+	 */
+	*done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;
+	ASSERT(ifp->if_ext_max ==
+	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+	/*
+	 * Convert to a btree if necessary.
+	 */
+	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) {
+		ASSERT(cur == NULL);
+		error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
+			&cur, 0, &tmp_logflags, whichfork);
+		logflags |= tmp_logflags;
+		if (error)
+			goto error0;
+	}
+	/*
+	 * transform from btree to extents, give it cur
+	 */
+	else if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
+		 XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
+		ASSERT(cur != NULL);
+		error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
+			whichfork, async);
+		logflags |= tmp_logflags;
+		if (error)
+			goto error0;
+	}
+	/*
+	 * transform from extents to local?
+	 */
+	ASSERT(ifp->if_ext_max ==
+	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+	error = 0;
+error0:
+	/*
+	 * Log everything.  Do this after conversion, there's no point in
+	 * logging the extent list if we've converted to btree format.
+	 */
+	if ((logflags & XFS_ILOG_FEXT(whichfork)) &&
+	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
+		logflags &= ~XFS_ILOG_FEXT(whichfork);
+	else if ((logflags & XFS_ILOG_FBROOT(whichfork)) &&
+		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
+		logflags &= ~XFS_ILOG_FBROOT(whichfork);
+	/*
+	 * Log inode even in the error case, if the transaction
+	 * is dirty we'll need to shut down the filesystem.
+	 */
+	if (logflags)
+		xfs_trans_log_inode(tp, ip, logflags);
+	/*
+	 * Hand the cursor's firstblock back to the caller on success,
+	 * then release the cursor either way.
+	 */
+	if (cur) {
+		if (!error) {
+			*firstblock = cur->bc_private.b.firstblock;
+			cur->bc_private.b.allocated = 0;
+		}
+		xfs_btree_del_cursor(cur,
+			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	}
+	return error;
+}
+
+/*
+ * Check the last inode extent to determine whether this allocation will result
+ * in blocks being allocated at the end of the file. When we allocate new data
+ * blocks at the end of the file which do not start at the previous data block,
+ * we will try to align the new blocks at stripe unit boundaries.
+ */ +int /* error */ +xfs_bmap_isaeof( + xfs_inode_t *ip, /* incore inode pointer */ + xfs_fileoff_t off, /* file offset in fsblocks */ + int whichfork, /* data or attribute fork */ + int *aeof) /* return value */ +{ + int error; /* error return value */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_bmbt_rec_t *lastrec; /* extent list entry pointer */ + xfs_extnum_t nextents; /* size of extent list */ + xfs_bmbt_irec_t s; /* expanded extent list entry */ + + ASSERT(whichfork == XFS_DATA_FORK); + ifp = XFS_IFORK_PTR(ip, whichfork); + if (!(ifp->if_flags & XFS_IFEXTENTS) && + (error = xfs_iread_extents(NULL, ip, whichfork))) + return error; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + if (nextents == 0) { + *aeof = 1; + return 0; + } + /* + * Go to the last extent + */ + lastrec = &ifp->if_u1.if_extents[nextents - 1]; + xfs_bmbt_get_all(lastrec, &s); + /* + * Check we are allocating in the last extent (for delayed allocations) + * or past the last extent for non-delayed allocations. + */ + *aeof = (off >= s.br_startoff && + off < s.br_startoff + s.br_blockcount && + ISNULLSTARTBLOCK(s.br_startblock)) || + off >= s.br_startoff + s.br_blockcount; + return 0; +} diff --git a/libxfs/xfs_bmap_btree.c b/libxfs/xfs_bmap_btree.c new file mode 100644 index 000000000..a221892e4 --- /dev/null +++ b/libxfs/xfs_bmap_btree.c @@ -0,0 +1,2528 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include + +/* + * Delete record pointed to by cur/level. + */ +STATIC int /* error */ +xfs_bmbt_delrec( + xfs_btree_cur_t *cur, + int level, + int async, /* deletion can be async */ + int *stat) /* success/failure */ +{ + xfs_bmbt_block_t *block; /* bmap btree block */ + xfs_fsblock_t bno; /* fs-relative block number */ + xfs_buf_t *bp; /* buffer for block */ + int error; /* error return value */ +#ifdef XFS_BMBT_TRACE + static char fname[] = "xfs_bmbt_delrec"; +#endif + int i; /* loop counter */ + int j; /* temp state */ + xfs_bmbt_key_t key; /* bmap btree key */ + xfs_bmbt_key_t *kp; /* pointer to bmap btree key */ + xfs_fsblock_t lbno; /* left sibling block number */ + xfs_buf_t *lbp; /* left buffer pointer */ + xfs_bmbt_block_t *left; /* left btree block */ + xfs_bmbt_key_t *lkp; /* left btree key */ + xfs_bmbt_ptr_t *lpp; /* left address pointer */ + int lrecs; /* left record count */ + xfs_bmbt_rec_t *lrp; /* left record pointer */ + xfs_mount_t *mp; /* file system mount point */ + xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */ + int ptr; /* key/record index */ + 
xfs_fsblock_t rbno; /* right sibling block number */ + xfs_buf_t *rbp; /* right buffer pointer */ + xfs_bmbt_block_t *right; /* right btree block */ + xfs_bmbt_key_t *rkp; /* right btree key */ + xfs_bmbt_rec_t *rp; /* pointer to bmap btree rec */ + xfs_bmbt_ptr_t *rpp; /* right address pointer */ + xfs_bmbt_block_t *rrblock; /* right-right btree block */ + xfs_buf_t *rrbp; /* right-right buffer pointer */ + int rrecs; /* right record count */ + xfs_bmbt_rec_t *rrp; /* right record pointer */ + xfs_btree_cur_t *tcur; /* temporary btree cursor */ + + XFS_BMBT_TRACE_CURSOR(cur, ENTRY); + XFS_BMBT_TRACE_ARGI(cur, level); + ptr = cur->bc_ptrs[level]; + tcur = (xfs_btree_cur_t *)0; + if (ptr == 0) { + XFS_BMBT_TRACE_CURSOR(cur, EXIT); + *stat = 0; + return 0; + } + block = xfs_bmbt_get_block(cur, level, &bp); +#ifdef DEBUG + if (error = xfs_btree_check_lblock(cur, block, level, bp)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } +#endif + if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + XFS_BMBT_TRACE_CURSOR(cur, EXIT); + *stat = 0; + return 0; + } + XFS_STATS_INC(xs_bmbt_delrec); + if (level > 0) { + kp = XFS_BMAP_KEY_IADDR(block, 1, cur); + pp = XFS_BMAP_PTR_IADDR(block, 1, cur); +#ifdef DEBUG + for (i = ptr; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) { + if (error = xfs_btree_check_lptr(cur, INT_GET(pp[i], ARCH_CONVERT), level)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + } +#endif + if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + ovbcopy(&kp[ptr], &kp[ptr - 1], + (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*kp)); + ovbcopy(&pp[ptr], &pp[ptr - 1], /* INT_: direct copy */ + (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*pp)); + xfs_bmbt_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1); + xfs_bmbt_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1); + } + } else { + rp = XFS_BMAP_REC_IADDR(block, 1, cur); + if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) 
{ + ovbcopy(&rp[ptr], &rp[ptr - 1], + (INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*rp)); + xfs_bmbt_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1); + } + if (ptr == 1) { + INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rp)); + kp = &key; + } + } + INT_MOD(block->bb_numrecs, ARCH_CONVERT, -1); + xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS); + /* + * We're at the root level. + * First, shrink the root block in-memory. + * Try to get rid of the next level down. + * If we can't then there's nothing left to do. + */ + if (level == cur->bc_nlevels - 1) { + xfs_iroot_realloc(cur->bc_private.b.ip, -1, + cur->bc_private.b.whichfork); + if (error = xfs_bmbt_killroot(cur, async)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + XFS_BMBT_TRACE_CURSOR(cur, EXIT); + *stat = 1; + return 0; + } + if (ptr == 1 && (error = xfs_bmbt_updkey(cur, kp, level + 1))) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) { + if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + XFS_BMBT_TRACE_CURSOR(cur, EXIT); + *stat = 1; + return 0; + } + rbno = INT_GET(block->bb_rightsib, ARCH_CONVERT); + lbno = INT_GET(block->bb_leftsib, ARCH_CONVERT); + /* + * One child of root, need to get a chance to copy its contents + * into the root and delete it. Can't go up to next level, + * there's nothing to delete there. 
+ */ + if (lbno == NULLFSBLOCK && rbno == NULLFSBLOCK && + level == cur->bc_nlevels - 2) { + if (error = xfs_bmbt_killroot(cur, async)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + XFS_BMBT_TRACE_CURSOR(cur, EXIT); + *stat = 1; + return 0; + } + ASSERT(rbno != NULLFSBLOCK || lbno != NULLFSBLOCK); + if (error = xfs_btree_dup_cursor(cur, &tcur)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + bno = NULLFSBLOCK; + if (rbno != NULLFSBLOCK) { + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (error = xfs_bmbt_increment(tcur, level, &i)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + rbp = tcur->bc_bufs[level]; + right = XFS_BUF_TO_BMBT_BLOCK(rbp); +#ifdef DEBUG + if (error = xfs_btree_check_lblock(cur, right, level, rbp)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } +#endif + bno = INT_GET(right->bb_leftsib, ARCH_CONVERT); + if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >= + XFS_BMAP_BLOCK_IMINRECS(level, cur)) { + if (error = xfs_bmbt_lshift(tcur, level, &i)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + if (i) { + ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >= + XFS_BMAP_BLOCK_IMINRECS(level, tcur)); + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + tcur = NULL; + if (level > 0) { + if (error = xfs_bmbt_decrement(cur, + level, &i)) { + XFS_BMBT_TRACE_CURSOR(cur, + ERROR); + goto error0; + } + } + XFS_BMBT_TRACE_CURSOR(cur, EXIT); + *stat = 1; + return 0; + } + } + rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT); + if (lbno != NULLFSBLOCK) { + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + if (error = xfs_bmbt_decrement(tcur, level, &i)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto 
error0; + } + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + } + if (lbno != NULLFSBLOCK) { + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + /* + * decrement to last in block + */ + if (error = xfs_bmbt_decrement(tcur, level, &i)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + lbp = tcur->bc_bufs[level]; + left = XFS_BUF_TO_BMBT_BLOCK(lbp); +#ifdef DEBUG + if (error = xfs_btree_check_lblock(cur, left, level, lbp)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } +#endif + bno = INT_GET(left->bb_rightsib, ARCH_CONVERT); + if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >= + XFS_BMAP_BLOCK_IMINRECS(level, cur)) { + if (error = xfs_bmbt_rshift(tcur, level, &i)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + if (i) { + ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >= + XFS_BMAP_BLOCK_IMINRECS(level, tcur)); + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + tcur = NULL; + if (level == 0) + cur->bc_ptrs[0]++; + XFS_BMBT_TRACE_CURSOR(cur, EXIT); + *stat = 1; + return 0; + } + } + lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT); + } + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + tcur = NULL; + mp = cur->bc_mp; + ASSERT(bno != NULLFSBLOCK); + if (lbno != NULLFSBLOCK && + lrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= XFS_BMAP_BLOCK_IMAXRECS(level, cur)) { + rbno = bno; + right = block; + rbp = bp; + if (error = xfs_btree_read_bufl(mp, cur->bc_tp, lbno, 0, &lbp, + XFS_BMAP_BTREE_REF)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + left = XFS_BUF_TO_BMBT_BLOCK(lbp); + if (error = xfs_btree_check_lblock(cur, left, level, lbp)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + } else if (rbno != NULLFSBLOCK && + rrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= + XFS_BMAP_BLOCK_IMAXRECS(level, cur)) { + lbno = bno; + left = block; + lbp = bp; + if (error = xfs_btree_read_bufl(mp, cur->bc_tp, 
rbno, 0, &rbp, + XFS_BMAP_BTREE_REF)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + right = XFS_BUF_TO_BMBT_BLOCK(rbp); + if (error = xfs_btree_check_lblock(cur, right, level, rbp)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT); + } else { + if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + XFS_BMBT_TRACE_CURSOR(cur, EXIT); + *stat = 1; + return 0; + } + if (level > 0) { + lkp = XFS_BMAP_KEY_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur); + lpp = XFS_BMAP_PTR_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur); + rkp = XFS_BMAP_KEY_IADDR(right, 1, cur); + rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); +#ifdef DEBUG + for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) { + if (error = xfs_btree_check_lptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + } +#endif + bcopy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp)); + bcopy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp)); + xfs_bmbt_log_keys(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, + INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT)); + xfs_bmbt_log_ptrs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, + INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT)); + } else { + lrp = XFS_BMAP_REC_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur); + rrp = XFS_BMAP_REC_IADDR(right, 1, cur); + bcopy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp)); + xfs_bmbt_log_recs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, + INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT)); + } + INT_MOD(left->bb_numrecs, ARCH_CONVERT, INT_GET(right->bb_numrecs, ARCH_CONVERT)); + left->bb_rightsib = right->bb_rightsib; /* INT_: 
direct copy */ + xfs_bmbt_log_block(cur, lbp, XFS_BB_RIGHTSIB | XFS_BB_NUMRECS); + if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) { + if (error = xfs_btree_read_bufl(mp, cur->bc_tp, + INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rrbp, + XFS_BMAP_BTREE_REF)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp); + if (error = xfs_btree_check_lblock(cur, rrblock, level, rrbp)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno); + xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB); + } + xfs_bmap_add_free(XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(rbp)), 1, + cur->bc_private.b.flist, mp); + if (!async) + xfs_trans_set_sync(cur->bc_tp); + cur->bc_private.b.ip->i_d.di_nblocks--; + xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, XFS_ILOG_CORE); + if (XFS_IS_QUOTA_ON(mp) && + cur->bc_private.b.ip->i_ino != mp->m_sb.sb_uquotino && + cur->bc_private.b.ip->i_ino != mp->m_sb.sb_pquotino) + xfs_trans_mod_dquot_byino(cur->bc_tp, cur->bc_private.b.ip, + XFS_TRANS_DQ_BCOUNT, -1L); + xfs_trans_binval(cur->bc_tp, rbp); + if (bp != lbp) { + cur->bc_bufs[level] = lbp; + cur->bc_ptrs[level] += lrecs; + cur->bc_ra[level] = 0; + } else if (error = xfs_bmbt_increment(cur, level + 1, &i)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + goto error0; + } + if (level > 0) + cur->bc_ptrs[level]--; + XFS_BMBT_TRACE_CURSOR(cur, EXIT); + *stat = 2; + return 0; + +error0: + if (tcur) + xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); + return error; +} + +/* + * Insert one record/level. Return information to the caller + * allowing the next level up to proceed if necessary. 
+ */
+STATIC int				/* error */
+xfs_bmbt_insrec(
+	xfs_btree_cur_t		*cur,	/* btree cursor; bc_ptrs[level] is the insert slot */
+	int			level,	/* level to insert at: 0 stores *recp, >0 stores key + *bnop */
+	xfs_fsblock_t		*bnop,	/* in: block ptr stored at level > 0; out: nbno */
+	xfs_bmbt_rec_t		*recp,	/* in: record to insert; out: nrec if a split made a block */
+	xfs_btree_cur_t		**curp,	/* out: ncur, written only if a split made a block */
+	int			*stat)		/* no-go/done/continue */
+{
+	xfs_bmbt_block_t	*block;		/* bmap btree block */
+	xfs_buf_t		*bp;		/* buffer for block */
+	int			error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_insrec";
+#endif
+	int			i;		/* loop index */
+	xfs_bmbt_key_t		key;		/* bmap btree key */
+	xfs_bmbt_key_t		*kp;		/* pointer to bmap btree key */
+	int			logflags;	/* inode logging flags */
+	xfs_fsblock_t		nbno;		/* new block number */
+	struct xfs_btree_cur	*ncur;		/* new btree cursor */
+	xfs_bmbt_key_t		nkey;		/* new btree key value */
+	xfs_bmbt_rec_t		nrec;		/* new record, built from nkey after a split */
+	int			optr;		/* old key/record index */
+	xfs_bmbt_ptr_t		*pp;		/* pointer to bmap block addr */
+	int			ptr;		/* key/record index */
+	xfs_bmbt_rec_t		*rp;		/* pointer to bmap btree rec */
+
+	ASSERT(level < cur->bc_nlevels);
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp);
+	ncur = (xfs_btree_cur_t *)0;
+	INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(recp));	/* key comes from the record's start offset */
+	optr = ptr = cur->bc_ptrs[level];
+	if (ptr == 0) {		/* cursor slot 0: nothing is inserted, *stat = 0 */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	XFS_STATS_INC(xs_bmbt_insrec);
+	block = xfs_bmbt_get_block(cur, level, &bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_lblock(cur, block, level, bp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	if (ptr <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		if (level == 0) {
+			rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
+			xfs_btree_check_rec(XFS_BTNUM_BMAP, recp, rp);
+		} else {
+			kp = XFS_BMAP_KEY_IADDR(block, ptr, cur);
+			xfs_btree_check_key(XFS_BTNUM_BMAP, &key, kp);
+		}
+	}
+#endif
+	nbno = NULLFSBLOCK;	/* only xfs_bmbt_split() below changes this */
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {	/* block is full at its current size */
+		if (INT_GET(block->bb_numrecs, ARCH_CONVERT) < XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
+			/*
+			 * A root block, that can be made bigger.
+			 */
+			xfs_iroot_realloc(cur->bc_private.b.ip, 1,
+				cur->bc_private.b.whichfork);
+			block = xfs_bmbt_get_block(cur, level, &bp);
+		} else if (level == cur->bc_nlevels - 1) {	/* full top level: add a new root */
+			if ((error = xfs_bmbt_newroot(cur, &logflags, stat)) ||
+			    *stat == 0) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+			xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
+				logflags);
+			block = xfs_bmbt_get_block(cur, level, &bp);
+		} else {	/* lower level: try rshift, then lshift, then split */
+			if (error = xfs_bmbt_rshift(cur, level, &i)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+			if (i) {
+				/* nothing */
+			} else {
+				if (error = xfs_bmbt_lshift(cur, level, &i)) {
+					XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+					return error;
+				}
+				if (i) {
+					optr = ptr = cur->bc_ptrs[level];	/* reload: lshift decrements bc_ptrs[level] on success */
+				} else {
+					if (error = xfs_bmbt_split(cur, level,
+							&nbno, &nkey, &ncur,
+							&i)) {
+						XFS_BMBT_TRACE_CURSOR(cur,
+							ERROR);
+						return error;
+					}
+					if (i) {
+						block = xfs_bmbt_get_block(
+							cur, level, &bp);
+#ifdef DEBUG
+						if (error =
+						    xfs_btree_check_lblock(cur,
+							    block, level, bp)) {
+							XFS_BMBT_TRACE_CURSOR(
+								cur, ERROR);
+							return error;
+						}
+#endif
+						ptr = cur->bc_ptrs[level];	/* split may have retargeted the cursor at the new block */
+						xfs_bmbt_set_allf(&nrec,
+							nkey.br_startoff, 0, 0,
+							XFS_EXT_NORM);
+					} else {	/* split allocated nothing: report no-go */
+						XFS_BMBT_TRACE_CURSOR(cur,
+							EXIT);
+						*stat = 0;
+						return 0;
+					}
+				}
+			}
+		}
+	}
+	if (level > 0) {
+		kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
+		pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+#ifdef DEBUG
+		for (i = INT_GET(block->bb_numrecs, ARCH_CONVERT); i >= ptr; i--) {
+			if (error = xfs_btree_check_lptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT),
+					level)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+		}
+#endif
+		ovbcopy(&kp[ptr - 1], &kp[ptr],	/* open slot ptr: move entries ptr..numrecs up by one */
+			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp));
+		ovbcopy(&pp[ptr - 1], &pp[ptr], /* INT_: direct copy */
+			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp));
+#ifdef DEBUG
+		if (error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)*bnop,
+				level)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+#endif
+		kp[ptr - 1] = key;
+		INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
+		INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+		xfs_bmbt_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+		xfs_bmbt_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+	} else {
+		rp = XFS_BMAP_REC_IADDR(block, 1, cur);
+		ovbcopy(&rp[ptr - 1], &rp[ptr],	/* open slot ptr in the leaf */
+			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp));
+		rp[ptr - 1] = *recp;
+		INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+		xfs_bmbt_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+	}
+	xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
+#ifdef DEBUG
+	if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		if (level == 0)
+			xfs_btree_check_rec(XFS_BTNUM_BMAP, rp + ptr - 1,
+				rp + ptr);
+		else
+			xfs_btree_check_key(XFS_BTNUM_BMAP, kp + ptr - 1,
+				kp + ptr);
+	}
+#endif
+	if (optr == 1 && (error = xfs_bmbt_updkey(cur, &key, level + 1))) {	/* slot 1 was the target: push the key to level + 1 */
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	*bnop = nbno;
+	if (nbno != NULLFSBLOCK) {	/* a split happened: hand back the new record and cursor */
+		*recp = nrec;
+		*curp = ncur;
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = 1;
+	return 0;
+}
+
+STATIC int				/* error */
+xfs_bmbt_killroot(			/* fold the root's single child into the root when it fits */
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			async)	/* if zero, the transaction is marked synchronous */
+{
+	xfs_bmbt_block_t	*block;		/* root block */
+	xfs_bmbt_block_t	*cblock;	/* child block at level - 1 */
+	xfs_buf_t		*cbp;		/* buffer for the child block */
+	xfs_bmbt_key_t		*ckp;		/* child's keys */
+	xfs_bmbt_ptr_t		*cpp;		/* child's pointers */
+#ifdef DEBUG
+	int			error;
+#endif
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_killroot";
+#endif
+	int			i;
+	xfs_bmbt_key_t		*kp;		/* root's keys */
+	xfs_inode_t		*ip;
+	xfs_ifork_t		*ifp;
+	int			level;		/* top level of the tree */
+	xfs_bmbt_ptr_t		*pp;		/* root's pointers */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	level = cur->bc_nlevels - 1;
+	ASSERT(level >= 1);
+	/*
+	 * Don't deal with the root block needs to be a leaf case.
+	 * We're just going to turn the thing back into extents anyway.
+	 */
+	if (level == 1) {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		return 0;
+	}
+	block = xfs_bmbt_get_block(cur, level, &cbp);	/* cbp is overwritten with the child buffer below */
+	/*
+	 * Give up if the root has multiple children.
+	 */
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) != 1) {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		return 0;
+	}
+	/*
+	 * Only do this if the next level will fit.
+	 * Then the data must be copied up to the inode,
+	 * instead of freeing the root you free the next level.
+	 */
+	cbp = cur->bc_bufs[level - 1];
+	cblock = XFS_BUF_TO_BMBT_BLOCK(cbp);
+	if (INT_GET(cblock->bb_numrecs, ARCH_CONVERT) > XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		return 0;
+	}
+	ASSERT(INT_GET(cblock->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO);
+	ASSERT(INT_GET(cblock->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO);
+	ip = cur->bc_private.b.ip;
+	ifp = XFS_IFORK_PTR(ip, cur->bc_private.b.whichfork);
+	ASSERT(XFS_BMAP_BLOCK_IMAXRECS(level, cur) ==
+	       XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes));
+	i = (int)(INT_GET(cblock->bb_numrecs, ARCH_CONVERT) - XFS_BMAP_BLOCK_IMAXRECS(level, cur));	/* extra slots the root needs */
+	if (i) {
+		xfs_iroot_realloc(ip, i, cur->bc_private.b.whichfork);
+		block = ifp->if_broot;	/* re-fetch the root pointer after the realloc */
+	}
+	INT_MOD(block->bb_numrecs, ARCH_CONVERT, i);
+	ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) == INT_GET(cblock->bb_numrecs, ARCH_CONVERT));
+	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
+	ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
+	bcopy(ckp, kp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));	/* child keys -> root */
+	pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+	cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
+#ifdef DEBUG
+	for (i = 0; i < INT_GET(cblock->bb_numrecs, ARCH_CONVERT); i++) {
+		if (error = xfs_btree_check_lptr(cur, INT_GET(cpp[i], ARCH_CONVERT), level - 1)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+	}
+#endif
+	bcopy(cpp, pp, INT_GET(block->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));	/* child ptrs -> root */
+	xfs_bmap_add_free(XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(cbp)), 1,	/* queue the child block to be freed */
+			cur->bc_private.b.flist, cur->bc_mp);
+	if (!async)
+		xfs_trans_set_sync(cur->bc_tp);
+	ip->i_d.di_nblocks--;
+	if (XFS_IS_QUOTA_ON(cur->bc_mp) &&
+	    ip->i_ino != cur->bc_mp->m_sb.sb_uquotino &&
+	    ip->i_ino != cur->bc_mp->m_sb.sb_pquotino)
+		xfs_trans_mod_dquot_byino(cur->bc_tp, ip, XFS_TRANS_DQ_BCOUNT,
+			-1L);
+	xfs_trans_binval(cur->bc_tp, cbp);
+	cur->bc_bufs[level - 1] = NULL;
+	INT_MOD(block->bb_level, ARCH_CONVERT, -1);	/* root now sits one level lower */
+	xfs_trans_log_inode(cur->bc_tp, ip,
+		XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
+	cur->bc_nlevels--;
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	return 0;
+}
+
+/*
+ * Log key values from the btree block.
+ *
+ * With a buffer, the byte range covering keys kfirst..klast is logged
+ * through xfs_trans_log_buf().  Without one (bp is NULL) the inode is
+ * logged with the XFS_ILOG_FBROOT flag for the cursor's fork instead.
+ */
+STATIC void
+xfs_bmbt_log_keys(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	xfs_buf_t	*bp,		/* buffer holding the block, or NULL */
+	int		kfirst,		/* index of first key to log (1-based) */
+	int		klast)		/* index of last key to log (inclusive) */
+{
+#ifdef XFS_BMBT_TRACE
+	static char	fname[] = "xfs_bmbt_log_keys";
+#endif
+	xfs_trans_t	*tp;
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGBII(cur, bp, kfirst, klast);
+	tp = cur->bc_tp;
+	if (bp) {
+		xfs_bmbt_block_t	*block;
+		int			first;	/* byte offset of first logged byte */
+		xfs_bmbt_key_t		*kp;
+		int			last;	/* byte offset of last logged byte */
+
+		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+		kp = XFS_BMAP_KEY_DADDR(block, 1, cur);
+		first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
+		last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
+		xfs_trans_log_buf(tp, bp, first, last);
+	} else {
+		xfs_inode_t		 *ip;
+
+		ip = cur->bc_private.b.ip;
+		xfs_trans_log_inode(tp, ip,
+			XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+}
+
+/*
+ * Log pointer values from the btree block.
+ */
+STATIC void
+xfs_bmbt_log_ptrs(
+	xfs_btree_cur_t	*cur,		/* btree cursor */
+	xfs_buf_t	*bp,		/* buffer holding the block, or NULL */
+	int		pfirst,		/* index of first pointer to log (1-based) */
+	int		plast)		/* index of last pointer to log (inclusive) */
+{
+#ifdef XFS_BMBT_TRACE
+	static char	fname[] = "xfs_bmbt_log_ptrs";
+#endif
+	xfs_trans_t	*tp;
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGBII(cur, bp, pfirst, plast);
+	tp = cur->bc_tp;
+	if (bp) {
+		xfs_bmbt_block_t	*block;
+		int			first;	/* byte offset of first logged byte */
+		int			last;	/* byte offset of last logged byte */
+		xfs_bmbt_ptr_t		*pp;
+
+		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+		pp = XFS_BMAP_PTR_DADDR(block, 1, cur);
+		first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
+		last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
+		xfs_trans_log_buf(tp, bp, first, last);
+	} else {	/* no buffer: log the inode with the fork-root flag instead */
+		xfs_inode_t		*ip;
+
+		ip = cur->bc_private.b.ip;
+		xfs_trans_log_inode(tp, ip,
+			XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+}
+
+/*
+ * Lookup the record.  The cursor is made to point to it, based on dir.
+ *
+ * The search key is cur->bc_rec.b.br_startoff.  Each level from the top
+ * down to 0 is binary-searched and cur->bc_ptrs[] is filled in on the way.
+ * *stat is set to 1 when the cursor ends on an entry acceptable for dir,
+ * and to 0 otherwise.
+ */
+STATIC int				/* error */
+xfs_bmbt_lookup(
+	xfs_btree_cur_t		*cur,
+	xfs_lookup_t		dir,
+	int			*stat)		/* success/failure */
+{
+	xfs_bmbt_block_t	*block;
+	xfs_buf_t		*bp;
+	xfs_daddr_t		d;
+	xfs_sfiloff_t		diff;		/* entry startoff minus search startoff */
+	int			error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char	fname[] = "xfs_bmbt_lookup";
+#endif
+	xfs_fsblock_t		fsbno;		/* child block chosen at the level above */
+	int			high;
+	int			i;
+	int			keyno;		/* 1-based index of the entry probed/chosen */
+	xfs_bmbt_key_t		*kkbase;
+	xfs_bmbt_key_t		*kkp;
+	xfs_bmbt_rec_t		*krbase;
+	xfs_bmbt_rec_t		*krp;
+	int			level;
+	int			low;
+	xfs_mount_t		*mp;
+	xfs_bmbt_ptr_t		*pp;
+	xfs_bmbt_irec_t		*rp;		/* search record, &cur->bc_rec.b */
+	xfs_fileoff_t		startoff;
+	xfs_trans_t		*tp;
+
+	XFS_STATS_INC(xs_bmbt_lookup);
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGI(cur, (int)dir);
+	tp = cur->bc_tp;
+	mp = cur->bc_mp;
+	rp = &cur->bc_rec.b;
+	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {	/* diff = 1 forces a search at the top level */
+		if (level < cur->bc_nlevels - 1) {	/* below the top: fsbno was set by the level above */
+			d = XFS_FSB_TO_DADDR(mp, fsbno);
+			bp = cur->bc_bufs[level];
+			if (bp && XFS_BUF_ADDR(bp) != d)	/* cursor holds a different block here */
+				bp = (xfs_buf_t *)0;
+			if (!bp) {
+				if (error = xfs_btree_read_bufl(mp, tp, fsbno,
+						0, &bp, XFS_BMAP_BTREE_REF)) {
+					XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+					return error;
+				}
+				xfs_btree_setbuf(cur, level, bp);
+				block = XFS_BUF_TO_BMBT_BLOCK(bp);
+				if (error = xfs_btree_check_lblock(cur, block,
+						level, bp)) {
+					XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+					return error;
+				}
+			} else
+				block = XFS_BUF_TO_BMBT_BLOCK(bp);
+		} else
+			block = xfs_bmbt_get_block(cur, level, &bp);
+		if (diff == 0)	/* exact match at the level above: take the first entry here */
+			keyno = 1;
+		else {
+			if (level > 0)
+				kkbase = XFS_BMAP_KEY_IADDR(block, 1, cur);
+			else
+				krbase = XFS_BMAP_REC_IADDR(block, 1, cur);
+			low = 1;
+			if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) {	/* empty block */
+				ASSERT(level == 0);
+				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;	/* 0 for LE, 1 for the other directions */
+				XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+				*stat = 0;
+				return 0;
+			}
+			while (low <= high) {	/* binary search on startoff */
+				XFS_STATS_INC(xs_bmbt_compare);
+				keyno = (low + high) >> 1;
+				if (level > 0) {
+					kkp = kkbase + keyno - 1;
+					startoff = INT_GET(kkp->br_startoff, ARCH_CONVERT);
+				} else {
+					krp = krbase + keyno - 1;
+					startoff = xfs_bmbt_get_startoff(krp);
+				}
+				diff = (xfs_sfiloff_t)
+						(startoff - rp->br_startoff);
+				if (diff < 0)
+					low = keyno + 1;
+				else if (diff > 0)
+					high = keyno - 1;
+				else
+					break;
+			}
+		}
+		if (level > 0) {
+			if (diff > 0 && --keyno < 1)	/* probed key is past the target: use the previous entry, minimum 1 */
+				keyno = 1;
+			pp = XFS_BMAP_PTR_IADDR(block, keyno, cur);
+#ifdef DEBUG
+			if (error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+#endif
+			fsbno = INT_GET(*pp, ARCH_CONVERT);
+			cur->bc_ptrs[level] = keyno;
+		}
+	}
+	if (dir != XFS_LOOKUP_LE && diff < 0) {
+		keyno++;
+		/*
+		 * If ge search and we went off the end of the block, but it's
+		 * not the last block, we're in the wrong block.
+		 */
+		if (dir == XFS_LOOKUP_GE && keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) &&
+		    INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
+			cur->bc_ptrs[0] = keyno;
+			if (error = xfs_bmbt_increment(cur, 0, &i)) {	/* step into the right sibling */
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+			XFS_WANT_CORRUPTED_RETURN(i == 1);
+			XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+			*stat = 1;
+			return 0;
+		}
+	}
+	else if (dir == XFS_LOOKUP_LE && diff > 0)	/* LE: back up from the larger entry */
+		keyno--;
+	cur->bc_ptrs[0] = keyno;
+	if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT)) {	/* cursor is off either end of the leaf */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+	} else {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));	/* EQ succeeds only on an exact match */
+	}
+	return 0;
+}
+
+/*
+ * Move 1 record left from cur/level if possible.
+ * Update cur to reflect the new path.
+ *
+ * *stat is 0 (nothing moved) when level is the top level, the block has
+ * no left sibling, the cursor slot is <= 1, or the left sibling is full.
+ * Otherwise the first entry of the cursor's block is appended to the left
+ * sibling and *stat is 1.
+ */
+STATIC int				/* error */
+xfs_bmbt_lshift(
+	xfs_btree_cur_t		*cur,
+	int			level,
+	int			*stat)		/* success/failure */
+{
+	int			error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_lshift";
+#endif
+#ifdef DEBUG
+	int			i;		/* loop counter */
+#endif
+	xfs_bmbt_key_t		key;		/* bmap btree key */
+	xfs_buf_t		*lbp;		/* left buffer pointer */
+	xfs_bmbt_block_t	*left;		/* left btree block */
+	xfs_bmbt_key_t		*lkp;		/* left btree key */
+	xfs_bmbt_ptr_t		*lpp;		/* left address pointer */
+	int			lrecs;		/* left record count */
+	xfs_bmbt_rec_t		*lrp;		/* left record pointer */
+	xfs_mount_t		*mp;		/* file system mount point */
+	xfs_buf_t		*rbp;		/* right buffer pointer */
+	xfs_bmbt_block_t	*right;		/* right btree block */
+	xfs_bmbt_key_t		*rkp;		/* right btree key */
+	xfs_bmbt_ptr_t		*rpp;		/* right address pointer */
+	xfs_bmbt_rec_t		*rrp;		/* right record pointer */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGI(cur, level);
+	if (level == cur->bc_nlevels - 1) {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	rbp = cur->bc_bufs[level];	/* the cursor's block is the right-hand one */
+	right = XFS_BUF_TO_BMBT_BLOCK(rbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_lblock(cur, right, level, rbp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+#endif
+	if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO) {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	if (cur->bc_ptrs[level] <= 1) {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	mp = cur->bc_mp;
+	if (error = xfs_btree_read_bufl(mp, cur->bc_tp, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0,
+			&lbp, XFS_BMAP_BTREE_REF)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	left = XFS_BUF_TO_BMBT_BLOCK(lbp);
+	if (error = xfs_btree_check_lblock(cur, left, level, lbp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1;	/* slot in left that receives the entry */
+	if (level > 0) {
+		lkp = XFS_BMAP_KEY_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+		rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
+		*lkp = *rkp;
+		xfs_bmbt_log_keys(cur, lbp, lrecs, lrecs);
+		lpp = XFS_BMAP_PTR_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+		rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
+#ifdef DEBUG
+		if (error = xfs_btree_check_lptr(cur, INT_GET(*rpp, ARCH_CONVERT), level)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+#endif
+		*lpp = *rpp; /* INT_: direct copy */
+		xfs_bmbt_log_ptrs(cur, lbp, lrecs, lrecs);
+	} else {
+		lrp = XFS_BMAP_REC_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
+		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
+		*lrp = *rrp;
+		xfs_bmbt_log_recs(cur, lbp, lrecs, lrecs);
+	}
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, +1);
+	xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
+#ifdef DEBUG
+	if (level > 0)
+		xfs_btree_check_key(XFS_BTNUM_BMAP, lkp - 1, lkp);
+	else
+		xfs_btree_check_rec(XFS_BTNUM_BMAP, lrp - 1, lrp);
+#endif
+	INT_MOD(right->bb_numrecs, ARCH_CONVERT, -1);
+	xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
+	if (level > 0) {
+#ifdef DEBUG
+		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_lptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT),
+					level)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+		}
+#endif
+		ovbcopy(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));	/* close the gap at slot 1 */
+		ovbcopy(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		xfs_bmbt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		xfs_bmbt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+	} else {
+		ovbcopy(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));	/* close the gap at slot 1 */
+		xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rrp));
+		rkp = &key;	/* leaf: the key comes from the new first record */
+	}
+	if (error = xfs_bmbt_updkey(cur, rkp, level + 1)) {	/* right's first entry changed: update level + 1 */
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	cur->bc_ptrs[level]--;	/* entries in right moved down one slot */
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Move 1 record right from cur/level if possible.
+ * Update cur to reflect the new path.
+ */
+STATIC int				/* error */
+xfs_bmbt_rshift(
+	xfs_btree_cur_t		*cur,
+	int			level,
+	int			*stat)		/* success/failure */
+{
+	int			error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_rshift";
+#endif
+	int			i;		/* loop counter */
+	xfs_bmbt_key_t		key;		/* bmap btree key */
+	xfs_buf_t		*lbp;		/* left buffer pointer */
+	xfs_bmbt_block_t	*left;		/* left btree block */
+	xfs_bmbt_key_t		*lkp;		/* left btree key */
+	xfs_bmbt_ptr_t		*lpp;		/* left address pointer */
+	xfs_bmbt_rec_t		*lrp;		/* left record pointer */
+	xfs_mount_t		*mp;		/* file system mount point */
+	xfs_buf_t		*rbp;		/* right buffer pointer */
+	xfs_bmbt_block_t	*right;		/* right btree block */
+	xfs_bmbt_key_t		*rkp;		/* right btree key */
+	xfs_bmbt_ptr_t		*rpp;		/* right address pointer */
+	xfs_bmbt_rec_t		*rrp;		/* right record pointer */
+	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGI(cur, level);
+	if (level == cur->bc_nlevels - 1) {	/* top level: no shift, *stat = 0 */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	lbp = cur->bc_bufs[level];	/* the cursor's block is the left-hand one */
+	left = XFS_BUF_TO_BMBT_BLOCK(lbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_lblock(cur, left, level, lbp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+#endif
+	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO) {	/* no right sibling */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) {	/* cursor is at/after the last entry, the one that would move */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	mp = cur->bc_mp;
+	if (error = xfs_btree_read_bufl(mp, cur->bc_tp, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0,
+			&rbp, XFS_BMAP_BTREE_REF)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	right = XFS_BUF_TO_BMBT_BLOCK(rbp);
+	if (error = xfs_btree_check_lblock(cur, right, level, rbp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {	/* right sibling is full */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	if (level > 0) {
+		lkp = XFS_BMAP_KEY_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		lpp = XFS_BMAP_PTR_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
+		rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
+#ifdef DEBUG
+		for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) {
+			if (error = xfs_btree_check_lptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+		}
+#endif
+		ovbcopy(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));	/* open slot 1 in right */
+		ovbcopy(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+#ifdef DEBUG
+		if (error = xfs_btree_check_lptr(cur, INT_GET(*lpp, ARCH_CONVERT), level)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+#endif
+		*rkp = *lkp;	/* left's last entry becomes right's first */
+		*rpp = *lpp; /* INT_: direct copy */
+		xfs_bmbt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		xfs_bmbt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+	} else {
+		lrp = XFS_BMAP_REC_IADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
+		ovbcopy(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));	/* open slot 1 in right */
+		*rrp = *lrp;
+		xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		INT_SET(key.br_startoff, ARCH_CONVERT, xfs_bmbt_get_startoff(rrp));
+		rkp = &key;	/* leaf: the key comes from the new first record */
+	}
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
+	xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
+	INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+#ifdef DEBUG
+	if (level > 0)
+		xfs_btree_check_key(XFS_BTNUM_BMAP, rkp, rkp + 1);
+	else
+		xfs_btree_check_rec(XFS_BTNUM_BMAP, rrp, rrp + 1);
+#endif
+	xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
+	if (error = xfs_btree_dup_cursor(cur, &tcur)) {	/* tcur is moved to the right block; cur stays put */
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	i = xfs_btree_lastrec(tcur, level);
+	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+	if (error = xfs_bmbt_increment(tcur, level, &i)) {
+		XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
+		goto error1;
+	}
+	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+	if (error = xfs_bmbt_updkey(tcur, rkp, level + 1)) {	/* right's first entry changed: update level + 1 */
+		XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
+		goto error1;
+	}
+	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = 1;
+	return 0;
+error0:
+	XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+error1:
+	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+	return error;
+}
+
+/*
+ * Determine the extent state.
+ *
+ * Returns XFS_EXT_UNWRITTEN when extent_flag is nonzero and XFS_EXT_NORM
+ * otherwise.  blks is only examined by the ASSERT.
+ */
+/* ARGSUSED */
+STATIC xfs_exntst_t
+xfs_extent_state(
+	xfs_filblks_t		blks,
+	int			extent_flag)
+{
+	if (extent_flag) {
+		ASSERT(blks != 0);	/* saved for DMIG */
+		return XFS_EXT_UNWRITTEN;
+	}
+	return XFS_EXT_NORM;
+}
+
+
+/*
+ * Split cur/level block in half.
+ * Return new block number and its first record (to be inserted into parent).
+ *
+ * *stat is 0 when the allocator returned no block (args.fsbno ==
+ * NULLFSBLOCK) and 1 on success.  On success *bnop is the new right
+ * block and keyp->br_startoff is the start offset of its first entry.
+ * If level is below the top level, *curp receives a duplicate of cur
+ * whose bc_ptrs[level + 1] has been advanced by one.
+ */
+STATIC int				/* error */
+xfs_bmbt_split(
+	xfs_btree_cur_t		*cur,
+	int			level,
+	xfs_fsblock_t		*bnop,
+	xfs_bmbt_key_t		*keyp,
+	xfs_btree_cur_t		**curp,
+	int			*stat)		/* success/failure */
+{
+	xfs_alloc_arg_t		args;		/* block allocation args */
+	int			error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_split";
+#endif
+	int			i;		/* loop counter */
+	xfs_fsblock_t		lbno;		/* left sibling block number */
+	xfs_buf_t		*lbp;		/* left buffer pointer */
+	xfs_bmbt_block_t	*left;		/* left btree block */
+	xfs_bmbt_key_t		*lkp;		/* left btree key */
+	xfs_bmbt_ptr_t		*lpp;		/* left address pointer */
+	xfs_bmbt_rec_t		*lrp;		/* left record pointer */
+	xfs_buf_t		*rbp;		/* right buffer pointer */
+	xfs_bmbt_block_t	*right;		/* right btree block */
+	xfs_bmbt_key_t		*rkp;		/* right btree key */
+	xfs_bmbt_ptr_t		*rpp;		/* right address pointer */
+	xfs_bmbt_block_t	*rrblock;	/* right-right btree block */
+	xfs_buf_t		*rrbp;		/* right-right buffer pointer */
+	xfs_bmbt_rec_t		*rrp;		/* right record pointer */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, keyp);
+	args.tp = cur->bc_tp;
+	args.mp = cur->bc_mp;
+	lbp = cur->bc_bufs[level];	/* the block being split stays as the left half */
+	lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp));
+	left = XFS_BUF_TO_BMBT_BLOCK(lbp);
+	args.fsbno = cur->bc_private.b.firstblock;
+	if (args.fsbno == NULLFSBLOCK) {	/* no earlier allocation recorded: start at the left block */
+		args.fsbno = lbno;
+		args.type = XFS_ALLOCTYPE_START_BNO;
+	} else if (cur->bc_private.b.flist->xbf_low)
+		args.type = XFS_ALLOCTYPE_FIRST_AG;
+	else
+		args.type = XFS_ALLOCTYPE_NEAR_BNO;
+	args.mod = args.minleft = args.alignment = args.total = args.isfl =
+		args.userdata = args.minalignslop = 0;
+	args.minlen = args.maxlen = args.prod = 1;	/* exactly one block */
+	args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
+	if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return XFS_ERROR(ENOSPC);
+	}
+	if (error = xfs_alloc_vextent(&args)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	if (args.fsbno == NULLFSBLOCK) {	/* nothing allocated: report failure through *stat */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	ASSERT(args.len == 1);
+	cur->bc_private.b.firstblock = args.fsbno;
+	cur->bc_private.b.allocated++;
+	cur->bc_private.b.ip->i_d.di_nblocks++;
+	xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
+	if (XFS_IS_QUOTA_ON(args.mp) &&
+	    cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_uquotino &&
+	    cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_pquotino)
+		xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
+			XFS_TRANS_DQ_BCOUNT, 1L);
+	rbp = xfs_btree_get_bufl(args.mp, args.tp, args.fsbno, 0);
+	right = XFS_BUF_TO_BMBT_BLOCK(rbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_lblock(cur, left, level, rbp)) {	/* NOTE(review): checks 'left' but passes rbp; lbp looks intended - confirm */
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+#endif
+	INT_SET(right->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
+	right->bb_level = left->bb_level; /* INT_: direct copy */
+	INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2));	/* right gets half, rounded down */
+	if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) &&	/* odd count: right takes the extra entry if the cursor slot is low enough */
+	    cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1)
+		INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+	i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1;	/* first entry in left that moves */
+	if (level > 0) {
+		lkp = XFS_BMAP_KEY_IADDR(left, i, cur);
+		lpp = XFS_BMAP_PTR_IADDR(left, i, cur);
+		rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
+		rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
+#ifdef DEBUG
+		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_lptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level)) {
+				XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+				return error;
+			}
+		}
+#endif
+		bcopy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		bcopy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		xfs_bmbt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		xfs_bmbt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		keyp->br_startoff = INT_GET(rkp->br_startoff, ARCH_CONVERT);
+	} else {
+		lrp = XFS_BMAP_REC_IADDR(left, i, cur);
+		rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
+		bcopy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		xfs_bmbt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		keyp->br_startoff = xfs_bmbt_get_startoff(rrp);
+	}
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
+	right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
+	INT_SET(left->bb_rightsib, ARCH_CONVERT, args.fsbno);
+	INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno);
+	xfs_bmbt_log_block(cur, rbp, XFS_BB_ALL_BITS);
+	xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+	if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {	/* old right neighbour must point back at the new block */
+		if (error = xfs_btree_read_bufl(args.mp, args.tp,
+				INT_GET(right->bb_rightsib, ARCH_CONVERT), 0, &rrbp,
+				XFS_BMAP_BTREE_REF)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp);
+		if (error = xfs_btree_check_lblock(cur, rrblock, level, rrbp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, args.fsbno);
+		xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
+	}
+	if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) {	/* cursor slot moved: retarget the cursor at the right block */
+		xfs_btree_setbuf(cur, level, rbp);
+		cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT);
+	}
+	if (level + 1 < cur->bc_nlevels) {
+		if (error = xfs_btree_dup_cursor(cur, curp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		(*curp)->bc_ptrs[level + 1]++;
+	}
+	*bnop = args.fsbno;
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Update keys for the record.
+ *
+ * Starting at level (which must be >= 1), the key in slot
+ * cur->bc_ptrs[level] is overwritten with *keyp and logged.  The walk
+ * moves up one level at a time and continues only while the slot just
+ * updated was slot 1 and levels remain.
+ */
+STATIC int
+xfs_bmbt_updkey(
+	xfs_btree_cur_t		*cur,
+	xfs_bmbt_key_t		*keyp,	/* on-disk format */
+	int			level)
+{
+	xfs_bmbt_block_t	*block;
+	xfs_buf_t		*bp;
+#ifdef DEBUG
+	int			error;
+#endif
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_updkey";
+#endif
+	xfs_bmbt_key_t		*kp;
+	int			ptr;
+
+	ASSERT(level >= 1);
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGIK(cur, level, keyp);
+	for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {	/* ptr = 1 lets the first pass run */
+		block = xfs_bmbt_get_block(cur, level, &bp);
+#ifdef DEBUG
+		if (error = xfs_btree_check_lblock(cur, block, level, bp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+#endif
+		ptr = cur->bc_ptrs[level];
+		kp = XFS_BMAP_KEY_IADDR(block, ptr, cur);
+		*kp = *keyp;
+		xfs_bmbt_log_keys(cur, bp, ptr, ptr);
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	return 0;
+}
+
+/*
+ * Convert on-disk form of btree root to in-memory form.
+ */
+void				/* fills *rblock's header, keys and ptrs from *dblock */
+xfs_bmdr_to_bmbt(
+	xfs_bmdr_block_t	*dblock,	/* source root, xfs_bmdr layout */
+	int			dblocklen,	/* length used to lay out dblock's key/ptr arrays */
+	xfs_bmbt_block_t	*rblock,	/* destination root, xfs_bmbt layout */
+	int			rblocklen)	/* length used to lay out rblock's key/ptr arrays */
+{
+	int			dmxr;		/* XFS_BTREE_BLOCK_MAXRECS value first, record count later */
+	xfs_bmbt_key_t		*fkp;		/* first key in dblock ("from") */
+	xfs_bmbt_ptr_t		*fpp;		/* first ptr in dblock ("from") */
+	xfs_bmbt_key_t		*tkp;		/* first key in rblock ("to") */
+	xfs_bmbt_ptr_t		*tpp;		/* first ptr in rblock ("to") */
+
+	INT_SET(rblock->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
+	rblock->bb_level = dblock->bb_level;	/* both in on-disk format */
+	ASSERT(INT_GET(rblock->bb_level, ARCH_CONVERT) > 0);
+	rblock->bb_numrecs = dblock->bb_numrecs;/* both in on-disk format */
+	INT_SET(rblock->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);	/* result gets null sibling links */
+	INT_SET(rblock->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
+	dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
+	fkp = XFS_BTREE_KEY_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
+	tkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
+	fpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
+	tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
+	dmxr = INT_GET(dblock->bb_numrecs, ARCH_CONVERT);	/* from here on: number of records to copy */
+	bcopy(fkp, tkp, sizeof(*fkp) * dmxr);
+	bcopy(fpp, tpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */
+}
+
+/*
+ * Decrement cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int					/* error */
+xfs_bmbt_decrement(
+	xfs_btree_cur_t		*cur,	/* cursor whose bc_ptrs[]/buffers are updated */
+	int			level,	/* level to step back at; asserted < cur->bc_nlevels */
+	int			*stat)	/* success/failure */
+{
+	xfs_bmbt_block_t	*block;
+	xfs_buf_t		*bp;
+	int			error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_decrement";
+#endif
+	xfs_fsblock_t		fsbno;
+	int			lev;
+	xfs_mount_t		*mp;
+	xfs_trans_t		*tp;
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGI(cur, level);
+	ASSERT(level < cur->bc_nlevels);
+	if (level < cur->bc_nlevels - 1)
+		xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
+	if (--cur->bc_ptrs[level] > 0) {	/* still within the current block: done */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 1;
+		return 0;
+	}
+	block = xfs_bmbt_get_block(cur, level, &bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_lblock(cur, block, level, bp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+#endif
+	if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO) {	/* null left sibling: report *stat = 0 */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;	/* note: bc_ptrs[level] keeps its decremented value */
+		return 0;
+	}
+	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {	/* climb until some level's ptr stays > 0 */
+		if (--cur->bc_ptrs[lev] > 0)
+			break;
+		if (lev < cur->bc_nlevels - 1)
+			xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
+	}
+	if (lev == cur->bc_nlevels) {	/* loop ran off the top without a break */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	tp = cur->bc_tp;
+	mp = cur->bc_mp;
+	for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {	/* walk back down to 'level' */
+		fsbno = INT_GET(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		if (error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
+				XFS_BMAP_BTREE_REF)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		lev--;
+		xfs_btree_setbuf(cur, lev, bp);
+		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+		if (error = xfs_btree_check_lblock(cur, block, lev, bp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT);	/* point at the last record of the block just read */
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Delete the record pointed to by cur.
+ */
+int					/* error */
+xfs_bmbt_delete(
+	xfs_btree_cur_t	*cur,
+	int		async,		/* deletion can be async */
+	int		*stat)		/* success/failure */
+{
+	int		error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char	fname[] = "xfs_bmbt_delete";
+#endif
+	int		i;		/* status from xfs_bmbt_delrec/xfs_bmbt_decrement; becomes *stat */
+	int		level;
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	for (level = 0, i = 2; i == 2; level++) {	/* repeat one level up for as long as delrec reports 2 */
+		if (error = xfs_bmbt_delrec(cur, level, async, &i)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+	}
+	if (i == 0) {
+		for (level = 1; level < cur->bc_nlevels; level++) {	/* first level >= 1 whose ptr is 0 gets decremented */
+			if (cur->bc_ptrs[level] == 0) {
+				if (error = xfs_bmbt_decrement(cur, level,
+						&i)) {
+					XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+					return error;
+				}
+				break;
+			}
+		}
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = i;
+	return 0;
+}
+
+/*
+ * Convert a compressed bmap extent record to an uncompressed form.
+ * This code must be in sync with the routines xfs_bmbt_get_startoff,
+ * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
+ *
+ * Fills s->br_startoff, s->br_startblock, s->br_blockcount and s->br_state
+ * from *r.  As decoded in the BMBT_USE_64 branch below: the extent flag is
+ * the top BMBT_EXNTFLAG_BITLEN bits of l0, startoff is the rest of l0
+ * shifted right by 9, startblock is the low 9 bits of l0 (shifted left 43)
+ * or'ed with l1 >> 21, and blockcount is the low 21 bits of l1.  The
+ * !BMBT_USE_64 branch decodes the same fields from four words l0..l3.
+ */
+void
+xfs_bmbt_get_all(
+	xfs_bmbt_rec_t	*r,	/* packed record to decode */
+	xfs_bmbt_irec_t	*s)	/* unpacked result */
+{
+	int		ext_flag;	/* raw extent-flag bits */
+	xfs_exntst_t	st;
+
+#if BMBT_USE_64
+	ext_flag = (int)((INT_GET(r->l0, ARCH_CONVERT)) >> (64 - BMBT_EXNTFLAG_BITLEN));
+#if XFS_BIG_FILES
+	s->br_startoff = ((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+			XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+#else	/* !XFS_BIG_FILES */
+	{
+		xfs_dfiloff_t	o;
+
+		o = ((xfs_dfiloff_t)INT_GET(r->l0, ARCH_CONVERT) &
+			XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+		ASSERT((o >> 32) == 0);	/* offset must fit in 32 bits in this configuration */
+		s->br_startoff = (xfs_fileoff_t)o;
+	}
+#endif	/* XFS_BIG_FILES */
+#if XFS_BIG_FILESYSTEMS
+	s->br_startblock = (((xfs_fsblock_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) |
+			(((xfs_fsblock_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
+#else
+#ifdef DEBUG
+	{
+		xfs_dfsbno_t	b;
+
+		b = (((xfs_dfsbno_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) |
+			(((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
+		ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
+		s->br_startblock = (xfs_fsblock_t)b;
+	}
+#else	/* !DEBUG */
+	s->br_startblock = (xfs_fsblock_t)(((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);	/* only l1 is read here; the 9 bits in l0 are not used */
+#endif	/* DEBUG */
+#endif	/* XFS_BIG_FILESYSTEMS */
+	s->br_blockcount = (xfs_filblks_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK64LO(21));
+#else	/* !BMBT_USE_64 */
+	ext_flag = (INT_GET(r->l0, ARCH_CONVERT) >> (32 - BMBT_EXNTFLAG_BITLEN));
+#if XFS_BIG_FILES
+	s->br_startoff = (((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+			XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+			(((xfs_fileoff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+#else	/* !XFS_BIG_FILES */
+#ifdef DEBUG
+	{
+		xfs_dfiloff_t	o;
+
+		o = (((xfs_dfiloff_t)INT_GET(r->l0, ARCH_CONVERT) &
+			XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+			(((xfs_dfiloff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+		ASSERT((o >> 32) == 0);
+		s->br_startoff = (xfs_fileoff_t)o;
+	}
+#else	/* !DEBUG */
+	s->br_startoff = (((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+			XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+			(((xfs_fileoff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+#endif	/* DEBUG */
+#endif	/* XFS_BIG_FILES */
+#if XFS_BIG_FILESYSTEMS
+	s->br_startblock =
+		(((xfs_fsblock_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK32LO(9))) << 43) |
+		(((xfs_fsblock_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+		(((xfs_fsblock_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+#else
+#ifdef DEBUG
+	{
+		xfs_dfsbno_t	b;
+
+		b = (((xfs_dfsbno_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK32LO(9))) << 43) |
+			(((xfs_dfsbno_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+			(((xfs_dfsbno_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+		ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
+		s->br_startblock = (xfs_fsblock_t)b;
+	}
+#else	/* !DEBUG */
+	s->br_startblock = (((xfs_fsblock_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+			(((xfs_fsblock_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+#endif	/* DEBUG */
+#endif	/* XFS_BIG_FILESYSTEMS */
+	s->br_blockcount = (xfs_filblks_t)(INT_GET(r->l3, ARCH_CONVERT) & XFS_MASK32LO(21));
+#endif	/* BMBT_USE_64 */
+	/* This is xfs_extent_state() in-line */
+	if (ext_flag) {
+		ASSERT(s->br_blockcount != 0);	/* saved for DMIG */
+		st = XFS_EXT_UNWRITTEN;
+	} else
+		st = XFS_EXT_NORM;
+	s->br_state = st;
+}
+
+/*
+ * Get the block pointer for the given level of the cursor.
+ * Fill in the buffer pointer, if applicable.
+ *
+ * Levels below the top one come from cur->bc_bufs[level]; for the top
+ * level *bpp is set to 0 and the inode fork's if_broot is returned.
+ */
+xfs_bmbt_block_t *
+xfs_bmbt_get_block(
+	xfs_btree_cur_t		*cur,
+	int			level,
+	xfs_buf_t		**bpp)	/* out: buffer holding the block, or 0 for the top level */
+{
+	xfs_ifork_t		*ifp;
+	xfs_bmbt_block_t	*rval;
+
+	if (level < cur->bc_nlevels - 1) {
+		*bpp = cur->bc_bufs[level];
+		rval = XFS_BUF_TO_BMBT_BLOCK(*bpp);
+	} else {
+		*bpp = 0;
+		ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
+			cur->bc_private.b.whichfork);
+		rval = ifp->if_broot;
+	}
+	return rval;
+}
+
+/*
+ * Extract the blockcount field from a bmap extent record.
+ * It is the low 21 bits of the record's last word (l1 or l3).
+ */
+xfs_filblks_t
+xfs_bmbt_get_blockcount(
+	xfs_bmbt_rec_t	*r)
+{
+#if BMBT_USE_64
+	return (xfs_filblks_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK64LO(21));
+#else	/* !BMBT_USE_64 */
+	return (xfs_filblks_t)(INT_GET(r->l3, ARCH_CONVERT) & XFS_MASK32LO(21));
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Extract the startblock field from a bmap extent record.
+ */
+xfs_fsblock_t				/* decoded start block; mirrors the startblock code in xfs_bmbt_get_all */
+xfs_bmbt_get_startblock(
+	xfs_bmbt_rec_t	*r)		/* packed record to read */
+{
+#if BMBT_USE_64
+#if XFS_BIG_FILESYSTEMS
+	return (((xfs_fsblock_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) |
+		(((xfs_fsblock_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
+#else
+#ifdef DEBUG
+	xfs_dfsbno_t	b;	/* full-width value, so the range ASSERT can look at the high bits */
+
+	b = (((xfs_dfsbno_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) |
+		(((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
+	ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
+	return (xfs_fsblock_t)b;
+#else	/* !DEBUG */
+	return (xfs_fsblock_t)(((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);	/* only l1 is read in this configuration */
+#endif	/* DEBUG */
+#endif	/* XFS_BIG_FILESYSTEMS */
+#else	/* !BMBT_USE_64 */
+#if XFS_BIG_FILESYSTEMS
+	return (((xfs_fsblock_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK32LO(9))) << 43) |
+		(((xfs_fsblock_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+		(((xfs_fsblock_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+#else
+#ifdef DEBUG
+	xfs_dfsbno_t	b;
+
+	b = (((xfs_dfsbno_t)(INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK32LO(9))) << 43) |
+		(((xfs_dfsbno_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+		(((xfs_dfsbno_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+	ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
+	return (xfs_fsblock_t)b;
+#else	/* !DEBUG */
+	return (((xfs_fsblock_t)INT_GET(r->l2, ARCH_CONVERT)) << 11) |
+		(((xfs_fsblock_t)INT_GET(r->l3, ARCH_CONVERT)) >> 21);
+#endif	/* DEBUG */
+#endif	/* XFS_BIG_FILESYSTEMS */
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Extract the startoff field from a bmap extent record.
+ */
+xfs_fileoff_t				/* decoded start offset; extent-flag bits are masked off first */
+xfs_bmbt_get_startoff(
+	xfs_bmbt_rec_t	*r)		/* packed record to read */
+{
+#if BMBT_USE_64
+#if XFS_BIG_FILES
+	return ((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+		XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+#else	/* !XFS_BIG_FILES */
+	xfs_dfiloff_t	o;
+
+	o = ((xfs_dfiloff_t)INT_GET(r->l0, ARCH_CONVERT) &
+		XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
+	ASSERT((o >> 32) == 0);	/* offset must fit in 32 bits in this configuration */
+	return (xfs_fileoff_t)o;
+#endif	/* XFS_BIG_FILES */
+#else	/* !BMBT_USE_64 */
+#if XFS_BIG_FILES
+	return (((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+		XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+		(((xfs_fileoff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+#else	/* !XFS_BIG_FILES */
+#ifdef DEBUG
+	xfs_dfiloff_t	o;
+
+	o = (((xfs_dfiloff_t)INT_GET(r->l0, ARCH_CONVERT) &
+		XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+		(((xfs_dfiloff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+	ASSERT((o >> 32) == 0);
+	return (xfs_fileoff_t)o;
+#else	/* !DEBUG */
+	return (((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
+		XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN)) << 23) |
+		(((xfs_fileoff_t)INT_GET(r->l1, ARCH_CONVERT)) >> 9);
+#endif	/* DEBUG */
+#endif	/* XFS_BIG_FILES */
+#endif	/* BMBT_USE_64 */
+}
+
+xfs_exntst_t				/* result of xfs_extent_state(blockcount, flag bits) for the record */
+xfs_bmbt_get_state(
+	xfs_bmbt_rec_t	*r)		/* packed record to read */
+{
+	int	ext_flag;		/* top BMBT_EXNTFLAG_BITLEN bits of l0 */
+
+#if BMBT_USE_64
+	ext_flag = (int)((INT_GET(r->l0, ARCH_CONVERT)) >> (64 - BMBT_EXNTFLAG_BITLEN));
+#else	/* !BMBT_USE_64 */
+	ext_flag = (INT_GET(r->l0, ARCH_CONVERT) >> (32 - BMBT_EXNTFLAG_BITLEN));
+#endif	/* BMBT_USE_64 */
+	return xfs_extent_state(xfs_bmbt_get_blockcount(r),
+				ext_flag);
+}
+
+
+/*
+ * Increment cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ */
+int					/* error */
+xfs_bmbt_increment(
+	xfs_btree_cur_t		*cur,	/* cursor whose bc_ptrs[]/buffers are updated */
+	int			level,	/* level to step forward at; asserted < cur->bc_nlevels */
+	int			*stat)	/* success/failure */
+{
+	xfs_bmbt_block_t	*block;
+	xfs_buf_t		*bp;
+	int			error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_increment";
+#endif
+	xfs_fsblock_t		fsbno;
+	int			lev;
+	xfs_mount_t		*mp;
+	xfs_trans_t		*tp;
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGI(cur, level);
+	ASSERT(level < cur->bc_nlevels);
+	if (level < cur->bc_nlevels - 1)
+		xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+	block = xfs_bmbt_get_block(cur, level, &bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_lblock(cur, block, level, bp)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+#endif
+	if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {	/* still within the current block: done */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 1;
+		return 0;
+	}
+	if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO) {	/* null right sibling: report *stat = 0 */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;	/* note: bc_ptrs[level] keeps its incremented value */
+		return 0;
+	}
+	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {	/* climb until some level's ptr is still within its block */
+		block = xfs_bmbt_get_block(cur, lev, &bp);
+#ifdef DEBUG
+		if (error = xfs_btree_check_lblock(cur, block, lev, bp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+#endif
+		if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT))
+			break;
+		if (lev < cur->bc_nlevels - 1)
+			xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
+	}
+	if (lev == cur->bc_nlevels) {	/* loop ran off the top without a break */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	tp = cur->bc_tp;
+	mp = cur->bc_mp;
+	for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {	/* walk back down to 'level' */
+		fsbno = INT_GET(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		if (error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
+				XFS_BMAP_BTREE_REF)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		lev--;
+		xfs_btree_setbuf(cur, lev, bp);
+		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+		if (error = xfs_btree_check_lblock(cur, block, lev, bp)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		cur->bc_ptrs[lev] = 1;	/* point at the first record of the block just read */
+	}
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Insert the current record at the point referenced by cur.
+ *
+ * The record is built from cur->bc_rec.b.  xfs_bmbt_insrec is called at
+ * level 0 and then at each higher level for as long as it hands back a
+ * block number other than NULLFSBLOCK in nbno.
+ */
+int					/* error */
+xfs_bmbt_insert(
+	xfs_btree_cur_t	*cur,
+	int		*stat)		/* success/failure */
+{
+	int		error;		/* error return value */
+#ifdef XFS_BMBT_TRACE
+	static char	fname[] = "xfs_bmbt_insert";
+#endif
+	int		i;		/* status from xfs_bmbt_insrec; becomes *stat */
+	int		level;
+	xfs_fsblock_t	nbno;		/* block number passed to/returned by xfs_bmbt_insrec */
+	xfs_btree_cur_t	*ncur;		/* cursor handed back by xfs_bmbt_insrec, if any */
+	xfs_bmbt_rec_t	nrec;		/* record being inserted, packed form */
+	xfs_btree_cur_t	*pcur;		/* cursor used for the current xfs_bmbt_insrec call */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	level = 0;
+	nbno = NULLFSBLOCK;
+	xfs_bmbt_set_all(&nrec, &cur->bc_rec.b);
+	ncur = (xfs_btree_cur_t *)0;
+	pcur = cur;
+	do {
+		if (error = xfs_bmbt_insrec(pcur, level++, &nbno, &nrec, &ncur,
+				&i)) {
+			if (pcur != cur)	/* only cursors other than the caller's are torn down here */
+				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);	/* NOTE(review): error0 path does not delete pcur when pcur != cur - confirm intended */
+		if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {	/* fold pcur's state into cur, then drop pcur */
+			cur->bc_nlevels = pcur->bc_nlevels;
+			cur->bc_private.b.allocated +=
+				pcur->bc_private.b.allocated;
+			pcur->bc_private.b.allocated = 0;
+			ASSERT((cur->bc_private.b.firstblock != NULLFSBLOCK) ||
+			       (cur->bc_private.b.ip->i_d.di_flags &
+				XFS_DIFLAG_REALTIME));
+			cur->bc_private.b.firstblock =
+				pcur->bc_private.b.firstblock;
+			ASSERT(cur->bc_private.b.flist ==
+			       pcur->bc_private.b.flist);
+			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
+		}
+		if (ncur) {	/* continue with the cursor xfs_bmbt_insrec handed back */
+			pcur = ncur;
+			ncur = (xfs_btree_cur_t *)0;
+		}
+	} while (nbno != NULLFSBLOCK);
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*stat = i;
+	return 0;
+error0:
+	XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+	return error;
+}
+
+/*
+ * Log fields from the btree block header.
+ */
+void
+xfs_bmbt_log_block(
+	xfs_btree_cur_t		*cur,
+	xfs_buf_t		*bp,	/* buffer holding the block; if 0 the inode is logged instead */
+	int			fields)	/* header-field mask, translated via offsets[] below */
+{
+	int			first;	/* first byte offset to log */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_log_block";
+#endif
+	int			last;	/* last byte offset to log */
+	xfs_trans_t		*tp;
+	static const short	offsets[] = {	/* header field offsets, terminated by the header size */
+		offsetof(xfs_bmbt_block_t, bb_magic),
+		offsetof(xfs_bmbt_block_t, bb_level),
+		offsetof(xfs_bmbt_block_t, bb_numrecs),
+		offsetof(xfs_bmbt_block_t, bb_leftsib),
+		offsetof(xfs_bmbt_block_t, bb_rightsib),
+		sizeof(xfs_bmbt_block_t)
+	};
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGBI(cur, bp, fields);
+	tp = cur->bc_tp;
+	if (bp) {
+		xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first,
+				  &last);
+		xfs_trans_log_buf(tp, bp, first, last);
+	} else
+		xfs_trans_log_inode(tp, cur->bc_private.b.ip,
+			XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+}
+
+/*
+ * Log record values from the btree block.
+ * Records rfirst..rlast (1-based, inclusive) of the block in bp are logged
+ * as one byte range.
+ */
+void
+xfs_bmbt_log_recs(
+	xfs_btree_cur_t		*cur,
+	xfs_buf_t		*bp,	/* asserted non-null */
+	int			rfirst,
+	int			rlast)
+{
+	xfs_bmbt_block_t	*block;
+	int			first;	/* byte offset of record rfirst within the block */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_log_recs";
+#endif
+	int			last;	/* byte offset of the last byte of record rlast */
+	xfs_bmbt_rec_t		*rp;	/* record 1 of the block */
+	xfs_trans_t		*tp;
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	XFS_BMBT_TRACE_ARGBII(cur, bp, rfirst, rlast);
+	ASSERT(bp);
+	tp = cur->bc_tp;
+	block = XFS_BUF_TO_BMBT_BLOCK(bp);
+	rp = XFS_BMAP_REC_DADDR(block, 1, cur);
+	first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
+	last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
+	xfs_trans_log_buf(tp, bp, first, last);
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+}
+
+int					/* error */
+xfs_bmbt_lookup_eq(
+	xfs_btree_cur_t	*cur,
+	xfs_fileoff_t	off,
+	xfs_fsblock_t	bno,
+	xfs_filblks_t	len,
+	int		*stat)		/* success/failure */
+{
+	cur->bc_rec.b.br_startoff = off;	/* stash the search key in the cursor, then look up with XFS_LOOKUP_EQ */
+	cur->bc_rec.b.br_startblock = bno;
+	cur->bc_rec.b.br_blockcount = len;
+	return xfs_bmbt_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+int					/* error */
+xfs_bmbt_lookup_ge(
+	xfs_btree_cur_t	*cur,
+	xfs_fileoff_t	off,
+	xfs_fsblock_t	bno,
+	xfs_filblks_t	len,
+	int		*stat)		/* success/failure */
+{
+	cur->bc_rec.b.br_startoff = off;	/* same as xfs_bmbt_lookup_eq but with XFS_LOOKUP_GE */
+	cur->bc_rec.b.br_startblock = bno;
+	cur->bc_rec.b.br_blockcount = len;
+	return xfs_bmbt_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+int					/* error */
+xfs_bmbt_lookup_le(
+	xfs_btree_cur_t	*cur,
+	xfs_fileoff_t	off,
+	xfs_fsblock_t	bno,
+	xfs_filblks_t	len,
+	int		*stat)		/* success/failure */
+{
+	cur->bc_rec.b.br_startoff = off;	/* same as xfs_bmbt_lookup_eq but with XFS_LOOKUP_LE */
+	cur->bc_rec.b.br_startblock = bno;
+	cur->bc_rec.b.br_blockcount = len;
+	return xfs_bmbt_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Give the bmap btree a new root block.  Copy the old broot contents
+ * down into a real block and make the broot point to it.
+ *
+ * Sets *stat to 0 (and returns 0) when the allocator hands back
+ * NULLFSBLOCK; otherwise *stat is 1 and *logflags gains XFS_ILOG_CORE and
+ * the fork's broot flag.
+ */
+int					/* error */
+xfs_bmbt_newroot(
+	xfs_btree_cur_t		*cur,		/* btree cursor */
+	int			*logflags,	/* logging flags for inode */
+	int			*stat)		/* return status - 0 fail */
+{
+	xfs_alloc_arg_t		args;		/* allocation arguments */
+	xfs_bmbt_block_t	*block;		/* bmap btree block */
+	xfs_buf_t		*bp;		/* buffer for block */
+	xfs_bmbt_block_t	*cblock;	/* child btree block */
+	xfs_bmbt_key_t		*ckp;		/* child key pointer */
+	xfs_bmbt_ptr_t		*cpp;		/* child ptr pointer */
+	int			error;		/* error return code */
+#ifdef XFS_BMBT_TRACE
+	static char		fname[] = "xfs_bmbt_newroot";
+#endif
+#ifdef DEBUG
+	int			i;		/* loop counter */
+#endif
+	xfs_bmbt_key_t		*kp;		/* pointer to bmap btree key */
+	int			level;		/* btree level */
+	xfs_bmbt_ptr_t		*pp;		/* pointer to bmap block addr */
+
+	XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
+	level = cur->bc_nlevels - 1;
+	block = xfs_bmbt_get_block(cur, level, &bp);
+	/*
+	 * Copy the root into a real block.
+	 */
+	args.mp = cur->bc_mp;
+	pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+	args.tp = cur->bc_tp;
+	args.fsbno = cur->bc_private.b.firstblock;
+	args.mod = args.minleft = args.alignment = args.total = args.isfl =
+		args.userdata = args.minalignslop = 0;
+	args.minlen = args.maxlen = args.prod = 1;	/* exactly one block is requested */
+	args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
+	if (args.fsbno == NULLFSBLOCK) {	/* no firstblock yet: start from the root's first child pointer */
+#ifdef DEBUG
+		if (error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+#endif
+		args.fsbno = INT_GET(*pp, ARCH_CONVERT);
+		args.type = XFS_ALLOCTYPE_START_BNO;
+	} else if (args.wasdel)
+		args.type = XFS_ALLOCTYPE_FIRST_AG;
+	else
+		args.type = XFS_ALLOCTYPE_NEAR_BNO;
+	if (error = xfs_alloc_vextent(&args)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+	if (args.fsbno == NULLFSBLOCK) {	/* nothing allocated: not an error, but *stat = 0 */
+		XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+		*stat = 0;
+		return 0;
+	}
+	ASSERT(args.len == 1);
+	cur->bc_private.b.firstblock = args.fsbno;
+	cur->bc_private.b.allocated++;
+	cur->bc_private.b.ip->i_d.di_nblocks++;
+	if (XFS_IS_QUOTA_ON(args.mp) &&
+	    cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_uquotino &&
+	    cur->bc_private.b.ip->i_ino != args.mp->m_sb.sb_pquotino)
+		xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
+			XFS_TRANS_DQ_BCOUNT, 1L);
+	bp = xfs_btree_get_bufl(args.mp, cur->bc_tp, args.fsbno, 0);	/* NOTE(review): result is used without a null check - confirm it cannot fail here */
+	cblock = XFS_BUF_TO_BMBT_BLOCK(bp);
+	*cblock = *block;	/* child starts as a copy of the old root's header */
+	INT_MOD(block->bb_level, ARCH_CONVERT, +1);
+	INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
+	cur->bc_nlevels++;
+	cur->bc_ptrs[level + 1] = 1;
+	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
+	ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
+	bcopy(kp, ckp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*kp));
+	cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
+#ifdef DEBUG
+	for (i = 0; i < INT_GET(cblock->bb_numrecs, ARCH_CONVERT); i++) {
+		if (error = xfs_btree_check_lptr(cur, INT_GET(pp[i], ARCH_CONVERT), level)) {
+			XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+			return error;
+		}
+	}
+#endif
+	bcopy(pp, cpp, INT_GET(cblock->bb_numrecs, ARCH_CONVERT) * sizeof(*pp));
+#ifdef DEBUG
+	if (error = xfs_btree_check_lptr(cur, (xfs_bmbt_ptr_t)args.fsbno,
+			level)) {
+		XFS_BMBT_TRACE_CURSOR(cur, ERROR);
+		return error;
+	}
+#endif
+	INT_SET(*pp, ARCH_CONVERT, args.fsbno);	/* root's first pointer now names the new child block */
+	xfs_iroot_realloc(cur->bc_private.b.ip, 1 - INT_GET(cblock->bb_numrecs, ARCH_CONVERT),
+		cur->bc_private.b.whichfork);
+	xfs_btree_setbuf(cur, level, bp);
+	/*
+	 * Do all this logging at the end so that
+	 * the root is at the right level.
+	 */
+	xfs_bmbt_log_block(cur, bp, XFS_BB_ALL_BITS);
+	xfs_bmbt_log_keys(cur, bp, 1, INT_GET(cblock->bb_numrecs, ARCH_CONVERT));
+	xfs_bmbt_log_ptrs(cur, bp, 1, INT_GET(cblock->bb_numrecs, ARCH_CONVERT));
+	XFS_BMBT_TRACE_CURSOR(cur, EXIT);
+	*logflags |=
+		XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork);
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Set all the fields in a bmap extent record from the uncompressed form.
+ * s->br_state must be XFS_EXT_NORM or XFS_EXT_UNWRITTEN (asserted); any
+ * state other than XFS_EXT_NORM sets the extent flag bit.
+ */
+void
+xfs_bmbt_set_all(
+	xfs_bmbt_rec_t	*r,	/* packed record to fill in */
+	xfs_bmbt_irec_t	*s)	/* unpacked values to store */
+{
+	int	extent_flag;	/* 0 for XFS_EXT_NORM, 1 otherwise */
+
+	ASSERT((s->br_state == XFS_EXT_NORM) ||
+		(s->br_state == XFS_EXT_UNWRITTEN));
+	extent_flag = (s->br_state == XFS_EXT_NORM) ? 0 : 1;
+#if XFS_BIG_FILES
+	ASSERT((s->br_startoff & XFS_MASK64HI(9)) == 0);
+	ASSERT((s->br_blockcount & XFS_MASK64HI(43)) == 0);
+#else	/* !XFS_BIG_FILES */
+	ASSERT((s->br_blockcount & XFS_MASK32HI(11)) == 0);
+#endif	/* XFS_BIG_FILES */
+#if XFS_BIG_FILESYSTEMS
+	ASSERT((s->br_startblock & XFS_MASK64HI(12)) == 0);
+#endif	/* XFS_BIG_FILESYSTEMS */
+#if BMBT_USE_64
+#if XFS_BIG_FILESYSTEMS
+	INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+		  ((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
+		  ((xfs_bmbt_rec_base_t)s->br_startblock >> 43));
+	INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)s->br_startblock << 21) |
+		  ((xfs_bmbt_rec_base_t)s->br_blockcount &
+		   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(s->br_startblock)) {	/* null start block: the high startblock bits are filled with ones */
+		INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)s->br_startoff << 9) |
+			  (xfs_bmbt_rec_base_t)XFS_MASK64LO(9));
+		INT_SET(r->l1, ARCH_CONVERT, XFS_MASK64HI(11) |
+			  ((xfs_bmbt_rec_base_t)s->br_startblock << 21) |
+			  ((xfs_bmbt_rec_base_t)s->br_blockcount &
+			   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+	} else {
+		INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)s->br_startoff << 9));
+		INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)s->br_startblock << 21) |
+			  ((xfs_bmbt_rec_base_t)s->br_blockcount &
+			   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+#else	/* !BMBT_USE_64 */
+	INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 31) |
+		((xfs_bmbt_rec_base_t)(s->br_startoff >> 23)));
+	INT_SET(r->l3, ARCH_CONVERT, (((xfs_bmbt_rec_base_t)s->br_startblock) << 21) |
+		  ((xfs_bmbt_rec_base_t)(s->br_blockcount & XFS_MASK32LO(21))));
+#if XFS_BIG_FILESYSTEMS
+	INT_SET(r->l1, ARCH_CONVERT, (((xfs_bmbt_rec_base_t)s->br_startoff) << 9) |
+		  ((xfs_bmbt_rec_base_t)(s->br_startblock >> 43)));
+	INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(s->br_startblock >> 11));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(s->br_startblock)) {
+		INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(s->br_startoff << 9) |
+			  (xfs_bmbt_rec_base_t)XFS_MASK32LO(9));
+		INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)XFS_MASK32HI(11) |
+			  (xfs_bmbt_rec_base_t)(s->br_startblock >> 11));
+	} else {
+		INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(s->br_startoff << 9));
+		INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(s->br_startblock >> 11));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Set all the fields in a bmap extent record from the arguments.
+ * Same packing as xfs_bmbt_set_all, with the four values passed separately.
+ */
+void
+xfs_bmbt_set_allf(
+	xfs_bmbt_rec_t	*r,	/* packed record to fill in */
+	xfs_fileoff_t	o,	/* start offset */
+	xfs_fsblock_t	b,	/* start block */
+	xfs_filblks_t	c,	/* block count */
+	xfs_exntst_t	v)	/* extent state; XFS_EXT_NORM or XFS_EXT_UNWRITTEN (asserted) */
+{
+	int	extent_flag;	/* 0 for XFS_EXT_NORM, 1 otherwise */
+
+	ASSERT((v == XFS_EXT_NORM) || (v == XFS_EXT_UNWRITTEN));
+	extent_flag = (v == XFS_EXT_NORM) ? 0 : 1;
+#if XFS_BIG_FILES
+	ASSERT((o & XFS_MASK64HI(64-BMBT_STARTOFF_BITLEN)) == 0);
+	ASSERT((c & XFS_MASK64HI(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
+#else	/* !XFS_BIG_FILES */
+	ASSERT((c & XFS_MASK32HI(11)) == 0);
+#endif	/* XFS_BIG_FILES */
+#if XFS_BIG_FILESYSTEMS
+	ASSERT((b & XFS_MASK64HI(64-BMBT_STARTBLOCK_BITLEN)) == 0);
+#endif	/* XFS_BIG_FILESYSTEMS */
+#if BMBT_USE_64
+#if XFS_BIG_FILESYSTEMS
+	INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+		((xfs_bmbt_rec_base_t)o << 9) |
+		((xfs_bmbt_rec_base_t)b >> 43));
+	INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)b << 21) |
+		  ((xfs_bmbt_rec_base_t)c &
+		   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(b)) {
+		INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)o << 9) |
+			 (xfs_bmbt_rec_base_t)XFS_MASK64LO(9));
+		INT_SET(r->l1, ARCH_CONVERT, XFS_MASK64HI(11) |
+			  ((xfs_bmbt_rec_base_t)b << 21) |
+			  ((xfs_bmbt_rec_base_t)c &
+			   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+	} else {
+		INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 63) |
+			((xfs_bmbt_rec_base_t)o << 9));
+		INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)b << 21) |
+			  ((xfs_bmbt_rec_base_t)c &
+			   (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+#else	/* !BMBT_USE_64 */
+	INT_SET(r->l0, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)extent_flag << 31) |
+		((xfs_bmbt_rec_base_t)(o >> 23)));
+	INT_SET(r->l3, ARCH_CONVERT, (((xfs_bmbt_rec_base_t)b) << 21) |
+		  ((xfs_bmbt_rec_base_t)(c & XFS_MASK32LO(21))));
+#if XFS_BIG_FILESYSTEMS
+	INT_SET(r->l1, ARCH_CONVERT, (((xfs_bmbt_rec_base_t)o) << 9) |
+		  ((xfs_bmbt_rec_base_t)(b >> 43)));
+	INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(b >> 11));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(b)) {
+		INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(o << 9) |
+			  (xfs_bmbt_rec_base_t)XFS_MASK32LO(9));
+		INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)XFS_MASK32HI(11) |
+			  (xfs_bmbt_rec_base_t)(b >> 11));
+	} else {
+		INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(o << 9));
+		INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(b >> 11));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Set the blockcount field in a bmap extent record.
+ * Only the low 21 bits of the record's last word are replaced; its upper
+ * bits are kept.
+ */
+void
+xfs_bmbt_set_blockcount(
+	xfs_bmbt_rec_t	*r,
+	xfs_filblks_t	v)	/* new block count */
+{
+#if XFS_BIG_FILES
+	ASSERT((v & XFS_MASK64HI(43)) == 0);
+#else	/* !XFS_BIG_FILES */
+	ASSERT((v & XFS_MASK32HI(11)) == 0);
+#endif
+#if BMBT_USE_64
+	INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64HI(43)) |
+		  (xfs_bmbt_rec_base_t)(v & XFS_MASK64LO(21)));
+#else	/* !BMBT_USE_64 */
+	INT_SET(r->l3, ARCH_CONVERT, (INT_GET(r->l3, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK32HI(11)) |
+		  ((xfs_bmbt_rec_base_t)v & XFS_MASK32LO(21)));
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Set the startblock field in a bmap extent record.
+ */
+void				/* rewrites only the startblock bits; other fields of *r are preserved */
+xfs_bmbt_set_startblock(
+	xfs_bmbt_rec_t	*r,	/* packed record to modify */
+	xfs_fsblock_t	v)	/* new start block */
+{
+#if XFS_BIG_FILESYSTEMS
+	ASSERT((v & XFS_MASK64HI(12)) == 0);
+#endif	/* XFS_BIG_FILESYSTEMS */
+#if BMBT_USE_64
+#if XFS_BIG_FILESYSTEMS
+	INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64HI(55)) |
+		  (xfs_bmbt_rec_base_t)(v >> 43));
+	INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)) |
+		  (xfs_bmbt_rec_base_t)(v << 21));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(v)) {	/* null start block: the high startblock bits are filled with ones */
+		INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) | (xfs_bmbt_rec_base_t)XFS_MASK64LO(9)));
+		INT_SET(r->l1, ARCH_CONVERT, (xfs_bmbt_rec_base_t)XFS_MASK64HI(11) |
+			  ((xfs_bmbt_rec_base_t)v << 21) |
+			  (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+	} else {	/* otherwise the low 9 bits of l0 are cleared */
+		INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & ~(xfs_bmbt_rec_base_t)XFS_MASK64LO(9)));
+		INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)v << 21) |
+			  (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+#else	/* !BMBT_USE_64 */
+#if XFS_BIG_FILESYSTEMS
+	INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & XFS_MASK32HI(23)) | (xfs_bmbt_rec_base_t)(v >> 43));
+	INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(v >> 11));
+#else	/* !XFS_BIG_FILESYSTEMS */
+	if (ISNULLSTARTBLOCK(v)) {
+		INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) | XFS_MASK32LO(9)));
+		INT_SET(r->l2, ARCH_CONVERT, XFS_MASK32HI(11) | (xfs_bmbt_rec_base_t)(v >> 11));
+	} else {
+		INT_SET(r->l1, ARCH_CONVERT, (INT_GET(r->l1, ARCH_CONVERT) & ~XFS_MASK32LO(9)));
+		INT_SET(r->l2, ARCH_CONVERT, (xfs_bmbt_rec_base_t)(v >> 11));
+	}
+#endif	/* XFS_BIG_FILESYSTEMS */
+	INT_SET(r->l3, ARCH_CONVERT, (INT_GET(r->l3, ARCH_CONVERT) & XFS_MASK32LO(21)) |
+		  (((xfs_bmbt_rec_base_t)v) << 21));	/* low 21 bits (blockcount) of l3 are kept */
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Set the startoff field in a bmap extent record.
+ */
+void				/* rewrites only the startoff bits; flag and startblock bits are preserved */
+xfs_bmbt_set_startoff(
+	xfs_bmbt_rec_t	*r,	/* packed record to modify */
+	xfs_fileoff_t	v)	/* new start offset */
+{
+#if XFS_BIG_FILES
+	ASSERT((v & XFS_MASK64HI(9)) == 0);
+#endif	/* XFS_BIG_FILES */
+#if BMBT_USE_64
+	INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t) XFS_MASK64HI(1)) |
+		((xfs_bmbt_rec_base_t)v << 9) |
+		  (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK64LO(9)));	/* keep top bit and low 9 bits of l0 */
+#else	/* !BMBT_USE_64 */
+	INT_SET(r->l0, ARCH_CONVERT, (INT_GET(r->l0, ARCH_CONVERT) & (xfs_bmbt_rec_base_t) XFS_MASK32HI(1)) |
+		(xfs_bmbt_rec_base_t)(v >> 23));
+	INT_SET(r->l1, ARCH_CONVERT, ((xfs_bmbt_rec_base_t)v << 9) |
+		  (INT_GET(r->l1, ARCH_CONVERT) & (xfs_bmbt_rec_base_t)XFS_MASK32LO(9)));
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Set the extent state field in a bmap extent record.
+ * XFS_EXT_NORM clears the top BMBT_EXNTFLAG_BITLEN bits of l0; any other
+ * (asserted: XFS_EXT_UNWRITTEN) value sets them.
+ */
+void
+xfs_bmbt_set_state(
+	xfs_bmbt_rec_t	*r,	/* packed record to modify */
+	xfs_exntst_t	v)	/* new extent state */
+{
+	ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN);
+	if (v == XFS_EXT_NORM)
+#if BMBT_USE_64
+		INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN));
+#else	/* !BMBT_USE_64 */
+		INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK32LO(32 - BMBT_EXNTFLAG_BITLEN));
+#endif	/* BMBT_USE_64 */
+	else
+#if BMBT_USE_64
+		INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) | XFS_MASK64HI(BMBT_EXNTFLAG_BITLEN));
+#else	/* !BMBT_USE_64 */
+		INT_SET(r->l0, ARCH_CONVERT, INT_GET(r->l0, ARCH_CONVERT) | XFS_MASK32HI(BMBT_EXNTFLAG_BITLEN));
+#endif	/* BMBT_USE_64 */
+}
+
+/*
+ * Convert in-memory form of btree root to on-disk form.
+ */
+void				/* fills *dblock's level, numrecs, keys and ptrs from *rblock */
+xfs_bmbt_to_bmdr(
+	xfs_bmbt_block_t	*rblock,	/* source root, xfs_bmbt layout */
+	int			rblocklen,	/* length used to lay out rblock's key/ptr arrays */
+	xfs_bmdr_block_t	*dblock,	/* destination root, xfs_bmdr layout */
+	int			dblocklen)	/* length used to lay out dblock's key/ptr arrays */
+{
+	int			dmxr;		/* XFS_BTREE_BLOCK_MAXRECS value first, record count later */
+	xfs_bmbt_key_t		*fkp;		/* first key in rblock ("from") */
+	xfs_bmbt_ptr_t		*fpp;		/* first ptr in rblock ("from") */
+	xfs_bmbt_key_t		*tkp;		/* first key in dblock ("to") */
+	xfs_bmbt_ptr_t		*tpp;		/* first ptr in dblock ("to") */
+
+	ASSERT(INT_GET(rblock->bb_magic, ARCH_CONVERT) == XFS_BMAP_MAGIC);	/* source must be a sibling-less, non-leaf bmap block */
+	ASSERT(INT_GET(rblock->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO);
+	ASSERT(INT_GET(rblock->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO);
+	ASSERT(INT_GET(rblock->bb_level, ARCH_CONVERT) > 0);
+	dblock->bb_level = rblock->bb_level;	/* both in on-disk format */
+	dblock->bb_numrecs = rblock->bb_numrecs;/* both in on-disk format */
+	dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
+	fkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
+	tkp = XFS_BTREE_KEY_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
+	fpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
+	tpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
+	dmxr = INT_GET(dblock->bb_numrecs, ARCH_CONVERT);	/* from here on: number of records to copy */
+	bcopy(fkp, tkp, sizeof(*fkp) * dmxr);
+	bcopy(fpp, tpp, sizeof(*fpp) * dmxr); /* INT_: direct copy */
+}
+
+/*
+ * Update the record to the passed values.
+ */ +int +xfs_bmbt_update( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_fileoff_t off, /* new starting file offset */ + xfs_fsblock_t bno, /* new starting block number */ + xfs_filblks_t len, /* new length in blocks */ + xfs_exntst_t state) /* new extent state */ +{ + xfs_bmbt_block_t *block; + xfs_buf_t *bp; + int error; +#ifdef XFS_BMBT_TRACE + static char fname[] = "xfs_bmbt_update"; +#endif + xfs_bmbt_key_t key; + int ptr; + xfs_bmbt_rec_t *rp; + + XFS_BMBT_TRACE_CURSOR(cur, ENTRY); + XFS_BMBT_TRACE_ARGFFFI(cur, (xfs_dfiloff_t)off, (xfs_dfsbno_t)bno, + (xfs_dfilblks_t)len, (int)state); + block = xfs_bmbt_get_block(cur, 0, &bp); /* level 0 block the cursor is positioned in */ +#ifdef DEBUG + if (error = xfs_btree_check_lblock(cur, block, 0, bp)) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + return error; + } +#endif + ptr = cur->bc_ptrs[0]; /* index of the record to rewrite */ + rp = XFS_BMAP_REC_IADDR(block, ptr, cur); + xfs_bmbt_set_allf(rp, off, bno, len, state); + xfs_bmbt_log_recs(cur, bp, ptr, ptr); /* log just this one record */ + if (ptr > 1) { /* not the first record: skip the key update below */ + XFS_BMBT_TRACE_CURSOR(cur, EXIT); + return 0; + } + INT_SET(key.br_startoff, ARCH_CONVERT, off); + if (error = xfs_bmbt_updkey(cur, &key, 1)) { /* first record rewritten: hand its start offset to level 1 */ + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + return error; + } + XFS_BMBT_TRACE_CURSOR(cur, EXIT); + return 0; +} + +/* + * Check an extent list, which has just been read, for + * any bit in the extent flag field. ASSERT on debug + * kernels, as this condition should not occur. + * Return an error condition (1) if any flags found, + * otherwise return 0. + */ +int +xfs_check_nostate_extents( + xfs_bmbt_rec_t *ep, /* first extent record to check */ + xfs_extnum_t num) /* number of records to check */ +{ + for (; num > 0; num--, ep++) { + if ( +#if BMBT_USE_64 + ((INT_GET(ep->l0, ARCH_CONVERT)) >> (64 - BMBT_EXNTFLAG_BITLEN)) != 0 +#else /* !BMBT_USE_64 */ + ((INT_GET(ep->l0, ARCH_CONVERT)) >> (32 - BMBT_EXNTFLAG_BITLEN)) != 0 +#endif /* BMBT_USE_64 */ + ) { + ASSERT(0); + return 1; + } + } + return 0; +} diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c new file mode 100644 index 000000000..73cdd9ccd --- /dev/null +++ b/libxfs/xfs_btree.c @@ -0,0 +1,889 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * This file contains common code for the space manager's btree implementations. + */ + +#include /* NOTE(review): the header name is missing here, presumably lost in extraction - confirm against the original patch */ + +/* + * Cursor allocation zone. + * Cursors are allocated from it in xfs_btree_init_cursor and freed + * back to it in xfs_btree_del_cursor. + */ +xfs_zone_t *xfs_btree_cur_zone; + +/* + * Btree magic numbers. + * Indexed by btree type: the block checking routines compare a + * block's magic against xfs_magics[cur->bc_btnum]. + */ +const __uint32_t xfs_magics[XFS_BTNUM_MAX] = +{ + XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC +}; + +/* + * Prototypes for internal routines. + */ + +/* + * Checking routine: return maxrecs for the block. + */ +STATIC int /* number of records fitting in block */ +xfs_btree_maxrecs( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_btree_block_t *block);/* generic btree block pointer */ + +/* + * Internal routines. 
+ */ + +/* + * Checking routine: return maxrecs for the block. + * Dispatches on the cursor's btree type to the matching per-type + * MAXRECS macro, passing it the block's level. An unknown btree + * type asserts and yields 0. + */ +STATIC int /* number of records fitting in block */ +xfs_btree_maxrecs( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_btree_block_t *block) /* generic btree block pointer */ +{ + switch (cur->bc_btnum) { + case XFS_BTNUM_BNO: + case XFS_BTNUM_CNT: + return (int)XFS_ALLOC_BLOCK_MAXRECS(INT_GET(block->bb_h.bb_level, ARCH_CONVERT), cur); + case XFS_BTNUM_BMAP: + return (int)XFS_BMAP_BLOCK_IMAXRECS(INT_GET(block->bb_h.bb_level, ARCH_CONVERT), cur); + case XFS_BTNUM_INO: + return (int)XFS_INOBT_BLOCK_MAXRECS(INT_GET(block->bb_h.bb_level, ARCH_CONVERT), cur); + default: + ASSERT(0); + return 0; + } +} + +/* + * External routines. + */ + +#ifdef DEBUG +/* + * Debug routine: check that block header is ok. + * Casts the generic block to the long or short form, as selected by + * XFS_BTREE_LONG_PTRS for this btree type, and calls the matching + * checker. The checker's return value is not examined here. + */ +void +xfs_btree_check_block( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_btree_block_t *block, /* generic btree block pointer */ + int level, /* level of the btree block */ + xfs_buf_t *bp) /* buffer containing block, if any */ +{ + if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) + xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level, + bp); + else + xfs_btree_check_sblock(cur, (xfs_btree_sblock_t *)block, level, + bp); +} + +/* + * Debug routine: check that keys are in the right order. 
+ */ +void +xfs_btree_check_key( + xfs_btnum_t btnum, /* btree identifier */ + void *ak1, /* pointer to left (lower) key */ + void *ak2) /* pointer to right (higher) key */ +{ + switch (btnum) { + case XFS_BTNUM_BNO: { + xfs_alloc_key_t *k1; + xfs_alloc_key_t *k2; + + k1 = ak1; + k2 = ak2; + ASSERT(INT_GET(k1->ar_startblock, ARCH_CONVERT) < INT_GET(k2->ar_startblock, ARCH_CONVERT)); /* start blocks strictly increase */ + break; + } + case XFS_BTNUM_CNT: { + xfs_alloc_key_t *k1; + xfs_alloc_key_t *k2; + + k1 = ak1; + k2 = ak2; + ASSERT(INT_GET(k1->ar_blockcount, ARCH_CONVERT) < INT_GET(k2->ar_blockcount, ARCH_CONVERT) || + (INT_GET(k1->ar_blockcount, ARCH_CONVERT) == INT_GET(k2->ar_blockcount, ARCH_CONVERT) && + INT_GET(k1->ar_startblock, ARCH_CONVERT) < INT_GET(k2->ar_startblock, ARCH_CONVERT))); /* ordered by block count, ties broken by start block */ + break; + } + case XFS_BTNUM_BMAP: { + xfs_bmbt_key_t *k1; + xfs_bmbt_key_t *k2; + + k1 = ak1; + k2 = ak2; + ASSERT(INT_GET(k1->br_startoff, ARCH_CONVERT) < INT_GET(k2->br_startoff, ARCH_CONVERT)); /* start offsets strictly increase */ + break; + } + case XFS_BTNUM_INO: { + xfs_inobt_key_t *k1; + xfs_inobt_key_t *k2; + + k1 = ak1; + k2 = ak2; + ASSERT(INT_GET(k1->ir_startino, ARCH_CONVERT) < INT_GET(k2->ir_startino, ARCH_CONVERT)); /* starting inode numbers strictly increase */ + break; + } + default: + ASSERT(0); + } +} +#endif /* DEBUG */ + +/* + * Checking routine: check that long form block header is ok. 
+ */ +/* ARGSUSED */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_lblock( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_btree_lblock_t *block, /* btree long form block pointer */ + int level, /* level of the btree block */ + xfs_buf_t *bp) /* buffer for block, if any */ +{ + int lblock_ok; /* block passes checks */ + xfs_mount_t *mp; /* file system mount point */ + + mp = cur->bc_mp; + lblock_ok = + INT_GET(block->bb_magic, ARCH_CONVERT) == xfs_magics[cur->bc_btnum] && + INT_GET(block->bb_level, ARCH_CONVERT) == level && + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= + xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && + INT_GET(block->bb_leftsib, ARCH_CONVERT) != 0 && /* a sibling value of 0 is rejected */ + (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO || + XFS_FSB_SANITY_CHECK(mp, INT_GET(block->bb_leftsib, ARCH_CONVERT))) && + INT_GET(block->bb_rightsib, ARCH_CONVERT) != 0 && + (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO || + XFS_FSB_SANITY_CHECK(mp, INT_GET(block->bb_rightsib, ARCH_CONVERT))); + if (XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK, + XFS_RANDOM_BTREE_CHECK_LBLOCK)) { +#pragma mips_frequency_hint NEVER + if (bp) + xfs_buftrace("LBTREE ERROR", bp); + return XFS_ERROR(EFSCORRUPTED); + } + return 0; +} + +/* + * Checking routine: check that (long) pointer is ok. + * The pointer passes when level > 0, ptr is not NULLDFSBNO and + * XFS_FSB_SANITY_CHECK accepts it. Anything else is handled by + * XFS_WANT_CORRUPTED_RETURN (presumably returning EFSCORRUPTED, as + * the return comment below says - the macro is defined elsewhere). + */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_lptr( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_dfsbno_t ptr, /* btree block disk address */ + int level) /* btree block level */ +{ + xfs_mount_t *mp; /* file system mount point */ + + mp = cur->bc_mp; + XFS_WANT_CORRUPTED_RETURN( + level > 0 && + ptr != NULLDFSBNO && + XFS_FSB_SANITY_CHECK(mp, ptr)); + return 0; +} + +#ifdef DEBUG +/* + * Debug routine: check that records are in the right order. 
+ */ +void +xfs_btree_check_rec( + xfs_btnum_t btnum, /* btree identifier */ + void *ar1, /* pointer to left (lower) record */ + void *ar2) /* pointer to right (higher) record */ +{ + switch (btnum) { + case XFS_BTNUM_BNO: { + xfs_alloc_rec_t *r1; + xfs_alloc_rec_t *r2; + + r1 = ar1; + r2 = ar2; + ASSERT(INT_GET(r1->ar_startblock, ARCH_CONVERT) + INT_GET(r1->ar_blockcount, ARCH_CONVERT) <= + INT_GET(r2->ar_startblock, ARCH_CONVERT)); /* r1's extent must end at or before r2 starts */ + break; + } + case XFS_BTNUM_CNT: { + xfs_alloc_rec_t *r1; + xfs_alloc_rec_t *r2; + + r1 = ar1; + r2 = ar2; + ASSERT(INT_GET(r1->ar_blockcount, ARCH_CONVERT) < INT_GET(r2->ar_blockcount, ARCH_CONVERT) || + (INT_GET(r1->ar_blockcount, ARCH_CONVERT) == INT_GET(r2->ar_blockcount, ARCH_CONVERT) && + INT_GET(r1->ar_startblock, ARCH_CONVERT) < INT_GET(r2->ar_startblock, ARCH_CONVERT))); /* ordered by block count, ties broken by start block */ + break; + } + case XFS_BTNUM_BMAP: { + xfs_bmbt_rec_t *r1; + xfs_bmbt_rec_t *r2; + + r1 = ar1; + r2 = ar2; + ASSERT(xfs_bmbt_get_startoff(r1) + + xfs_bmbt_get_blockcount(r1) <= + xfs_bmbt_get_startoff(r2)); /* r1's file range must end at or before r2 starts */ + break; + } + case XFS_BTNUM_INO: { + xfs_inobt_rec_t *r1; + xfs_inobt_rec_t *r2; + + r1 = ar1; + r2 = ar2; + ASSERT(INT_GET(r1->ir_startino, ARCH_CONVERT) + XFS_INODES_PER_CHUNK <= + INT_GET(r2->ir_startino, ARCH_CONVERT)); /* r2 must start at least XFS_INODES_PER_CHUNK past r1 */ + break; + } + default: + ASSERT(0); + } +} +#endif /* DEBUG */ + +/* + * Checking routine: check that block header is ok. + * Short form: each sibling pointer must be NULLAGBLOCK or lie below + * the AG length read from the cursor's agbp, and must not be 0. + */ +/* ARGSUSED */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_sblock( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_btree_sblock_t *block, /* btree short form block pointer */ + int level, /* level of the btree block */ + xfs_buf_t *bp) /* buffer containing block */ +{ + xfs_buf_t *agbp; /* buffer for ag. freespace struct */ + xfs_agf_t *agf; /* ag. freespace structure */ + xfs_agblock_t agflen; /* native ag. 
freespace length */ + int sblock_ok; /* block passes checks */ + + agbp = cur->bc_private.a.agbp; /* NOTE(review): always reads the 'a' private fields; presumably also reached for inode btree cursors, which set 'i' - confirm the two layouts agree */ + agf = XFS_BUF_TO_AGF(agbp); + agflen = INT_GET(agf->agf_length, ARCH_CONVERT); + sblock_ok = + INT_GET(block->bb_magic, ARCH_CONVERT) == xfs_magics[cur->bc_btnum] && + INT_GET(block->bb_level, ARCH_CONVERT) == level && + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= + xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && + (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK || + INT_GET(block->bb_leftsib, ARCH_CONVERT) < agflen) && + INT_GET(block->bb_leftsib, ARCH_CONVERT) != 0 && + (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK || + INT_GET(block->bb_rightsib, ARCH_CONVERT) < agflen) && + INT_GET(block->bb_rightsib, ARCH_CONVERT) != 0; + if (XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, + XFS_ERRTAG_BTREE_CHECK_SBLOCK, + XFS_RANDOM_BTREE_CHECK_SBLOCK)) { +#pragma mips_frequency_hint NEVER + if (bp) + xfs_buftrace("SBTREE ERROR", bp); + return XFS_ERROR(EFSCORRUPTED); + } + return 0; +} + +/* + * Checking routine: check that (short) pointer is ok. + * The pointer passes when level > 0 and ptr is neither NULLAGBLOCK + * nor 0 and lies below the AG length taken from the cursor's agbp. + */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_sptr( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_agblock_t ptr, /* btree block disk address */ + int level) /* btree block level */ +{ + xfs_buf_t *agbp; /* buffer for ag. freespace struct */ + xfs_agf_t *agf; /* ag. freespace structure */ + + agbp = cur->bc_private.a.agbp; + agf = XFS_BUF_TO_AGF(agbp); + XFS_WANT_CORRUPTED_RETURN( + level > 0 && + ptr != NULLAGBLOCK && ptr != 0 && + ptr < INT_GET(agf->agf_length, ARCH_CONVERT)); + return 0; +} + +/* + * Delete the btree cursor. + * Releases every buffer the cursor still holds (via xfs_btree_setbuf + * with a NULL buffer) and returns the cursor to xfs_btree_cur_zone. + */ +void +xfs_btree_del_cursor( + xfs_btree_cur_t *cur, /* btree cursor */ + int error) /* del because of error */ +{ + int i; /* btree level */ + + /* + * Clear the buffer pointers, and release the buffers. + * If we're doing this in the face of an error, we + * need to make sure to inspect all of the entries + * in the bc_bufs array for buffers to be unlocked. 
+ * This is because some of the btree code works from + * level n down to 0, and if we get an error along + * the way we won't have initialized all the entries + * down to 0. + */ + for (i = 0; i < cur->bc_nlevels; i++) { + if (cur->bc_bufs[i]) + xfs_btree_setbuf(cur, i, NULL); + else if (!error) /* no error: the first empty slot ends the scan */ + break; + } + /* + * Can't free a bmap cursor without having dealt with the + * allocated indirect blocks' accounting. + */ + ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || + cur->bc_private.b.allocated == 0); + /* + * Free the cursor. + */ + kmem_zone_free(xfs_btree_cur_zone, cur); +} + +/* + * Duplicate the btree cursor. + * Allocate a new one, copy the record, re-get the buffers. + * On success *ncur is the new cursor and 0 is returned; if a buffer + * read fails the new cursor is deleted, *ncur is set to NULL and the + * read error is returned. + */ +int /* error */ +xfs_btree_dup_cursor( + xfs_btree_cur_t *cur, /* input cursor */ + xfs_btree_cur_t **ncur) /* output cursor */ +{ + xfs_buf_t *bp; /* btree block's buffer pointer */ + int error; /* error return value */ + int i; /* level number of btree block */ + xfs_mount_t *mp; /* mount structure for filesystem */ + xfs_btree_cur_t *new; /* new cursor value */ + xfs_trans_t *tp; /* transaction pointer, can be NULL */ + + tp = cur->bc_tp; + mp = cur->bc_mp; + /* + * Allocate a new cursor like the old one. + */ + new = xfs_btree_init_cursor(mp, tp, cur->bc_private.a.agbp, + cur->bc_private.a.agno, cur->bc_btnum, cur->bc_private.b.ip, + cur->bc_private.b.whichfork); /* passes both the 'a' and 'b' private fields; init_cursor only uses those matching bc_btnum */ + /* + * Copy the record currently in the cursor. + */ + new->bc_rec = cur->bc_rec; + /* + * For each level current, re-get the buffer and copy the ptr value. 
+ */ + for (i = 0; i < new->bc_nlevels; i++) { + new->bc_ptrs[i] = cur->bc_ptrs[i]; + new->bc_ra[i] = cur->bc_ra[i]; + if (bp = cur->bc_bufs[i]) { + if (error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp)) { /* read the old buffer's disk address again for the new cursor */ +#pragma mips_frequency_hint NEVER + xfs_btree_del_cursor(new, error); + *ncur = NULL; + return error; + } + new->bc_bufs[i] = bp; + ASSERT(bp); + ASSERT(!XFS_BUF_GETERROR(bp)); + } else + new->bc_bufs[i] = NULL; + } + /* + * For bmap btrees, copy the firstblock, flist, and flags values, + * since init cursor doesn't get them. + */ + if (new->bc_btnum == XFS_BTNUM_BMAP) { + new->bc_private.b.firstblock = cur->bc_private.b.firstblock; + new->bc_private.b.flist = cur->bc_private.b.flist; + new->bc_private.b.flags = cur->bc_private.b.flags; + } + *ncur = new; + return 0; +} + +/* + * Change the cursor to point to the first record at the given level. + * Other levels are unaffected. + * Returns 0 without touching the cursor when the block holds no + * records. + */ +int /* success=1, failure=0 */ +xfs_btree_firstrec( + xfs_btree_cur_t *cur, /* btree cursor */ + int level) /* level to change */ +{ + xfs_btree_block_t *block; /* generic btree block pointer */ + xfs_buf_t *bp; /* buffer containing block */ + + /* + * Get the block pointer for this level. + */ + block = xfs_btree_get_block(cur, level, &bp); + xfs_btree_check_block(cur, block, level, bp); + /* + * It's empty, there is no such record. + */ + if (INT_GET(block->bb_h.bb_numrecs, ARCH_CONVERT) == 0) + return 0; + /* + * Set the ptr value to 1, that's the first record/key. + */ + cur->bc_ptrs[level] = 1; + return 1; +} + +/* + * Retrieve the block pointer from the cursor at the given level. + * This may be a bmap btree root or from a buffer. 
+ */ +xfs_btree_block_t * /* generic btree block pointer */ +xfs_btree_get_block( + xfs_btree_cur_t *cur, /* btree cursor */ + int level, /* level in btree */ + xfs_buf_t **bpp) /* buffer containing the block */ +{ + xfs_btree_block_t *block; /* return value */ + xfs_buf_t *bp; /* return buffer */ + xfs_ifork_t *ifp; /* inode fork pointer */ + int whichfork; /* data or attr fork */ + + if (cur->bc_btnum == XFS_BTNUM_BMAP && level == cur->bc_nlevels - 1) { /* top level of a bmap btree: the root comes from the inode fork */ + whichfork = cur->bc_private.b.whichfork; + ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, whichfork); + block = (xfs_btree_block_t *)ifp->if_broot; + bp = NULL; /* no buffer backs the fork root */ + } else { + bp = cur->bc_bufs[level]; + block = XFS_BUF_TO_BLOCK(bp); + } + ASSERT(block != NULL); + *bpp = bp; + return block; +} + +/* + * Get a buffer for the block, return it with no data read. + * Long-form addressing. + * The buffer covers mp->m_bsize at the disk address derived from + * fsbno and is obtained through xfs_trans_get_buf. + */ +xfs_buf_t * /* buffer for fsbno */ +xfs_btree_get_bufl( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_fsblock_t fsbno, /* file system block number */ + uint lock) /* lock flags for get_buf */ +{ + xfs_buf_t *bp; /* buffer pointer (return value) */ + xfs_daddr_t d; /* real disk block address */ + + ASSERT(fsbno != NULLFSBLOCK); + d = XFS_FSB_TO_DADDR(mp, fsbno); + bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); + ASSERT(bp); + ASSERT(!XFS_BUF_GETERROR(bp)); + return bp; +} + +/* + * Get a buffer for the block, return it with no data read. + * Short-form addressing. 
+ */ +xfs_buf_t * /* buffer for agno/agbno */ +xfs_btree_get_bufs( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_agblock_t agbno, /* allocation group block number */ + uint lock) /* lock flags for get_buf */ +{ + xfs_buf_t *bp; /* buffer pointer (return value) */ + xfs_daddr_t d; /* real disk block address */ + + ASSERT(agno != NULLAGNUMBER); + ASSERT(agbno != NULLAGBLOCK); + d = XFS_AGB_TO_DADDR(mp, agno, agbno); + bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); + ASSERT(bp); + ASSERT(!XFS_BUF_GETERROR(bp)); + return bp; +} + +/* + * Allocate a new btree cursor. + * The cursor is either for allocation (A) or bmap (B) or inodes (I). + * The cursor comes zeroed from xfs_btree_cur_zone. The level count + * is read from the AGF (A), the inode fork root (B) or the AGI (I), + * and only the private fields belonging to btnum are filled in. + */ +xfs_btree_cur_t * /* new btree cursor */ +xfs_btree_init_cursor( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *agbp, /* (A only) buffer for agf structure */ + /* (I only) buffer for agi structure */ + xfs_agnumber_t agno, /* (AI only) allocation group number */ + xfs_btnum_t btnum, /* btree identifier */ + xfs_inode_t *ip, /* (B only) inode owning the btree */ + int whichfork) /* (B only) data or attr fork */ +{ + xfs_agf_t *agf; /* (A) allocation group freespace */ + xfs_agi_t *agi; /* (I) allocation group inodespace */ + xfs_btree_cur_t *cur; /* return value */ + xfs_ifork_t *ifp; /* (B) inode fork pointer */ + int nlevels = 0; /* number of levels in the btree; stays 0 if btnum is unknown and ASSERT is compiled out */ + + ASSERT(xfs_btree_cur_zone != NULL); + /* + * Allocate a new cursor. + */ + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + /* + * Deduce the number of btree levels from the arguments. 
+ */ + switch (btnum) { + case XFS_BTNUM_BNO: + case XFS_BTNUM_CNT: + agf = XFS_BUF_TO_AGF(agbp); + nlevels = INT_GET(agf->agf_levels[btnum], ARCH_CONVERT); + break; + case XFS_BTNUM_BMAP: + ifp = XFS_IFORK_PTR(ip, whichfork); + nlevels = INT_GET(ifp->if_broot->bb_level, ARCH_CONVERT) + 1; /* the root's level is zero-based, so the count is one more */ + break; + case XFS_BTNUM_INO: + agi = XFS_BUF_TO_AGI(agbp); + nlevels = INT_GET(agi->agi_level, ARCH_CONVERT); + break; + default: + ASSERT(0); + } + /* + * Fill in the common fields. + */ + cur->bc_tp = tp; + cur->bc_mp = mp; + cur->bc_nlevels = nlevels; + cur->bc_btnum = btnum; + cur->bc_blocklog = mp->m_sb.sb_blocklog; + /* + * Fill in private fields. + */ + switch (btnum) { + case XFS_BTNUM_BNO: + case XFS_BTNUM_CNT: + /* + * Allocation btree fields. + */ + cur->bc_private.a.agbp = agbp; + cur->bc_private.a.agno = agno; + break; + case XFS_BTNUM_BMAP: + /* + * Bmap btree fields. + */ + cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); + cur->bc_private.b.ip = ip; + cur->bc_private.b.firstblock = NULLFSBLOCK; + cur->bc_private.b.flist = NULL; + cur->bc_private.b.allocated = 0; + cur->bc_private.b.flags = 0; + cur->bc_private.b.whichfork = whichfork; + break; + case XFS_BTNUM_INO: + /* + * Inode allocation btree fields. + */ + cur->bc_private.i.agbp = agbp; + cur->bc_private.i.agno = agno; + break; + default: + ASSERT(0); + } + return cur; +} + +/* + * Check for the cursor referring to the last block at the given level. 
+ */ +int /* 1=is last block, 0=not last block */ +xfs_btree_islastblock( + xfs_btree_cur_t *cur, /* btree cursor */ + int level) /* level to check */ +{ + xfs_btree_block_t *block; /* generic btree block pointer */ + xfs_buf_t *bp; /* buffer containing block */ + + block = xfs_btree_get_block(cur, level, &bp); + xfs_btree_check_block(cur, block, level, bp); + if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) /* the null right-sibling value depends on the pointer form */ + return INT_GET(block->bb_u.l.bb_rightsib, ARCH_CONVERT) == NULLDFSBNO; + else + return INT_GET(block->bb_u.s.bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK; +} + +/* + * Change the cursor to point to the last record in the current block + * at the given level. Other levels are unaffected. + * Returns 0 without touching the cursor when the block holds no + * records. + */ +int /* success=1, failure=0 */ +xfs_btree_lastrec( + xfs_btree_cur_t *cur, /* btree cursor */ + int level) /* level to change */ +{ + xfs_btree_block_t *block; /* generic btree block pointer */ + xfs_buf_t *bp; /* buffer containing block */ + + /* + * Get the block pointer for this level. + */ + block = xfs_btree_get_block(cur, level, &bp); + xfs_btree_check_block(cur, block, level, bp); + /* + * It's empty, there is no such record. + */ + if (INT_GET(block->bb_h.bb_numrecs, ARCH_CONVERT) == 0) + return 0; + /* + * Set the ptr value to numrecs, that's the last record/key. + */ + cur->bc_ptrs[level] = INT_GET(block->bb_h.bb_numrecs, ARCH_CONVERT); + return 1; +} + +/* + * Compute first and last byte offsets for the fields given. + * Interprets the offsets table, which contains struct field offsets. + * The first offset is offsets[i] for the lowest set bit i; the last + * is offsets[i + 1] - 1 for the highest set bit i below nbits, so the + * table must have an entry one past the highest field. + * Neither search loop has a bound of its own: fields must have at + * least one bit set below nbits. + */ +void +xfs_btree_offsets( + __int64_t fields, /* bitmask of fields */ + const short *offsets, /* table of field offsets */ + int nbits, /* number of bits to inspect */ + int *first, /* output: first byte offset */ + int *last) /* output: last byte offset */ +{ + int i; /* current bit number */ + __int64_t imask; /* mask for current bit number */ + + ASSERT(fields != 0); + /* + * Find the lowest bit, so the first byte offset. 
+ */ + for (i = 0, imask = 1LL; ; i++, imask <<= 1) { + if (imask & fields) { + *first = offsets[i]; + break; + } + } + /* + * Find the highest bit, so the last byte offset. + */ + for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) { + if (imask & fields) { + *last = offsets[i + 1] - 1; /* the next field's offset, minus one, ends this field */ + break; + } + } +} + +/* + * Get a buffer for the block, return it read in. + * Long-form addressing. + * On success *bpp receives the buffer, which may be NULL; a non-NULL + * buffer is tagged B_FS_MAP with refval. On a read error that error + * is returned and *bpp is not written. + */ +int /* error */ +xfs_btree_read_bufl( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_fsblock_t fsbno, /* file system block number */ + uint lock, /* lock flags for read_buf */ + xfs_buf_t **bpp, /* buffer for fsbno */ + int refval) /* ref count value for buffer */ +{ + xfs_buf_t *bp; /* return value */ + xfs_daddr_t d; /* real disk block address */ + int error; + + ASSERT(fsbno != NULLFSBLOCK); + d = XFS_FSB_TO_DADDR(mp, fsbno); + if (error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, + mp->m_bsize, lock, &bp)) { +#pragma mips_frequency_hint NEVER + return error; + } + ASSERT(!bp || !XFS_BUF_GETERROR(bp)); + if (bp != NULL) { + XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); + } + *bpp = bp; + return 0; +} + +/* + * Get a buffer for the block, return it read in. + * Short-form addressing. 
+ */ +int /* error */ +xfs_btree_read_bufs( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_agblock_t agbno, /* allocation group block number */ + uint lock, /* lock flags for read_buf */ + xfs_buf_t **bpp, /* buffer for agno/agbno */ + int refval) /* ref count value for buffer */ +{ + xfs_buf_t *bp; /* return value */ + xfs_daddr_t d; /* real disk block address */ + int error; + + ASSERT(agno != NULLAGNUMBER); + ASSERT(agbno != NULLAGBLOCK); + d = XFS_AGB_TO_DADDR(mp, agno, agbno); + if (error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, + mp->m_bsize, lock, &bp)) { +#pragma mips_frequency_hint NEVER + return error; /* *bpp is not written on a read error */ + } + ASSERT(!bp || !XFS_BUF_GETERROR(bp)); + if (bp != NULL) + switch (refval) { /* refval also picks the buffer type tag; any other value leaves the buffer untagged */ + case XFS_ALLOC_BTREE_REF: + XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); + break; + case XFS_INO_BTREE_REF: + XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval); + break; + } + *bpp = bp; /* may be NULL */ + return 0; +} + +/* + * Read-ahead btree blocks, at the given level. + * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. 
+ */ +int +xfs_btree_readahead( + xfs_btree_cur_t *cur, /* btree cursor */ + int lev, /* level in btree */ + int lr) /* left/right bits */ +{ + xfs_alloc_block_t *a; + xfs_bmbt_block_t *b; + xfs_inobt_block_t *i; + int rval = 0; /* number of readaheads issued, returned to the caller */ + + ASSERT(cur->bc_bufs[lev] != NULL); + if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) /* every requested direction is already recorded in bc_ra */ + return 0; + cur->bc_ra[lev] |= lr; /* record the directions so they are not requested again */ + switch (cur->bc_btnum) { + case XFS_BTNUM_BNO: + case XFS_BTNUM_CNT: + a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); + if ((lr & XFS_BTCUR_LEFTRA) && INT_GET(a->bb_leftsib, ARCH_CONVERT) != NULLAGBLOCK) { + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, + INT_GET(a->bb_leftsib, ARCH_CONVERT), 1); + rval++; + } + if ((lr & XFS_BTCUR_RIGHTRA) && INT_GET(a->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) { + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, + INT_GET(a->bb_rightsib, ARCH_CONVERT), 1); + rval++; + } + break; + case XFS_BTNUM_BMAP: + b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]); + if ((lr & XFS_BTCUR_LEFTRA) && INT_GET(b->bb_leftsib, ARCH_CONVERT) != NULLDFSBNO) { + xfs_btree_reada_bufl(cur->bc_mp, INT_GET(b->bb_leftsib, ARCH_CONVERT), 1); + rval++; + } + if ((lr & XFS_BTCUR_RIGHTRA) && INT_GET(b->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) { + xfs_btree_reada_bufl(cur->bc_mp, INT_GET(b->bb_rightsib, ARCH_CONVERT), 1); + rval++; + } + break; + case XFS_BTNUM_INO: + i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); + if ((lr & XFS_BTCUR_LEFTRA) && INT_GET(i->bb_leftsib, ARCH_CONVERT) != NULLAGBLOCK) { + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno, + INT_GET(i->bb_leftsib, ARCH_CONVERT), 1); + rval++; + } + if ((lr & XFS_BTCUR_RIGHTRA) && INT_GET(i->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) { + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno, + INT_GET(i->bb_rightsib, ARCH_CONVERT), 1); + rval++; + } + break; + default: + ASSERT(0); + } + return rval; +} + +/* + * Set the buffer for level "lev" in the cursor to bp, releasing + * any previous buffer. 
+ */ +void +xfs_btree_setbuf( + xfs_btree_cur_t *cur, /* btree cursor */ + int lev, /* level in btree */ + xfs_buf_t *bp) /* new buffer to set */ +{ + xfs_btree_block_t *b; /* btree block */ + xfs_buf_t *obp; /* old buffer pointer */ + + obp = cur->bc_bufs[lev]; + if (obp) + xfs_trans_brelse(cur->bc_tp, obp); /* release the buffer being replaced */ + cur->bc_bufs[lev] = bp; + cur->bc_ra[lev] = 0; /* readahead state belonged to the old block */ + if (!bp) + return; + b = XFS_BUF_TO_BLOCK(bp); + if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) { + if (INT_GET(b->bb_u.l.bb_leftsib, ARCH_CONVERT) == NULLDFSBNO) + cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; /* no sibling on this side: mark it done so xfs_btree_readahead skips it */ + if (INT_GET(b->bb_u.l.bb_rightsib, ARCH_CONVERT) == NULLDFSBNO) + cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; + } else { + if (INT_GET(b->bb_u.s.bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) + cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; + if (INT_GET(b->bb_u.s.bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) + cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; + } +} diff --git a/libxfs/xfs_da_btree.c b/libxfs/xfs_da_btree.c new file mode 100644 index 000000000..37ad6269d --- /dev/null +++ b/libxfs/xfs_da_btree.c @@ -0,0 +1,2524 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include /* NOTE(review): the header name is missing here, presumably lost in extraction - confirm against the original patch */ + +/* + * xfs_da_btree.c + * + * Routines to implement directories as Btrees of hashed names. + */ + + +/*======================================================================== + * Routines used for growing the Btree. + *========================================================================*/ + +/* + * Create the initial contents of an intermediate node. + * Gets a buffer for blkno, zeroes the forw/back links, pad and count, + * sets the node magic and the given level, logs the header and + * returns the buffer through *bpp. + */ +int +xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, + xfs_dabuf_t **bpp, int whichfork) +{ + xfs_da_intnode_t *node; + xfs_dabuf_t *bp; + int error; + xfs_trans_t *tp; + + tp = args->trans; + error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork); + if (error) + return(error); + ASSERT(bp != NULL); + node = bp->data; + INT_ZERO(node->hdr.info.forw, ARCH_CONVERT); + INT_ZERO(node->hdr.info.back, ARCH_CONVERT); + INT_SET(node->hdr.info.magic, ARCH_CONVERT, XFS_DA_NODE_MAGIC); + INT_ZERO(node->hdr.info.pad, ARCH_CONVERT); + INT_ZERO(node->hdr.count, ARCH_CONVERT); + INT_SET(node->hdr.level, ARCH_CONVERT, level); + + xfs_da_log_buf(tp, bp, + XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); + + *bpp = bp; + return(0); +} + +/* + * Split a leaf node, rebalance, then possibly split + * intermediate nodes, rebalance, etc. + * Walks state->path from the leaf towards the root; addblk carries + * the block that still has to be inserted one level up and becomes + * NULL once nothing is left to insert. + */ +int /* error */ +xfs_da_split(xfs_da_state_t *state) +{ + xfs_da_state_blk_t *oldblk, *newblk, *addblk; + xfs_da_intnode_t *node; + xfs_dabuf_t *bp; + int max, action, error, i; + + /* + * Walk back up the tree splitting/inserting/adjusting as necessary. 
+ * If we need to insert and there isn't room, split the node, then + * decide which fragment to insert the new block from below into. + * Note that we may split the root this way, but we need more fixup. + */ + max = state->path.active - 1; + ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH)); + ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC || + state->path.blk[max].magic == XFS_DIRX_LEAF_MAGIC(state->mp)); + + addblk = &state->path.blk[max]; /* initial dummy value */ + for (i = max; (i >= 0) && addblk; state->path.active--, i--) { + oldblk = &state->path.blk[i]; + newblk = &state->altpath.blk[i]; + + /* + * If a leaf node then + * Allocate a new leaf node, then rebalance across them. + * else if an intermediate node then + * We split on the last layer, must we split the node? + */ + switch (oldblk->magic) { + case XFS_ATTR_LEAF_MAGIC: +#ifndef __KERNEL__ + return(ENOTTY); /* attr leaf splitting is compiled out of non-kernel builds */ +#else + error = xfs_attr_leaf_split(state, oldblk, newblk); + if ((error != 0) && (error != ENOSPC)) { + return(error); /* GROT: attr is inconsistent */ + } + if (!error) { + addblk = newblk; + break; + } + /* + * Entry wouldn't fit, split the leaf again. + */ + state->extravalid = 1; + if (state->inleaf) { + state->extraafter = 0; /* before newblk */ + error = xfs_attr_leaf_split(state, oldblk, + &state->extrablk); + } else { + state->extraafter = 1; /* after newblk */ + error = xfs_attr_leaf_split(state, newblk, + &state->extrablk); + } + if (error) + return(error); /* GROT: attr inconsistent */ + addblk = newblk; + break; +#endif + case XFS_DIR_LEAF_MAGIC: + ASSERT(XFS_DIR_IS_V1(state->mp)); + error = xfs_dir_leaf_split(state, oldblk, newblk); + if ((error != 0) && (error != ENOSPC)) { + return(error); /* GROT: dir is inconsistent */ + } + if (!error) { + addblk = newblk; + break; + } + /* + * Entry wouldn't fit, split the leaf again. 
+ */ + state->extravalid = 1; + if (state->inleaf) { + state->extraafter = 0; /* before newblk */ + error = xfs_dir_leaf_split(state, oldblk, + &state->extrablk); + if (error) + return(error); /* GROT: dir incon. */ + addblk = newblk; + } else { + state->extraafter = 1; /* after newblk */ + error = xfs_dir_leaf_split(state, newblk, + &state->extrablk); + if (error) + return(error); /* GROT: dir incon. */ + addblk = newblk; + } + break; + case XFS_DIR2_LEAFN_MAGIC: + ASSERT(XFS_DIR_IS_V2(state->mp)); + error = xfs_dir2_leafn_split(state, oldblk, newblk); + if (error) + return error; + addblk = newblk; + break; + case XFS_DA_NODE_MAGIC: + error = xfs_da_node_split(state, oldblk, newblk, addblk, + max - i, &action); + xfs_da_buf_done(addblk->bp); /* addblk's buffer is finished with whether or not the split failed */ + addblk->bp = NULL; + if (error) + return(error); /* GROT: dir is inconsistent */ + /* + * Record the newly split block for the next time thru? + */ + if (action) + addblk = newblk; + else + addblk = NULL; + break; + } + + /* + * Update the btree to show the new hashval for this child. + */ + xfs_da_fixhashpath(state, &state->path); + /* + * If we won't need this block again, it's getting dropped + * from the active path by the loop control, so we need + * to mark it done now. + */ + if (i > 0 || !addblk) + xfs_da_buf_done(oldblk->bp); + } + if (!addblk) + return(0); + + /* + * Split the root node. + */ + ASSERT(state->path.active == 0); + oldblk = &state->path.blk[0]; + error = xfs_da_root_split(state, oldblk, addblk); + if (error) { + xfs_da_buf_done(oldblk->bp); + xfs_da_buf_done(addblk->bp); + addblk->bp = NULL; + return(error); /* GROT: dir is inconsistent */ + } + + /* + * Update pointers to the node which used to be block 0 and + * just got bumped because of the addition of a new root node. + * There might be three blocks involved if a double split occurred, + * and the original block 0 could be at any position in the list. 
+ */ + + node = oldblk->bp->data; + if (!INT_ISZERO(node->hdr.info.forw, ARCH_CONVERT)) { + if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) == addblk->blkno) { + bp = addblk->bp; + } else { + ASSERT(state->extravalid); + bp = state->extrablk.bp; /* the neighbour is not addblk, so it must be the extra block */ + } + node = bp->data; + INT_SET(node->hdr.info.back, ARCH_CONVERT, oldblk->blkno); /* forward neighbour now points back at the relocated block */ + xfs_da_log_buf(state->args->trans, bp, + XFS_DA_LOGRANGE(node, &node->hdr.info, + sizeof(node->hdr.info))); + } + node = oldblk->bp->data; + if (INT_GET(node->hdr.info.back, ARCH_CONVERT)) { + if (INT_GET(node->hdr.info.back, ARCH_CONVERT) == addblk->blkno) { + bp = addblk->bp; + } else { + ASSERT(state->extravalid); + bp = state->extrablk.bp; + } + node = bp->data; + INT_SET(node->hdr.info.forw, ARCH_CONVERT, oldblk->blkno); /* backward neighbour now points forward at the relocated block */ + xfs_da_log_buf(state->args->trans, bp, + XFS_DA_LOGRANGE(node, &node->hdr.info, + sizeof(node->hdr.info))); + } + xfs_da_buf_done(oldblk->bp); + xfs_da_buf_done(addblk->bp); + addblk->bp = NULL; + return(0); +} + +/* + * Split the root. We have to create a new root and point to the two + * parts (the split old root) that we just created. Copy block zero to + * the EOF, extending the inode in process. + * On return blk1 refers to the relocated copy (its bp and blkno are + * replaced); the new root holds two entries, one for blk1 and one + * for blk2. + */ +STATIC int /* error */ +xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, + xfs_da_state_blk_t *blk2) +{ + xfs_da_intnode_t *node, *oldroot; + xfs_da_args_t *args; + xfs_dablk_t blkno; + xfs_dabuf_t *bp; + int error, size; + xfs_inode_t *dp; + xfs_trans_t *tp; + xfs_mount_t *mp; + xfs_dir2_leaf_t *leaf; + + /* + * Copy the existing (incorrect) block from the root node position + * to a free space somewhere. 
+ */ + args = state->args; + ASSERT(args != NULL); + error = xfs_da_grow_inode(args, &blkno); + if (error) + return(error); + dp = args->dp; + tp = args->trans; + mp = state->mp; + error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork); + if (error) + return(error); + ASSERT(bp != NULL); + node = bp->data; + oldroot = blk1->bp->data; + if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) { + size = (int)((char *)&oldroot->btree[INT_GET(oldroot->hdr.count, ARCH_CONVERT)] - + (char *)oldroot); /* bytes up to the end of the last used btree entry */ + } else { + ASSERT(XFS_DIR_IS_V2(mp)); + ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + leaf = (xfs_dir2_leaf_t *)oldroot; + size = (int)((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] - + (char *)leaf); /* bytes up to the end of the last used leaf entry */ + } + bcopy(oldroot, node, size); /* copy the old root's used bytes into the new block */ + xfs_da_log_buf(tp, bp, 0, size - 1); + xfs_da_buf_done(blk1->bp); + blk1->bp = bp; /* blk1 now refers to the relocated copy */ + blk1->blkno = blkno; + + /* + * Set up the new root node. + */ + error = xfs_da_node_create(args, + args->whichfork == XFS_DATA_FORK && + XFS_DIR_IS_V2(mp) ? mp->m_dirleafblk : 0, + INT_GET(node->hdr.level, ARCH_CONVERT) + 1, &bp, args->whichfork); /* root block is m_dirleafblk for a v2 directory data fork, else 0; one level above the copied block */ + if (error) + return(error); + node = bp->data; + INT_SET(node->btree[0].hashval, ARCH_CONVERT, blk1->hashval); + INT_SET(node->btree[0].before, ARCH_CONVERT, blk1->blkno); + INT_SET(node->btree[1].hashval, ARCH_CONVERT, blk2->hashval); + INT_SET(node->btree[1].before, ARCH_CONVERT, blk2->blkno); + INT_SET(node->hdr.count, ARCH_CONVERT, 2); + if (XFS_DIR_IS_V2(mp)) { + ASSERT(blk1->blkno >= mp->m_dirleafblk && + blk1->blkno < mp->m_dirfreeblk); + ASSERT(blk2->blkno >= mp->m_dirleafblk && + blk2->blkno < mp->m_dirfreeblk); + } + /* Header is already logged by xfs_da_node_create */ + xfs_da_log_buf(tp, bp, + XFS_DA_LOGRANGE(node, node->btree, + sizeof(xfs_da_node_entry_t) * 2)); + xfs_da_buf_done(bp); + + return(0); +} + +/* + * Split the node, rebalance, then add the new entry. 
+ */ +STATIC int /* error */ +xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, + xfs_da_state_blk_t *newblk, + xfs_da_state_blk_t *addblk, + int treelevel, int *result) +{ + xfs_da_intnode_t *node; + xfs_dablk_t blkno; + int newcount, error; + int useextra; + + node = oldblk->bp->data; + ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + + /* + * With V2 the extra block is data or freespace. + */ + useextra = state->extravalid && XFS_DIR_IS_V1(state->mp); + newcount = 1 + useextra; + /* + * Do we have to split the node? + */ + if ((INT_GET(node->hdr.count, ARCH_CONVERT) + newcount) > XFS_DA_NODE_ENTRIES(state->mp)) { + /* + * Allocate a new node, add to the doubly linked chain of + * nodes, then move some of our excess entries into it. + */ + error = xfs_da_grow_inode(state->args, &blkno); + if (error) + return(error); /* GROT: dir is inconsistent */ + + error = xfs_da_node_create(state->args, blkno, treelevel, + &newblk->bp, state->args->whichfork); + if (error) + return(error); /* GROT: dir is inconsistent */ + newblk->blkno = blkno; + newblk->magic = XFS_DA_NODE_MAGIC; + xfs_da_node_rebalance(state, oldblk, newblk); + error = xfs_da_blk_link(state, oldblk, newblk); + if (error) + return(error); + *result = 1; + } else { + *result = 0; + } + + /* + * Insert the new entry(s) into the correct block + * (updating last hashval in the process). + * + * xfs_da_node_add() inserts BEFORE the given index, + * and as a result of using node_lookup_int() we always + * point to a valid entry (not after one), but a split + * operation always results in a new block whose hashvals + * FOLLOW the current block. + * + * If we had double-split op below us, then add the extra block too. 
+ */ + node = oldblk->bp->data; + if (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)) { + oldblk->index++; + xfs_da_node_add(state, oldblk, addblk); + if (useextra) { + if (state->extraafter) + oldblk->index++; + xfs_da_node_add(state, oldblk, &state->extrablk); + state->extravalid = 0; + } + } else { + newblk->index++; + xfs_da_node_add(state, newblk, addblk); + if (useextra) { + if (state->extraafter) + newblk->index++; + xfs_da_node_add(state, newblk, &state->extrablk); + state->extravalid = 0; + } + } + + return(0); +} + +/* + * Balance the btree elements between two intermediate nodes, + * usually one full and one empty. + * + * NOTE: if blk2 is empty, then it will get the upper half of blk1. + */ +STATIC void +xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, + xfs_da_state_blk_t *blk2) +{ + xfs_da_intnode_t *node1, *node2, *tmpnode; + xfs_da_node_entry_t *btree_s, *btree_d; + int count, tmp; + xfs_trans_t *tp; + + node1 = blk1->bp->data; + node2 = blk2->bp->data; + /* + * Figure out how many entries need to move, and in which direction. + * Swap the nodes around if that makes it simpler. + */ + if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) && + ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) || + (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) < + INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) { + tmpnode = node1; + node1 = node2; + node2 = tmpnode; + } + ASSERT(INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT(INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + count = (INT_GET(node1->hdr.count, ARCH_CONVERT) - INT_GET(node2->hdr.count, ARCH_CONVERT)) / 2; + if (count == 0) + return; + tp = state->args->trans; + /* + * Two cases: high-to-low and low-to-high. 
+ */ + if (count > 0) { + /* + * Move elements in node2 up to make a hole. + */ + if ((tmp = INT_GET(node2->hdr.count, ARCH_CONVERT)) > 0) { + tmp *= (uint)sizeof(xfs_da_node_entry_t); + btree_s = &node2->btree[0]; + btree_d = &node2->btree[count]; + ovbcopy(btree_s, btree_d, tmp); + } + + /* + * Move the req'd B-tree elements from high in node1 to + * low in node2. + */ + INT_MOD(node2->hdr.count, ARCH_CONVERT, count); + tmp = count * (uint)sizeof(xfs_da_node_entry_t); + btree_s = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT) - count]; + btree_d = &node2->btree[0]; + bcopy(btree_s, btree_d, tmp); + INT_MOD(node1->hdr.count, ARCH_CONVERT, -(count)); + + } else { + /* + * Move the req'd B-tree elements from low in node2 to + * high in node1. + */ + count = -count; + tmp = count * (uint)sizeof(xfs_da_node_entry_t); + btree_s = &node2->btree[0]; + btree_d = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)]; + bcopy(btree_s, btree_d, tmp); + INT_MOD(node1->hdr.count, ARCH_CONVERT, count); + xfs_da_log_buf(tp, blk1->bp, + XFS_DA_LOGRANGE(node1, btree_d, tmp)); + + /* + * Move elements in node2 down to fill the hole. + */ + tmp = INT_GET(node2->hdr.count, ARCH_CONVERT) - count; + tmp *= (uint)sizeof(xfs_da_node_entry_t); + btree_s = &node2->btree[count]; + btree_d = &node2->btree[0]; + ovbcopy(btree_s, btree_d, tmp); + INT_MOD(node2->hdr.count, ARCH_CONVERT, -(count)); + } + + /* + * Log header of node 1 and all current bits of node 2. + */ + xfs_da_log_buf(tp, blk1->bp, + XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr))); + xfs_da_log_buf(tp, blk2->bp, + XFS_DA_LOGRANGE(node2, &node2->hdr, + sizeof(node2->hdr) + + sizeof(node2->btree[0]) * INT_GET(node2->hdr.count, ARCH_CONVERT))); + + /* + * Record the last hashval from each block for upward propagation. 
+ * (note: don't use the swapped node pointers) + */ + node1 = blk1->bp->data; + node2 = blk2->bp->data; + blk1->hashval = INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); + blk2->hashval = INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); + + /* + * Adjust the expected index for insertion. + */ + if (blk1->index >= INT_GET(node1->hdr.count, ARCH_CONVERT)) { + blk2->index = blk1->index - INT_GET(node1->hdr.count, ARCH_CONVERT); + blk1->index = INT_GET(node1->hdr.count, ARCH_CONVERT) + 1; /* make it invalid */ + } +} + +/* + * Add a new entry to an intermediate node. + */ +STATIC void +xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, + xfs_da_state_blk_t *newblk) +{ + xfs_da_intnode_t *node; + xfs_da_node_entry_t *btree; + int tmp; + xfs_mount_t *mp; + + node = oldblk->bp->data; + mp = state->mp; + ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT((oldblk->index >= 0) && (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT))); + ASSERT(newblk->blkno != 0); + if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) + ASSERT(newblk->blkno >= mp->m_dirleafblk && + newblk->blkno < mp->m_dirfreeblk); + + /* + * We may need to make some room before we insert the new node. 
+ */ + tmp = 0; + btree = &node->btree[ oldblk->index ]; + if (oldblk->index < INT_GET(node->hdr.count, ARCH_CONVERT)) { + tmp = (INT_GET(node->hdr.count, ARCH_CONVERT) - oldblk->index) * (uint)sizeof(*btree); + ovbcopy(btree, btree + 1, tmp); + } + INT_SET(btree->hashval, ARCH_CONVERT, newblk->hashval); + INT_SET(btree->before, ARCH_CONVERT, newblk->blkno); + xfs_da_log_buf(state->args->trans, oldblk->bp, + XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree))); + INT_MOD(node->hdr.count, ARCH_CONVERT, +1); + xfs_da_log_buf(state->args->trans, oldblk->bp, + XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); + + /* + * Copy the last hash value from the oldblk to propagate upwards. + */ + oldblk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); +} + +/*======================================================================== + * Routines used for shrinking the Btree. + *========================================================================*/ + +/* + * Deallocate an empty leaf node, remove it from its parent, + * possibly deallocating that block, etc... + */ +int +xfs_da_join(xfs_da_state_t *state) +{ + xfs_da_state_blk_t *drop_blk, *save_blk; + int action, error; + + action = 0; + drop_blk = &state->path.blk[ state->path.active-1 ]; + save_blk = &state->altpath.blk[ state->path.active-1 ]; + ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC); + ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC || + drop_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp)); + + /* + * Walk back up the tree joining/deallocating as necessary. + * When we stop dropping blocks, break out. + */ + for ( ; state->path.active >= 2; drop_blk--, save_blk--, + state->path.active--) { + /* + * See if we can combine the block with a neighbor. 
+ * (action == 0) => no options, just leave + * (action == 1) => coalesce, then unlink + * (action == 2) => block empty, unlink it + */ + switch (drop_blk->magic) { + case XFS_ATTR_LEAF_MAGIC: +#ifndef __KERNEL__ + error = ENOTTY; +#else + error = xfs_attr_leaf_toosmall(state, &action); +#endif + if (error) + return(error); + if (action == 0) + return(0); +#ifdef __KERNEL__ + xfs_attr_leaf_unbalance(state, drop_blk, save_blk); +#endif + break; + case XFS_DIR_LEAF_MAGIC: + ASSERT(XFS_DIR_IS_V1(state->mp)); + error = xfs_dir_leaf_toosmall(state, &action); + if (error) + return(error); + if (action == 0) + return(0); + xfs_dir_leaf_unbalance(state, drop_blk, save_blk); + break; + case XFS_DIR2_LEAFN_MAGIC: + ASSERT(XFS_DIR_IS_V2(state->mp)); + error = xfs_dir2_leafn_toosmall(state, &action); + if (error) + return error; + if (action == 0) + return 0; + xfs_dir2_leafn_unbalance(state, drop_blk, save_blk); + break; + case XFS_DA_NODE_MAGIC: + /* + * Remove the offending node, fixup hashvals, + * check for a toosmall neighbor. + */ + xfs_da_node_remove(state, drop_blk); + xfs_da_fixhashpath(state, &state->path); + error = xfs_da_node_toosmall(state, &action); + if (error) + return(error); + if (action == 0) + return 0; + xfs_da_node_unbalance(state, drop_blk, save_blk); + break; + } + xfs_da_fixhashpath(state, &state->altpath); + error = xfs_da_blk_unlink(state, drop_blk, save_blk); + xfs_da_state_kill_altpath(state); + if (error) + return(error); + error = xfs_da_shrink_inode(state->args, drop_blk->blkno, + drop_blk->bp); + drop_blk->bp = NULL; + if (error) + return(error); + } + /* + * We joined all the way to the top. If it turns out that + * we only have one entry in the root, make the child block + * the new root. + */ + xfs_da_node_remove(state, drop_blk); + xfs_da_fixhashpath(state, &state->path); + error = xfs_da_root_join(state, &state->path.blk[0]); + return(error); +} + +/* + * We have only one entry in the root. 
Copy the only remaining child of + * the old root to block 0 as the new root node. + */ +STATIC int +xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) +{ + xfs_da_intnode_t *oldroot; + /* REFERENCED */ + xfs_da_blkinfo_t *blkinfo; + xfs_da_args_t *args; + xfs_dablk_t child; + xfs_dabuf_t *bp; + int error; + + args = state->args; + ASSERT(args != NULL); + ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); + oldroot = root_blk->bp->data; + ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT(INT_ISZERO(oldroot->hdr.info.forw, ARCH_CONVERT)); + ASSERT(INT_ISZERO(oldroot->hdr.info.back, ARCH_CONVERT)); + + /* + * If the root has more than one child, then don't do anything. + */ + if (INT_GET(oldroot->hdr.count, ARCH_CONVERT) > 1) + return(0); + + /* + * Read in the (only) child block, then copy those bytes into + * the root block's buffer and free the original child block. + */ + child = INT_GET(oldroot->btree[ 0 ].before, ARCH_CONVERT); + ASSERT(child != 0); + error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp, + args->whichfork); + if (error) + return(error); + ASSERT(bp != NULL); + blkinfo = bp->data; + if (INT_GET(oldroot->hdr.level, ARCH_CONVERT) == 1) { + ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) || + INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC); + } else { + ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + } + ASSERT(INT_GET(blkinfo->forw, ARCH_CONVERT) == 0); + ASSERT(INT_GET(blkinfo->back, ARCH_CONVERT) == 0); + bcopy(bp->data, root_blk->bp->data, state->blocksize); + xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1); + error = xfs_da_shrink_inode(args, child, bp); + return(error); +} + +/* + * Check a node block and its neighbors to see if the block should be + * collapsed into one or the other neighbor. Always keep the block + * with the smaller block number. 
+ * If the current block is over 50% full, don't try to join it, return 0. + * If the block is empty, fill in the state structure and return 2. + * If it can be collapsed, fill in the state structure and return 1. + * If nothing can be done, return 0. + */ +STATIC int +xfs_da_node_toosmall(xfs_da_state_t *state, int *action) +{ + xfs_da_intnode_t *node; + xfs_da_state_blk_t *blk; + xfs_da_blkinfo_t *info; + int count, forward, error, retval, i; + xfs_dablk_t blkno; + xfs_dabuf_t *bp; + + /* + * Check for the degenerate case of the block being over 50% full. + * If so, it's not worth even looking to see if we might be able + * to coalesce with a sibling. + */ + blk = &state->path.blk[ state->path.active-1 ]; + info = blk->bp->data; + ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + node = (xfs_da_intnode_t *)info; + count = INT_GET(node->hdr.count, ARCH_CONVERT); + if (count > (XFS_DA_NODE_ENTRIES(state->mp) >> 1)) { + *action = 0; /* blk over 50%, dont try to join */ + return(0); /* blk over 50%, dont try to join */ + } + + /* + * Check for the degenerate case of the block being empty. + * If the block is empty, we'll simply delete it, no need to + * coalesce it with a sibling block. We choose (aribtrarily) + * to merge with the forward block unless it is NULL. + */ + if (count == 0) { + /* + * Make altpath point to the block we want to keep and + * path point to the block we want to drop (this one). + */ + forward = (!INT_ISZERO(info->forw, ARCH_CONVERT)); + bcopy(&state->path, &state->altpath, sizeof(state->path)); + error = xfs_da_path_shift(state, &state->altpath, forward, + 0, &retval); + if (error) + return(error); + if (retval) { + *action = 0; + } else { + *action = 2; + } + return(0); + } + + /* + * Examine each sibling block to see if we can coalesce with + * at least 25% free space to spare. We need to figure out + * whether to merge with the forward or the backward block. 
+ * We prefer coalescing with the lower numbered sibling so as + * to shrink a directory over time. + */ + /* start with smaller blk num */ + forward = (INT_GET(info->forw, ARCH_CONVERT) + < INT_GET(info->back, ARCH_CONVERT)); + for (i = 0; i < 2; forward = !forward, i++) { + if (forward) + blkno = INT_GET(info->forw, ARCH_CONVERT); + else + blkno = INT_GET(info->back, ARCH_CONVERT); + if (blkno == 0) + continue; + error = xfs_da_read_buf(state->args->trans, state->args->dp, + blkno, -1, &bp, state->args->whichfork); + if (error) + return(error); + ASSERT(bp != NULL); + + node = (xfs_da_intnode_t *)info; + count = XFS_DA_NODE_ENTRIES(state->mp); + count -= XFS_DA_NODE_ENTRIES(state->mp) >> 2; + count -= INT_GET(node->hdr.count, ARCH_CONVERT); + node = bp->data; + ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + count -= INT_GET(node->hdr.count, ARCH_CONVERT); + xfs_da_brelse(state->args->trans, bp); + if (count >= 0) + break; /* fits with at least 25% to spare */ + } + if (i >= 2) { + *action = 0; + return(0); + } + + /* + * Make altpath point to the block we want to keep (the lower + * numbered block) and path point to the block we want to drop. + */ + bcopy(&state->path, &state->altpath, sizeof(state->path)); + if (blkno < blk->blkno) { + error = xfs_da_path_shift(state, &state->altpath, forward, + 0, &retval); + if (error) { + return(error); + } + if (retval) { + *action = 0; + return(0); + } + } else { + error = xfs_da_path_shift(state, &state->path, forward, + 0, &retval); + if (error) { + return(error); + } + if (retval) { + *action = 0; + return(0); + } + } + *action = 1; + return(0); +} + + +/* + * Walk back up the tree adjusting hash values as necessary, + * when we stop making changes, return. 
+ */ +void +xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path) +{ + xfs_da_state_blk_t *blk; + xfs_da_intnode_t *node; + xfs_da_node_entry_t *btree; + xfs_dahash_t lasthash; + int level, count; + + level = path->active-1; + blk = &path->blk[ level ]; + switch (blk->magic) { +#ifdef __KERNEL__ + case XFS_ATTR_LEAF_MAGIC: + lasthash = xfs_attr_leaf_lasthash(blk->bp, &count); + if (count == 0) + return; + break; +#endif + case XFS_DIR_LEAF_MAGIC: + ASSERT(XFS_DIR_IS_V1(state->mp)); + lasthash = xfs_dir_leaf_lasthash(blk->bp, &count); + if (count == 0) + return; + break; + case XFS_DIR2_LEAFN_MAGIC: + ASSERT(XFS_DIR_IS_V2(state->mp)); + lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count); + if (count == 0) + return; + break; + case XFS_DA_NODE_MAGIC: + lasthash = xfs_da_node_lasthash(blk->bp, &count); + if (count == 0) + return; + break; + } + for (blk--, level--; level >= 0; blk--, level--) { + node = blk->bp->data; + ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + btree = &node->btree[ blk->index ]; + if (INT_GET(btree->hashval, ARCH_CONVERT) == lasthash) + break; + blk->hashval = lasthash; + INT_SET(btree->hashval, ARCH_CONVERT, lasthash); + xfs_da_log_buf(state->args->trans, blk->bp, + XFS_DA_LOGRANGE(node, btree, sizeof(*btree))); + + lasthash = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); + } +} + + + +/* + * Remove an entry from an intermediate node. + */ +STATIC void +xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk) +{ + xfs_da_intnode_t *node; + xfs_da_node_entry_t *btree; + int tmp; + + node = drop_blk->bp->data; + ASSERT(drop_blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)); + ASSERT(drop_blk->index >= 0); + + /* + * Copy over the offending entry, or just zero it out. 
+ */ + btree = &node->btree[drop_blk->index]; + if (drop_blk->index < (INT_GET(node->hdr.count, ARCH_CONVERT)-1)) { + tmp = INT_GET(node->hdr.count, ARCH_CONVERT) - drop_blk->index - 1; + tmp *= (uint)sizeof(xfs_da_node_entry_t); + ovbcopy(btree + 1, btree, tmp); + xfs_da_log_buf(state->args->trans, drop_blk->bp, + XFS_DA_LOGRANGE(node, btree, tmp)); + btree = &node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ]; + } + bzero((char *)btree, sizeof(xfs_da_node_entry_t)); + xfs_da_log_buf(state->args->trans, drop_blk->bp, + XFS_DA_LOGRANGE(node, btree, sizeof(*btree))); + INT_MOD(node->hdr.count, ARCH_CONVERT, -1); + xfs_da_log_buf(state->args->trans, drop_blk->bp, + XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); + + /* + * Copy the last hash value from the block to propagate upwards. + */ + btree--; + drop_blk->hashval = INT_GET(btree->hashval, ARCH_CONVERT); +} + +/* + * Unbalance the btree elements between two intermediate nodes, + * move all Btree elements from one node into another. + */ +STATIC void +xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, + xfs_da_state_blk_t *save_blk) +{ + xfs_da_intnode_t *drop_node, *save_node; + xfs_da_node_entry_t *btree; + int tmp; + xfs_trans_t *tp; + + drop_node = drop_blk->bp->data; + save_node = save_blk->bp->data; + ASSERT(INT_GET(drop_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + ASSERT(INT_GET(save_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + tp = state->args->trans; + + /* + * If the dying block has lower hashvals, then move all the + * elements in the remaining block up to make a hole. 
+ */ + if ((INT_GET(drop_node->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(save_node->btree[ 0 ].hashval, ARCH_CONVERT)) || + (INT_GET(drop_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) < + INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT))) + { + btree = &save_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT) ]; + tmp = INT_GET(save_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t); + ovbcopy(&save_node->btree[0], btree, tmp); + btree = &save_node->btree[0]; + xfs_da_log_buf(tp, save_blk->bp, + XFS_DA_LOGRANGE(save_node, btree, + (INT_GET(save_node->hdr.count, ARCH_CONVERT) + INT_GET(drop_node->hdr.count, ARCH_CONVERT)) * + sizeof(xfs_da_node_entry_t))); + } else { + btree = &save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT) ]; + xfs_da_log_buf(tp, save_blk->bp, + XFS_DA_LOGRANGE(save_node, btree, + INT_GET(drop_node->hdr.count, ARCH_CONVERT) * + sizeof(xfs_da_node_entry_t))); + } + + /* + * Move all the B-tree elements from drop_blk to save_blk. + */ + tmp = INT_GET(drop_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t); + bcopy(&drop_node->btree[0], btree, tmp); + INT_MOD(save_node->hdr.count, ARCH_CONVERT, INT_GET(drop_node->hdr.count, ARCH_CONVERT)); + + xfs_da_log_buf(tp, save_blk->bp, + XFS_DA_LOGRANGE(save_node, &save_node->hdr, + sizeof(save_node->hdr))); + + /* + * Save the last hashval in the remaining block for upward propagation. + */ + save_blk->hashval = INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); +} + + +/*======================================================================== + * Routines used for finding things in the Btree. + *========================================================================*/ + +/* + * Walk down the Btree looking for a particular filename, filling + * in the state structure as we go. 
+ * + * We will set the state structure to point to each of the elements + * in each of the nodes where either the hashval is or should be. + * + * We support duplicate hashval's so for each entry in the current + * node that could contain the desired hashval, descend. This is a + * pruned depth-first tree search. + */ +int /* error */ +xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) +{ + xfs_da_state_blk_t *blk; + xfs_da_blkinfo_t *curr; + xfs_da_intnode_t *node; + xfs_da_node_entry_t *btree; + xfs_dablk_t blkno; + int probe, span, max, error, retval; + xfs_dahash_t hashval; + xfs_da_args_t *args; + + args = state->args; + /* + * Descend thru the B-tree searching each level for the right + * node to use, until the right hashval is found. + */ + if (args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(state->mp)) + blkno = state->mp->m_dirleafblk; + else + blkno = 0; + for (blk = &state->path.blk[0], state->path.active = 1; + state->path.active <= XFS_DA_NODE_MAXDEPTH; + blk++, state->path.active++) { + /* + * Read the next node down in the tree. + */ + blk->blkno = blkno; + error = xfs_da_read_buf(state->args->trans, state->args->dp, + blkno, -1, &blk->bp, + state->args->whichfork); + if (error) { + blk->blkno = 0; + state->path.active--; + return(error); + } + ASSERT(blk->bp != NULL); + curr = blk->bp->data; + ASSERT(INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC || + INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) || + INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC); + + /* + * Search an intermediate node for a match. + */ + blk->magic = INT_GET(curr->magic, ARCH_CONVERT); + if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) { + node = blk->bp->data; + blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); + + /* + * Binary search. 
(note: small blocks will skip loop) + */ + max = INT_GET(node->hdr.count, ARCH_CONVERT); + probe = span = max / 2; + hashval = state->args->hashval; + for (btree = &node->btree[probe]; span > 4; + btree = &node->btree[probe]) { + span /= 2; + if (INT_GET(btree->hashval, ARCH_CONVERT) < hashval) + probe += span; + else if (INT_GET(btree->hashval, ARCH_CONVERT) > hashval) + probe -= span; + else + break; + } + ASSERT((probe >= 0) && (probe < max)); + ASSERT((span <= 4) || (INT_GET(btree->hashval, ARCH_CONVERT) == hashval)); + + /* + * Since we may have duplicate hashval's, find the first + * matching hashval in the node. + */ + while ((probe > 0) && (INT_GET(btree->hashval, ARCH_CONVERT) >= hashval)) { + btree--; + probe--; + } + while ((probe < max) && (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)) { + btree++; + probe++; + } + + /* + * Pick the right block to descend on. + */ + if (probe == max) { + blk->index = max-1; + blkno = INT_GET(node->btree[ max-1 ].before, ARCH_CONVERT); + } else { + blk->index = probe; + blkno = INT_GET(btree->before, ARCH_CONVERT); + } + } +#ifdef __KERNEL__ + else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC) { + blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); + break; + } +#endif + else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) { + blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL); + break; + } + else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) { + blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL); + break; + } + } + + /* + * A leaf block that ends in the hashval that we are interested in + * (final hashval == search hashval) means that the next block may + * contain more entries with the same hashval, shift upward to the + * next leaf and keep searching. 
+ */ + for (;;) { + if (blk->magic == XFS_DIR_LEAF_MAGIC) { + ASSERT(XFS_DIR_IS_V1(state->mp)); + retval = xfs_dir_leaf_lookup_int(blk->bp, state->args, + &blk->index); + } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) { + ASSERT(XFS_DIR_IS_V2(state->mp)); + retval = xfs_dir2_leafn_lookup_int(blk->bp, state->args, + &blk->index, state); + } +#ifdef __KERNEL__ + else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { + retval = xfs_attr_leaf_lookup_int(blk->bp, state->args); + blk->index = state->args->index; + state->args->blkno = blk->blkno; + } +#endif + if (((retval == ENOENT) || (retval == ENOATTR)) && + (blk->hashval == state->args->hashval)) { + error = xfs_da_path_shift(state, &state->path, 1, 1, + &retval); + if (error) + return(error); + if (retval == 0) { + continue; + } +#ifdef __KERNEL__ + else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { + /* path_shift() gives ENOENT */ + retval = XFS_ERROR(ENOATTR); + } +#endif + } + break; + } + *result = retval; + return(0); +} + + +/*======================================================================== + * Utility routines. + *========================================================================*/ + +/* + * Link a new block into a doubly linked list of blocks (of whatever type). + */ +int /* error */ +xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, + xfs_da_state_blk_t *new_blk) +{ + xfs_da_blkinfo_t *old_info, *new_info, *tmp_info; + xfs_da_args_t *args; + int before, error; + xfs_dabuf_t *bp; + + /* + * Set up environment. 
+ */ + args = state->args; + ASSERT(args != NULL); + old_info = old_blk->bp->data; + new_info = new_blk->bp->data; + ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC || + old_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) || + old_blk->magic == XFS_ATTR_LEAF_MAGIC); + ASSERT(old_blk->magic == INT_GET(old_info->magic, ARCH_CONVERT)); + ASSERT(new_blk->magic == INT_GET(new_info->magic, ARCH_CONVERT)); + ASSERT(old_blk->magic == new_blk->magic); + + switch (old_blk->magic) { +#ifdef __KERNEL__ + case XFS_ATTR_LEAF_MAGIC: + before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp); + break; +#endif + case XFS_DIR_LEAF_MAGIC: + ASSERT(XFS_DIR_IS_V1(state->mp)); + before = xfs_dir_leaf_order(old_blk->bp, new_blk->bp); + break; + case XFS_DIR2_LEAFN_MAGIC: + ASSERT(XFS_DIR_IS_V2(state->mp)); + before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp); + break; + case XFS_DA_NODE_MAGIC: + before = xfs_da_node_order(old_blk->bp, new_blk->bp); + break; + } + + /* + * Link blocks in appropriate order. + */ + if (before) { + /* + * Link new block in before existing block. + */ + INT_SET(new_info->forw, ARCH_CONVERT, old_blk->blkno); + new_info->back = old_info->back; /* INT_: direct copy */ + if (INT_GET(old_info->back, ARCH_CONVERT)) { + error = xfs_da_read_buf(args->trans, args->dp, + INT_GET(old_info->back, + ARCH_CONVERT), -1, &bp, + args->whichfork); + if (error) + return(error); + ASSERT(bp != NULL); + tmp_info = bp->data; + ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(old_info->magic, ARCH_CONVERT)); + ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == old_blk->blkno); + INT_SET(tmp_info->forw, ARCH_CONVERT, new_blk->blkno); + xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); + xfs_da_buf_done(bp); + } + INT_SET(old_info->back, ARCH_CONVERT, new_blk->blkno); + } else { + /* + * Link new block in after existing block. 
+ */ + new_info->forw = old_info->forw; /* INT_: direct copy */ + INT_SET(new_info->back, ARCH_CONVERT, old_blk->blkno); + if (INT_GET(old_info->forw, ARCH_CONVERT)) { + error = xfs_da_read_buf(args->trans, args->dp, + INT_GET(old_info->forw, ARCH_CONVERT), -1, &bp, + args->whichfork); + if (error) + return(error); + ASSERT(bp != NULL); + tmp_info = bp->data; + ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) + == INT_GET(old_info->magic, ARCH_CONVERT)); + ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT) + == old_blk->blkno); + INT_SET(tmp_info->back, ARCH_CONVERT, new_blk->blkno); + xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); + xfs_da_buf_done(bp); + } + INT_SET(old_info->forw, ARCH_CONVERT, new_blk->blkno); + } + + xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); + xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1); + return(0); +} + + +/* + * Compare two intermediate nodes for "order". + */ +STATIC int +xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp) +{ + xfs_da_intnode_t *node1, *node2; + + node1 = node1_bp->data; + node2 = node2_bp->data; + ASSERT((INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) && + (INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC)); + if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) && + ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) < + INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) || + (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) < + INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) { + return(1); + } + return(0); +} + + +/* + * Pick up the last hashvalue from an intermediate node. 
+ */
+STATIC uint
+xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
+{
+	xfs_da_intnode_t *node;
+
+	node = bp->data;
+	ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+	/* The entry count is reported only when the caller asks for it. */
+	if (count)
+		*count = INT_GET(node->hdr.count, ARCH_CONVERT);
+	/* An empty node has no last entry; report a hash of 0. */
+	if (INT_GET(node->hdr.count, ARCH_CONVERT) == 0)
+		return(0);
+	return(INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
+}
+
+/*
+ * Unlink a block from a doubly linked list of blocks.
+ *
+ * drop_blk must be a direct sibling of save_blk (asserted below).
+ * save_blk takes over drop_blk's pointer on the far side, and the
+ * block beyond drop_blk, if any, is read, repointed at save_blk and
+ * logged.  drop_blk's own buffer is not modified here.
+ *
+ * Returns 0 on success, or the error from xfs_da_read_buf().
+ */
+int							/* error */
+xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
+				 xfs_da_state_blk_t *save_blk)
+{
+	xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
+	xfs_da_args_t *args;
+	xfs_dabuf_t *bp;
+	int error;
+
+	/*
+	 * Set up environment.
+	 */
+	args = state->args;
+	ASSERT(args != NULL);
+	save_info = save_blk->bp->data;
+	drop_info = drop_blk->bp->data;
+	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
+	       save_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
+	ASSERT(save_blk->magic == INT_GET(save_info->magic, ARCH_CONVERT));
+	ASSERT(drop_blk->magic == INT_GET(drop_info->magic, ARCH_CONVERT));
+	ASSERT(save_blk->magic == drop_blk->magic);
+	ASSERT((INT_GET(save_info->forw, ARCH_CONVERT) == drop_blk->blkno) ||
+	       (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno));
+	ASSERT((INT_GET(drop_info->forw, ARCH_CONVERT) == save_blk->blkno) ||
+	       (INT_GET(drop_info->back, ARCH_CONVERT) == save_blk->blkno));
+
+	/*
+	 * Unlink the leaf block from the doubly linked chain of leaves.
+	 */
+	if (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno) {
+		/* drop_blk sits behind save_blk: splice out backwards. */
+		save_info->back = drop_info->back; /* INT_: direct copy */
+		if (INT_GET(drop_info->back, ARCH_CONVERT)) {
+			error = xfs_da_read_buf(args->trans, args->dp,
+						INT_GET(drop_info->back,
+							ARCH_CONVERT), -1, &bp,
+						args->whichfork);
+			if (error)
+				return(error);
+			ASSERT(bp != NULL);
+			tmp_info = bp->data;
+			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(save_info->magic, ARCH_CONVERT));
+			ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == drop_blk->blkno);
+			INT_SET(tmp_info->forw, ARCH_CONVERT, save_blk->blkno);
+			xfs_da_log_buf(args->trans, bp, 0,
+						    sizeof(*tmp_info) - 1);
+			xfs_da_buf_done(bp);
+		}
+	} else {
+		/* drop_blk sits ahead of save_blk: splice out forwards. */
+		save_info->forw = drop_info->forw; /* INT_: direct copy */
+		if (INT_GET(drop_info->forw, ARCH_CONVERT)) {
+			error = xfs_da_read_buf(args->trans, args->dp,
+					INT_GET(drop_info->forw, ARCH_CONVERT), -1, &bp,
+					args->whichfork);
+			if (error)
+				return(error);
+			ASSERT(bp != NULL);
+			tmp_info = bp->data;
+			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
+				    == INT_GET(save_info->magic, ARCH_CONVERT));
+			ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
+				    == drop_blk->blkno);
+			INT_SET(tmp_info->back, ARCH_CONVERT, save_blk->blkno);
+			xfs_da_log_buf(args->trans, bp, 0,
+						    sizeof(*tmp_info) - 1);
+			xfs_da_buf_done(bp);
+		}
+	}
+
+	/* Only save_blk's blkinfo header was changed in that buffer. */
+	xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
+	return(0);
+}
+
+/*
+ * Move a path "forward" or "!forward" one block at the current level.
+ *
+ * This routine will adjust a "path" to point to the next block
+ * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
+ * Btree, including updating pointers to the intermediate nodes between
+ * the new bottom and the root.
+ */
+int							/* error */
+xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
+				 int forward, int release, int *result)
+{
+	xfs_da_state_blk_t *blk;
+	xfs_da_blkinfo_t *info;
+	xfs_da_intnode_t *node;
+	xfs_da_args_t *args;
+	xfs_dablk_t blkno;
+	int level, error;
+
+	/*
+	 * Roll up the Btree looking for the first block where our
+	 * current index is not at the edge of the block. Note that
+	 * we skip the bottom layer because we want the sibling block.
+	 */
+	args = state->args;
+	ASSERT(args != NULL);
+	ASSERT(path != NULL);
+	ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
+	level = (path->active-1) - 1;	/* skip bottom layer in path */
+	for (blk = &path->blk[level]; level >= 0; blk--, level--) {
+		ASSERT(blk->bp != NULL);
+		node = blk->bp->data;
+		ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
+		if (forward && (blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
+			blk->index++;
+			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
+			break;
+		} else if (!forward && (blk->index > 0)) {
+			blk->index--;
+			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
+			break;
+		}
+	}
+	/*
+	 * Every level was already at its edge: there is no sibling in
+	 * that direction.  This is reported through *result, not as an
+	 * error return.
+	 */
+	if (level < 0) {
+		*result = XFS_ERROR(ENOENT);	/* we're out of our tree */
+		ASSERT(args->oknoent);
+		return(0);
+	}
+
+	/*
+	 * Roll down the edge of the subtree until we reach the
+	 * same depth we were at originally.
+	 */
+	for (blk++, level++; level < path->active; blk++, level++) {
+		/*
+		 * Release the old block.
+		 * (if it's dirty, trans won't actually let go)
+		 */
+		if (release)
+			xfs_da_brelse(args->trans, blk->bp);
+
+		/*
+		 * Read the next child block.
+		 */
+		blk->blkno = blkno;
+		error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
+						     &blk->bp, args->whichfork);
+		if (error)
+			return(error);
+		ASSERT(blk->bp != NULL);
+		info = blk->bp->data;
+		ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
+		       INT_GET(info->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
+		       INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
+		blk->magic = INT_GET(info->magic, ARCH_CONVERT);
+		if (INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
+			/*
+			 * Interior node: enter at the edge nearest the
+			 * block we came from (first entry when moving
+			 * forward, last entry otherwise).
+			 */
+			node = (xfs_da_intnode_t *)info;
+			blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+			if (forward)
+				blk->index = 0;
+			else
+				blk->index = INT_GET(node->hdr.count, ARCH_CONVERT)-1;
+			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
+		} else {
+			/* A leaf may only appear at the bottom of the path. */
+			ASSERT(level == path->active-1);
+			blk->index = 0;
+			switch(blk->magic) {
+#ifdef __KERNEL__
+			case XFS_ATTR_LEAF_MAGIC:
+				blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
+								      NULL);
+				break;
+#endif
+			case XFS_DIR_LEAF_MAGIC:
+				ASSERT(XFS_DIR_IS_V1(state->mp));
+				blk->hashval = xfs_dir_leaf_lasthash(blk->bp,
+								     NULL);
+				break;
+			case XFS_DIR2_LEAFN_MAGIC:
+				ASSERT(XFS_DIR_IS_V2(state->mp));
+				blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
+								       NULL);
+				break;
+			default:
+				ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
+				       blk->magic ==
+				       XFS_DIRX_LEAF_MAGIC(state->mp));
+				break;
+			}
+		}
+	}
+	*result = 0;
+	return(0);
+}
+
+
+/*========================================================================
+ * Utility routines.
+ *========================================================================*/
+
+/*
+ * Implement a simple hash on a character string.
+ * Rotate the hash value by 7 bits, then XOR each character in.
+ * This is implemented with some source-level loop unrolling.
+ */ +xfs_dahash_t +xfs_da_hashname(char *name, int namelen) +{ + xfs_dahash_t hash; + +#define ROTL(x,y) (((x) << (y)) | ((x) >> (32 - (y)))) +#ifdef SLOWVERSION + /* + * This is the old one-byte-at-a-time version. + */ + for (hash = 0; namelen > 0; namelen--) { + hash = *name++ ^ ROTL(hash, 7); + } + return(hash); +#else + /* + * Do four characters at a time as long as we can. + */ + for (hash = 0; namelen >= 4; namelen -= 4, name += 4) { + hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^ + (name[3] << 0) ^ ROTL(hash, 7 * 4); + } + /* + * Now do the rest of the characters. + */ + switch (namelen) { + case 3: + return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^ + ROTL(hash, 7 * 3); + case 2: + return (name[0] << 7) ^ (name[1] << 0) ^ ROTL(hash, 7 * 2); + case 1: + return (name[0] << 0) ^ ROTL(hash, 7 * 1); + case 0: + return hash; + } + /* NOTREACHED */ +#endif +#undef ROTL + return 0; /* keep gcc happy */ +} + +/* + * Add a block to the btree ahead of the file. + * Return the new block number to the caller. + */ +int +xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) +{ + xfs_fileoff_t bno, b; + xfs_bmbt_irec_t map; + xfs_bmbt_irec_t *mapp; + xfs_inode_t *dp; + int nmap, error, w, count, c, got, i, mapi; + xfs_fsize_t size; + xfs_trans_t *tp; + xfs_mount_t *mp; + + dp = args->dp; + mp = dp->i_mount; + w = args->whichfork; + tp = args->trans; + /* + * For new directories adjust the file offset and block count. + */ + if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) { + bno = mp->m_dirleafblk; + count = mp->m_dirblkfsbs; + } else { + bno = 0; + count = 1; + } + /* + * Find a spot in the file space to put the new block. + */ + if (error = xfs_bmap_first_unused(tp, dp, count, &bno, w)) { +#pragma mips_frequency_hint NEVER + return error; + } + if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) + ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk); + /* + * Try mapping it in one filesystem block. 
+ */ + nmap = 1; + ASSERT(args->firstblock != NULL); + if (error = xfs_bmapi(tp, dp, bno, count, + XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA| + XFS_BMAPI_CONTIG, + args->firstblock, args->total, &map, &nmap, + args->flist)) { +#pragma mips_frequency_hint NEVER + return error; + } + ASSERT(nmap <= 1); + if (nmap == 1) { + mapp = ↦ + mapi = 1; + } + /* + * If we didn't get it and the block might work if fragmented, + * try without the CONTIG flag. Loop until we get it all. + */ + else if (nmap == 0 && count > 1) { +#pragma mips_frequency_hint NEVER + mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); + for (b = bno, mapi = 0; b < bno + count; ) { + nmap = MIN(XFS_BMAP_MAX_NMAP, count); + c = (int)(bno + count - b); + if (error = xfs_bmapi(tp, dp, b, c, + XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE| + XFS_BMAPI_METADATA, + args->firstblock, args->total, + &mapp[mapi], &nmap, args->flist)) { + kmem_free(mapp, sizeof(*mapp) * count); + return error; + } + if (nmap < 1) + break; + mapi += nmap; + b = mapp[mapi - 1].br_startoff + + mapp[mapi - 1].br_blockcount; + } + } else { +#pragma mips_frequency_hint NEVER + mapi = 0; + mapp = NULL; + } + /* + * Count the blocks we got, make sure it matches the total. + */ + for (i = 0, got = 0; i < mapi; i++) + got += mapp[i].br_blockcount; + if (got != count || mapp[0].br_startoff != bno || + mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != + bno + count) { +#pragma mips_frequency_hint NEVER + if (mapp != &map) + kmem_free(mapp, sizeof(*mapp) * count); + return XFS_ERROR(ENOSPC); + } + if (mapp != &map) + kmem_free(mapp, sizeof(*mapp) * count); + *new_blkno = (xfs_dablk_t)bno; + /* + * For version 1 directories, adjust the file size if it changed. 
+ */ + if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) { + ASSERT(mapi == 1); + if (error = xfs_bmap_last_offset(tp, dp, &bno, w)) + return error; + size = XFS_FSB_TO_B(mp, bno); + if (size != dp->i_d.di_size) { + dp->i_d.di_size = size; + xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); + } + } + return 0; +} + + +/* + * Ick. We need to always be able to remove a btree block, even + * if there's no space reservation because the filesystem is full. + * This is called if xfs_bunmapi on a btree block fails due to ENOSPC. + * It swaps the target block with the last block in the file. The + * last block in the file can always be removed since it can't cause + * a bmap btree split to do that. + */ +STATIC int +xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop, + xfs_dabuf_t **dead_bufp) +{ + xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno; + xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf; + xfs_fileoff_t lastoff; + xfs_inode_t *ip; + xfs_trans_t *tp; + xfs_mount_t *mp; + int error, w, entno, level, dead_level; + xfs_da_blkinfo_t *dead_info, *sib_info; + xfs_da_intnode_t *par_node, *dead_node; + xfs_dir_leafblock_t *dead_leaf; + xfs_dir2_leaf_t *dead_leaf2; + xfs_dahash_t dead_hash; + + dead_buf = *dead_bufp; + dead_blkno = *dead_blknop; + tp = args->trans; + ip = args->dp; + w = args->whichfork; + ASSERT(w == XFS_DATA_FORK); + mp = ip->i_mount; + if (XFS_DIR_IS_V2(mp)) { + lastoff = mp->m_dirfreeblk; + error = xfs_bmap_last_before(tp, ip, &lastoff, w); + } else + error = xfs_bmap_last_offset(tp, ip, &lastoff, w); + if (error) + return error; + if (lastoff == 0) + return XFS_ERROR(EFSCORRUPTED); + /* + * Read the last block in the btree space. + */ + last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs; + if (error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w)) + return error; + /* + * Copy the last block into the dead buffer and log it. 
+ */ + bcopy(last_buf->data, dead_buf->data, mp->m_dirblksize); + xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1); + dead_info = dead_buf->data; + /* + * Get values from the moved block. + */ + if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) { + ASSERT(XFS_DIR_IS_V1(mp)); + dead_leaf = (xfs_dir_leafblock_t *)dead_info; + dead_level = 0; + dead_hash = + INT_GET(dead_leaf->entries[INT_GET(dead_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT); + } else if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) { + ASSERT(XFS_DIR_IS_V2(mp)); + dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; + dead_level = 0; + dead_hash = INT_GET(dead_leaf2->ents[INT_GET(dead_leaf2->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT); + } else { + ASSERT(INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC); + dead_node = (xfs_da_intnode_t *)dead_info; + dead_level = INT_GET(dead_node->hdr.level, ARCH_CONVERT); + dead_hash = INT_GET(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT); + } + sib_buf = par_buf = NULL; + /* + * If the moved block has a left sibling, fix up the pointers. + */ + if (sib_blkno = INT_GET(dead_info->back, ARCH_CONVERT)) { + if (error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)) + goto done; + sib_info = sib_buf->data; + if (INT_GET(sib_info->forw, ARCH_CONVERT) != last_blkno || + INT_GET(sib_info->magic, ARCH_CONVERT) != INT_GET(dead_info->magic, ARCH_CONVERT)) { + error = XFS_ERROR(EFSCORRUPTED); + goto done; + } + INT_SET(sib_info->forw, ARCH_CONVERT, dead_blkno); + xfs_da_log_buf(tp, sib_buf, + XFS_DA_LOGRANGE(sib_info, &sib_info->forw, + sizeof(sib_info->forw))); + xfs_da_buf_done(sib_buf); + sib_buf = NULL; + } + /* + * If the moved block has a right sibling, fix up the pointers. 
+ */ + if (sib_blkno = INT_GET(dead_info->forw, ARCH_CONVERT)) { + if (error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)) + goto done; + sib_info = sib_buf->data; + if ( INT_GET(sib_info->back, ARCH_CONVERT) != last_blkno + || INT_GET(sib_info->magic, ARCH_CONVERT) + != INT_GET(dead_info->magic, ARCH_CONVERT)) { + error = XFS_ERROR(EFSCORRUPTED); + goto done; + } + INT_SET(sib_info->back, ARCH_CONVERT, dead_blkno); + xfs_da_log_buf(tp, sib_buf, + XFS_DA_LOGRANGE(sib_info, &sib_info->back, + sizeof(sib_info->back))); + xfs_da_buf_done(sib_buf); + sib_buf = NULL; + } + par_blkno = XFS_DIR_IS_V1(mp) ? 0 : mp->m_dirleafblk; + level = -1; + /* + * Walk down the tree looking for the parent of the moved block. + */ + for (;;) { + if (error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)) + goto done; + par_node = par_buf->data; + if (INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC || + (level >= 0 && level != INT_GET(par_node->hdr.level, ARCH_CONVERT) + 1)) { + error = XFS_ERROR(EFSCORRUPTED); + goto done; + } + level = INT_GET(par_node->hdr.level, ARCH_CONVERT); + for (entno = 0; + entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) && + INT_GET(par_node->btree[entno].hashval, ARCH_CONVERT) < dead_hash; + entno++) + continue; + if (entno == INT_GET(par_node->hdr.count, ARCH_CONVERT)) { + error = XFS_ERROR(EFSCORRUPTED); + goto done; + } + par_blkno = INT_GET(par_node->btree[entno].before, ARCH_CONVERT); + if (level == dead_level + 1) + break; + xfs_da_brelse(tp, par_buf); + par_buf = NULL; + } + /* + * We're in the right parent block. + * Look for the right entry. 
+ */ + for (;;) { + for (; + entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) && + INT_GET(par_node->btree[entno].before, ARCH_CONVERT) != last_blkno; + entno++) + continue; + if (entno < INT_GET(par_node->hdr.count, ARCH_CONVERT)) + break; + par_blkno = INT_GET(par_node->hdr.info.forw, ARCH_CONVERT); + xfs_da_brelse(tp, par_buf); + par_buf = NULL; + if (par_blkno == 0) { + error = XFS_ERROR(EFSCORRUPTED); + goto done; + } + if (error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)) + goto done; + par_node = par_buf->data; + if (INT_GET(par_node->hdr.level, ARCH_CONVERT) != level || + INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC) { + error = XFS_ERROR(EFSCORRUPTED); + goto done; + } + entno = 0; + } + /* + * Update the parent entry pointing to the moved block. + */ + INT_SET(par_node->btree[entno].before, ARCH_CONVERT, dead_blkno); + xfs_da_log_buf(tp, par_buf, + XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before, + sizeof(par_node->btree[entno].before))); + xfs_da_buf_done(par_buf); + xfs_da_buf_done(dead_buf); + *dead_blknop = last_blkno; + *dead_bufp = last_buf; + return 0; +done: + if (par_buf) + xfs_da_brelse(tp, par_buf); + if (sib_buf) + xfs_da_brelse(tp, sib_buf); + xfs_da_brelse(tp, last_buf); + return error; +} + +/* + * Remove a btree block from a directory or attribute. + */ +int +xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, + xfs_dabuf_t *dead_buf) +{ + xfs_inode_t *dp; + int done, error, w, count; + xfs_fileoff_t bno; + xfs_fsize_t size; + xfs_trans_t *tp; + xfs_mount_t *mp; + + dp = args->dp; + w = args->whichfork; + tp = args->trans; + mp = dp->i_mount; + if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) + count = mp->m_dirblkfsbs; + else + count = 1; + for (;;) { + /* + * Remove extents. If we get ENOSPC for a dir we have to move + * the last block to the place we want to kill. 
+ */ + if ((error = xfs_bunmapi(tp, dp, dead_blkno, count, + XFS_BMAPI_AFLAG(w)|XFS_BMAPI_METADATA, + 0, args->firstblock, args->flist, + &done)) == ENOSPC) { + if (w != XFS_DATA_FORK) + goto done; + if (error = xfs_da_swap_lastblock(args, &dead_blkno, + &dead_buf)) + goto done; + } else if (error) + goto done; + else + break; + } + ASSERT(done); + xfs_da_binval(tp, dead_buf); + /* + * Adjust the directory size for version 1. + */ + if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) { + if (error = xfs_bmap_last_offset(tp, dp, &bno, w)) + return error; + size = XFS_FSB_TO_B(dp->i_mount, bno); + if (size != dp->i_d.di_size) { + dp->i_d.di_size = size; + xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); + } + } + return 0; +done: + xfs_da_binval(tp, dead_buf); + return error; +} + +/* + * See if the mapping(s) for this btree block are valid, i.e. + * don't contain holes, are logically contiguous, and cover the whole range. + */ +STATIC int +xfs_da_map_covers_blocks( + int nmap, + xfs_bmbt_irec_t *mapp, + xfs_dablk_t bno, + int count) +{ + int i; + xfs_fileoff_t off; + + for (i = 0, off = bno; i < nmap; i++) { + if (mapp[i].br_startblock == HOLESTARTBLOCK || + mapp[i].br_startblock == DELAYSTARTBLOCK) { +#pragma mips_frequency_hint NEVER + return 0; + } + if (off != mapp[i].br_startoff) { +#pragma mips_frequency_hint NEVER + return 0; + } + off += mapp[i].br_blockcount; + } + return off == bno + count; +} + +/* + * Make a dabuf. + * Used for get_buf, read_buf, read_bufr, and reada_buf. 
+ */ +STATIC int +xfs_da_do_buf( + xfs_trans_t *trans, + xfs_inode_t *dp, + xfs_dablk_t bno, + xfs_daddr_t *mappedbnop, + xfs_dabuf_t **bpp, + int whichfork, + int caller, + inst_t *ra) +{ + xfs_buf_t *bp = 0; + xfs_buf_t **bplist; + int error; + int i; + xfs_bmbt_irec_t map; + xfs_bmbt_irec_t *mapp; + xfs_daddr_t mappedbno; + xfs_mount_t *mp; + int nbplist; + int nfsb; + int nmap; + xfs_dabuf_t *rbp; + + mp = dp->i_mount; + if (whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) + nfsb = mp->m_dirblkfsbs; + else + nfsb = 1; + mappedbno = *mappedbnop; + /* + * Caller doesn't have a mapping. -2 means don't complain + * if we land in a hole. + */ + if (mappedbno == -1 || mappedbno == -2) { + /* + * Optimize the one-block case. + */ + if (nfsb == 1) { + xfs_fsblock_t fsb; + + if (error = + xfs_bmapi_single(trans, dp, whichfork, &fsb, + (xfs_fileoff_t)bno)) { +#pragma mips_frequency_hint NEVER + return error; + } + mapp = ↦ + if (fsb == NULLFSBLOCK) { +#pragma mips_frequency_hint NEVER + nmap = 0; + } else { + map.br_startblock = fsb; + map.br_startoff = (xfs_fileoff_t)bno; + map.br_blockcount = 1; + nmap = 1; + } + } else { +#pragma mips_frequency_hint NEVER + xfs_fsblock_t firstblock; + + firstblock = NULLFSBLOCK; + mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP); + nmap = nfsb; + if (error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno, + nfsb, + XFS_BMAPI_METADATA | + XFS_BMAPI_AFLAG(whichfork), + &firstblock, 0, mapp, &nmap, NULL)) + goto exit0; + } + } else { + map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno); + map.br_startoff = (xfs_fileoff_t)bno; + map.br_blockcount = nfsb; + mapp = ↦ + nmap = 1; + } + if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) { +#pragma mips_frequency_hint NEVER + error = mappedbno == -2 ? 
0 : XFS_ERROR(EFSCORRUPTED); + goto exit0; + } + if (caller != 3 && nmap > 1) { +#pragma mips_frequency_hint NEVER + bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP); + nbplist = 0; + } else + bplist = NULL; + /* + * Turn the mapping(s) into buffer(s). + */ + for (i = 0; i < nmap; i++) { + int nmapped; + + mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock); + if (i == 0) + *mappedbnop = mappedbno; + nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount); + switch (caller) { + case 0: + bp = xfs_trans_get_buf(trans, mp->m_ddev_targp, + mappedbno, nmapped, 0); + error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO); + break; + case 1: +#ifndef __KERNEL__ + case 2: +#endif + bp = NULL; + error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp, + mappedbno, nmapped, 0, &bp); + break; +#ifdef __KERNEL__ + case 3: + xfs_baread(mp->m_ddev_targp, mappedbno, nmapped); + error = 0; + bp = NULL; + break; +#endif + } + if (error) { +#pragma mips_frequency_hint NEVER + if (bp) + xfs_trans_brelse(trans, bp); + goto exit1; + } + if (!bp) + continue; + if (caller == 1) { + if (whichfork == XFS_ATTR_FORK) { + XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE, + XFS_ATTR_BTREE_REF); + } else { + XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE, + XFS_DIR_BTREE_REF); + } + } + if (bplist) { +#pragma mips_frequency_hint NEVER + bplist[nbplist++] = bp; + } + } + /* + * Build a dabuf structure. + */ + if (bplist) { +#pragma mips_frequency_hint NEVER + rbp = xfs_da_buf_make(nbplist, bplist, ra); + } else if (bp) + rbp = xfs_da_buf_make(1, &bp, ra); + else + rbp = NULL; + /* + * For read_buf, check the magic number. 
+ */ + if (caller == 1) { + xfs_dir2_data_t *data; + xfs_dir2_free_t *free; + xfs_da_blkinfo_t *info; + + info = rbp->data; + data = rbp->data; + free = rbp->data; + if (XFS_TEST_ERROR((INT_GET(info->magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC) && + (INT_GET(info->magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) && + (INT_GET(info->magic, ARCH_CONVERT) != XFS_ATTR_LEAF_MAGIC) && + (INT_GET(info->magic, ARCH_CONVERT) != XFS_DIR2_LEAF1_MAGIC) && + (INT_GET(info->magic, ARCH_CONVERT) != XFS_DIR2_LEAFN_MAGIC) && + (INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC) && + (INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_DATA_MAGIC) && + (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC), + mp, XFS_ERRTAG_DA_READ_BUF, + XFS_RANDOM_DA_READ_BUF)) { +#pragma mips_frequency_hint NEVER + xfs_buftrace("DA READ ERROR", rbp->bps[0]); + error = XFS_ERROR(EFSCORRUPTED); + xfs_da_brelse(trans, rbp); + nbplist = 0; + goto exit1; + } + } + if (bplist) { +#pragma mips_frequency_hint NEVER + kmem_free(bplist, sizeof(*bplist) * nmap); + } + if (mapp != &map) { +#pragma mips_frequency_hint NEVER + kmem_free(mapp, sizeof(*mapp) * nfsb); + } + if (bpp) + *bpp = rbp; + return 0; +exit1: + if (bplist) { + for (i = 0; i < nbplist; i++) + xfs_trans_brelse(trans, bplist[i]); + kmem_free(bplist, sizeof(*bplist) * nmap); + } +exit0: + if (mapp != &map) + kmem_free(mapp, sizeof(*mapp) * nfsb); + if (bpp) + *bpp = NULL; + return error; +} + +/* + * Get a buffer for the dir/attr block. + */ +int +xfs_da_get_buf( + xfs_trans_t *trans, + xfs_inode_t *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + xfs_dabuf_t **bpp, + int whichfork) +{ + return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0, + (inst_t *)__return_address); +} + +/* + * Get a buffer for the dir/attr block, fill in the contents. 
+ */ +int +xfs_da_read_buf( + xfs_trans_t *trans, + xfs_inode_t *dp, + xfs_dablk_t bno, + xfs_daddr_t mappedbno, + xfs_dabuf_t **bpp, + int whichfork) +{ + return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1, + (inst_t *)__return_address); +} + +/* + * Calculate the number of bits needed to hold i different values. + */ +uint +xfs_da_log2_roundup(uint i) +{ + uint rval; + + for (rval = 0; rval < NBBY * sizeof(i); rval++) { + if ((1 << rval) >= i) + break; + } + return(rval); +} + +xfs_zone_t *xfs_da_state_zone; /* anchor for state struct zone */ +xfs_zone_t *xfs_dabuf_zone; /* dabuf zone */ + +/* + * Allocate a dir-state structure. + * We don't put them on the stack since they're large. + */ +xfs_da_state_t * +xfs_da_state_alloc(void) +{ + return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP); +} + +/* + * Kill the altpath contents of a da-state structure. + */ +void +xfs_da_state_kill_altpath(xfs_da_state_t *state) +{ + int i; + + for (i = 0; i < state->altpath.active; i++) { + if (state->altpath.blk[i].bp) { + if (state->altpath.blk[i].bp != state->path.blk[i].bp) + xfs_da_buf_done(state->altpath.blk[i].bp); + state->altpath.blk[i].bp = NULL; + } + } + state->altpath.active = 0; +} + +/* + * Free a da-state structure. + */ +void +xfs_da_state_free(xfs_da_state_t *state) +{ + int i; + + xfs_da_state_kill_altpath(state); + for (i = 0; i < state->path.active; i++) { + if (state->path.blk[i].bp) + xfs_da_buf_done(state->path.blk[i].bp); + } + if (state->extravalid && state->extrablk.bp) + xfs_da_buf_done(state->extrablk.bp); +#ifdef DEBUG + bzero((char *)state, sizeof(*state)); +#endif /* DEBUG */ + kmem_zone_free(xfs_da_state_zone, state); +} + +#ifdef XFS_DABUF_DEBUG +xfs_dabuf_t *xfs_dabuf_global_list; +lock_t xfs_dabuf_global_lock; +#endif + +/* + * Create a dabuf. 
+ */
+/* ARGSUSED */
+STATIC xfs_dabuf_t *
+xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
+{
+	xfs_buf_t *bp;
+	xfs_dabuf_t *dabuf;
+	int i;
+	int off;
+
+	/*
+	 * Single-buffer dabufs come from the dabuf zone; multi-buffer
+	 * ones are sized by XFS_DA_BUF_SIZE(nbuf).
+	 */
+	if (nbuf == 1)
+		dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP);
+	else
+		dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP);
+	dabuf->dirty = 0;
+#ifdef XFS_DABUF_DEBUG
+	dabuf->ra = ra;
+	dabuf->dev = XFS_BUF_TARGET(bps[0]);
+	dabuf->blkno = XFS_BUF_ADDR(bps[0]);
+#endif
+	if (nbuf == 1) {
+		/* One buffer: data points straight at the buffer's memory. */
+		dabuf->nbuf = 1;
+		bp = bps[0];
+		dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
+		dabuf->data = XFS_BUF_PTR(bp);
+		dabuf->bps[0] = bp;
+	} else {
+		/*
+		 * Several buffers: data is a separately allocated area
+		 * holding a copy of all of them laid end to end.
+		 */
+		dabuf->nbuf = nbuf;
+		for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
+			dabuf->bps[i] = bp = bps[i];
+			dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
+		}
+		dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
+		for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
+			bp = bps[i];
+			bcopy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
+				XFS_BUF_COUNT(bp));
+		}
+	}
+#ifdef XFS_DABUF_DEBUG
+	{
+		int s;
+		xfs_dabuf_t *p;
+
+		/* Debug only: assert no live dabuf has the same dev and blkno. */
+		s = mutex_spinlock(&xfs_dabuf_global_lock);
+		for (p = xfs_dabuf_global_list; p; p = p->next) {
+			ASSERT(p->blkno != dabuf->blkno ||
+			       p->dev != dabuf->dev);
+		}
+		dabuf->prev = NULL;
+		if (xfs_dabuf_global_list)
+			xfs_dabuf_global_list->prev = dabuf;
+		dabuf->next = xfs_dabuf_global_list;
+		xfs_dabuf_global_list = dabuf;
+		mutex_spinunlock(&xfs_dabuf_global_lock, s);
+	}
+#endif
+	return dabuf;
+}
+
+/*
+ * Un-dirty a dabuf.
+ *
+ * Copies the contiguous data area back into the underlying buffers.
+ * Only multi-buffer dabufs are ever marked dirty (asserted below).
+ */
+STATIC void
+xfs_da_buf_clean(xfs_dabuf_t *dabuf)
+{
+	xfs_buf_t *bp;
+	int i;
+	int off;
+
+	if (dabuf->dirty) {
+		ASSERT(dabuf->nbuf > 1);
+		dabuf->dirty = 0;
+		for (i = off = 0; i < dabuf->nbuf;
+				i++, off += XFS_BUF_COUNT(bp)) {
+			bp = dabuf->bps[i];
+			bcopy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
+				XFS_BUF_COUNT(bp));
+		}
+	}
+}
+
+/*
+ * Release a dabuf.
+ */ +void +xfs_da_buf_done(xfs_dabuf_t *dabuf) +{ + ASSERT(dabuf); + ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); + if (dabuf->dirty) + xfs_da_buf_clean(dabuf); + if (dabuf->nbuf > 1) + kmem_free(dabuf->data, BBTOB(dabuf->bbcount)); +#ifdef XFS_DABUF_DEBUG + { + int s; + + s = mutex_spinlock(&xfs_dabuf_global_lock); + if (dabuf->prev) + dabuf->prev->next = dabuf->next; + else + xfs_dabuf_global_list = dabuf->next; + if (dabuf->next) + dabuf->next->prev = dabuf->prev; + mutex_spinunlock(&xfs_dabuf_global_lock, s); + } + bzero(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf)); +#endif + if (dabuf->nbuf == 1) + kmem_zone_free(xfs_dabuf_zone, dabuf); + else + kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf)); +} + +/* + * Log transaction from a dabuf. + */ +void +xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last) +{ + xfs_buf_t *bp; + uint f; + int i; + uint l; + int off; + + ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); + if (dabuf->nbuf == 1) { + ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0])); + xfs_trans_log_buf(tp, dabuf->bps[0], first, last); + return; + } + dabuf->dirty = 1; + ASSERT(first <= last); + for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) { + bp = dabuf->bps[i]; + f = off; + l = f + XFS_BUF_COUNT(bp) - 1; + if (f < first) + f = first; + if (l > last) + l = last; + if (f <= l) + xfs_trans_log_buf(tp, bp, f - off, l - off); + /* + * B_DONE is set by xfs_trans_log buf. + * If we don't set it on a new buffer (get not read) + * then if we don't put anything in the buffer it won't + * be set, and at commit it it released into the cache, + * and then a read will fail. + */ + else if (!(XFS_BUF_ISDONE(bp))) + XFS_BUF_DONE(bp); + } + ASSERT(last < off); +} + +/* + * Release dabuf from a transaction. + * Have to free up the dabuf before the buffers are released, + * since the synchronization on the dabuf is really the lock on the buffer. 
+ */
+void
+xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
+{
+	xfs_buf_t *bp;
+	xfs_buf_t **bplist;
+	int i;
+	int nbuf;
+
+	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
+	/*
+	 * Save the buffer pointers first: xfs_da_buf_done() frees the
+	 * dabuf that holds them.
+	 */
+	if ((nbuf = dabuf->nbuf) == 1) {
+		bplist = &bp;
+		bp = dabuf->bps[0];
+	} else {
+		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
+		bcopy(dabuf->bps, bplist, nbuf * sizeof(*bplist));
+	}
+	xfs_da_buf_done(dabuf);
+	for (i = 0; i < nbuf; i++)
+		xfs_trans_brelse(tp, bplist[i]);
+	if (bplist != &bp)
+		kmem_free(bplist, nbuf * sizeof(*bplist));
+}
+
+/*
+ * Invalidate dabuf from a transaction.
+ */
+void
+xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
+{
+	xfs_buf_t *bp;
+	xfs_buf_t **bplist;
+	int i;
+	int nbuf;
+
+	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
+	/*
+	 * Save the buffer pointers first: xfs_da_buf_done() frees the
+	 * dabuf that holds them.
+	 */
+	if ((nbuf = dabuf->nbuf) == 1) {
+		bplist = &bp;
+		bp = dabuf->bps[0];
+	} else {
+		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
+		bcopy(dabuf->bps, bplist, nbuf * sizeof(*bplist));
+	}
+	xfs_da_buf_done(dabuf);
+	for (i = 0; i < nbuf; i++)
+		xfs_trans_binval(tp, bplist[i]);
+	if (bplist != &bp)
+		kmem_free(bplist, nbuf * sizeof(*bplist));
+}
diff --git a/libxfs/xfs_dir.c b/libxfs/xfs_dir.c
new file mode 100644
index 000000000..b13d24642
--- /dev/null
+++ b/libxfs/xfs_dir.c
@@ -0,0 +1,622 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.
Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include	/* NOTE(review): header name is missing here, apparently lost in extraction - confirm against the original file */
+
+/*
+ * xfs_dir.c
+ *
+ * Provide the external interfaces to manage directories.
+ */
+
+
+xfs_dahash_t	xfs_dir_hash_dot, xfs_dir_hash_dotdot;	/* hashes of "." and "..", set by xfs_dir_startup */
+
+/*
+ * One-time startup routine called from xfs_init().
+ * Precomputes the hash values of "." and "..".
+ */
+void
+xfs_dir_startup(void)
+{
+	xfs_dir_hash_dot = xfs_da_hashname(".", 1);
+	xfs_dir_hash_dotdot = xfs_da_hashname("..", 2);
+}
+
+/*
+ * Initialize directory-related fields in the mount structure.
+ * Sets the directory version to 1 and derives the remaining fields
+ * from the filesystem block size and the directory structure sizes.
+ */
+STATIC void
+xfs_dir_mount(xfs_mount_t *mp)
+{
+	uint shortcount, leafcount, count;
+
+	mp->m_dirversion = 1;
+	/* How many entries fit in a shortform directory ... */
+	shortcount = (mp->m_attroffset - (uint)sizeof(xfs_dir_sf_hdr_t)) /
+		     (uint)sizeof(xfs_dir_sf_entry_t);
+	/* ... and how many in one leaf block; the larger one sizes m_dircook_elog. */
+	leafcount = (XFS_LBSIZE(mp) - (uint)sizeof(xfs_dir_leaf_hdr_t)) /
+		    ((uint)sizeof(xfs_dir_leaf_entry_t) +
+		     (uint)sizeof(xfs_dir_leaf_name_t));
+	count = shortcount > leafcount ? shortcount : leafcount;
+	mp->m_dircook_elog = xfs_da_log2_roundup(count + 1);
+	ASSERT(mp->m_dircook_elog <= mp->m_sb.sb_blocklog);
+	mp->m_da_node_ents =
+		(XFS_LBSIZE(mp) - (uint)sizeof(xfs_da_node_hdr_t)) /
+		(uint)sizeof(xfs_da_node_entry_t);
+	mp->m_dir_magicpct = (XFS_LBSIZE(mp) * 37) / 100;	/* 37% of a block */
+	mp->m_dirblksize = mp->m_sb.sb_blocksize;
+	mp->m_dirblkfsbs = 1;	/* one filesystem block per directory block */
+}
+
+/*
+ * Initialize a directory with its "." and ".." entries.
+ * Validates the parent's inode number, then creates the shortform
+ * directory via xfs_dir_shortform_create.
+ */
+STATIC int
+xfs_dir_init(xfs_trans_t *trans, xfs_inode_t *dir, xfs_inode_t *parent_dir)
+{
+	xfs_da_args_t args;
+	int error;
+
+	bzero((char *)&args, sizeof(args));
+	args.dp = dir;
+	args.trans = trans;
+
+	ASSERT((dir->i_d.di_mode & IFMT) == IFDIR);
+	if (error = xfs_dir_ino_validate(trans->t_mountp, parent_dir->i_ino))
+		return error;
+
+	return(xfs_dir_shortform_create(&args, parent_dir->i_ino));
+}
+
+/*
+ * Generic handler routine to add a name to a directory.
+ * Transitions directory from shortform to Btree as necessary.
+ *
+ * Returns ENOSPC without converting the directory when a format
+ * conversion is needed but total (the block reservation) is 0.
+ */
+STATIC int							/* error */
+xfs_dir_createname(xfs_trans_t *trans, xfs_inode_t *dp, char *name,
+		   int namelen, xfs_ino_t inum, xfs_fsblock_t *firstblock,
+		   xfs_bmap_free_t *flist, xfs_extlen_t total)
+{
+	xfs_da_args_t args;
+	int retval, newsize, done;
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+
+	if (retval = xfs_dir_ino_validate(trans->t_mountp, inum))
+		return (retval);
+
+	XFS_STATS_INC(xs_dir_create);
+	/*
+	 * Fill in the arg structure for this request.
+	 */
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = inum;
+	args.dp = dp;
+	args.firstblock = firstblock;
+	args.flist = flist;
+	args.total = total;
+	args.whichfork = XFS_DATA_FORK;
+	args.trans = trans;
+	args.justcheck = 0;
+	args.addname = args.oknoent = 1;
+
+	/*
+	 * Decide on what work routines to call based on the inode size.
+	 * "done" stops the cascade: it is set once a stage has either
+	 * added the name or failed with an error that must be returned.
+	 */
+	done = 0;
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+		newsize = XFS_DIR_SF_ENTSIZE_BYNAME(args.namelen);
+		if ((dp->i_d.di_size + newsize) <= XFS_IFORK_DSIZE(dp)) {
+			retval = xfs_dir_shortform_addname(&args);
+			done = 1;
+		} else {
+			if (total == 0)
+				return XFS_ERROR(ENOSPC);
+			retval = xfs_dir_shortform_to_leaf(&args);
+			done = retval != 0;	/* conversion OK: fall through to the leaf add */
+		}
+	}
+	if (!done && xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
+		retval = xfs_dir_leaf_addname(&args);
+		done = retval != ENOSPC;	/* only a full leaf moves on to node form */
+		if (!done) {
+			if (total == 0)
+				return XFS_ERROR(ENOSPC);
+			retval = xfs_dir_leaf_to_node(&args);
+			done = retval != 0;
+		}
+	}
+	if (!done) {
+		retval = xfs_dir_node_addname(&args);
+	}
+	return(retval);
+}
+
+/*
+ * Generic handler routine to remove a name from a directory.
+ * Transitions directory from Btree to shortform as necessary.
+ */
+STATIC int							/* error */
+xfs_dir_removename(xfs_trans_t *trans, xfs_inode_t *dp, char *name,
+		   int namelen, xfs_ino_t ino, xfs_fsblock_t *firstblock,
+		   xfs_bmap_free_t *flist, xfs_extlen_t total)
+{
+	xfs_da_args_t args;
+	int count, totallen, newsize, retval;
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	XFS_STATS_INC(xs_dir_remove);
+	/*
+	 * Fill in the arg structure for this request.
+	 */
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = ino;
+	args.dp = dp;
+	args.firstblock = firstblock;
+	args.flist = flist;
+	args.total = total;
+	args.whichfork = XFS_DATA_FORK;
+	args.trans = trans;
+	args.justcheck = args.addname = args.oknoent = 0;
+
+	/*
+	 * Decide on what work routines to call based on the inode size.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+		retval = xfs_dir_shortform_removename(&args);
+	} else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
+		retval = xfs_dir_leaf_removename(&args, &count, &totallen);
+		if (retval == 0) {
+			/* Convert back to shortform if what is left fits in the inode. */
+			newsize = XFS_DIR_SF_ALLFIT(count, totallen);
+			if (newsize <= XFS_IFORK_DSIZE(dp)) {
+				retval = xfs_dir_leaf_to_shortform(&args);
+			}
+		}
+	} else {
+		retval = xfs_dir_node_removename(&args);
+	}
+	return(retval);
+}
+/*
+ * Look up a name in a directory and hand back its inode number.
+ * Names of MAXNAMELEN or more are rejected with EINVAL.  An EEXIST
+ * result from the work routine is returned as 0.  *inum is set from
+ * args.inumber (initialized to 0) whenever a work routine was called.
+ */
+STATIC int							/* error */
+xfs_dir_lookup(xfs_trans_t *trans, xfs_inode_t *dp, char *name, int namelen,
+				   xfs_ino_t *inum)
+{
+	xfs_da_args_t args;
+	int retval;
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if (namelen >= MAXNAMELEN) {
+		return(XFS_ERROR(EINVAL));
+	}
+
+	XFS_STATS_INC(xs_dir_lookup);
+	/*
+	 * Fill in the arg structure for this request.
+	 */
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = 0;
+	args.dp = dp;
+	args.firstblock = NULL;
+	args.flist = NULL;
+	args.total = 0;
+	args.whichfork = XFS_DATA_FORK;
+	args.trans = trans;
+	args.justcheck = args.addname = 0;
+	args.oknoent = 1;
+
+	/*
+	 * Decide on what work routines to call based on the inode size.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+		retval = xfs_dir_shortform_lookup(&args);
+	} else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
+		retval = xfs_dir_leaf_lookup(&args);
+	} else {
+		retval = xfs_dir_node_lookup(&args);
+	}
+	if (retval == EEXIST)
+		retval = 0;
+	*inum = args.inumber;
+	return(retval);
+}
+/*
+ * Replace the inode number stored for an existing name.
+ * Names of MAXNAMELEN or more are rejected with EINVAL, and the new
+ * inode number must pass xfs_dir_ino_validate.
+ */
+STATIC int							/* error */
+xfs_dir_replace(xfs_trans_t *trans, xfs_inode_t *dp, char *name, int namelen,
+				    xfs_ino_t inum, xfs_fsblock_t *firstblock,
+				    xfs_bmap_free_t *flist, xfs_extlen_t total)
+{
+	xfs_da_args_t args;
+	int retval;
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if (namelen >= MAXNAMELEN) {
+		return(XFS_ERROR(EINVAL));
+	}
+
+	if (retval = xfs_dir_ino_validate(trans->t_mountp, inum))
+		return retval;
+
+	/*
+	 * Fill in the arg structure for this request.
+	 */
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = inum;
+	args.dp = dp;
+	args.firstblock = firstblock;
+	args.flist = flist;
+	args.total = total;
+	args.whichfork = XFS_DATA_FORK;
+	args.trans = trans;
+	args.justcheck = args.addname = args.oknoent = 0;
+
+	/*
+	 * Decide on what work routines to call based on the inode size.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+		retval = xfs_dir_shortform_replace(&args);
+	} else if (xfs_bmap_one_block(dp, XFS_DATA_FORK)) {
+		retval = xfs_dir_leaf_replace(&args);
+	} else {
+		retval = xfs_dir_node_replace(&args);
+	}
+
+	return(retval);
+}
+
+
+/*========================================================================
+ * External routines when dirsize == XFS_LBSIZE(dp->i_mount).
+ *========================================================================*/
+
+/*
+ * Add a name to the leaf directory structure
+ * This is the external routine.
+ * Reads directory block 0, and adds the name only if the lookup
+ * reports ENOENT; any other lookup result is returned unchanged.
+ */
+int
+xfs_dir_leaf_addname(xfs_da_args_t *args)
+{
+	int index, retval;
+	xfs_dabuf_t *bp;
+
+	retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
+					      XFS_DATA_FORK);
+	if (retval)
+		return(retval);
+	ASSERT(bp != NULL);
+
+	retval = xfs_dir_leaf_lookup_int(bp, args, &index);
+	if (retval == ENOENT)
+		retval = xfs_dir_leaf_add(bp, args, index);
+	xfs_da_buf_done(bp);
+	return(retval);
+}
+
+/*
+ * Remove a name from the leaf directory structure
+ * This is the external routine.
+ * On success returns 0 and reports the leaf's remaining entry count
+ * and name bytes through *count and *totallen; they are left
+ * untouched when the name is not found.
+ */
+STATIC int
+xfs_dir_leaf_removename(xfs_da_args_t *args, int *count, int *totallen)
+{
+	xfs_dir_leafblock_t *leaf;
+	int index, retval;
+	xfs_dabuf_t *bp;
+
+	retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
+					      XFS_DATA_FORK);
+	if (retval)
+		return(retval);
+	ASSERT(bp != NULL);
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	retval = xfs_dir_leaf_lookup_int(bp, args, &index);
+	if (retval == EEXIST) {
+		(void)xfs_dir_leaf_remove(args->trans, bp, index);
+		*count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		*totallen = INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
+		retval = 0;
+	}
+	xfs_da_buf_done(bp);
+	return(retval);
+}
+
+/*
+ * Look up a name in a leaf directory structure.
+ * This is the external routine.
+ * Returns the result of xfs_dir_leaf_lookup_int after releasing the
+ * block from the transaction.
+ */
+STATIC int
+xfs_dir_leaf_lookup(xfs_da_args_t *args)
+{
+	int index, retval;
+	xfs_dabuf_t *bp;
+
+	retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
+					      XFS_DATA_FORK);
+	if (retval)
+		return(retval);
+	ASSERT(bp != NULL);
+	retval = xfs_dir_leaf_lookup_int(bp, args, &index);
+	xfs_da_brelse(args->trans, bp);
+	return(retval);
+}
+
+/*
+ * Look up a name in a leaf directory structure, replace the inode number.
+ * This is the external routine.
+ * Returns 0 once the entry's inode number has been overwritten and
+ * logged; otherwise the lookup result is returned and the block is
+ * released unmodified.
+ */
+STATIC int
+xfs_dir_leaf_replace(xfs_da_args_t *args)
+{
+	int index, retval;
+	xfs_dabuf_t *bp;
+	xfs_ino_t inum;
+	xfs_dir_leafblock_t *leaf;
+	xfs_dir_leaf_entry_t *entry;
+	xfs_dir_leaf_name_t *namest;
+
+	inum = args->inumber;
+	retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
+					      XFS_DATA_FORK);
+	if (retval)
+		return(retval);
+	ASSERT(bp != NULL);
+	retval = xfs_dir_leaf_lookup_int(bp, args, &index);
+	if (retval == EEXIST) {
+		leaf = bp->data;
+		entry = &leaf->entries[index];
+		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+		/* XXX - replace assert? */
+		XFS_DIR_SF_PUT_DIRINO_ARCH(&inum, &namest->inumber, ARCH_CONVERT);
+		xfs_da_log_buf(args->trans, bp,
+		    XFS_DA_LOGRANGE(leaf, namest, sizeof(namest->inumber)));
+		xfs_da_buf_done(bp);
+		retval = 0;
+	} else
+		xfs_da_brelse(args->trans, bp);
+	return(retval);
+}
+
+
+/*========================================================================
+ * External routines when dirsize > XFS_LBSIZE(mp).
+ *========================================================================*/
+
+/*
+ * Add a name to a Btree-format directory.
+ *
+ * This will involve walking down the Btree, and may involve splitting
+ * leaf nodes and even splitting intermediate nodes up to and including
+ * the root node (a special case of an intermediate node).
+ */
+STATIC int
+xfs_dir_node_addname(xfs_da_args_t *args)
+{
+	xfs_da_state_t *state;
+	xfs_da_state_blk_t *blk;
+	int retval, error;
+
+	/*
+	 * Fill in bucket of arguments/results/context to carry around.
+	 */
+	state = xfs_da_state_alloc();
+	state->args = args;
+	state->mp = args->dp->i_mount;
+	state->blocksize = state->mp->m_sb.sb_blocksize;
+
+	/*
+	 * Search to see if name already exists, and get back a pointer
+	 * to where it should go.
+	 */
+	error = xfs_da_node_lookup_int(state, &retval);
+	if (error)
+		retval = error;
+	if (retval != ENOENT)
+		goto error;
+	blk = &state->path.blk[ state->path.active-1 ];	/* last block on the path */
+	ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC);
+	retval = xfs_dir_leaf_add(blk->bp, args, blk->index);
+	if (retval == 0) {
+		/*
+		 * Addition succeeded, update Btree hashvals.
+		 */
+		if (!args->justcheck)
+			xfs_da_fixhashpath(state, &state->path);
+	} else {
+		/*
+		 * Addition failed, split as many Btree elements as required.
+		 */
+		if (args->total == 0) {
+			ASSERT(retval == ENOSPC);
+			goto error;
+		}
+		retval = xfs_da_split(state);
+	}
+error:
+	xfs_da_state_free(state);
+
+	return(retval);
+}
+
+/*
+ * Remove a name from a B-tree directory.
+ *
+ * This will involve walking down the Btree, and may involve joining
+ * leaf nodes and even joining intermediate nodes up to and including
+ * the root node (a special case of an intermediate node).
+ */
+STATIC int
+xfs_dir_node_removename(xfs_da_args_t *args)
+{
+	xfs_da_state_t *state;
+	xfs_da_state_blk_t *blk;
+	int retval, error;
+
+	state = xfs_da_state_alloc();
+	state->args = args;
+	state->mp = args->dp->i_mount;
+	state->blocksize = state->mp->m_sb.sb_blocksize;
+
+	/*
+	 * Search to see if name exists, and get back a pointer to it.
+	 */
+	error = xfs_da_node_lookup_int(state, &retval);
+	if (error)
+		retval = error;
+	if (retval != EEXIST) {
+		xfs_da_state_free(state);
+		return(retval);
+	}
+
+	/*
+	 * Remove the name and update the hashvals in the tree.
+	 */
+	blk = &state->path.blk[ state->path.active-1 ];
+	ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC);
+	retval = xfs_dir_leaf_remove(args->trans, blk->bp, blk->index);
+	xfs_da_fixhashpath(state, &state->path);
+
+	/*
+	 * Check to see if the tree needs to be collapsed.
+	 * A nonzero return from xfs_dir_leaf_remove triggers the join;
+	 * it is not itself returned to the caller.
+	 */
+	error = 0;
+	if (retval) {
+		error = xfs_da_join(state);
+	}
+
+	xfs_da_state_free(state);
+	if (error)
+		return(error);
+	return(0);
+}
+
+/*
+ * Look up a filename in an int directory.
+ * Use an internal routine to actually do all the work.
+ * Every buffer on the lookup path is released before returning.
+ */
+STATIC int
+xfs_dir_node_lookup(xfs_da_args_t *args)
+{
+	xfs_da_state_t *state;
+	int retval, error, i;
+
+	state = xfs_da_state_alloc();
+	state->args = args;
+	state->mp = args->dp->i_mount;
+	state->blocksize = state->mp->m_sb.sb_blocksize;
+
+	/*
+	 * Search to see if name exists,
+	 * and get back a pointer to it.
+	 */
+	error = xfs_da_node_lookup_int(state, &retval);
+	if (error) {
+		retval = error;
+	}
+
+	/*
+	 * If not in a transaction, we have to release all the buffers.
+	 */
+	for (i = 0; i < state->path.active; i++) {
+		xfs_da_brelse(args->trans, state->path.blk[i].bp);
+		state->path.blk[i].bp = NULL;
+	}
+
+	xfs_da_state_free(state);
+	return(retval);
+}
+
+/*
+ * Look up a filename in an int directory, replace the inode number.
+ * Use an internal routine to actually do the lookup.
+ * Returns 0 once the entry has been rewritten and logged, otherwise
+ * the lookup result.
+ */
+STATIC int
+xfs_dir_node_replace(xfs_da_args_t *args)
+{
+	xfs_da_state_t *state;
+	xfs_da_state_blk_t *blk;
+	xfs_dir_leafblock_t *leaf;
+	xfs_dir_leaf_entry_t *entry;
+	xfs_dir_leaf_name_t *namest;
+	xfs_ino_t inum;
+	int retval, error, i;
+	xfs_dabuf_t *bp;
+
+	state = xfs_da_state_alloc();
+	state->args = args;
+	state->mp = args->dp->i_mount;
+	state->blocksize = state->mp->m_sb.sb_blocksize;
+	inum = args->inumber;
+
+	/*
+	 * Search to see if name exists,
+	 * and get back a pointer to it.
+	 */
+	error = xfs_da_node_lookup_int(state, &retval);
+	if (error) {
+		retval = error;
+	}
+
+	if (retval == EEXIST) {
+		blk = &state->path.blk[state->path.active - 1];
+		ASSERT(blk->magic == XFS_DIR_LEAF_MAGIC);
+		bp = blk->bp;
+		leaf = bp->data;
+		entry = &leaf->entries[blk->index];
+		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+		/* XXX - replace assert ? */
+		XFS_DIR_SF_PUT_DIRINO_ARCH(&inum, &namest->inumber, ARCH_CONVERT);
+		xfs_da_log_buf(args->trans, bp,
+		    XFS_DA_LOGRANGE(leaf, namest, sizeof(namest->inumber)));
+		xfs_da_buf_done(bp);	/* modified leaf: dabuf freed, not brelse'd */
+		blk->bp = NULL;
+		retval = 0;
+	} else {
+		/* Leaf untouched: release it from the transaction. */
+		i = state->path.active - 1;
+		xfs_da_brelse(args->trans, state->path.blk[i].bp);
+		state->path.blk[i].bp = NULL;
+	}
+	/* Release the remaining blocks on the path (all but the last). */
+	for (i = 0; i < state->path.active - 1; i++) {
+		xfs_da_brelse(args->trans, state->path.blk[i].bp);
+		state->path.blk[i].bp = NULL;
+	}
+
+	xfs_da_state_free(state);
+	return(retval);
+}
diff --git a/libxfs/xfs_dir2.c b/libxfs/xfs_dir2.c
new file mode 100644
index 000000000..72acbb35d
--- /dev/null
+++ b/libxfs/xfs_dir2.c
@@ -0,0 +1,594 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * XFS v2 directory implementation.
+ * Top-level and utility routines.
+ */
+
+#include	/* NOTE(review): header name is missing here, apparently lost in extraction - confirm against the original file */
+
+
+/*
+ * Initialize directory-related fields in the mount structure.
+ * Sets the directory version to 2 and derives the directory block
+ * size and space boundaries from sb_blocklog and sb_dirblklog.
+ */
+void
+xfs_dir2_mount(
+	xfs_mount_t	*mp)		/* filesystem mount point */
+{
+	mp->m_dirversion = 2;
+	ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
+	       XFS_MAX_BLOCKSIZE);
+	mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog);
+	mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog;	/* fs blocks per directory block */
+	mp->m_dirdatablk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_DATA_FIRSTDB(mp));
+	mp->m_dirleafblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
+	mp->m_dirfreeblk = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_FREE_FIRSTDB(mp));
+	mp->m_da_node_ents =
+		(mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) /
+		(uint)sizeof(xfs_da_node_entry_t);
+	mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;	/* 37% of a directory block */
+}
+
+/*
+ * Initialize a directory with its "." and ".." entries.
+ * Validates the parent's inode number, then creates the shortform
+ * directory via xfs_dir2_sf_create.
+ */
+int						/* error */
+xfs_dir2_init(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*dp,		/* incore directory inode */
+	xfs_inode_t	*pdp)		/* incore parent directory inode */
+{
+	xfs_da_args_t	args;		/* operation arguments */
+	int		error;		/* error return value */
+
+	bzero((char *)&args, sizeof(args));
+	args.dp = dp;
+	args.trans = tp;
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if (error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	return xfs_dir2_sf_create(&args, pdp->i_ino);
+}
+
+/*
+ * Enter a name in a directory.
+ * The inode number is validated first; the add is then dispatched to
+ * the shortform, block, leaf or node routine matching the directory's
+ * current form.
+ */
+STATIC int					/* error */
+xfs_dir2_createname(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*dp,		/* incore directory inode */
+	char		*name,		/* new entry name */
+	int		namelen,	/* new entry name length */
+	xfs_ino_t	inum,		/* new entry inode number */
+	xfs_fsblock_t	*first,		/* bmap's firstblock */
+	xfs_bmap_free_t	*flist,		/* bmap's freeblock list */
+	xfs_extlen_t	total)		/* bmap's total block count */
+{
+	xfs_da_args_t	args;		/* operation arguments */
+	int		rval;		/* return value */
+	int		v;		/* type-checking value */
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if (rval = xfs_dir_ino_validate(tp->t_mountp, inum)) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	}
+	XFS_STATS_INC(xs_dir_create);
+	/*
+	 * Fill in the arg structure for this request.
+	 */
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = inum;
+	args.dp = dp;
+	args.firstblock = first;
+	args.flist = flist;
+	args.total = total;
+	args.whichfork = XFS_DATA_FORK;
+	args.trans = tp;
+	args.justcheck = 0;
+	args.addname = args.oknoent = 1;
+	/*
+	 * Decide on what work routines to call based on the inode size.
+	 * A failing xfs_dir2_isblock/xfs_dir2_isleaf check is returned
+	 * as the error; otherwise v says whether that form applies.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		rval = xfs_dir2_sf_addname(&args);
+	else if (rval = xfs_dir2_isblock(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	} else if (v)
+		rval = xfs_dir2_block_addname(&args);
+	else if (rval = xfs_dir2_isleaf(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	} else if (v)
+		rval = xfs_dir2_leaf_addname(&args);
+	else
+		rval = xfs_dir2_node_addname(&args);
+	return rval;
+}
+
+/*
+ * Lookup a name in a directory, give back the inode number.
+ * Names of MAXNAMELEN or more are rejected with EINVAL.  An EEXIST
+ * result from the work routine is returned as 0, and *inum is
+ * written only when the final result is 0.
+ */
+STATIC int				/* error */
+xfs_dir2_lookup(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*dp,		/* incore directory inode */
+	char		*name,		/* lookup name */
+	int		namelen,	/* lookup name length */
+	xfs_ino_t	*inum)		/* out: inode number */
+{
+	xfs_da_args_t	args;		/* operation arguments */
+	int		rval;		/* return value */
+	int		v;		/* type-checking value */
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if (namelen >= MAXNAMELEN) {
+#pragma mips_frequency_hint NEVER
+		return XFS_ERROR(EINVAL);
+	}
+	XFS_STATS_INC(xs_dir_lookup);
+	/*
+	 * Fill in the arg structure for this request.
+	 */
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = 0;
+	args.dp = dp;
+	args.firstblock = NULL;
+	args.flist = NULL;
+	args.total = 0;
+	args.whichfork = XFS_DATA_FORK;
+	args.trans = tp;
+	args.justcheck = args.addname = 0;
+	args.oknoent = 1;
+	/*
+	 * Decide on what work routines to call based on the inode size.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		rval = xfs_dir2_sf_lookup(&args);
+	else if (rval = xfs_dir2_isblock(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	} else if (v)
+		rval = xfs_dir2_block_lookup(&args);
+	else if (rval = xfs_dir2_isleaf(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	} else if (v)
+		rval = xfs_dir2_leaf_lookup(&args);
+	else
+		rval = xfs_dir2_node_lookup(&args);
+	if (rval == EEXIST)
+		rval = 0;
+	if (rval == 0)
+		*inum = args.inumber;
+	return rval;
+}
+
+/*
+ * Remove an entry from a directory.
+ * Dispatches to the shortform, block, leaf or node routine matching
+ * the directory's current form and returns its result.
+ */
+STATIC int				/* error */
+xfs_dir2_removename(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*dp,		/* incore directory inode */
+	char		*name,		/* name of entry to remove */
+	int		namelen,	/* name length of entry to remove */
+	xfs_ino_t	ino,		/* inode number of entry to remove */
+	xfs_fsblock_t	*first,		/* bmap's firstblock */
+	xfs_bmap_free_t	*flist,		/* bmap's freeblock list */
+	xfs_extlen_t	total)		/* bmap's total block count */
+{
+	xfs_da_args_t	args;		/* operation arguments */
+	int		rval;		/* return value */
+	int		v;		/* type-checking value */
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	XFS_STATS_INC(xs_dir_remove);
+	/*
+	 * Fill in the arg structure for this request.
+	 */
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = ino;
+	args.dp = dp;
+	args.firstblock = first;
+	args.flist = flist;
+	args.total = total;
+	args.whichfork = XFS_DATA_FORK;
+	args.trans = tp;
+	args.justcheck = args.addname = args.oknoent = 0;
+	/*
+	 * Decide on what work routines to call based on the inode size.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		rval = xfs_dir2_sf_removename(&args);
+	else if (rval = xfs_dir2_isblock(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	} else if (v)
+		rval = xfs_dir2_block_removename(&args);
+	else if (rval = xfs_dir2_isleaf(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	} else if (v)
+		rval = xfs_dir2_leaf_removename(&args);
+	else
+		rval = xfs_dir2_node_removename(&args);
+	return rval;
+}
+
+/*
+ * Replace the inode number of a directory entry.
+ * Names of MAXNAMELEN or more are rejected with EINVAL, and the new
+ * inode number must pass xfs_dir_ino_validate.
+ */
+STATIC int				/* error */
+xfs_dir2_replace(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*dp,		/* incore directory inode */
+	char		*name,		/* name of entry to replace */
+	int		namelen,	/* name length of entry to replace */
+	xfs_ino_t	inum,		/* new inode number */
+	xfs_fsblock_t	*first,		/* bmap's firstblock */
+	xfs_bmap_free_t	*flist,		/* bmap's freeblock list */
+	xfs_extlen_t	total)		/* bmap's total block count */
+{
+	xfs_da_args_t	args;		/* operation arguments */
+	int		rval;		/* return value */
+	int		v;		/* type-checking value */
+
+	ASSERT((dp->i_d.di_mode & IFMT) == IFDIR);
+	if (namelen >= MAXNAMELEN) {
+#pragma mips_frequency_hint NEVER
+		return XFS_ERROR(EINVAL);
+	}
+	if (rval = xfs_dir_ino_validate(tp->t_mountp, inum)) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	}
+	/*
+	 * Fill in the arg structure for this request.
+	 */
+	args.name = name;
+	args.namelen = namelen;
+	args.hashval = xfs_da_hashname(name, namelen);
+	args.inumber = inum;
+	args.dp = dp;
+	args.firstblock = first;
+	args.flist = flist;
+	args.total = total;
+	args.whichfork = XFS_DATA_FORK;
+	args.trans = tp;
+	args.justcheck = args.addname = args.oknoent = 0;
+	/*
+	 * Decide on what work routines to call based on the inode size.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		rval = xfs_dir2_sf_replace(&args);
+	else if (rval = xfs_dir2_isblock(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	} else if (v)
+		rval = xfs_dir2_block_replace(&args);
+	else if (rval = xfs_dir2_isleaf(tp, dp, &v)) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	} else if (v)
+		rval = xfs_dir2_leaf_replace(&args);
+	else
+		rval = xfs_dir2_node_replace(&args);
+	return rval;
+}
+
+/*
+ * Utility routines.
+ */
+
+/*
+ * Add a block to the directory.
+ * This routine is for data and free blocks, not leaf/node blocks
+ * which are handled by xfs_da_grow_inode.
+ */ +int /* error */ +xfs_dir2_grow_inode( + xfs_da_args_t *args, /* operation arguments */ + int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ + xfs_dir2_db_t *dbp) /* out: block number added */ +{ + xfs_fileoff_t bno; /* directory offset of new block */ + int count; /* count of filesystem blocks */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return value */ + int got; /* blocks actually mapped */ + int i; /* temp mapping index */ + xfs_bmbt_irec_t map; /* single structure for bmap */ + int mapi; /* mapping index */ + xfs_bmbt_irec_t *mapp; /* bmap mapping structure(s) */ + xfs_mount_t *mp; /* filesystem mount point */ + int nmap; /* number of bmap entries */ + xfs_trans_t *tp; /* transaction pointer */ + + xfs_dir2_trace_args_s("grow_inode", args, space); + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + /* + * Set lowest possible block in the space requested. + */ + bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE); + count = mp->m_dirblkfsbs; + /* + * Find the first hole for our block. + */ + if (error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK)) { +#pragma mips_frequency_hint NEVER + return error; + } + nmap = 1; + ASSERT(args->firstblock != NULL); + /* + * Try mapping the new block contiguously (one extent). + */ + if (error = xfs_bmapi(tp, dp, bno, count, + XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, + args->firstblock, args->total, &map, &nmap, + args->flist)) { +#pragma mips_frequency_hint NEVER + return error; + } + ASSERT(nmap <= 1); + /* + * Got it in 1. + */ + if (nmap == 1) { + mapp = ↦ + mapi = 1; + } + /* + * Didn't work and this is a multiple-fsb directory block. + * Try again with contiguous flag turned on. + */ + else if (nmap == 0 && count > 1) { +#pragma mips_frequency_hint NEVER + xfs_fileoff_t b; /* current file offset */ + + /* + * Space for maximum number of mappings. 
+ */ + mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); + /* + * Iterate until we get to the end of our block. + */ + for (b = bno, mapi = 0; b < bno + count; ) { + int c; /* current fsb count */ + + /* + * Can't map more than MAX_NMAP at once. + */ + nmap = MIN(XFS_BMAP_MAX_NMAP, count); + c = (int)(bno + count - b); + if (error = xfs_bmapi(tp, dp, b, c, + XFS_BMAPI_WRITE|XFS_BMAPI_METADATA, + args->firstblock, args->total, + &mapp[mapi], &nmap, args->flist)) { + kmem_free(mapp, sizeof(*mapp) * count); + return error; + } + if (nmap < 1) + break; + /* + * Add this bunch into our table, go to the next offset. + */ + mapi += nmap; + b = mapp[mapi - 1].br_startoff + + mapp[mapi - 1].br_blockcount; + } + } + /* + * Didn't work. + */ + else { +#pragma mips_frequency_hint NEVER + mapi = 0; + mapp = NULL; + } + /* + * See how many fsb's we got. + */ + for (i = 0, got = 0; i < mapi; i++) + got += mapp[i].br_blockcount; + /* + * Didn't get enough fsb's, or the first/last block's are wrong. + */ + if (got != count || mapp[0].br_startoff != bno || + mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != + bno + count) { +#pragma mips_frequency_hint NEVER + if (mapp != &map) + kmem_free(mapp, sizeof(*mapp) * count); + return XFS_ERROR(ENOSPC); + } + /* + * Done with the temporary mapping table. + */ + if (mapp != &map) + kmem_free(mapp, sizeof(*mapp) * count); + *dbp = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)bno); + /* + * Update file's size if this is the data space and it grew. + */ + if (space == XFS_DIR2_DATA_SPACE) { + xfs_fsize_t size; /* directory file (data) size */ + + size = XFS_FSB_TO_B(mp, bno + count); + if (size > dp->i_d.di_size) { + dp->i_d.di_size = size; + xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); + } + } + return 0; +} + +/* + * See if the directory is a single-block form directory. 
+ * *vp is set to 1 when the byte size corresponding to the last file
+ * offset equals one directory block, 0 otherwise.  The function's own
+ * return value is only an error code.
+ */
+int					/* error */
+xfs_dir2_isblock(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*dp,		/* incore directory inode */
+	int		*vp)		/* out: 1 is block, 0 is not block */
+{
+	xfs_fileoff_t	last;		/* last file offset */
+	xfs_mount_t	*mp;		/* filesystem mount point */
+	int		rval;		/* return value */
+
+	mp = dp->i_mount;
+	if (rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	}
+	rval = XFS_FSB_TO_B(mp, last) == mp->m_dirblksize;	/* rval reused as the boolean result */
+	ASSERT(rval == 0 || dp->i_d.di_size == mp->m_dirblksize);
+	*vp = rval;
+	return 0;
+}
+
+/*
+ * See if the directory is a single-leaf form directory.
+ * *vp is set to 1 when the last file offset lies exactly one
+ * directory block past m_dirleafblk, 0 otherwise.
+ */
+int					/* error */
+xfs_dir2_isleaf(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_inode_t	*dp,		/* incore directory inode */
+	int		*vp)		/* out: 1 is leaf, 0 is not leaf */
+{
+	xfs_fileoff_t	last;		/* last file offset */
+	xfs_mount_t	*mp;		/* filesystem mount point */
+	int		rval;		/* return value */
+
+	mp = dp->i_mount;
+	if (rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return rval;
+	}
+	*vp = last == mp->m_dirleafblk + (1 << mp->m_sb.sb_dirblklog);
+	return 0;
+}
+
+/*
+ * Remove the given block from the directory.
+ * This routine is used for data and free blocks, leaf/node are done
+ * by xfs_da_shrink_inode.
+ *
+ * If the unmap fails, the error is returned and the buffer is left
+ * with the caller.  Otherwise the buffer is invalidated and, when the
+ * block removed was the last data block, the directory size is
+ * reduced to the new last block.
+ */
+int
+xfs_dir2_shrink_inode(
+	xfs_da_args_t	*args,		/* operation arguments */
+	xfs_dir2_db_t	db,		/* directory block number */
+	xfs_dabuf_t	*bp)		/* block's buffer */
+{
+	xfs_fileoff_t	bno;		/* directory file offset */
+	xfs_dablk_t	da;		/* directory file offset */
+	int		done;		/* bunmap is finished */
+	xfs_inode_t	*dp;		/* incore directory inode */
+	int		error;		/* error return value */
+	xfs_mount_t	*mp;		/* filesystem mount point */
+	xfs_trans_t	*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args_db("shrink_inode", args, db, bp);
+	dp = args->dp;
+	mp = dp->i_mount;
+	tp = args->trans;
+	da = XFS_DIR2_DB_TO_DA(mp, db);
+	/*
+	 * Unmap the fsblock(s).
+	 */
+	if (error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs,
+			XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,
+			&done)) {
+#pragma mips_frequency_hint NEVER
+		/*
+		 * ENOSPC actually can happen if we're in a removename with
+		 * no space reservation, and the resulting block removal
+		 * would cause a bmap btree split or conversion from extents
+		 * to btree. This can only happen for un-fragmented
+		 * directory blocks, since you need to be punching out
+		 * the middle of an extent.
+		 * In this case we need to leave the block in the file,
+		 * and not binval it.
+		 * So the block has to be in a consistent empty state
+		 * and appropriately logged.
+		 * We don't free up the buffer, the caller can tell it
+		 * hasn't happened since it got an error back.
+		 */
+		return error;
+	}
+	ASSERT(done);
+	/*
+	 * Invalidate the buffer from the transaction.
+	 */
+	xfs_da_binval(tp, bp);
+	/*
+	 * If it's not a data block, we're done.
+	 */
+	if (db >= XFS_DIR2_LEAF_FIRSTDB(mp))
+		return 0;
+	/*
+	 * If the block isn't the last one in the directory, we're done.
+	 */
+	if (dp->i_d.di_size > XFS_DIR2_DB_OFF_TO_BYTE(mp, db + 1, 0))
+		return 0;
+	bno = da;
+	if (error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		/*
+		 * This can't really happen unless there's kernel corruption.
+		 */
+		return error;
+	}
+	if (db == mp->m_dirdatablk)
+		ASSERT(bno == 0);
+	else
+		ASSERT(bno > 0);
+	/*
+	 * Set the size to the new last block.
+	 */
+	dp->i_d.di_size = XFS_FSB_TO_B(mp, bno);
+	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+	return 0;
+}
diff --git a/libxfs/xfs_dir2_block.c b/libxfs/xfs_dir2_block.c
new file mode 100644
index 000000000..9cf677314
--- /dev/null
+++ b/libxfs/xfs_dir2_block.c
@@ -0,0 +1,1094 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * xfs_dir2_block.c
+ * XFS V2 directory implementation, single-block form.
+ * See xfs_dir2_block.h for the format.
+ */
+
+#include	/* NOTE(review): header name is missing here, apparently lost in extraction - confirm against the original file */
+
+/*
+ * Add an entry to a block directory.
+ */ +int /* error */ +xfs_dir2_block_addname( + xfs_da_args_t *args) /* directory op arguments */ +{ + xfs_dir2_data_free_t *bf; /* bestfree table in block */ + xfs_dir2_block_t *block; /* directory block structure */ + xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ + xfs_dabuf_t *bp; /* buffer for block */ + xfs_dir2_block_tail_t *btp; /* block tail */ + int compact; /* need to compact leaf ents */ + xfs_dir2_data_entry_t *dep; /* block data entry */ + xfs_inode_t *dp; /* directory inode */ + xfs_dir2_data_unused_t *dup; /* block unused entry */ + int error; /* error return value */ + xfs_dir2_data_unused_t *enddup; /* unused at end of data */ + xfs_dahash_t hash; /* hash value of found entry */ + int high; /* high index for binary srch */ + int highstale; /* high stale index */ + int lfloghigh; /* last final leaf to log */ + int lfloglow; /* first final leaf to log */ + int len; /* length of the new entry */ + int low; /* low index for binary srch */ + int lowstale; /* low stale index */ + int mid; /* midpoint for binary srch */ + xfs_mount_t *mp; /* filesystem mount point */ + int needlog; /* need to log header */ + int needscan; /* need to rescan freespace */ + xfs_dir2_data_off_t *tagp; /* pointer to tag value */ + xfs_trans_t *tp; /* transaction structure */ + + xfs_dir2_trace_args("block_addname", args); + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + /* + * Read the (one and only) directory block into dabuf bp. + */ + if (error = + xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &bp, XFS_DATA_FORK)) { +#pragma mips_frequency_hint NEVER + return error; + } + ASSERT(bp != NULL); + block = bp->data; + /* + * Check the magic number, corrupted if wrong. + */ + if (INT_GET(block->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC) { +#pragma mips_frequency_hint NEVER + xfs_da_brelse(tp, bp); + return XFS_ERROR(EFSCORRUPTED); + } + len = XFS_DIR2_DATA_ENTSIZE(args->namelen); + /* + * Set up pointers to parts of the block. 
+ */ + bf = block->hdr.bestfree; + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + /* + * No stale entries? Need space for entry and new leaf. + */ + if (INT_GET(btp->stale, ARCH_CONVERT) == 0) { + /* + * Tag just before the first leaf entry. + */ + tagp = (xfs_dir2_data_off_t *)blp - 1; + /* + * Data object just before the first leaf entry. + */ + enddup = (xfs_dir2_data_unused_t *)((char *)block + INT_GET(*tagp, ARCH_CONVERT)); + /* + * If it's not free then can't do this add without cleaning up: + * the space before the first leaf entry needs to be free so it + * can be expanded to hold the pointer to the new entry. + */ + if (INT_GET(enddup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG) + dup = enddup = NULL; + /* + * Check out the biggest freespace and see if it's the same one. + */ + else { + dup = (xfs_dir2_data_unused_t *) + ((char *)block + INT_GET(bf[0].offset, ARCH_CONVERT)); + if (dup == enddup) { + /* + * It is the biggest freespace, is it too small + * to hold the new leaf too? + */ + if (INT_GET(dup->length, ARCH_CONVERT) < len + (uint)sizeof(*blp)) { +#pragma mips_frequency_hint NEVER + /* + * Yes, we use the second-largest + * entry instead if it works. + */ + if (INT_GET(bf[1].length, ARCH_CONVERT) >= len) + dup = (xfs_dir2_data_unused_t *) + ((char *)block + + INT_GET(bf[1].offset, ARCH_CONVERT)); + else + dup = NULL; + } + } else { + /* + * Not the same free entry, + * just check its length. + */ + if (INT_GET(dup->length, ARCH_CONVERT) < len) { +#pragma mips_frequency_hint NEVER + dup = NULL; + } + } + } + compact = 0; + } + /* + * If there are stale entries we'll use one for the leaf. + * Is the biggest entry enough to avoid compaction? + */ + else if (INT_GET(bf[0].length, ARCH_CONVERT) >= len) { + dup = (xfs_dir2_data_unused_t *) + ((char *)block + INT_GET(bf[0].offset, ARCH_CONVERT)); + compact = 0; + } + /* + * Will need to compact to make this work. 
+ */ + else { +#pragma mips_frequency_hint NEVER + /* + * Tag just before the first leaf entry. + */ + tagp = (xfs_dir2_data_off_t *)blp - 1; + /* + * Data object just before the first leaf entry. + */ + dup = (xfs_dir2_data_unused_t *)((char *)block + INT_GET(*tagp, ARCH_CONVERT)); + /* + * If it's not free then the data will go where the + * leaf data starts now, if it works at all. + */ + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { + if (INT_GET(dup->length, ARCH_CONVERT) + (INT_GET(btp->stale, ARCH_CONVERT) - 1) * + (uint)sizeof(*blp) < len) + dup = NULL; + } else if ((INT_GET(btp->stale, ARCH_CONVERT) - 1) * (uint)sizeof(*blp) < len) + dup = NULL; + else + dup = (xfs_dir2_data_unused_t *)blp; + compact = 1; + } + /* + * If this isn't a real add, we're done with the buffer. + */ + if (args->justcheck) + xfs_da_brelse(tp, bp); + /* + * If we don't have space for the new entry & leaf ... + */ + if (!dup) { +#pragma mips_frequency_hint NEVER + /* + * Not trying to actually do anything, or don't have + * a space reservation: return no-space. + */ + if (args->justcheck || args->total == 0) + return XFS_ERROR(ENOSPC); + /* + * Convert to the next larger format. + * Then add the new entry in that format. + */ + error = xfs_dir2_block_to_leaf(args, bp); + xfs_da_buf_done(bp); + if (error) + return error; + return xfs_dir2_leaf_addname(args); + } + /* + * Just checking, and it would work, so say so. + */ + if (args->justcheck) + return 0; + needlog = needscan = 0; + /* + * If need to compact the leaf entries, do it now. + * Leave the highest-numbered stale entry stale. + * XXX should be the one closest to mid but mid is not yet computed. 
+ */ + if (compact) { +#pragma mips_frequency_hint NEVER + int fromidx; /* source leaf index */ + int toidx; /* target leaf index */ + + for (fromidx = toidx = INT_GET(btp->count, ARCH_CONVERT) - 1, + highstale = lfloghigh = -1; + fromidx >= 0; + fromidx--) { + if (INT_GET(blp[fromidx].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) { + if (highstale == -1) + highstale = toidx; + else { + if (lfloghigh == -1) + lfloghigh = toidx; + continue; + } + } + if (fromidx < toidx) + blp[toidx] = blp[fromidx]; + toidx--; + } + lfloglow = toidx + 1 - (INT_GET(btp->stale, ARCH_CONVERT) - 1); + lfloghigh -= INT_GET(btp->stale, ARCH_CONVERT) - 1; + INT_MOD(btp->count, ARCH_CONVERT, -(INT_GET(btp->stale, ARCH_CONVERT) - 1)); + xfs_dir2_data_make_free(tp, bp, + (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), + (xfs_dir2_data_aoff_t)((INT_GET(btp->stale, ARCH_CONVERT) - 1) * sizeof(*blp)), + &needlog, &needscan); + blp += INT_GET(btp->stale, ARCH_CONVERT) - 1; + INT_SET(btp->stale, ARCH_CONVERT, 1); + /* + * If we now need to rebuild the bestfree map, do so. + * This needs to happen before the next call to use_free. + */ + if (needscan) { + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, + &needlog, NULL); + needscan = 0; + } + } + /* + * Set leaf logging boundaries to impossible state. + * For the no-stale case they're set explicitly. + */ + else if (INT_GET(btp->stale, ARCH_CONVERT)) { + lfloglow = INT_GET(btp->count, ARCH_CONVERT); + lfloghigh = -1; + } + /* + * Find the slot that's first lower than our hash value, -1 if none. 
+ */ + for (low = 0, high = INT_GET(btp->count, ARCH_CONVERT) - 1; low <= high; ) { + mid = (low + high) >> 1; + if ((hash = INT_GET(blp[mid].hashval, ARCH_CONVERT)) == args->hashval) + break; + if (hash < args->hashval) + low = mid + 1; + else + high = mid - 1; + } + while (mid >= 0 && INT_GET(blp[mid].hashval, ARCH_CONVERT) >= args->hashval) { +#pragma mips_frequency_hint NEVER + mid--; + } + /* + * No stale entries, will use enddup space to hold new leaf. + */ + if (INT_GET(btp->stale, ARCH_CONVERT) == 0) { + /* + * Mark the space needed for the new leaf entry, now in use. + */ + xfs_dir2_data_use_free(tp, bp, enddup, + (xfs_dir2_data_aoff_t) + ((char *)enddup - (char *)block + INT_GET(enddup->length, ARCH_CONVERT) - + sizeof(*blp)), + (xfs_dir2_data_aoff_t)sizeof(*blp), + &needlog, &needscan); + /* + * Update the tail (entry count). + */ + INT_MOD(btp->count, ARCH_CONVERT, +1); + /* + * If we now need to rebuild the bestfree map, do so. + * This needs to happen before the next call to use_free. + */ + if (needscan) { + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, + &needlog, NULL); + needscan = 0; + } + /* + * Adjust pointer to the first leaf entry, we're about to move + * the table up one to open up space for the new leaf entry. + * Then adjust our index to match. + */ + blp--; + mid++; + if (mid) + ovbcopy(&blp[1], blp, mid * sizeof(*blp)); + lfloglow = 0; + lfloghigh = mid; + } + /* + * Use a stale leaf for our new entry. + */ + else { + for (lowstale = mid; + lowstale >= 0 && + INT_GET(blp[lowstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR; + lowstale--) + continue; + for (highstale = mid + 1; + highstale < INT_GET(btp->count, ARCH_CONVERT) && + INT_GET(blp[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR && + (lowstale < 0 || mid - lowstale > highstale - mid); + highstale++) + continue; + /* + * Move entries toward the low-numbered stale entry. 
+ */ + if (lowstale >= 0 && + (highstale == INT_GET(btp->count, ARCH_CONVERT) || + mid - lowstale <= highstale - mid)) { + if (mid - lowstale) + ovbcopy(&blp[lowstale + 1], &blp[lowstale], + (mid - lowstale) * sizeof(*blp)); + lfloglow = MIN(lowstale, lfloglow); + lfloghigh = MAX(mid, lfloghigh); + } + /* + * Move entries toward the high-numbered stale entry. + */ + else { + ASSERT(highstale < INT_GET(btp->count, ARCH_CONVERT)); + mid++; + if (highstale - mid) + ovbcopy(&blp[mid], &blp[mid + 1], + (highstale - mid) * sizeof(*blp)); + lfloglow = MIN(mid, lfloglow); + lfloghigh = MAX(highstale, lfloghigh); + } + INT_MOD(btp->stale, ARCH_CONVERT, -1); + } + /* + * Point to the new data entry. + */ + dep = (xfs_dir2_data_entry_t *)dup; + /* + * Fill in the leaf entry. + */ + INT_SET(blp[mid].hashval, ARCH_CONVERT, args->hashval); + INT_SET(blp[mid].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block)); + xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); + /* + * Mark space for the data entry used. + */ + xfs_dir2_data_use_free(tp, bp, dup, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), + (xfs_dir2_data_aoff_t)len, &needlog, &needscan); + /* + * Create the new data entry. + */ + INT_SET(dep->inumber, ARCH_CONVERT, args->inumber); + dep->namelen = args->namelen; + bcopy(args->name, dep->name, args->namelen); + tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); + INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block)); + /* + * Clean up the bestfree array and log the header, tail, and entry. + */ + if (needscan) + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog, + NULL); + if (needlog) + xfs_dir2_data_log_header(tp, bp); + xfs_dir2_block_log_tail(tp, bp); + xfs_dir2_data_log_entry(tp, bp, dep); + xfs_dir2_data_check(dp, bp); + xfs_da_buf_done(bp); + return 0; +} + +/* + * Log leaf entries from the block. 
+ */ +STATIC void +xfs_dir2_block_log_leaf( + xfs_trans_t *tp, /* transaction structure */ + xfs_dabuf_t *bp, /* block buffer */ + int first, /* index of first logged leaf */ + int last) /* index of last logged leaf */ +{ + xfs_dir2_block_t *block; /* directory block structure */ + xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_mount_t *mp; /* filesystem mount point */ + + mp = tp->t_mountp; + block = bp->data; + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block), + (uint)((char *)&blp[last + 1] - (char *)block - 1)); +} + +/* + * Log the block tail. + */ +STATIC void +xfs_dir2_block_log_tail( + xfs_trans_t *tp, /* transaction structure */ + xfs_dabuf_t *bp) /* block buffer */ +{ + xfs_dir2_block_t *block; /* directory block structure */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_mount_t *mp; /* filesystem mount point */ + + mp = tp->t_mountp; + block = bp->data; + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block), + (uint)((char *)(btp + 1) - (char *)block - 1)); +} + +/* + * Look up an entry in the block. This is the external routine, + * xfs_dir2_block_lookup_int does the real work. + */ +int /* error */ +xfs_dir2_block_lookup( + xfs_da_args_t *args) /* dir lookup arguments */ +{ + xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ + xfs_dabuf_t *bp; /* block buffer */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_dir2_data_entry_t *dep; /* block data entry */ + xfs_inode_t *dp; /* incore inode */ + int ent; /* entry index */ + int error; /* error return value */ + xfs_mount_t *mp; /* filesystem mount point */ + + xfs_dir2_trace_args("block_lookup", args); + /* + * Get the buffer, look up the entry. + * If not found (ENOENT) then return, have no buffer. 
+ */ + if (error = xfs_dir2_block_lookup_int(args, &bp, &ent)) + return error; + dp = args->dp; + mp = dp->i_mount; + block = bp->data; + xfs_dir2_data_check(dp, bp); + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + /* + * Get the offset from the leaf entry, to point to the data. + */ + dep = (xfs_dir2_data_entry_t *) + ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(blp[ent].address, ARCH_CONVERT))); + /* + * Fill in inode number, release the block. + */ + args->inumber = INT_GET(dep->inumber, ARCH_CONVERT); + xfs_da_brelse(args->trans, bp); + return XFS_ERROR(EEXIST); +} + +/* + * Internal block lookup routine. + */ +STATIC int /* error */ +xfs_dir2_block_lookup_int( + xfs_da_args_t *args, /* dir lookup arguments */ + xfs_dabuf_t **bpp, /* returned block buffer */ + int *entno) /* returned entry number */ +{ + xfs_dir2_dataptr_t addr; /* data entry address */ + xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ + xfs_dabuf_t *bp; /* block buffer */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_dir2_data_entry_t *dep; /* block data entry */ + xfs_inode_t *dp; /* incore inode */ + int error; /* error return value */ + xfs_dahash_t hash; /* found hash value */ + int high; /* binary search high index */ + int low; /* binary search low index */ + int mid; /* binary search current idx */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_trans_t *tp; /* transaction pointer */ + + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + /* + * Read the buffer, return error if we can't get it. + */ + if (error = + xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &bp, XFS_DATA_FORK)) { +#pragma mips_frequency_hint NEVER + return error; + } + ASSERT(bp != NULL); + block = bp->data; + xfs_dir2_data_check(dp, bp); + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + /* + * Loop doing a binary search for our hash value. 
+ * Find our entry, ENOENT if it's not there. + */ + for (low = 0, high = INT_GET(btp->count, ARCH_CONVERT) - 1; ; ) { + ASSERT(low <= high); + mid = (low + high) >> 1; + if ((hash = INT_GET(blp[mid].hashval, ARCH_CONVERT)) == args->hashval) + break; + if (hash < args->hashval) + low = mid + 1; + else + high = mid - 1; + if (low > high) { + ASSERT(args->oknoent); + xfs_da_brelse(tp, bp); + return XFS_ERROR(ENOENT); + } + } + /* + * Back up to the first one with the right hash value. + */ + while (mid > 0 && INT_GET(blp[mid - 1].hashval, ARCH_CONVERT) == args->hashval) { +#pragma mips_frequency_hint NEVER + mid--; + } + /* + * Now loop forward through all the entries with the + * right hash value looking for our name. + */ + do { + if ((addr = INT_GET(blp[mid].address, ARCH_CONVERT)) == XFS_DIR2_NULL_DATAPTR) + continue; + /* + * Get pointer to the entry from the leaf. + */ + dep = (xfs_dir2_data_entry_t *) + ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr)); + /* + * Compare, if it's right give back buffer & entry number. + */ + if (dep->namelen == args->namelen && + dep->name[0] == args->name[0] && + bcmp(dep->name, args->name, args->namelen) == 0) { + *bpp = bp; + *entno = mid; + return 0; + } + } while (++mid < INT_GET(btp->count, ARCH_CONVERT) && INT_GET(blp[mid].hashval, ARCH_CONVERT) == hash); + /* + * No match, release the buffer and return ENOENT. + */ + ASSERT(args->oknoent); + xfs_da_brelse(tp, bp); + return XFS_ERROR(ENOENT); +} + +/* + * Remove an entry from a block format directory. + * If that makes the block small enough to fit in shortform, transform it. 
+ */ +int /* error */ +xfs_dir2_block_removename( + xfs_da_args_t *args) /* directory operation args */ +{ + xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */ + xfs_dabuf_t *bp; /* block buffer */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_dir2_data_entry_t *dep; /* block data entry */ + xfs_inode_t *dp; /* incore inode */ + int ent; /* block leaf entry index */ + int error; /* error return value */ + xfs_mount_t *mp; /* filesystem mount point */ + int needlog; /* need to log block header */ + int needscan; /* need to fixup bestfree */ + xfs_dir2_sf_hdr_t sfh; /* shortform header */ + int size; /* shortform size */ + xfs_trans_t *tp; /* transaction pointer */ + + xfs_dir2_trace_args("block_removename", args); + /* + * Look up the entry in the block. Gets the buffer and entry index. + * It will always be there, the vnodeops level does a lookup first. + */ + if (error = xfs_dir2_block_lookup_int(args, &bp, &ent)) { +#pragma mips_frequency_hint NEVER + return error; + } + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + block = bp->data; + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + /* + * Point to the data entry using the leaf entry. + */ + dep = (xfs_dir2_data_entry_t *) + ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(blp[ent].address, ARCH_CONVERT))); + /* + * Mark the data entry's space free. + */ + needlog = needscan = 0; + xfs_dir2_data_make_free(tp, bp, + (xfs_dir2_data_aoff_t)((char *)dep - (char *)block), + XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan); + /* + * Fix up the block tail. + */ + INT_MOD(btp->stale, ARCH_CONVERT, +1); + xfs_dir2_block_log_tail(tp, bp); + /* + * Remove the leaf entry by marking it stale. + */ + INT_SET(blp[ent].address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR); + xfs_dir2_block_log_leaf(tp, bp, ent, ent); + /* + * Fix up bestfree, log the header if necessary. 
+ */ + if (needscan) + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog, + NULL); + if (needlog) + xfs_dir2_data_log_header(tp, bp); + xfs_dir2_data_check(dp, bp); + /* + * See if the size as a shortform is good enough. + */ + if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) > + XFS_IFORK_DSIZE(dp)) { + xfs_da_buf_done(bp); + return 0; + } + /* + * If it works, do the conversion. + */ + return xfs_dir2_block_to_sf(args, bp, size, &sfh); +} + +/* + * Replace an entry in a V2 block directory. + * Change the inode number to the new value. + */ +int /* error */ +xfs_dir2_block_replace( + xfs_da_args_t *args) /* directory operation args */ +{ + xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ + xfs_dabuf_t *bp; /* block buffer */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_dir2_data_entry_t *dep; /* block data entry */ + xfs_inode_t *dp; /* incore inode */ + int ent; /* leaf entry index */ + int error; /* error return value */ + xfs_mount_t *mp; /* filesystem mount point */ + + xfs_dir2_trace_args("block_replace", args); + /* + * Lookup the entry in the directory. Get buffer and entry index. + * This will always succeed since the caller has already done a lookup. + */ + if (error = xfs_dir2_block_lookup_int(args, &bp, &ent)) { +#pragma mips_frequency_hint NEVER + return error; + } + dp = args->dp; + mp = dp->i_mount; + block = bp->data; + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + /* + * Point to the data entry we need to change. + */ + dep = (xfs_dir2_data_entry_t *) + ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(blp[ent].address, ARCH_CONVERT))); + ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) != args->inumber); + /* + * Change the inode number to the new value. 
+ */ + INT_SET(dep->inumber, ARCH_CONVERT, args->inumber); + xfs_dir2_data_log_entry(args->trans, bp, dep); + xfs_dir2_data_check(dp, bp); + xfs_da_buf_done(bp); + return 0; +} + +/* + * Qsort comparison routine for the block leaf entries. + */ +static int /* sort order */ +xfs_dir2_block_sort( + const void *a, /* first leaf entry */ + const void *b) /* second leaf entry */ +{ + const xfs_dir2_leaf_entry_t *la; /* first leaf entry */ + const xfs_dir2_leaf_entry_t *lb; /* second leaf entry */ + + la = a; + lb = b; + return INT_GET(la->hashval, ARCH_CONVERT) < INT_GET(lb->hashval, ARCH_CONVERT) ? -1 : + (INT_GET(la->hashval, ARCH_CONVERT) > INT_GET(lb->hashval, ARCH_CONVERT) ? 1 : 0); +} + +/* + * Convert a V2 leaf directory to a V2 block directory if possible. + */ +int /* error */ +xfs_dir2_leaf_to_block( + xfs_da_args_t *args, /* operation arguments */ + xfs_dabuf_t *lbp, /* leaf buffer */ + xfs_dabuf_t *dbp) /* data buffer */ +{ + xfs_dir2_data_off_t *bestsp; /* leaf bests table */ + xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_inode_t *dp; /* incore directory inode */ + xfs_dir2_data_unused_t *dup; /* unused data entry */ + int error; /* error return value */ + int from; /* leaf from index */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ + xfs_mount_t *mp; /* file system mount point */ + int needlog; /* need to log data header */ + int needscan; /* need to scan for bestfree */ + xfs_dir2_sf_hdr_t sfh; /* shortform header */ + int size; /* bytes used */ + xfs_dir2_data_off_t *tagp; /* end of entry (tag) */ + int to; /* block/leaf to index */ + xfs_trans_t *tp; /* transaction pointer */ + + xfs_dir2_trace_args_bb("leaf_to_block", args, lbp, dbp); + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + leaf = lbp->data; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC); + ltp = 
XFS_DIR2_LEAF_TAIL_P(mp, leaf); + /* + * If there are data blocks other than the first one, take this + * opportunity to remove trailing empty data blocks that may have + * been left behind during no-space-reservation operations. + * These will show up in the leaf bests table. + */ + while (dp->i_d.di_size > mp->m_dirblksize) { + bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT); + if (INT_GET(bestsp[INT_GET(ltp->bestcount, ARCH_CONVERT) - 1], ARCH_CONVERT) == + mp->m_dirblksize - (uint)sizeof(block->hdr)) { +#pragma mips_frequency_hint NEVER + if (error = + xfs_dir2_leaf_trim_data(args, lbp, + (xfs_dir2_db_t)(INT_GET(ltp->bestcount, ARCH_CONVERT) - 1))) + goto out; + } else { + error = 0; + goto out; + } + } + /* + * Read the data block if we don't already have it, give up if it fails. + */ + if (dbp == NULL && + (error = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &dbp, + XFS_DATA_FORK))) { +#pragma mips_frequency_hint NEVER + goto out; + } + block = dbp->data; + ASSERT(INT_GET(block->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC); + /* + * Size of the "leaf" area in the block. + */ + size = (uint)sizeof(block->tail) + + (uint)sizeof(*lep) * (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT)); + /* + * Look at the last data entry. + */ + tagp = (xfs_dir2_data_off_t *)((char *)block + mp->m_dirblksize) - 1; + dup = (xfs_dir2_data_unused_t *)((char *)block + INT_GET(*tagp, ARCH_CONVERT)); + /* + * If it's not free or is too short we can't do it. + */ + if (INT_GET(dup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG || INT_GET(dup->length, ARCH_CONVERT) < size) { + error = 0; + goto out; + } + /* + * Start converting it to block form. + */ + INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_BLOCK_MAGIC); + needlog = 1; + needscan = 0; + /* + * Use up the space at the end of the block (blp/btp). + */ + xfs_dir2_data_use_free(tp, dbp, dup, mp->m_dirblksize - size, size, + &needlog, &needscan); + /* + * Initialize the block tail. 
+ */ + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + INT_SET(btp->count, ARCH_CONVERT, INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT)); + INT_SET(btp->stale, ARCH_CONVERT, 0); + xfs_dir2_block_log_tail(tp, dbp); + /* + * Initialize the block leaf area. We compact out stale entries. + */ + lep = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + for (from = to = 0; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) { + if (INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + continue; + lep[to++] = leaf->ents[from]; + } + ASSERT(to == INT_GET(btp->count, ARCH_CONVERT)); + xfs_dir2_block_log_leaf(tp, dbp, 0, INT_GET(btp->count, ARCH_CONVERT) - 1); + /* + * Scan the bestfree if we need it and log the data block header. + */ + if (needscan) + xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog, + NULL); + if (needlog) + xfs_dir2_data_log_header(tp, dbp); + /* + * Pitch the old leaf block. + */ + error = xfs_da_shrink_inode(args, mp->m_dirleafblk, lbp); + lbp = NULL; + if (error) { +#pragma mips_frequency_hint NEVER + goto out; + } + /* + * Now see if the resulting block can be shrunken to shortform. + */ + if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) > + XFS_IFORK_DSIZE(dp)) { + error = 0; + goto out; + } + return xfs_dir2_block_to_sf(args, dbp, size, &sfh); +out: + if (lbp) + xfs_da_buf_done(lbp); + if (dbp) + xfs_da_buf_done(dbp); + return error; +} + +/* + * Convert the shortform directory to block form. 
+ */ +int /* error */ +xfs_dir2_sf_to_block( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_dir2_db_t blkno; /* dir-relative block # (0) */ + xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ + xfs_dabuf_t *bp; /* block buffer */ + xfs_dir2_block_tail_t *btp; /* block tail pointer */ + char buf[XFS_DIR2_SF_MAX_SIZE]; /* sf buffer */ + xfs_dir2_data_entry_t *dep; /* data entry pointer */ + xfs_inode_t *dp; /* incore directory inode */ + int dummy; /* trash */ + xfs_dir2_data_unused_t *dup; /* unused entry pointer */ + int endoffset; /* end of data objects */ + int error; /* error return value */ + int i; /* index */ + xfs_mount_t *mp; /* filesystem mount point */ + int needlog; /* need to log block header */ + int needscan; /* need to scan block freespc */ + int newoffset; /* offset from current entry */ + int offset; /* target block offset */ + xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */ + xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_data_off_t *tagp; /* end of data entry */ + xfs_trans_t *tp; /* transaction pointer */ + + xfs_dir2_trace_args("sf_to_block", args); + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + ASSERT(dp->i_df.if_flags & XFS_IFINLINE); + /* + * Bomb out if the shortform directory is way too short. + */ + if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { +#pragma mips_frequency_hint NEVER + ASSERT(XFS_FORCED_SHUTDOWN(mp)); + return XFS_ERROR(EIO); + } + ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); + ASSERT(dp->i_df.if_u1.if_data != NULL); + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); + /* + * Copy the directory into the stack buffer. + * Then pitch the incore inode data so we can make extents. 
+ */ + bcopy(sfp, buf, dp->i_df.if_bytes); + xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK); + dp->i_d.di_size = 0; + xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); + /* + * Reset pointer - old sfp is gone. + */ + sfp = (xfs_dir2_sf_t *)buf; + /* + * Add block 0 to the inode. + */ + error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); + if (error) { +#pragma mips_frequency_hint NEVER + return error; + } + /* + * Initialize the data block. + */ + error = xfs_dir2_data_init(args, blkno, &bp); + if (error) { +#pragma mips_frequency_hint NEVER + return error; + } + block = bp->data; + INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_BLOCK_MAGIC); + /* + * Compute size of block "tail" area. + */ + i = (uint)sizeof(*btp) + + (INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); + /* + * The whole thing is initialized to free by the init routine. + * Say we're using the leaf and tail area. + */ + dup = (xfs_dir2_data_unused_t *)block->u; + needlog = needscan = 0; + xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog, + &needscan); + ASSERT(needscan == 0); + /* + * Fill in the tail. + */ + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + INT_SET(btp->count, ARCH_CONVERT, INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2); /* ., .. */ + INT_ZERO(btp->stale, ARCH_CONVERT); + blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + endoffset = (uint)((char *)blp - (char *)block); + /* + * Remove the freespace, we'll manage it. + */ + xfs_dir2_data_use_free(tp, bp, dup, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), + INT_GET(dup->length, ARCH_CONVERT), &needlog, &needscan); + /* + * Create entry for . 
+ */ + dep = (xfs_dir2_data_entry_t *) + ((char *)block + XFS_DIR2_DATA_DOT_OFFSET); + INT_SET(dep->inumber, ARCH_CONVERT, dp->i_ino); + dep->namelen = 1; + dep->name[0] = '.'; + tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); + INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block)); + xfs_dir2_data_log_entry(tp, bp, dep); + INT_SET(blp[0].hashval, ARCH_CONVERT, xfs_dir_hash_dot); + INT_SET(blp[0].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block)); + /* + * Create entry for .. + */ + dep = (xfs_dir2_data_entry_t *) + ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET); + INT_SET(dep->inumber, ARCH_CONVERT, XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT)); + dep->namelen = 2; + dep->name[0] = dep->name[1] = '.'; + tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); + INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block)); + xfs_dir2_data_log_entry(tp, bp, dep); + INT_SET(blp[1].hashval, ARCH_CONVERT, xfs_dir_hash_dotdot); + INT_SET(blp[1].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, (char *)dep - (char *)block)); + offset = XFS_DIR2_DATA_FIRST_OFFSET; + /* + * Loop over existing entries, stuff them in. + */ + if ((i = 0) == INT_GET(sfp->hdr.count, ARCH_CONVERT)) + sfep = NULL; + else + sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + /* + * Need to preserve the existing offset values in the sf directory. + * Insert holes (unused entries) where necessary. + */ + while (offset < endoffset) { + /* + * sfep is null when we reach the end of the list. + */ + if (sfep == NULL) + newoffset = endoffset; + else + newoffset = XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT); + /* + * There should be a hole here, make one. 
+ */ + if (offset < newoffset) { + dup = (xfs_dir2_data_unused_t *) + ((char *)block + offset); + INT_SET(dup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG); + INT_SET(dup->length, ARCH_CONVERT, newoffset - offset); + INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT, + (xfs_dir2_data_off_t) + ((char *)dup - (char *)block)); + xfs_dir2_data_log_unused(tp, bp, dup); + (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block, + dup, &dummy); + offset += INT_GET(dup->length, ARCH_CONVERT); + continue; + } + /* + * Copy a real entry. + */ + dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset); + INT_SET(dep->inumber, ARCH_CONVERT, XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, + XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT)); + dep->namelen = sfep->namelen; + bcopy(sfep->name, dep->name, dep->namelen); + tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); + INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)block)); + xfs_dir2_data_log_entry(tp, bp, dep); + INT_SET(blp[2 + i].hashval, ARCH_CONVERT, xfs_da_hashname((char *)sfep->name, sfep->namelen)); + INT_SET(blp[2 + i].address, ARCH_CONVERT, XFS_DIR2_BYTE_TO_DATAPTR(mp, + (char *)dep - (char *)block)); + offset = (int)((char *)(tagp + 1) - (char *)block); + if (++i == INT_GET(sfp->hdr.count, ARCH_CONVERT)) + sfep = NULL; + else + sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); + } + /* + * Sort the leaf entries by hash value. + */ + qsort(blp, INT_GET(btp->count, ARCH_CONVERT), sizeof(*blp), xfs_dir2_block_sort); + /* + * Log the leaf entry area and tail. + * Already logged the header in data_init, ignore needlog. 
+ */ + ASSERT(needscan == 0); + xfs_dir2_block_log_leaf(tp, bp, 0, INT_GET(btp->count, ARCH_CONVERT) - 1); + xfs_dir2_block_log_tail(tp, bp); + xfs_dir2_data_check(dp, bp); + xfs_da_buf_done(bp); + return 0; +} diff --git a/libxfs/xfs_dir2_data.c b/libxfs/xfs_dir2_data.c new file mode 100644 index 000000000..d921a21cc --- /dev/null +++ b/libxfs/xfs_dir2_data.c @@ -0,0 +1,832 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * xfs_dir2_data.c + * Core data block handling routines for XFS V2 directories. + * See xfs_dir2_data.h for data structures. + */ +#include + +#ifdef DEBUG +/* + * Check the consistency of the data block. 
+ * The input can also be a block-format directory. + * Pop an assert if we find anything bad. + */ +void +xfs_dir2_data_check( + xfs_inode_t *dp, /* incore inode pointer */ + xfs_dabuf_t *bp) /* data block's buffer */ +{ + xfs_dir2_dataptr_t addr; /* addr for leaf lookup */ + xfs_dir2_data_free_t *bf; /* bestfree table */ + xfs_dir2_block_tail_t *btp; /* block tail */ + int count; /* count of entries found */ + xfs_dir2_data_t *d; /* data block pointer */ + xfs_dir2_data_entry_t *dep; /* data entry */ + xfs_dir2_data_free_t *dfp; /* bestfree entry */ + xfs_dir2_data_unused_t *dup; /* unused entry */ + char *endp; /* end of useful data */ + int freeseen; /* mask of bestfrees seen */ + xfs_dahash_t hash; /* hash of current name */ + int i; /* leaf index */ + int lastfree; /* last entry was unused */ + xfs_dir2_leaf_entry_t *lep; /* block leaf entries */ + xfs_mount_t *mp; /* filesystem mount point */ + char *p; /* current data position */ + int stale; /* count of stale leaves */ + + mp = dp->i_mount; + d = bp->data; + ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || + INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + bf = d->hdr.bestfree; + p = (char *)d->u; + if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) { + btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); + lep = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + endp = (char *)lep; + } else + endp = (char *)d + mp->m_dirblksize; + count = lastfree = freeseen = 0; + /* + * Account for zero bestfree entries. 
+ */ + if (INT_GET(bf[0].length, ARCH_CONVERT) == 0) { + ASSERT(INT_GET(bf[0].offset, ARCH_CONVERT) == 0); + freeseen |= 1 << 0; + } + if (INT_GET(bf[1].length, ARCH_CONVERT) == 0) { + ASSERT(INT_GET(bf[1].offset, ARCH_CONVERT) == 0); + freeseen |= 1 << 1; + } + if (INT_GET(bf[2].length, ARCH_CONVERT) == 0) { + ASSERT(INT_GET(bf[2].offset, ARCH_CONVERT) == 0); + freeseen |= 1 << 2; + } + ASSERT(INT_GET(bf[0].length, ARCH_CONVERT) >= INT_GET(bf[1].length, ARCH_CONVERT)); + ASSERT(INT_GET(bf[1].length, ARCH_CONVERT) >= INT_GET(bf[2].length, ARCH_CONVERT)); + /* + * Loop over the data/unused entries. + */ + while (p < endp) { + dup = (xfs_dir2_data_unused_t *)p; + /* + * If it's unused, look for the space in the bestfree table. + * If we find it, account for that, else make sure it + * doesn't need to be there. + */ + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { + ASSERT(lastfree == 0); + ASSERT(INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT) == + (char *)dup - (char *)d); + dfp = xfs_dir2_data_freefind(d, dup); + if (dfp) { + i = (int)(dfp - bf); + ASSERT((freeseen & (1 << i)) == 0); + freeseen |= 1 << i; + } else + ASSERT(INT_GET(dup->length, ARCH_CONVERT) <= INT_GET(bf[2].length, ARCH_CONVERT)); + p += INT_GET(dup->length, ARCH_CONVERT); + lastfree = 1; + continue; + } + /* + * It's a real entry. Validate the fields. + * If this is a block directory then make sure it's + * in the leaf section of the block. + * The linear search is crude but this is DEBUG code. 
+ */ + dep = (xfs_dir2_data_entry_t *)p; + ASSERT(dep->namelen != 0); + ASSERT(xfs_dir_ino_validate(mp, INT_GET(dep->inumber, ARCH_CONVERT)) == 0); + ASSERT(INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT) == + (char *)dep - (char *)d); + count++; + lastfree = 0; + if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) { + addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, mp->m_dirdatablk, + (xfs_dir2_data_aoff_t) + ((char *)dep - (char *)d)); + hash = xfs_da_hashname((char *)dep->name, dep->namelen); + for (i = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) { + if (INT_GET(lep[i].address, ARCH_CONVERT) == addr && + INT_GET(lep[i].hashval, ARCH_CONVERT) == hash) + break; + } + ASSERT(i < INT_GET(btp->count, ARCH_CONVERT)); + } + p += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + } + /* + * Need to have seen all the entries and all the bestfree slots. + */ + ASSERT(freeseen == 7); + if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) { + for (i = stale = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) { + if (INT_GET(lep[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + stale++; + if (i > 0) + ASSERT(INT_GET(lep[i].hashval, ARCH_CONVERT) >= INT_GET(lep[i - 1].hashval, ARCH_CONVERT)); + } + ASSERT(count == INT_GET(btp->count, ARCH_CONVERT) - INT_GET(btp->stale, ARCH_CONVERT)); + ASSERT(stale == INT_GET(btp->stale, ARCH_CONVERT)); + } +} +#endif + +/* + * Given a data block and an unused entry from that block, + * return the bestfree entry if any that corresponds to it. 
+ */ +xfs_dir2_data_free_t * +xfs_dir2_data_freefind( + xfs_dir2_data_t *d, /* data block */ + xfs_dir2_data_unused_t *dup) /* data unused entry */ +{ + xfs_dir2_data_free_t *dfp; /* bestfree entry */ + xfs_dir2_data_aoff_t off; /* offset value needed */ +#if defined(DEBUG) && defined(__KERNEL__) + int matched; /* matched the value */ + int seenzero; /* saw a 0 bestfree entry */ +#endif + + off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)d); +#if defined(DEBUG) && defined(__KERNEL__) + /* + * Validate some consistency in the bestfree table. + * Check order, non-overlapping entries, and if we find the + * one we're looking for it has to be exact. + */ + ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || + INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + for (dfp = &d->hdr.bestfree[0], seenzero = matched = 0; + dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; + dfp++) { + if (INT_GET(dfp->offset, ARCH_CONVERT) == 0) { + ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == 0); + seenzero = 1; + continue; + } + ASSERT(seenzero == 0); + if (INT_GET(dfp->offset, ARCH_CONVERT) == off) { + matched = 1; + ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(dup->length, ARCH_CONVERT)); + } else if (off < INT_GET(dfp->offset, ARCH_CONVERT)) + ASSERT(off + INT_GET(dup->length, ARCH_CONVERT) <= INT_GET(dfp->offset, ARCH_CONVERT)); + else + ASSERT(INT_GET(dfp->offset, ARCH_CONVERT) + INT_GET(dfp->length, ARCH_CONVERT) <= off); + ASSERT(matched || INT_GET(dfp->length, ARCH_CONVERT) >= INT_GET(dup->length, ARCH_CONVERT)); + if (dfp > &d->hdr.bestfree[0]) + ASSERT(INT_GET(dfp[-1].length, ARCH_CONVERT) >= INT_GET(dfp[0].length, ARCH_CONVERT)); + } +#endif + /* + * If this is smaller than the smallest bestfree entry, + * it can't be there since they're sorted. + */ + if (INT_GET(dup->length, ARCH_CONVERT) < INT_GET(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length, ARCH_CONVERT)) + return NULL; + /* + * Look at the three bestfree entries for our guy. 
+ */ + for (dfp = &d->hdr.bestfree[0]; + dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; + dfp++) { + if (INT_GET(dfp->offset, ARCH_CONVERT) == 0) + return NULL; + if (INT_GET(dfp->offset, ARCH_CONVERT) == off) + return dfp; + } + /* + * Didn't find it. This only happens if there are duplicate lengths. + */ + return NULL; +} + +/* + * Insert an unused-space entry into the bestfree table. + */ +xfs_dir2_data_free_t * /* entry inserted */ +xfs_dir2_data_freeinsert( + xfs_dir2_data_t *d, /* data block pointer */ + xfs_dir2_data_unused_t *dup, /* unused space */ + int *loghead) /* log the data header (out) */ +{ + xfs_dir2_data_free_t *dfp; /* bestfree table pointer */ + xfs_dir2_data_free_t new; /* new bestfree entry */ + +#ifdef __KERNEL__ + ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || + INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); +#endif + dfp = d->hdr.bestfree; + INT_COPY(new.length, dup->length, ARCH_CONVERT); + INT_SET(new.offset, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dup - (char *)d)); + /* + * Insert at position 0, 1, or 2; or not at all. + */ + if (INT_GET(new.length, ARCH_CONVERT) > INT_GET(dfp[0].length, ARCH_CONVERT)) { + dfp[2] = dfp[1]; + dfp[1] = dfp[0]; + dfp[0] = new; + *loghead = 1; + return &dfp[0]; + } + if (INT_GET(new.length, ARCH_CONVERT) > INT_GET(dfp[1].length, ARCH_CONVERT)) { + dfp[2] = dfp[1]; + dfp[1] = new; + *loghead = 1; + return &dfp[1]; + } + if (INT_GET(new.length, ARCH_CONVERT) > INT_GET(dfp[2].length, ARCH_CONVERT)) { + dfp[2] = new; + *loghead = 1; + return &dfp[2]; + } + return NULL; +} + +/* + * Remove a bestfree entry from the table. 
+ */ +void +xfs_dir2_data_freeremove( + xfs_dir2_data_t *d, /* data block pointer */ + xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */ + int *loghead) /* out: log data header */ +{ +#ifdef __KERNEL__ + ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || + INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); +#endif + /* + * It's the first entry, slide the next 2 up. + */ + if (dfp == &d->hdr.bestfree[0]) { + d->hdr.bestfree[0] = d->hdr.bestfree[1]; + d->hdr.bestfree[1] = d->hdr.bestfree[2]; + } + /* + * It's the second entry, slide the 3rd entry up. + */ + else if (dfp == &d->hdr.bestfree[1]) + d->hdr.bestfree[1] = d->hdr.bestfree[2]; + /* + * Must be the last entry. + */ + else + ASSERT(dfp == &d->hdr.bestfree[2]); + /* + * Clear the 3rd entry, must be zero now. + */ + INT_ZERO(d->hdr.bestfree[2].length, ARCH_CONVERT); + INT_ZERO(d->hdr.bestfree[2].offset, ARCH_CONVERT); + *loghead = 1; +} + +/* + * Given a data block, reconstruct its bestfree map. + */ +void +xfs_dir2_data_freescan( + xfs_mount_t *mp, /* filesystem mount point */ + xfs_dir2_data_t *d, /* data block pointer */ + int *loghead, /* out: log data header */ + char *aendp) /* in: caller's endp */ +{ + xfs_dir2_block_tail_t *btp; /* block tail */ + xfs_dir2_data_entry_t *dep; /* active data entry */ + xfs_dir2_data_unused_t *dup; /* unused data entry */ + char *endp; /* end of block's data */ + char *p; /* current entry pointer */ + +#ifdef __KERNEL__ + ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || + INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); +#endif + /* + * Start by clearing the table. + */ + bzero(d->hdr.bestfree, sizeof(d->hdr.bestfree)); + *loghead = 1; + /* + * Set up pointers. 
+ */ + p = (char *)d->u; + if (aendp) + endp = aendp; + else if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) { + btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); + endp = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + } else + endp = (char *)d + mp->m_dirblksize; + /* + * Loop over the block's entries. + */ + while (p < endp) { + dup = (xfs_dir2_data_unused_t *)p; + /* + * If it's a free entry, insert it. + */ + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { + ASSERT((char *)dup - (char *)d == + INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT)); + xfs_dir2_data_freeinsert(d, dup, loghead); + p += INT_GET(dup->length, ARCH_CONVERT); + } + /* + * For active entries, check their tags and skip them. + */ + else { + dep = (xfs_dir2_data_entry_t *)p; + ASSERT((char *)dep - (char *)d == + INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT)); + p += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + } + } +} + +/* + * Initialize a data block at the given block number in the directory. + * Give back the buffer for the created block. + */ +int /* error */ +xfs_dir2_data_init( + xfs_da_args_t *args, /* directory operation args */ + xfs_dir2_db_t blkno, /* logical dir block number */ + xfs_dabuf_t **bpp) /* output block buffer */ +{ + xfs_dabuf_t *bp; /* block buffer */ + xfs_dir2_data_t *d; /* pointer to block */ + xfs_inode_t *dp; /* incore directory inode */ + xfs_dir2_data_unused_t *dup; /* unused entry pointer */ + int error; /* error return value */ + int i; /* bestfree index */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_trans_t *tp; /* transaction pointer */ + int t; /* temp */ + + dp = args->dp; + mp = dp->i_mount; + tp = args->trans; + /* + * Get the buffer set up for the block. 
+ */ + error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, blkno), -1, &bp, + XFS_DATA_FORK); + if (error) { +#pragma mips_frequency_hint NEVER + return error; + } + ASSERT(bp != NULL); + /* + * Initialize the header. + */ + d = bp->data; + INT_SET(d->hdr.magic, ARCH_CONVERT, XFS_DIR2_DATA_MAGIC); + INT_SET(d->hdr.bestfree[0].offset, ARCH_CONVERT, (xfs_dir2_data_off_t)sizeof(d->hdr)); + for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { + INT_ZERO(d->hdr.bestfree[i].length, ARCH_CONVERT); + INT_ZERO(d->hdr.bestfree[i].offset, ARCH_CONVERT); + } + /* + * Set up an unused entry for the block's body. + */ + dup = &d->u[0].unused; + INT_SET(dup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG); + + t=mp->m_dirblksize - (uint)sizeof(d->hdr); + INT_SET(d->hdr.bestfree[0].length, ARCH_CONVERT, t); + INT_SET(dup->length, ARCH_CONVERT, t); + INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT, + (xfs_dir2_data_off_t)((char *)dup - (char *)d)); + /* + * Log it and return it. + */ + xfs_dir2_data_log_header(tp, bp); + xfs_dir2_data_log_unused(tp, bp, dup); + *bpp = bp; + return 0; +} + +/* + * Log an active data entry from the block. + */ +void +xfs_dir2_data_log_entry( + xfs_trans_t *tp, /* transaction pointer */ + xfs_dabuf_t *bp, /* block buffer */ + xfs_dir2_data_entry_t *dep) /* data entry pointer */ +{ + xfs_dir2_data_t *d; /* data block pointer */ + + d = bp->data; + ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || + INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d), + (uint)((char *)(XFS_DIR2_DATA_ENTRY_TAG_P(dep) + 1) - + (char *)d - 1)); +} + +/* + * Log a data block header. 
+ */ +void +xfs_dir2_data_log_header( + xfs_trans_t *tp, /* transaction pointer */ + xfs_dabuf_t *bp) /* block buffer */ +{ + xfs_dir2_data_t *d; /* data block pointer */ + + d = bp->data; + ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || + INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + xfs_da_log_buf(tp, bp, (uint)((char *)&d->hdr - (char *)d), + (uint)(sizeof(d->hdr) - 1)); +} + +/* + * Log a data unused entry. + */ +void +xfs_dir2_data_log_unused( + xfs_trans_t *tp, /* transaction pointer */ + xfs_dabuf_t *bp, /* block buffer */ + xfs_dir2_data_unused_t *dup) /* data unused pointer */ +{ + xfs_dir2_data_t *d; /* data block pointer */ + + d = bp->data; + ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || + INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + /* + * Log the first part of the unused entry. + */ + xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)d), + (uint)((char *)&dup->length + sizeof(dup->length) - + 1 - (char *)d)); + /* + * Log the end (tag) of the unused entry. + */ + xfs_da_log_buf(tp, bp, + (uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT) - (char *)d), + (uint)((char *)XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT) - (char *)d + + sizeof(xfs_dir2_data_off_t) - 1)); +} + +/* + * Make a byte range in the data block unused. + * Its current contents are unimportant. 
+ */ +void +xfs_dir2_data_make_free( + xfs_trans_t *tp, /* transaction pointer */ + xfs_dabuf_t *bp, /* block buffer */ + xfs_dir2_data_aoff_t offset, /* starting byte offset */ + xfs_dir2_data_aoff_t len, /* length in bytes */ + int *needlogp, /* out: log header */ + int *needscanp) /* out: regen bestfree */ +{ + xfs_dir2_data_t *d; /* data block pointer */ + xfs_dir2_data_free_t *dfp; /* bestfree pointer */ + char *endptr; /* end of data area */ + xfs_mount_t *mp; /* filesystem mount point */ + int needscan; /* need to regen bestfree */ + xfs_dir2_data_unused_t *newdup; /* new unused entry */ + xfs_dir2_data_unused_t *postdup; /* unused entry after us */ + xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ + + mp = tp->t_mountp; + d = bp->data; + /* + * Figure out where the end of the data area is. + */ + if (INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC) + endptr = (char *)d + mp->m_dirblksize; + else { + xfs_dir2_block_tail_t *btp; /* block tail */ + + ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)d); + endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + } + /* + * If this isn't the start of the block, then back up to + * the previous entry and see if it's free. + */ + if (offset > sizeof(d->hdr)) { + xfs_dir2_data_off_t *tagp; /* tag just before us */ + + tagp = (xfs_dir2_data_off_t *)((char *)d + offset) - 1; + prevdup = (xfs_dir2_data_unused_t *)((char *)d + INT_GET(*tagp, ARCH_CONVERT)); + if (INT_GET(prevdup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG) + prevdup = NULL; + } else + prevdup = NULL; + /* + * If this isn't the end of the block, see if the entry after + * us is free. 
+ */ + if ((char *)d + offset + len < endptr) { + postdup = + (xfs_dir2_data_unused_t *)((char *)d + offset + len); + if (INT_GET(postdup->freetag, ARCH_CONVERT) != XFS_DIR2_DATA_FREE_TAG) + postdup = NULL; + } else + postdup = NULL; + ASSERT(*needscanp == 0); + needscan = 0; + /* + * Previous and following entries are both free, + * merge everything into a single free entry. + */ + if (prevdup && postdup) { + xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */ + + /* + * See if prevdup and/or postdup are in bestfree table. + */ + dfp = xfs_dir2_data_freefind(d, prevdup); + dfp2 = xfs_dir2_data_freefind(d, postdup); + /* + * We need a rescan unless there are exactly 2 free entries + * namely our two. Then we know what's happening, otherwise + * since the third bestfree is there, there might be more + * entries. + */ + needscan = INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT) != 0; + /* + * Fix up the new big freespace. + */ + INT_MOD(prevdup->length, ARCH_CONVERT, len + INT_GET(postdup->length, ARCH_CONVERT)); + INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(prevdup, ARCH_CONVERT), ARCH_CONVERT, + (xfs_dir2_data_off_t)((char *)prevdup - (char *)d)); + xfs_dir2_data_log_unused(tp, bp, prevdup); + if (!needscan) { + /* + * Has to be the case that entries 0 and 1 are + * dfp and dfp2 (don't know which is which), and + * entry 2 is empty. + * Remove entry 1 first then entry 0. + */ + ASSERT(dfp && dfp2); + if (dfp == &d->hdr.bestfree[1]) { + dfp = &d->hdr.bestfree[0]; + ASSERT(dfp2 == dfp); + dfp2 = &d->hdr.bestfree[1]; + } + xfs_dir2_data_freeremove(d, dfp2, needlogp); + xfs_dir2_data_freeremove(d, dfp, needlogp); + /* + * Now insert the new entry. 
+ */ + dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp); + ASSERT(dfp == &d->hdr.bestfree[0]); + ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(prevdup->length, ARCH_CONVERT)); + ASSERT(INT_GET(dfp[1].length, ARCH_CONVERT) == 0); + ASSERT(INT_GET(dfp[2].length, ARCH_CONVERT) == 0); + } + } + /* + * The entry before us is free, merge with it. + */ + else if (prevdup) { + dfp = xfs_dir2_data_freefind(d, prevdup); + INT_MOD(prevdup->length, ARCH_CONVERT, len); + INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(prevdup, ARCH_CONVERT), ARCH_CONVERT, + (xfs_dir2_data_off_t)((char *)prevdup - (char *)d)); + xfs_dir2_data_log_unused(tp, bp, prevdup); + /* + * If the previous entry was in the table, the new entry + * is longer, so it will be in the table too. Remove + * the old one and add the new one. + */ + if (dfp) { + xfs_dir2_data_freeremove(d, dfp, needlogp); + (void)xfs_dir2_data_freeinsert(d, prevdup, needlogp); + } + /* + * Otherwise we need a scan if the new entry is big enough. + */ + else + needscan = INT_GET(prevdup->length, ARCH_CONVERT) > INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT); + } + /* + * The following entry is free, merge with it. + */ + else if (postdup) { + dfp = xfs_dir2_data_freefind(d, postdup); + newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); + INT_SET(newdup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG); + INT_SET(newdup->length, ARCH_CONVERT, len + INT_GET(postdup->length, ARCH_CONVERT)); + INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup, ARCH_CONVERT), ARCH_CONVERT, + (xfs_dir2_data_off_t)((char *)newdup - (char *)d)); + xfs_dir2_data_log_unused(tp, bp, newdup); + /* + * If the following entry was in the table, the new entry + * is longer, so it will be in the table too. Remove + * the old one and add the new one. + */ + if (dfp) { + xfs_dir2_data_freeremove(d, dfp, needlogp); + (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); + } + /* + * Otherwise we need a scan if the new entry is big enough. 
+ */ + else + needscan = INT_GET(newdup->length, ARCH_CONVERT) > INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT); + } + /* + * Neither neighbor is free. Make a new entry. + */ + else { + newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); + INT_SET(newdup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG); + INT_SET(newdup->length, ARCH_CONVERT, len); + INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup, ARCH_CONVERT), ARCH_CONVERT, + (xfs_dir2_data_off_t)((char *)newdup - (char *)d)); + xfs_dir2_data_log_unused(tp, bp, newdup); + (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); + } + *needscanp = needscan; +} + +/* + * Take a byte range out of an existing unused space and make it un-free. + */ +void +xfs_dir2_data_use_free( + xfs_trans_t *tp, /* transaction pointer */ + xfs_dabuf_t *bp, /* data block buffer */ + xfs_dir2_data_unused_t *dup, /* unused entry */ + xfs_dir2_data_aoff_t offset, /* starting offset to use */ + xfs_dir2_data_aoff_t len, /* length to use */ + int *needlogp, /* out: need to log header */ + int *needscanp) /* out: need regen bestfree */ +{ + xfs_dir2_data_t *d; /* data block */ + xfs_dir2_data_free_t *dfp; /* bestfree pointer */ + int matchback; /* matches end of freespace */ + int matchfront; /* matches start of freespace */ + int needscan; /* need to regen bestfree */ + xfs_dir2_data_unused_t *newdup; /* new unused entry */ + xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ + int oldlen; /* old unused entry's length */ + + d = bp->data; + ASSERT(INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC || + INT_GET(d->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC); + ASSERT(INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG); + ASSERT(offset >= (char *)dup - (char *)d); + ASSERT(offset + len <= (char *)dup + INT_GET(dup->length, ARCH_CONVERT) - (char *)d); + ASSERT((char *)dup - (char *)d == INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT)); + /* + * Look up the entry in the 
bestfree table. + */ + dfp = xfs_dir2_data_freefind(d, dup); + oldlen = INT_GET(dup->length, ARCH_CONVERT); + ASSERT(dfp || oldlen <= INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT)); + /* + * Check for alignment with front and back of the entry. + */ + matchfront = (char *)dup - (char *)d == offset; + matchback = (char *)dup + oldlen - (char *)d == offset + len; + ASSERT(*needscanp == 0); + needscan = 0; + /* + * If we matched it exactly we just need to get rid of it from + * the bestfree table. + */ + if (matchfront && matchback) { + if (dfp) { + needscan = INT_GET(d->hdr.bestfree[2].offset, ARCH_CONVERT) != 0; + if (!needscan) + xfs_dir2_data_freeremove(d, dfp, needlogp); + } + } + /* + * We match the first part of the entry. + * Make a new entry with the remaining freespace. + */ + else if (matchfront) { + newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len); + INT_SET(newdup->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG); + INT_SET(newdup->length, ARCH_CONVERT, oldlen - len); + INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup, ARCH_CONVERT), ARCH_CONVERT, + (xfs_dir2_data_off_t)((char *)newdup - (char *)d)); + xfs_dir2_data_log_unused(tp, bp, newdup); + /* + * If it was in the table, remove it and add the new one. + */ + if (dfp) { + xfs_dir2_data_freeremove(d, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); + ASSERT(dfp != NULL); + ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(newdup->length, ARCH_CONVERT)); + ASSERT(INT_GET(dfp->offset, ARCH_CONVERT) == (char *)newdup - (char *)d); + /* + * If we got inserted at the last slot, + * that means we don't know if there was a better + * choice for the last slot, or not. Rescan. + */ + needscan = dfp == &d->hdr.bestfree[2]; + } + } + /* + * We match the last part of the entry. + * Trim the allocated space off the tail of the entry. 
+ */ + else if (matchback) { + newdup = dup; + INT_SET(newdup->length, ARCH_CONVERT, (xfs_dir2_data_off_t) + (((char *)d + offset) - (char *)newdup)); + INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup, ARCH_CONVERT), ARCH_CONVERT, + (xfs_dir2_data_off_t)((char *)newdup - (char *)d)); + xfs_dir2_data_log_unused(tp, bp, newdup); + /* + * If it was in the table, remove it and add the new one. + */ + if (dfp) { + xfs_dir2_data_freeremove(d, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); + ASSERT(dfp != NULL); + ASSERT(INT_GET(dfp->length, ARCH_CONVERT) == INT_GET(newdup->length, ARCH_CONVERT)); + ASSERT(INT_GET(dfp->offset, ARCH_CONVERT) == (char *)newdup - (char *)d); + /* + * If we got inserted at the last slot, + * that means we don't know if there was a better + * choice for the last slot, or not. Rescan. + */ + needscan = dfp == &d->hdr.bestfree[2]; + } + } + /* + * Poking out the middle of an entry. + * Make two new entries. + */ + else { + newdup = dup; + INT_SET(newdup->length, ARCH_CONVERT, (xfs_dir2_data_off_t) + (((char *)d + offset) - (char *)newdup)); + INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup, ARCH_CONVERT), ARCH_CONVERT, + (xfs_dir2_data_off_t)((char *)newdup - (char *)d)); + xfs_dir2_data_log_unused(tp, bp, newdup); + newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len); + INT_SET(newdup2->freetag, ARCH_CONVERT, XFS_DIR2_DATA_FREE_TAG); + INT_SET(newdup2->length, ARCH_CONVERT, oldlen - len - INT_GET(newdup->length, ARCH_CONVERT)); + INT_SET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(newdup2, ARCH_CONVERT), ARCH_CONVERT, + (xfs_dir2_data_off_t)((char *)newdup2 - (char *)d)); + xfs_dir2_data_log_unused(tp, bp, newdup2); + /* + * If the old entry was in the table, we need to scan + * if the 3rd entry was valid, since these entries + * are smaller than the old one. + * If we don't need to scan that means there were 1 or 2 + * entries in the table, and removing the old and adding + * the 2 new will work. 
+ */ + if (dfp) { + needscan = INT_GET(d->hdr.bestfree[2].length, ARCH_CONVERT) != 0; + if (!needscan) { + xfs_dir2_data_freeremove(d, dfp, needlogp); + (void)xfs_dir2_data_freeinsert(d, newdup, + needlogp); + (void)xfs_dir2_data_freeinsert(d, newdup2, + needlogp); + } + } + } + *needscanp = needscan; +} diff --git a/libxfs/xfs_dir2_leaf.c b/libxfs/xfs_dir2_leaf.c new file mode 100644 index 000000000..89761db5e --- /dev/null +++ b/libxfs/xfs_dir2_leaf.c @@ -0,0 +1,1496 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * xfs_dir2_leaf.c + * XFS directory version 2 implementation - single leaf form + * see xfs_dir2_leaf.h for data structures. 
+ * These directories have multiple XFS_DIR2_DATA blocks and one
+ * XFS_DIR2_LEAF1 block containing the hash table and freespace map.
+ */
+
+#include <xfs.h>
+
+
+/*
+ * Convert a block form directory to a leaf form directory.
+ *
+ * A leaf block is added to the directory and initialized, the leaf
+ * entries stored in the tail of the block-form block are copied into it,
+ * and the space those entries and the block tail occupied is made free
+ * so that the old block becomes a data block.
+ *
+ * Returns 0 on success, or the error from xfs_da_grow_inode() or
+ * xfs_dir2_leaf_init() if either of them fails.
+ */
+int						/* error */
+xfs_dir2_block_to_leaf(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_dabuf_t		*dbp)		/* input block's buffer */
+{
+	xfs_dir2_data_off_t	*bestsp;	/* leaf's bestsp entries */
+	xfs_dablk_t		blkno;		/* leaf block's bno */
+	xfs_dir2_block_t	*block;		/* block structure */
+	xfs_dir2_leaf_entry_t	*blp;		/* block's leaf entries */
+	xfs_dir2_block_tail_t	*btp;		/* block's tail */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return code */
+	xfs_dabuf_t		*lbp;		/* leaf block's buffer */
+	xfs_dir2_db_t		ldb;		/* leaf block's bno */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_tail_t	*ltp;		/* leaf's tail */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			needlog;	/* need to log block header */
+	int			needscan;	/* need to rescan bestfree */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args_b("block_to_leaf", args, dbp);
+	dp = args->dp;
+	mp = dp->i_mount;
+	tp = args->trans;
+	/*
+	 * Add the leaf block to the inode.
+	 * This interface will only put blocks in the leaf/node range.
+	 * Since that's empty now, we'll get the root (block 0 in range).
+	 */
+	error = xfs_da_grow_inode(args, &blkno);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ldb = XFS_DIR2_DA_TO_DB(mp, blkno);
+	ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
+	/*
+	 * Initialize the leaf block, get a buffer for it.
+	 */
+	error = xfs_dir2_leaf_init(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ASSERT(lbp != NULL);
+	leaf = lbp->data;
+	block = dbp->data;
+	xfs_dir2_data_check(dp, dbp);
+	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
+	blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
+	/*
+	 * Set the counts in the leaf header.
+	 */
+	INT_COPY(leaf->hdr.count, btp->count, ARCH_CONVERT); /* INT_: type change */
+	INT_COPY(leaf->hdr.stale, btp->stale, ARCH_CONVERT); /* INT_: type change */
+	/*
+	 * Could compact these but I think we always do the conversion
+	 * after squeezing out stale entries.
+	 */
+	bcopy(blp, leaf->ents,
+		INT_GET(btp->count, ARCH_CONVERT) * sizeof(xfs_dir2_leaf_entry_t));
+	xfs_dir2_leaf_log_ents(tp, lbp, 0,
+		INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1);
+	needscan = 0;
+	needlog = 1;
+	/*
+	 * Make the space formerly occupied by the leaf entries and block
+	 * tail be free: everything from blp to the end of the block.
+	 */
+	xfs_dir2_data_make_free(tp, dbp,
+		(xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
+		(xfs_dir2_data_aoff_t)((char *)block + mp->m_dirblksize -
+				       (char *)blp),
+		&needlog, &needscan);
+	/*
+	 * Fix up the block header, make it a data block.
+	 */
+	INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_DATA_MAGIC);
+	if (needscan)
+		xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog,
+			NULL);
+	/*
+	 * Set up leaf tail and bests table: one data block, whose best
+	 * free length is taken from that block's header.
+	 */
+	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	INT_SET(ltp->bestcount, ARCH_CONVERT, 1);
+	bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+	INT_COPY(bestsp[0], block->hdr.bestfree[0].length, ARCH_CONVERT);
+	/*
+	 * Log the data header and leaf bests table.
+	 */
+	if (needlog)
+		xfs_dir2_data_log_header(tp, dbp);
+	xfs_dir2_leaf_check(dp, lbp);
+	xfs_dir2_data_check(dp, dbp);
+	xfs_dir2_leaf_log_bests(tp, lbp, 0, 0);
+	xfs_da_buf_done(lbp);
+	return 0;
+}
+
+/*
+ * Add an entry to a leaf form directory.
+ */ +int /* error */ +xfs_dir2_leaf_addname( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_dir2_data_off_t *bestsp; /* freespace table in leaf */ + int compact; /* need to compact leaves */ + xfs_dir2_data_t *data; /* data block structure */ + xfs_dabuf_t *dbp; /* data block buffer */ + xfs_dir2_data_entry_t *dep; /* data block entry */ + xfs_inode_t *dp; /* incore directory inode */ + xfs_dir2_data_unused_t *dup; /* data unused entry */ + int error; /* error return value */ + int grown; /* allocated new data block */ + int highstale; /* index of next stale leaf */ + int i; /* temporary, index */ + int index; /* leaf table position */ + xfs_dabuf_t *lbp; /* leaf's buffer */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + int length; /* length of new entry */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */ + int lfloglow; /* low leaf logging index */ + int lfloghigh; /* high leaf logging index */ + int lowstale; /* index of prev stale leaf */ + xfs_dir2_leaf_tail_t *ltp; /* leaf tail pointer */ + xfs_mount_t *mp; /* filesystem mount point */ + int needbytes; /* leaf block bytes needed */ + int needlog; /* need to log data header */ + int needscan; /* need to rescan data free */ + xfs_dir2_data_off_t *tagp; /* end of data entry */ + xfs_trans_t *tp; /* transaction pointer */ + xfs_dir2_db_t use_block; /* data block number */ + + /* + * Returns 0 on success. ENOSPC is returned when the entry would + * need a new data block or a conversion to node form while the + * caller is only checking (args->justcheck) or has no space + * reservation (args->total == 0). Errors from the read, grow, + * init and conversion routines called below are passed back as is. + */ + xfs_dir2_trace_args("leaf_addname", args); + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + /* + * Read the leaf block. + */ + error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp, + XFS_DATA_FORK); + if (error) { +#pragma mips_frequency_hint NEVER + return error; + } + ASSERT(lbp != NULL); + /* + * Look up the entry by hash value and name. + * We know it's not there, our caller has already done a lookup. + * So the index is of the entry to insert in front of. + * But if there are dup hash values the index is of the first of those.
+ */ + index = xfs_dir2_leaf_search_hash(args, lbp); + leaf = lbp->data; + ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT); + length = XFS_DIR2_DATA_ENTSIZE(args->namelen); + /* + * See if there are any entries with the same hash value + * and space in their block for the new entry. + * This is good because it puts multiple same-hash value entries + * in a data block, improving the lookup of those entries. + * Note that index and lep are advanced by this loop. + */ + for (use_block = -1, lep = &leaf->ents[index]; + index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval; + index++, lep++) { + if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + continue; + i = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT)); + ASSERT(i < INT_GET(ltp->bestcount, ARCH_CONVERT)); + ASSERT(INT_GET(bestsp[i], ARCH_CONVERT) != NULLDATAOFF); + if (INT_GET(bestsp[i], ARCH_CONVERT) >= length) { + use_block = i; + break; + } + } + /* + * Didn't find a block yet, linear search all the data blocks. + */ + if (use_block == -1) { + for (i = 0; i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++) { + /* + * Remember a block we see that's missing. + */ + if (INT_GET(bestsp[i], ARCH_CONVERT) == NULLDATAOFF && use_block == -1) + use_block = i; + else if (INT_GET(bestsp[i], ARCH_CONVERT) >= length) { + use_block = i; + break; + } + } + } + /* + * How many bytes do we need in the leaf block? + */ + needbytes = + (INT_GET(leaf->hdr.stale, ARCH_CONVERT) != 0 ? 0 : (uint)sizeof(leaf->ents[0])) + + (use_block != -1 ? 0 : (uint)sizeof(leaf->bests[0])); + /* + * Now kill use_block if it refers to a missing block, so we + * can use it as an indication of allocation needed. + */ + if (use_block != -1 && INT_GET(bestsp[use_block], ARCH_CONVERT) == NULLDATAOFF) + use_block = -1; + /* + * If we don't have enough free bytes but we can make enough + * by compacting out stale entries, we'll do that.
+ */ + if ((char *)bestsp - (char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] < needbytes && + INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1) { +#pragma mips_frequency_hint NEVER + compact = 1; + } + /* + * Otherwise if we don't have enough free bytes we need to + * convert to node form. + */ + else if ((char *)bestsp - (char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] < + needbytes) { +#pragma mips_frequency_hint NEVER + /* + * Just checking or no space reservation, give up. + */ + if (args->justcheck || args->total == 0) { + xfs_da_brelse(tp, lbp); + return XFS_ERROR(ENOSPC); + } + /* + * Convert to node form. + */ + error = xfs_dir2_leaf_to_node(args, lbp); + xfs_da_buf_done(lbp); + if (error) + return error; + /* + * Then add the new entry. + */ + return xfs_dir2_node_addname(args); + } + /* + * Otherwise it will fit without compaction. + */ + else + compact = 0; + /* + * If just checking, then it will fit unless we needed to allocate + * a new data block. + */ + if (args->justcheck) { + xfs_da_brelse(tp, lbp); + return use_block == -1 ? XFS_ERROR(ENOSPC) : 0; + } + /* + * If no allocations are allowed, return now before we've + * changed anything. + */ + if (args->total == 0 && use_block == -1) { +#pragma mips_frequency_hint NEVER + xfs_da_brelse(tp, lbp); + return XFS_ERROR(ENOSPC); + } + /* + * Need to compact the leaf entries, removing stale ones. + * Leave one stale entry behind - the one closest to our + * insertion index - and we'll shift that one to our insertion + * point later. + */ + if (compact) { +#pragma mips_frequency_hint NEVER + xfs_dir2_leaf_compact_x1(lbp, &index, &lowstale, &highstale, + &lfloglow, &lfloghigh); + } + /* + * There are stale entries, so we'll need log-low and log-high
+ * set to impossibly bad values here; the MIN/MAX applied later + * replace them with the range of entries actually changed. + */ + else if (INT_GET(leaf->hdr.stale, ARCH_CONVERT)) { + lfloglow = INT_GET(leaf->hdr.count, ARCH_CONVERT); + lfloghigh = -1; + } + /* + * If there was no data block space found, we need to allocate + * a new one. + */ + if (use_block == -1) { +#pragma mips_frequency_hint NEVER + /* + * Add the new data block. + */ + if (error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, + &use_block)) { + xfs_da_brelse(tp, lbp); + return error; + } + /* + * Initialize the block. + */ + if (error = xfs_dir2_data_init(args, use_block, &dbp)) { + xfs_da_brelse(tp, lbp); + return error; + } + /* + * If we're adding a new data block on the end we need to + * extend the bests table. Copy it up one entry. + */ + if (use_block >= INT_GET(ltp->bestcount, ARCH_CONVERT)) { + bestsp--; + ovbcopy(&bestsp[1], &bestsp[0], + INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(bestsp[0])); + INT_MOD(ltp->bestcount, ARCH_CONVERT, +1); + xfs_dir2_leaf_log_tail(tp, lbp); + xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1); + } + /* + * If we're filling in a previously empty block just log it. + */ + else + xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); + data = dbp->data; + INT_COPY(bestsp[use_block], data->hdr.bestfree[0].length, ARCH_CONVERT); + grown = 1; + } + /* + * Already had space in some data block. + * Just read that one in. + */ + else { + if (error = + xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, use_block), + -1, &dbp, XFS_DATA_FORK)) { +#pragma mips_frequency_hint NEVER + xfs_da_brelse(tp, lbp); + return error; + } + data = dbp->data; + grown = 0; + } + xfs_dir2_data_check(dp, dbp); + /* + * Point to the biggest freespace in our data block. + */ + dup = (xfs_dir2_data_unused_t *) + ((char *)data + INT_GET(data->hdr.bestfree[0].offset, ARCH_CONVERT)); + ASSERT(INT_GET(dup->length, ARCH_CONVERT) >= length); + needscan = needlog = 0; + /* + * Mark the initial part of our freespace in use for the new entry.
+ */ + xfs_dir2_data_use_free(tp, dbp, dup, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length, + &needlog, &needscan); + /* + * Initialize our new entry (at last). + */ + dep = (xfs_dir2_data_entry_t *)dup; + INT_SET(dep->inumber, ARCH_CONVERT, args->inumber); + dep->namelen = args->namelen; + bcopy(args->name, dep->name, dep->namelen); + tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); + INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)data)); + /* + * Need to rescan to fix up the bestfree table. + */ + if (needscan) + xfs_dir2_data_freescan(mp, data, &needlog, NULL); + /* + * Need to log the data block's header. + */ + if (needlog) + xfs_dir2_data_log_header(tp, dbp); + xfs_dir2_data_log_entry(tp, dbp, dep); + /* + * If the bests table needs to be changed, do it. + * Log the change unless we've already done that. + */ + if (INT_GET(bestsp[use_block], ARCH_CONVERT) != INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) { + INT_COPY(bestsp[use_block], data->hdr.bestfree[0].length, ARCH_CONVERT); + if (!grown) + xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); + } + /* + * Now we need to make room to insert the leaf entry. + * If there are no stale entries, we just insert a hole at index. + */ + if (INT_GET(leaf->hdr.stale, ARCH_CONVERT) == 0) { + /* + * lep is still good as the index leaf entry. + */ + if (index < INT_GET(leaf->hdr.count, ARCH_CONVERT)) + ovbcopy(lep, lep + 1, + (INT_GET(leaf->hdr.count, ARCH_CONVERT) - index) * sizeof(*lep)); + /* + * Record low and high logging indices for the leaf. + */ + lfloglow = index; + lfloghigh = INT_GET(leaf->hdr.count, ARCH_CONVERT); + INT_MOD(leaf->hdr.count, ARCH_CONVERT, +1); + } + /* + * There are stale entries. + * We will use one of them for the new entry. + * It's probably not at the right location, so we'll have to + * shift some up or down first. + */ + else { + /* + * If we didn't compact before, we need to find the nearest + * stale entries before and after our insertion point.
+ */ + if (compact == 0) { + /* + * Find the first stale entry before the insertion + * point, if any. + */ + for (lowstale = index - 1; + lowstale >= 0 && + INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) != + XFS_DIR2_NULL_DATAPTR; + lowstale--) + continue; + /* + * Find the next stale entry at or after the insertion + * point, if any. Stop if we go so far that the + * lowstale entry would be better. + */ + for (highstale = index; + highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) && + INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) != + XFS_DIR2_NULL_DATAPTR && + (lowstale < 0 || + index - lowstale - 1 >= highstale - index); + highstale++) + continue; + } + /* + * If the low one is better, use it. + */ + if (lowstale >= 0 && + (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) || + index - lowstale - 1 < highstale - index)) { + ASSERT(index - lowstale - 1 >= 0); + ASSERT(INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) == + XFS_DIR2_NULL_DATAPTR); + /* + * Copy entries up to cover the stale entry + * and make room for the new entry. + */ + if (index - lowstale - 1 > 0) + ovbcopy(&leaf->ents[lowstale + 1], + &leaf->ents[lowstale], + (index - lowstale - 1) * sizeof(*lep)); + lep = &leaf->ents[index - 1]; + lfloglow = MIN(lowstale, lfloglow); + lfloghigh = MAX(index - 1, lfloghigh); + } + /* + * The high one is better, so use that one. + */ + else { + ASSERT(highstale - index >= 0); + ASSERT(INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) == + XFS_DIR2_NULL_DATAPTR); + /* + * Copy entries down to cover the stale entry + * and make room for the new entry. + */ + if (highstale - index > 0) + ovbcopy(&leaf->ents[index], + &leaf->ents[index + 1], + (highstale - index) * sizeof(*lep)); + lep = &leaf->ents[index]; + lfloglow = MIN(index, lfloglow); + lfloghigh = MAX(highstale, lfloghigh); + } + INT_MOD(leaf->hdr.stale, ARCH_CONVERT, -1); + } + /* + * Fill in the new leaf entry.
+ */ + INT_SET(lep->hashval, ARCH_CONVERT, args->hashval); + INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block, INT_GET(*tagp, ARCH_CONVERT))); + /* + * Log the leaf fields and give up the buffers. + */ + xfs_dir2_leaf_log_header(tp, lbp); + xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh); + xfs_dir2_leaf_check(dp, lbp); + xfs_da_buf_done(lbp); + xfs_dir2_data_check(dp, dbp); + xfs_da_buf_done(dbp); + return 0; +} + + +#ifdef DEBUG +/* + * Check the internal consistency of a leaf1 block. + * Pop an assert if something is wrong. + * This function is only compiled when DEBUG is defined. + */ +void +xfs_dir2_leaf_check( + xfs_inode_t *dp, /* incore directory inode */ + xfs_dabuf_t *bp) /* leaf's buffer */ +{ + int i; /* leaf index */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_tail_t *ltp; /* leaf tail pointer */ + xfs_mount_t *mp; /* filesystem mount point */ + int stale; /* count of stale leaves */ + + leaf = bp->data; + mp = dp->i_mount; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC); + /* + * This value is not restrictive enough. + * Should factor in the size of the bests table as well. + * We can deduce a value for that from di_size. + */ + ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) <= XFS_DIR2_MAX_LEAF_ENTS(mp)); + ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + /* + * Leaves and bests don't overlap. + */ + ASSERT((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] <= + (char *)XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT)); + /* + * Check hash value order, count stale entries.
+ */ + for (i = stale = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); i++) { + if (i + 1 < INT_GET(leaf->hdr.count, ARCH_CONVERT)) + ASSERT(INT_GET(leaf->ents[i].hashval, ARCH_CONVERT) <= + INT_GET(leaf->ents[i + 1].hashval, ARCH_CONVERT)); + if (INT_GET(leaf->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + stale++; + } + ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == stale); +} +#endif /* DEBUG */ + +/* + * Compact out any stale entries in the leaf. + * Log the header and changed leaf entries, if any. + * Does nothing when the header records no stale entries. + */ +void +xfs_dir2_leaf_compact( + xfs_da_args_t *args, /* operation arguments */ + xfs_dabuf_t *bp) /* leaf buffer */ +{ + int from; /* source leaf index */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + int loglow; /* first leaf entry to log */ + int to; /* target leaf index */ + + leaf = bp->data; + if (INT_GET(leaf->hdr.stale, ARCH_CONVERT) == 0) { +#pragma mips_frequency_hint NEVER + return; + } + /* + * Compress out the stale entries in place. + */ + for (from = to = 0, loglow = -1; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) { + if (INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + continue; + /* + * Only actually copy the entries that are different. + * loglow remembers the first slot that was overwritten. + */ + if (from > to) { + if (loglow == -1) + loglow = to; + leaf->ents[to] = leaf->ents[from]; + } + to++; + } + /* + * Update and log the header, log the leaf entries. + */ + ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == from - to); + INT_MOD(leaf->hdr.count, ARCH_CONVERT, -(INT_GET(leaf->hdr.stale, ARCH_CONVERT))); + INT_SET(leaf->hdr.stale, ARCH_CONVERT, 0); + xfs_dir2_leaf_log_header(args->trans, bp); + if (loglow != -1) + xfs_dir2_leaf_log_ents(args->trans, bp, loglow, to - 1); +} + +/* + * Compact the leaf entries, removing stale ones. + * Leave one stale entry behind - the one closest to our + * insertion index - and the caller will shift that one to our insertion + * point later.
+ * Return new insertion index, where the remaining stale entry is, + * and leaf logging indices. + * The header's count and stale fields are updated here but nothing is + * logged by this routine; the log indices are handed back to the caller. + */ +void +xfs_dir2_leaf_compact_x1( + xfs_dabuf_t *bp, /* leaf buffer */ + int *indexp, /* insertion index */ + int *lowstalep, /* out: stale entry before us */ + int *highstalep, /* out: stale entry after us */ + int *lowlogp, /* out: low log index */ + int *highlogp) /* out: high log index */ +{ + int from; /* source copy index */ + int highstale; /* stale entry at/after index */ + int index; /* insertion index */ + int keepstale; /* source index of kept stale */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + int lowstale; /* stale entry before index */ + int newindex; /* new insertion index */ + int to; /* destination copy index */ + + leaf = bp->data; + ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1); + index = *indexp; + /* + * Find the first stale entry before our index, if any. + */ + for (lowstale = index - 1; + lowstale >= 0 && + INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR; + lowstale--) + continue; + /* + * Find the first stale entry at or after our index, if any. + * Stop if the answer would be worse than lowstale. + */ + for (highstale = index; + highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) && + INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR && + (lowstale < 0 || index - lowstale > highstale - index); + highstale++) + continue; + /* + * Pick the better of lowstale and highstale. + */ + if (lowstale >= 0 && + (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) || + index - lowstale <= highstale - index)) + keepstale = lowstale; + else + keepstale = highstale; + /* + * Copy the entries in place, removing all the stale entries + * except keepstale. + */ + for (from = to = 0; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) { + /* + * Notice the new value of index.
+ */ + if (index == from) + newindex = to; + if (from != keepstale && + INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) { + /* + * from == to only holds until the first entry is + * dropped, so this records the lowest slot whose + * contents will change. + */ + if (from == to) + *lowlogp = to; + continue; + } + /* + * Record the new keepstale value for the insertion. + */ + if (from == keepstale) + lowstale = highstale = to; + /* + * Copy only the entries that have moved. + */ + if (from > to) + leaf->ents[to] = leaf->ents[from]; + to++; + } + ASSERT(from > to); + /* + * If the insertion point was past the last entry, + * set the new insertion point accordingly. + */ + if (index == from) + newindex = to; + *indexp = newindex; + /* + * Adjust the leaf header values. + */ + INT_MOD(leaf->hdr.count, ARCH_CONVERT, -(from - to)); + INT_SET(leaf->hdr.stale, ARCH_CONVERT, 1); + /* + * Remember the low/high stale value only in the "right" + * direction. + */ + if (lowstale >= newindex) + lowstale = -1; + else + highstale = INT_GET(leaf->hdr.count, ARCH_CONVERT); + *highlogp = INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1; + *lowstalep = lowstale; + *highstalep = highstale; +} + +/* + * Initialize a new leaf block, leaf1 or leafn magic accepted. + * On success the buffer for the new block is returned through bpp. + */ +int +xfs_dir2_leaf_init( + xfs_da_args_t *args, /* operation arguments */ + xfs_dir2_db_t bno, /* directory block number */ + xfs_dabuf_t **bpp, /* out: leaf buffer */ + int magic) /* magic number for block */ +{ + xfs_dabuf_t *bp; /* leaf buffer */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return code */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_trans_t *tp; /* transaction pointer */ + + dp = args->dp; + ASSERT(dp != NULL); + tp = args->trans; + mp = dp->i_mount; + ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) && + bno < XFS_DIR2_FREE_FIRSTDB(mp)); + /* + * Get the buffer for the block.
+ */ + error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, bno), -1, &bp, + XFS_DATA_FORK); + if (error) { +#pragma mips_frequency_hint NEVER + return error; + } + ASSERT(bp != NULL); + leaf = bp->data; + /* + * Initialize the header: given magic, no siblings, no entries. + */ + INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, magic); + INT_ZERO(leaf->hdr.info.forw, ARCH_CONVERT); + INT_ZERO(leaf->hdr.info.back, ARCH_CONVERT); + INT_ZERO(leaf->hdr.count, ARCH_CONVERT); + INT_ZERO(leaf->hdr.stale, ARCH_CONVERT); + xfs_dir2_leaf_log_header(tp, bp); + /* + * If it's a leaf-format directory initialize the tail. + * In this case our caller has the real bests table to copy into + * the block. + */ + if (magic == XFS_DIR2_LEAF1_MAGIC) { + ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + INT_SET(ltp->bestcount, ARCH_CONVERT, 0); + xfs_dir2_leaf_log_tail(tp, bp); + } + *bpp = bp; + return 0; +} + +/* + * Log the bests entries indicated from a leaf1 block. + * first and last are inclusive indices into the bests table. + */ +void +xfs_dir2_leaf_log_bests( + xfs_trans_t *tp, /* transaction pointer */ + xfs_dabuf_t *bp, /* leaf buffer */ + int first, /* first entry to log */ + int last) /* last entry to log */ +{ + xfs_dir2_data_off_t *firstb; /* pointer to first entry */ + xfs_dir2_data_off_t *lastb; /* pointer to last entry */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ + + leaf = bp->data; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC); + ltp = XFS_DIR2_LEAF_TAIL_P(tp->t_mountp, leaf); + firstb = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT) + first; + lastb = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT) + last; + /* + * Byte range from the start of bests[first] through the last + * byte of bests[last], relative to the start of the block. + */ + xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf), + (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1)); +} + +/* + * Log the leaf entries indicated from a leaf1 or leafn block.
+ */
+void
+xfs_dir2_leaf_log_ents(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_dabuf_t		*bp,		/* leaf buffer */
+	int			first,		/* first entry to log */
+	int			last)		/* last entry to log */
+{
+	xfs_dir2_leaf_t		*lf = bp->data;	/* leaf block contents */
+	uint			lo;		/* offset of first byte */
+	uint			hi;		/* offset of last byte */
+
+	ASSERT(INT_GET(lf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC ||
+	       INT_GET(lf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	/*
+	 * The range runs from the start of ents[first] through the final
+	 * byte of ents[last], both measured from the start of the block.
+	 */
+	lo = (uint)((char *)&lf->ents[first] - (char *)lf);
+	hi = (uint)((char *)&lf->ents[last] - (char *)lf +
+		    sizeof(lf->ents[last]) - 1);
+	xfs_da_log_buf(tp, bp, lo, hi);
+}
+
+/*
+ * Log the header of the leaf1 or leafn block.
+ * The logged range ends at byte sizeof(hdr) - 1 of the block.
+ */
+void
+xfs_dir2_leaf_log_header(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_dabuf_t		*bp)		/* leaf buffer */
+{
+	xfs_dir2_leaf_t		*lf = bp->data;	/* leaf block contents */
+	uint			lo;		/* offset of first byte */
+	uint			hi;		/* offset of last byte */
+
+	ASSERT(INT_GET(lf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC ||
+	       INT_GET(lf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	lo = (uint)((char *)&lf->hdr - (char *)lf);
+	hi = (uint)(sizeof(lf->hdr) - 1);
+	xfs_da_log_buf(tp, bp, lo, hi);
+}
+
+/*
+ * Log the tail of the leaf1 block.
+ * The logged range runs from the tail structure to the last byte of
+ * the directory block (m_dirblksize - 1).
+ */
+void
+xfs_dir2_leaf_log_tail(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_dabuf_t		*bp)		/* leaf buffer */
+{
+	xfs_mount_t		*mp = tp->t_mountp;	/* filesystem mount point */
+	xfs_dir2_leaf_t		*lf = bp->data;		/* leaf block contents */
+	xfs_dir2_leaf_tail_t	*tail;			/* leaf tail structure */
+
+	ASSERT(INT_GET(lf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
+	tail = XFS_DIR2_LEAF_TAIL_P(mp, lf);
+	xfs_da_log_buf(tp, bp, (uint)((char *)tail - (char *)lf),
+		(uint)(mp->m_dirblksize - 1));
+}
+
+/*
+ * Look up the entry referred to by args in the leaf format directory.
+ * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which
+ * is also used by the node-format code.
+ * When the name is found its inode number is stored in args->inumber,
+ * both buffers are released and EEXIST is returned; an error from the
+ * internal lookup is returned unchanged.
+ */
+int
+xfs_dir2_leaf_lookup(
+	xfs_da_args_t		*args)		/* operation arguments */
+{
+	xfs_dabuf_t		*dbuf;		/* data block buffer */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	xfs_dir2_data_entry_t	*ent;		/* matching data entry */
+	int			error;		/* error return code */
+	int			idx;		/* found leaf entry index */
+	xfs_dabuf_t		*lbuf;		/* leaf buffer */
+	xfs_dir2_leaf_t		*lf;		/* leaf structure */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args("leaf_lookup", args);
+	/*
+	 * Look up name in the leaf block, returning both buffers and index.
+	 */
+	error = xfs_dir2_leaf_lookup_int(args, &lbuf, &idx, &dbuf);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	dp = args->dp;
+	tp = args->trans;
+	xfs_dir2_leaf_check(dp, lbuf);
+	lf = lbuf->data;
+	/*
+	 * The leaf entry's address gives the offset of the data entry
+	 * within the data block.
+	 */
+	ent = (xfs_dir2_data_entry_t *)
+	      ((char *)dbuf->data +
+	       XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount,
+			INT_GET(lf->ents[idx].address, ARCH_CONVERT)));
+	/*
+	 * Hand back the inode number, then drop both buffers.
+	 */
+	args->inumber = INT_GET(ent->inumber, ARCH_CONVERT);
+	xfs_da_brelse(tp, dbuf);
+	xfs_da_brelse(tp, lbuf);
+	return XFS_ERROR(EEXIST);
+}
+
+/*
+ * Look up name/hash in the leaf block.
+ * Fill in indexp with the found index, and dbpp with the data buffer.
+ * If not found dbpp will be NULL, and ENOENT comes back.
+ * lbpp will always be filled in with the leaf buffer unless there's an error.
+ */ +STATIC int /* error */ +xfs_dir2_leaf_lookup_int( + xfs_da_args_t *args, /* operation arguments */ + xfs_dabuf_t **lbpp, /* out: leaf buffer */ + int *indexp, /* out: index in leaf block */ + xfs_dabuf_t **dbpp) /* out: data buffer */ +{ + xfs_dir2_db_t curdb; /* current data block number */ + xfs_dabuf_t *dbp; /* data buffer */ + xfs_dir2_data_entry_t *dep; /* data entry */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return code */ + int index; /* index in leaf block */ + xfs_dabuf_t *lbp; /* leaf buffer */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_dir2_db_t newdb; /* new data block number */ + xfs_trans_t *tp; /* transaction pointer */ + + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + /* + * Read the leaf block into the buffer. + */ + if (error = + xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp, + XFS_DATA_FORK)) { +#pragma mips_frequency_hint NEVER + return error; + } + *lbpp = lbp; + leaf = lbp->data; + xfs_dir2_leaf_check(dp, lbp); + /* + * Look for the first leaf entry with our hash value. + */ + index = xfs_dir2_leaf_search_hash(args, lbp); + /* + * Loop over all the entries with the right hash value + * looking to match the name. + */ + for (lep = &leaf->ents[index], dbp = NULL, curdb = -1; + index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval; + lep++, index++) { + /* + * Skip over stale leaf entries. + */ + if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + continue; + /* + * Get the new data block number. + */ + newdb = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT)); + /* + * If it's not the same as the old data block number, + * need to pitch the old one and read the new one.
+ * On a read failure the leaf buffer is released too, even + * though *lbpp has already been set above. + */ + if (newdb != curdb) { + if (dbp) + xfs_da_brelse(tp, dbp); + if (error = + xfs_da_read_buf(tp, dp, + XFS_DIR2_DB_TO_DA(mp, newdb), -1, &dbp, + XFS_DATA_FORK)) { +#pragma mips_frequency_hint NEVER + xfs_da_brelse(tp, lbp); + return error; + } + xfs_dir2_data_check(dp, dbp); + curdb = newdb; + } + /* + * Point to the data entry. + */ + dep = (xfs_dir2_data_entry_t *) + ((char *)dbp->data + + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT))); + /* + * If it matches then return it. + * The length and first byte are compared before the full name. + * Neither buffer is released on this path; both are handed + * to the caller through *lbpp and *dbpp. + */ + if (dep->namelen == args->namelen && + dep->name[0] == args->name[0] && + bcmp(dep->name, args->name, args->namelen) == 0) { + *dbpp = dbp; + *indexp = index; + return 0; + } + } + /* + * No match found, return ENOENT. + * Both buffers are released first. + */ + ASSERT(args->oknoent); + if (dbp) + xfs_da_brelse(tp, dbp); + xfs_da_brelse(tp, lbp); + return XFS_ERROR(ENOENT); +} + +/* + * Remove an entry from a leaf format directory. + * The leaf entry is marked stale rather than removed from the table. + */ +int /* error */ +xfs_dir2_leaf_removename( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_dir2_data_off_t *bestsp; /* leaf block best freespace */ + xfs_dir2_data_t *data; /* data block structure */ + xfs_dir2_db_t db; /* data block number */ + xfs_dabuf_t *dbp; /* data block buffer */ + xfs_dir2_data_entry_t *dep; /* data entry structure */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return code */ + xfs_dir2_db_t i; /* temporary data block # */ + int index; /* index into leaf entries */ + xfs_dabuf_t *lbp; /* leaf buffer */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ + xfs_mount_t *mp; /* filesystem mount point */ + int needlog; /* need to log data header */ + int needscan; /* need to rescan data frees */ + xfs_dir2_data_off_t oldbest; /* old value of best free */ + xfs_trans_t *tp; /* transaction pointer */ + + xfs_dir2_trace_args("leaf_removename", args); + /* + * Lookup the leaf entry, get the leaf and data blocks read
in. + */ + if (error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp)) { +#pragma mips_frequency_hint NEVER + return error; + } + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + leaf = lbp->data; + data = dbp->data; + xfs_dir2_data_check(dp, dbp); + /* + * Point to the leaf entry, use that to point to the data entry. + */ + lep = &leaf->ents[index]; + db = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT)); + dep = (xfs_dir2_data_entry_t *) + ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT))); + needscan = needlog = 0; + /* + * Remember the current best free length so a change can be + * detected after the entry has been freed. + */ + oldbest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT); + ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT); + ASSERT(INT_GET(bestsp[db], ARCH_CONVERT) == oldbest); + /* + * Mark the former data entry unused. + */ + xfs_dir2_data_make_free(tp, dbp, + (xfs_dir2_data_aoff_t)((char *)dep - (char *)data), + XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan); + /* + * We just mark the leaf entry stale by putting a null in it. + */ + INT_MOD(leaf->hdr.stale, ARCH_CONVERT, +1); + xfs_dir2_leaf_log_header(tp, lbp); + INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR); + xfs_dir2_leaf_log_ents(tp, lbp, index, index); + /* + * Scan the freespace in the data block again if necessary, + * log the data block header if necessary. + */ + if (needscan) + xfs_dir2_data_freescan(mp, data, &needlog, NULL); + if (needlog) + xfs_dir2_data_log_header(tp, dbp); + /* + * If the longest freespace in the data block has changed, + * put the new value in the bests table and log that. + */ + if (INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) != oldbest) { + INT_COPY(bestsp[db], data->hdr.bestfree[0].length, ARCH_CONVERT); + xfs_dir2_leaf_log_bests(tp, lbp, db, db); + } + xfs_dir2_data_check(dp, dbp); + /* + * If the data block is now empty then get rid of the data block.
+ * Empty means the best free length covers everything after the + * data block header. + */ + if (INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) == + mp->m_dirblksize - (uint)sizeof(data->hdr)) { +#pragma mips_frequency_hint NEVER + ASSERT(db != mp->m_dirdatablk); + if (error = xfs_dir2_shrink_inode(args, db, dbp)) { + /* + * Nope, can't get rid of it because it caused + * allocation of a bmap btree block to do so. + * Just go on, returning success, leaving the + * empty block in place. + */ + if (error == ENOSPC && args->total == 0) { + xfs_da_buf_done(dbp); + error = 0; + } + xfs_dir2_leaf_check(dp, lbp); + xfs_da_buf_done(lbp); + return error; + } + dbp = NULL; + /* + * If this is the last data block then compact the + * bests table by getting rid of entries. + */ + if (db == INT_GET(ltp->bestcount, ARCH_CONVERT) - 1) { + /* + * Look for the last active entry (i). + * The scan stops at index 0 without testing it. + */ + for (i = db - 1; i > 0; i--) { + if (INT_GET(bestsp[i], ARCH_CONVERT) != NULLDATAOFF) + break; + } + /* + * Copy the table down so inactive entries at the + * end are removed. + */ + ovbcopy(bestsp, &bestsp[db - i], + (INT_GET(ltp->bestcount, ARCH_CONVERT) - (db - i)) * sizeof(*bestsp)); + INT_MOD(ltp->bestcount, ARCH_CONVERT, -(db - i)); + xfs_dir2_leaf_log_tail(tp, lbp); + xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1); + } else + INT_SET(bestsp[db], ARCH_CONVERT, NULLDATAOFF); + } + /* + * If the data block was not the first one, drop it. + */ + else if (db != mp->m_dirdatablk && dbp != NULL) { + xfs_da_buf_done(dbp); + dbp = NULL; + } + xfs_dir2_leaf_check(dp, lbp); + /* + * See if we can convert to block form. + * dbp is NULL here unless the first data block is still held. + */ + return xfs_dir2_leaf_to_block(args, lbp, dbp); +} + +/* + * Replace the inode number in a leaf format directory entry.
+ */ +int /* error */ +xfs_dir2_leaf_replace( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_dabuf_t *dbp; /* data block buffer */ + xfs_dir2_data_entry_t *dep; /* data block entry */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return code */ + int index; /* index of leaf entry */ + xfs_dabuf_t *lbp; /* leaf buffer */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + xfs_trans_t *tp; /* transaction pointer */ + + xfs_dir2_trace_args("leaf_replace", args); + /* + * Look up the entry. + */ + if (error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp)) { +#pragma mips_frequency_hint NEVER + return error; + } + dp = args->dp; + leaf = lbp->data; + /* + * Point to the leaf entry, get data address from it. + */ + lep = &leaf->ents[index]; + /* + * Point to the data entry. + */ + dep = (xfs_dir2_data_entry_t *) + ((char *)dbp->data + + XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, INT_GET(lep->address, ARCH_CONVERT))); + ASSERT(args->inumber != INT_GET(dep->inumber, ARCH_CONVERT)); + /* + * Put the new inode number in, log it. + */ + INT_SET(dep->inumber, ARCH_CONVERT, args->inumber); + tp = args->trans; + xfs_dir2_data_log_entry(tp, dbp, dep); + xfs_da_buf_done(dbp); + xfs_dir2_leaf_check(dp, lbp); + xfs_da_brelse(tp, lbp); + return 0; +} + +/* + * Return index in the leaf block (lbp) which is either the first + * one with this hash value, or if there are none, the insert point + * for that hash value. 
+ */ +int /* index value */ +xfs_dir2_leaf_search_hash( + xfs_da_args_t *args, /* operation arguments */ + xfs_dabuf_t *lbp) /* leaf buffer */ +{ + xfs_dahash_t hash; /* hash from this entry */ + xfs_dahash_t hashwant; /* hash value looking for */ + int high; /* high leaf index */ + int low; /* low leaf index */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + int mid; /* current leaf index */ + + leaf = lbp->data; +#ifndef __KERNEL__ + if (INT_GET(leaf->hdr.count, ARCH_CONVERT) == 0) + return 0; +#endif + /* + * Note, the table cannot be empty, so we have to go through the loop. + * Binary search the leaf entries looking for our hash value. + */ + for (lep = leaf->ents, low = 0, high = INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1, + hashwant = args->hashval; + low <= high; ) { + mid = (low + high) >> 1; + if ((hash = INT_GET(lep[mid].hashval, ARCH_CONVERT)) == hashwant) + break; + if (hash < hashwant) + low = mid + 1; + else + high = mid - 1; + } + /* + * Found one, back up through all the equal hash values. + */ + if (hash == hashwant) { + while (mid > 0 && INT_GET(lep[mid - 1].hashval, ARCH_CONVERT) == hashwant) { +#pragma mips_frequency_hint NEVER + mid--; + } + } + /* + * Need to point to an entry higher than ours. + */ + else if (hash < hashwant) + mid++; + return mid; +} + +/* + * Trim off a trailing data block. We know it's empty since the leaf + * freespace table says so. 
+ *
+ * Reads data block db, frees it with xfs_dir2_shrink_inode(), then drops
+ * one entry from the leaf's bests table and logs the leaf tail and bests.
+ * Returns 0 on success, or the error from the read or from the shrink.
+ */
+int						/* error */
+xfs_dir2_leaf_trim_data(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_dabuf_t		*lbp,		/* leaf buffer */
+	xfs_dir2_db_t		db)		/* data block number */
+{
+	xfs_dir2_data_off_t	*bestsp;	/* leaf bests table */
+#ifdef DEBUG
+	xfs_dir2_data_t		*data;		/* data block structure */
+#endif
+	xfs_dabuf_t		*dbp;		/* data block buffer */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return value */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	dp = args->dp;
+	mp = dp->i_mount;
+	tp = args->trans;
+	/*
+	 * Read the offending data block.  We need its buffer.
+	 */
+	if (error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, db), -1, &dbp,
+			XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+#ifdef DEBUG
+	data = dbp->data;
+	ASSERT(INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC);
+#endif
+	/* this seems to be an error
+	 * data is only valid if DEBUG is defined?
+	 * RMC 09/08/1999
+	 *
+	 * NOTE(review): the first ASSERT below also names data, outside the
+	 * #ifdef.  Presumably ASSERT() expands to nothing when DEBUG is not
+	 * defined, which would make this harmless -- confirm.
+	 */
+
+	leaf = lbp->data;
+	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	ASSERT(INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) ==
+	       mp->m_dirblksize - (uint)sizeof(data->hdr));
+	ASSERT(db == INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+	/*
+	 * Get rid of the data block.
+	 * On failure the data buffer is released here before returning.
+	 */
+	if (error = xfs_dir2_shrink_inode(args, db, dbp)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(error != ENOSPC);
+		xfs_da_brelse(tp, dbp);
+		return error;
+	}
+	/*
+	 * Eliminate the last bests entry from the table.
+	 * bestcount is decremented first, then that many entries are moved
+	 * from bestsp[0] to bestsp[1], i.e. the table start moves up by one
+	 * slot.  The whole remaining table is then logged.
+	 */
+	bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
+	INT_MOD(ltp->bestcount, ARCH_CONVERT, -1);
+	ovbcopy(&bestsp[0], &bestsp[1], INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(*bestsp));
+	xfs_dir2_leaf_log_tail(tp, lbp);
+	xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+	return 0;
+}
+
+/*
+ * Convert node form directory to leaf form directory.
+ * The root of the node form dir needs to already be a LEAFN block.
+ * Just return if we can't do anything.
+ *
+ * Returns 0 both when the conversion is done and when it is not possible
+ * (more than one btree level, more than one leaf block, or the result
+ * would not fit in one directory block).  A nonzero return is an error
+ * from one of the block-map, read, trim, shrink or leaf-to-block calls.
+ */
+int						/* error */
+xfs_dir2_node_to_leaf(
+	xfs_da_state_t		*state)		/* directory operation state */
+{
+	xfs_da_args_t		*args;		/* operation arguments */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return code */
+	xfs_dabuf_t		*fbp;		/* buffer for freespace block */
+	xfs_fileoff_t		fo;		/* freespace file offset */
+	xfs_dir2_free_t		*free;		/* freespace structure */
+	xfs_dabuf_t		*lbp;		/* buffer for leaf block */
+	xfs_dir2_leaf_tail_t	*ltp;		/* tail of leaf structure */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			rval;		/* successful free trim? */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	/*
+	 * There's more than a leaf level in the btree, so there must
+	 * be multiple leafn blocks.  Give up.
+	 */
+	if (state->path.active > 1)
+		return 0;
+	args = state->args;
+	xfs_dir2_trace_args("node_to_leaf", args);
+	mp = state->mp;
+	dp = args->dp;
+	tp = args->trans;
+	/*
+	 * Get the last offset in the file.
+	 * Stepping back by m_dirblkfsbs makes fo the start of the last
+	 * directory block.
+	 */
+	if (error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	fo -= mp->m_dirblkfsbs;
+	/*
+	 * If there are freespace blocks other than the first one,
+	 * take this opportunity to remove trailing empty freespace blocks
+	 * that may have been left behind during no-space-reservation
+	 * operations.
+	 * If a block could not be trimmed (rval == 0) the conversion is
+	 * abandoned with a 0 return.
+	 */
+	while (fo > mp->m_dirfreeblk) {
+		if (error = xfs_dir2_node_trim_free(args, fo, &rval)) {
+#pragma mips_frequency_hint NEVER
+			return error;
+		}
+		if (rval)
+			fo -= mp->m_dirblkfsbs;
+		else
+			return 0;
+	}
+	/*
+	 * Now find the block just before the freespace block.
+	 */
+	if (error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	/*
+	 * If it's not the single leaf block, give up.
+	 */
+	if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize)
+		return 0;
+	lbp = state->path.blk[0].bp;
+	leaf = lbp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	/*
+	 * Read the freespace block.
+	 */
+	if (error = xfs_da_read_buf(tp, dp, mp->m_dirfreeblk, -1, &fbp,
+			XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	free = fbp->data;
+	ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+	ASSERT(INT_ISZERO(free->hdr.firstdb, ARCH_CONVERT));
+	/*
+	 * Now see if the leafn and free data will fit in a leaf1.
+	 * If not, release the buffer and give up.
+	 * The size counted is: leaf header, the live (count - stale) leaf
+	 * entries, one bests slot per valid freespace entry, and the tail.
+	 */
+	if ((uint)sizeof(leaf->hdr) +
+	    (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT)) * (uint)sizeof(leaf->ents[0]) +
+	    INT_GET(free->hdr.nvalid, ARCH_CONVERT) * (uint)sizeof(leaf->bests[0]) +
+	    (uint)sizeof(leaf->tail) >
+	    mp->m_dirblksize) {
+		xfs_da_brelse(tp, fbp);
+		return 0;
+	}
+	/*
+	 * If the leaf has any stale entries in it, compress them out.
+	 * The compact routine will log the header.
+	 */
+	if (INT_GET(leaf->hdr.stale, ARCH_CONVERT))
+		xfs_dir2_leaf_compact(args, lbp);
+	else
+		xfs_dir2_leaf_log_header(tp, lbp);
+	INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAF1_MAGIC);
+	/*
+	 * Set up the leaf tail from the freespace block.
+	 */
+	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	INT_COPY(ltp->bestcount, free->hdr.nvalid, ARCH_CONVERT);
+	/*
+	 * Set up the leaf bests table.
+	 * The entries are copied as-is from the freespace block.
+	 */
+	bcopy(free->bests, XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT),
+		INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(leaf->bests[0]));
+	xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
+	xfs_dir2_leaf_log_tail(tp, lbp);
+	xfs_dir2_leaf_check(dp, lbp);
+	/*
+	 * Get rid of the freespace block.
+	 */
+	error = xfs_dir2_shrink_inode(args, XFS_DIR2_FREE_FIRSTDB(mp), fbp);
+	if (error) {
+#pragma mips_frequency_hint NEVER
+		/*
+		 * This can't fail here because it can only happen when
+		 * punching out the middle of an extent, and this is an
+		 * isolated block.
+		 *
+		 * NOTE(review): fbp is not released on this path, whereas
+		 * xfs_dir2_leaf_trim_data() does an xfs_da_brelse() after
+		 * a failed shrink -- confirm which one is intended.
+		 */
+		ASSERT(error != ENOSPC);
+		return error;
+	}
+	fbp = NULL;
+	/*
+	 * Now see if we can convert the single-leaf directory
+	 * down to a block form directory.
+	 * This routine always kills the dabuf for the leaf, so
+	 * eliminate it from the path.
+	 */
+	error = xfs_dir2_leaf_to_block(args, lbp, NULL);
+	state->path.blk[0].bp = NULL;
+	return error;
+}
diff --git a/libxfs/xfs_dir2_node.c b/libxfs/xfs_dir2_node.c
new file mode 100644
index 000000000..b6050ba6a
--- /dev/null
+++ b/libxfs/xfs_dir2_node.c
@@ -0,0 +1,1988 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * xfs_dir2_node.c + * XFS directory implementation, version 2, node form files + * See data structures in xfs_dir2_node.h and xfs_da_btree.h. + */ + +#include + +/* + * Log entries from a freespace block. + */ +void +xfs_dir2_free_log_bests( + xfs_trans_t *tp, /* transaction pointer */ + xfs_dabuf_t *bp, /* freespace buffer */ + int first, /* first entry to log */ + int last) /* last entry to log */ +{ + xfs_dir2_free_t *free; /* freespace structure */ + + free = bp->data; + ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + xfs_da_log_buf(tp, bp, + (uint)((char *)&free->bests[first] - (char *)free), + (uint)((char *)&free->bests[last] - (char *)free + + sizeof(free->bests[0]) - 1)); +} + +/* + * Log header from a freespace block. + */ +static void +xfs_dir2_free_log_header( + xfs_trans_t *tp, /* transaction pointer */ + xfs_dabuf_t *bp) /* freespace buffer */ +{ + xfs_dir2_free_t *free; /* freespace structure */ + + free = bp->data; + ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free), + (uint)(sizeof(xfs_dir2_free_hdr_t) - 1)); +} + +/* + * Convert a leaf-format directory to a node-format directory. + * We need to change the magic number of the leaf block, and copy + * the freespace table out of the leaf block into its own block. 
+ *
+ * Returns 0 on success, or the error from xfs_dir2_grow_inode() or
+ * xfs_da_get_buf().
+ */
+int						/* error */
+xfs_dir2_leaf_to_node(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_dabuf_t		*lbp)		/* leaf buffer */
+{
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return value */
+	xfs_dabuf_t		*fbp;		/* freespace buffer */
+	xfs_dir2_db_t		fdb;		/* freespace block number */
+	xfs_dir2_free_t		*free;		/* freespace structure */
+	xfs_dir2_data_off_t	*from;		/* pointer to freespace entry */
+	int			i;		/* leaf freespace index */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			n;		/* count of live freespc ents */
+	xfs_dir2_data_off_t	off;		/* freespace entry value */
+	xfs_dir2_data_off_t	*to;		/* pointer to freespace entry */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args_b("leaf_to_node", args, lbp);
+	dp = args->dp;
+	mp = dp->i_mount;
+	tp = args->trans;
+	/*
+	 * Add a freespace block to the directory.
+	 * It is expected to be the first freespace block (asserted below).
+	 */
+	if (error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fdb)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ASSERT(fdb == XFS_DIR2_FREE_FIRSTDB(mp));
+	/*
+	 * Get the buffer for the new freespace block.
+	 */
+	if (error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb), -1, &fbp,
+			XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+		return error;
+	}
+	ASSERT(fbp != NULL);
+	free = fbp->data;
+	leaf = lbp->data;
+	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
+	/*
+	 * Initialize the freespace block header.
+	 * nvalid is taken straight from the leaf tail's bestcount.
+	 */
+	INT_SET(free->hdr.magic, ARCH_CONVERT, XFS_DIR2_FREE_MAGIC);
+	INT_ZERO(free->hdr.firstdb, ARCH_CONVERT);
+	ASSERT(INT_GET(ltp->bestcount, ARCH_CONVERT) <= (uint)dp->i_d.di_size / mp->m_dirblksize);
+	INT_COPY(free->hdr.nvalid, ltp->bestcount, ARCH_CONVERT);
+	/*
+	 * Copy freespace entries from the leaf block to the new block.
+	 * Count active entries.
+	 * Every entry is copied; only those that are not NULLDATAOFF are
+	 * counted into n, which becomes hdr.nused.
+	 */
+	for (i = n = 0, from = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT), to = free->bests;
+	     i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++, from++, to++) {
+		if ((off = INT_GET(*from, ARCH_CONVERT)) != NULLDATAOFF)
+			n++;
+		INT_SET(*to, ARCH_CONVERT, off);
+	}
+	INT_SET(free->hdr.nused, ARCH_CONVERT, n);
+	INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAFN_MAGIC);
+	/*
+	 * Log everything.
+	 */
+	xfs_dir2_leaf_log_header(tp, lbp);
+	xfs_dir2_free_log_header(tp, fbp);
+	xfs_dir2_free_log_bests(tp, fbp, 0, INT_GET(free->hdr.nvalid, ARCH_CONVERT) - 1);
+	xfs_da_buf_done(fbp);
+	xfs_dir2_leafn_check(dp, lbp);
+	return 0;
+}
+
+/*
+ * Add a leaf entry to a leaf block in a node-form directory.
+ * The other work necessary is done from the caller.
+ *
+ * The new entry takes its hash from args->hashval and its address from
+ * args->blkno and args->index.
+ * Returns XFS_ERROR(ENOSPC) when the block already holds the maximum
+ * number of entries and none of them is stale.  Returns 0 without
+ * changing the block when args->justcheck is set, and 0 after the
+ * insertion otherwise.
+ */
+static int					/* error */
+xfs_dir2_leafn_add(
+	xfs_dabuf_t		*bp,		/* leaf buffer */
+	xfs_da_args_t		*args,		/* operation arguments */
+	int			index)		/* insertion pt for new entry */
+{
+	int			compact;	/* compacting stale leaves */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			highstale;	/* next stale entry */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
+	int			lfloghigh;	/* high leaf entry logging */
+	int			lfloglow;	/* low leaf entry logging */
+	int			lowstale;	/* previous stale entry */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args_sb("leafn_add", args, index, bp);
+	dp = args->dp;
+	mp = dp->i_mount;
+	tp = args->trans;
+	leaf = bp->data;
+	/*
+	 * If there are already the maximum number of leaf entries in
+	 * the block, if there are no stale entries it won't fit.
+	 * Caller will do a split.  If there are stale entries we'll do
+	 * a compact.
+	 * The compact is only requested when more than one entry is stale;
+	 * a single stale entry is simply reused below.
+	 */
+	if (INT_GET(leaf->hdr.count, ARCH_CONVERT) == XFS_DIR2_MAX_LEAF_ENTS(mp)) {
+#pragma mips_frequency_hint NEVER
+		if (INT_ISZERO(leaf->hdr.stale, ARCH_CONVERT))
+			return XFS_ERROR(ENOSPC);
+		compact = INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1;
+	} else
+		compact = 0;
+	ASSERT(index == 0 || INT_GET(leaf->ents[index - 1].hashval, ARCH_CONVERT) <= args->hashval);
+	ASSERT(index == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
+	       INT_GET(leaf->ents[index].hashval, ARCH_CONVERT) >= args->hashval);
+
+	if (args->justcheck)
+		return 0;
+
+	/*
+	 * Compact out all but one stale leaf entry.  Leaves behind
+	 * the entry closest to index.
+	 * The helper is passed index, lowstale, highstale and both logging
+	 * bounds by address.
+	 */
+	if (compact) {
+#pragma mips_frequency_hint NEVER
+		xfs_dir2_leaf_compact_x1(bp, &index, &lowstale, &highstale,
+			&lfloglow, &lfloghigh);
+	}
+	/*
+	 * Set impossible logging indices for this case.
+	 * They are narrowed to the real range by the MIN/MAX calls below.
+	 */
+	else if (!INT_ISZERO(leaf->hdr.stale, ARCH_CONVERT)) {
+		lfloglow = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		lfloghigh = -1;
+	}
+	/*
+	 * No stale entries, just insert a space for the new entry.
+	 * Everything from index onward moves up one slot and the entry
+	 * count grows by one.
+	 */
+	if (INT_ISZERO(leaf->hdr.stale, ARCH_CONVERT)) {
+		lep = &leaf->ents[index];
+		if (index < INT_GET(leaf->hdr.count, ARCH_CONVERT))
+			ovbcopy(lep, lep + 1,
+				(INT_GET(leaf->hdr.count, ARCH_CONVERT) - index) * sizeof(*lep));
+		lfloglow = index;
+		lfloghigh = INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		INT_MOD(leaf->hdr.count, ARCH_CONVERT, +1);
+	}
+	/*
+	 * There are stale entries.  We'll use one for the new entry.
+	 * The entry count is unchanged; the stale count drops by one.
+	 */
+	else {
+		/*
+		 * If we didn't do a compact then we need to figure out
+		 * which stale entry will be used.
+		 */
+		if (compact == 0) {
+			/*
+			 * Find first stale entry before our insertion point.
+			 * lowstale ends up -1 if there is none.
+			 */
+			for (lowstale = index - 1;
+			     lowstale >= 0 &&
+				INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) !=
+				XFS_DIR2_NULL_DATAPTR;
+			     lowstale--)
+				continue;
+			/*
+			 * Find next stale entry after insertion point.
+			 * Stop looking if the answer would be worse than
+			 * lowstale already found.
+			 */
+			for (highstale = index;
+			     highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) &&
+				INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) !=
+				XFS_DIR2_NULL_DATAPTR &&
+				(lowstale < 0 ||
+				 index - lowstale - 1 >= highstale - index);
+			     highstale++)
+				continue;
+		}
+		/*
+		 * Using the low stale entry.
+		 * Shift entries up toward the stale slot.
+		 * The entries between lowstale and index move one slot
+		 * toward lowstale, so the new entry lands at index - 1.
+		 */
+		if (lowstale >= 0 &&
+		    (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
+		     index - lowstale - 1 < highstale - index)) {
+			ASSERT(INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) ==
+			       XFS_DIR2_NULL_DATAPTR);
+			ASSERT(index - lowstale - 1 >= 0);
+			if (index - lowstale - 1 > 0)
+				ovbcopy(&leaf->ents[lowstale + 1],
+					&leaf->ents[lowstale],
+					(index - lowstale - 1) * sizeof(*lep));
+			lep = &leaf->ents[index - 1];
+			lfloglow = MIN(lowstale, lfloglow);
+			lfloghigh = MAX(index - 1, lfloghigh);
+		}
+		/*
+		 * Using the high stale entry.
+		 * Shift entries down toward the stale slot.
+		 * The entries from index up to highstale move one slot
+		 * toward highstale, so the new entry lands at index.
+		 */
+		else {
+			ASSERT(INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) ==
+			       XFS_DIR2_NULL_DATAPTR);
+			ASSERT(highstale - index >= 0);
+			if (highstale - index > 0)
+				ovbcopy(&leaf->ents[index],
+					&leaf->ents[index + 1],
+					(highstale - index) * sizeof(*lep));
+			lep = &leaf->ents[index];
+			lfloglow = MIN(index, lfloglow);
+			lfloghigh = MAX(highstale, lfloghigh);
+		}
+		INT_MOD(leaf->hdr.stale, ARCH_CONVERT, -1);
+	}
+	/*
+	 * Insert the new entry, log everything.
+	 */
+	INT_SET(lep->hashval, ARCH_CONVERT, args->hashval);
+	INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_DB_OFF_TO_DATAPTR(mp, args->blkno, args->index));
+	xfs_dir2_leaf_log_header(tp, bp);
+	xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh);
+	xfs_dir2_leafn_check(dp, bp);
+	return 0;
+}
+
+#ifdef DEBUG
+/*
+ * Check internal consistency of a leafn block.
+ */ +void +xfs_dir2_leafn_check( + xfs_inode_t *dp, /* incore directory inode */ + xfs_dabuf_t *bp) /* leaf buffer */ +{ + int i; /* leaf index */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_mount_t *mp; /* filesystem mount point */ + int stale; /* count of stale leaves */ + + leaf = bp->data; + mp = dp->i_mount; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) <= XFS_DIR2_MAX_LEAF_ENTS(mp)); + for (i = stale = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); i++) { + if (i + 1 < INT_GET(leaf->hdr.count, ARCH_CONVERT)) { + ASSERT(INT_GET(leaf->ents[i].hashval, ARCH_CONVERT) <= + INT_GET(leaf->ents[i + 1].hashval, ARCH_CONVERT)); + } + if (INT_GET(leaf->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + stale++; + } + ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == stale); +} +#endif /* DEBUG */ + +/* + * Return the last hash value in the leaf. + * Stale entries are ok. + */ +xfs_dahash_t /* hash value */ +xfs_dir2_leafn_lasthash( + xfs_dabuf_t *bp, /* leaf buffer */ + int *count) /* count of entries in leaf */ +{ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + + leaf = bp->data; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + if (count) + *count = INT_GET(leaf->hdr.count, ARCH_CONVERT); + if (INT_ISZERO(leaf->hdr.count, ARCH_CONVERT)) + return 0; + return INT_GET(leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT); +} + +/* + * Look up a leaf entry in a node-format leaf block. + * If this is an addname then the extrablk in state is a freespace block, + * otherwise it's a data block. 
+ *
+ * This routine never returns 0.  The result is XFS_ERROR(EEXIST) when a
+ * non-addname lookup finds the name, XFS_ERROR(ENOENT) when no match is
+ * found or when an addname lookup finds a data block with enough room,
+ * XFS_ERROR(EFSCORRUPTED) when the bests entry for a referenced data
+ * block is NULLDATAOFF, or the error from reading a block.
+ * *indexp is set on the EEXIST and ENOENT returns.
+ */
+int
+xfs_dir2_leafn_lookup_int(
+	xfs_dabuf_t		*bp,		/* leaf buffer */
+	xfs_da_args_t		*args,		/* operation arguments */
+	int			*indexp,	/* out: leaf entry index */
+	xfs_da_state_t		*state)		/* state to fill in */
+{
+	xfs_dabuf_t		*curbp;		/* current data/free buffer */
+	xfs_dir2_db_t		curdb;		/* current data block number */
+	xfs_dir2_db_t		curfdb;		/* current free block number */
+	xfs_dir2_data_entry_t	*dep;		/* data block entry */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	int			error;		/* error return value */
+	int			fi;		/* free entry index */
+	xfs_dir2_free_t		*free;		/* free block structure */
+	int			index;		/* leaf entry index */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	int			length;		/* length of new data entry */
+	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	xfs_dir2_db_t		newdb;		/* new data block number */
+	xfs_dir2_db_t		newfdb;		/* new free block number */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	dp = args->dp;
+	tp = args->trans;
+	mp = dp->i_mount;
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+#ifdef __KERNEL__
+	ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) > 0);
+#endif
+	xfs_dir2_leafn_check(dp, bp);
+	/*
+	 * Look up the hash value in the leaf entries.
+	 */
+	index = xfs_dir2_leaf_search_hash(args, bp);
+	/*
+	 * Do we have a buffer coming in?
+	 */
+	if (state->extravalid)
+		curbp = state->extrablk.bp;
+	else
+		curbp = NULL;
+	/*
+	 * For addname, it's a free block buffer, get the block number.
+	 * length is the size the new data entry will need.
+	 */
+	if (args->addname) {
+		curfdb = curbp ? state->extrablk.blkno : -1;
+		curdb = -1;
+		length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
+		if (free = (curbp ? curbp->data : NULL))
+			ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
+	}
+	/*
+	 * For others, it's a data block buffer, get the block number.
+	 */
+	else {
+		curfdb = -1;
+		curdb = curbp ? state->extrablk.blkno : -1;
+	}
+	/*
+	 * Loop over leaf entries with the right hash value.
+	 */
+	for (lep = &leaf->ents[index];
+	     index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
+	     lep++, index++) {
+		/*
+		 * Skip stale leaf entries.
+		 */
+		if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+			continue;
+		/*
+		 * Pull the data block number from the entry.
+		 */
+		newdb = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
+		/*
+		 * For addname, we're looking for a place to put the new entry.
+		 * We want to use a data block with an entry of equal
+		 * hash value to ours if there is one with room.
+		 */
+		if (args->addname) {
+			/*
+			 * If this block isn't the data block we already have
+			 * in hand, take a look at it.
+			 */
+			if (newdb != curdb) {
+				curdb = newdb;
+				/*
+				 * Convert the data block to the free block
+				 * holding its freespace information.
+				 */
+				newfdb = XFS_DIR2_DB_TO_FDB(mp, newdb);
+				/*
+				 * If it's not the one we have in hand,
+				 * read it in.
+				 */
+				if (newfdb != curfdb) {
+					/*
+					 * If we had one before, drop it.
+					 */
+					if (curbp)
+						xfs_da_brelse(tp, curbp);
+					/*
+					 * Read the free block.
+					 * NOTE(review): on a read error the
+					 * state's extrablk fields are not
+					 * updated although the old buffer may
+					 * just have been released -- confirm
+					 * callers do not reuse it.
+					 */
+					if (error = xfs_da_read_buf(tp, dp,
+							XFS_DIR2_DB_TO_DA(mp,
+								newfdb),
+							-1, &curbp,
+							XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+						return error;
+					}
+					curfdb = newfdb;
+					free = curbp->data;
+					ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) ==
+					       XFS_DIR2_FREE_MAGIC);
+					ASSERT((INT_GET(free->hdr.firstdb, ARCH_CONVERT) %
+						XFS_DIR2_MAX_FREE_BESTS(mp)) ==
+					       0);
+					ASSERT(INT_GET(free->hdr.firstdb, ARCH_CONVERT) <= curdb);
+					ASSERT(curdb <
+					       INT_GET(free->hdr.firstdb, ARCH_CONVERT) +
+					       INT_GET(free->hdr.nvalid, ARCH_CONVERT));
+				}
+				/*
+				 * Get the index for our entry.
+				 */
+				fi = XFS_DIR2_DB_TO_FDINDEX(mp, curdb);
+				/*
+				 * If it has room, return it.
+				 * A NULLDATAOFF bests entry for a data block
+				 * that a live leaf entry points at is
+				 * reported as corruption.
+				 */
+				if (INT_GET(free->bests[fi], ARCH_CONVERT) == NULLDATAOFF) {
+#pragma mips_frequency_hint NEVER
+					return XFS_ERROR(EFSCORRUPTED);
+				}
+				if (INT_GET(free->bests[fi], ARCH_CONVERT) >= length) {
+					*indexp = index;
+					state->extravalid = 1;
+					state->extrablk.bp = curbp;
+					state->extrablk.blkno = curfdb;
+					state->extrablk.index = fi;
+					state->extrablk.magic =
+						XFS_DIR2_FREE_MAGIC;
+					ASSERT(args->oknoent);
+					return XFS_ERROR(ENOENT);
+				}
+			}
+		}
+		/*
+		 * Not adding a new entry, so we really want to find
+		 * the name given to us.
+		 */
+		else {
+			/*
+			 * If it's a different data block, go get it.
+			 */
+			if (newdb != curdb) {
+				/*
+				 * If we had a block before, drop it.
+				 */
+				if (curbp)
+					xfs_da_brelse(tp, curbp);
+				/*
+				 * Read the data block.
+				 */
+				if (error =
+				    xfs_da_read_buf(tp, dp,
+					    XFS_DIR2_DB_TO_DA(mp, newdb), -1,
+					    &curbp, XFS_DATA_FORK)) {
+#pragma mips_frequency_hint NEVER
+					return error;
+				}
+				xfs_dir2_data_check(dp, curbp);
+				curdb = newdb;
+			}
+			/*
+			 * Point to the data entry.
+			 */
+			dep = (xfs_dir2_data_entry_t *)
+			      ((char *)curbp->data +
+			       XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT)));
+			/*
+			 * Compare the entry, return it if it matches.
+			 * The length and first byte are compared before the
+			 * full bcmp.  On a match extrablk.index is the
+			 * entry's byte offset within the data block.
+			 */
+			if (dep->namelen == args->namelen &&
+			    dep->name[0] == args->name[0] &&
+			    bcmp(dep->name, args->name, args->namelen) == 0) {
+				args->inumber = INT_GET(dep->inumber, ARCH_CONVERT);
+				*indexp = index;
+				state->extravalid = 1;
+				state->extrablk.bp = curbp;
+				state->extrablk.blkno = curdb;
+				state->extrablk.index =
+					(int)((char *)dep -
+					      (char *)curbp->data);
+				state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+				return XFS_ERROR(EEXIST);
+			}
+		}
+	}
+	/*
+	 * Didn't find a match.
+	 * If we are holding a buffer, give it back in case our caller
+	 * finds it useful.
+	 * extravalid is set to whether a buffer is held; extrablk.index
+	 * is -1 in that case.
+	 */
+	if (state->extravalid = (curbp != NULL)) {
+		state->extrablk.bp = curbp;
+		state->extrablk.index = -1;
+		/*
+		 * For addname, giving back a free block.
+		 */
+		if (args->addname) {
+			state->extrablk.blkno = curfdb;
+			state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
+		}
+		/*
+		 * For other callers, giving back a data block.
+		 */
+		else {
+			state->extrablk.blkno = curdb;
+			state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
+		}
+	}
+	/*
+	 * Return the final index, that will be the insertion point.
+	 */
+	*indexp = index;
+	ASSERT(index == INT_GET(leaf->hdr.count, ARCH_CONVERT) || args->oknoent);
+	return XFS_ERROR(ENOENT);
+}
+
+/*
+ * Move count leaf entries from source to destination leaf.
+ * Log entries and headers.  Stale entries are preserved.
+ *
+ * Entries are taken from bp_s starting at start_s and placed in bp_d
+ * starting at start_d.  Both headers have their count and stale fields
+ * adjusted by the number of entries and stale entries moved.
+ */
+static void
+xfs_dir2_leafn_moveents(
+	xfs_da_args_t	*args,			/* operation arguments */
+	xfs_dabuf_t	*bp_s,			/* source leaf buffer */
+	int		start_s,		/* source leaf index */
+	xfs_dabuf_t	*bp_d,			/* destination leaf buffer */
+	int		start_d,		/* destination leaf index */
+	int		count)			/* count of leaves to copy */
+{
+	xfs_dir2_leaf_t	*leaf_d;		/* destination leaf structure */
+	xfs_dir2_leaf_t	*leaf_s;		/* source leaf structure */
+	int		stale;			/* count stale leaves copied */
+	xfs_trans_t	*tp;			/* transaction pointer */
+
+	xfs_dir2_trace_args_bibii("leafn_moveents", args, bp_s, start_s, bp_d,
+		start_d, count);
+	/*
+	 * Silently return if nothing to do.
+	 */
+	if (count == 0) {
+#pragma mips_frequency_hint NEVER
+		return;
+	}
+	tp = args->trans;
+	leaf_s = bp_s->data;
+	leaf_d = bp_d->data;
+	/*
+	 * If the destination index is not the end of the current
+	 * destination leaf entries, open up a hole in the destination
+	 * to hold the new entries.
+	 * The displaced entries are logged at their new positions.
+	 */
+	if (start_d < INT_GET(leaf_d->hdr.count, ARCH_CONVERT)) {
+		ovbcopy(&leaf_d->ents[start_d], &leaf_d->ents[start_d + count],
+			(INT_GET(leaf_d->hdr.count, ARCH_CONVERT) - start_d) *
+			sizeof(xfs_dir2_leaf_entry_t));
+		xfs_dir2_leaf_log_ents(tp, bp_d, start_d + count,
+			count + INT_GET(leaf_d->hdr.count, ARCH_CONVERT) - 1);
+	}
+	/*
+	 * If the source has stale leaves, count the ones in the copy range
+	 * so we can update the header correctly.
+	 */
+	if (!INT_ISZERO(leaf_s->hdr.stale, ARCH_CONVERT)) {
+		int	i;			/* temp leaf index */
+
+		for (i = start_s, stale = 0; i < start_s + count; i++) {
+			if (INT_GET(leaf_s->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
+				stale++;
+		}
+	} else
+		stale = 0;
+	/*
+	 * Copy the leaf entries from source to destination.
+	 */
+	bcopy(&leaf_s->ents[start_s], &leaf_d->ents[start_d],
+		count * sizeof(xfs_dir2_leaf_entry_t));
+	xfs_dir2_leaf_log_ents(tp, bp_d, start_d, start_d + count - 1);
+	/*
+	 * If there are source entries after the ones we copied,
+	 * delete the ones we copied by sliding the next ones down.
+	 *
+	 * NOTE(review): the slide moves (and logs) exactly count entries,
+	 * not the number of entries that remain after start_s + count.
+	 * The two differ whenever the tail is longer or shorter than
+	 * count -- confirm against the callers.
+	 */
+	if (start_s + count < INT_GET(leaf_s->hdr.count, ARCH_CONVERT)) {
+		ovbcopy(&leaf_s->ents[start_s + count], &leaf_s->ents[start_s],
+			count * sizeof(xfs_dir2_leaf_entry_t));
+		xfs_dir2_leaf_log_ents(tp, bp_s, start_s, start_s + count - 1);
+	}
+	/*
+	 * Update the headers and log them.
+	 */
+	INT_MOD(leaf_s->hdr.count, ARCH_CONVERT, -(count));
+	INT_MOD(leaf_s->hdr.stale, ARCH_CONVERT, -(stale));
+	INT_MOD(leaf_d->hdr.count, ARCH_CONVERT, count);
+	INT_MOD(leaf_d->hdr.stale, ARCH_CONVERT, stale);
+	xfs_dir2_leaf_log_header(tp, bp_s);
+	xfs_dir2_leaf_log_header(tp, bp_d);
+	xfs_dir2_leafn_check(args->dp, bp_s);
+	xfs_dir2_leafn_check(args->dp, bp_d);
+}
+
+/*
+ * Determine the sort order of two leaf blocks.
+ * Returns 1 if both are valid and leaf2 should be before leaf1, else 0.
+ * "Valid" here means that both blocks hold at least one entry.  leaf2
+ * sorts first when its first hash value or its last hash value is
+ * smaller than the corresponding one in leaf1.
+ */
+int						/* sort order */
+xfs_dir2_leafn_order(
+	xfs_dabuf_t	*leaf1_bp,		/* leaf1 buffer */
+	xfs_dabuf_t	*leaf2_bp)		/* leaf2 buffer */
+{
+	xfs_dir2_leaf_t	*leaf1;			/* leaf1 structure */
+	xfs_dir2_leaf_t	*leaf2;			/* leaf2 structure */
+
+	leaf1 = leaf1_bp->data;
+	leaf2 = leaf2_bp->data;
+	ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	if (INT_GET(leaf1->hdr.count, ARCH_CONVERT) > 0 &&
+	    INT_GET(leaf2->hdr.count, ARCH_CONVERT) > 0 &&
+	    (INT_GET(leaf2->ents[0].hashval, ARCH_CONVERT) < INT_GET(leaf1->ents[0].hashval, ARCH_CONVERT) ||
+	     INT_GET(leaf2->ents[INT_GET(leaf2->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT) <
+	     INT_GET(leaf1->ents[INT_GET(leaf1->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT)))
+		return 1;
+	return 0;
+}
+
+/*
+ * Rebalance leaf entries between two leaf blocks.
+ * This is actually only called when the second block is new,
+ * though the code deals with the general case.
+ * A new entry will be inserted in one of the blocks, and that
+ * entry is taken into account when balancing.
+ *
+ * On return state->inleaf says which block the new entry goes in, and
+ * blk2->index is adjusted when it is not the first one.
+ */
+static void
+xfs_dir2_leafn_rebalance(
+	xfs_da_state_t		*state,		/* btree cursor */
+	xfs_da_state_blk_t	*blk1,		/* first btree block */
+	xfs_da_state_blk_t	*blk2)		/* second btree block */
+{
+	xfs_da_args_t		*args;		/* operation arguments */
+	int			count;		/* count (& direction) leaves */
+	int			isleft;		/* new goes in left leaf */
+	xfs_dir2_leaf_t		*leaf1;		/* first leaf structure */
+	xfs_dir2_leaf_t		*leaf2;		/* second leaf structure */
+	int			mid;		/* midpoint leaf index */
+#ifdef DEBUG
+	int			oldstale;	/* old count of stale leaves */
+#endif
+	int			oldsum;		/* old total leaf count */
+	int			swap;		/* swapped leaf blocks */
+
+	args = state->args;
+	/*
+	 * If the block order is wrong, swap the arguments.
+	 * Only the local blk1/blk2 pointers are exchanged; swap records
+	 * that this happened for the inleaf computation below.
+	 */
+	if (swap = xfs_dir2_leafn_order(blk1->bp, blk2->bp)) {
+#pragma mips_frequency_hint NEVER
+		xfs_da_state_blk_t	*tmp;	/* temp for block swap */
+
+		tmp = blk1;
+		blk1 = blk2;
+		blk2 = tmp;
+	}
+	leaf1 = blk1->bp->data;
+	leaf2 = blk2->bp->data;
+	oldsum = INT_GET(leaf1->hdr.count, ARCH_CONVERT) + INT_GET(leaf2->hdr.count, ARCH_CONVERT);
+#ifdef DEBUG
+	oldstale = INT_GET(leaf1->hdr.stale, ARCH_CONVERT) + INT_GET(leaf2->hdr.stale, ARCH_CONVERT);
+#endif
+	mid = oldsum >> 1;
+	/*
+	 * If the old leaf count was odd then the new one will be even,
+	 * so we need to divide the new count evenly.
+	 * The hash of the middle entry, looked up in whichever block holds
+	 * it, decides the side: the new entry goes left when its hash is
+	 * not greater than that.
+	 */
+	if (oldsum & 1) {
+		xfs_dahash_t	midhash;	/* middle entry hash value */
+
+		if (mid >= INT_GET(leaf1->hdr.count, ARCH_CONVERT))
+			midhash = INT_GET(leaf2->ents[mid - INT_GET(leaf1->hdr.count, ARCH_CONVERT)].hashval, ARCH_CONVERT);
+		else
+			midhash = INT_GET(leaf1->ents[mid].hashval, ARCH_CONVERT);
+		isleft = args->hashval <= midhash;
+	}
+	/*
+	 * If the old count is even then the new count is odd, so there's
+	 * no preferred side for the new entry.
+	 * Pick the left one.
+	 */
+	else
+		isleft = 1;
+	/*
+	 * Calculate moved entry count.  Positive means left-to-right,
+	 * negative means right-to-left.  Then move the entries.
+	 *
+	 * NOTE(review): in the count < 0 branch the negative value itself
+	 * is passed as the count argument of xfs_dir2_leafn_moveents(),
+	 * which multiplies it into byte lengths.  It looks like -count was
+	 * intended; per the function comment this path is presumably never
+	 * taken because the second block is always new -- confirm.
+	 */
+	count = INT_GET(leaf1->hdr.count, ARCH_CONVERT) - mid + (isleft == 0);
+	if (count > 0)
+		xfs_dir2_leafn_moveents(args, blk1->bp,
+			INT_GET(leaf1->hdr.count, ARCH_CONVERT) - count, blk2->bp, 0, count);
+	else if (count < 0)
+		xfs_dir2_leafn_moveents(args, blk2->bp, 0, blk1->bp,
+			INT_GET(leaf1->hdr.count, ARCH_CONVERT), count);
+	ASSERT(INT_GET(leaf1->hdr.count, ARCH_CONVERT) + INT_GET(leaf2->hdr.count, ARCH_CONVERT) == oldsum);
+	ASSERT(INT_GET(leaf1->hdr.stale, ARCH_CONVERT) + INT_GET(leaf2->hdr.stale, ARCH_CONVERT) == oldstale);
+	/*
+	 * Mark whether we're inserting into the old or new leaf.
+	 * The emptier block gets the new entry; on a tie the choice is
+	 * made by comparing the new hash with leaf2's first hash.  swap
+	 * converts the answer back to the caller's block order.
+	 */
+	if (INT_GET(leaf1->hdr.count, ARCH_CONVERT) < INT_GET(leaf2->hdr.count, ARCH_CONVERT))
+		state->inleaf = swap;
+	else if (INT_GET(leaf1->hdr.count, ARCH_CONVERT) > INT_GET(leaf2->hdr.count, ARCH_CONVERT))
+		state->inleaf = !swap;
+	else
+		state->inleaf =
+			swap ^ (args->hashval < INT_GET(leaf2->ents[0].hashval, ARCH_CONVERT));
+	/*
+	 * Adjust the expected index for insertion.
+	 */
+	if (!state->inleaf)
+		blk2->index = blk1->index - INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+}
+
+/*
+ * Remove an entry from a node directory.
+ * This removes the leaf entry and the data entry,
+ * and updates the free block if necessary.
+ */
+STATIC int					/* error */
+xfs_dir2_leafn_remove(
+	xfs_da_args_t		*args,		/* operation arguments */
+	xfs_dabuf_t		*bp,		/* leaf buffer */
+	int			index,		/* leaf entry index */
+	xfs_da_state_blk_t	*dblk,		/* data block */
+	int			*rval)		/* resulting block needs join */
+{
+	xfs_dir2_data_t		*data;		/* data block structure */
+	xfs_dir2_db_t		db;		/* data block number */
+	xfs_dabuf_t		*dbp;		/* data block buffer */
+	xfs_dir2_data_entry_t	*dep;		/* data block entry */
+	xfs_inode_t		*dp;		/* incore directory inode */
+	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
+	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
+	int			longest;	/* longest data free entry */
+	int			off;		/* data block entry offset */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			needlog;	/* need to log data header */
+	int			needscan;	/* need to rescan data frees */
+	xfs_trans_t		*tp;		/* transaction pointer */
+
+	xfs_dir2_trace_args_sb("leafn_remove", args, index, bp);
+	dp = args->dp;
+	tp = args->trans;
+	mp = dp->i_mount;
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
+	/*
+	 * Point to the entry we're removing.
+	 */
+	lep = &leaf->ents[index];
+	/*
+	 * Extract the data block and offset from the entry.
+ */ + db = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT)); + ASSERT(dblk->blkno == db); + off = XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT)); + ASSERT(dblk->index == off); + /* + * Kill the leaf entry by marking it stale. + * Log the leaf block changes. + */ + INT_MOD(leaf->hdr.stale, ARCH_CONVERT, +1); + xfs_dir2_leaf_log_header(tp, bp); + INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR); + xfs_dir2_leaf_log_ents(tp, bp, index, index); + /* + * Make the data entry free. Keep track of the longest freespace + * in the data block in case it changes. + */ + dbp = dblk->bp; + data = dbp->data; + dep = (xfs_dir2_data_entry_t *)((char *)data + off); + longest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT); + needlog = needscan = 0; + xfs_dir2_data_make_free(tp, dbp, off, + XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan); + /* + * Rescan the data block freespaces for bestfree. + * Log the data block header if needed. + */ + if (needscan) + xfs_dir2_data_freescan(mp, data, &needlog, NULL); + if (needlog) + xfs_dir2_data_log_header(tp, dbp); + xfs_dir2_data_check(dp, dbp); + /* + * If the longest data block freespace changes, need to update + * the corresponding freeblock entry. + */ + if (longest < INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) { + int error; /* error return value */ + xfs_dabuf_t *fbp; /* freeblock buffer */ + xfs_dir2_db_t fdb; /* freeblock block number */ + int findex; /* index in freeblock entries */ + xfs_dir2_free_t *free; /* freeblock structure */ + int logfree; /* need to log free entry */ + + /* + * Convert the data block number to a free block, + * read in the free block. 
+ */ + fdb = XFS_DIR2_DB_TO_FDB(mp, db); + if (error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fdb), + -1, &fbp, XFS_DATA_FORK)) { +#pragma mips_frequency_hint NEVER + return error; + } + free = fbp->data; + ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + ASSERT(INT_GET(free->hdr.firstdb, ARCH_CONVERT) == + XFS_DIR2_MAX_FREE_BESTS(mp) * + (fdb - XFS_DIR2_FREE_FIRSTDB(mp))); + /* + * Calculate which entry we need to fix. + */ + findex = XFS_DIR2_DB_TO_FDINDEX(mp, db); + longest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT); + /* + * If the data block is now empty we can get rid of it + * (usually). + */ + if (longest == mp->m_dirblksize - (uint)sizeof(data->hdr)) { +#pragma mips_frequency_hint NEVER + /* + * Try to punch out the data block. + */ + error = xfs_dir2_shrink_inode(args, db, dbp); + if (error == 0) { + dblk->bp = NULL; + data = NULL; + } + /* + * We can get ENOSPC if there's no space reservation. + * In this case just drop the buffer and some one else + * will eventually get rid of the empty block. + */ + else if (error == ENOSPC && args->total == 0) + xfs_da_buf_done(dbp); + else + return error; + } + /* + * If we got rid of the data block, we can eliminate that entry + * in the free block. + */ + if (data == NULL) { +#pragma mips_frequency_hint NEVER + /* + * One less used entry in the free table. + */ + INT_MOD(free->hdr.nused, ARCH_CONVERT, -1); + xfs_dir2_free_log_header(tp, fbp); + /* + * If this was the last entry in the table, we can + * trim the table size back. There might be other + * entries at the end referring to non-existent + * data blocks, get those too. + */ + if (findex == INT_GET(free->hdr.nvalid, ARCH_CONVERT) - 1) { + int i; /* free entry index */ + + for (i = findex - 1; + i >= 0 && INT_GET(free->bests[i], ARCH_CONVERT) == NULLDATAOFF; + i--) + continue; + INT_SET(free->hdr.nvalid, ARCH_CONVERT, i + 1); + logfree = 0; + } + /* + * Not the last entry, just punch it out. 
+ */ + else { + INT_SET(free->bests[findex], ARCH_CONVERT, NULLDATAOFF); + logfree = 1; + } + /* + * If there are no useful entries left in the block, + * get rid of the block if we can. + */ + if (INT_GET(free->hdr.nused, ARCH_CONVERT) == 0) { + error = xfs_dir2_shrink_inode(args, fdb, fbp); + if (error == 0) { + fbp = NULL; + logfree = 0; + } else if (error != ENOSPC || args->total != 0) + return error; + /* + * It's possible to get ENOSPC if there is no + * space reservation. In this case some one + * else will eventually get rid of this block. + */ + } + } + /* + * Data block is not empty, just set the free entry to + * the new value. + */ + else { + INT_SET(free->bests[findex], ARCH_CONVERT, longest); + logfree = 1; + } + /* + * Log the free entry that changed, unless we got rid of it. + */ + if (logfree) + xfs_dir2_free_log_bests(tp, fbp, findex, findex); + /* + * Drop the buffer if we still have it. + */ + if (fbp) + xfs_da_buf_done(fbp); + } + xfs_dir2_leafn_check(dp, bp); + /* + * Return indication of whether this leaf block is emtpy enough + * to justify trying to join it with a neighbor. + */ + *rval = + ((uint)sizeof(leaf->hdr) + + (uint)sizeof(leaf->ents[0]) * + (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT))) < + mp->m_dir_magicpct; + return 0; +} + +/* + * Split the leaf entries in the old block into old and new blocks. + */ +int /* error */ +xfs_dir2_leafn_split( + xfs_da_state_t *state, /* btree cursor */ + xfs_da_state_blk_t *oldblk, /* original block */ + xfs_da_state_blk_t *newblk) /* newly created block */ +{ + xfs_da_args_t *args; /* operation arguments */ + xfs_dablk_t blkno; /* new leaf block number */ + int error; /* error return value */ + xfs_mount_t *mp; /* filesystem mount point */ + + /* + * Allocate space for a new leaf node. 
+ */ + args = state->args; + mp = args->dp->i_mount; + ASSERT(args != NULL); + ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC); + error = xfs_da_grow_inode(args, &blkno); + if (error) { +#pragma mips_frequency_hint NEVER + return error; + } + /* + * Initialize the new leaf block. + */ + error = xfs_dir2_leaf_init(args, XFS_DIR2_DA_TO_DB(mp, blkno), + &newblk->bp, XFS_DIR2_LEAFN_MAGIC); + if (error) { +#pragma mips_frequency_hint NEVER + return error; + } + newblk->blkno = blkno; + newblk->magic = XFS_DIR2_LEAFN_MAGIC; + /* + * Rebalance the entries across the two leaves, link the new + * block into the leaves. + */ + xfs_dir2_leafn_rebalance(state, oldblk, newblk); + error = xfs_da_blk_link(state, oldblk, newblk); + if (error) { +#pragma mips_frequency_hint NEVER + return error; + } + /* + * Insert the new entry in the correct block. + */ + if (state->inleaf) + error = xfs_dir2_leafn_add(oldblk->bp, args, oldblk->index); + else + error = xfs_dir2_leafn_add(newblk->bp, args, newblk->index); + /* + * Update last hashval in each block since we added the name. + */ + oldblk->hashval = xfs_dir2_leafn_lasthash(oldblk->bp, NULL); + newblk->hashval = xfs_dir2_leafn_lasthash(newblk->bp, NULL); + xfs_dir2_leafn_check(args->dp, oldblk->bp); + xfs_dir2_leafn_check(args->dp, newblk->bp); + return error; +} + +/* + * Check a leaf block and its neighbors to see if the block should be + * collapsed into one or the other neighbor. Always keep the block + * with the smaller block number. + * If the current block is over 50% full, don't try to join it, return 0. + * If the block is empty, fill in the state structure and return 2. + * If it can be collapsed, fill in the state structure and return 1. + * If nothing can be done, return 0. 
+ */ +int /* error */ +xfs_dir2_leafn_toosmall( + xfs_da_state_t *state, /* btree cursor */ + int *action) /* resulting action to take */ +{ + xfs_da_state_blk_t *blk; /* leaf block */ + xfs_dablk_t blkno; /* leaf block number */ + xfs_dabuf_t *bp; /* leaf buffer */ + int bytes; /* bytes in use */ + int count; /* leaf live entry count */ + int error; /* error return value */ + int forward; /* sibling block direction */ + int i; /* sibling counter */ + xfs_da_blkinfo_t *info; /* leaf block header */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + int rval; /* result from path_shift */ + + /* + * Check for the degenerate case of the block being over 50% full. + * If so, it's not worth even looking to see if we might be able + * to coalesce with a sibling. + */ + blk = &state->path.blk[state->path.active - 1]; + info = blk->bp->data; + ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + leaf = (xfs_dir2_leaf_t *)info; + count = INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT); + bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]); + if (bytes > (state->blocksize >> 1)) { + /* + * Blk over 50%, don't try to join. + */ + *action = 0; + return 0; + } + /* + * Check for the degenerate case of the block being empty. + * If the block is empty, we'll simply delete it, no need to + * coalesce it with a sibling block. We choose (arbitrarily) + * to merge with the forward block unless it is NULL. + */ + if (count == 0) { +#pragma mips_frequency_hint NEVER + /* + * Make altpath point to the block we want to keep and + * path point to the block we want to drop (this one). + */ + forward = !INT_ISZERO(info->forw, ARCH_CONVERT); + bcopy(&state->path, &state->altpath, sizeof(state->path)); + error = xfs_da_path_shift(state, &state->altpath, forward, 0, + &rval); + if (error) + return error; + *action = rval ? 
2 : 0; + return 0; + } + /* + * Examine each sibling block to see if we can coalesce with + * at least 25% free space to spare. We need to figure out + * whether to merge with the forward or the backward block. + * We prefer coalescing with the lower numbered sibling so as + * to shrink a directory over time. + */ + forward = INT_GET(info->forw, ARCH_CONVERT) < INT_GET(info->back, ARCH_CONVERT); + for (i = 0, bp = NULL; i < 2; forward = !forward, i++) { + blkno = forward ?INT_GET( info->forw, ARCH_CONVERT) : INT_GET(info->back, ARCH_CONVERT); + if (blkno == 0) + continue; + /* + * Read the sibling leaf block. + */ + if (error = + xfs_da_read_buf(state->args->trans, state->args->dp, blkno, + -1, &bp, XFS_DATA_FORK)) { +#pragma mips_frequency_hint NEVER + return error; + } + ASSERT(bp != NULL); + /* + * Count bytes in the two blocks combined. + */ + leaf = (xfs_dir2_leaf_t *)info; + count = INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT); + bytes = state->blocksize - (state->blocksize >> 2); + leaf = bp->data; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + count += INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT); + bytes -= count * (uint)sizeof(leaf->ents[0]); + /* + * Fits with at least 25% to spare. + */ + if (bytes >= 0) + break; + xfs_da_brelse(state->args->trans, bp); + } + /* + * Didn't like either block, give up. + */ + if (i >= 2) { + *action = 0; + return 0; + } + /* + * Done with the sibling leaf block here, drop the dabuf + * so path_shift can get it. + */ + xfs_da_buf_done(bp); + /* + * Make altpath point to the block we want to keep (the lower + * numbered block) and path point to the block we want to drop. 
+ */ + bcopy(&state->path, &state->altpath, sizeof(state->path)); + if (blkno < blk->blkno) + error = xfs_da_path_shift(state, &state->altpath, forward, 0, + &rval); + else + error = xfs_da_path_shift(state, &state->path, forward, 0, + &rval); + if (error) { +#pragma mips_frequency_hint NEVER + return error; + } + *action = rval ? 0 : 1; + return 0; +} + +/* + * Move all the leaf entries from drop_blk to save_blk. + * This is done as part of a join operation. + */ +void +xfs_dir2_leafn_unbalance( + xfs_da_state_t *state, /* cursor */ + xfs_da_state_blk_t *drop_blk, /* dead block */ + xfs_da_state_blk_t *save_blk) /* surviving block */ +{ + xfs_da_args_t *args; /* operation arguments */ + xfs_dir2_leaf_t *drop_leaf; /* dead leaf structure */ + xfs_dir2_leaf_t *save_leaf; /* surviving leaf structure */ + + args = state->args; + ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); + ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC); + drop_leaf = drop_blk->bp->data; + save_leaf = save_blk->bp->data; + ASSERT(INT_GET(drop_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + ASSERT(INT_GET(save_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC); + /* + * If there are any stale leaf entries, take this opportunity + * to purge them. + */ + if (INT_GET(drop_leaf->hdr.stale, ARCH_CONVERT)) + xfs_dir2_leaf_compact(args, drop_blk->bp); + if (INT_GET(save_leaf->hdr.stale, ARCH_CONVERT)) + xfs_dir2_leaf_compact(args, save_blk->bp); + /* + * Move the entries from drop to the appropriate end of save. 
+ */ + drop_blk->hashval = INT_GET(drop_leaf->ents[INT_GET(drop_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT); + if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp)) + xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp, 0, + INT_GET(drop_leaf->hdr.count, ARCH_CONVERT)); + else + xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp, + INT_GET(save_leaf->hdr.count, ARCH_CONVERT), INT_GET(drop_leaf->hdr.count, ARCH_CONVERT)); + save_blk->hashval = INT_GET(save_leaf->ents[INT_GET(save_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT); + xfs_dir2_leafn_check(args->dp, save_blk->bp); +} + +/* + * Top-level node form directory addname routine. + */ +int /* error */ +xfs_dir2_node_addname( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_da_state_blk_t *blk; /* leaf block for insert */ + int error; /* error return value */ + int rval; /* sub-return value */ + xfs_da_state_t *state; /* btree cursor */ + + xfs_dir2_trace_args("node_addname", args); + /* + * Allocate and initialize the state (btree cursor). + */ + state = xfs_da_state_alloc(); + state->args = args; + state->mp = args->dp->i_mount; + state->blocksize = state->mp->m_dirblksize; + /* + * Look up the name. We're not supposed to find it, but + * this gives us the insertion point. + */ + error = xfs_da_node_lookup_int(state, &rval); + if (error) + rval = error; + if (rval != ENOENT) { +#pragma mips_frequency_hint NEVER + goto done; + } + /* + * Add the data entry to a data block. + * Extravalid is set to a freeblock found by lookup. + */ + rval = xfs_dir2_node_addname_int(args, + state->extravalid ? &state->extrablk : NULL); + if (rval) { +#pragma mips_frequency_hint NEVER + goto done; + } + blk = &state->path.blk[state->path.active - 1]; + ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); + /* + * Add the new leaf entry. + */ + rval = xfs_dir2_leafn_add(blk->bp, args, blk->index); + if (rval == 0) { + /* + * It worked, fix the hash values up the btree. 
+ */ + if (!args->justcheck) + xfs_da_fixhashpath(state, &state->path); + } else { +#pragma mips_frequency_hint NEVER + /* + * It didn't work, we need to split the leaf block. + */ + if (args->total == 0) { + ASSERT(rval == ENOSPC); + goto done; + } + /* + * Split the leaf block and insert the new entry. + */ + rval = xfs_da_split(state); + } +done: + xfs_da_state_free(state); + return rval; +} + + +/* + * Add the data entry for a node-format directory name addition. + * The leaf entry is added in xfs_dir2_leafn_add. + * We may enter with a freespace block that the lookup found. + */ +STATIC int /* error */ +xfs_dir2_node_addname_int( + xfs_da_args_t *args, /* operation arguments */ + xfs_da_state_blk_t *fblk) /* optional freespace block */ +{ + xfs_dir2_data_t *data; /* data block structure */ + xfs_dir2_db_t dbno; /* data block number */ + xfs_dabuf_t *dbp; /* data block buffer */ + xfs_dir2_data_entry_t *dep; /* data entry pointer */ + xfs_inode_t *dp; /* incore directory inode */ + xfs_dir2_data_unused_t *dup; /* data unused entry pointer */ + int error; /* error return value */ + xfs_dir2_db_t fbno; /* freespace block number */ + xfs_dabuf_t *fbp; /* freespace buffer */ + int findex; /* freespace entry index */ + xfs_dir2_db_t foundbno; /* found freespace block no */ + int foundindex; /* found freespace entry idx */ + xfs_dir2_free_t *free; /* freespace block structure */ + xfs_dir2_db_t ifbno; /* initial freespace block no */ + xfs_dir2_db_t lastfbno; /* highest freespace block no */ + int length; /* length of the new entry */ + int logfree; /* need to log free entry */ + xfs_mount_t *mp; /* filesystem mount point */ + int needlog; /* need to log data header */ + int needscan; /* need to rescan data frees */ + xfs_dir2_data_off_t *tagp; /* data entry tag pointer */ + xfs_trans_t *tp; /* transaction pointer */ + + dp = args->dp; + mp = dp->i_mount; + tp = args->trans; + length = XFS_DIR2_DATA_ENTSIZE(args->namelen); + /* + * If we came in with a freespace 
block that means that lookup + * found an entry with our hash value. This is the freespace + * block for that data entry. + */ + if (fblk) { + fbp = fblk->bp; + /* + * Remember initial freespace block number. + */ + ifbno = fblk->blkno; + free = fbp->data; + ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + findex = fblk->index; + /* + * This means the free entry showed that the data block had + * space for our entry, so we remembered it. + * Use that data block. + */ + if (findex >= 0) { + ASSERT(findex < INT_GET(free->hdr.nvalid, ARCH_CONVERT)); + ASSERT(INT_GET(free->bests[findex], ARCH_CONVERT) != NULLDATAOFF); + ASSERT(INT_GET(free->bests[findex], ARCH_CONVERT) >= length); + dbno = INT_GET(free->hdr.firstdb, ARCH_CONVERT) + findex; + } + /* + * The data block looked at didn't have enough room. + * We'll start at the beginning of the freespace entries. + */ + else { + dbno = -1; + findex = 0; + } + } + /* + * Didn't come in with a freespace block, so don't have a data block. + */ + else { + ifbno = dbno = -1; + fbp = NULL; + findex = 0; + } + /* + * If we don't have a data block yet, we're going to scan the + * freespace blocks looking for one. Figure out what the + * highest freespace block number is. + */ + if (dbno == -1) { + xfs_fileoff_t fo; /* freespace block number */ + + if (error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK)) + return error; + lastfbno = XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo); + fbno = ifbno; + foundindex = -1; + } + /* + * While we haven't identified a data block, search the freeblock + * data for a good data block. If we find a null freeblock entry, + * indicating a hole in the data blocks, remember that. + */ + while (dbno == -1) { + /* + * If we don't have a freeblock in hand, get the next one. + */ + if (fbp == NULL) { + /* + * Happens the first time through unless lookup gave + * us a freespace block to start with. 
+ */ + if (++fbno == 0) + fbno = XFS_DIR2_FREE_FIRSTDB(mp); + /* + * If it's ifbno we already looked at it. + */ + if (fbno == ifbno) + fbno++; + /* + * If it's off the end we're done. + */ + if (fbno >= lastfbno) + break; + /* + * Read the block. There can be holes in the + * freespace blocks, so this might not succeed. + * This should be really rare, so there's no reason + * to avoid it. + */ + if (error = xfs_da_read_buf(tp, dp, + XFS_DIR2_DB_TO_DA(mp, fbno), -1, &fbp, + XFS_DATA_FORK)) { +#pragma mips_frequency_hint NEVER + return error; + } + if (fbp == NULL) { +#pragma mips_frequency_hint NEVER + continue; + } + free = fbp->data; + ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + findex = 0; + } + /* + * Look at the current free entry. Is it good enough? + */ + if (INT_GET(free->bests[findex], ARCH_CONVERT) != NULLDATAOFF && + INT_GET(free->bests[findex], ARCH_CONVERT) >= length) + dbno = INT_GET(free->hdr.firstdb, ARCH_CONVERT) + findex; + else { + /* + * If we haven't found an empty entry yet, and this + * one is empty, remember this slot. + */ + if (foundindex == -1 && + INT_GET(free->bests[findex], ARCH_CONVERT) == NULLDATAOFF) { + foundindex = findex; + foundbno = fbno; + } + /* + * Are we done with the freeblock? + */ + if (++findex == INT_GET(free->hdr.nvalid, ARCH_CONVERT)) { + /* + * If there is space left in this freeblock, + * and we don't have an empty entry yet, + * remember this slot. + */ + if (foundindex == -1 && + findex < XFS_DIR2_MAX_FREE_BESTS(mp)) { + foundindex = findex; + foundbno = fbno; + } + /* + * Drop the block. + */ + xfs_da_brelse(tp, fbp); + fbp = NULL; + if (fblk && fblk->bp) + fblk->bp = NULL; + } + } + } + /* + * If we don't have a data block, and there's no free slot in a + * freeblock, we need to add a new freeblock. + */ + if (dbno == -1 && foundindex == -1) { +#pragma mips_frequency_hint NEVER + /* + * Not allowed to allocate, so return failure. 
+ */ + if (args->justcheck || args->total == 0) { + return XFS_ERROR(ENOSPC); + } + /* + * Add the new freeblock. + */ + if (error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, + &fbno)) { + return error; + } + /* + * Get a buffer for the new block. + */ + if (error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fbno), + -1, &fbp, XFS_DATA_FORK)) { + return error; + } + ASSERT(fbp != NULL); + /* + * Initialize the new block to be empty, and remember + * its first slot as our empty slot. + */ + free = fbp->data; + INT_SET(free->hdr.magic, ARCH_CONVERT, XFS_DIR2_FREE_MAGIC); + INT_SET(free->hdr.firstdb, ARCH_CONVERT, (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * + XFS_DIR2_MAX_FREE_BESTS(mp)); + INT_ZERO(free->hdr.nused, ARCH_CONVERT); + INT_ZERO(free->hdr.nvalid, ARCH_CONVERT); + foundindex = 0; + foundbno = fbno; + } + /* + * If we don't have a data block, and we don't have a freeblock buffer + * in hand (we dropped the one with the free slot in it), + * go read the freeblock again. + */ + if (dbno == -1 && fbp == NULL) { +#pragma mips_frequency_hint NEVER + /* + * We're going to use the empty slot we found before. + */ + findex = foundindex; + fbno = foundbno; + if (error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, fbno), + -1, &fbp, XFS_DATA_FORK)) { + return error; + } + ASSERT(fbp != NULL); + free = fbp->data; + ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + } + /* + * If we don't have a data block, we need to allocate one and make + * the freespace entries refer to it. + */ + if (dbno == -1) { +#pragma mips_frequency_hint NEVER + /* + * Not allowed to allocate, return failure. + */ + if (args->justcheck || args->total == 0) { + /* + * Drop the freespace buffer unless it came from our + * caller. + */ + if (fblk == NULL || fblk->bp == NULL) + xfs_da_buf_done(fbp); + return XFS_ERROR(ENOSPC); + } + /* + * Allocate and initialize the new data block. 
+ */ + if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, + &dbno)) || + (error = xfs_dir2_data_init(args, dbno, &dbp))) { + /* + * Drop the freespace buffer unless it came from our + * caller. + */ + if (fblk == NULL || fblk->bp == NULL) + xfs_da_buf_done(fbp); + return error; + } + /* + * If the freespace entry for this data block is not in the + * freespace block we have in hand, drop the one we have + * and get the right one. + */ + if (XFS_DIR2_DB_TO_FDB(mp, dbno) != fbno) { + xfs_da_brelse(tp, fbp); + if (fblk && fblk->bp) + fblk->bp = NULL; + fbno = XFS_DIR2_DB_TO_FDB(mp, dbno); + if (error = xfs_da_read_buf(tp, dp, + XFS_DIR2_DB_TO_DA(mp, fbno), -1, &fbp, + XFS_DATA_FORK)) { + xfs_da_buf_done(dbp); + return error; + } + ASSERT(fbp != NULL); + free = fbp->data; + ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + } + /* + * Set the freespace block index from the data block number. + */ + findex = XFS_DIR2_DB_TO_FDINDEX(mp, dbno); + /* + * If it's after the end of the current entries in the + * freespace block, extend that table. + */ + if (findex >= INT_GET(free->hdr.nvalid, ARCH_CONVERT)) { + ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp)); + INT_SET(free->hdr.nvalid, ARCH_CONVERT, findex + 1); + /* + * Tag new entry so nused will go up. + */ + INT_SET(free->bests[findex], ARCH_CONVERT, NULLDATAOFF); + } + /* + * If this entry was for an empty data block + * (this should always be true) then update the header. + */ + if (INT_GET(free->bests[findex], ARCH_CONVERT) == NULLDATAOFF) { + INT_MOD(free->hdr.nused, ARCH_CONVERT, +1); + xfs_dir2_free_log_header(tp, fbp); + } + /* + * Update the real value in the table. + * We haven't allocated the data entry yet so this will + * change again. + */ + data = dbp->data; + INT_COPY(free->bests[findex], data->hdr.bestfree[0].length, ARCH_CONVERT); + logfree = 1; + } + /* + * We had a data block so we don't have to make a new one. + */ + else { + /* + * If just checking, we succeeded. 
+ */ + if (args->justcheck) { + if (fblk == NULL || fblk->bp == NULL) + xfs_da_buf_done(fbp); + return 0; + } + /* + * Read the data block in. + */ + if (error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, dbno), + -1, &dbp, XFS_DATA_FORK)) { +#pragma mips_frequency_hint NEVER + if (fblk == NULL || fblk->bp == NULL) + xfs_da_buf_done(fbp); + return error; + } + data = dbp->data; + logfree = 0; + } + ASSERT(INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) >= length); + /* + * Point to the existing unused space. + */ + dup = (xfs_dir2_data_unused_t *) + ((char *)data + INT_GET(data->hdr.bestfree[0].offset, ARCH_CONVERT)); + needscan = needlog = 0; + /* + * Mark the first part of the unused space, inuse for us. + */ + xfs_dir2_data_use_free(tp, dbp, dup, + (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length, + &needlog, &needscan); + /* + * Fill in the new entry and log it. + */ + dep = (xfs_dir2_data_entry_t *)dup; + INT_SET(dep->inumber, ARCH_CONVERT, args->inumber); + dep->namelen = args->namelen; + bcopy(args->name, dep->name, dep->namelen); + tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep); + INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)data)); + xfs_dir2_data_log_entry(tp, dbp, dep); + /* + * Rescan the block for bestfree if needed. + */ + if (needscan) + xfs_dir2_data_freescan(mp, data, &needlog, NULL); + /* + * Log the data block header if needed. + */ + if (needlog) + xfs_dir2_data_log_header(tp, dbp); + /* + * If the freespace entry is now wrong, update it. + */ + if (INT_GET(free->bests[findex], ARCH_CONVERT) != INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) { + INT_COPY(free->bests[findex], data->hdr.bestfree[0].length, ARCH_CONVERT); + logfree = 1; + } + /* + * Log the freespace entry if needed. + */ + if (logfree) + xfs_dir2_free_log_bests(tp, fbp, findex, findex); + /* + * If the caller didn't hand us the freespace block, drop it. 
+ */ + if (fblk == NULL || fblk->bp == NULL) + xfs_da_buf_done(fbp); + /* + * Return the data block and offset in args, then drop the data block. + */ + args->blkno = (xfs_dablk_t)dbno; + args->index = INT_GET(*tagp, ARCH_CONVERT); + xfs_da_buf_done(dbp); + return 0; +} + +/* + * Lookup an entry in a node-format directory. + * All the real work happens in xfs_da_node_lookup_int. + * The only real output is the inode number of the entry. + */ +int /* error */ +xfs_dir2_node_lookup( + xfs_da_args_t *args) /* operation arguments */ +{ + int error; /* error return value */ + int i; /* btree level */ + int rval; /* operation return value */ + xfs_da_state_t *state; /* btree cursor */ + + xfs_dir2_trace_args("node_lookup", args); + /* + * Allocate and initialize the btree cursor. + */ + state = xfs_da_state_alloc(); + state->args = args; + state->mp = args->dp->i_mount; + state->blocksize = state->mp->m_dirblksize; + /* + * Fill in the path to the entry in the cursor. + */ + error = xfs_da_node_lookup_int(state, &rval); + if (error) + rval = error; + /* + * Release the btree blocks and leaf block. + */ + for (i = 0; i < state->path.active; i++) { + xfs_da_brelse(args->trans, state->path.blk[i].bp); + state->path.blk[i].bp = NULL; + } + /* + * Release the data block if we have it. + */ + if (state->extravalid && state->extrablk.bp) { + xfs_da_brelse(args->trans, state->extrablk.bp); + state->extrablk.bp = NULL; + } + xfs_da_state_free(state); + return rval; +} + +/* + * Remove an entry from a node-format directory. + */ +int /* error */ +xfs_dir2_node_removename( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_da_state_blk_t *blk; /* leaf block */ + int error; /* error return value */ + int rval; /* operation return value */ + xfs_da_state_t *state; /* btree cursor */ + + xfs_dir2_trace_args("node_removename", args); + /* + * Allocate and initialize the btree cursor. 
+ */ + state = xfs_da_state_alloc(); + state->args = args; + state->mp = args->dp->i_mount; + state->blocksize = state->mp->m_dirblksize; + /* + * Look up the entry we're deleting, set up the cursor. + */ + error = xfs_da_node_lookup_int(state, &rval); + if (error) { +#pragma mips_frequency_hint NEVER + rval = error; + } + /* + * Didn't find it, upper layer screwed up. + */ + if (rval != EEXIST) { +#pragma mips_frequency_hint NEVER + xfs_da_state_free(state); + return rval; + } + blk = &state->path.blk[state->path.active - 1]; + ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); + ASSERT(state->extravalid); + /* + * Remove the leaf and data entries. + * Extrablk refers to the data block. + */ + error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, + &state->extrablk, &rval); + if (error) { +#pragma mips_frequency_hint NEVER + return error; + } + /* + * Fix the hash values up the btree. + */ + xfs_da_fixhashpath(state, &state->path); + /* + * If we need to join leaf blocks, do it. + */ + if (rval && state->path.active > 1) + error = xfs_da_join(state); + /* + * If no errors so far, try conversion to leaf format. + */ + if (!error) + error = xfs_dir2_node_to_leaf(state); + xfs_da_state_free(state); + return error; +} + +/* + * Replace an entry's inode number in a node-format directory. + */ +int /* error */ +xfs_dir2_node_replace( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_da_state_blk_t *blk; /* leaf block */ + xfs_dir2_data_t *data; /* data block structure */ + xfs_dir2_data_entry_t *dep; /* data entry changed */ + int error; /* error return value */ + int i; /* btree level */ + xfs_ino_t inum; /* new inode number */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry being changed */ + int rval; /* internal return value */ + xfs_da_state_t *state; /* btree cursor */ + + xfs_dir2_trace_args("node_replace", args); + /* + * Allocate and initialize the btree cursor. 
+ */ + state = xfs_da_state_alloc(); + state->args = args; + state->mp = args->dp->i_mount; + state->blocksize = state->mp->m_dirblksize; + inum = args->inumber; + /* + * Lookup the entry to change in the btree. + */ + error = xfs_da_node_lookup_int(state, &rval); + if (error) { +#pragma mips_frequency_hint NEVER + rval = error; + } + /* + * It should be found, since the vnodeops layer has looked it up + * and locked it. But paranoia is good. + */ + if (rval == EEXIST) { + /* + * Find the leaf entry. + */ + blk = &state->path.blk[state->path.active - 1]; + ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); + leaf = blk->bp->data; + lep = &leaf->ents[blk->index]; + ASSERT(state->extravalid); + /* + * Point to the data entry. + */ + data = state->extrablk.bp->data; + ASSERT(INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC); + dep = (xfs_dir2_data_entry_t *) + ((char *)data + + XFS_DIR2_DATAPTR_TO_OFF(state->mp, INT_GET(lep->address, ARCH_CONVERT))); + ASSERT(inum != INT_GET(dep->inumber, ARCH_CONVERT)); + /* + * Fill in the new inode number and log the entry. + */ + INT_SET(dep->inumber, ARCH_CONVERT, inum); + xfs_dir2_data_log_entry(args->trans, state->extrablk.bp, dep); + rval = 0; + } + /* + * Didn't find it, and we're holding a data block. Drop it. + */ + else if (state->extravalid) { +#pragma mips_frequency_hint NEVER + xfs_da_brelse(args->trans, state->extrablk.bp); + state->extrablk.bp = NULL; + } + /* + * Release all the buffers in the cursor. + */ + for (i = 0; i < state->path.active; i++) { + xfs_da_brelse(args->trans, state->path.blk[i].bp); + state->path.blk[i].bp = NULL; + } + xfs_da_state_free(state); + return rval; +} + +/* + * Trim off a trailing empty freespace block. + * Return (in rvalp) 1 if we did it, 0 if not. 
+ */ +int /* error */ +xfs_dir2_node_trim_free( + xfs_da_args_t *args, /* operation arguments */ + xfs_fileoff_t fo, /* free block number */ + int *rvalp) /* out: did something */ +{ + xfs_dabuf_t *bp; /* freespace buffer */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return code */ + xfs_dir2_free_t *free; /* freespace structure */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_trans_t *tp; /* transaction pointer */ + + dp = args->dp; + mp = dp->i_mount; + tp = args->trans; + /* + * Read the freespace block. + */ + if (error = xfs_da_read_buf(tp, dp, (xfs_dablk_t)fo, -1, &bp, + XFS_DATA_FORK)) { +#pragma mips_frequency_hint NEVER + return error; + } + free = bp->data; + ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC); + /* + * If there are used entries, there's nothing to do. + */ + if (INT_GET(free->hdr.nused, ARCH_CONVERT) > 0) { + xfs_da_brelse(tp, bp); + *rvalp = 0; + return 0; + } + /* + * Blow the block away. + */ + if (error = + xfs_dir2_shrink_inode(args, XFS_DIR2_DA_TO_DB(mp, (xfs_dablk_t)fo), + bp)) { + /* + * Can't fail with ENOSPC since that only happens with no + * space reservation, when breaking up an extent into two + * pieces. This is the last block of an extent. + */ + ASSERT(error != ENOSPC); + xfs_da_brelse(tp, bp); + return error; + } + /* + * Return that we succeeded. + */ + *rvalp = 1; + return 0; +} diff --git a/libxfs/xfs_dir2_sf.c b/libxfs/xfs_dir2_sf.c new file mode 100644 index 000000000..a021822b2 --- /dev/null +++ b/libxfs/xfs_dir2_sf.c @@ -0,0 +1,1119 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * xfs_dir2_sf.c
+ * Shortform directory implementation for v2 directories.
+ */
+
+#include <xfs.h>
+
+
+/*
+ * Given a block directory (dp/block), calculate its size as a shortform (sf)
+ * directory and a header for the sf directory, if it will fit in the
+ * space currently present in the inode. If it won't fit, the output
+ * size is too big (but not accurate).
+ */ +int /* size for sf form */ +xfs_dir2_block_sfsize( + xfs_inode_t *dp, /* incore inode pointer */ + xfs_dir2_block_t *block, /* block directory data */ + xfs_dir2_sf_hdr_t *sfhp) /* output: header for sf form */ +{ + xfs_dir2_dataptr_t addr; /* data entry address */ + xfs_dir2_leaf_entry_t *blp; /* leaf area of the block */ + xfs_dir2_block_tail_t *btp; /* tail area of the block */ + int count; /* shortform entry count */ + xfs_dir2_data_entry_t *dep; /* data entry in the block */ + int i; /* block entry index */ + int i8count; /* count of big-inode entries */ + int isdot; /* entry is "." */ + int isdotdot; /* entry is ".." */ + xfs_mount_t *mp; /* mount structure pointer */ + int namelen; /* total name bytes */ + xfs_ino_t parent; /* parent inode number */ + int size; /* total computed size */ + + mp = dp->i_mount; + + count = i8count = namelen = 0; + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + + /* + * Iterate over the block's data entries by using the leaf pointers. + */ + for (i = 0; i < INT_GET(btp->count, ARCH_CONVERT); i++) { + if ((addr = INT_GET(blp[i].address, ARCH_CONVERT)) == XFS_DIR2_NULL_DATAPTR) + continue; + /* + * Calculate the pointer to the entry at hand. + */ + dep = (xfs_dir2_data_entry_t *) + ((char *)block + XFS_DIR2_DATAPTR_TO_OFF(mp, addr)); + /* + * Detect . and .., so we can special-case them. + * . is not included in sf directories. + * .. is included by just the parent inode number. + */ + isdot = dep->namelen == 1 && dep->name[0] == '.'; + isdotdot = + dep->namelen == 2 && + dep->name[0] == '.' && dep->name[1] == '.'; +#if XFS_BIG_FILESYSTEMS + if (!isdot) + i8count += INT_GET(dep->inumber, ARCH_CONVERT) > XFS_DIR2_MAX_SHORT_INUM; +#endif + if (!isdot && !isdotdot) { + count++; + namelen += dep->namelen; + } else if (isdotdot) + parent = INT_GET(dep->inumber, ARCH_CONVERT); + /* + * Calculate the new size, see if we should give up yet. 
+ */ + size = XFS_DIR2_SF_HDR_SIZE(i8count) + /* header */ + count + /* namelen */ + count * (uint)sizeof(xfs_dir2_sf_off_t) + /* offset */ + namelen + /* name */ + (i8count ? /* inumber */ + (uint)sizeof(xfs_dir2_ino8_t) * count : + (uint)sizeof(xfs_dir2_ino4_t) * count); + if (size > XFS_IFORK_DSIZE(dp)) + return size; /* size value is a failure */ + } + /* + * Create the output header, if it worked. + */ + sfhp->count = count; + sfhp->i8count = i8count; + XFS_DIR2_SF_PUT_INUMBER_ARCH((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent, ARCH_CONVERT); + return size; +} + +/* + * Convert a block format directory to shortform. + * Caller has already checked that it will fit, and built us a header. + */ +int /* error */ +xfs_dir2_block_to_sf( + xfs_da_args_t *args, /* operation arguments */ + xfs_dabuf_t *bp, /* block buffer */ + int size, /* shortform directory size */ + xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ +{ + xfs_dir2_block_t *block; /* block structure */ + xfs_dir2_block_tail_t *btp; /* block tail pointer */ + xfs_dir2_data_entry_t *dep; /* data entry pointer */ + xfs_inode_t *dp; /* incore directory inode */ + xfs_dir2_data_unused_t *dup; /* unused data pointer */ + char *endptr; /* end of data entries */ + int error; /* error return value */ + int logflags; /* inode logging flags */ + xfs_mount_t *mp; /* filesystem mount point */ + char *ptr; /* current data pointer */ + xfs_dir2_sf_entry_t *sfep; /* shortform entry */ + xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_ino_t temp; + + xfs_dir2_trace_args_sb("block_to_sf", args, size, bp); + dp = args->dp; + mp = dp->i_mount; + + /* + * Make a copy of the block data, so we can shrink the inode + * and add local data. 
+ */ + block = kmem_alloc(mp->m_dirblksize, KM_SLEEP); + bcopy(bp->data, block, mp->m_dirblksize); + logflags = XFS_ILOG_CORE; + if (error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp)) { +#pragma mips_frequency_hint NEVER + ASSERT(error != ENOSPC); + goto out; + } + /* + * The buffer is now unconditionally gone, whether + * xfs_dir2_shrink_inode worked or not. + * + * Convert the inode to local format. + */ + dp->i_df.if_flags &= ~XFS_IFEXTENTS; + dp->i_df.if_flags |= XFS_IFINLINE; + dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; + ASSERT(dp->i_df.if_bytes == 0); + xfs_idata_realloc(dp, size, XFS_DATA_FORK); + logflags |= XFS_ILOG_DDATA; + /* + * Copy the header into the newly allocate local space. + */ + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + bcopy(sfhp, sfp, XFS_DIR2_SF_HDR_SIZE(sfhp->i8count)); + dp->i_d.di_size = size; + /* + * Set up to loop over the block's entries. + */ + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + ptr = (char *)block->u; + endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + /* + * Loop over the active and unused entries. + * Stop when we reach the leaf/tail portion of the block. + */ + while (ptr < endptr) { + /* + * If it's unused, just skip over it. + */ + dup = (xfs_dir2_data_unused_t *)ptr; + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { + ptr += INT_GET(dup->length, ARCH_CONVERT); + continue; + } + dep = (xfs_dir2_data_entry_t *)ptr; + /* + * Skip . + */ + if (dep->namelen == 1 && dep->name[0] == '.') + ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) == dp->i_ino); + /* + * Skip .., but make sure the inode number is right. + */ + else if (dep->namelen == 2 && + dep->name[0] == '.' && dep->name[1] == '.') + ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) == + XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT)); + /* + * Normal entry, copy it into shortform. 
+ */ + else { + sfep->namelen = dep->namelen; + XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep, + (xfs_dir2_data_aoff_t) + ((char *)dep - (char *)block), ARCH_CONVERT); + bcopy(dep->name, sfep->name, dep->namelen); + temp=INT_GET(dep->inumber, ARCH_CONVERT); + XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &temp, + XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT); + sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); + } + ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + } + ASSERT((char *)sfep - (char *)sfp == size); + xfs_dir2_sf_check(args); +out: + xfs_trans_log_inode(args->trans, dp, logflags); + kmem_free(block, mp->m_dirblksize); + return error; +} + +/* + * Add a name to a shortform directory. + * There are two algorithms, "easy" and "hard" which we decide on + * before changing anything. + * Convert to block form if necessary, if the new entry won't fit. + */ +int /* error */ +xfs_dir2_sf_addname( + xfs_da_args_t *args) /* operation arguments */ +{ + int add_entsize; /* size of the new entry */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return value */ + int incr_isize; /* total change in size */ + int new_isize; /* di_size after adding name */ + int objchange; /* changing to 8-byte inodes */ + xfs_dir2_data_aoff_t offset; /* offset for new entry */ + int old_isize; /* di_size before adding name */ + int pick; /* which algorithm to use */ + xfs_dir2_sf_t *sfp; /* shortform structure */ + xfs_dir2_sf_entry_t *sfep; /* shortform entry */ + + xfs_dir2_trace_args("sf_addname", args); + ASSERT(xfs_dir2_sf_lookup(args) == ENOENT); + dp = args->dp; + ASSERT(dp->i_df.if_flags & XFS_IFINLINE); + /* + * Make sure the shortform value has some of its header. 
+ */ + if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { +#pragma mips_frequency_hint NEVER + ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); + return XFS_ERROR(EIO); + } + ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); + ASSERT(dp->i_df.if_u1.if_data != NULL); + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); + /* + * Compute entry (and change in) size. + */ + add_entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen); + incr_isize = add_entsize; +#if XFS_BIG_FILESYSTEMS + /* + * Do we have to change to 8 byte inodes? + */ + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) { +#pragma mips_frequency_hint NEVER + /* + * Yes, adjust the entry size and the total size. + */ + add_entsize += + (uint)sizeof(xfs_dir2_ino8_t) - + (uint)sizeof(xfs_dir2_ino4_t); + incr_isize += + (sfp->hdr.count + 2) * + ((uint)sizeof(xfs_dir2_ino8_t) - + (uint)sizeof(xfs_dir2_ino4_t)); + objchange = 1; + } else + objchange = 0; +#else + objchange = 0; +#endif + old_isize = (int)dp->i_d.di_size; + new_isize = old_isize + incr_isize; + /* + * Won't fit as shortform any more (due to size), + * or the pick routine says it won't (due to offset values). + */ + if (new_isize > XFS_IFORK_DSIZE(dp) || + (pick = + xfs_dir2_sf_addname_pick(args, objchange, &sfep, &offset)) == 0) { +#pragma mips_frequency_hint NEVER + /* + * Just checking or no space reservation, it doesn't fit. + */ + if (args->justcheck || args->total == 0) + return XFS_ERROR(ENOSPC); + /* + * Convert to block form then add the name. + */ + error = xfs_dir2_sf_to_block(args); + if (error) + return error; + return xfs_dir2_block_addname(args); + } + /* + * Just checking, it fits. + */ + if (args->justcheck) + return 0; + /* + * Do it the easy way - just add it at the end. + */ + if (pick == 1) + xfs_dir2_sf_addname_easy(args, sfep, offset, new_isize); + /* + * Do it the hard way - look for a place to insert the new entry. 
+ * Convert to 8 byte inode numbers first if necessary. + */ + else { + ASSERT(pick == 2); +#if XFS_BIG_FILESYSTEMS + if (objchange) + xfs_dir2_sf_toino8(args); +#endif + xfs_dir2_sf_addname_hard(args, objchange, new_isize); + } + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); + return 0; +} + +/* + * Add the new entry the "easy" way. + * This is copying the old directory and adding the new entry at the end. + * Since it's sorted by "offset" we need room after the last offset + * that's already there, and then room to convert to a block directory. + * This is already checked by the pick routine. + */ +STATIC void +xfs_dir2_sf_addname_easy( + xfs_da_args_t *args, /* operation arguments */ + xfs_dir2_sf_entry_t *sfep, /* pointer to new entry */ + xfs_dir2_data_aoff_t offset, /* offset to use for new ent */ + int new_isize) /* new directory size */ +{ + int byteoff; /* byte offset in sf dir */ + xfs_inode_t *dp; /* incore directory inode */ + xfs_dir2_sf_t *sfp; /* shortform structure */ + + dp = args->dp; + + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + byteoff = (int)((char *)sfep - (char *)sfp); + /* + * Grow the in-inode space. + */ + xfs_idata_realloc(dp, XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen), + XFS_DATA_FORK); + /* + * Need to set up again due to realloc of the inode data. + */ + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff); + /* + * Fill in the new entry. + */ + sfep->namelen = args->namelen; + XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep, offset, ARCH_CONVERT); + bcopy(args->name, sfep->name, sfep->namelen); + XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber, + XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT); + /* + * Update the header and inode. + */ + sfp->hdr.count++; +#if XFS_BIG_FILESYSTEMS + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) + sfp->hdr.i8count++; +#endif + dp->i_d.di_size = new_isize; + xfs_dir2_sf_check(args); +} + +/* + * Add the new entry the "hard" way. 
+ * The caller has already converted to 8 byte inode numbers if necessary, + * in which case we need to leave the i8count at 1. + * Find a hole that the new entry will fit into, and copy + * the first part of the entries, the new entry, and the last part of + * the entries. + */ +/* ARGSUSED */ +STATIC void +xfs_dir2_sf_addname_hard( + xfs_da_args_t *args, /* operation arguments */ + int objchange, /* changing inode number size */ + int new_isize) /* new directory size */ +{ + int add_datasize; /* data size need for new ent */ + char buf[XFS_DIR2_SF_MAX_SIZE]; /* buffer for old */ + xfs_inode_t *dp; /* incore directory inode */ + int eof; /* reached end of old dir */ + int nbytes; /* temp for byte copies */ + xfs_dir2_data_aoff_t new_offset; /* next offset value */ + xfs_dir2_data_aoff_t offset; /* current offset value */ + int old_isize; /* previous di_size */ + xfs_dir2_sf_entry_t *oldsfep; /* entry in original dir */ + xfs_dir2_sf_t *oldsfp; /* original shortform dir */ + xfs_dir2_sf_entry_t *sfep; /* entry in new dir */ + xfs_dir2_sf_t *sfp; /* new shortform dir */ + + /* + * Copy the old directory to the stack buffer. + */ + dp = args->dp; + + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + old_isize = (int)dp->i_d.di_size; + oldsfp = (xfs_dir2_sf_t *)buf; + bcopy(sfp, oldsfp, old_isize); + /* + * Loop over the old directory finding the place we're going + * to insert the new entry. + * If it's going to end up at the end then oldsfep will point there. 
+ */ + for (offset = XFS_DIR2_DATA_FIRST_OFFSET, + oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp), + add_datasize = XFS_DIR2_DATA_ENTSIZE(args->namelen), + eof = (char *)oldsfep == &buf[old_isize]; + !eof; + offset = new_offset + XFS_DIR2_DATA_ENTSIZE(oldsfep->namelen), + oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep), + eof = (char *)oldsfep == &buf[old_isize]) { + new_offset = XFS_DIR2_SF_GET_OFFSET_ARCH(oldsfep, ARCH_CONVERT); + if (offset + add_datasize <= new_offset) + break; + } + /* + * Get rid of the old directory, then allocate space for + * the new one. We do this so xfs_idata_realloc won't copy + * the data. + */ + xfs_idata_realloc(dp, -old_isize, XFS_DATA_FORK); + xfs_idata_realloc(dp, new_isize, XFS_DATA_FORK); + /* + * Reset the pointer since the buffer was reallocated. + */ + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + /* + * Copy the first part of the directory, including the header. + */ + nbytes = (int)((char *)oldsfep - (char *)oldsfp); + bcopy(oldsfp, sfp, nbytes); + sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + nbytes); + /* + * Fill in the new entry, and update the header counts. + */ + sfep->namelen = args->namelen; + XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep, offset, ARCH_CONVERT); + bcopy(args->name, sfep->name, sfep->namelen); + XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber, + XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT); + sfp->hdr.count++; +#if XFS_BIG_FILESYSTEMS + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) + sfp->hdr.i8count++; +#endif + /* + * If there's more left to copy, do that. + */ + if (!eof) { + sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); + bcopy(oldsfep, sfep, old_isize - nbytes); + } + dp->i_d.di_size = new_isize; + xfs_dir2_sf_check(args); +} + +/* + * Decide if the new entry will fit at all. + * If it will fit, pick between adding the new entry to the end (easy) + * or somewhere else (hard). + * Return 0 (won't fit), 1 (easy), 2 (hard). 
+ */ +/*ARGSUSED*/ +STATIC int /* pick result */ +xfs_dir2_sf_addname_pick( + xfs_da_args_t *args, /* operation arguments */ + int objchange, /* inode # size changes */ + xfs_dir2_sf_entry_t **sfepp, /* out(1): new entry ptr */ + xfs_dir2_data_aoff_t *offsetp) /* out(1): new offset */ +{ + xfs_inode_t *dp; /* incore directory inode */ + int holefit; /* found hole it will fit in */ + int i; /* entry number */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_dir2_data_aoff_t offset; /* data block offset */ + xfs_dir2_sf_entry_t *sfep; /* shortform entry */ + xfs_dir2_sf_t *sfp; /* shortform structure */ + int size; /* entry's data size */ + int used; /* data bytes used */ + + dp = args->dp; + mp = dp->i_mount; + + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + size = XFS_DIR2_DATA_ENTSIZE(args->namelen); + offset = XFS_DIR2_DATA_FIRST_OFFSET; + sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + holefit = 0; + /* + * Loop over sf entries. + * Keep track of data offset and whether we've seen a place + * to insert the new entry. + */ + for (i = 0; i < sfp->hdr.count; i++) { + if (!holefit) + holefit = offset + size <= XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT); + offset = XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT) + + XFS_DIR2_DATA_ENTSIZE(sfep->namelen); + sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep); + } + /* + * Calculate data bytes used excluding the new entry, if this + * was a data block (block form directory). + */ + used = offset + + (sfp->hdr.count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (uint)sizeof(xfs_dir2_block_tail_t); + /* + * If it won't fit in a block form then we can't insert it, + * we'll go back, convert to block, then try the insert and convert + * to leaf. + */ + if (used + (holefit ? 0 : size) > mp->m_dirblksize) + return 0; + /* + * If changing the inode number size, do it the hard way. 
+ */ +#if XFS_BIG_FILESYSTEMS + if (objchange) { +#pragma mips_frequency_hint NEVER + return 2; + } +#else + ASSERT(objchange == 0); +#endif + /* + * If it won't fit at the end then do it the hard way (use the hole). + */ + if (used + size > mp->m_dirblksize) + return 2; + /* + * Do it the easy way. + */ + *sfepp = sfep; + *offsetp = offset; + return 1; +} + +#ifdef DEBUG +/* + * Check consistency of shortform directory, assert if bad. + */ +STATIC void +xfs_dir2_sf_check( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_inode_t *dp; /* incore directory inode */ + int i; /* entry number */ + int i8count; /* number of big inode#s */ + xfs_ino_t ino; /* entry inode number */ + int offset; /* data offset */ + xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */ + xfs_dir2_sf_t *sfp; /* shortform structure */ + + dp = args->dp; + + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + offset = XFS_DIR2_DATA_FIRST_OFFSET; + ino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT); + i8count = ino > XFS_DIR2_MAX_SHORT_INUM; + + for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + i < sfp->hdr.count; + i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { + ASSERT(XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT) >= offset); + ino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT); + i8count += ino > XFS_DIR2_MAX_SHORT_INUM; + offset = + XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT) + + XFS_DIR2_DATA_ENTSIZE(sfep->namelen); + } + ASSERT(i8count == sfp->hdr.i8count); +#if !XFS_BIG_FILESYSTEMS + ASSERT(i8count == 0); +#endif + ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size); + ASSERT(offset + + (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (uint)sizeof(xfs_dir2_block_tail_t) <= + dp->i_mount->m_dirblksize); +} +#endif /* DEBUG */ + +/* + * Create a new (shortform) directory. 
+ */
+int					/* error, always 0 */
+xfs_dir2_sf_create(
+	xfs_da_args_t	*args,		/* operation arguments */
+	xfs_ino_t	pino)		/* parent inode number */
+{
+	xfs_inode_t	*dp;		/* incore directory inode */
+	int		i8count;	/* parent inode is an 8-byte number */
+	xfs_dir2_sf_t	*sfp;		/* shortform structure */
+	int		size;		/* directory size */
+
+	xfs_dir2_trace_args_i("sf_create", args, pino);
+	dp = args->dp;
+
+	ASSERT(dp != NULL);
+	ASSERT(dp->i_d.di_size == 0);
+	/*
+	 * A zero-length extent-format inode is switched to local format
+	 * before any shortform data is added.
+	 */
+	if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) {
+		dp->i_df.if_flags &= ~XFS_IFEXTENTS;	/* just in case */
+		dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+		xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
+		dp->i_df.if_flags |= XFS_IFINLINE;
+	}
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	ASSERT(dp->i_df.if_bytes == 0);
+	/*
+	 * An empty directory is just the header; its size depends on
+	 * whether the parent inode number needs 8 bytes.
+	 */
+	i8count = (pino > XFS_DIR2_MAX_SHORT_INUM) ? 1 : 0;
+	size = XFS_DIR2_SF_HDR_SIZE(i8count);
+	/*
+	 * Allocate the in-inode space for the header.
+	 */
+	xfs_idata_realloc(dp, size, XFS_DATA_FORK);
+	sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+	/*
+	 * i8count has to be stored before the parent inode number is
+	 * written, since the put macro is handed sfp to work from.
+	 */
+	sfp->hdr.i8count = i8count;
+	XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &pino, &sfp->hdr.parent, ARCH_CONVERT);
+	sfp->hdr.count = 0;
+	dp->i_d.di_size = size;
+	xfs_dir2_sf_check(args);
+	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+	return 0;
+}
+
+/*
+ * Lookup an entry in a shortform directory.
+ * Returns EEXIST if found, ENOENT if not found.
+ */ +int /* error */ +xfs_dir2_sf_lookup( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_inode_t *dp; /* incore directory inode */ + int i; /* entry index */ + xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ + xfs_dir2_sf_t *sfp; /* shortform structure */ + + xfs_dir2_trace_args("sf_lookup", args); + xfs_dir2_sf_check(args); + dp = args->dp; + + ASSERT(dp->i_df.if_flags & XFS_IFINLINE); + /* + * Bail out if the directory is way too short. + */ + if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { +#pragma mips_frequency_hint NEVER + ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); + return XFS_ERROR(EIO); + } + ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); + ASSERT(dp->i_df.if_u1.if_data != NULL); + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); + /* + * Special case for . + */ + if (args->namelen == 1 && args->name[0] == '.') { + args->inumber = dp->i_ino; + return XFS_ERROR(EEXIST); + } + /* + * Special case for .. + */ + if (args->namelen == 2 && + args->name[0] == '.' && args->name[1] == '.') { + args->inumber = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT); + return XFS_ERROR(EEXIST); + } + /* + * Loop over all the entries trying to match ours. + */ + for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + i < sfp->hdr.count; + i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { + if (sfep->namelen == args->namelen && + sfep->name[0] == args->name[0] && + bcmp(args->name, sfep->name, args->namelen) == 0) { + args->inumber = + XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, + XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT); + return XFS_ERROR(EEXIST); + } + } + /* + * Didn't find it. + */ + ASSERT(args->oknoent); + return XFS_ERROR(ENOENT); +} + +/* + * Remove an entry from a shortform directory. 
+ */ +int /* error */ +xfs_dir2_sf_removename( + xfs_da_args_t *args) +{ + int byteoff; /* offset of removed entry */ + xfs_inode_t *dp; /* incore directory inode */ + int entsize; /* this entry's size */ + int i; /* shortform entry index */ + int newsize; /* new inode size */ + int oldsize; /* old inode size */ + xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ + xfs_dir2_sf_t *sfp; /* shortform structure */ + + xfs_dir2_trace_args("sf_removename", args); + dp = args->dp; + + ASSERT(dp->i_df.if_flags & XFS_IFINLINE); + oldsize = (int)dp->i_d.di_size; + /* + * Bail out if the directory is way too short. + */ + if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) { +#pragma mips_frequency_hint NEVER + ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); + return XFS_ERROR(EIO); + } + ASSERT(dp->i_df.if_bytes == oldsize); + ASSERT(dp->i_df.if_u1.if_data != NULL); + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsize >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); + /* + * Loop over the old directory entries. + * Find the one we're deleting. + */ + for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + i < sfp->hdr.count; + i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { + if (sfep->namelen == args->namelen && + sfep->name[0] == args->name[0] && + bcmp(sfep->name, args->name, args->namelen) == 0) { + ASSERT(XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, + XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT) == + args->inumber); + break; + } + } + /* + * Didn't find it. + */ + if (i == sfp->hdr.count) { +#pragma mips_frequency_hint NEVER + return XFS_ERROR(ENOENT); + } + /* + * Calculate sizes. + */ + byteoff = (int)((char *)sfep - (char *)sfp); + entsize = XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, args->namelen); + newsize = oldsize - entsize; + /* + * Copy the part if any after the removed entry, sliding it down. + */ + if (byteoff + entsize < oldsize) + ovbcopy((char *)sfp + byteoff + entsize, (char *)sfp + byteoff, + oldsize - (byteoff + entsize)); + /* + * Fix up the header and file size. 
+ */ + sfp->hdr.count--; + dp->i_d.di_size = newsize; + /* + * Reallocate, making it smaller. + */ + xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK); + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; +#if XFS_BIG_FILESYSTEMS + /* + * Are we changing inode number size? + */ + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) { +#pragma mips_frequency_hint NEVER + if (sfp->hdr.i8count == 1) + xfs_dir2_sf_toino4(args); + else + sfp->hdr.i8count--; + } +#endif + xfs_dir2_sf_check(args); + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); + return 0; +} + +/* + * Replace the inode number of an entry in a shortform directory. + */ +int /* error */ +xfs_dir2_sf_replace( + xfs_da_args_t *args) /* operation arguments */ +{ + xfs_inode_t *dp; /* incore directory inode */ + int i; /* entry index */ +#if XFS_BIG_FILESYSTEMS || defined(DEBUG) + xfs_ino_t ino; /* entry old inode number */ +#endif + xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ + xfs_dir2_sf_t *sfp; /* shortform structure */ + + xfs_dir2_trace_args("sf_replace", args); + dp = args->dp; + + ASSERT(dp->i_df.if_flags & XFS_IFINLINE); + /* + * Bail out if the shortform directory is way too small. + */ + if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { +#pragma mips_frequency_hint NEVER + ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); + return XFS_ERROR(EIO); + } + ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); + ASSERT(dp->i_df.if_u1.if_data != NULL); + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(dp->i_d.di_size >= XFS_DIR2_SF_HDR_SIZE(sfp->hdr.i8count)); +#if XFS_BIG_FILESYSTEMS + /* + * New inode number is large, and need to convert to 8-byte inodes. 
+ */ + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) { +#pragma mips_frequency_hint NEVER + int error; /* error return value */ + int newsize; /* new inode size */ + + newsize = + dp->i_df.if_bytes + + (sfp->hdr.count + 1) * + ((uint)sizeof(xfs_dir2_ino8_t) - + (uint)sizeof(xfs_dir2_ino4_t)); + /* + * Won't fit as shortform, convert to block then do replace. + */ + if (newsize > XFS_IFORK_DSIZE(dp)) { + error = xfs_dir2_sf_to_block(args); + if (error) { + return error; + } + return xfs_dir2_block_replace(args); + } + /* + * Still fits, convert to 8-byte now. + */ + xfs_dir2_sf_toino8(args); + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + } +#endif + ASSERT(args->namelen != 1 || args->name[0] != '.'); + /* + * Replace ..'s entry. + */ + if (args->namelen == 2 && + args->name[0] == '.' && args->name[1] == '.') { +#if XFS_BIG_FILESYSTEMS || defined(DEBUG) + ino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT); + ASSERT(args->inumber != ino); +#endif + XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber, &sfp->hdr.parent, ARCH_CONVERT); + } + /* + * Normal entry, look for the name. + */ + else { + for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + i < sfp->hdr.count; + i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { + if (sfep->namelen == args->namelen && + sfep->name[0] == args->name[0] && + bcmp(args->name, sfep->name, args->namelen) == 0) { +#if XFS_BIG_FILESYSTEMS || defined(DEBUG) + ino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, + XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT); + ASSERT(args->inumber != ino); +#endif + XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &args->inumber, + XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT); + break; + } + } + /* + * Didn't find it. + */ + if (i == sfp->hdr.count) { +#pragma mips_frequency_hint NEVER + ASSERT(args->oknoent); + return XFS_ERROR(ENOENT); + } + } +#if XFS_BIG_FILESYSTEMS + /* + * See if the old number was large, the new number is small. 
+ */ + if (ino > XFS_DIR2_MAX_SHORT_INUM && + args->inumber <= XFS_DIR2_MAX_SHORT_INUM) { +#pragma mips_frequency_hint NEVER + /* + * And the old count was one, so need to convert to small. + */ + if (sfp->hdr.i8count == 1) + xfs_dir2_sf_toino4(args); + else + sfp->hdr.i8count--; + } +#endif + xfs_dir2_sf_check(args); + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA); + return 0; +} + +#if XFS_BIG_FILESYSTEMS +/* + * Convert from 8-byte inode numbers to 4-byte inode numbers. + * The last 8-byte inode number is gone, but the count is still 1. + */ +STATIC void +xfs_dir2_sf_toino4( + xfs_da_args_t *args) /* operation arguments */ +{ + char *buf; /* old dir's buffer */ + xfs_inode_t *dp; /* incore directory inode */ + int i; /* entry index */ + xfs_ino_t ino; /* entry inode number */ + int newsize; /* new inode size */ + xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ + xfs_dir2_sf_t *oldsfp; /* old sf directory */ + int oldsize; /* old inode size */ + xfs_dir2_sf_entry_t *sfep; /* new sf entry */ + xfs_dir2_sf_t *sfp; /* new sf directory */ + + xfs_dir2_trace_args("sf_toino4", args); + dp = args->dp; + + /* + * Copy the old directory to the buffer. + * Then nuke it from the inode, and add the new buffer to the inode. + * Don't want xfs_idata_realloc copying the data here. + */ + oldsize = dp->i_df.if_bytes; + buf = kmem_alloc(oldsize, KM_SLEEP); + oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsfp->hdr.i8count == 1); + bcopy(oldsfp, buf, oldsize); + /* + * Compute the new inode size. + */ + newsize = + oldsize - + (oldsfp->hdr.count + 1) * + ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); + xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); + xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); + /* + * Reset our pointers, the data has moved. + */ + oldsfp = (xfs_dir2_sf_t *)buf; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + /* + * Fill in the new header. 
+ */ + sfp->hdr.count = oldsfp->hdr.count; + sfp->hdr.i8count = 0; + ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp, &oldsfp->hdr.parent, ARCH_CONVERT); + XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, &sfp->hdr.parent, ARCH_CONVERT); + /* + * Copy the entries field by field. + */ + for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp), + oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp); + i < sfp->hdr.count; + i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep), + oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) { + sfep->namelen = oldsfep->namelen; + sfep->offset = oldsfep->offset; + bcopy(oldsfep->name, sfep->name, sfep->namelen); + ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp, + XFS_DIR2_SF_INUMBERP(oldsfep), ARCH_CONVERT); + XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT); + } + /* + * Clean up the inode. + */ + kmem_free(buf, oldsize); + dp->i_d.di_size = newsize; + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); +} + +/* + * Convert from 4-byte inode numbers to 8-byte inode numbers. + * The new 8-byte inode number is not there yet, we leave with the + * count 1 but no corresponding entry. + */ +STATIC void +xfs_dir2_sf_toino8( + xfs_da_args_t *args) /* operation arguments */ +{ + char *buf; /* old dir's buffer */ + xfs_inode_t *dp; /* incore directory inode */ + int i; /* entry index */ + xfs_ino_t ino; /* entry inode number */ + int newsize; /* new inode size */ + xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ + xfs_dir2_sf_t *oldsfp; /* old sf directory */ + int oldsize; /* old inode size */ + xfs_dir2_sf_entry_t *sfep; /* new sf entry */ + xfs_dir2_sf_t *sfp; /* new sf directory */ + + xfs_dir2_trace_args("sf_toino8", args); + dp = args->dp; + + /* + * Copy the old directory to the buffer. + * Then nuke it from the inode, and add the new buffer to the inode. + * Don't want xfs_idata_realloc copying the data here. 
+ */ + oldsize = dp->i_df.if_bytes; + buf = kmem_alloc(oldsize, KM_SLEEP); + oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + ASSERT(oldsfp->hdr.i8count == 0); + bcopy(oldsfp, buf, oldsize); + /* + * Compute the new inode size. + */ + newsize = + oldsize + + (oldsfp->hdr.count + 1) * + ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); + xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); + xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); + /* + * Reset our pointers, the data has moved. + */ + oldsfp = (xfs_dir2_sf_t *)buf; + sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; + /* + * Fill in the new header. + */ + sfp->hdr.count = oldsfp->hdr.count; + sfp->hdr.i8count = 1; + ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp, &oldsfp->hdr.parent, ARCH_CONVERT); + XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, &sfp->hdr.parent, ARCH_CONVERT); + /* + * Copy the entries field by field. + */ + for (i = 0, sfep = XFS_DIR2_SF_FIRSTENTRY(sfp), + oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp); + i < sfp->hdr.count; + i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep), + oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep)) { + sfep->namelen = oldsfep->namelen; + sfep->offset = oldsfep->offset; + bcopy(oldsfep->name, sfep->name, sfep->namelen); + ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp, + XFS_DIR2_SF_INUMBERP(oldsfep), ARCH_CONVERT); + XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &ino, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT); + } + /* + * Clean up the inode. + */ + kmem_free(buf, oldsize); + dp->i_d.di_size = newsize; + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); +} +#endif /* XFS_BIG_FILESYSTEMS */ diff --git a/libxfs/xfs_dir_leaf.c b/libxfs/xfs_dir_leaf.c new file mode 100644 index 000000000..40c12148e --- /dev/null +++ b/libxfs/xfs_dir_leaf.c @@ -0,0 +1,1695 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include + +/* + * xfs_dir_leaf.c + * + * Routines to implement leaf blocks of directories as Btrees of hashed names. + */ + +/* + * Validate a given inode number. 
+ */ +int +xfs_dir_ino_validate(xfs_mount_t *mp, xfs_ino_t ino) +{ + xfs_agblock_t agblkno; + xfs_agino_t agino; + xfs_agnumber_t agno; + int ino_ok; + int ioff; + + agno = XFS_INO_TO_AGNO(mp, ino); + agblkno = XFS_INO_TO_AGBNO(mp, ino); + ioff = XFS_INO_TO_OFFSET(mp, ino); + agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff); + ino_ok = + agno < mp->m_sb.sb_agcount && + agblkno < mp->m_sb.sb_agblocks && + agblkno != 0 && + ioff < (1 << mp->m_sb.sb_inopblog) && + XFS_AGINO_TO_INO(mp, agno, agino) == ino; + if (XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, + XFS_RANDOM_DIR_INO_VALIDATE)) { + xfs_fs_cmn_err(CE_WARN, mp, + "Invalid inode number 0x%Lx\n", ino); + return XFS_ERROR(EFSCORRUPTED); + } + return 0; +} + +/* + * Create the initial contents of a shortform directory. + */ +int +xfs_dir_shortform_create(xfs_da_args_t *args, xfs_ino_t parent) +{ + xfs_dir_sf_hdr_t *hdr; + xfs_inode_t *dp; + + dp = args->dp; + ASSERT(dp != NULL); + ASSERT(dp->i_d.di_size == 0); + if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) { + dp->i_df.if_flags &= ~XFS_IFEXTENTS; /* just in case */ + dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); + dp->i_df.if_flags |= XFS_IFINLINE; + } + ASSERT(dp->i_df.if_flags & XFS_IFINLINE); + ASSERT(dp->i_df.if_bytes == 0); + xfs_idata_realloc(dp, sizeof(*hdr), XFS_DATA_FORK); + hdr = (xfs_dir_sf_hdr_t *)dp->i_df.if_u1.if_data; + XFS_DIR_SF_PUT_DIRINO_ARCH(&parent, &hdr->parent, ARCH_CONVERT); + + INT_ZERO(hdr->count, ARCH_CONVERT); + dp->i_d.di_size = sizeof(*hdr); + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); + return(0); +} + +/* + * Add a name to the shortform directory structure. + * Overflow from the inode has already been checked for. 
+ */
+int
+xfs_dir_shortform_addname(xfs_da_args_t *args)	/* in: dp, name, namelen, inumber, trans */
+{
+	xfs_dir_shortform_t *sf;
+	xfs_dir_sf_entry_t *sfe;
+	int i, offset, size;	/* loop counter, byte offset of new entry, its size */
+	xfs_inode_t *dp;
+
+	dp = args->dp;
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	/*
+	 * Catch the case where the conversion from shortform to leaf
+	 * failed part way through.
+	 */
+	if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+		return XFS_ERROR(EIO);
+	}
+	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+	ASSERT(dp->i_df.if_u1.if_data != NULL);
+	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
+	sfe = &sf->list[0];
+	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
+		if (sfe->namelen == args->namelen &&
+		    args->name[0] == sfe->name[0] &&
+		    bcmp(args->name, sfe->name, args->namelen) == 0)
+			return(XFS_ERROR(EEXIST));	/* name is already present */
+		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
+	}
+
+	offset = (int)((char *)sfe - (char *)sf);	/* sfe is just past the last entry */
+	size = XFS_DIR_SF_ENTSIZE_BYNAME(args->namelen);
+	xfs_idata_realloc(dp, size, XFS_DATA_FORK);
+	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;	/* re-fetch after the realloc */
+	sfe = (xfs_dir_sf_entry_t *)((char *)sf + offset);
+
+	XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &sfe->inumber, ARCH_CONVERT);
+	sfe->namelen = args->namelen;
+	bcopy(args->name, sfe->name, sfe->namelen);
+	INT_MOD(sf->hdr.count, ARCH_CONVERT, +1);
+
+	dp->i_d.di_size += size;
+	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+
+	return(0);
+}
+
+/*
+ * Remove a name from the shortform directory structure.
+ *
+ * The entries are walked while tracking the byte offset of the current
+ * one; when the name matches, any entries after it are slid down over
+ * it, the entry count is decremented and the data fork is shrunk by the
+ * size of the removed entry.
+ *
+ * Returns 0 on success, EIO if the inline data is smaller than a
+ * shortform header, ENOENT if no entry matches.
+ */
+int
+xfs_dir_shortform_removename(xfs_da_args_t *args)
+{
+	xfs_dir_shortform_t *sf;
+	xfs_dir_sf_entry_t *sfe;
+	int base, size, i;	/* offset of current entry, its size, loop counter */
+	xfs_inode_t *dp;
+
+	dp = args->dp;
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	/*
+	 * Catch the case where the conversion from shortform to leaf
+	 * failed part way through.
+	 */
+	if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+		return XFS_ERROR(EIO);
+	}
+	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+	ASSERT(dp->i_df.if_u1.if_data != NULL);
+	base = sizeof(xfs_dir_sf_hdr_t);
+	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
+	sfe = &sf->list[0];
+	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
+		size = XFS_DIR_SF_ENTSIZE_BYENTRY(sfe);
+		if (sfe->namelen == args->namelen &&
+		    sfe->name[0] == args->name[0] &&
+		    bcmp(sfe->name, args->name, args->namelen) == 0)
+			break;
+		base += size;
+		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
+	}
+	if (i < 0) {	/* walked every entry without a match */
+		ASSERT(args->oknoent);
+		return(XFS_ERROR(ENOENT));
+	}
+
+	if ((base + size) != dp->i_d.di_size) {	/* not the last entry: close the gap */
+		ovbcopy(&((char *)sf)[base+size], &((char *)sf)[base],
+					      dp->i_d.di_size - (base+size));
+	}
+	INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
+
+	xfs_idata_realloc(dp, -size, XFS_DATA_FORK);
+	dp->i_d.di_size -= size;
+	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
+
+	return(0);
+}
+
+/*
+ * Look up a name in a shortform directory structure.
+ *
+ * A successful lookup stores the inode number in args->inumber and
+ * returns EEXIST: ".." yields the parent recorded in the header, "."
+ * yields the directory's own inode number, anything else is searched
+ * for in the entry list.  Returns ENOENT when nothing matches and EIO
+ * if the inline data is smaller than a shortform header.
+ */
+int
+xfs_dir_shortform_lookup(xfs_da_args_t *args)
+{
+	xfs_dir_shortform_t *sf;
+	xfs_dir_sf_entry_t *sfe;
+	int i;
+	xfs_inode_t *dp;
+
+	dp = args->dp;
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	/*
+	 * Catch the case where the conversion from shortform to leaf
+	 * failed part way through.
+	 */
+	if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+		return XFS_ERROR(EIO);
+	}
+	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+	ASSERT(dp->i_df.if_u1.if_data != NULL);
+	sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
+	if (args->namelen == 2 &&
+	    args->name[0] == '.' && args->name[1] == '.') {
+		XFS_DIR_SF_GET_DIRINO_ARCH(&sf->hdr.parent, &args->inumber, ARCH_CONVERT);
+		return(XFS_ERROR(EEXIST));
+	}
+	if (args->namelen == 1 && args->name[0] == '.') {
+		args->inumber = dp->i_ino;
+		return(XFS_ERROR(EEXIST));
+	}
+	sfe = &sf->list[0];
+	for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
+		if (sfe->namelen == args->namelen &&
+		    sfe->name[0] == args->name[0] &&
+		    bcmp(args->name, sfe->name, args->namelen) == 0) {
+			XFS_DIR_SF_GET_DIRINO_ARCH(&sfe->inumber, &args->inumber, ARCH_CONVERT);
+			return(XFS_ERROR(EEXIST));
+		}
+		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
+	}
+	ASSERT(args->oknoent);
+	return(XFS_ERROR(ENOENT));
+}
+
+/*
+ * Convert from using the shortform to the leaf.
+ *
+ * The inline data is copied into a temporary buffer and the data fork
+ * is emptied; a block is then allocated (asserted to be block 0) and
+ * initialised as an empty leaf.  ".", ".." (using the parent saved from
+ * the shortform header) and every saved entry are re-added through
+ * xfs_dir_leaf_addname().
+ *
+ * Returns 0 on success, EIO if the inline data is smaller than a
+ * shortform header, or the first error from the calls above.  Once the
+ * temporary buffer has been allocated it is freed on every return path.
+ */
+int
+xfs_dir_shortform_to_leaf(xfs_da_args_t *iargs)
+{
+	xfs_inode_t *dp;
+	xfs_dir_shortform_t *sf;
+	xfs_dir_sf_entry_t *sfe;
+	xfs_da_args_t args;	/* private args reused for each leaf add */
+	xfs_ino_t inumber;	/* parent inode number from the old header */
+	char *tmpbuffer;	/* saved copy of the shortform data */
+	int retval, i, size;
+	xfs_dablk_t blkno;
+	xfs_dabuf_t *bp;
+
+	dp = iargs->dp;
+	/*
+	 * Catch the case where the conversion from shortform to leaf
+	 * failed part way through.
+	 */
+	if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
+#pragma mips_frequency_hint NEVER
+		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
+		return XFS_ERROR(EIO);
+	}
+	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+	ASSERT(dp->i_df.if_u1.if_data != NULL);
+	size = dp->i_df.if_bytes;
+	tmpbuffer = kmem_alloc(size, KM_SLEEP);
+	ASSERT(tmpbuffer != NULL);
+
+	bcopy(dp->i_df.if_u1.if_data, tmpbuffer, size);
+
+	sf = (xfs_dir_shortform_t *)tmpbuffer;	/* from here on, read the saved copy */
+	XFS_DIR_SF_GET_DIRINO_ARCH(&sf->hdr.parent, &inumber, ARCH_CONVERT);
+
+	xfs_idata_realloc(dp, -size, XFS_DATA_FORK);
+	dp->i_d.di_size = 0;
+	xfs_trans_log_inode(iargs->trans, dp, XFS_ILOG_CORE);
+	retval = xfs_da_grow_inode(iargs, &blkno);
+	if (retval)
+		goto out;
+
+	ASSERT(blkno == 0);
+	retval = xfs_dir_leaf_create(iargs, blkno, &bp);
+	if (retval)
+		goto out;
+	xfs_da_buf_done(bp);
+
+	args.name = ".";
+	args.namelen = 1;
+	args.hashval = xfs_dir_hash_dot;
+	args.inumber = dp->i_ino;
+	args.dp = dp;
+	args.firstblock = iargs->firstblock;
+	args.flist = iargs->flist;
+	args.total = iargs->total;
+	args.whichfork = XFS_DATA_FORK;
+	args.trans = iargs->trans;
+	args.justcheck = 0;
+	args.addname = args.oknoent = 1;
+	retval = xfs_dir_leaf_addname(&args);
+	if (retval)
+		goto out;
+
+	args.name = "..";
+	args.namelen = 2;
+	args.hashval = xfs_dir_hash_dotdot;
+	args.inumber = inumber;
+	retval = xfs_dir_leaf_addname(&args);
+	if (retval)
+		goto out;
+
+	sfe = &sf->list[0];
+	for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) {
+		args.name = (char *)(sfe->name);
+		args.namelen = sfe->namelen;
+		args.hashval = xfs_da_hashname((char *)(sfe->name),
+					       sfe->namelen);
+		XFS_DIR_SF_GET_DIRINO_ARCH(&sfe->inumber, &args.inumber, ARCH_CONVERT);
+		retval = xfs_dir_leaf_addname(&args);
+		if (retval)
+			goto out;
+		sfe = XFS_DIR_SF_NEXTENTRY(sfe);
+	}
+	retval = 0;
+
+out:
+	kmem_free(tmpbuffer, size);
+	return(retval);
+}
+
+/*
+ * Look up a name in a shortform directory structure, replace the inode number.
+ */ +int +xfs_dir_shortform_replace(xfs_da_args_t *args) +{ + xfs_dir_shortform_t *sf; + xfs_dir_sf_entry_t *sfe; + xfs_inode_t *dp; + int i; + + dp = args->dp; + ASSERT(dp->i_df.if_flags & XFS_IFINLINE); + /* + * Catch the case where the conversion from shortform to leaf + * failed part way through. + */ + if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) { +#pragma mips_frequency_hint NEVER + ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); + return XFS_ERROR(EIO); + } + ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); + ASSERT(dp->i_df.if_u1.if_data != NULL); + sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data; + if (args->namelen == 2 && + args->name[0] == '.' && args->name[1] == '.') { + /* XXX - replace assert? */ + XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &sf->hdr.parent, ARCH_CONVERT); + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA); + return(0); + } + ASSERT(args->namelen != 1 || args->name[0] != '.'); + sfe = &sf->list[0]; + for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) { + if (sfe->namelen == args->namelen && + sfe->name[0] == args->name[0] && + bcmp(args->name, sfe->name, args->namelen) == 0) { + ASSERT(bcmp((char *)&args->inumber, + (char *)&sfe->inumber, sizeof(xfs_ino_t))); + XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &sfe->inumber, ARCH_CONVERT); + xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA); + return(0); + } + sfe = XFS_DIR_SF_NEXTENTRY(sfe); + } + ASSERT(args->oknoent); + return(XFS_ERROR(ENOENT)); +} + +/* + * Convert a leaf directory to shortform structure + */ +int +xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs) +{ + xfs_dir_leafblock_t *leaf; + xfs_dir_leaf_hdr_t *hdr; + xfs_dir_leaf_entry_t *entry; + xfs_dir_leaf_name_t *namest; + xfs_da_args_t args; + xfs_inode_t *dp; + xfs_ino_t parent; + char *tmpbuffer; + int retval, i; + xfs_dabuf_t *bp; + + dp = iargs->dp; + tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP); + ASSERT(tmpbuffer != NULL); + + retval = xfs_da_read_buf(iargs->trans, iargs->dp, 0, -1, &bp, + 
XFS_DATA_FORK); + if (retval) + return(retval); + ASSERT(bp != NULL); + bcopy(bp->data, tmpbuffer, XFS_LBSIZE(dp->i_mount)); + leaf = (xfs_dir_leafblock_t *)tmpbuffer; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + bzero(bp->data, XFS_LBSIZE(dp->i_mount)); + + /* + * Find and special case the parent inode number + */ + hdr = &leaf->hdr; + entry = &leaf->entries[0]; + for (i = INT_GET(hdr->count, ARCH_CONVERT)-1; i >= 0; entry++, i--) { + namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)); + if ((entry->namelen == 2) && + (namest->name[0] == '.') && + (namest->name[1] == '.')) { + XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &parent, ARCH_CONVERT); + INT_ZERO(entry->nameidx, ARCH_CONVERT); + } else if ((entry->namelen == 1) && (namest->name[0] == '.')) { + INT_ZERO(entry->nameidx, ARCH_CONVERT); + } + } + retval = xfs_da_shrink_inode(iargs, 0, bp); + if (retval) + goto out; + retval = xfs_dir_shortform_create(iargs, parent); + if (retval) + goto out; + + /* + * Copy the rest of the filenames + */ + entry = &leaf->entries[0]; + args.dp = dp; + args.firstblock = iargs->firstblock; + args.flist = iargs->flist; + args.total = iargs->total; + args.whichfork = XFS_DATA_FORK; + args.trans = iargs->trans; + args.justcheck = 0; + args.addname = args.oknoent = 1; + for (i = 0; i < INT_GET(hdr->count, ARCH_CONVERT); entry++, i++) { + if (INT_GET(entry->nameidx, ARCH_CONVERT) == 0) + continue; + namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)); + args.name = (char *)(namest->name); + args.namelen = entry->namelen; + args.hashval = INT_GET(entry->hashval, ARCH_CONVERT); + XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &args.inumber, ARCH_CONVERT); + xfs_dir_shortform_addname(&args); + } + +out: + kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount)); + return(retval); +} + +/* + * Convert from using a single leaf to a root node and a leaf. 
+ */
+int
+xfs_dir_leaf_to_node(xfs_da_args_t *args)
+{
+	xfs_dir_leafblock_t *leaf;
+	xfs_da_intnode_t *node;
+	xfs_inode_t *dp;
+	xfs_dabuf_t *bp1, *bp2;
+	xfs_dablk_t blkno;
+	int retval;
+
+	dp = args->dp;
+	retval = xfs_da_grow_inode(args, &blkno);
+	if (retval)
+		return(retval);
+	/*
+	 * Only look at blkno once the grow is known to have succeeded;
+	 * on failure it may not have been written.
+	 */
+	ASSERT(blkno == 1);
+
+	/*
+	 * Copy the existing leaf from block 0 into the new block 1.
+	 */
+	retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp1,
+					      XFS_DATA_FORK);
+	if (retval)
+		return(retval);
+	ASSERT(bp1 != NULL);
+	retval = xfs_da_get_buf(args->trans, args->dp, 1, -1, &bp2,
+					     XFS_DATA_FORK);
+	if (retval) {
+		xfs_da_buf_done(bp1);
+		return(retval);
+	}
+	ASSERT(bp2 != NULL);
+	bcopy(bp1->data, bp2->data, XFS_LBSIZE(dp->i_mount));
+	xfs_da_buf_done(bp1);
+	xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
+
+	/*
+	 * Set up the new root node.
+	 * Its single btree entry points at the relocated leaf and
+	 * carries the hash value of that leaf's last entry.
+	 */
+	retval = xfs_da_node_create(args, 0, 1, &bp1, XFS_DATA_FORK);
+	if (retval) {
+		xfs_da_buf_done(bp2);
+		return(retval);
+	}
+	node = bp1->data;
+	leaf = bp2->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	INT_SET(node->btree[0].hashval, ARCH_CONVERT, INT_GET(leaf->entries[ INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
+	xfs_da_buf_done(bp2);
+	INT_SET(node->btree[0].before, ARCH_CONVERT, blkno);
+	INT_SET(node->hdr.count, ARCH_CONVERT, 1);
+	xfs_da_log_buf(args->trans, bp1,
+		XFS_DA_LOGRANGE(node, &node->btree[0], sizeof(node->btree[0])));
+	xfs_da_buf_done(bp1);
+
+	return(retval);
+}
+
+
+/*========================================================================
+ * Routines used for growing the Btree.
+ *========================================================================*/
+
+/*
+ * Create the initial contents of a leaf directory
+ * or a leaf in a node directory.
+ */
+int
+xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)	/* out: *bpp = buffer of the new leaf */
+{
+	xfs_dir_leafblock_t *leaf;
+	xfs_dir_leaf_hdr_t *hdr;
+	xfs_inode_t *dp;
+	xfs_dabuf_t *bp;
+	int retval;
+
+	dp = args->dp;
+	ASSERT(dp != NULL);
+	retval = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp, XFS_DATA_FORK);
+	if (retval)
+		return(retval);
+	ASSERT(bp != NULL);
+	leaf = bp->data;
+	bzero((char *)leaf, XFS_LBSIZE(dp->i_mount));
+	hdr = &leaf->hdr;
+	INT_SET(hdr->info.magic, ARCH_CONVERT, XFS_DIR_LEAF_MAGIC);
+	INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount));
+	if (INT_ISZERO(hdr->firstused, ARCH_CONVERT))	/* presumably the block size did not fit the field - confirm */
+		INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount) - 1);
+	INT_SET(hdr->freemap[0].base, ARCH_CONVERT, sizeof(xfs_dir_leaf_hdr_t));	/* one free region: everything after the header */
+	INT_SET(hdr->freemap[0].size, ARCH_CONVERT, INT_GET(hdr->firstused, ARCH_CONVERT) - INT_GET(hdr->freemap[0].base, ARCH_CONVERT));
+
+	xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
+
+	*bpp = bp;
+	return(0);
+}
+
+/*
+ * Split the leaf node, rebalance, then add the new entry.
+ *
+ * A new block is allocated and initialised as an empty leaf in newblk,
+ * the entries are rebalanced between oldblk and newblk, the new block
+ * is linked in, and the name in state->args is added to whichever block
+ * state->inleaf selects (oldblk when set, newblk otherwise).  The last
+ * hash value of both blocks is refreshed before returning.
+ *
+ * Returns the first error from growing the inode, creating the leaf or
+ * linking the block; otherwise the result of xfs_dir_leaf_add().
+ */
+int
+xfs_dir_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
+				  xfs_da_state_blk_t *newblk)
+{
+	xfs_dablk_t blkno;
+	xfs_da_args_t *args;
+	int error;
+
+	/*
+	 * Allocate space for a new leaf node.
+	 */
+	args = state->args;
+	ASSERT(args != NULL);
+	ASSERT(oldblk->magic == XFS_DIR_LEAF_MAGIC);
+	error = xfs_da_grow_inode(args, &blkno);
+	if (error)
+		return(error);
+	error = xfs_dir_leaf_create(args, blkno, &newblk->bp);
+	if (error)
+		return(error);
+	newblk->blkno = blkno;
+	newblk->magic = XFS_DIR_LEAF_MAGIC;
+
+	/*
+	 * Rebalance the entries across the two leaves.
+	 */
+	xfs_dir_leaf_rebalance(state, oldblk, newblk);
+	error = xfs_da_blk_link(state, oldblk, newblk);
+	if (error)
+		return(error);
+
+	/*
+	 * Insert the new entry in the correct block.
+	 */
+	if (state->inleaf) {
+		error = xfs_dir_leaf_add(oldblk->bp, args, oldblk->index);
+	} else {
+		error = xfs_dir_leaf_add(newblk->bp, args, newblk->index);
+	}
+
+	/*
+	 * Update last hashval in each block since we added the name.
+	 */
+	oldblk->hashval = xfs_dir_leaf_lasthash(oldblk->bp, NULL);
+	newblk->hashval = xfs_dir_leaf_lasthash(newblk->bp, NULL);
+	return(error);
+}
+
+/*
+ * Add a name to the leaf directory structure.
+ *
+ * Must take into account fragmented leaves and leaves where spacemap has
+ * lost some freespace information (ie: holes).
+ *
+ * index is the slot in the entry array where the new entry belongs.
+ * When args->justcheck is set nothing is inserted; the return value
+ * only reports whether the name would fit.
+ *
+ * Returns 0 if the name was added (or would fit), ENOSPC if there is
+ * not enough room even after compaction, or the error returned by
+ * xfs_dir_leaf_compact().
+ */
+int
+xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index)
+{
+	xfs_dir_leafblock_t *leaf;
+	xfs_dir_leaf_hdr_t *hdr;
+	xfs_dir_leaf_map_t *map;
+	int tablesize, entsize, sum, i, tmp, error;
+
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT((index >= 0) && (index <= INT_GET(leaf->hdr.count, ARCH_CONVERT)));
+	hdr = &leaf->hdr;
+	entsize = XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen);
+
+	/*
+	 * Search through freemap for first-fit on new name length.
+	 * (may need to figure in size of entry struct too)
+	 */
+	tablesize = (INT_GET(hdr->count, ARCH_CONVERT) + 1) * (uint)sizeof(xfs_dir_leaf_entry_t)
+			+ (uint)sizeof(xfs_dir_leaf_hdr_t);	/* header plus entry table after the add */
+	map = &hdr->freemap[XFS_DIR_LEAF_MAPSIZE-1];
+	for (sum = 0, i = XFS_DIR_LEAF_MAPSIZE-1; i >= 0; map--, i--) {
+		if (tablesize > INT_GET(hdr->firstused, ARCH_CONVERT)) {	/* table would run into the names: just total the free space */
+			sum += INT_GET(map->size, ARCH_CONVERT);
+			continue;
+		}
+		if (INT_GET(map->size, ARCH_CONVERT) == 0)
+			continue;	/* no space in this map */
+		tmp = entsize;
+		if (INT_GET(map->base, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT))
+			tmp += (uint)sizeof(xfs_dir_leaf_entry_t);
+		if (INT_GET(map->size, ARCH_CONVERT) >= tmp) {
+			if (!args->justcheck)
+				xfs_dir_leaf_add_work(bp, args, index, i);
+			return(0);
+		}
+		sum += INT_GET(map->size, ARCH_CONVERT);
+	}
+
+	/*
+	 * If there are no holes in the address space of the block,
+	 * and we don't have enough freespace, then compaction will do us
+	 * no good and we should just give up.
+	 */
+	if (!hdr->holes && (sum < entsize))
+		return(XFS_ERROR(ENOSPC));
+
+	/*
+	 * Compact the entries to coalesce free space.
+	 * Pass the justcheck flag so the checking pass can return
+	 * an error, without changing anything, if it won't fit.
+	 */
+	error = xfs_dir_leaf_compact(args->trans, bp,
+			args->total == 0 ?
+			entsize +
+			(uint)sizeof(xfs_dir_leaf_entry_t) : 0,
+			args->justcheck);
+	if (error)
+		return(error);
+	/*
+	 * After compaction, the block is guaranteed to have only one
+	 * free region, in freemap[0].  If it is not big enough, give up.
+	 */
+	if (INT_GET(hdr->freemap[0].size, ARCH_CONVERT) <
+	    (entsize + (uint)sizeof(xfs_dir_leaf_entry_t)))
+		return(XFS_ERROR(ENOSPC));
+
+	if (!args->justcheck)
+		xfs_dir_leaf_add_work(bp, args, index, 0);
+	return(0);
+}
+
+/*
+ * Add a name to a leaf directory structure.
+ */
+STATIC void
+xfs_dir_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int index,
+		      int mapindex)	/* freemap slot to carve the name from */
+{
+	xfs_dir_leafblock_t *leaf;
+	xfs_dir_leaf_hdr_t *hdr;
+	xfs_dir_leaf_entry_t *entry;
+	xfs_dir_leaf_name_t *namest;
+	xfs_dir_leaf_map_t *map;
+	/* REFERENCED */
+	xfs_mount_t *mp;
+	int tmp, i;
+
+	leaf = bp->data;
+	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	hdr = &leaf->hdr;
+	ASSERT((mapindex >= 0) && (mapindex < XFS_DIR_LEAF_MAPSIZE));
+	ASSERT((index >= 0) && (index <= INT_GET(hdr->count, ARCH_CONVERT)));
+
+	/*
+	 * Force open some space in the entry array and fill it in.
+	 */
+	entry = &leaf->entries[index];
+	if (index < INT_GET(hdr->count, ARCH_CONVERT)) {
+		tmp  = INT_GET(hdr->count, ARCH_CONVERT) - index;
+		tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
+		ovbcopy(entry, entry + 1, tmp);	/* shift later entries up one slot */
+		xfs_da_log_buf(args->trans, bp,
+		    XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
+	}
+	INT_MOD(hdr->count, ARCH_CONVERT, +1);
+
+	/*
+	 * Allocate space for the new string (at the end of the run).
+	 */
+	map = &hdr->freemap[mapindex];
+	mp = args->trans->t_mountp;
+	ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
+	ASSERT(INT_GET(map->size, ARCH_CONVERT) >= XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen));
+	ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
+	INT_MOD(map->size, ARCH_CONVERT, -(XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen)));
+	INT_SET(entry->nameidx, ARCH_CONVERT, INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT));	/* name goes at the tail of the shrunken region */
+	INT_SET(entry->hashval, ARCH_CONVERT, args->hashval);
+	entry->namelen = args->namelen;
+	xfs_da_log_buf(args->trans, bp,
+	    XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
+
+	/*
+	 * Copy the string and inode number into the new space.
+	 */
+	namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+	XFS_DIR_SF_PUT_DIRINO_ARCH(&args->inumber, &namest->inumber, ARCH_CONVERT);
+	bcopy(args->name, namest->name, args->namelen);
+	xfs_da_log_buf(args->trans, bp,
+	    XFS_DA_LOGRANGE(leaf, namest, XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)));
+
+	/*
+	 * Update the control info for this leaf node
+	 */
+	if (INT_GET(entry->nameidx, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT))
+		INT_COPY(hdr->firstused, entry->nameidx, ARCH_CONVERT);
+	ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) >= ((INT_GET(hdr->count, ARCH_CONVERT)*sizeof(*entry))+sizeof(*hdr)));
+	tmp = (INT_GET(hdr->count, ARCH_CONVERT)-1) * (uint)sizeof(xfs_dir_leaf_entry_t)
+			+ (uint)sizeof(xfs_dir_leaf_hdr_t);	/* where the entry table ended before this add */
+	map = &hdr->freemap[0];
+	for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) {
+		if (INT_GET(map->base, ARCH_CONVERT) == tmp) {	/* region began right after the old table: the new entry took its front */
+			INT_MOD(map->base, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_entry_t));
+			INT_MOD(map->size, ARCH_CONVERT, -((uint)sizeof(xfs_dir_leaf_entry_t)));
+		}
+	}
+	INT_MOD(hdr->namebytes, ARCH_CONVERT, args->namelen);
+	xfs_da_log_buf(args->trans, bp,
+		XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
+}
+
+/*
+ * Garbage collect a leaf directory block by copying it to a new buffer.
+ *
+ * The block is copied aside, zeroed, given a fresh header with a single
+ * free region, and refilled from the copy with xfs_dir_leaf_moveents().
+ *
+ * musthave, when nonzero, is the amount of free space required in
+ * freemap[0] afterwards; if it is not met ENOSPC is returned and the
+ * block is restored from a second saved copy.  With justcheck set the
+ * block is always restored, so the call only reports the outcome.
+ * The buffer is logged only when the compacted contents are kept.
+ */
+STATIC int
+xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave,
+		     int justcheck)
+{
+	xfs_dir_leafblock_t *leaf_s, *leaf_d;
+	xfs_dir_leaf_hdr_t *hdr_s, *hdr_d;
+	xfs_mount_t *mp;
+	char *tmpbuffer;	/* source copy, consumed by moveents */
+	char *tmpbuffer2;	/* pristine copy; allocated only if musthave || justcheck */
+	int rval;
+	int lbsize;
+
+	mp = trans->t_mountp;
+	lbsize = XFS_LBSIZE(mp);
+	tmpbuffer = kmem_alloc(lbsize, KM_SLEEP);
+	ASSERT(tmpbuffer != NULL);
+	bcopy(bp->data, tmpbuffer, lbsize);
+
+	/*
+	 * Make a second copy in case xfs_dir_leaf_moveents()
+	 * below destroys the original.
+	 */
+	if (musthave || justcheck) {
+		tmpbuffer2 = kmem_alloc(lbsize, KM_SLEEP);
+		bcopy(bp->data, tmpbuffer2, lbsize);
+	}
+	bzero(bp->data, lbsize);
+
+	/*
+	 * Copy basic information
+	 */
+	leaf_s = (xfs_dir_leafblock_t *)tmpbuffer;
+	leaf_d = bp->data;
+	hdr_s = &leaf_s->hdr;
+	hdr_d = &leaf_d->hdr;
+	hdr_d->info = hdr_s->info;	/* struct copy */
+	INT_SET(hdr_d->firstused, ARCH_CONVERT, lbsize);
+	if (INT_GET(hdr_d->firstused, ARCH_CONVERT) == 0)	/* presumably lbsize did not fit the field - confirm */
+		INT_SET(hdr_d->firstused, ARCH_CONVERT, lbsize - 1);
+	INT_ZERO(hdr_d->namebytes, ARCH_CONVERT);
+	INT_ZERO(hdr_d->count, ARCH_CONVERT);
+	hdr_d->holes = 0;
+	INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, sizeof(xfs_dir_leaf_hdr_t));
+	INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, INT_GET(hdr_d->firstused, ARCH_CONVERT) - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
+
+	/*
+	 * Copy all entries in the same (sorted) order,
+	 * but allocate filenames packed and in sequence.
+	 * This changes the source (leaf_s) as well.
+	 */
+	xfs_dir_leaf_moveents(leaf_s, 0, leaf_d, 0, (int)INT_GET(hdr_s->count, ARCH_CONVERT), mp);
+
+	if (musthave && INT_GET(hdr_d->freemap[0].size, ARCH_CONVERT) < musthave)
+		rval = XFS_ERROR(ENOSPC);
+	else
+		rval = 0;
+
+	if (justcheck || rval == ENOSPC) {	/* undo: put the original block contents back */
+		ASSERT(tmpbuffer2);
+		bcopy(tmpbuffer2, bp->data, lbsize);
+	} else {
+		xfs_da_log_buf(trans, bp, 0, lbsize - 1);
+	}
+
+	kmem_free(tmpbuffer, lbsize);
+	if (musthave || justcheck)
+		kmem_free(tmpbuffer2, lbsize);
+	return(rval);
+}
+
+/*
+ * Redistribute the directory entries between two leaf nodes,
+ * taking into account the size of the new entry.
+ *
+ * NOTE: if new block is empty, then it will get the upper half of old block.
+ *
+ * The two blocks are swapped locally when xfs_dir_leaf_order() says so,
+ * xfs_dir_leaf_figure_balance() decides how many entries stay in the
+ * first block, and entries are moved in whichever direction is needed,
+ * compacting the destination first if the move looks too tight.  On
+ * return state->inleaf, both blocks' hashval and (when the new entry
+ * goes in the second block) blk2->index have been updated.
+ */
+STATIC void
+xfs_dir_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
+				      xfs_da_state_blk_t *blk2)
+{
+	xfs_da_state_blk_t *tmp_blk;
+	xfs_dir_leafblock_t *leaf1, *leaf2;
+	xfs_dir_leaf_hdr_t *hdr1, *hdr2;
+	int count, totallen, max, space, swap;
+
+	/*
+	 * Set up environment.
+	 */
+	ASSERT(blk1->magic == XFS_DIR_LEAF_MAGIC);
+	ASSERT(blk2->magic == XFS_DIR_LEAF_MAGIC);
+	leaf1 = blk1->bp->data;
+	leaf2 = blk2->bp->data;
+	ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+	ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
+
+	/*
+	 * Check ordering of blocks, reverse if it makes things simpler.
+	 */
+	swap = 0;
+	if (xfs_dir_leaf_order(blk1->bp, blk2->bp)) {
+		tmp_blk = blk1;
+		blk1 = blk2;
+		blk2 = tmp_blk;
+		leaf1 = blk1->bp->data;
+		leaf2 = blk2->bp->data;
+		swap = 1;
+	}
+	hdr1 = &leaf1->hdr;
+	hdr2 = &leaf2->hdr;
+
+	/*
+	 * Examine entries until we reduce the absolute difference in
+	 * byte usage between the two blocks to a minimum.  Then get
+	 * the direction to copy and the number of elements to move.
+	 */
+	state->inleaf = xfs_dir_leaf_figure_balance(state, blk1, blk2,
+							   &count, &totallen);
+	if (swap)	/* the answer was relative to the swapped pair */
+		state->inleaf = !state->inleaf;
+
+	/*
+	 * Move any entries required from leaf to leaf:
+	 */
+	if (count < INT_GET(hdr1->count, ARCH_CONVERT)) {
+		/*
+		 * Figure the total bytes to be added to the destination leaf.
+		 */
+		count = INT_GET(hdr1->count, ARCH_CONVERT) - count;	/* number entries being moved */
+		space  = INT_GET(hdr1->namebytes, ARCH_CONVERT) - totallen;
+		space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1);
+		space += count * (uint)sizeof(xfs_dir_leaf_entry_t);
+
+		/*
+		 * leaf2 is the destination, compact it if it looks tight.
+		 */
+		max  = INT_GET(hdr2->firstused, ARCH_CONVERT) - (uint)sizeof(xfs_dir_leaf_hdr_t);
+		max -= INT_GET(hdr2->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
+		if (space > max) {
+			xfs_dir_leaf_compact(state->args->trans, blk2->bp,
+								 0, 0);
+		}
+
+		/*
+		 * Move high entries from leaf1 to low end of leaf2.
+		 */
+		xfs_dir_leaf_moveents(leaf1, INT_GET(hdr1->count, ARCH_CONVERT) - count,
+					     leaf2, 0, count, state->mp);
+
+		xfs_da_log_buf(state->args->trans, blk1->bp, 0,
+						   state->blocksize-1);
+		xfs_da_log_buf(state->args->trans, blk2->bp, 0,
+						   state->blocksize-1);
+
+	} else if (count > INT_GET(hdr1->count, ARCH_CONVERT)) {
+		/*
+		 * Figure the total bytes to be added to the destination leaf.
+		 */
+		count -= INT_GET(hdr1->count, ARCH_CONVERT);		/* number entries being moved */
+		space  = totallen - INT_GET(hdr1->namebytes, ARCH_CONVERT);
+		space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1);
+		space += count * (uint)sizeof(xfs_dir_leaf_entry_t);
+
+		/*
+		 * leaf1 is the destination, compact it if it looks tight.
+		 */
+		max  = INT_GET(hdr1->firstused, ARCH_CONVERT) - (uint)sizeof(xfs_dir_leaf_hdr_t);
+		max -= INT_GET(hdr1->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
+		if (space > max) {
+			xfs_dir_leaf_compact(state->args->trans, blk1->bp,
+								 0, 0);
+		}
+
+		/*
+		 * Move low entries from leaf2 to high end of leaf1.
+		 */
+		xfs_dir_leaf_moveents(leaf2, 0, leaf1, (int)INT_GET(hdr1->count, ARCH_CONVERT),
+					     count, state->mp);
+
+		xfs_da_log_buf(state->args->trans, blk1->bp, 0,
+						   state->blocksize-1);
+		xfs_da_log_buf(state->args->trans, blk2->bp, 0,
+						   state->blocksize-1);
+	}
+
+	/*
+	 * Copy out last hashval in each block for B-tree code.
+	 */
+	blk1->hashval = INT_GET(leaf1->entries[ INT_GET(leaf1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+	blk2->hashval = INT_GET(leaf2->entries[ INT_GET(leaf2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
+
+	/*
+	 * Adjust the expected index for insertion.
+	 * GROT: this doesn't work unless blk2 was originally empty.
+	 */
+	if (!state->inleaf) {
+		blk2->index = blk1->index - INT_GET(leaf1->hdr.count, ARCH_CONVERT);
+	}
+}
+
+/*
+ * Examine entries until we reduce the absolute difference in
+ * byte usage between the two blocks to a minimum.
+ * GROT: Is this really necessary?
With other than a 512 byte blocksize, + * GROT: there will always be enough room in either block for a new entry. + * GROT: Do a double-split for this case? + */ +STATIC int +xfs_dir_leaf_figure_balance(xfs_da_state_t *state, + xfs_da_state_blk_t *blk1, + xfs_da_state_blk_t *blk2, + int *countarg, int *namebytesarg) +{ + xfs_dir_leafblock_t *leaf1, *leaf2; + xfs_dir_leaf_hdr_t *hdr1, *hdr2; + xfs_dir_leaf_entry_t *entry; + int count, max, totallen, half; + int lastdelta, foundit, tmp; + + /* + * Set up environment. + */ + leaf1 = blk1->bp->data; + leaf2 = blk2->bp->data; + hdr1 = &leaf1->hdr; + hdr2 = &leaf2->hdr; + foundit = 0; + totallen = 0; + + /* + * Examine entries until we reduce the absolute difference in + * byte usage between the two blocks to a minimum. "half" is the target: per entry (new one included) sizeof(*entry) + sizeof(xfs_dir_leaf_name_t)-1 + name bytes, halved. + */ + max = INT_GET(hdr1->count, ARCH_CONVERT) + INT_GET(hdr2->count, ARCH_CONVERT); + half = (max+1) * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_name_t)-1); + half += INT_GET(hdr1->namebytes, ARCH_CONVERT) + INT_GET(hdr2->namebytes, ARCH_CONVERT) + state->args->namelen; + half /= 2; + lastdelta = state->blocksize; + entry = &leaf1->entries[0]; + for (count = 0; count < max; entry++, count++) { + +#define XFS_DIR_ABS(A) (((A) < 0) ? -(A) : (A)) + /* + * The new entry is in the first block, account for it. + */ + if (count == blk1->index) { + tmp = totallen + (uint)sizeof(*entry) + + XFS_DIR_LEAF_ENTSIZE_BYNAME(state->args->namelen); + if (XFS_DIR_ABS(half - tmp) > lastdelta) + break; + lastdelta = XFS_DIR_ABS(half - tmp); + totallen = tmp; + foundit = 1; + } + + /* + * Wrap around into the second block if necessary. + */ + if (count == INT_GET(hdr1->count, ARCH_CONVERT)) { + leaf1 = leaf2; + entry = &leaf1->entries[0]; + } + + /* + * Figure out if next leaf entry would be too much.
+ */ + tmp = totallen + (uint)sizeof(*entry) + + XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry); + if (XFS_DIR_ABS(half - tmp) > lastdelta) + break; + lastdelta = XFS_DIR_ABS(half - tmp); + totallen = tmp; +#undef XFS_DIR_ABS + } + + /* + * Calculate the number of namebytes that will end up in lower block by stripping the per-entry overhead the loop added (NOTE(review): assumes the ENTSIZE macros use sizeof(xfs_dir_leaf_name_t)-1, as rebalance/toosmall/remove do). + * If the new entry was counted in the lower block, back its bytes out too. + */ + totallen -= + count * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_name_t)-1); + if (foundit) { + totallen -= (sizeof(*entry)+sizeof(xfs_dir_leaf_name_t)-1) + + state->args->namelen; + } + + *countarg = count; /* entries examined for the lower block */ + *namebytesarg = totallen; /* name bytes of those entries */ + return(foundit); /* nonzero: new entry was accounted in the first block */ +} + +/*======================================================================== + * Routines used for shrinking the Btree. + *========================================================================*/ + +/* + * Check a leaf block and its neighbors to see if the block should be + * collapsed into one or the other neighbor. Always keep the block + * with the smaller block number. + * If the current block is over 50% full, don't try to join it, set *action to 0. + * If the block is empty, fill in the state structure and set *action to 2. + * If it can be collapsed, fill in the state structure and set *action to 1. + * If nothing can be done, set *action to 0. The return value is 0 or an error code. + */ +int +xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action) +{ + xfs_dir_leafblock_t *leaf; + xfs_da_state_blk_t *blk; + xfs_da_blkinfo_t *info; + int count, bytes, forward, error, retval, i; + xfs_dablk_t blkno; + xfs_dabuf_t *bp; + + /* + * Check for the degenerate case of the block being over 50% full. + * If so, it's not worth even looking to see if we might be able + * to coalesce with a sibling.
+ */ + blk = &state->path.blk[ state->path.active-1 ]; + info = blk->bp->data; + ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + leaf = (xfs_dir_leafblock_t *)info; + count = INT_GET(leaf->hdr.count, ARCH_CONVERT); + bytes = (uint)sizeof(xfs_dir_leaf_hdr_t) + + count * (uint)sizeof(xfs_dir_leaf_entry_t) + + count * ((uint)sizeof(xfs_dir_leaf_name_t)-1) + + INT_GET(leaf->hdr.namebytes, ARCH_CONVERT); + if (bytes > (state->blocksize >> 1)) { + *action = 0; /* blk over 50%, don't try to join */ + return(0); + } + + /* + * Check for the degenerate case of the block being empty. + * If the block is empty, we'll simply delete it, no need to + * coalesce it with a sibling block. We choose (arbitrarily) + * to merge with the forward block unless it is NULL. + */ + if (count == 0) { + /* + * Make altpath point to the block we want to keep and + * path point to the block we want to drop (this one). + */ + forward = !INT_ISZERO(info->forw, ARCH_CONVERT); + bcopy(&state->path, &state->altpath, sizeof(state->path)); + error = xfs_da_path_shift(state, &state->altpath, forward, + 0, &retval); + if (error) + return(error); + if (retval) { + *action = 0; + } else { + *action = 2; + } + return(0); + } + + /* + * Examine each sibling block to see if we can coalesce with + * at least 25% free space to spare. We need to figure out + * whether to merge with the forward or the backward block. + * We prefer coalescing with the lower numbered sibling so as + * to shrink a directory over time.
+ */ + forward = (INT_GET(info->forw, ARCH_CONVERT) < INT_GET(info->back, ARCH_CONVERT)); /* start with smaller blk num */ + for (i = 0; i < 2; forward = !forward, i++) { + if (forward) + blkno = INT_GET(info->forw, ARCH_CONVERT); + else + blkno = INT_GET(info->back, ARCH_CONVERT); + if (blkno == 0) + continue; + error = xfs_da_read_buf(state->args->trans, state->args->dp, + blkno, -1, &bp, + XFS_DATA_FORK); + if (error) + return(error); + ASSERT(bp != NULL); + + leaf = (xfs_dir_leafblock_t *)info; + count = INT_GET(leaf->hdr.count, ARCH_CONVERT); + bytes = state->blocksize - (state->blocksize>>2); + bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT); + leaf = bp->data; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + count += INT_GET(leaf->hdr.count, ARCH_CONVERT); + bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT); + bytes -= count * ((uint)sizeof(xfs_dir_leaf_name_t) - 1); + bytes -= count * (uint)sizeof(xfs_dir_leaf_entry_t); + bytes -= (uint)sizeof(xfs_dir_leaf_hdr_t); + if (bytes >= 0) + break; /* fits with at least 25% to spare */ + + xfs_da_brelse(state->args->trans, bp); + } + if (i >= 2) { + *action = 0; + return(0); + } + xfs_da_buf_done(bp); + + /* + * Make altpath point to the block we want to keep (the lower + * numbered block) and path point to the block we want to drop. + */ + bcopy(&state->path, &state->altpath, sizeof(state->path)); + if (blkno < blk->blkno) { + error = xfs_da_path_shift(state, &state->altpath, forward, + 0, &retval); + } else { + error = xfs_da_path_shift(state, &state->path, forward, + 0, &retval); + } + if (error) + return(error); + if (retval) { + *action = 0; + } else { + *action = 1; + } + return(0); +} + +/* + * Remove a name from the leaf directory structure. + * + * Return 1 if leaf is less than 37% full, 0 if >= 37% full. + * If two leaves are 37% full, when combined they will leave 25% free. 
+ */ +int +xfs_dir_leaf_remove(xfs_trans_t *trans, xfs_dabuf_t *bp, int index) +{ + xfs_dir_leafblock_t *leaf; + xfs_dir_leaf_hdr_t *hdr; + xfs_dir_leaf_map_t *map; + xfs_dir_leaf_entry_t *entry; + xfs_dir_leaf_name_t *namest; + int before, after, smallest, entsize; + int tablesize, tmp, i; + xfs_mount_t *mp; + + leaf = bp->data; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + hdr = &leaf->hdr; + mp = trans->t_mountp; + ASSERT((INT_GET(hdr->count, ARCH_CONVERT) > 0) && (INT_GET(hdr->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8))); + ASSERT((index >= 0) && (index < INT_GET(hdr->count, ARCH_CONVERT))); + ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) >= ((INT_GET(hdr->count, ARCH_CONVERT)*sizeof(*entry))+sizeof(*hdr))); + entry = &leaf->entries[index]; + ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) >= INT_GET(hdr->firstused, ARCH_CONVERT)); + ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp)); + + /* + * Scan through free region table: + * check for adjacency of free'd entry with an existing one, + * find smallest free region in case we need to replace it, + * adjust any map that borders the entry table, + */ + tablesize = INT_GET(hdr->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t) + + (uint)sizeof(xfs_dir_leaf_hdr_t); + map = &hdr->freemap[0]; + tmp = INT_GET(map->size, ARCH_CONVERT); + before = after = -1; + smallest = XFS_DIR_LEAF_MAPSIZE - 1; + entsize = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry); + for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) { + ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp)); + ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp)); + if (INT_GET(map->base, ARCH_CONVERT) == tablesize) { + INT_MOD(map->base, ARCH_CONVERT, -((uint)sizeof(xfs_dir_leaf_entry_t))); + INT_MOD(map->size, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_entry_t)); + } + + if ((INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT)) == INT_GET(entry->nameidx, ARCH_CONVERT)) { + before = i; + } else if 
(INT_GET(map->base, ARCH_CONVERT) == (INT_GET(entry->nameidx, ARCH_CONVERT) + entsize)) { + after = i; + } else if (INT_GET(map->size, ARCH_CONVERT) < tmp) { + tmp = INT_GET(map->size, ARCH_CONVERT); + smallest = i; + } + } + + /* + * Coalesce adjacent freemap regions, + * or replace the smallest region. + */ + if ((before >= 0) || (after >= 0)) { + if ((before >= 0) && (after >= 0)) { + map = &hdr->freemap[before]; + INT_MOD(map->size, ARCH_CONVERT, entsize); + INT_MOD(map->size, ARCH_CONVERT, INT_GET(hdr->freemap[after].size, ARCH_CONVERT)); + INT_ZERO(hdr->freemap[after].base, ARCH_CONVERT); + INT_ZERO(hdr->freemap[after].size, ARCH_CONVERT); + } else if (before >= 0) { + map = &hdr->freemap[before]; + INT_MOD(map->size, ARCH_CONVERT, entsize); + } else { + map = &hdr->freemap[after]; + INT_COPY(map->base, entry->nameidx, ARCH_CONVERT); + INT_MOD(map->size, ARCH_CONVERT, entsize); + } + } else { + /* + * Replace smallest region (if it is smaller than free'd entry) + */ + map = &hdr->freemap[smallest]; + if (INT_GET(map->size, ARCH_CONVERT) < entsize) { + INT_COPY(map->base, entry->nameidx, ARCH_CONVERT); + INT_SET(map->size, ARCH_CONVERT, entsize); + } + } + + /* + * Did we remove the first entry? + */ + if (INT_GET(entry->nameidx, ARCH_CONVERT) == INT_GET(hdr->firstused, ARCH_CONVERT)) + smallest = 1; + else + smallest = 0; + + /* + * Compress the remaining entries and zero out the removed stuff. 
+ */ + namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)); + bzero((char *)namest, entsize); + xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, namest, entsize)); + + INT_MOD(hdr->namebytes, ARCH_CONVERT, -(entry->namelen)); + tmp = (INT_GET(hdr->count, ARCH_CONVERT) - index) * (uint)sizeof(xfs_dir_leaf_entry_t); + ovbcopy(entry + 1, entry, tmp); + INT_MOD(hdr->count, ARCH_CONVERT, -1); + xfs_da_log_buf(trans, bp, + XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry))); + entry = &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)]; + bzero((char *)entry, sizeof(xfs_dir_leaf_entry_t)); + + /* + * If we removed the first entry, re-find the first used byte + * in the name area. Note that if the entry was the "firstused", + * then we don't have a "hole" in our block resulting from + * removing the name. + */ + if (smallest) { + tmp = XFS_LBSIZE(mp); + entry = &leaf->entries[0]; + for (i = INT_GET(hdr->count, ARCH_CONVERT)-1; i >= 0; entry++, i--) { + ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) >= INT_GET(hdr->firstused, ARCH_CONVERT)); + ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp)); + if (INT_GET(entry->nameidx, ARCH_CONVERT) < tmp) + tmp = INT_GET(entry->nameidx, ARCH_CONVERT); + } + INT_SET(hdr->firstused, ARCH_CONVERT, tmp); + if (INT_GET(hdr->firstused, ARCH_CONVERT) == 0) + INT_SET(hdr->firstused, ARCH_CONVERT, tmp - 1); + } else { + hdr->holes = 1; /* mark as needing compaction */ + } + + xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr))); + + /* + * Check if leaf is less than 37% full, caller may want to + * "join" the leaf with a sibling if so.
+ */ + tmp = (uint)sizeof(xfs_dir_leaf_hdr_t); + tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t); + tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * ((uint)sizeof(xfs_dir_leaf_name_t) - 1); + tmp += INT_GET(leaf->hdr.namebytes, ARCH_CONVERT); + if (tmp < mp->m_dir_magicpct) + return(1); /* leaf is < 37% full */ + return(0); +} + +/* + * Move all the directory entries from drop_leaf into save_leaf. + */ +void +xfs_dir_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, + xfs_da_state_blk_t *save_blk) +{ + xfs_dir_leafblock_t *drop_leaf, *save_leaf, *tmp_leaf; + xfs_dir_leaf_hdr_t *drop_hdr, *save_hdr, *tmp_hdr; + xfs_mount_t *mp; + char *tmpbuffer; + + /* + * Set up environment. + */ + mp = state->mp; + ASSERT(drop_blk->magic == XFS_DIR_LEAF_MAGIC); + ASSERT(save_blk->magic == XFS_DIR_LEAF_MAGIC); + drop_leaf = drop_blk->bp->data; + save_leaf = save_blk->bp->data; + ASSERT(INT_GET(drop_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + ASSERT(INT_GET(save_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + drop_hdr = &drop_leaf->hdr; + save_hdr = &save_leaf->hdr; + + /* + * Save last hashval from dying block for later Btree fixup (hdr.count must go through INT_GET before it is used as an index, like every other access in this file). + */ + drop_blk->hashval = INT_GET(drop_leaf->entries[ INT_GET(drop_leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); + + /* + * Check if we need a temp buffer, or can we do it in place. + * Note that we don't check "leaf" for holes because we will + * always be dropping it, toosmall() decided that for us already. + */ + if (save_hdr->holes == 0) { + /* + * dest leaf has no holes, so we add there. May need + * to make some room in the entry array.
+ */ + if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) { + xfs_dir_leaf_moveents(drop_leaf, 0, save_leaf, 0, + (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp); + } else { + xfs_dir_leaf_moveents(drop_leaf, 0, + save_leaf, INT_GET(save_hdr->count, ARCH_CONVERT), + (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp); + } + } else { + /* + * Destination has holes, so we make a temporary copy + * of the leaf and add them both to that. + */ + tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP); + ASSERT(tmpbuffer != NULL); + bzero(tmpbuffer, state->blocksize); + tmp_leaf = (xfs_dir_leafblock_t *)tmpbuffer; + tmp_hdr = &tmp_leaf->hdr; + tmp_hdr->info = save_hdr->info; /* struct copy */ + INT_ZERO(tmp_hdr->count, ARCH_CONVERT); + INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize); + if (INT_GET(tmp_hdr->firstused, ARCH_CONVERT) == 0) + INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize - 1); + INT_ZERO(tmp_hdr->namebytes, ARCH_CONVERT); + if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) { + xfs_dir_leaf_moveents(drop_leaf, 0, tmp_leaf, 0, + (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp); + xfs_dir_leaf_moveents(save_leaf, 0, + tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT), + (int)INT_GET(save_hdr->count, ARCH_CONVERT), mp); + } else { + xfs_dir_leaf_moveents(save_leaf, 0, tmp_leaf, 0, + (int)INT_GET(save_hdr->count, ARCH_CONVERT), mp); + xfs_dir_leaf_moveents(drop_leaf, 0, + tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT), + (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp); + } + bcopy(tmp_leaf, save_leaf, state->blocksize); + kmem_free(tmpbuffer, state->blocksize); + } + + xfs_da_log_buf(state->args->trans, save_blk->bp, 0, + state->blocksize - 1); + + /* + * Copy out last hashval in each block for B-tree code. 
+ */ + save_blk->hashval = INT_GET(save_leaf->entries[ INT_GET(save_leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT); +} + + +/*======================================================================== + * Routines used for finding things in the Btree. + *========================================================================*/ + +/* + * Look up a name in a leaf directory structure. + * This is the internal routine, it uses the caller's buffer. + * + * Note that duplicate keys are allowed, but only check within the + * current leaf node. The Btree code must check in adjacent leaf nodes. + * + * Return in *index the index into the entry[] array of either the found + * entry, or where the entry should have been (insert before that entry). + * + * Don't change the args->inumber unless we find the filename. + */ +int +xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index) +{ + xfs_dir_leafblock_t *leaf; + xfs_dir_leaf_entry_t *entry; + xfs_dir_leaf_name_t *namest; + int probe, span; + xfs_dahash_t hashval; + + leaf = bp->data; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) < (XFS_LBSIZE(args->dp->i_mount)/8)); + + /* + * Binary search. (note: small blocks will skip this loop) + */ + hashval = args->hashval; + probe = span = INT_GET(leaf->hdr.count, ARCH_CONVERT) / 2; + for (entry = &leaf->entries[probe]; span > 4; + entry = &leaf->entries[probe]) { + span /= 2; + if (INT_GET(entry->hashval, ARCH_CONVERT) < hashval) + probe += span; + else if (INT_GET(entry->hashval, ARCH_CONVERT) > hashval) + probe -= span; + else + break; + } + ASSERT((probe >= 0) && \ + ((INT_GET(leaf->hdr.count, ARCH_CONVERT) == 0) || (probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)))); + ASSERT((span <= 4) || (INT_GET(entry->hashval, ARCH_CONVERT) == hashval)); + + /* + * Since we may have duplicate hashval's, find the first matching + * hashval in the leaf. 
+ */ + while ((probe > 0) && (INT_GET(entry->hashval, ARCH_CONVERT) >= hashval)) { + entry--; + probe--; + } + while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) && (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)) { + entry++; + probe++; + } + if ((probe == INT_GET(leaf->hdr.count, ARCH_CONVERT)) || (INT_GET(entry->hashval, ARCH_CONVERT) != hashval)) { + *index = probe; + ASSERT(args->oknoent); + return(XFS_ERROR(ENOENT)); + } + + /* + * Duplicate keys may be present, so search all of them for a match. + */ + while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) && (INT_GET(entry->hashval, ARCH_CONVERT) == hashval)) { + namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)); + if (entry->namelen == args->namelen && + namest->name[0] == args->name[0] && + bcmp(args->name, namest->name, args->namelen) == 0) { + XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &args->inumber, ARCH_CONVERT); + *index = probe; + return(XFS_ERROR(EEXIST)); + } + entry++; + probe++; + } + *index = probe; + ASSERT(probe == INT_GET(leaf->hdr.count, ARCH_CONVERT) || args->oknoent); + return(XFS_ERROR(ENOENT)); +} + +/*======================================================================== + * Utility routines. + *========================================================================*/ + +/* + * Move the indicated entries from one leaf to another. + * NOTE: this routine modifies both source and destination leaves. + */ +/* ARGSUSED */ +STATIC void +xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s, + xfs_dir_leafblock_t *leaf_d, int start_d, + int count, xfs_mount_t *mp) +{ + xfs_dir_leaf_hdr_t *hdr_s, *hdr_d; + xfs_dir_leaf_entry_t *entry_s, *entry_d; + int tmp, i; + + /* + * Check for nothing to do. + */ + if (count == 0) + return; + + /* + * Set up environment. 
+ */ + ASSERT(INT_GET(leaf_s->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + ASSERT(INT_GET(leaf_d->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + hdr_s = &leaf_s->hdr; + hdr_d = &leaf_d->hdr; + ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0) && (INT_GET(hdr_s->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8))); + ASSERT(INT_GET(hdr_s->firstused, ARCH_CONVERT) >= + ((INT_GET(hdr_s->count, ARCH_CONVERT)*sizeof(*entry_s))+sizeof(*hdr_s))); + ASSERT(INT_GET(hdr_d->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)); + ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= + ((INT_GET(hdr_d->count, ARCH_CONVERT)*sizeof(*entry_d))+sizeof(*hdr_d))); + + ASSERT(start_s < INT_GET(hdr_s->count, ARCH_CONVERT)); + ASSERT(start_d <= INT_GET(hdr_d->count, ARCH_CONVERT)); + ASSERT(count <= INT_GET(hdr_s->count, ARCH_CONVERT)); + + /* + * Move the entries in the destination leaf up to make a hole? Source and target lie in the same entry table and may overlap, so use ovbcopy as xfs_dir_leaf_remove() does. + */ + if (start_d < INT_GET(hdr_d->count, ARCH_CONVERT)) { + tmp = INT_GET(hdr_d->count, ARCH_CONVERT) - start_d; + tmp *= (uint)sizeof(xfs_dir_leaf_entry_t); + entry_s = &leaf_d->entries[start_d]; + entry_d = &leaf_d->entries[start_d + count]; + ovbcopy(entry_s, entry_d, tmp); + } + + /* + * Copy all entries in the same (sorted) order, + * but allocate filenames packed and in sequence.
+ */ + entry_s = &leaf_s->entries[start_s]; + entry_d = &leaf_d->entries[start_d]; + for (i = 0; i < count; entry_s++, entry_d++, i++) { + ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) >= INT_GET(hdr_s->firstused, ARCH_CONVERT)); + ASSERT(entry_s->namelen < MAXNAMELEN); + tmp = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry_s); + INT_MOD(hdr_d->firstused, ARCH_CONVERT, -(tmp)); + entry_d->hashval = entry_s->hashval; /* INT_: direct copy */ + INT_COPY(entry_d->nameidx, hdr_d->firstused, ARCH_CONVERT); + entry_d->namelen = entry_s->namelen; + ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp)); + bcopy(XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)), + XFS_DIR_LEAF_NAMESTRUCT(leaf_d, INT_GET(entry_d->nameidx, ARCH_CONVERT)), tmp); + ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp)); + bzero((char *)XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)), + tmp); + INT_MOD(hdr_s->namebytes, ARCH_CONVERT, -(entry_d->namelen)); + INT_MOD(hdr_d->namebytes, ARCH_CONVERT, entry_d->namelen); + INT_MOD(hdr_s->count, ARCH_CONVERT, -1); + INT_MOD(hdr_d->count, ARCH_CONVERT, +1); + tmp = INT_GET(hdr_d->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t) + + (uint)sizeof(xfs_dir_leaf_hdr_t); + ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= tmp); + + } + + /* + * Zero out the entries we just copied. + */ + if (start_s == INT_GET(hdr_s->count, ARCH_CONVERT)) { + tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t); + entry_s = &leaf_s->entries[start_s]; + ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp)); + bzero((char *)entry_s, tmp); + } else { + /* + * Move the remaining entries down to fill the hole (hdr_s->count was already reduced by the loop above, so count - start_s entries sit past the hole; the regions may overlap), + * then zero the entries at the top.
+ */ + tmp = INT_GET(hdr_s->count, ARCH_CONVERT) - start_s; + tmp *= (uint)sizeof(xfs_dir_leaf_entry_t); + entry_s = &leaf_s->entries[start_s + count]; + entry_d = &leaf_s->entries[start_s]; + ovbcopy(entry_s, entry_d, tmp); + + tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t); + entry_s = &leaf_s->entries[INT_GET(hdr_s->count, ARCH_CONVERT)]; + ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp)); + bzero((char *)entry_s, tmp); + } + + /* + * Fill in the freemap information + */ + INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_hdr_t)); + INT_MOD(hdr_d->freemap[0].base, ARCH_CONVERT, INT_GET(hdr_d->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t)); + INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, INT_GET(hdr_d->firstused, ARCH_CONVERT) - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT)); + INT_SET(hdr_d->freemap[1].base, ARCH_CONVERT, INT_ZERO(hdr_d->freemap[2].base, ARCH_CONVERT)); + INT_SET(hdr_d->freemap[1].size, ARCH_CONVERT, INT_ZERO(hdr_d->freemap[2].size, ARCH_CONVERT)); + hdr_s->holes = 1; /* leaf may not be compact */ +} + +/* + * Compare two leaf blocks "order": return 1 when both are non-empty and leaf2's first or last hashval is below leaf1's, else 0. + */ +int +xfs_dir_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp) +{ + xfs_dir_leafblock_t *leaf1, *leaf2; + + leaf1 = leaf1_bp->data; + leaf2 = leaf2_bp->data; + ASSERT((INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) && + (INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC)); + if ((INT_GET(leaf1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(leaf2->hdr.count, ARCH_CONVERT) > 0) && + ((INT_GET(leaf2->entries[ 0 ].hashval, ARCH_CONVERT) < + INT_GET(leaf1->entries[ 0 ].hashval, ARCH_CONVERT)) || + (INT_GET(leaf2->entries[ INT_GET(leaf2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) < + INT_GET(leaf1->entries[ INT_GET(leaf1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) { + return(1); + } + return(0); +} + +/* + * Pick up the last hashvalue from a leaf block.
+ */ +xfs_dahash_t +xfs_dir_leaf_lasthash(xfs_dabuf_t *bp, int *count) +{ + xfs_dir_leafblock_t *leaf; + + leaf = bp->data; + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + if (count) + *count = INT_GET(leaf->hdr.count, ARCH_CONVERT); + if (INT_GET(leaf->hdr.count, ARCH_CONVERT) == 0) + return(0); + return(INT_GET(leaf->entries[ INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)); +} diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c new file mode 100644 index 000000000..92e2818cb --- /dev/null +++ b/libxfs/xfs_ialloc.c @@ -0,0 +1,1113 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include + +/* + * Internal functions. 
+ */ + +/* + * Log specified fields for the inode given by bp and off. + */ +STATIC void +xfs_ialloc_log_di( + xfs_trans_t *tp, /* transaction pointer */ + xfs_buf_t *bp, /* inode buffer */ + int off, /* index of inode in buffer */ + int fields) /* bitmask of fields to log */ +{ + int first; /* first byte number */ + int ioffset; /* off in bytes */ + int last; /* last byte number */ + xfs_mount_t *mp; /* mount point structure */ + static const short offsets[] = { /* field offsets */ + /* keep in sync with bits */ + offsetof(xfs_dinode_core_t, di_magic), + offsetof(xfs_dinode_core_t, di_mode), + offsetof(xfs_dinode_core_t, di_version), + offsetof(xfs_dinode_core_t, di_format), + offsetof(xfs_dinode_core_t, di_onlink), + offsetof(xfs_dinode_core_t, di_uid), + offsetof(xfs_dinode_core_t, di_gid), + offsetof(xfs_dinode_core_t, di_nlink), + offsetof(xfs_dinode_core_t, di_projid), + offsetof(xfs_dinode_core_t, di_pad), + offsetof(xfs_dinode_core_t, di_atime), + offsetof(xfs_dinode_core_t, di_mtime), + offsetof(xfs_dinode_core_t, di_ctime), + offsetof(xfs_dinode_core_t, di_size), + offsetof(xfs_dinode_core_t, di_nblocks), + offsetof(xfs_dinode_core_t, di_extsize), + offsetof(xfs_dinode_core_t, di_nextents), + offsetof(xfs_dinode_core_t, di_anextents), + offsetof(xfs_dinode_core_t, di_forkoff), + offsetof(xfs_dinode_core_t, di_aformat), + offsetof(xfs_dinode_core_t, di_dmevmask), + offsetof(xfs_dinode_core_t, di_dmstate), + offsetof(xfs_dinode_core_t, di_flags), + offsetof(xfs_dinode_core_t, di_gen), + offsetof(xfs_dinode_t, di_next_unlinked), + offsetof(xfs_dinode_t, di_u), + offsetof(xfs_dinode_t, di_a), + sizeof(xfs_dinode_t) + }; + + + ASSERT(offsetof(xfs_dinode_t, di_core) == 0); + ASSERT((fields & (XFS_DI_U|XFS_DI_A)) == 0); + mp = tp->t_mountp; + /* + * Get the inode-relative first and last bytes for these fields + */ + xfs_btree_offsets(fields, offsets, XFS_DI_NUM_BITS, &first, &last); + /* + * Convert to buffer offsets and log it. 
+	 */
+	ioffset = off << mp->m_sb.sb_inodelog;
+	first += ioffset;
+	last += ioffset;
+	xfs_trans_log_buf(tp, bp, first, last);
+}
+
+/*
+ * Allocation group level functions.
+ */
+
+/*
+ * Allocate new inodes in the allocation group specified by agbp.
+ * Return 0 for success, else error code.
+ *
+ * One fixed-size extent of XFS_IALLOC_BLOCKS(mp) blocks is requested near
+ * the inode btree root block.  If the request was stripe aligned and
+ * found nothing, it is retried once with cluster (or no) alignment.
+ * Every inode in the new extent is initialized from a zeroed template
+ * and logged, one inobt record is inserted per XFS_INODES_PER_CHUNK
+ * inodes, and the AGI, in-core per-AG and superblock counters are bumped.
+ *
+ * *alloc is set to 1 if inodes were added, or to 0 (with a 0 return) if
+ * no suitable extent could be found.  ENOSPC is returned, and *alloc is
+ * left untouched, when adding XFS_IALLOC_INODES(mp) inodes would push
+ * sb_icount past a nonzero m_maxicount.  Errors from xfs_alloc_vextent()
+ * and from the btree lookup/insert are returned unchanged.
+ */
+STATIC int				/* error code or 0 */
+xfs_ialloc_ag_alloc(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_buf_t	*agbp,		/* alloc group buffer */
+	int		*alloc)		/* out: 1 if inodes were added, else 0 */
+{
+	xfs_agi_t	*agi;		/* allocation group header */
+	xfs_alloc_arg_t	args;		/* allocation argument structure */
+	int		blks_per_cluster;  /* fs blocks per inode cluster */
+	xfs_btree_cur_t	*cur;		/* inode btree cursor */
+	xfs_daddr_t	d;		/* disk addr of buffer */
+	int		error;
+	xfs_buf_t	*fbuf;		/* new free inodes' buffer */
+	xfs_dinode_t	*free;		/* new free inode structure */
+	int		i;		/* inode counter */
+	int		j;		/* block counter */
+	int		nbufs;		/* num bufs of new inodes */
+	xfs_agino_t	newino;		/* new first inode's number */
+	xfs_agino_t	newlen;		/* new number of inodes */
+	int		ninodes;	/* num inodes per buf */
+	xfs_agino_t	thisino;	/* current inode number, for loop */
+	int		version;	/* inode version number to use */
+	static xfs_timestamp_t ztime;	/* zero xfs timestamp */
+	int		isaligned;	/* inode allocation at stripe unit */
+					/* boundary */
+	xfs_dinode_core_t dic;		/* a dinode_core to copy to new */
+					/* inodes */
+
+	args.tp = tp;
+	args.mp = tp->t_mountp;
+
+	/*
+	 * Locking will ensure that we don't have two callers in here
+	 * at one time.
+	 */
+	newlen = XFS_IALLOC_INODES(args.mp);
+	if (args.mp->m_maxicount &&
+	    args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
+		return XFS_ERROR(ENOSPC);
+	args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
+	/*
+	 * Set the alignment for the allocation.
+	 * If stripe alignment is turned on then align at stripe unit
+	 * boundary.
+	 * If the cluster size is smaller than a filesystem block
+	 * then we're doing I/O for inodes in filesystem block size pieces,
+	 * so don't need alignment anyway.
+	 */
+	isaligned = 0;
+	if (args.mp->m_sinoalign) {
+		ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
+		args.alignment = args.mp->m_dalign;
+		isaligned = 1;
+	} else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
+	    args.mp->m_sb.sb_inoalignmt >=
+	    XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
+		args.alignment = args.mp->m_sb.sb_inoalignmt;
+	else
+		args.alignment = 1;
+	agi = XFS_BUF_TO_AGI(agbp);
+	/*
+	 * Need to figure out where to allocate the inode blocks.
+	 * Ideally they should be spaced out through the a.g.
+	 * For now, just allocate blocks up front.
+	 */
+	args.agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
+	args.fsbno = XFS_AGB_TO_FSB(args.mp, INT_GET(agi->agi_seqno, ARCH_CONVERT),
+				    args.agbno);
+	/*
+	 * Allocate a fixed-size extent of inodes.
+	 */
+	args.type = XFS_ALLOCTYPE_NEAR_BNO;
+	args.mod = args.total = args.wasdel = args.isfl = args.userdata =
+		args.minalignslop = 0;
+	args.prod = 1;
+	/*
+	 * Allow space for the inode btree to split.
+	 */
+	args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
+	if (error = xfs_alloc_vextent(&args))
+		return error;
+
+	/*
+	 * If stripe alignment is turned on, then try again with cluster
+	 * alignment.
+	 */
+	if (isaligned && args.fsbno == NULLFSBLOCK) {
+		args.type = XFS_ALLOCTYPE_NEAR_BNO;
+		args.agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
+		args.fsbno = XFS_AGB_TO_FSB(args.mp,
+				INT_GET(agi->agi_seqno, ARCH_CONVERT), args.agbno);
+		if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
+		    args.mp->m_sb.sb_inoalignmt >=
+		    XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
+			args.alignment = args.mp->m_sb.sb_inoalignmt;
+		else
+			args.alignment = 1;
+		if (error = xfs_alloc_vextent(&args))
+			return error;
+	}
+
+	if (args.fsbno == NULLFSBLOCK) {
+		*alloc = 0;
+		return 0;
+	}
+	ASSERT(args.len == args.minlen);
+	/*
+	 * Convert the results.
+	 */
+	newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
+	/*
+	 * Loop over the new block(s), filling in the inodes.
+	 * For small block sizes, manipulate the inodes in buffers
+	 * which are multiples of the block size.
+	 */
+	if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) {
+		blks_per_cluster = 1;
+		nbufs = (int)args.len;
+		ninodes = args.mp->m_sb.sb_inopblock;
+	} else {
+		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) /
+				   args.mp->m_sb.sb_blocksize;
+		nbufs = (int)args.len / blks_per_cluster;
+		ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock;
+	}
+	/*
+	 * Figure out what version number to use in the inodes we create.
+	 * If the superblock version has caught up to the one that supports
+	 * the new inode format, then use the new inode version.  Otherwise
+	 * use the old version so that old kernels will continue to be
+	 * able to use the file system.
+	 */
+	if (XFS_SB_VERSION_HASNLINK(&args.mp->m_sb))
+		version = XFS_DINODE_VERSION_2;
+	else
+		version = XFS_DINODE_VERSION_1;
+	for (j = 0; j < nbufs; j++) {
+		/*
+		 * Get the block.
+		 */
+		d = XFS_AGB_TO_DADDR(args.mp, INT_GET(agi->agi_seqno, ARCH_CONVERT),
+				     args.agbno + (j * blks_per_cluster));
+		fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
+					 args.mp->m_bsize * blks_per_cluster,
+					 XFS_BUF_LOCK);
+		ASSERT(fbuf);
+		ASSERT(!XFS_BUF_GETERROR(fbuf));
+		/*
+		 * Loop over the inodes in this buffer.
+		 * The template core (dic) is filled in first and then
+		 * copied into every inode slot of the buffer.
+		 */
+		INT_SET(dic.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
+		INT_ZERO(dic.di_mode, ARCH_CONVERT);
+		INT_SET(dic.di_version, ARCH_CONVERT, version);
+		INT_ZERO(dic.di_format, ARCH_CONVERT);
+		INT_ZERO(dic.di_onlink, ARCH_CONVERT);
+		INT_ZERO(dic.di_uid, ARCH_CONVERT);
+		INT_ZERO(dic.di_gid, ARCH_CONVERT);
+		INT_ZERO(dic.di_nlink, ARCH_CONVERT);
+		INT_ZERO(dic.di_projid, ARCH_CONVERT);
+		bzero(&(dic.di_pad[0]),sizeof(dic.di_pad));
+		INT_SET(dic.di_atime.t_sec, ARCH_CONVERT, ztime.t_sec);
+		INT_SET(dic.di_atime.t_nsec, ARCH_CONVERT, ztime.t_nsec);
+
+		INT_SET(dic.di_mtime.t_sec, ARCH_CONVERT, ztime.t_sec);
+		INT_SET(dic.di_mtime.t_nsec, ARCH_CONVERT, ztime.t_nsec);
+
+		INT_SET(dic.di_ctime.t_sec, ARCH_CONVERT, ztime.t_sec);
+		INT_SET(dic.di_ctime.t_nsec, ARCH_CONVERT, ztime.t_nsec);
+
+		INT_ZERO(dic.di_size, ARCH_CONVERT);
+		INT_ZERO(dic.di_nblocks, ARCH_CONVERT);
+		INT_ZERO(dic.di_extsize, ARCH_CONVERT);
+		INT_ZERO(dic.di_nextents, ARCH_CONVERT);
+		INT_ZERO(dic.di_anextents, ARCH_CONVERT);
+		INT_ZERO(dic.di_forkoff, ARCH_CONVERT);
+		INT_ZERO(dic.di_aformat, ARCH_CONVERT);
+		INT_ZERO(dic.di_dmevmask, ARCH_CONVERT);
+		INT_ZERO(dic.di_dmstate, ARCH_CONVERT);
+		INT_ZERO(dic.di_flags, ARCH_CONVERT);
+		INT_ZERO(dic.di_gen, ARCH_CONVERT);
+
+		for (i = 0; i < ninodes; i++) {
+			free = XFS_MAKE_IPTR(args.mp, fbuf, i);
+			bcopy (&dic, &(free->di_core), sizeof(xfs_dinode_core_t));
+			INT_SET(free->di_next_unlinked, ARCH_CONVERT, NULLAGINO);
+			xfs_ialloc_log_di(tp, fbuf, i,
+				XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
+		}
+		xfs_trans_inode_alloc_buf(tp, fbuf);
+	}
+	INT_MOD(agi->agi_count, ARCH_CONVERT, newlen);
+	INT_MOD(agi->agi_freecount, ARCH_CONVERT, newlen);
+	mraccess(&args.mp->m_peraglock);
+	args.mp->m_perag[INT_GET(agi->agi_seqno, ARCH_CONVERT)].pagi_freecount += newlen;
+	mraccunlock(&args.mp->m_peraglock);
+	INT_SET(agi->agi_newino, ARCH_CONVERT, newino);
+	/*
+	 * Insert records describing the new inode chunk into the btree.
+	 * Each lookup is expected to miss (i == 0) and each insert to
+	 * succeed (i == 1).
+	 */
+	cur = xfs_btree_init_cursor(args.mp, tp, agbp,
+			INT_GET(agi->agi_seqno, ARCH_CONVERT),
+			XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
+	for (thisino = newino;
+	     thisino < newino + newlen;
+	     thisino += XFS_INODES_PER_CHUNK) {
+		if (error = xfs_inobt_lookup_eq(cur, thisino,
+				XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i)) {
+			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+			return error;
+		}
+		ASSERT(i == 0);
+		if (error = xfs_inobt_insert(cur, &i)) {
+			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+			return error;
+		}
+		ASSERT(i == 1);
+	}
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	/*
+	 * Log allocation group header fields
+	 */
+	xfs_ialloc_log_agi(tp, agbp,
+		XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
+	/*
+	 * Modify/log superblock values for inode count and inode free count.
+	 */
+	xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
+	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
+	*alloc = 1;
+	return 0;
+}
+
+/*
+ * Select an allocation group to look for a free inode in, based on the parent
+ * inode and then mode.  Return the allocation group buffer.
+ *
+ * For a directory the starting a.g. comes from the rotor m_agirotor
+ * (advanced with atomicIncWithWrap); for anything else it is the
+ * parent's a.g.  The a.g.s are then scanned upward from there, wrapping
+ * at agcount.  The first full pass hands XFS_ALLOC_FLAG_TRYLOCK to
+ * xfs_alloc_pagf_init(); if nothing qualifies a second pass runs with
+ * flags 0.
+ *
+ * An a.g. qualifies if it already has free inodes, or if okalloc is set
+ * and its free-space summary shows room for a new inode extent plus
+ * needspace blocks.  A failed AGI read simply moves on to the next a.g.
+ *
+ * Returns NULL if the second pass also finds nothing, or as soon as a
+ * forced shutdown is seen.
+ */
+STATIC xfs_buf_t *			/* allocation group buffer */
+xfs_ialloc_ag_select(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_ino_t	parent,		/* parent directory inode number */
+	mode_t		mode,		/* bits set to indicate file type */
+	int		okalloc)	/* ok to allocate more space */
+{
+	xfs_buf_t	*agbp;		/* allocation group header buffer */
+	xfs_agnumber_t	agcount;	/* number of ag's in the filesystem */
+	xfs_agnumber_t	agno;		/* current ag number */
+	int		flags;		/* alloc buffer locking flags */
+	xfs_extlen_t	ineed;		/* blocks needed for inode allocation */
+	xfs_extlen_t	longest;	/* longest extent available */
+	xfs_mount_t	*mp;		/* mount point structure */
+	int		needspace;	/* file mode implies space allocated */
+	xfs_perag_t	*pag;		/* per allocation group data */
+	xfs_agnumber_t	pagno;		/* parent (starting) ag number */
+
+	/*
+	 * Files of these types need at least one block if length > 0
+	 * (and they won't fit in the inode, but that's hard to figure out).
+	 */
+	needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
+	mp = tp->t_mountp;
+	agcount = mp->m_sb.sb_agcount;
+	if (S_ISDIR(mode))
+		pagno = atomicIncWithWrap((int *)&mp->m_agirotor, agcount);
+	else
+		pagno = XFS_INO_TO_AGNO(mp, parent);
+	ASSERT(pagno < agcount);
+	/*
+	 * Loop through allocation groups, looking for one with a little
+	 * free space in it.  Note we don't look for free inodes, exactly.
+	 * Instead, we include whether there is a need to allocate inodes
+	 * to mean that blocks must be allocated for them,
+	 * if none are currently free.
+	 */
+	agno = pagno;
+	flags = XFS_ALLOC_FLAG_TRYLOCK;
+	for (;;) {
+		mraccess(&mp->m_peraglock);
+		pag = &mp->m_perag[agno];
+		if (!pag->pagi_init) {
+			if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
+				agbp = NULL;
+				mraccunlock(&mp->m_peraglock);
+				goto nextag;
+			}
+		} else
+			agbp = NULL;
+		/*
+		 * Is there enough free space for the file plus a block
+		 * of inodes (if we need to allocate some)?
+		 */
+		ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp);
+		if (ineed && !pag->pagf_init) {
+			if (agbp == NULL &&
+			    xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
+				agbp = NULL;
+				mraccunlock(&mp->m_peraglock);
+				goto nextag;
+			}
+			(void)xfs_alloc_pagf_init(mp, tp, agno, flags);
+		}
+		if (!ineed || pag->pagf_init) {
+			if (ineed && !(longest = pag->pagf_longest))
+				longest = pag->pagf_flcount > 0;
+			if (!ineed ||	/* longest is only read when ineed != 0 */
+			    (pag->pagf_freeblks >= needspace + ineed &&
+			     longest >= ineed &&
+			     okalloc)) {
+				if (agbp == NULL &&
+				    xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
+					agbp = NULL;
+					mraccunlock(&mp->m_peraglock);
+					goto nextag;
+				}
+				mraccunlock(&mp->m_peraglock);
+				return agbp;
+			}
+		}
+		mraccunlock(&mp->m_peraglock);
+		if (agbp)
+			xfs_trans_brelse(tp, agbp);
+nextag:
+		/*
+		 * No point in iterating over the rest, if we're shutting
+		 * down.
+		 */
+		if (XFS_FORCED_SHUTDOWN(mp))
+			return (xfs_buf_t *)0;
+		agno++;
+		if (agno == agcount)
+			agno = 0;
+		if (agno == pagno) {
+			if (flags == 0)
+				return (xfs_buf_t *)0;
+			flags = 0;
+		}
+	}
+}
+
+/*
+ * Visible inode allocation functions.
+ */
+
+/*
+ * Allocate an inode on disk.
+ * Mode is used to tell whether the new inode will need space, and whether
+ * it is a directory.
+ *
+ * The arguments IO_agbp and alloc_done are defined to work within
+ * the constraint of one allocation per transaction.
+ * xfs_dialloc() is designed to be called twice if it has to do an
+ * allocation to make more free inodes.  On the first call,
+ * IO_agbp should be set to NULL. If an inode is available,
+ * i.e., xfs_dialloc() did not need to do an allocation, an inode
+ * number is returned.  In this case, IO_agbp is set to NULL
+ * and alloc_done set to false.
+ * If an allocation needed to be done, xfs_dialloc would return
+ * the current ag_buf in IO_agbp and set alloc_done to true.
+ * The caller should then commit the current transaction, allocate a new
+ * transaction, and call xfs_dialloc() again, passing in the previous
+ * value of IO_agbp.
IO_agbp should be held across the transactions.
+ * Since the agbp is locked across the two calls, the second call is
+ * guaranteed to have a free inode available.
+ *
+ * Once we successfully pick an inode its number is returned and the
+ * on-disk data structures are updated.  The inode itself is not read
+ * in, since doing so would break ordering constraints with xfs_reclaim.
+ *
+ * Return value and *inop:
+ *   - 0 with *inop set to the new inode number on success.
+ *   - 0 with *inop == NULLFSINO when no inode was handed out: no a.g.
+ *     could be selected, xfs_ialloc_ag_alloc() reported ENOSPC, new
+ *     inodes were just allocated (*alloc_done is B_TRUE, call again), or
+ *     every a.g. was scanned without finding a free inode.
+ *   - ENOSPC with *inop == NULLFSINO when the scan found nothing and
+ *     the m_maxicount ceiling had been reached (noroom).
+ *   - any other error from the allocation or btree routines.
+ *
+ * NOTE(review): when the chosen a.g. is not the parent's a.g. and
+ * agi_newino is NULLAGINO, neither search branch below runs and rec is
+ * read without having been filled in - confirm that combination cannot
+ * occur with agi_freecount > 0.
+ */
+int
+xfs_dialloc(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_ino_t	parent,		/* parent inode (directory) */
+	mode_t		mode,		/* mode bits for new inode */
+	int		okalloc,	/* ok to allocate more space */
+	xfs_buf_t	**IO_agbp,	/* in/out ag header's buffer */
+	boolean_t	*alloc_done,	/* true if we needed to replenish
+					   inode freelist */
+	xfs_ino_t	*inop)		/* inode number allocated */
+{
+	xfs_agnumber_t	agcount;	/* number of allocation groups */
+	xfs_buf_t	*agbp;		/* allocation group header's buffer */
+	xfs_agnumber_t	agno;		/* allocation group number */
+	xfs_agi_t	*agi;		/* allocation group header structure */
+	xfs_btree_cur_t	*cur;		/* inode allocation btree cursor */
+	int		error;		/* error return value */
+	int		i;		/* result code */
+	int		ialloced;	/* inode allocation status */
+	int		noroom = 0;	/* no space for inode blk allocation */
+	xfs_ino_t	ino;		/* fs-relative inode to be returned */
+	/* REFERENCED */
+	int		j;		/* result code */
+	xfs_mount_t	*mp;		/* file system mount structure */
+	int		offset;		/* index of inode in chunk */
+	xfs_agino_t	pagino;		/* parent's a.g. relative inode # */
+	xfs_agnumber_t	pagno;		/* parent's allocation group number */
+	xfs_inobt_rec_t	rec;		/* inode allocation record */
+	xfs_agnumber_t	tagno;		/* testing allocation group number */
+	xfs_btree_cur_t	*tcur;		/* temp cursor */
+	xfs_inobt_rec_t	trec;		/* temp inode allocation record */
+
+
+	if (*IO_agbp == NULL) {
+		/*
+		 * We do not have an agbp, so select an initial allocation
+		 * group for inode allocation.
+		 */
+		agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+		/*
+		 * Couldn't find an allocation group satisfying the
+		 * criteria, give up.
+		 */
+		if (!agbp) {
+			*inop = NULLFSINO;
+			return 0;
+		}
+		agi = XFS_BUF_TO_AGI(agbp);
+		ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+	} else {
+		/*
+		 * Continue where we left off before.  In this case, we
+		 * know that the allocation group has free inodes.
+		 */
+		agbp = *IO_agbp;
+		agi = XFS_BUF_TO_AGI(agbp);
+		ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+		ASSERT(INT_GET(agi->agi_freecount, ARCH_CONVERT) > 0);
+	}
+	mp = tp->t_mountp;
+	agcount = mp->m_sb.sb_agcount;
+	agno = INT_GET(agi->agi_seqno, ARCH_CONVERT);
+	tagno = agno;
+	pagno = XFS_INO_TO_AGNO(mp, parent);
+	pagino = XFS_INO_TO_AGINO(mp, parent);
+
+	/*
+	 * If we have already hit the ceiling of inode blocks then clear
+	 * okalloc so we scan all available agi structures for a free
+	 * inode.
+	 */
+
+	if (mp->m_maxicount &&
+	    mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
+		noroom = 1;
+		okalloc = 0;
+	}
+
+	/*
+	 * Loop until we find an allocation group that either has free inodes
+	 * or in which we can allocate some inodes.  Iterate through the
+	 * allocation groups upward, wrapping at the end.
+	 */
+	*alloc_done = B_FALSE;
+	while (INT_GET(agi->agi_freecount, ARCH_CONVERT) == 0) {
+		/*
+		 * Don't do anything if we're not supposed to allocate
+		 * any blocks, just go on to the next ag.
+		 */
+		if (okalloc) {
+			/*
+			 * Try to allocate some new inodes in the allocation
+			 * group.
+			 */
+			if (error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced)) {
+				xfs_trans_brelse(tp, agbp);
+				if (error == ENOSPC) {
+					*inop = NULLFSINO;
+					return 0;
+				} else
+					return error;
+			}
+			if (ialloced) {
+				/*
+				 * We successfully allocated some inodes, return
+				 * the current context to the caller so that it
+				 * can commit the current transaction and call
+				 * us again where we left off.
+				 */
+				ASSERT(INT_GET(agi->agi_freecount, ARCH_CONVERT) > 0);
+				*alloc_done = B_TRUE;
+				*IO_agbp = agbp;
+				*inop = NULLFSINO;
+				return 0;
+			}
+		}
+		/*
+		 * If it failed, give up on this ag.
+		 */
+		xfs_trans_brelse(tp, agbp);
+		/*
+		 * Go on to the next ag: get its ag header.
+		 * An a.g. whose AGI cannot be read is skipped.
+		 */
+nextag:
+		if (++tagno == agcount)
+			tagno = 0;
+		if (tagno == agno) {
+			*inop = NULLFSINO;
+			return noroom ? ENOSPC : 0;
+		}
+		mraccess(&mp->m_peraglock);
+		error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
+		mraccunlock(&mp->m_peraglock);
+		if (error)
+			goto nextag;
+		agi = XFS_BUF_TO_AGI(agbp);
+		ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC);
+	}
+	/*
+	 * Here with an allocation group that has a free inode.
+	 * Reset agno since we may have chosen a new ag in the
+	 * loop above.
+	 */
+	agno = tagno;
+	*IO_agbp = NULL;
+	cur = xfs_btree_init_cursor(mp, tp, agbp, INT_GET(agi->agi_seqno, ARCH_CONVERT),
+				    XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
+	/*
+	 * If pagino is 0 (this is the root inode allocation) use newino.
+	 * This must work because we've just allocated some.
+	 */
+	if (!pagino)
+		pagino = INT_GET(agi->agi_newino, ARCH_CONVERT);
+#ifdef DEBUG
+	if (cur->bc_nlevels == 1) {
+		int	freecount = 0;
+
+		if (error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))
+			goto error0;
+		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		do {
+			if (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
+					&rec.ir_freecount, &rec.ir_free, &i, ARCH_NOCONVERT))
+				goto error0;
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			freecount += rec.ir_freecount;
+			if (error = xfs_inobt_increment(cur, 0, &i))
+				goto error0;
+		} while (i == 1);
+
+		ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
+		       XFS_FORCED_SHUTDOWN(mp));
+	}
+#endif
+	/*
+	 * If in the same a.g. as the parent, try to get near the parent.
+	 */
+	if (pagno == agno) {
+		if (error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i))
+			goto error0;
+		if (i != 0 &&
+		    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
+			    &rec.ir_freecount, &rec.ir_free, &j, ARCH_NOCONVERT)) == 0 &&
+		    j == 1 &&
+		    rec.ir_freecount > 0) {
+			/*
+			 * Found a free inode in the same chunk
+			 * as parent, done.
+			 */
+		}
+		/*
+		 * In the same a.g. as parent, but parent's chunk is full.
+		 */
+		else {
+			int	doneleft;	/* done, to the left */
+			int	doneright;	/* done, to the right */
+
+			if (error)
+				goto error0;
+			ASSERT(i == 1);
+			ASSERT(j == 1);
+			/*
+			 * Duplicate the cursor, search left & right
+			 * simultaneously.
+			 */
+			if (error = xfs_btree_dup_cursor(cur, &tcur))
+				goto error0;
+			/*
+			 * Search left with tcur, back up 1 record.
+			 */
+			if (error = xfs_inobt_decrement(tcur, 0, &i))
+				goto error1;
+			doneleft = !i;
+			if (!doneleft) {
+				if (error = xfs_inobt_get_rec(tcur,
+						&trec.ir_startino,
+						&trec.ir_freecount,
+						&trec.ir_free, &i, ARCH_NOCONVERT))
+					goto error1;
+				XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
+			}
+			/*
+			 * Search right with cur, go forward 1 record.
+			 */
+			if (error = xfs_inobt_increment(cur, 0, &i))
+				goto error1;
+			doneright = !i;
+			if (!doneright) {
+				if (error = xfs_inobt_get_rec(cur,
+						&rec.ir_startino,
+						&rec.ir_freecount,
+						&rec.ir_free, &i, ARCH_NOCONVERT))
+					goto error1;
+				XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
+			}
+			/*
+			 * Loop until we find the closest inode chunk
+			 * with a free one.
+			 */
+			while (!doneleft || !doneright) {
+				int	useleft;  /* using left inode
+						     chunk this time */
+
+				/*
+				 * Figure out which block is closer,
+				 * if both are valid.
+				 */
+				if (!doneleft && !doneright)
+					useleft =
+						pagino -
+						(trec.ir_startino +
+						 XFS_INODES_PER_CHUNK - 1) <
+						 rec.ir_startino - pagino;
+				else
+					useleft = !doneleft;
+				/*
+				 * If checking the left, does it have
+				 * free inodes?
+				 */
+				if (useleft && trec.ir_freecount) {
+					/*
+					 * Yes, set it up as the chunk to use.
+					 * From here on cur is the left cursor.
+					 */
+					rec = trec;
+					xfs_btree_del_cursor(cur,
+						XFS_BTREE_NOERROR);
+					cur = tcur;
+					break;
+				}
+				/*
+				 * If checking the right, does it have
+				 * free inodes?
+				 */
+				if (!useleft && rec.ir_freecount) {
+					/*
+					 * Yes, it's already set up.
+					 */
+					xfs_btree_del_cursor(tcur,
+						XFS_BTREE_NOERROR);
+					break;
+				}
+				/*
+				 * If used the left, get another one
+				 * further left.
+				 */
+				if (useleft) {
+					if (error = xfs_inobt_decrement(tcur, 0,
+							&i))
+						goto error1;
+					doneleft = !i;
+					if (!doneleft) {
+						if (error = xfs_inobt_get_rec(
+							    tcur,
+							    &trec.ir_startino,
+							    &trec.ir_freecount,
+							    &trec.ir_free, &i, ARCH_NOCONVERT))
+							goto error1;
+						XFS_WANT_CORRUPTED_GOTO(i == 1,
+							error1);
+					}
+				}
+				/*
+				 * If used the right, get another one
+				 * further right.
+				 */
+				else {
+					if (error = xfs_inobt_increment(cur, 0,
+							&i))
+						goto error1;
+					doneright = !i;
+					if (!doneright) {
+						if (error = xfs_inobt_get_rec(
+							    cur,
+							    &rec.ir_startino,
+							    &rec.ir_freecount,
+							    &rec.ir_free, &i, ARCH_NOCONVERT))
+							goto error1;
+						XFS_WANT_CORRUPTED_GOTO(i == 1,
+							error1);
+					}
+				}
+			}
+			ASSERT(!doneleft || !doneright);
+		}
+	}
+	/*
+	 * In a different a.g. from the parent.
+	 * See if the most recently allocated block has any free.
+	 */
+	else if (INT_GET(agi->agi_newino, ARCH_CONVERT) != NULLAGINO) {
+		if (error = xfs_inobt_lookup_eq(cur,
+				INT_GET(agi->agi_newino, ARCH_CONVERT), 0, 0, &i))
+			goto error0;
+		if (i == 1 &&
+		    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
+			    &rec.ir_freecount, &rec.ir_free, &j, ARCH_NOCONVERT)) == 0 &&
+		    j == 1 &&
+		    rec.ir_freecount > 0) {
+			/*
+			 * The last chunk allocated in the group still has
+			 * a free inode.
+			 */
+		}
+		/*
+		 * None left in the last group, search the whole a.g.
+		 */
+		else {
+			if (error)
+				goto error0;
+			if (error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))
+				goto error0;
+			ASSERT(i == 1);
+			for (;;) {
+				if (error = xfs_inobt_get_rec(cur,
+						&rec.ir_startino,
+						&rec.ir_freecount, &rec.ir_free,
+						&i, ARCH_NOCONVERT))
+					goto error0;
+				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+				if (rec.ir_freecount > 0)
+					break;
+				if (error = xfs_inobt_increment(cur, 0, &i))
+					goto error0;
+				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			}
+		}
+	}
+	offset = XFS_IALLOC_FIND_FREE(&rec.ir_free);
+	ASSERT(offset >= 0);
+	ASSERT(offset < XFS_INODES_PER_CHUNK);
+	ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
+				   XFS_INODES_PER_CHUNK) == 0);
+	ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
+	XFS_INOBT_CLR_FREE(&rec, offset, ARCH_NOCONVERT);
+	rec.ir_freecount--;
+	if (error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
+			rec.ir_free))
+		goto error0;
+	INT_MOD(agi->agi_freecount, ARCH_CONVERT, -1);
+	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
+	mraccess(&mp->m_peraglock);
+	mp->m_perag[tagno].pagi_freecount--;
+	mraccunlock(&mp->m_peraglock);
+#ifdef DEBUG
+	if (cur->bc_nlevels == 1) {
+		int	freecount = 0;
+
+		if (error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))
+			goto error0;
+		do {
+			if (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
+					&rec.ir_freecount, &rec.ir_free, &i, ARCH_NOCONVERT))
+				goto error0;
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			freecount += rec.ir_freecount;
+			if (error = xfs_inobt_increment(cur, 0, &i))
+				goto error0;
+		} while (i == 1);
+		ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
+		       XFS_FORCED_SHUTDOWN(mp));
+	}
+#endif
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
+	*inop = ino;
+	return 0;
+error1:
+	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+error0:
+	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	return error;
+}
+
+
+/*
+ * Return the location of the inode in bno/off, for mapping it into a buffer.
+ *
+ * Three cases are handled:
+ *   - The block size is at least the inode cluster size, or
+ *     XFS_IMAP_LOOKUP is not set in flags: the inode's own block is
+ *     returned with *len == 1.
+ *   - *bno is not NULLFSBLOCK on entry: it is taken as the first block
+ *     of the cluster and only *off and *len are computed.
+ *   - Otherwise the start of the inode chunk is found, either from
+ *     m_inoalign_mask or by looking up the inode btree record at or
+ *     below agino, and the cluster containing the inode is returned.
+ *
+ * Returns EINVAL if ino does not decode to an a.g. and block inside the
+ * filesystem or does not round-trip through XFS_AGINO_TO_INO(), or if
+ * the btree get_rec reports no record.  Errors from the AGI read and
+ * the btree calls are passed through.
+ */
+/*ARGSUSED*/
+int
+xfs_dilocate(
+	xfs_mount_t	*mp,	/* file system mount structure */
+	xfs_trans_t	*tp,	/* transaction pointer */
+	xfs_ino_t	ino,	/* inode to locate */
+	xfs_fsblock_t	*bno,	/* output: block containing inode */
+	int		*len,	/* output: num blocks in inode cluster */
+	int		*off,	/* output: index in block of inode */
+	uint		flags)	/* flags concerning inode lookup */
+{
+	xfs_agblock_t	agbno;	/* block number of inode in the alloc group */
+	xfs_buf_t	*agbp;	/* agi buffer */
+	xfs_agino_t	agino;	/* inode number within alloc group */
+	xfs_agnumber_t	agno;	/* allocation group number */
+	int		blks_per_cluster; /* num blocks per inode cluster */
+	xfs_agblock_t	chunk_agbno;	/* first block in inode chunk */
+	xfs_agino_t	chunk_agino;	/* first agino in inode chunk */
+	__int32_t	chunk_cnt;	/* count of free inodes in chunk */
+	xfs_inofree_t	chunk_free;	/* mask of free inodes in chunk */
+	xfs_agblock_t	cluster_agbno;	/* first block in inode cluster */
+	xfs_btree_cur_t	*cur;	/* inode btree cursor */
+	int		error;	/* error code */
+	int		i;	/* temp state */
+	int		offset;	/* index of inode in its buffer */
+	int		offset_agbno;	/* blks from chunk start to inode */
+
+	ASSERT(ino != NULLFSINO);
+	/*
+	 * Split up the inode number into its parts.
+	 */
+	agno = XFS_INO_TO_AGNO(mp, ino);
+	agino = XFS_INO_TO_AGINO(mp, ino);
+	agbno = XFS_AGINO_TO_AGBNO(mp, agino);
+	if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
+	    ino != XFS_AGINO_TO_INO(mp, agno, agino))
+		return XFS_ERROR(EINVAL);
+	if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) ||
+	    !(flags & XFS_IMAP_LOOKUP)) {
+		offset = XFS_INO_TO_OFFSET(mp, ino);
+		ASSERT(offset < mp->m_sb.sb_inopblock);
+		*bno = XFS_AGB_TO_FSB(mp, agno, agbno);
+		*off = offset;
+		*len = 1;
+		return 0;
+	}
+	blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
+	if (*bno != NULLFSBLOCK) {
+		offset = XFS_INO_TO_OFFSET(mp, ino);
+		ASSERT(offset < mp->m_sb.sb_inopblock);
+		cluster_agbno = XFS_FSB_TO_AGBNO(mp, *bno);
+		*off = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
+			offset;
+		*len = blks_per_cluster;
+		return 0;
+	}
+	if (mp->m_inoalign_mask) {
+		offset_agbno = agbno & mp->m_inoalign_mask;
+		chunk_agbno = agbno - offset_agbno;
+	} else {
+		mraccess(&mp->m_peraglock);
+		error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+		mraccunlock(&mp->m_peraglock);
+		if (error)
+			return error;
+		cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO,
+			(xfs_inode_t *)0, 0);
+		if (error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))
+			goto error0;
+		if (error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt,
+				&chunk_free, &i, ARCH_NOCONVERT))
+			goto error0;
+		if (i == 0)
+			error = XFS_ERROR(EINVAL);
+		xfs_trans_brelse(tp, agbp);
+		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+		if (error)
+			return error;
+		chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino);
+		offset_agbno = agbno - chunk_agbno;
+	}
+	ASSERT(agbno >= chunk_agbno);
+	cluster_agbno = chunk_agbno +
+		((offset_agbno / blks_per_cluster) * blks_per_cluster);
+	offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
+		XFS_INO_TO_OFFSET(mp, ino);
+	*bno = XFS_AGB_TO_FSB(mp, agno, cluster_agbno);
+	*off = offset;
+	*len = blks_per_cluster;
+	return 0;
+error0:
+	
xfs_trans_brelse(tp, agbp);
+	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	return error;
+}
+
+/*
+ * Compute and fill in value of m_in_maxlevels.
+ *
+ * Worst case: one leaf record for every possible inode chunk in an a.g.
+ * (2^XFS_INO_AGINO_BITS inodes divided by the inodes per chunk), with
+ * every block holding only its minimum number of records.  The level
+ * count starts at 1 for the leaves and grows by one for each node level
+ * needed to get down to a single block.
+ *
+ * NOTE(review): the minimum record counts are read from m_alloc_mnr, not
+ * from an inode-btree-specific table - confirm that this is intended.
+ */
+void
+xfs_ialloc_compute_maxlevels(
+	xfs_mount_t	*mp)		/* file system mount structure */
+{
+	int		level;
+	uint		maxblocks;
+	uint		maxleafents;
+	int		minleafrecs;
+	int		minnoderecs;
+
+	maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
+		XFS_INODES_PER_CHUNK_LOG;
+	minleafrecs = mp->m_alloc_mnr[0];
+	minnoderecs = mp->m_alloc_mnr[1];
+	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
+	for (level = 1; maxblocks > 1; level++)
+		maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
+	mp->m_in_maxlevels = level;
+}
+
+/*
+ * Log specified fields for the ag hdr (inode section)
+ *
+ * fields is a bitmask of XFS_AGI_* bits.  xfs_btree_offsets() turns it
+ * into a first/last byte range using the offsets table, and that range
+ * of the buffer is logged in the transaction.
+ */
+void
+xfs_ialloc_log_agi(
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_buf_t	*bp,		/* allocation group header buffer */
+	int		fields)		/* bitmask of fields to log */
+{
+	int		first;		/* first byte number */
+	int		last;		/* last byte number */
+	static const short	offsets[] = {	/* field starting offsets */
+					/* keep in sync with bit definitions */
+		offsetof(xfs_agi_t, agi_magicnum),
+		offsetof(xfs_agi_t, agi_versionnum),
+		offsetof(xfs_agi_t, agi_seqno),
+		offsetof(xfs_agi_t, agi_length),
+		offsetof(xfs_agi_t, agi_count),
+		offsetof(xfs_agi_t, agi_root),
+		offsetof(xfs_agi_t, agi_level),
+		offsetof(xfs_agi_t, agi_freecount),
+		offsetof(xfs_agi_t, agi_newino),
+		offsetof(xfs_agi_t, agi_dirino),
+		offsetof(xfs_agi_t, agi_unlinked),
+		sizeof(xfs_agi_t)
+	};
+#ifdef DEBUG
+	xfs_agi_t		*agi;	/* allocation group header */
+
+	agi = XFS_BUF_TO_AGI(bp);
+	ASSERT(INT_GET(agi->agi_magicnum, ARCH_CONVERT) ==
+		XFS_AGI_MAGIC);
+#endif
+	/*
+	 * Compute byte offsets for the first and last fields.
+	 */
+	xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
+	/*
+	 * Log the allocation group inode header buffer.
+	 */
+	xfs_trans_log_buf(tp, bp, first, last);
+}
+
+/*
+ * Read in the allocation group header (inode allocation section)
+ *
+ * The AGI buffer for agno is read through the transaction and its magic
+ * number and version are checked.  The first successful read of an a.g.
+ * seeds the in-core pagi_freecount from the on-disk agi_freecount and
+ * sets pagi_init.  On success the buffer is returned in *bpp.
+ *
+ * Returns the error from xfs_trans_read_buf(), or EFSCORRUPTED (after
+ * releasing the buffer) if the header check fails or the
+ * XFS_ERRTAG_IALLOC_READ_AGI error tag fires.  *bpp is not written on
+ * failure.
+ */
+int
+xfs_ialloc_read_agi(
+	xfs_mount_t	*mp,		/* file system mount structure */
+	xfs_trans_t	*tp,		/* transaction pointer */
+	xfs_agnumber_t	agno,		/* allocation group number */
+	xfs_buf_t	**bpp)		/* allocation group hdr buf */
+{
+	xfs_agi_t	*agi;		/* allocation group header */
+	int		agi_ok;		/* agi is consistent */
+	xfs_buf_t	*bp;		/* allocation group hdr buf */
+	xfs_daddr_t	d;		/* disk block address */
+	int		error;
+#ifdef DEBUG
+	int		i;
+#endif
+	xfs_perag_t	*pag;		/* per allocation group data */
+
+
+	ASSERT(agno != NULLAGNUMBER);
+	d = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR);
+	if (error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, 1, 0, &bp))
+		return error;
+	ASSERT(bp && !XFS_BUF_GETERROR(bp));
+	/*
+	 * Validate the magic number and version of the agi block.
+	 */
+	agi = XFS_BUF_TO_AGI(bp);
+	agi_ok =
+		INT_GET(agi->agi_magicnum, ARCH_CONVERT) == XFS_AGI_MAGIC &&
+		XFS_AGI_GOOD_VERSION(INT_GET(agi->agi_versionnum, ARCH_CONVERT));
+	if (XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
+			XFS_RANDOM_IALLOC_READ_AGI)) {
+		xfs_trans_brelse(tp, bp);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	pag = &mp->m_perag[agno];
+	if (!pag->pagi_init) {
+		pag->pagi_freecount = INT_GET(agi->agi_freecount, ARCH_CONVERT);
+		pag->pagi_init = 1;
+	} else {
+		/*
+		 * It's possible for these to be out of sync if
+		 * we are in the middle of a forced shutdown.
+		 */
+		ASSERT(pag->pagi_freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT)
+			|| XFS_FORCED_SHUTDOWN(mp));
+	}
+#ifdef DEBUG
+	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
+		ASSERT(INT_GET(agi->agi_unlinked[i], ARCH_CONVERT) != 0);
+#endif
+	XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGI, XFS_AGI_REF);
+	*bpp = bp;
+	return 0;
+}
diff --git a/libxfs/xfs_ialloc_btree.c b/libxfs/xfs_ialloc_btree.c
new file mode 100644
index 000000000..bdf2dae48
--- /dev/null
+++ b/libxfs/xfs_ialloc_btree.c
@@ -0,0 +1,1552 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Inode allocation management for XFS.
+ */
+#include
+
+/*
+ * Insert one record/level.
Return information to the caller
+ * allowing the next level up to proceed if necessary.
+ *
+ * If level is at or beyond the current tree height a new root is made
+ * and *bnop comes back as NULLAGBLOCK.  Otherwise the entry goes in at
+ * cur->bc_ptrs[level].  A full block is first relieved by shifting one
+ * entry to the right sibling, then to the left sibling, and finally by
+ * splitting the block.
+ *
+ * *stat is 1 if the entry was inserted and 0 if it could not be (cursor
+ * off the left edge, or the block was full and the split also failed).
+ * After a successful insert *bnop is NULLAGBLOCK unless the block was
+ * split; in that case it holds the new block's number, and *recp and
+ * *curp are replaced with the record and cursor the caller needs to
+ * insert at the next level up.
+ */
+STATIC int				/* error */
+xfs_inobt_insrec(
+	xfs_btree_cur_t		*cur,	/* btree cursor */
+	int			level,	/* level to insert record at */
+	xfs_agblock_t		*bnop,	/* i/o: block number inserted */
+	xfs_inobt_rec_t		*recp,	/* i/o: record data inserted */
+	xfs_btree_cur_t		**curp,	/* output: new cursor replacing cur */
+	int			*stat)	/* success/failure */
+{
+	xfs_inobt_block_t	*block;	/* btree block record/key lives in */
+	xfs_buf_t		*bp;	/* buffer for block */
+	int			error;	/* error return value */
+	int			i;	/* loop index */
+	xfs_inobt_key_t		key;	/* key value being inserted */
+	xfs_inobt_key_t		*kp;	/* pointer to btree keys */
+	xfs_agblock_t		nbno;	/* block number of allocated block */
+	xfs_btree_cur_t		*ncur;	/* new cursor to be used at next lvl */
+	xfs_inobt_key_t		nkey;	/* new key value, from split */
+	xfs_inobt_rec_t		nrec;	/* new record value, for caller */
+	int			optr;	/* old ptr value */
+	xfs_inobt_ptr_t		*pp;	/* pointer to btree addresses */
+	int			ptr;	/* index in btree block for this rec */
+	xfs_inobt_rec_t		*rp;	/* pointer to btree records */
+
+	/*
+	 * If we made it to the root level, allocate a new root block
+	 * and we're done.
+	 */
+	if (level >= cur->bc_nlevels) {
+		error = xfs_inobt_newroot(cur, &i);
+		*bnop = NULLAGBLOCK;
+		*stat = i;
+		return error;
+	}
+	/*
+	 * Make a key out of the record data to be inserted, and save it.
+	 */
+	key.ir_startino = recp->ir_startino; /* INT_: direct copy */
+	optr = ptr = cur->bc_ptrs[level];
+	/*
+	 * If we're off the left edge, return failure.
+	 */
+	if (ptr == 0) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Get pointers to the btree buffer and block.
+	 */
+	bp = cur->bc_bufs[level];
+	block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, level, bp))
+		return error;
+	/*
+	 * Check that the new entry is being inserted in the right place.
+	 */
+	if (ptr <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		if (level == 0) {
+			rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
+			xfs_btree_check_rec(cur->bc_btnum, recp, rp);
+		} else {
+			kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
+			xfs_btree_check_key(cur->bc_btnum, &key, kp);
+		}
+	}
+#endif
+	nbno = NULLAGBLOCK;
+	ncur = (xfs_btree_cur_t *)0;
+	/*
+	 * If the block is full, we can't insert the new entry until we
+	 * make the block un-full.
+	 */
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
+		/*
+		 * First, try shifting an entry to the right neighbor.
+		 */
+		if (error = xfs_inobt_rshift(cur, level, &i))
+			return error;
+		if (i) {
+			/* nothing */
+		}
+		/*
+		 * Next, try shifting an entry to the left neighbor.
+		 */
+		else {
+			if (error = xfs_inobt_lshift(cur, level, &i))
+				return error;
+			if (i) {
+				optr = ptr = cur->bc_ptrs[level];
+			} else {
+				/*
+				 * Next, try splitting the current block
+				 * in half. If this works we have to
+				 * re-set our variables because
+				 * we could be in a different block now.
+				 */
+				if (error = xfs_inobt_split(cur, level, &nbno,
+						&nkey, &ncur, &i))
+					return error;
+				if (i) {
+					bp = cur->bc_bufs[level];
+					block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+					if (error = xfs_btree_check_sblock(cur,
+							block, level, bp))
+						return error;
+#endif
+					ptr = cur->bc_ptrs[level];
+					nrec.ir_startino = nkey.ir_startino; /* INT_: direct copy */
+				} else {
+					/*
+					 * Otherwise the insert fails.
+					 */
+					*stat = 0;
+					return 0;
+				}
+			}
+		}
+	}
+	/*
+	 * At this point we know there's room for our new entry in the block
+	 * we're pointing at.
+	 */
+	if (level > 0) {
+		/*
+		 * It's a non-leaf entry.  Make a hole for the new data
+		 * in the key and ptr regions of the block.
+		 */
+		kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
+		pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
+#ifdef DEBUG
+		for (i = INT_GET(block->bb_numrecs, ARCH_CONVERT); i >= ptr; i--) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), level))
+				return error;
+		}
+#endif
+		ovbcopy(&kp[ptr - 1], &kp[ptr],
+			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp));
+		ovbcopy(&pp[ptr - 1], &pp[ptr],
+			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp));
+		/*
+		 * Now stuff the new data in, bump numrecs and log the new data.
+		 */
+#ifdef DEBUG
+		if (error = xfs_btree_check_sptr(cur, *bnop, level))
+			return error;
+#endif
+		kp[ptr - 1] = key;	/* INT_: struct copy */
+		INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
+		INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+		xfs_inobt_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+		xfs_inobt_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+	} else {
+		/*
+		 * It's a leaf entry.  Make a hole for the new record.
+		 */
+		rp = XFS_INOBT_REC_ADDR(block, 1, cur);
+		ovbcopy(&rp[ptr - 1], &rp[ptr],
+			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp));
+		/*
+		 * Now stuff the new record in, bump numrecs
+		 * and log the new data.
+		 */
+		rp[ptr - 1] = *recp; /* INT_: struct copy */
+		INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
+		xfs_inobt_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
+	}
+	/*
+	 * Log the new number of records in the btree header.
+	 */
+	xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
+#ifdef DEBUG
+	/*
+	 * Check that the key/record is in the right place, now.
+	 */
+	if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
+		if (level == 0)
+			xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
+				rp + ptr);
+		else
+			xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
+				kp + ptr);
+	}
+#endif
+	/*
+	 * If we inserted at the start of a block, update the parents' keys.
+	 */
+	if (optr == 1 && (error = xfs_inobt_updkey(cur, &key, level + 1)))
+		return error;
+	/*
+	 * Return the new block number, if any.
+	 * If there is one, give back a record value and a cursor too.
+	 */
+	*bnop = nbno;
+	if (nbno != NULLAGBLOCK) {
+		*recp = nrec; /* INT_: struct copy */
+		*curp = ncur;
+	}
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Log header fields from a btree block.
+ *
+ * "fields" is a mask of XFS_BB_... bits naming the header members to log.
+ * The mask and the offsets table below (one entry per header member in
+ * structure order, with the size of the whole header as the final entry)
+ * are handed to xfs_btree_offsets(), which fills in first/last; that byte
+ * range of the buffer is then passed to xfs_trans_log_buf().
+ */
+STATIC void
+xfs_inobt_log_block(
+	xfs_trans_t	*tp,	/* transaction pointer */
+	xfs_buf_t	*bp,	/* buffer containing btree block */
+	int		fields)	/* mask of fields: XFS_BB_... */
+{
+	int		first;	/* first byte offset logged */
+	int		last;	/* last byte offset logged */
+	static const short	offsets[] = {	/* table of offsets */
+		offsetof(xfs_inobt_block_t, bb_magic),
+		offsetof(xfs_inobt_block_t, bb_level),
+		offsetof(xfs_inobt_block_t, bb_numrecs),
+		offsetof(xfs_inobt_block_t, bb_leftsib),
+		offsetof(xfs_inobt_block_t, bb_rightsib),
+		sizeof(xfs_inobt_block_t)
+	};
+
+	xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
+	xfs_trans_log_buf(tp, bp, first, last);
+}
+
+/*
+ * Log keys from a btree block (nonleaf).
+ *
+ * kfirst and klast are 1-based key indices, inclusive.  The range logged
+ * runs from the first byte of key kfirst through the last byte of key
+ * klast, both expressed as byte offsets from the start of the block.
+ */
+STATIC void
+xfs_inobt_log_keys(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	xfs_buf_t	*bp,	/* buffer containing btree block */
+	int		kfirst,	/* index of first key to log */
+	int		klast)	/* index of last key to log */
+{
+	xfs_inobt_block_t	*block;	/* btree block to log from */
+	int			first;	/* first byte offset logged */
+	xfs_inobt_key_t		*kp;	/* key pointer in btree block */
+	int			last;	/* last byte offset logged */
+
+	block = XFS_BUF_TO_INOBT_BLOCK(bp);
+	kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
+	first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
+	last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
+	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+}
+
+/*
+ * Log block pointer fields from a btree block (nonleaf). 
+ */
+STATIC void
+xfs_inobt_log_ptrs(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	xfs_buf_t	*bp,	/* buffer containing btree block */
+	int		pfirst,	/* index of first pointer to log (1-based) */
+	int		plast)	/* index of last pointer to log (inclusive) */
+{
+	xfs_inobt_block_t	*block;	/* btree block to log from */
+	int			first;	/* first byte offset logged */
+	int			last;	/* last byte offset logged */
+	xfs_inobt_ptr_t		*pp;	/* block-pointer pointer in btree blk */
+
+	block = XFS_BUF_TO_INOBT_BLOCK(bp);
+	pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
+	first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
+	last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
+	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+}
+
+/*
+ * Log records from a btree block (leaf).
+ *
+ * rfirst and rlast are 1-based record indices, inclusive.  The range
+ * logged runs from the first byte of record rfirst through the last byte
+ * of record rlast, measured from the start of the block.
+ */
+STATIC void
+xfs_inobt_log_recs(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	xfs_buf_t	*bp,	/* buffer containing btree block */
+	int		rfirst,	/* index of first record to log */
+	int		rlast)	/* index of last record to log */
+{
+	xfs_inobt_block_t	*block;	/* btree block to log from */
+	int			first;	/* first byte offset logged */
+	int			last;	/* last byte offset logged */
+	xfs_inobt_rec_t		*rp;	/* record pointer for btree block */
+
+	block = XFS_BUF_TO_INOBT_BLOCK(bp);
+	rp = XFS_INOBT_REC_ADDR(block, 1, cur);
+	first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
+	last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
+	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
+}
+
+/*
+ * Lookup the record.  The cursor is made to point to it, based on dir.
+ * The value searched for is cur->bc_rec.i.ir_startino.
+ * *stat is set to 0 if no such record can be found, 1 for success; the
+ * function's own return value is an error code, 0 when nothing failed.
+ */
+STATIC int				/* error */
+xfs_inobt_lookup(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	xfs_lookup_t	dir,	/* <=, ==, or >= */
+	int		*stat)	/* success/failure */
+{
+	xfs_agblock_t		agbno;	/* a.g. relative btree block number */
+	xfs_agnumber_t		agno;	/* allocation group number */
+	xfs_inobt_block_t	*block;	/* current btree block */
+	int			diff;	/* difference for the current key */
+	int			error;	/* error return value */
+	int			keyno;	/* current key number */
+	int			level;	/* level in the btree */
+	xfs_mount_t		*mp;	/* file system mount point */
+
+	/*
+	 * Get the allocation group header, and the root block number.
+	 */
+	mp = cur->bc_mp;
+	{
+		xfs_agi_t	*agi;	/* a.g. inode header */
+
+		agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
+		agno = INT_GET(agi->agi_seqno, ARCH_CONVERT);
+		agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
+	}
+	/*
+	 * Iterate over each level in the btree, starting at the root.
+	 * For each level above the leaves, find the key we need, based
+	 * on the lookup record, then follow the corresponding block
+	 * pointer down to the next level.
+	 * diff starts out nonzero so that the root block is always searched.
+	 */
+	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
+		xfs_buf_t	*bp;	/* buffer pointer for btree block */
+		xfs_daddr_t	d;	/* disk address of btree block */
+
+		/*
+		 * Get the disk address we're looking for.
+		 */
+		d = XFS_AGB_TO_DADDR(mp, agno, agbno);
+		/*
+		 * If the old buffer at this level is for a different block,
+		 * throw it away, otherwise just use it.
+		 */
+		bp = cur->bc_bufs[level];
+		if (bp && XFS_BUF_ADDR(bp) != d)
+			bp = (xfs_buf_t *)0;
+		if (!bp) {
+			/*
+			 * Need to get a new buffer.  Read it, then
+			 * set it in the cursor, releasing the old one.
+			 */
+			if (error = xfs_btree_read_bufs(mp, cur->bc_tp,
+					agno, agbno, 0, &bp, XFS_INO_BTREE_REF))
+				return error;
+			xfs_btree_setbuf(cur, level, bp);
+			/*
+			 * Point to the btree block, now that we have the
+			 * buffer.  Only a freshly read block is checked.
+			 */
+			block = XFS_BUF_TO_INOBT_BLOCK(bp);
+			if (error = xfs_btree_check_sblock(cur, block, level,
+					bp))
+				return error;
+		} else
+			block = XFS_BUF_TO_INOBT_BLOCK(bp);
+		/*
+		 * If we already had a key match at a higher level, we know
+		 * we need to use the first entry in this block.
+		 */
+		if (diff == 0)
+			keyno = 1;
+		/*
+		 * Otherwise we need to search this block.  Do a binary search.
+		 */
+		else {
+			int		high;	/* high entry number */
+			xfs_inobt_key_t	*kkbase;/* base of keys in block */
+			xfs_inobt_rec_t	*krbase;/* base of records in block */
+			int		low;	/* low entry number */
+
+			/*
+			 * Get a pointer to keys or records.
+			 * Only the one matching this level is set and used.
+			 */
+			if (level > 0)
+				kkbase = XFS_INOBT_KEY_ADDR(block, 1, cur);
+			else
+				krbase = XFS_INOBT_REC_ADDR(block, 1, cur);
+			/*
+			 * Set low and high entry numbers, 1-based.
+			 */
+			low = 1;
+			if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) {
+				/*
+				 * If the block is empty, the tree must
+				 * be an empty leaf.  Leave the cursor at
+				 * entry 0 for a <= lookup and at entry 1
+				 * otherwise, and report failure.
+				 */
+				ASSERT(level == 0 && cur->bc_nlevels == 1);
+				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
+				*stat = 0;
+				return 0;
+			}
+			/*
+			 * Binary search the block.
+			 */
+			while (low <= high) {
+				xfs_agino_t	startino;	/* key value */
+
+				/*
+				 * keyno is average of low and high.
+				 */
+				keyno = (low + high) >> 1;
+				/*
+				 * Get startino.
+				 */
+				if (level > 0) {
+					xfs_inobt_key_t	*kkp;
+
+					kkp = kkbase + keyno - 1;
+					startino = INT_GET(kkp->ir_startino, ARCH_CONVERT);
+				} else {
+					xfs_inobt_rec_t	*krp;
+
+					krp = krbase + keyno - 1;
+					startino = INT_GET(krp->ir_startino, ARCH_CONVERT);
+				}
+				/*
+				 * Compute difference to get next direction.
+				 * NOTE(review): diff is a plain int; if the
+				 * two startino values can ever differ by
+				 * 2^31 or more its sign would be wrong -
+				 * confirm that cannot happen.
+				 */
+				diff = (int)startino - cur->bc_rec.i.ir_startino;
+				/*
+				 * Less than, move right.
+				 */
+				if (diff < 0)
+					low = keyno + 1;
+				/*
+				 * Greater than, move left.
+				 */
+				else if (diff > 0)
+					high = keyno - 1;
+				/*
+				 * Equal, we're done.
+				 */
+				else
+					break;
+			}
+		}
+		/*
+		 * If there are more levels, set up for the next level
+		 * by getting the block number and filling in the cursor.
+		 */
+		if (level > 0) {
+			/*
+			 * If we moved left, need the previous key number,
+			 * unless there isn't one.
+			 */
+			if (diff > 0 && --keyno < 1)
+				keyno = 1;
+			agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, keyno, cur), ARCH_CONVERT);
+#ifdef DEBUG
+			if (error = xfs_btree_check_sptr(cur, agbno, level))
+				return error;
+#endif
+			cur->bc_ptrs[level] = keyno;
+		}
+	}
+	/*
+	 * Done with the search.
+	 * See if we need to adjust the results.
+	 * diff < 0 means the last entry examined sorts below the search
+	 * value, so a >= or == lookup steps forward one entry; diff > 0
+	 * means it sorts above, so a <= lookup steps back one entry.
+	 */
+	if (dir != XFS_LOOKUP_LE && diff < 0) {
+		keyno++;
+		/*
+		 * If ge search and we went off the end of the block, but it's
+		 * not the last block, we're in the wrong block.
+		 * Let the increment routine move the cursor into the right
+		 * sibling.
+		 */
+		if (dir == XFS_LOOKUP_GE &&
+		    keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) &&
+		    INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+			int	i;	/* result of the increment */
+
+			cur->bc_ptrs[0] = keyno;
+			if (error = xfs_inobt_increment(cur, 0, &i))
+				return error;
+			ASSERT(i == 1);
+			*stat = 1;
+			return 0;
+		}
+	}
+	else if (dir == XFS_LOOKUP_LE && diff > 0)
+		keyno--;
+	cur->bc_ptrs[0] = keyno;
+	/*
+	 * Return if we succeeded or not.
+	 * Ending up off either end of the leaf is a failure.  Otherwise
+	 * <= and >= lookups succeed, and an == lookup succeeds only on an
+	 * exact match.
+	 */
+	if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT))
+		*stat = 0;
+	else
+		*stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
+	return 0;
+}
+
+/*
+ * Move 1 record left from cur/level if possible.
+ * Update cur to reflect the new path. 
+ */
+STATIC int				/* error */
+xfs_inobt_lshift(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	int		level,	/* level to shift record on */
+	int		*stat)	/* success/failure: 1 if an entry was moved */
+{
+	int			error;	/* error return value */
+#ifdef DEBUG
+	int			i;	/* loop index */
+#endif
+	xfs_inobt_key_t		key;	/* key value for leaf level upward */
+	xfs_buf_t		*lbp;	/* buffer for left neighbor block */
+	xfs_inobt_block_t	*left;	/* left neighbor btree block */
+	xfs_inobt_key_t		*lkp;	/* key pointer for left block */
+	xfs_inobt_ptr_t		*lpp;	/* address pointer for left block */
+	xfs_inobt_rec_t		*lrp;	/* record pointer for left block */
+	int			nrec;	/* new number of left block entries */
+	xfs_buf_t		*rbp;	/* buffer for right (current) block */
+	xfs_inobt_block_t	*right;	/* right (current) btree block */
+	xfs_inobt_key_t		*rkp;	/* key pointer for right block */
+	xfs_inobt_ptr_t		*rpp;	/* address pointer for right block */
+	xfs_inobt_rec_t		*rrp;	/* record pointer for right block */
+
+	/*
+	 * Set up variables for this block as "right".
+	 * The block the cursor points at is the one that gives up its
+	 * first entry.
+	 */
+	rbp = cur->bc_bufs[level];
+	right = XFS_BUF_TO_INOBT_BLOCK(rbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+		return error;
+#endif
+	/*
+	 * If we've got no left sibling then we can't shift an entry left.
+	 */
+	if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * If the cursor entry is the one that would be moved, don't
+	 * do it... it's too complicated.
+	 */
+	if (cur->bc_ptrs[level] <= 1) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Set up the left neighbor as "left".
+	 */
+	if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+			cur->bc_private.i.agno, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0, &lbp,
+			XFS_INO_BTREE_REF))
+		return error;
+	left = XFS_BUF_TO_INOBT_BLOCK(lbp);
+	if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+		return error;
+	/*
+	 * If it's full, it can't take another entry.
+	 */
+	if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
+		*stat = 0;
+		return 0;
+	}
+	nrec = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1;
+	/*
+	 * If non-leaf, copy a key and a ptr to the left block.
+	 * Right's first entry becomes left's new last entry (index nrec).
+	 */
+	if (level > 0) {
+		lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur);
+		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
+		*lkp = *rkp;
+		xfs_inobt_log_keys(cur, lbp, nrec, nrec);
+		lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur);
+		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+		if (error = xfs_btree_check_sptr(cur, INT_GET(*rpp, ARCH_CONVERT), level))
+			return error;
+#endif
+		*lpp = *rpp; /* INT_: no-change copy */
+		xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
+	}
+	/*
+	 * If leaf, copy a record to the left block.
+	 */
+	else {
+		lrp = XFS_INOBT_REC_ADDR(left, nrec, cur);
+		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
+		*lrp = *rrp;
+		xfs_inobt_log_recs(cur, lbp, nrec, nrec);
+	}
+	/*
+	 * Bump and log left's numrecs, decrement and log right's numrecs.
+	 */
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, +1);
+	xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
+#ifdef DEBUG
+	if (level > 0)
+		xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
+	else
+		xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
+#endif
+	INT_MOD(right->bb_numrecs, ARCH_CONVERT, -1);
+	xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
+	/*
+	 * Slide the contents of right down one entry.
+	 * right's numrecs has already been decremented, so it is the
+	 * number of entries that remain and have to be moved.
+	 */
+	if (level > 0) {
+#ifdef DEBUG
+		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT),
+					level))
+				return error;
+		}
+#endif
+		ovbcopy(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		ovbcopy(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+	} else {
+		ovbcopy(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
+		rkp = &key;
+	}
+	/*
+	 * Update the parent key values of right.
+	 * For a non-leaf, rkp still points at right's (new) first key in
+	 * the block; for a leaf it points at the local "key", built from
+	 * right's new first record.
+	 */
+	if (error = xfs_inobt_updkey(cur, rkp, level + 1))
+		return error;
+	/*
+	 * Slide the cursor value left one.
+	 */
+	cur->bc_ptrs[level]--;
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Allocate a new root block, fill it in.
+ *
+ * The new root is given two entries, one for each of the two blocks at
+ * what was the top level; the AGI root and level fields are updated and
+ * logged, and the cursor gains one level.  *stat is set to 0 if no block
+ * could be allocated, 1 on success.
+ */
+STATIC int				/* error */
+xfs_inobt_newroot(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	int		*stat)	/* success/failure */
+{
+	xfs_agi_t		*agi;	/* a.g. 
inode header */
+	xfs_alloc_arg_t		args;	/* allocation argument structure */
+	xfs_inobt_block_t	*block;	/* one half of the old root block */
+	xfs_buf_t		*bp;	/* buffer containing block */
+	int			error;	/* error return value */
+	xfs_inobt_key_t		*kp;	/* btree key pointer */
+	xfs_agblock_t		lbno;	/* left block number */
+	xfs_buf_t		*lbp;	/* left buffer pointer */
+	xfs_inobt_block_t	*left;	/* left btree block */
+	xfs_buf_t		*nbp;	/* new (root) buffer */
+	xfs_inobt_block_t	*new;	/* new (root) btree block */
+	int			nptr;	/* new value for key index, 1 or 2 */
+	xfs_inobt_ptr_t		*pp;	/* btree address pointer */
+	xfs_agblock_t		rbno;	/* right block number */
+	xfs_buf_t		*rbp;	/* right buffer pointer */
+	xfs_inobt_block_t	*right;	/* right btree block */
+	xfs_inobt_rec_t		*rp;	/* btree record pointer */
+
+	ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp));
+
+	/*
+	 * Get a block & a buffer.
+	 * Ask for exactly one block, near the current root block.
+	 */
+	agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
+	args.tp = cur->bc_tp;
+	args.mp = cur->bc_mp;
+	args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno,
+		INT_GET(agi->agi_root, ARCH_CONVERT));
+	args.mod = args.minleft = args.alignment = args.total = args.wasdel =
+		args.isfl = args.userdata = args.minalignslop = 0;
+	args.minlen = args.maxlen = args.prod = 1;
+	args.type = XFS_ALLOCTYPE_NEAR_BNO;
+	if (error = xfs_alloc_vextent(&args))
+		return error;
+	/*
+	 * None available, we fail.
+	 */
+	if (args.fsbno == NULLFSBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	ASSERT(args.len == 1);
+	nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
+	new = XFS_BUF_TO_INOBT_BLOCK(nbp);
+	/*
+	 * Set the root data in the a.g. inode structure.
+	 * The root moves to the new block and the tree grows by one level.
+	 */
+	INT_SET(agi->agi_root, ARCH_CONVERT, args.agbno);
+	INT_MOD(agi->agi_level, ARCH_CONVERT, 1);
+	xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp,
+		XFS_AGI_ROOT | XFS_AGI_LEVEL);
+	/*
+	 * At the previous root level there are now two blocks: the old
+	 * root, and the new block generated when it was split.
+	 * We don't know which one the cursor is pointing at, so we
+	 * set up variables "left" and "right" for each case.
+	 * nptr records which of the two new root entries (1 = left,
+	 * 2 = right) refers to the block the cursor is in.
+	 */
+	bp = cur->bc_bufs[cur->bc_nlevels - 1];
+	block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp))
+		return error;
+#endif
+	if (INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+		/*
+		 * Our block is left, pick up the right block.
+		 */
+		lbp = bp;
+		lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
+		left = block;
+		rbno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
+		if (error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
+				rbno, 0, &rbp, XFS_INO_BTREE_REF))
+			return error;
+		bp = rbp;
+		right = XFS_BUF_TO_INOBT_BLOCK(rbp);
+		if (error = xfs_btree_check_sblock(cur, right,
+				cur->bc_nlevels - 1, rbp))
+			return error;
+		nptr = 1;
+	} else {
+		/*
+		 * Our block is right, pick up the left block.
+		 */
+		rbp = bp;
+		rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp));
+		right = block;
+		lbno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
+		if (error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
+				lbno, 0, &lbp, XFS_INO_BTREE_REF))
+			return error;
+		bp = lbp;
+		left = XFS_BUF_TO_INOBT_BLOCK(lbp);
+		if (error = xfs_btree_check_sblock(cur, left,
+				cur->bc_nlevels - 1, lbp))
+			return error;
+		nptr = 2;
+	}
+	/*
+	 * Fill in the new block's btree header and log it.
+	 * Its level is the old number of levels, it holds two entries,
+	 * and it has no siblings.
+	 */
+	INT_SET(new->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
+	INT_SET(new->bb_level, ARCH_CONVERT, (__uint16_t)cur->bc_nlevels);
+	INT_SET(new->bb_numrecs, ARCH_CONVERT, 2);
+	INT_SET(new->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
+	INT_SET(new->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
+	xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS);
+	ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
+	/*
+	 * Fill in the key data in the new root.
+	 * If the children are non-leaf blocks their first keys are copied;
+	 * if they are leaves the keys are built from the first records'
+	 * ir_startino.
+	 */
+	kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
+	if (INT_GET(left->bb_level, ARCH_CONVERT) > 0) {
+		kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur); /* INT_: struct copy */
+		kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur); /* INT_: struct copy */
+	} else {
+		rp = XFS_INOBT_REC_ADDR(left, 1, cur);
+		INT_COPY(kp[0].ir_startino, rp->ir_startino, ARCH_CONVERT);
+		rp = XFS_INOBT_REC_ADDR(right, 1, cur);
+		INT_COPY(kp[1].ir_startino, rp->ir_startino, ARCH_CONVERT);
+	}
+	xfs_inobt_log_keys(cur, nbp, 1, 2);
+	/*
+	 * Fill in the pointer data in the new root.
+	 */
+	pp = XFS_INOBT_PTR_ADDR(new, 1, cur);
+	INT_SET(pp[0], ARCH_CONVERT, lbno);
+	INT_SET(pp[1], ARCH_CONVERT, rbno);
+	xfs_inobt_log_ptrs(cur, nbp, 1, 2);
+	/*
+	 * Fix up the cursor.
+	 * The new root becomes the cursor's top level, pointing at entry
+	 * nptr.
+	 */
+	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
+	cur->bc_ptrs[cur->bc_nlevels] = nptr;
+	cur->bc_nlevels++;
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Move 1 record right from cur/level if possible.
+ * Update cur to reflect the new path.
+ *
+ * *stat is set to 0, with nothing changed, when the block has no right
+ * sibling, when the cursor is at or past the block's last entry (the one
+ * that would be moved), or when the right sibling is already full.
+ * Otherwise the last entry is moved and *stat is set to 1.
+ */
+STATIC int				/* error */
+xfs_inobt_rshift(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	int		level,	/* level to shift record on */
+	int		*stat)	/* success/failure */
+{
+	int			error;	/* error return value */
+	int			i;	/* loop index */
+	xfs_inobt_key_t		key;	/* key value for leaf level upward */
+	xfs_buf_t		*lbp;	/* buffer for left (current) block */
+	xfs_inobt_block_t	*left;	/* left (current) btree block */
+	xfs_inobt_key_t		*lkp;	/* key pointer for left block */
+	xfs_inobt_ptr_t		*lpp;	/* address pointer for left block */
+	xfs_inobt_rec_t		*lrp;	/* record pointer for left block */
+	xfs_buf_t		*rbp;	/* buffer for right neighbor block */
+	xfs_inobt_block_t	*right;	/* right neighbor btree block */
+	xfs_inobt_key_t		*rkp;	/* key pointer for right block */
+	xfs_inobt_ptr_t		*rpp;	/* address pointer for right block */
+	xfs_inobt_rec_t		*rrp;	/* record pointer for right block */
+	xfs_btree_cur_t		*tcur;	/* temporary cursor */
+
+	/*
+	 * Set up variables for this block as "left". 
+	 */
+	lbp = cur->bc_bufs[level];
+	left = XFS_BUF_TO_INOBT_BLOCK(lbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+		return error;
+#endif
+	/*
+	 * If we've got no right sibling then we can't shift an entry right.
+	 */
+	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * If the cursor entry is the one that would be moved, don't
+	 * do it... it's too complicated.
+	 */
+	if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Set up the right neighbor as "right".
+	 */
+	if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+			cur->bc_private.i.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rbp,
+			XFS_INO_BTREE_REF))
+		return error;
+	right = XFS_BUF_TO_INOBT_BLOCK(rbp);
+	if (error = xfs_btree_check_sblock(cur, right, level, rbp))
+		return error;
+	/*
+	 * If it's full, it can't take another entry.
+	 */
+	if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Make a hole at the start of the right neighbor block, then
+	 * copy the last left block entry to the hole.
+	 * right's numrecs has not been bumped yet, which is why the
+	 * logging calls below cover entries 1 through numrecs + 1.
+	 */
+	if (level > 0) {
+		lkp = XFS_INOBT_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		lpp = XFS_INOBT_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
+		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+		for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))
+				return error;
+		}
+#endif
+		ovbcopy(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		ovbcopy(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+#ifdef DEBUG
+		if (error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level))
+			return error;
+#endif
+		*rkp = *lkp; /* INT_: no change copy */
+		*rpp = *lpp; /* INT_: no change copy */
+		xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+	} else {
+		lrp = XFS_INOBT_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
+		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
+		ovbcopy(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		*rrp = *lrp;
+		xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
+		key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
+		rkp = &key;
+	}
+	/*
+	 * Decrement and log left's numrecs, bump and log right's numrecs.
+	 */
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
+	xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
+	INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+#ifdef DEBUG
+	if (level > 0)
+		xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
+	else
+		xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
+#endif
+	xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
+	/*
+	 * Using a temporary cursor, update the parent key values of the
+	 * block on the right.
+	 * The copy is moved to the last record at this level and then
+	 * incremented, which presumably lands it in the right block so
+	 * that the key update follows that block's path to the root.
+	 * The temporary cursor is deleted on both the error and the
+	 * success path; the caller's cursor is not moved.
+	 */
+	if (error = xfs_btree_dup_cursor(cur, &tcur))
+		return error;
+	xfs_btree_lastrec(tcur, level);
+	if ((error = xfs_inobt_increment(tcur, level, &i)) ||
+	    (error = xfs_inobt_updkey(tcur, rkp, level + 1))) {
+		xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
+		return error;
+	}
+	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Split cur/level block in half.
+ * Return new block number and its first record (to be inserted into parent).
+ *
+ * The new block becomes the right sibling of the current one and takes
+ * the upper part of its entries.  *stat is set to 0 if no block could be
+ * allocated, 1 on success.  *curp is only written when there is a level
+ * above this one; it then receives a duplicate of the cursor advanced by
+ * one entry at level + 1.
+ */
+STATIC int				/* error */
+xfs_inobt_split(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	int		level,	/* level to split */
+	xfs_agblock_t	*bnop,	/* output: block number allocated */
+	xfs_inobt_key_t	*keyp,	/* output: first key of new block */
+	xfs_btree_cur_t	**curp,	/* output: new cursor */
+	int		*stat)	/* success/failure */
+{
+	xfs_alloc_arg_t		args;	/* allocation argument structure */
+	int			error;	/* error return value */
+	int			i;	/* loop index/record number */
+	xfs_agblock_t		lbno;	/* left (current) block number */
+	xfs_buf_t		*lbp;	/* buffer for left block */
+	xfs_inobt_block_t	*left;	/* left (current) btree block */
+	xfs_inobt_key_t		*lkp;	/* left btree key pointer */
+	xfs_inobt_ptr_t		*lpp;	/* left btree address pointer */
+	xfs_inobt_rec_t		*lrp;	/* left btree record pointer */
+	xfs_buf_t		*rbp;	/* buffer for right block */
+	xfs_inobt_block_t	*right;	/* right (new) btree block */
+	xfs_inobt_key_t		*rkp;	/* right btree key pointer */
+	xfs_inobt_ptr_t		*rpp;	/* right btree address pointer */
+	xfs_inobt_rec_t		*rrp;	/* right btree record pointer */
+
+	/*
+	 * Set up left block (current one).
+	 * Its a.g. block number is derived from the buffer's disk address.
+	 */
+	lbp = cur->bc_bufs[level];
+	args.tp = cur->bc_tp;
+	args.mp = cur->bc_mp;
+	lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
+	/*
+	 * Allocate the new block.
+	 * If we can't do it, we're toast.  Give up. 
+	 */
+	args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, lbno);
+	args.mod = args.minleft = args.alignment = args.total = args.wasdel =
+		args.isfl = args.userdata = args.minalignslop = 0;
+	args.minlen = args.maxlen = args.prod = 1;
+	args.type = XFS_ALLOCTYPE_NEAR_BNO;
+	if (error = xfs_alloc_vextent(&args))
+		return error;
+	if (args.fsbno == NULLFSBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	ASSERT(args.len == 1);
+	rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
+	/*
+	 * Set up the new block as "right".
+	 */
+	right = XFS_BUF_TO_INOBT_BLOCK(rbp);
+	/*
+	 * "Left" is the current (according to the cursor) block.
+	 */
+	left = XFS_BUF_TO_INOBT_BLOCK(lbp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, left, level, lbp))
+		return error;
+#endif
+	/*
+	 * Fill in the btree header for the new block.
+	 * It starts out with half (rounded down) of left's entries.
+	 */
+	INT_SET(right->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
+	right->bb_level = left->bb_level; /* INT_: direct copy */
+	INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2));
+	/*
+	 * Make sure that if there's an odd number of entries now, that
+	 * each new block will have the same number of entries.
+	 * The odd entry goes to the right block when the cursor's
+	 * position falls in the part that stays in left.
+	 */
+	if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) &&
+	    cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1)
+		INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
+	i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1;
+	/*
+	 * For non-leaf blocks, copy keys and addresses over to the new block.
+	 * i is the 1-based index in left of the first entry that moves.
+	 */
+	if (level > 0) {
+		lkp = XFS_INOBT_KEY_ADDR(left, i, cur);
+		lpp = XFS_INOBT_PTR_ADDR(left, i, cur);
+		rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
+		rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
+#ifdef DEBUG
+		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
+			if (error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level))
+				return error;
+		}
+#endif
+		bcopy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
+		bcopy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
+		xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		*keyp = *rkp;
+	}
+	/*
+	 * For leaf blocks, copy records over to the new block.
+	 */
+	else {
+		lrp = XFS_INOBT_REC_ADDR(left, i, cur);
+		rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
+		bcopy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
+		xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
+		keyp->ir_startino = rrp->ir_startino; /* INT_: direct copy */
+	}
+	/*
+	 * Adjust numrecs and the sibling pointers so that the new block
+	 * sits immediately to the right of left.  (The left block number,
+	 * lbno, was already derived from the buffer address above.)
+	 */
+	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
+	right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
+	INT_SET(left->bb_rightsib, ARCH_CONVERT, args.agbno);
+	INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno);
+	xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS);
+	xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
+	/*
+	 * If there's a block to the new block's right, make that block
+	 * point back to right instead of to left.
+	 */
+	if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
+		xfs_inobt_block_t	*rrblock;	/* rr btree block */
+		xfs_buf_t		*rrbp;		/* buffer for rrblock */
+
+		if (error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
+				INT_GET(right->bb_rightsib, ARCH_CONVERT), 0, &rrbp,
+				XFS_INO_BTREE_REF))
+			return error;
+		rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
+		if (error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))
+			return error;
+		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, args.agbno);
+		xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB);
+	}
+	/*
+	 * If the cursor is really in the right block, move it there.
+	 * If it's just pointing past the last entry in left, then we'll
+	 * insert there, so don't change anything in that case.
+	 */
+	if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) {
+		xfs_btree_setbuf(cur, level, rbp);
+		cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT);
+	}
+	/*
+	 * If there are more levels, we'll need another cursor which refers
+	 * to the right block, no matter where this cursor was.
+	 * It is a duplicate of this cursor moved on one entry in the
+	 * parent level.
+	 */
+	if (level + 1 < cur->bc_nlevels) {
+		if (error = xfs_btree_dup_cursor(cur, curp))
+			return error;
+		(*curp)->bc_ptrs[level + 1]++;
+	}
+	*bnop = args.agbno;
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Update keys at all levels from here to the root along the cursor's path.
+ *
+ * At each level visited, the key the cursor points at is overwritten with
+ * *keyp and logged.  Returns 0, except that DEBUG builds return the error
+ * from a failed block check.
+ */
+STATIC int				/* error */
+xfs_inobt_updkey(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	xfs_inobt_key_t	*keyp,	/* new key value to update to */
+	int		level)	/* starting level for update */
+{
+	int		ptr;	/* index of key in block */
+
+	/*
+	 * Go up the tree from this level toward the root.
+	 * At each level, update the key value to the value input.
+	 * Stop when we reach a level where the cursor isn't pointing
+	 * at the first entry in the block. 
+	 */
+	for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
+		xfs_buf_t		*bp;	/* buffer for block */
+		xfs_inobt_block_t	*block;	/* btree block */
+#ifdef DEBUG
+		int			error;	/* error return value */
+#endif
+		xfs_inobt_key_t		*kp;	/* ptr to btree block keys */
+
+		bp = cur->bc_bufs[level];
+		block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+		if (error = xfs_btree_check_sblock(cur, block, level, bp))
+			return error;
+#endif
+		ptr = cur->bc_ptrs[level];	/* loop ends after this level unless ptr is 1 */
+		kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
+		*kp = *keyp;
+		xfs_inobt_log_keys(cur, bp, ptr, ptr);
+	}
+	return 0;
+}
+
+/*
+ * Externally visible routines.
+ */
+
+/*
+ * Decrement cursor by one record at the level.
+ * For nonzero levels the leaf-ward information is untouched.
+ *
+ * *stat is set to 1 if the cursor now points at the previous record, or
+ * to 0 if it was already on the first record of a block with no left
+ * sibling.  In the failure case the index at this level has still been
+ * decremented.
+ */
+int					/* error */
+xfs_inobt_decrement(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	int		level,	/* level in btree, 0 is leaf */
+	int		*stat)	/* success/failure */
+{
+	xfs_inobt_block_t	*block;	/* btree block */
+	int			error;	/* error return value */
+	int			lev;	/* btree level */
+
+	ASSERT(level < cur->bc_nlevels);
+	/*
+	 * Read-ahead to the left at this level.
+	 */
+	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
+	/*
+	 * Decrement the ptr at this level.  If we're still in the block
+	 * then we're done.
+	 */
+	if (--cur->bc_ptrs[level] > 0) {
+		*stat = 1;
+		return 0;
+	}
+	/*
+	 * Get a pointer to the btree block.
+	 */
+	block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[level]);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, level,
+			cur->bc_bufs[level]))
+		return error;
+#endif
+	/*
+	 * If we just went off the left edge of the tree, return failure.
+	 */
+	if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * March up the tree decrementing pointers.
+	 * Stop when we don't go off the left edge of a block.
+	 */
+	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
+		if (--cur->bc_ptrs[lev] > 0)
+			break;
+		/*
+		 * Read-ahead the left block, we're going to read it
+		 * in the next loop. 
+		 */
+		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
+	}
+	/*
+	 * If we went off the root then we are seriously confused.
+	 */
+	ASSERT(lev < cur->bc_nlevels);
+	/*
+	 * Now walk back down the tree, fixing up the cursor's buffer
+	 * pointers and key numbers.
+	 * At each lower level the cursor is put on the last entry of the
+	 * block reached through the pointer chosen one level up.
+	 */
+	for (block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
+		xfs_agblock_t	agbno;	/* block number of btree block */
+		xfs_buf_t	*bp;	/* buffer containing btree block */
+
+		agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
+		if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
+				cur->bc_private.i.agno, agbno, 0, &bp,
+				XFS_INO_BTREE_REF))
+			return error;
+		lev--;
+		xfs_btree_setbuf(cur, lev, bp);
+		block = XFS_BUF_TO_INOBT_BLOCK(bp);
+		if (error = xfs_btree_check_sblock(cur, block, lev, bp))
+			return error;
+		cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT);
+	}
+	*stat = 1;
+	return 0;
+}
+
+/*
+ * Get the data from the pointed-to record.
+ *
+ * The record is the one the cursor's leaf level (level 0) points at.
+ * *stat is set to 0, and the outputs are left untouched, if the cursor
+ * index is outside 1..numrecs of the leaf; otherwise the outputs are
+ * filled in and *stat is set to 1.
+ * arch must be ARCH_NOCONVERT or ARCH_CONVERT (asserted).
+ * NOTE(review): presumably arch selects the representation the caller
+ * wants the values returned in - confirm against the INT_GET/INT_COPY
+ * macro definitions.
+ */
+int					/* error */
+xfs_inobt_get_rec(
+	xfs_btree_cur_t	*cur,	/* btree cursor */
+	xfs_agino_t	*ino,	/* output: starting inode of chunk */
+	__int32_t	*fcnt,	/* output: number of free inodes */
+	xfs_inofree_t	*free,	/* output: free inode mask */
+	int		*stat,	/* output: success/failure */
+	xfs_arch_t	arch)	/* input: architecture */
+{
+	xfs_inobt_block_t	*block;	/* btree block */
+	xfs_buf_t		*bp;	/* buffer containing btree block */
+#ifdef DEBUG
+	int			error;	/* error return value */
+#endif
+	int			ptr;	/* record number */
+	xfs_inobt_rec_t		*rec;	/* record data */
+
+	bp = cur->bc_bufs[0];
+	ptr = cur->bc_ptrs[0];
+	block = XFS_BUF_TO_INOBT_BLOCK(bp);
+#ifdef DEBUG
+	if (error = xfs_btree_check_sblock(cur, block, 0, bp))
+		return error;
+#endif
+	/*
+	 * Off the right end or left end, return failure.
+	 */
+	if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT) || ptr <= 0) {
+		*stat = 0;
+		return 0;
+	}
+	/*
+	 * Point to the record and extract its data. 
+ */ + rec = XFS_INOBT_REC_ADDR(block, ptr, cur); + ASSERT(arch == ARCH_NOCONVERT || arch == ARCH_CONVERT); + if (arch == ARCH_NOCONVERT) { + *ino = INT_GET(rec->ir_startino, ARCH_CONVERT); + *fcnt = INT_GET(rec->ir_freecount, ARCH_CONVERT); + *free = INT_GET(rec->ir_free, ARCH_CONVERT); + } else { + INT_COPY(*ino, rec->ir_startino, ARCH_CONVERT); + INT_COPY(*fcnt, rec->ir_freecount, ARCH_CONVERT); + INT_COPY(*free, rec->ir_free, ARCH_CONVERT); + } + *stat = 1; + return 0; +} + +/* + * Increment cursor by one record at the level. + * For nonzero levels the leaf-ward information is untouched. + */ +int /* error */ +xfs_inobt_increment( + xfs_btree_cur_t *cur, /* btree cursor */ + int level, /* level in btree, 0 is leaf */ + int *stat) /* success/failure */ +{ + xfs_inobt_block_t *block; /* btree block */ + xfs_buf_t *bp; /* buffer containing btree block */ + int error; /* error return value */ + int lev; /* btree level */ + + ASSERT(level < cur->bc_nlevels); + /* + * Read-ahead to the right at this level. + */ + xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); + /* + * Get a pointer to the btree block. + */ + bp = cur->bc_bufs[level]; + block = XFS_BUF_TO_INOBT_BLOCK(bp); +#ifdef DEBUG + if (error = xfs_btree_check_sblock(cur, block, level, bp)) + return error; +#endif + /* + * Increment the ptr at this level. If we're still in the block + * then we're done. + */ + if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) { + *stat = 1; + return 0; + } + /* + * If we just went off the right edge of the tree, return failure. + */ + if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) { + *stat = 0; + return 0; + } + /* + * March up the tree incrementing pointers. + * Stop when we don't go off the right edge of a block. 
+ */ + for (lev = level + 1; lev < cur->bc_nlevels; lev++) { + bp = cur->bc_bufs[lev]; + block = XFS_BUF_TO_INOBT_BLOCK(bp); +#ifdef DEBUG + if (error = xfs_btree_check_sblock(cur, block, lev, bp)) + return error; +#endif + if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) + break; + /* + * Read-ahead the right block, we're going to read it + * in the next loop. + */ + xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); + } + /* + * If we went off the root then we are seriously confused. + */ + ASSERT(lev < cur->bc_nlevels); + /* + * Now walk back down the tree, fixing up the cursor's buffer + * pointers and key numbers. + */ + for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_INOBT_BLOCK(bp); + lev > level; ) { + xfs_agblock_t agbno; /* block number of btree block */ + + agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT); + if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, + cur->bc_private.i.agno, agbno, 0, &bp, + XFS_INO_BTREE_REF)) + return error; + lev--; + xfs_btree_setbuf(cur, lev, bp); + block = XFS_BUF_TO_INOBT_BLOCK(bp); + if (error = xfs_btree_check_sblock(cur, block, lev, bp)) + return error; + cur->bc_ptrs[lev] = 1; + } + *stat = 1; + return 0; +} + +/* + * Insert the current record at the point referenced by cur. + * The cursor may be inconsistent on return if splits have been done. 
+ */ +int /* error */ +xfs_inobt_insert( + xfs_btree_cur_t *cur, /* btree cursor */ + int *stat) /* success/failure */ +{ + int error; /* error return value */ + int i; /* result value, 0 for failure */ + int level; /* current level number in btree */ + xfs_agblock_t nbno; /* new block number (split result) */ + xfs_btree_cur_t *ncur; /* new cursor (split result) */ + xfs_inobt_rec_t nrec; /* record being inserted this level */ + xfs_btree_cur_t *pcur; /* previous level's cursor */ + + level = 0; + nbno = NULLAGBLOCK; + INT_SET(nrec.ir_startino, ARCH_CONVERT, cur->bc_rec.i.ir_startino); + INT_SET(nrec.ir_freecount, ARCH_CONVERT, cur->bc_rec.i.ir_freecount); + INT_SET(nrec.ir_free, ARCH_CONVERT, cur->bc_rec.i.ir_free); + ncur = (xfs_btree_cur_t *)0; + pcur = cur; + /* + * Loop going up the tree, starting at the leaf level. + * Stop when we don't get a split block, that must mean that + * the insert is finished with this level. + */ + do { + /* + * Insert nrec/nbno into this level of the tree. + * Note if we fail, nbno will be null. + */ + if (error = xfs_inobt_insrec(pcur, level++, &nbno, &nrec, &ncur, + &i)) { + if (pcur != cur) + xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); + return error; + } + /* + * See if the cursor we just used is trash. + * Can't trash the caller's cursor, but otherwise we should + * if ncur is a new cursor or we're about to be done. + */ + if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) { + cur->bc_nlevels = pcur->bc_nlevels; + xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); + } + /* + * If we got a new cursor, switch to it. + */ + if (ncur) { + pcur = ncur; + ncur = (xfs_btree_cur_t *)0; + } + } while (nbno != NULLAGBLOCK); + *stat = i; + return 0; +} + +/* + * Lookup the record equal to ino in the btree given by cur. 
+ */ +int /* error */ +xfs_inobt_lookup_eq( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free, /* free inode mask */ + int *stat) /* success/failure */ +{ + cur->bc_rec.i.ir_startino = ino; + cur->bc_rec.i.ir_freecount = fcnt; + cur->bc_rec.i.ir_free = free; + return xfs_inobt_lookup(cur, XFS_LOOKUP_EQ, stat); +} + +/* + * Lookup the first record greater than or equal to ino + * in the btree given by cur. + */ +int /* error */ +xfs_inobt_lookup_ge( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free, /* free inode mask */ + int *stat) /* success/failure */ +{ + cur->bc_rec.i.ir_startino = ino; + cur->bc_rec.i.ir_freecount = fcnt; + cur->bc_rec.i.ir_free = free; + return xfs_inobt_lookup(cur, XFS_LOOKUP_GE, stat); +} + +/* + * Lookup the first record less than or equal to ino + * in the btree given by cur. + */ +int /* error */ +xfs_inobt_lookup_le( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free, /* free inode mask */ + int *stat) /* success/failure */ +{ + cur->bc_rec.i.ir_startino = ino; + cur->bc_rec.i.ir_freecount = fcnt; + cur->bc_rec.i.ir_free = free; + return xfs_inobt_lookup(cur, XFS_LOOKUP_LE, stat); +} + +/* + * Update the record referred to by cur, to the value given + * by [ino, fcnt, free]. + * This either works (return 0) or gets an EFSCORRUPTED error. 
+ */ +int /* error */ +xfs_inobt_update( + xfs_btree_cur_t *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free) /* free inode mask */ +{ + xfs_inobt_block_t *block; /* btree block to update */ + xfs_buf_t *bp; /* buffer containing btree block */ + int error; /* error return value */ + int ptr; /* current record number (updating) */ + xfs_inobt_rec_t *rp; /* pointer to updated record */ + + /* + * Pick up the current block. + */ + bp = cur->bc_bufs[0]; + block = XFS_BUF_TO_INOBT_BLOCK(bp); +#ifdef DEBUG + if (error = xfs_btree_check_sblock(cur, block, 0, bp)) + return error; +#endif + /* + * Get the address of the rec to be updated. + * NOTE(review): ptr is not range-checked against bb_numrecs here; + * presumably callers position the cursor on a valid record first. + */ + ptr = cur->bc_ptrs[0]; + rp = XFS_INOBT_REC_ADDR(block, ptr, cur); + /* + * Fill in the new contents and log them. + */ + INT_SET(rp->ir_startino, ARCH_CONVERT, ino); + INT_SET(rp->ir_freecount, ARCH_CONVERT, fcnt); + INT_SET(rp->ir_free, ARCH_CONVERT, free); + xfs_inobt_log_recs(cur, bp, ptr, ptr); + /* + * Updating first record in leaf. Pass new key value up to our parent. + * Any error from xfs_inobt_updkey() is returned to the caller. + */ + if (ptr == 1) { + xfs_inobt_key_t key; /* key containing [ino] */ + + INT_SET(key.ir_startino, ARCH_CONVERT, ino); + if (error = xfs_inobt_updkey(cur, &key, 1)) + return error; + } + return 0; +} diff --git a/libxfs/xfs_inode.c b/libxfs/xfs_inode.c new file mode 100644 index 000000000..36bf1bd9e --- /dev/null +++ b/libxfs/xfs_inode.c @@ -0,0 +1,1371 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include + +xfs_zone_t *xfs_ifork_zone; +xfs_zone_t *xfs_inode_zone; + +#ifdef DEBUG /* + * DEBUG-only check of an inode buffer: for each of the + * (m_inode_cluster_size >> sb_inodelog) inodes in bp, raise an alert and + * ASSERT if its di_next_unlinked field is zero. + */ +void +xfs_inobp_check( + xfs_mount_t *mp, + xfs_buf_t *bp) +{ + int i; + int j; + xfs_dinode_t *dip; + + j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; + + for (i = 0; i < j; i++) { + dip = (xfs_dinode_t *)xfs_buf_offset(bp, + i * mp->m_sb.sb_inodesize); + if (INT_ISZERO(dip->di_next_unlinked, ARCH_CONVERT)) { + xfs_fs_cmn_err(CE_ALERT, mp, + "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p. About to pop an ASSERT.", + bp); + ASSERT(!INT_ISZERO(dip->di_next_unlinked, ARCH_CONVERT)); + } + } +} +#endif + + +/* + * This routine is called to map an inode to the buffer containing + * the on-disk version of the inode. It returns a pointer to the + * buffer containing the on-disk inode in the bpp parameter, and in + * the dipp parameter it returns a pointer to the on-disk inode within + * that buffer. + * + * If a non-zero error is returned, then the contents of bpp and + * dipp are undefined. 
+ * + * If the inode is new and has not yet been initialized, use xfs_imap() + * to determine the size and location of the buffer to read from disk. + * If the inode has already been mapped to its buffer and read in once, + * then use the mapping information stored in the inode rather than + * calling xfs_imap(). This allows us to avoid the overhead of looking + * at the inode btree for small block file systems (see xfs_dilocate()). + * We can tell whether the inode has been mapped in before by comparing + * its disk block address to 0. Only uninitialized inodes will have + * 0 for the disk block address. + */ +int +xfs_itobp( + xfs_mount_t *mp, + xfs_trans_t *tp, + xfs_inode_t *ip, + xfs_dinode_t **dipp, + xfs_buf_t **bpp, + xfs_daddr_t bno) +{ + xfs_buf_t *bp; + int error; + xfs_imap_t imap; +#ifdef __KERNEL__ + int i; + int ni; +#endif + + if (ip->i_blkno == (xfs_daddr_t)0) { + /* + * Call the space management code to find the location of the + * inode on disk. + */ + imap.im_blkno = bno; + error = xfs_imap(mp, tp, ip->i_ino, &imap, XFS_IMAP_LOOKUP); + if (error != 0) { + return error; + } + + /* + * If the inode number maps to a block outside the bounds + * of the file system then return EINVAL rather than calling + * read_buf and panicking when we get an error from the + * driver. + */ + if ((imap.im_blkno + imap.im_len) > + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { + return XFS_ERROR(EINVAL); + } + + /* + * Fill in the fields in the inode that will be used to + * map the inode to its buffer from now on. + */ + ip->i_blkno = imap.im_blkno; + ip->i_len = imap.im_len; + ip->i_boffset = imap.im_boffset; + } else { + /* + * We've already mapped the inode once, so just use the + * mapping that we saved the first time. + */ + imap.im_blkno = ip->i_blkno; + imap.im_len = ip->i_len; + imap.im_boffset = ip->i_boffset; + } + ASSERT(bno == 0 || bno == imap.im_blkno); + + /* + * Read in the buffer. 
If tp is NULL, xfs_trans_read_buf() will + * default to just a read_buf() call. + */ + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, + (int)imap.im_len, XFS_BUF_LOCK, &bp); + + if (error) { + return error; + } +#ifdef __KERNEL__ + /* + * Validate the magic number and version of every inode in the buffer + * (if DEBUG kernel) or the first inode in the buffer, otherwise. + * On a mismatch the buffer is released and EFSCORRUPTED is returned. + */ +#ifdef DEBUG + ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog; +#else + ni = 1; +#endif + for (i = 0; i < ni; i++) { + int di_ok; + xfs_dinode_t *dip; + + dip = (xfs_dinode_t *)xfs_buf_offset(bp, + (i << mp->m_sb.sb_inodelog)); + di_ok = INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC && + XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT)); + if (XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, + XFS_RANDOM_ITOBP_INOTOBP)) { +#ifdef DEBUG + prdev("bad inode magic/vsn daddr 0x%Lx #%d (magic=%x)", + mp->m_dev, imap.im_blkno, i, + INT_GET(dip->di_core.di_magic, ARCH_CONVERT)); +#endif + xfs_trans_brelse(tp, bp); + return XFS_ERROR(EFSCORRUPTED); + } + } +#endif /* __KERNEL__ */ + + xfs_inobp_check(mp, bp); + + /* + * Mark the buffer as an inode buffer now that it looks good + */ + XFS_BUF_SET_VTYPE(bp, B_FS_INO); + + /* + * Set *dipp to point to the on-disk inode in the buffer. + */ + *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); + *bpp = bp; + return 0; +} + +/* + * Move inode type and inode format specific information from the + * on-disk inode to the in-core inode. For fifos, devs, and sockets + * this means set if_rdev to the proper value. For files, directories, + * and symlinks this means to bring in the in-line data or extent + * pointers. For a file in B-tree format, only the root is immediately + * brought in-core. The rest will be in-lined in if_extents when it + * is first referenced (see xfs_iread_extents()). 
+ */ +STATIC int +xfs_iformat( + xfs_inode_t *ip, + xfs_dinode_t *dip) +{ + xfs_attr_shortform_t *atp; /* on-disk shortform attr fork */ + int size; /* byte count passed to xfs_iformat_local */ + int error; /* error return value */ + xfs_fsize_t di_size; /* on-disk di_size, checked for local format */ + ip->i_df.if_ext_max = + XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); + error = 0; + + if (INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) + + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) > + INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT)) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "corrupt dinode %Lu, extent total = %d, nblocks = %Ld. Unmount and run xfs_repair.", + ip->i_ino, + (int)(INT_GET(dip->di_core.di_nextents, ARCH_CONVERT) + INT_GET(dip->di_core.di_anextents, ARCH_CONVERT)), + INT_GET(dip->di_core.di_nblocks, ARCH_CONVERT)); + return XFS_ERROR(EFSCORRUPTED); + } + + if (INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT) > ip->i_mount->m_sb.sb_inodesize) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "corrupt dinode %Lu, forkoff = 0x%x. Unmount and run xfs_repair.", + ip->i_ino, (int)(INT_GET(dip->di_core.di_forkoff, ARCH_CONVERT))); + return XFS_ERROR(EFSCORRUPTED); + } + + switch (ip->i_d.di_mode & IFMT) { + case IFIFO: + case IFCHR: + case IFBLK: + case IFSOCK: + if (INT_GET(dip->di_core.di_format, ARCH_CONVERT) != XFS_DINODE_FMT_DEV) + return XFS_ERROR(EFSCORRUPTED); + ip->i_d.di_size = 0; + ip->i_df.if_u2.if_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT); + break; + + case IFREG: + case IFLNK: + case IFDIR: + switch (INT_GET(dip->di_core.di_format, ARCH_CONVERT)) { + case XFS_DINODE_FMT_LOCAL: + /* + * no local regular files yet + */ + if ((INT_GET(dip->di_core.di_mode, ARCH_CONVERT) & IFMT) == IFREG) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "corrupt inode (local format for regular file) %Lu. Unmount and run xfs_repair.", + ip->i_ino); + return XFS_ERROR(EFSCORRUPTED); + } + + di_size=INT_GET(dip->di_core.di_size, ARCH_CONVERT); + if (di_size > + XFS_DFORK_DSIZE_ARCH(dip, ip->i_mount, ARCH_CONVERT)) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "corrupt inode %Lu (bad size %Ld for local inode). 
Unmount and run xfs_repair.", + ip->i_ino, di_size); + return XFS_ERROR(EFSCORRUPTED); + } + + size = (int)di_size; + error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size); + break; + case XFS_DINODE_FMT_EXTENTS: + error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK); + break; + case XFS_DINODE_FMT_BTREE: + error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); + break; + default: + return XFS_ERROR(EFSCORRUPTED); + } + break; + + default: + return XFS_ERROR(EFSCORRUPTED); + } + if (error) { + return error; + } /* no attribute fork on disk: the data fork is all there is */ + if (!XFS_DFORK_Q_ARCH(dip, ARCH_CONVERT)) + return 0; + ASSERT(ip->i_afp == NULL); + ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); + ip->i_afp->if_ext_max = + XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); + switch (INT_GET(dip->di_core.di_aformat, ARCH_CONVERT)) { + case XFS_DINODE_FMT_LOCAL: + atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT); + size = (int)INT_GET(atp->hdr.totsize, ARCH_CONVERT); + error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); + break; + case XFS_DINODE_FMT_EXTENTS: + error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK); + break; + case XFS_DINODE_FMT_BTREE: + error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); + break; + default: + error = XFS_ERROR(EFSCORRUPTED); + break; + } + if (error) { /* attr fork setup failed: free i_afp and call xfs_idestroy_fork() on the data fork */ + kmem_zone_free(xfs_ifork_zone, ip->i_afp); + ip->i_afp = NULL; + xfs_idestroy_fork(ip, XFS_DATA_FORK); + } + return error; +} + +/* + * The file is in-lined in the on-disk inode. + * If it fits into if_inline_data, then copy + * it there, otherwise allocate a buffer for it + * and copy the data there. Either way, set + * if_data to point at the data. + * If we allocate a buffer for the data, make + * sure that its size is a multiple of 4 and + * record the real size in if_real_bytes. 
+ */ +STATIC int +xfs_iformat_local( + xfs_inode_t *ip, + xfs_dinode_t *dip, + int whichfork, + int size) +{ + xfs_ifork_t *ifp; + int real_size; + + /* + * If the size is unreasonable, then something + * is wrong and we just bail out rather than crash in + * kmem_alloc() or bcopy() below. + */ + if (size > XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT)) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "corrupt inode %Lu (bad size %d for local fork, size = %d). Unmount and run xfs_repair.", + ip->i_ino, size, + XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT)); + return XFS_ERROR(EFSCORRUPTED); + } + ifp = XFS_IFORK_PTR(ip, whichfork); + real_size = 0; + if (size == 0) + ifp->if_u1.if_data = NULL; + else if (size <= sizeof(ifp->if_u2.if_inline_data)) + ifp->if_u1.if_data = ifp->if_u2.if_inline_data; + else { + real_size = roundup(size, 4); + ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); + } + ifp->if_bytes = size; + ifp->if_real_bytes = real_size; + if (size) + bcopy(XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT), ifp->if_u1.if_data, size); + ifp->if_flags &= ~XFS_IFEXTENTS; + ifp->if_flags |= XFS_IFINLINE; + return 0; +} + +/* + * The file consists of a set of extents all + * of which fit into the on-disk inode. + * If there are few enough extents to fit into + * the if_inline_ext, then copy them there. + * Otherwise allocate a buffer for them and copy + * them into it. Either way, set if_extents + * to point at the extents. + */ +STATIC int +xfs_iformat_extents( + xfs_inode_t *ip, + xfs_dinode_t *dip, + int whichfork) +{ + xfs_ifork_t *ifp; + int nex; + int real_size; + int size; + + ifp = XFS_IFORK_PTR(ip, whichfork); + nex = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_CONVERT); + size = nex * (uint)sizeof(xfs_bmbt_rec_t); + + /* + * If the number of extents is unreasonable, then something + * is wrong and we just bail out rather than crash in + * kmem_alloc() or bcopy() below. 
+ */ + if (size < 0 || size > XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT)) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "corrupt inode %Lu ((a)extents = %d). Unmount and run xfs_repair.", + ip->i_ino, nex); + return XFS_ERROR(EFSCORRUPTED); + } + + real_size = 0; + if (nex == 0) + ifp->if_u1.if_extents = NULL; + else if (nex <= XFS_INLINE_EXTS) + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; + else { + ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP); + ASSERT(ifp->if_u1.if_extents != NULL); + real_size = size; + } + ifp->if_bytes = size; + ifp->if_real_bytes = real_size; + if (size) { + xfs_validate_extents( + (xfs_bmbt_rec_32_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT), + nex, XFS_EXTFMT_INODE(ip)); + bcopy(XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT), ifp->if_u1.if_extents, + size); + xfs_bmap_trace_exlist("xfs_iformat_extents", ip, nex, + whichfork); + if (whichfork != XFS_DATA_FORK || + XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE) + if (xfs_check_nostate_extents( + ifp->if_u1.if_extents, nex)) + return XFS_ERROR(EFSCORRUPTED); + } + ifp->if_flags |= XFS_IFEXTENTS; + return 0; +} + +/* + * The file has too many extents to fit into + * the inode, so they are in B-tree format. + * Allocate a buffer for the root of the B-tree + * and copy the root into it. The i_extents + * field will remain NULL until all of the + * extents are read in (when they are needed). 
+ */ +STATIC int +xfs_iformat_btree( + xfs_inode_t *ip, + xfs_dinode_t *dip, + int whichfork) +{ + xfs_bmdr_block_t *dfp; + xfs_ifork_t *ifp; + /* REFERENCED */ + int nrecs; + int size; + + ifp = XFS_IFORK_PTR(ip, whichfork); + dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT); + size = XFS_BMAP_BROOT_SPACE(dfp); + nrecs = XFS_BMAP_BROOT_NUMRECS(dfp); + + /* + * blow out if -- fork has less extents than can fit in + * fork (fork shouldn't be a btree format), root btree + * block has more records than can fit into the fork, + * or the number of extents is greater than the number of + * blocks. + */ + if (XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max + || XFS_BMDR_SPACE_CALC(nrecs) > + XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT) + || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) { + xfs_fs_cmn_err(CE_WARN, ip->i_mount, + "corrupt inode %Lu (btree). Unmount and run xfs_repair.", + ip->i_ino); + return XFS_ERROR(EFSCORRUPTED); + } + + ifp->if_broot_bytes = size; + ifp->if_broot = kmem_alloc(size, KM_SLEEP); + ASSERT(ifp->if_broot != NULL); + /* + * Copy and convert from the on-disk structure + * to the in-memory structure. 
+ */ + xfs_bmdr_to_bmbt(dfp, XFS_DFORK_SIZE_ARCH(dip, ip->i_mount, whichfork, ARCH_CONVERT), + ifp->if_broot, size); + ifp->if_flags &= ~XFS_IFEXTENTS; + ifp->if_flags |= XFS_IFBROOT; + + return 0; +} + +/* + * xfs_xlate_dinode_core - translate an xfs_inode_core_t between ondisk + * and native format + * + * buf = on-disk representation + * dip = native representation + * dir = direction - +ve -> disk to native + * -ve -> native to disk + * arch = on-disk architecture + */ + +void +xfs_xlate_dinode_core(xfs_caddr_t buf, xfs_dinode_core_t *dip, + int dir, xfs_arch_t arch) +{ + xfs_dinode_core_t *buf_core; + xfs_dinode_core_t *mem_core; + + ASSERT(dir); + + buf_core=(xfs_dinode_core_t*)buf; + mem_core=(xfs_dinode_core_t*)dip; + + if (arch == ARCH_NOCONVERT) { + if (dir>0) { + bcopy((xfs_caddr_t)buf_core, (xfs_caddr_t)mem_core, sizeof(xfs_dinode_core_t)); + } else { + bcopy((xfs_caddr_t)mem_core, (xfs_caddr_t)buf_core, sizeof(xfs_dinode_core_t)); + } + return; + } + + INT_XLATE(buf_core->di_magic, mem_core->di_magic, dir, arch); + INT_XLATE(buf_core->di_mode, mem_core->di_mode, dir, arch); + INT_XLATE(buf_core->di_version, mem_core->di_version, dir, arch); + INT_XLATE(buf_core->di_format, mem_core->di_format, dir, arch); + INT_XLATE(buf_core->di_onlink, mem_core->di_onlink, dir, arch); + INT_XLATE(buf_core->di_uid, mem_core->di_uid, dir, arch); + INT_XLATE(buf_core->di_gid, mem_core->di_gid, dir, arch); + INT_XLATE(buf_core->di_nlink, mem_core->di_nlink, dir, arch); + INT_XLATE(buf_core->di_projid, mem_core->di_projid, dir, arch); + + if (dir>0) { + bcopy(buf_core->di_pad, mem_core->di_pad, sizeof(buf_core->di_pad)); + } else { + bcopy(mem_core->di_pad, buf_core->di_pad, sizeof(buf_core->di_pad)); + } + + INT_XLATE(buf_core->di_atime.t_sec, mem_core->di_atime.t_sec, dir, arch); + INT_XLATE(buf_core->di_atime.t_nsec,mem_core->di_atime.t_nsec, dir, arch); + + INT_XLATE(buf_core->di_mtime.t_sec, mem_core->di_mtime.t_sec, dir, arch); + 
 INT_XLATE(buf_core->di_mtime.t_nsec,mem_core->di_mtime.t_nsec, dir, arch); + + INT_XLATE(buf_core->di_ctime.t_sec, mem_core->di_ctime.t_sec, dir, arch); + INT_XLATE(buf_core->di_ctime.t_nsec,mem_core->di_ctime.t_nsec, dir, arch); + + INT_XLATE(buf_core->di_size, mem_core->di_size, dir, arch); + INT_XLATE(buf_core->di_nblocks, mem_core->di_nblocks, dir, arch); + INT_XLATE(buf_core->di_extsize, mem_core->di_extsize, dir, arch); + + INT_XLATE(buf_core->di_nextents, mem_core->di_nextents, dir, arch); + INT_XLATE(buf_core->di_anextents, mem_core->di_anextents, dir, arch); + INT_XLATE(buf_core->di_forkoff, mem_core->di_forkoff, dir, arch); + INT_XLATE(buf_core->di_aformat, mem_core->di_aformat, dir, arch); + INT_XLATE(buf_core->di_dmevmask, mem_core->di_dmevmask, dir, arch); + INT_XLATE(buf_core->di_dmstate, mem_core->di_dmstate, dir, arch); + INT_XLATE(buf_core->di_flags, mem_core->di_flags, dir, arch); + INT_XLATE(buf_core->di_gen, mem_core->di_gen, dir, arch); + +} + +/* + * Given a mount structure and an inode number, return a pointer + * to a newly allocated in-core inode corresponding to the given + * inode number. + * + * Initialize the inode's attributes and extent pointers if it + * already has them (it will not if the inode has no links). + */ +int +xfs_iread( + xfs_mount_t *mp, + xfs_trans_t *tp, + xfs_ino_t ino, + xfs_inode_t **ipp, + xfs_daddr_t bno) +{ + xfs_buf_t *bp; + xfs_dinode_t *dip; + xfs_inode_t *ip; + int error; + + ASSERT(xfs_inode_zone != NULL); + + ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP); + ip->i_ino = ino; + ip->i_dev = mp->m_dev; + ip->i_mount = mp; + + /* + * Get pointers to the on-disk inode and the buffer containing it. + * If the inode number refers to a block outside the file system + * then xfs_itobp() will return an error. In this case we free the + * new inode and return that error. Set i_blkno to 0 so that xfs_itobp() will + * know that this is a new incore inode. 
+ */ + error = xfs_itobp(mp, tp, ip, &dip, &bp, bno); + + if (error != 0) { + kmem_zone_free(xfs_inode_zone, ip); + return error; + } + + /* + * Initialize inode's trace buffers. + * Do this before xfs_iformat in case it adds entries. + * NOTE(review): the error returns below free ip without releasing + * these buffers -- confirm whether that leaks in trace-enabled builds. + */ +#ifdef XFS_BMAP_TRACE + ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP); +#endif +#ifdef XFS_BMBT_TRACE + ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP); +#endif +#ifdef XFS_RW_TRACE + ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP); +#endif +#ifdef XFS_STRAT_TRACE + ip->i_strat_trace = ktrace_alloc(XFS_STRAT_KTRACE_SIZE, KM_SLEEP); +#endif +#ifdef XFS_ILOCK_TRACE + ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP); +#endif +#ifdef XFS_DIR2_TRACE + ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP); +#endif + + /* + * If we got something that isn't an inode it means someone + * (nfs or dmi) has a stale handle. + */ + if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) { + kmem_zone_free(xfs_inode_zone, ip); + xfs_trans_brelse(tp, bp); + return XFS_ERROR(EINVAL); + } + + /* + * If the on-disk inode is already linked to a directory + * entry, copy all of the inode into the in-core inode. + * xfs_iformat() handles copying in the inode format + * specific information. + * Otherwise, just get the truly permanent information. + */ + if (!INT_ISZERO(dip->di_core.di_mode, ARCH_CONVERT)) { + xfs_xlate_dinode_core((xfs_caddr_t)&dip->di_core, + &(ip->i_d), 1, ARCH_CONVERT); + error = xfs_iformat(ip, dip); + if (error) { + kmem_zone_free(xfs_inode_zone, ip); + xfs_trans_brelse(tp, bp); + return error; + } + } else { + ip->i_d.di_magic = INT_GET(dip->di_core.di_magic, ARCH_CONVERT); + ip->i_d.di_version = INT_GET(dip->di_core.di_version, ARCH_CONVERT); + ip->i_d.di_gen = INT_GET(dip->di_core.di_gen, ARCH_CONVERT); + /* + * Make sure to pull in the mode here as well in + * case the inode is released without being used. 
+ * This ensures that xfs_inactive() will see that + * the inode is already free and not try to mess + * with the uninitialized part of it. + */ + ip->i_d.di_mode = 0; + /* + * Initialize the per-fork minima and maxima for a new + * inode here. xfs_iformat will do it for old inodes. + */ + ip->i_df.if_ext_max = + XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); + } + + /* + * The inode format changed when we moved the link count and + * made it 32 bits long. If this is an old format inode, + * convert it in memory to look like a new one. If it gets + * flushed to disk we will convert back before flushing or + * logging it. We zero out the new projid field and the old link + * count field. We'll handle clearing the pad field (the remains + * of the old uuid field) when we actually convert the inode to + * the new format. We don't change the version number so that we + * can distinguish this from a real new format inode. + */ + if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { + ip->i_d.di_nlink = ip->i_d.di_onlink; + ip->i_d.di_onlink = 0; + ip->i_d.di_projid = 0; + } + + ip->i_delayed_blks = 0; + + /* + * Mark the buffer containing the inode as something to keep + * around for a while. This helps to keep recently accessed + * meta-data in-core longer. + */ + XFS_BUF_SET_REF(bp, XFS_INO_REF); + + /* + * Use xfs_trans_brelse() to release the buffer containing the + * on-disk inode, because it was acquired with xfs_trans_read_buf() + * in xfs_itobp() above. If tp is NULL, this is just a normal + * brelse(). If we're within a transaction, then xfs_trans_brelse() + * will only release the buffer if it is not dirty within the + * transaction. It will be OK to release the buffer in this case, + * because inodes on disk are never destroyed and we will be + * locking the new in-core inode before putting it in the hash + * table where other processes can find it. Thus we don't have + * to worry about the inode being changed just because we released + * the buffer. 
+ */ + xfs_trans_brelse(tp, bp); + *ipp = ip; + return 0; +} + +/* + * Read in extents from a btree-format inode. + * Allocate and fill in if_extents. Real work is done in xfs_bmap.c. + */ +int +xfs_iread_extents( + xfs_trans_t *tp, + xfs_inode_t *ip, + int whichfork) +{ + int error; + xfs_ifork_t *ifp; + size_t size; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) + return XFS_ERROR(EFSCORRUPTED); + size = XFS_IFORK_NEXTENTS(ip, whichfork) * (uint)sizeof(xfs_bmbt_rec_t); + ifp = XFS_IFORK_PTR(ip, whichfork); + /* + * We know that the size is legal (it's checked in iformat_btree) + */ + ifp->if_u1.if_extents = kmem_alloc(size, KM_SLEEP); + ASSERT(ifp->if_u1.if_extents != NULL); + ifp->if_lastex = NULLEXTNUM; + ifp->if_bytes = ifp->if_real_bytes = (int)size; + ifp->if_flags |= XFS_IFEXTENTS; + error = xfs_bmap_read_extents(tp, ip, whichfork); + if (error) { + kmem_free(ifp->if_u1.if_extents, size); + ifp->if_u1.if_extents = NULL; + ifp->if_bytes = ifp->if_real_bytes = 0; + ifp->if_flags &= ~XFS_IFEXTENTS; + return error; + } + xfs_validate_extents((xfs_bmbt_rec_32_t *)ifp->if_u1.if_extents, + XFS_IFORK_NEXTENTS(ip, whichfork), XFS_EXTFMT_INODE(ip)); + return 0; +} + +/* + * Reallocate the space for if_broot based on the number of records + * being added or deleted as indicated in rec_diff. Move the records + * and pointers in if_broot to fit the new size. When shrinking this + * will eliminate holes between the records and pointers created by + * the caller. When growing this will create holes to be filled in + * by the caller. + * + * The caller must not request to add more records than would fit in + * the on-disk inode root. If the if_broot is currently NULL, then + * if we are adding records one will be allocated. The caller must also + * not request that the number of records go below zero, although + * it can go to zero. 
+ * + * ip -- the inode whose if_broot area is changing + * ext_diff -- the change in the number of records, positive or negative, + * requested for the if_broot array. + */ +void +xfs_iroot_realloc( + xfs_inode_t *ip, + int rec_diff, + int whichfork) +{ + int cur_max; + xfs_ifork_t *ifp; + xfs_bmbt_block_t *new_broot; + int new_max; + size_t new_size; + char *np; + char *op; + + /* + * Handle the degenerate case quietly. + */ + if (rec_diff == 0) { + return; + } + + ifp = XFS_IFORK_PTR(ip, whichfork); + if (rec_diff > 0) { + /* + * If there wasn't any memory allocated before, just + * allocate it now and get out. + */ + if (ifp->if_broot_bytes == 0) { + new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); + ifp->if_broot = (xfs_bmbt_block_t*)kmem_alloc(new_size, + KM_SLEEP); + ifp->if_broot_bytes = (int)new_size; + return; + } + + /* + * If there is already an existing if_broot, then we need + * to realloc() it and shift the pointers to their new + * location. The records don't change location because + * they are kept butted up against the btree block header. + */ + cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes); + new_max = cur_max + rec_diff; + new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); + ifp->if_broot = (xfs_bmbt_block_t *) + kmem_realloc(ifp->if_broot, + new_size, + (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ + KM_SLEEP); + op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, + ifp->if_broot_bytes); + np = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, + (int)new_size); + ifp->if_broot_bytes = (int)new_size; + ASSERT(ifp->if_broot_bytes <= + XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); + ovbcopy(op, np, cur_max * (uint)sizeof(xfs_dfsbno_t)); + return; + } + + /* + * rec_diff is less than 0. In this case, we are shrinking the + * if_broot buffer. It must already exist. If we go to zero + * records, just get rid of the root and clear the status bit. 
+ */ + ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); + cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes); + new_max = cur_max + rec_diff; + ASSERT(new_max >= 0); + if (new_max > 0) + new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); + else + new_size = 0; + if (new_size > 0) { + new_broot = (xfs_bmbt_block_t *)kmem_alloc(new_size, KM_SLEEP); + /* + * First copy over the btree block header. + */ + bcopy(ifp->if_broot, new_broot, sizeof(xfs_bmbt_block_t)); + } else { + new_broot = NULL; + ifp->if_flags &= ~XFS_IFBROOT; + } + + /* + * Only copy the records and pointers if there are any. + */ + if (new_max > 0) { + /* + * First copy the records. + */ + op = (char *)XFS_BMAP_BROOT_REC_ADDR(ifp->if_broot, 1, + ifp->if_broot_bytes); + np = (char *)XFS_BMAP_BROOT_REC_ADDR(new_broot, 1, + (int)new_size); + bcopy(op, np, new_max * (uint)sizeof(xfs_bmbt_rec_t)); + + /* + * Then copy the pointers. + */ + op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, + ifp->if_broot_bytes); + np = (char *)XFS_BMAP_BROOT_PTR_ADDR(new_broot, 1, + (int)new_size); + bcopy(op, np, new_max * (uint)sizeof(xfs_dfsbno_t)); + } + kmem_free(ifp->if_broot, ifp->if_broot_bytes); + ifp->if_broot = new_broot; + ifp->if_broot_bytes = (int)new_size; + ASSERT(ifp->if_broot_bytes <= + XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); + return; +} + +/* + * This is called when the amount of space needed for if_extents + * is increased or decreased. The change in size is indicated by + * the number of extents that need to be added or deleted in the + * ext_diff parameter. + * + * If the amount of space needed has decreased below the size of the + * inline buffer, then switch to using the inline buffer. Otherwise, + * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer + * to what is needed. + * + * ip -- the inode whose if_extents area is changing + * ext_diff -- the change in the number of extents, positive or negative, + * requested for the if_extents array. 
+ */ +void +xfs_iext_realloc( + xfs_inode_t *ip, + int ext_diff, + int whichfork) +{ + int byte_diff; + xfs_ifork_t *ifp; + int new_size; + uint rnew_size; + + if (ext_diff == 0) { + return; + } + + ifp = XFS_IFORK_PTR(ip, whichfork); + byte_diff = ext_diff * (uint)sizeof(xfs_bmbt_rec_t); + new_size = (int)ifp->if_bytes + byte_diff; + ASSERT(new_size >= 0); + + if (new_size == 0) { + if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) { + ASSERT(ifp->if_real_bytes != 0); + kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); + } + ifp->if_u1.if_extents = NULL; + rnew_size = 0; + } else if (new_size <= sizeof(ifp->if_u2.if_inline_ext)) { + /* + * If the valid extents can fit in if_inline_ext, + * copy them from the malloc'd vector and free it. + */ + if (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext) { + /* + * For now, empty files are format EXTENTS, + * so the if_extents pointer is null. + */ + if (ifp->if_u1.if_extents) { + bcopy(ifp->if_u1.if_extents, + ifp->if_u2.if_inline_ext, new_size); + kmem_free(ifp->if_u1.if_extents, + ifp->if_real_bytes); + } + ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; + } + rnew_size = 0; + } else { + rnew_size = new_size; + if ((rnew_size & (rnew_size - 1)) != 0) + rnew_size = xfs_iroundup(rnew_size); + /* + * Stuck with malloc/realloc. + */ + if (ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext) { + ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) + kmem_alloc(rnew_size, KM_SLEEP); + bcopy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, + sizeof(ifp->if_u2.if_inline_ext)); + } else if (rnew_size != ifp->if_real_bytes) { + ifp->if_u1.if_extents = (xfs_bmbt_rec_t *) + kmem_realloc(ifp->if_u1.if_extents, + rnew_size, + ifp->if_real_bytes, + KM_SLEEP); + } + } + ifp->if_real_bytes = rnew_size; + ifp->if_bytes = new_size; +} + + +/* + * This is called when the amount of space needed for if_data + * is increased or decreased. 
The change in size is indicated by + * the number of bytes that need to be added or deleted in the + * byte_diff parameter. + * + * If the amount of space needed has decreased below the size of the + * inline buffer, then switch to using the inline buffer. Otherwise, + * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer + * to what is needed. + * + * ip -- the inode whose if_data area is changing + * byte_diff -- the change in the number of bytes, positive or negative, + * requested for the if_data array. + */ +void +xfs_idata_realloc( + xfs_inode_t *ip, + int byte_diff, + int whichfork) +{ + xfs_ifork_t *ifp; + int new_size; + int real_size; + + if (byte_diff == 0) { + return; + } + + ifp = XFS_IFORK_PTR(ip, whichfork); + new_size = (int)ifp->if_bytes + byte_diff; + ASSERT(new_size >= 0); + + if (new_size == 0) { + if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { + kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); + } + ifp->if_u1.if_data = NULL; + real_size = 0; + } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) { + /* + * If the valid extents/data can fit in if_inline_ext/data, + * copy them from the malloc'd vector and free it. + */ + if (ifp->if_u1.if_data == NULL) { + ifp->if_u1.if_data = ifp->if_u2.if_inline_data; + } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { + ASSERT(ifp->if_real_bytes != 0); + bcopy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data, + new_size); + kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); + ifp->if_u1.if_data = ifp->if_u2.if_inline_data; + } + real_size = 0; + } else { + /* + * Stuck with malloc/realloc. + * For inline data, the underlying buffer must be + * a multiple of 4 bytes in size so that it can be + * logged and stay on word boundaries. We enforce + * that here. 
+ */ + real_size = roundup(new_size, 4); + if (ifp->if_u1.if_data == NULL) { + ASSERT(ifp->if_real_bytes == 0); + ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); + } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { + /* + * Only do the realloc if the underlying size + * is really changing. + */ + if (ifp->if_real_bytes != real_size) { + ifp->if_u1.if_data = + kmem_realloc(ifp->if_u1.if_data, + real_size, + ifp->if_real_bytes, + KM_SLEEP); + } + } else { + ASSERT(ifp->if_real_bytes == 0); + ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP); + bcopy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, + ifp->if_bytes); + } + } + ifp->if_real_bytes = real_size; + ifp->if_bytes = new_size; + ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); +} + + +/* + * Map inode to disk block and offset. + * + * mp -- the mount point structure for the current file system + * tp -- the current transaction + * ino -- the inode number of the inode to be located + * imap -- this structure is filled in with the information necessary + * to retrieve the given inode from disk + * flags -- flags to pass to xfs_dilocate indicating whether or not + * lookups in the inode btree were OK or not + */ +int +xfs_imap( + xfs_mount_t *mp, + xfs_trans_t *tp, + xfs_ino_t ino, + xfs_imap_t *imap, + uint flags) +{ + xfs_fsblock_t fsbno; + int len; + int off; + int error; + + fsbno = imap->im_blkno ? 
+ XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; + error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); + if (error != 0) { + return error; + } + imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); + imap->im_len = XFS_FSB_TO_BB(mp, len); + imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); + imap->im_ioffset = (ushort)off; + imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); + return 0; +} + +/* + * Release the in-core memory held by the given fork of the inode: + * the btree root (if_broot), if there is one, and any separately + * allocated local-data or extent buffer. Buffers that alias the + * fork's own inline storage are not freed. For the attribute fork + * the fork structure itself is also returned to xfs_ifork_zone and + * ip->i_afp is cleared. + */ +void +xfs_idestroy_fork( + xfs_inode_t *ip, + int whichfork) +{ + xfs_ifork_t *ifp; + + ifp = XFS_IFORK_PTR(ip, whichfork); + if (ifp->if_broot != NULL) { + kmem_free(ifp->if_broot, ifp->if_broot_bytes); + ifp->if_broot = NULL; + } + + /* + * If the format is local, then we can't have an extents + * array so just look for an inline data array. If we're + * not local then we may or may not have an extents list, + * so check and free it up if we do. 
+ */ + if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { + if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && + (ifp->if_u1.if_data != NULL)) { + ASSERT(ifp->if_real_bytes != 0); + kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); + ifp->if_u1.if_data = NULL; + ifp->if_real_bytes = 0; + } + } else if ((ifp->if_flags & XFS_IFEXTENTS) && + (ifp->if_u1.if_extents != NULL) && + (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)) { + ASSERT(ifp->if_real_bytes != 0); + kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); + ifp->if_u1.if_extents = NULL; + ifp->if_real_bytes = 0; + } + ASSERT(ifp->if_u1.if_extents == NULL || + ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext); + ASSERT(ifp->if_real_bytes == 0); + if (whichfork == XFS_ATTR_FORK) { + kmem_zone_free(xfs_ifork_zone, ip->i_afp); + ip->i_afp = NULL; + } +} + +/* + * xfs_iroundup: round up argument to next power of two. + * A value that is already a power of two (or zero) is returned unchanged. + */ +uint +xfs_iroundup( + uint v) +{ + int i; + uint m; + + if ((v & (v - 1)) == 0) + return v; + ASSERT((v & 0x80000000) == 0); + if ((v & (v + 1)) == 0) + return v + 1; + for (i = 0, m = 1; i < 31; i++, m <<= 1) { + if (v & m) 
+ continue; + v |= m; + if ((v & (v + 1)) == 0) + return v + 1; + } + ASSERT(0); + return( 0 ); +} + +/* + * xfs_iextents_copy() + * + * This is called to copy the REAL extents (as opposed to the delayed + * allocation extents) from the inode into the given buffer. It + * returns the number of bytes copied into the buffer. + * + * If there are no delayed allocation extents, then we can just + * bcopy() the extents into the buffer. Otherwise, we need to + * examine each extent in turn and skip those which are delayed. + */ +int +xfs_iextents_copy( + xfs_inode_t *ip, + xfs_bmbt_rec_32_t *buffer, + int whichfork) +{ + int copied; + xfs_bmbt_rec_32_t *dest_ep; + xfs_bmbt_rec_t *ep; +#ifdef DEBUG + xfs_exntfmt_t fmt = XFS_EXTFMT_INODE(ip); +#endif +#ifdef XFS_BMAP_TRACE + static char fname[] = "xfs_iextents_copy"; +#endif + int i; + xfs_ifork_t *ifp; + int nrecs; + xfs_fsblock_t start_block; + + ifp = XFS_IFORK_PTR(ip, whichfork); + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); + ASSERT(ifp->if_bytes > 0); + + nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + xfs_bmap_trace_exlist(fname, ip, nrecs, whichfork); + ASSERT(nrecs > 0); + if (nrecs == XFS_IFORK_NEXTENTS(ip, whichfork)) { + /* + * There are no delayed allocation extents, + * so just copy everything. + */ + ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); + ASSERT(ifp->if_bytes == + (XFS_IFORK_NEXTENTS(ip, whichfork) * + (uint)sizeof(xfs_bmbt_rec_t))); + bcopy(ifp->if_u1.if_extents, buffer, ifp->if_bytes); + xfs_validate_extents(buffer, nrecs, fmt); + return ifp->if_bytes; + } + + ASSERT(whichfork == XFS_DATA_FORK); + /* + * There are some delayed allocation extents in the + * inode, so copy the extents one at a time and skip + * the delayed ones. There must be at least one + * non-delayed extent. 
+ */ + ASSERT(nrecs > ip->i_d.di_nextents); + ep = ifp->if_u1.if_extents; + dest_ep = buffer; + copied = 0; + for (i = 0; i < nrecs; i++) { + start_block = xfs_bmbt_get_startblock(ep); + if (ISNULLSTARTBLOCK(start_block)) { + /* + * It's a delayed allocation extent, so skip it. + */ + ep++; + continue; + } + + *dest_ep = *(xfs_bmbt_rec_32_t *)ep; + dest_ep++; + ep++; + copied++; + } + ASSERT(copied != 0); + ASSERT(copied == ip->i_d.di_nextents); + ASSERT((copied * (uint)sizeof(xfs_bmbt_rec_t)) <= XFS_IFORK_DSIZE(ip)); + xfs_validate_extents(buffer, copied, fmt); + + return (copied * (uint)sizeof(xfs_bmbt_rec_t)); +} + +/* + * Each of the following cases stores data into the same region + * of the on-disk inode, so only one of them can be valid at + * any given time. While it is possible to have conflicting formats + * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is + * in EXTENTS format, this can only happen when the fork has + * changed formats after being modified but before being flushed. + * In these cases, the format always takes precedence, because the + * format indicates the current state of the fork. + */ +STATIC int +xfs_iflush_fork( + xfs_inode_t *ip, + xfs_dinode_t *dip, + xfs_inode_log_item_t *iip, + int whichfork, + xfs_buf_t *bp) +{ + char *cp; + xfs_ifork_t *ifp; + xfs_mount_t *mp; +#ifdef XFS_TRANS_DEBUG + int first; +#endif + static const short brootflag[2] = + { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; + static const short dataflag[2] = + { XFS_ILOG_DDATA, XFS_ILOG_ADATA }; + static const short extflag[2] = + { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; + + if (iip == NULL) + return 0; + ifp = XFS_IFORK_PTR(ip, whichfork); + /* + * This can happen if we gave up in iformat in an error path, + * for the attribute fork. 
+ */ + if (ifp == NULL) { + ASSERT(whichfork == XFS_ATTR_FORK); + return 0; + } + cp = XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT); + mp = ip->i_mount; + switch (XFS_IFORK_FORMAT(ip, whichfork)) { + case XFS_DINODE_FMT_LOCAL: + if ((iip->ili_format.ilf_fields & dataflag[whichfork]) && + (ifp->if_bytes > 0)) { + ASSERT(ifp->if_u1.if_data != NULL); + ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); + bcopy(ifp->if_u1.if_data, cp, ifp->if_bytes); + } + if (whichfork == XFS_DATA_FORK) { + if (XFS_DIR_SHORTFORM_VALIDATE_ONDISK(mp, dip)) { + return XFS_ERROR(EFSCORRUPTED); + } + } + break; + + case XFS_DINODE_FMT_EXTENTS: + ASSERT((ifp->if_flags & XFS_IFEXTENTS) || + !(iip->ili_format.ilf_fields & extflag[whichfork])); + ASSERT((ifp->if_u1.if_extents != NULL) || (ifp->if_bytes == 0)); + ASSERT((ifp->if_u1.if_extents == NULL) || (ifp->if_bytes > 0)); + if ((iip->ili_format.ilf_fields & extflag[whichfork]) && + (ifp->if_bytes > 0)) { + ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); + (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_32_t *)cp, + whichfork); + } + break; + + case XFS_DINODE_FMT_BTREE: + if ((iip->ili_format.ilf_fields & brootflag[whichfork]) && + (ifp->if_broot_bytes > 0)) { + ASSERT(ifp->if_broot != NULL); + ASSERT(ifp->if_broot_bytes <= + (XFS_IFORK_SIZE(ip, whichfork) + + XFS_BROOT_SIZE_ADJ)); + xfs_bmbt_to_bmdr(ifp->if_broot, ifp->if_broot_bytes, + (xfs_bmdr_block_t *)cp, + XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT)); + } + break; + + case XFS_DINODE_FMT_DEV: + if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { + ASSERT(whichfork == XFS_DATA_FORK); + INT_SET(dip->di_u.di_dev, ARCH_CONVERT, ip->i_df.if_u2.if_rdev); + } + break; + + case XFS_DINODE_FMT_UUID: + if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { + ASSERT(whichfork == XFS_DATA_FORK); + bcopy(&ip->i_df.if_u2.if_uuid, &dip->di_u.di_muuid, + sizeof(uuid_t)); + } + break; + + default: + ASSERT(0); + break; + } + + return 0; +} diff --git a/libxfs/xfs_mount.c b/libxfs/xfs_mount.c new 
file mode 100644 index 000000000..c4de3b9d2 --- /dev/null +++ b/libxfs/xfs_mount.c @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include + +/* + * Mount initialization code establishing various mount + * fields from the superblock associated with the given + * mount structure. 
+ */ +void +xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) +{ + int i; + + mp->m_agfrotor = mp->m_agirotor = 0; + mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; + mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; + mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; + mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; + mp->m_litino = sbp->sb_inodesize - + ((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t)); + mp->m_blockmask = sbp->sb_blocksize - 1; + mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; + mp->m_blockwmask = mp->m_blockwsize - 1; + + /* + * Setup for attributes, in case they get created. + * This value is for inodes getting attributes for the first time, + * the per-inode value is for old attribute values. + */ + ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048); + switch (sbp->sb_inodesize) { + case 256: + mp->m_attroffset = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(2); + break; + case 512: + case 1024: + case 2048: + mp->m_attroffset = XFS_BMDR_SPACE_CALC(12); + break; + default: + ASSERT(0); + } + ASSERT(mp->m_attroffset < XFS_LITINO(mp)); + + for (i = 0; i < 2; i++) { + mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, + xfs_alloc, i == 0); + mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, + xfs_alloc, i == 0); + } + for (i = 0; i < 2; i++) { + mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, + xfs_bmbt, i == 0); + mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, + xfs_bmbt, i == 0); + } + for (i = 0; i < 2; i++) { + mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, + xfs_inobt, i == 0); + mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, + xfs_inobt, i == 0); + } + + mp->m_bsize = XFS_FSB_TO_BB(mp, 1); + mp->m_ialloc_inos = (int)MAX(XFS_INODES_PER_CHUNK, sbp->sb_inopblock); + mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; +} + +static struct { + short offset; + short type; /* 0 = integer + * 1 = binary / string (no 
translation) + */ +} xfs_sb_info[] = { + { offsetof(xfs_sb_t, sb_magicnum), 0 }, + { offsetof(xfs_sb_t, sb_blocksize), 0 }, + { offsetof(xfs_sb_t, sb_dblocks), 0 }, + { offsetof(xfs_sb_t, sb_rblocks), 0 }, + { offsetof(xfs_sb_t, sb_rextents), 0 }, + { offsetof(xfs_sb_t, sb_uuid), 1 }, + { offsetof(xfs_sb_t, sb_logstart), 0 }, + { offsetof(xfs_sb_t, sb_rootino), 0 }, + { offsetof(xfs_sb_t, sb_rbmino), 0 }, + { offsetof(xfs_sb_t, sb_rsumino), 0 }, + { offsetof(xfs_sb_t, sb_rextsize), 0 }, + { offsetof(xfs_sb_t, sb_agblocks), 0 }, + { offsetof(xfs_sb_t, sb_agcount), 0 }, + { offsetof(xfs_sb_t, sb_rbmblocks), 0 }, + { offsetof(xfs_sb_t, sb_logblocks), 0 }, + { offsetof(xfs_sb_t, sb_versionnum), 0 }, + { offsetof(xfs_sb_t, sb_sectsize), 0 }, + { offsetof(xfs_sb_t, sb_inodesize), 0 }, + { offsetof(xfs_sb_t, sb_inopblock), 0 }, + { offsetof(xfs_sb_t, sb_fname[0]), 1 }, + { offsetof(xfs_sb_t, sb_blocklog), 0 }, + { offsetof(xfs_sb_t, sb_sectlog), 0 }, + { offsetof(xfs_sb_t, sb_inodelog), 0 }, + { offsetof(xfs_sb_t, sb_inopblog), 0 }, + { offsetof(xfs_sb_t, sb_agblklog), 0 }, + { offsetof(xfs_sb_t, sb_rextslog), 0 }, + { offsetof(xfs_sb_t, sb_inprogress), 0 }, + { offsetof(xfs_sb_t, sb_imax_pct), 0 }, + { offsetof(xfs_sb_t, sb_icount), 0 }, + { offsetof(xfs_sb_t, sb_ifree), 0 }, + { offsetof(xfs_sb_t, sb_fdblocks), 0 }, + { offsetof(xfs_sb_t, sb_frextents), 0 }, + { offsetof(xfs_sb_t, sb_uquotino), 0 }, + { offsetof(xfs_sb_t, sb_pquotino), 0 }, + { offsetof(xfs_sb_t, sb_qflags), 0 }, + { offsetof(xfs_sb_t, sb_flags), 0 }, + { offsetof(xfs_sb_t, sb_shared_vn), 0 }, + { offsetof(xfs_sb_t, sb_inoalignmt), 0 }, + { offsetof(xfs_sb_t, sb_unit), 0 }, + { offsetof(xfs_sb_t, sb_width), 0 }, + { offsetof(xfs_sb_t, sb_dirblklog), 0 }, + { offsetof(xfs_sb_t, sb_dummy), 1 }, + { sizeof(xfs_sb_t), 0 } +}; + +/* + * xfs_xlatesb + * data - on disk version of sb + * sb - a superblock + * dir - conversion direction: <0 - convert sb to buf + * >0 - convert buf to sb + * arch - architecture 
to read/write from/to buf + * fields - which fields to copy (bitmask) + */ +void +xfs_xlatesb(void *data, xfs_sb_t *sb, int dir, xfs_arch_t arch, + __int64_t fields) +{ + xfs_caddr_t buf_ptr; + xfs_caddr_t mem_ptr; + + ASSERT(dir); + ASSERT(fields); + + if (!fields) + return; + + buf_ptr=(xfs_caddr_t)data; + mem_ptr=(xfs_caddr_t)sb; + + while (fields) { + xfs_sb_field_t f; + int first; + int size; + + f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); + first = xfs_sb_info[f].offset; + size = xfs_sb_info[f + 1].offset - first; + + ASSERT(xfs_sb_info[f].type==0 || xfs_sb_info[f].type==1); + + if (arch == ARCH_NOCONVERT || size==1 || xfs_sb_info[f].type==1) { + if (dir>0) { + bcopy(buf_ptr + first, mem_ptr + first, size); + } else { + bcopy(mem_ptr + first, buf_ptr + first, size); + } + } else { + switch (size) { + case 2: + INT_XLATE(*(__uint16_t*)(buf_ptr+first), + *(__uint16_t*)(mem_ptr+first), dir, arch); + break; + case 4: + INT_XLATE(*(__uint32_t*)(buf_ptr+first), + *(__uint32_t*)(mem_ptr+first), dir, arch); + break; + case 8: + INT_XLATE(*(__uint64_t*)(buf_ptr+first), + *(__uint64_t*)(mem_ptr+first), dir, arch); + break; + default: + ASSERT(0); + } + } + fields &= ~(1LL << f); + } + +} diff --git a/libxfs/xfs_rtalloc.c b/libxfs/xfs_rtalloc.c new file mode 100644 index 000000000..8f0a447f9 --- /dev/null +++ b/libxfs/xfs_rtalloc.c @@ -0,0 +1,835 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * Free realtime space allocation for XFS. + */ +#include + + +/* + * Get a buffer for the bitmap or summary file block specified. + * The buffer is returned read and locked. + */ +STATIC int /* error */ +xfs_rtbuf_get( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t block, /* block number in bitmap or summary */ + int issum, /* is summary not bitmap */ + xfs_buf_t **bpp) /* output: buffer for the block */ +{ + xfs_buf_t *bp; /* block buffer, result */ + xfs_daddr_t d; /* disk addr of block */ + int error; /* error value */ + xfs_fsblock_t fsb; /* fs block number for block */ + xfs_inode_t *ip; /* bitmap or summary inode */ + + ip = issum ? mp->m_rsumip : mp->m_rbmip; + /* + * Map from the file offset (block) and inode number to the + * file system block. + */ + error = xfs_bmapi_single(tp, ip, XFS_DATA_FORK, &fsb, block); + if (error) { + return error; + } + ASSERT(fsb != NULLFSBLOCK); + /* + * Convert to disk address for buffer cache. + */ + d = XFS_FSB_TO_DADDR(mp, fsb); + /* + * Read the buffer. 
+ */ + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, + mp->m_bsize, 0, &bp); + if (error) { + return error; + } + ASSERT(bp && !XFS_BUF_GETERROR(bp)); + *bpp = bp; + return 0; +} + +/* + * Searching backward from start to limit, find the first block whose + * allocated/free state is different from start's. + */ +STATIC int /* error */ +xfs_rtfind_back( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to look at */ + xfs_rtblock_t limit, /* last block to look at */ + xfs_rtblock_t *rtblock) /* out: start block found */ +{ + xfs_rtword_t *b; /* current word in buffer */ + int bit; /* bit number in the word */ + xfs_rtblock_t block; /* bitmap block number */ + xfs_buf_t *bp; /* buf for the block */ + xfs_rtword_t *bufp; /* starting word in buffer */ + int error; /* error value */ + xfs_rtblock_t firstbit; /* first useful bit in the word */ + xfs_rtblock_t i; /* current bit number rel. to start */ + xfs_rtblock_t len; /* length of inspected area */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t want; /* mask for "good" values */ + xfs_rtword_t wdiff; /* difference from wanted value */ + int word; /* word number in the buffer */ + + /* + * Compute and read in starting bitmap block for starting block. + */ + block = XFS_BITTOBLOCK(mp, start); + error = xfs_rtbuf_get(mp, tp, block, 0, &bp); + if (error) { + return error; + } + bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + /* + * Get the first word's index & point to it. + */ + word = XFS_BITTOWORD(mp, start); + b = &bufp[word]; + bit = (int)(start & (XFS_NBWORD - 1)); + len = start - limit + 1; + /* + * Compute match value, based on the bit at start: if 1 (free) + * then all-ones, else all-zeroes. + */ + want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; + /* + * If the starting position is not word-aligned, deal with the + * partial word. 
+ */ + if (bit < XFS_NBWORD - 1) { + /* + * Calculate first (leftmost) bit number to look at, + * and mask for all the relevant bits in this word. + */ + firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0); + mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) << + firstbit; + /* + * Calculate the difference between the value there + * and what we're looking for. + */ + if (wdiff = (*b ^ want) & mask) { + /* + * Different. Mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i = bit - XFS_RTHIBIT(wdiff); + *rtblock = start - i + 1; + return 0; + } + i = bit - firstbit + 1; + /* + * Go on to previous block if that's where the previous word is + * and we need the previous word. + */ + if (--word == -1 && i < len) { + /* + * If done with this block, get the previous one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); + if (error) { + return error; + } + bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + word = XFS_BLOCKWMASK(mp); + b = &bufp[word]; + } else { + /* + * Go on to the previous word in the buffer. + */ + b--; + } + } else { + /* + * Starting on a word boundary, no partial word. + */ + i = 0; + } + /* + * Loop over whole words in buffers. When we use up one buffer + * we move on to the previous one. + */ + while (len - i >= XFS_NBWORD) { + /* + * Compute difference between actual and desired value. + */ + if (wdiff = *b ^ want) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); + *rtblock = start - i + 1; + return 0; + } + i += XFS_NBWORD; + /* + * Go on to previous block if that's where the previous word is + * and we need the previous word. + */ + if (--word == -1 && i < len) { + /* + * If done with this block, get the previous one. 
+ */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); + if (error) { + return error; + } + bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + word = XFS_BLOCKWMASK(mp); + b = &bufp[word]; + } else { + /* + * Go on to the previous word in the buffer. + */ + b--; + } + } + /* + * If not ending on a word boundary, deal with the last + * (partial) word. + */ + if (len - i) { + /* + * Calculate first (leftmost) bit number to look at, + * and mask for all the relevant bits in this word. + */ + firstbit = XFS_NBWORD - (len - i); + mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit; + /* + * Compute difference between actual and desired value. + */ + if (wdiff = (*b ^ want) & mask) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); + *rtblock = start - i + 1; + return 0; + } else + i = len; + } + /* + * No match, return that we scanned the whole area. + */ + xfs_trans_brelse(tp, bp); + *rtblock = start - i + 1; + return 0; +} + +/* + * Searching forward from start to limit, find the first block whose + * allocated/free state is different from start's. + */ +STATIC int /* error */ +xfs_rtfind_forw( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to look at */ + xfs_rtblock_t limit, /* last block to look at */ + xfs_rtblock_t *rtblock) /* out: start block found */ +{ + xfs_rtword_t *b; /* current word in buffer */ + int bit; /* bit number in the word */ + xfs_rtblock_t block; /* bitmap block number */ + xfs_buf_t *bp; /* buf for the block */ + xfs_rtword_t *bufp; /* starting word in buffer */ + int error; /* error value */ + xfs_rtblock_t i; /* current bit number rel. 
to start */ + xfs_rtblock_t lastbit; /* last useful bit in the word */ + xfs_rtblock_t len; /* length of inspected area */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t want; /* mask for "good" values */ + xfs_rtword_t wdiff; /* difference from wanted value */ + int word; /* word number in the buffer */ + + /* + * Compute and read in starting bitmap block for starting block. + */ + block = XFS_BITTOBLOCK(mp, start); + error = xfs_rtbuf_get(mp, tp, block, 0, &bp); + if (error) { + return error; + } + bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + /* + * Get the first word's index & point to it. + */ + word = XFS_BITTOWORD(mp, start); + b = &bufp[word]; + bit = (int)(start & (XFS_NBWORD - 1)); + len = limit - start + 1; + /* + * Compute match value, based on the bit at start: if 1 (free) + * then all-ones, else all-zeroes. + */ + want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; + /* + * If the starting position is not word-aligned, deal with the + * partial word. + */ + if (bit) { + /* + * Calculate last (rightmost) bit number to look at, + * and mask for all the relevant bits in this word. + */ + lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); + mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; + /* + * Calculate the difference between the value there + * and what we're looking for. + */ + if (wdiff = (*b ^ want) & mask) { + /* + * Different. Mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i = XFS_RTLOBIT(wdiff) - bit; + *rtblock = start + i - 1; + return 0; + } + i = lastbit - bit; + /* + * Go on to next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * If done with this block, get the next one. 
+ */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + word = 0; + } else { + /* + * Go on to the next word in the buffer. 
+ */ + b++; + } + } else { + /* + * Starting on a word boundary, no partial word. + */ + i = 0; + } + /* + * Loop over whole words in buffers. When we use up one buffer + * we move on to the next one. + */ + while (len - i >= XFS_NBWORD) { + /* + * Compute difference between actual and desired value. + */ + if (wdiff = *b ^ want) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_RTLOBIT(wdiff); + *rtblock = start + i - 1; + return 0; + } + i += XFS_NBWORD; + /* + * Go on to next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * If done with this block, get the next one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + word = 0; + } else { + /* + * Go on to the next word in the buffer. + */ + b++; + } + } + /* + * If not ending on a word boundary, deal with the last + * (partial) word. + */ + if (lastbit = len - i) { + /* + * Calculate mask for all the relevant bits in this word. + */ + mask = ((xfs_rtword_t)1 << lastbit) - 1; + /* + * Compute difference between actual and desired value. + */ + if (wdiff = (*b ^ want) & mask) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_RTLOBIT(wdiff); + *rtblock = start + i - 1; + return 0; + } else + i = len; + } + /* + * No match, return that we scanned the whole area. + */ + xfs_trans_brelse(tp, bp); + *rtblock = start + i - 1; + return 0; +} + +/* + * Mark an extent specified by start and len freed. + * Updates all the summary information as well as the bitmap. 
+ */ +STATIC int /* error */ +xfs_rtfree_range( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to free */ + xfs_extlen_t len, /* length to free */ + xfs_buf_t **rbpp, /* in/out: summary block buffer */ + xfs_fsblock_t *rsb) /* in/out: summary block number */ +{ + xfs_rtblock_t end; /* end of the freed extent */ + int error; /* error value */ + xfs_rtblock_t postblock; /* first block freed > end */ + xfs_rtblock_t preblock; /* first block freed < start */ + + end = start + len - 1; + /* + * Modify the bitmap to mark this extent freed. + */ + error = xfs_rtmodify_range(mp, tp, start, len, 1); + if (error) { + return error; + } + /* + * Assume we're freeing out of the middle of an allocated extent. + * We need to find the beginning and end of the extent so we can + * properly update the summary. + */ + error = xfs_rtfind_back(mp, tp, start, 0, &preblock); + if (error) { + return error; + } + /* + * Find the next allocated block (end of allocated extent). + */ + error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, + &postblock); + /* + * If there are blocks not being freed at the front of the + * old extent, add summary data for them to be allocated. + */ + if (preblock < start) { + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(start - preblock), + XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); + if (error) { + return error; + } + } + /* + * If there are blocks not being freed at the end of the + * old extent, add summary data for them to be allocated. + */ + if (postblock > end) { + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(postblock - end), + XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb); + if (error) { + return error; + } + } + /* + * Increment the summary information corresponding to the entire + * (new) free extent. 
+ */ + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(postblock + 1 - preblock), + XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); + return error; +} + +/* + * Set the given range of bitmap bits to the given value. + * Do whatever I/O and logging is required. + */ +STATIC int /* error */ +xfs_rtmodify_range( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to modify */ + xfs_extlen_t len, /* length of extent to modify */ + int val) /* 1 for free, 0 for allocated */ +{ + xfs_rtword_t *b; /* current word in buffer */ + int bit; /* bit number in the word */ + xfs_rtblock_t block; /* bitmap block number */ + xfs_buf_t *bp; /* buf for the block */ + xfs_rtword_t *bufp; /* starting word in buffer */ + int error; /* error value */ + xfs_rtword_t *first; /* first used word in the buffer */ + int i; /* current bit number rel. to start */ + int lastbit; /* last useful bit in word */ + xfs_rtword_t mask; /* mask o frelevant bits for value */ + int word; /* word number in the buffer */ + + /* + * Compute starting bitmap block number. + */ + block = XFS_BITTOBLOCK(mp, start); + /* + * Read the bitmap block, and point to its data. + */ + error = xfs_rtbuf_get(mp, tp, block, 0, &bp); + if (error) { + return error; + } + bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + /* + * Compute the starting word's address, and starting bit. + */ + word = XFS_BITTOWORD(mp, start); + first = b = &bufp[word]; + bit = (int)(start & (XFS_NBWORD - 1)); + /* + * 0 (allocated) => all zeroes; 1 (free) => all ones. + */ + val = -val; + /* + * If not starting on a word boundary, deal with the first + * (partial) word. + */ + if (bit) { + /* + * Compute first bit not changed and mask of relevant bits. + */ + lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); + mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; + /* + * Set/clear the active bits. 
+ */ + if (val) + *b |= mask; + else + *b &= ~mask; + i = lastbit - bit; + /* + * Go on to the next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * Log the changed part of this block. + * Get the next one. + */ + xfs_trans_log_buf(tp, bp, + (uint)((char *)first - (char *)bufp), + (uint)((char *)b - (char *)bufp)); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + word = 0; + } else { + /* + * Go on to the next word in the buffer + */ + b++; + } + } else { + /* + * Starting on a word boundary, no partial word. + */ + i = 0; + } + /* + * Loop over whole words in buffers. When we use up one buffer + * we move on to the next one. + */ + while (len - i >= XFS_NBWORD) { + /* + * Set the word value correctly. + */ + *b = val; + i += XFS_NBWORD; + /* + * Go on to the next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * Log the changed part of this block. + * Get the next one. + */ + xfs_trans_log_buf(tp, bp, + (uint)((char *)first - (char *)bufp), + (uint)((char *)b - (char *)bufp)); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + word = 0; + } else { + /* + * Go on to the next word in the buffer + */ + b++; + } + } + /* + * If not ending on a word boundary, deal with the last + * (partial) word. + */ + if (lastbit = len - i) { + /* + * Compute a mask of relevant bits. + */ + bit = 0; + mask = ((xfs_rtword_t)1 << lastbit) - 1; + /* + * Set/clear the active bits. + */ + if (val) + *b |= mask; + else + *b &= ~mask; + b++; + } + /* + * Log any remaining changed bytes. 
+ */ + if (b > first) + xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp), + (uint)((char *)b - (char *)bufp - 1)); + return 0; +} + +/* + * Read and modify the summary information for a given extent size, + * bitmap block combination. + * Keeps track of a current summary block, so we don't keep reading + * it from the buffer cache. + */ +STATIC int /* error */ +xfs_rtmodify_summary( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + int log, /* log2 of extent size */ + xfs_rtblock_t bbno, /* bitmap block number */ + int delta, /* change to make to summary info */ + xfs_buf_t **rbpp, /* in/out: summary block buffer */ + xfs_fsblock_t *rsb) /* in/out: summary block number */ +{ + xfs_buf_t *bp; /* buffer for the summary block */ + int error; /* error value */ + xfs_fsblock_t sb; /* summary fsblock */ + int so; /* index into the summary file */ + xfs_suminfo_t *sp; /* pointer to returned data */ + + /* + * Compute entry number in the summary file. + */ + so = XFS_SUMOFFS(mp, log, bbno); + /* + * Compute the block number in the summary file. + */ + sb = XFS_SUMOFFSTOBLOCK(mp, so); + /* + * If we have an old buffer, and the block number matches, use that. + */ + if (rbpp && *rbpp && *rsb == sb) + bp = *rbpp; + /* + * Otherwise we have to get the buffer. + */ + else { + /* + * If there was an old one, get rid of it first. + */ + if (rbpp && *rbpp) + xfs_trans_brelse(tp, *rbpp); + error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); + if (error) { + return error; + } + /* + * Remember this buffer and block for the next call. + */ + if (rbpp) { + *rbpp = bp; + *rsb = sb; + } + } + /* + * Point to the summary information, modify and log it. + */ + sp = XFS_SUMPTR(mp, bp, so); + *sp += delta; + xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)XFS_BUF_PTR(bp)), + (uint)((char *)sp - (char *)XFS_BUF_PTR(bp) + sizeof(*sp) - 1)); + return 0; +} + +/* + * Free an extent in the realtime subvolume. 
Length is expressed in + * realtime extents, as is the block number. + */ +int /* error */ +xfs_rtfree_extent( + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t bno, /* starting block number to free */ + xfs_extlen_t len) /* length of extent freed */ +{ + int error; /* error value */ + xfs_inode_t *ip; /* bitmap file inode */ + xfs_mount_t *mp; /* file system mount structure */ + xfs_fsblock_t sb; /* summary file block number */ + xfs_buf_t *sumbp; /* summary file block buffer */ + + mp = tp->t_mountp; + /* + * Synchronize by locking the bitmap inode. + */ + error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, XFS_ILOCK_EXCL, &ip); + if (error) { + return error; + } +#if defined(__KERNEL__) && defined(DEBUG) + /* + * Check to see that this whole range is currently allocated. + */ + { + int stat; /* result from checking range */ + + error = xfs_rtcheck_alloc_range(mp, tp, bno, len, &stat); + if (error) { + return error; + } + ASSERT(stat); + } +#endif + sumbp = NULL; + /* + * Free the range of realtime blocks. + */ + error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb); + if (error) { + return error; + } + /* + * Mark more blocks free in the superblock. + */ + xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len); + /* + * If we've now freed all the blocks, reset the file sequence + * number to 0. + */ + if (tp->t_frextents_delta + mp->m_sb.sb_frextents == + mp->m_sb.sb_rextents) { + if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) + ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; + *(__uint64_t *)&ip->i_d.di_atime = 0; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + } + return 0; +} + +/* + * Initialize realtime fields in the mount structure. 
+ */ +int /* error */ +xfs_rtmount_init( + xfs_mount_t *mp) /* file system mount structure */ +{ + xfs_buf_t *bp; /* buffer for last block of subvolume */ + xfs_daddr_t d; /* address of last block of subvolume */ + int error; /* error return value */ + xfs_sb_t *sbp; /* filesystem superblock copy in mount */ + + sbp = &mp->m_sb; + if (sbp->sb_rblocks == 0) + return 0; + if (!mp->m_rtdev) { + printk(KERN_WARNING + "XFS: This FS has an RT subvol - specify -o rtdev on mount\n"); + return XFS_ERROR(ENODEV); + } + mp->m_rsumlevels = sbp->sb_rextslog + 1; + mp->m_rsumsize = + (uint)sizeof(xfs_suminfo_t) * mp->m_rsumlevels * + sbp->sb_rbmblocks; + mp->m_rsumsize = roundup(mp->m_rsumsize, sbp->sb_blocksize); + mp->m_rbmip = mp->m_rsumip = NULL; + /* + * Check that the realtime section is an ok size. + */ + d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); + if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) { + printk(KERN_WARNING "XFS: RT mount - %llu != %llu\n", + XFS_BB_TO_FSB(mp, d), mp->m_sb.sb_rblocks); + return XFS_ERROR(E2BIG); + } + error = xfs_read_buf(mp, &mp->m_rtdev_targ, d - 1, 1, 0, &bp); + if (error) { + printk(KERN_WARNING + "XFS: RT mount - xfs_read_buf returned %d\n", error); + if (error == ENOSPC) + return XFS_ERROR(E2BIG); + return error; + } + xfs_buf_relse(bp); + return 0; +} diff --git a/libxfs/xfs_rtbit.c b/libxfs/xfs_rtbit.c new file mode 100644 index 000000000..c51cba34c --- /dev/null +++ b/libxfs/xfs_rtbit.c @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * XFS bit manipulation routines, used only in realtime code. + */ + +#include + +/* + * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set. + */ +int +xfs_lowbit32( + __uint32_t v) +{ + int i; + + if (v & 0x0000ffff) + if (v & 0x000000ff) + i = 0; + else + i = 8; + else if (v & 0xffff0000) + if (v & 0x00ff0000) + i = 16; + else + i = 24; + else + return -1; + return i + xfs_lowbit[(v >> i) & 0xff]; +} diff --git a/libxfs/xfs_trans.c b/libxfs/xfs_trans.c new file mode 100644 index 000000000..a30ad8967 --- /dev/null +++ b/libxfs/xfs_trans.c @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include + +/* + * Initialize the precomputed transaction reservation values + * in the mount structure. 
+ */ +void +xfs_trans_init( + xfs_mount_t *mp) +{ + xfs_trans_reservations_t *resp; + + resp = &(mp->m_reservations); + resp->tr_write = + (uint)(XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); + resp->tr_itruncate = + (uint)(XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); + resp->tr_rename = + (uint)(XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); + resp->tr_link = (uint)XFS_CALC_LINK_LOG_RES(mp); + resp->tr_remove = + (uint)(XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); + resp->tr_symlink = + (uint)(XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); + resp->tr_create = + (uint)(XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); + resp->tr_mkdir = + (uint)(XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); + resp->tr_ifree = + (uint)(XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); + resp->tr_ichange = + (uint)(XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); + resp->tr_growdata = (uint)XFS_CALC_GROWDATA_LOG_RES(mp); + resp->tr_swrite = (uint)XFS_CALC_SWRITE_LOG_RES(mp); + resp->tr_writeid = (uint)XFS_CALC_WRITEID_LOG_RES(mp); + resp->tr_addafork = + (uint)(XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); + resp->tr_attrinval = (uint)XFS_CALC_ATTRINVAL_LOG_RES(mp); + resp->tr_attrset = + (uint)(XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); + resp->tr_attrrm = + (uint)(XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp)); + resp->tr_clearagi = (uint)XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp); + resp->tr_growrtalloc = (uint)XFS_CALC_GROWRTALLOC_LOG_RES(mp); + resp->tr_growrtzero = (uint)XFS_CALC_GROWRTZERO_LOG_RES(mp); + resp->tr_growrtfree = (uint)XFS_CALC_GROWRTFREE_LOG_RES(mp); +} diff --git a/logprint/Makefile b/logprint/Makefile new file mode 100644 index 000000000..4b878e297 --- /dev/null +++ b/logprint/Makefile @@ -0,0 +1,50 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = .. +include $(TOPDIR)/include/builddefs + +CMDTARGET = xfs_logprint +CMDDEPS = $(LIBXFS) + +CFILES = log_print_trans.c log_print_all.c log_misc.c logprint.c \ + xfs_log_recover.c +HFILES = logprint.h +LLDLIBS = $(LIBXFS) $(LIBUUID) + +default: $(CMDTARGET) + +include $(BUILDRULES) + +install: default + $(INSTALL) -m 755 -d $(XFS_CMDS_BIN_DIR) + $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_BIN_DIR) diff --git a/logprint/log_misc.c b/logprint/log_misc.c new file mode 100644 index 000000000..bc53bfa40 --- /dev/null +++ b/logprint/log_misc.c @@ -0,0 +1,1184 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "logprint.h" + +#define ZEROED_LOG (-4) +#define FULL_READ (-3) +#define PARTIAL_READ (-2) +#define BAD_HEADER (-1) +#define NO_ERROR (0) + +static int logBBsize; +char *trans_type[] = { + "", + "SETATTR", + "SETATTR_SIZE", + "INACTIVE", + "CREATE", + "CREATE_TRUNC", + "TRUNCATE_FILE", + "REMOVE", + "LINK", + "RENAME", + "MKDIR", + "RMDIR", + "SYMLINK", + "SET_DMATTRS", + "GROWFS", + "STRAT_WRITE", + "DIOSTRAT", + "WRITE_SYNC", + "WRITEID", + "ADDAFORK", + "ATTRINVAL", + "ATRUNCATE", + "ATTR_SET", + "ATTR_RM", + "ATTR_FLAG", + "CLEAR_AGI_BUCKET", + "QM_SBCHANGE", + "DUMMY1", + "DUMMY2", + "QM_QUOTAOFF", + "QM_DQALLOC", + "QM_SETQLIM", + "QM_DQCLUSTER", + "QM_QINOCREATE", + "QM_QUOTAOFF_END", + "SB_UNIT", + "FSYNC_TS", + "GROWFSRT_ALLOC", + "GROWFSRT_ZERO", + "GROWFSRT_FREE", + "SWAPEXT", +}; + +typedef struct xlog_split_item { + struct xlog_split_item *si_next; + struct xlog_split_item *si_prev; + xlog_tid_t si_tid; + int si_skip; +} xlog_split_item_t; + +xlog_split_item_t *split_list = 0; + +void +print_xlog_op_line(void) +{ + printf("--------------------------------------" + "--------------------------------------\n"); +} /* print_xlog_op_line */ + +void +print_xlog_record_line(void) +{ + printf("======================================" + "======================================\n"); +} /* print_xlog_record_line */ + +void +print_stars(void) +{ + printf("***********************************" + "***********************************\n"); +} /* print_xlog_record_line */ + +/* + * Given a pointer to a data segment, print out the data as if it were + * a log operation header. 
+ */ +void +xlog_print_op_header(xlog_op_header_t *op_head, + int i, + xfs_caddr_t *ptr) +{ + xlog_op_header_t hbuf; + + /* + * bcopy because on 64/n32, partial reads can cause the op_head + * pointer to come in pointing to an odd-numbered byte + */ + bcopy(op_head, &hbuf, sizeof(xlog_op_header_t)); + op_head = &hbuf; + *ptr += sizeof(xlog_op_header_t); + printf("Oper (%d): tid: %x len: %d clientid: %s ", i, + INT_GET(op_head->oh_tid, ARCH_CONVERT), + INT_GET(op_head->oh_len, ARCH_CONVERT), + (op_head->oh_clientid == XFS_TRANSACTION ? "TRANS" : + (op_head->oh_clientid == XFS_LOG ? "LOG" : "ERROR"))); + printf("flags: "); + if (op_head->oh_flags) { + if (op_head->oh_flags & XLOG_START_TRANS) + printf("START "); + if (op_head->oh_flags & XLOG_COMMIT_TRANS) + printf("COMMIT "); + if (op_head->oh_flags & XLOG_WAS_CONT_TRANS) + printf("WAS_CONT "); + if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) + printf("UNMOUNT "); + if (op_head->oh_flags & XLOG_CONTINUE_TRANS) + printf("CONTINUE "); + if (op_head->oh_flags & XLOG_END_TRANS) + printf("END "); + } else { + printf("none"); + } + printf("\n"); +} /* xlog_print_op_header */ + + +void +xlog_print_add_to_trans(xlog_tid_t tid, + int skip) +{ + xlog_split_item_t *item; + + item = (xlog_split_item_t *)calloc(sizeof(xlog_split_item_t), 1); + item->si_tid = tid; + item->si_skip = skip; + item->si_next = split_list; + item->si_prev = 0; + if (split_list) + split_list->si_prev = item; + split_list = item; +} /* xlog_print_add_to_trans */ + + +int +xlog_print_find_tid(xlog_tid_t tid, uint was_cont) +{ + xlog_split_item_t *listp = split_list; + + if (!split_list) { + if (was_cont != 0) /* Not first time we have used this tid */ + return 1; + else + return 0; + } + while (listp) { + if (listp->si_tid == tid) + break; + listp = listp->si_next; + } + if (!listp) { + return 0; + } + if (--listp->si_skip == 0) { + if (listp == split_list) { /* delete at head */ + split_list = listp->si_next; + if (split_list) + split_list->si_prev = NULL; 
+ } else { + if (listp->si_next) + listp->si_next->si_prev = listp->si_prev; + listp->si_prev->si_next = listp->si_next; + } + free(listp); + } + return 1; +} /* xlog_print_find_tid */ + +int +xlog_print_trans_header(xfs_caddr_t *ptr, int len) +{ + xfs_trans_header_t *h; + xfs_caddr_t cptr = *ptr; + __uint32_t magic; + char *magic_c = (char *)&magic; + + *ptr += len; + + magic=*(__uint32_t*)cptr; /* XXX INT_GET soon */ + + if (len >= 4) + printf("%c%c%c%c:", +#if __BYTE_ORDER == __LITTLE_ENDIAN + magic_c[3], magic_c[2], magic_c[1], magic_c[0]); +#else + magic_c[0], magic_c[1], magic_c[2], magic_c[3]); +#endif + if (len != sizeof(xfs_trans_header_t)) { + printf(" Not enough data to decode further\n"); + return 1; + } + h = (xfs_trans_header_t *)cptr; + printf(" type: %s tid: %x num_items: %d\n", + trans_type[h->th_type], h->th_tid, h->th_num_items); + return 0; +} /* xlog_print_trans_header */ + + +int +xlog_print_trans_buffer(xfs_caddr_t *ptr, int len, int *i, int num_ops) +{ + xfs_buf_log_format_t *f; + xfs_buf_log_format_v1_t *old_f; + xfs_agi_t *agi; + xfs_agf_t *agf; + xfs_disk_dquot_t *dq; + xlog_op_header_t *head = 0; + int num, skip; + int super_block = 0; + int bucket, col, buckets; + __int64_t blkno; + xfs_buf_log_format_t lbuf; + int size, blen, map_size, struct_size; + long long x, y; + + /* + * bcopy to ensure 8-byte alignment for the long longs in + * buf_log_format_t structure + */ + bcopy(*ptr, &lbuf, sizeof(xfs_buf_log_format_t)); + f = &lbuf; + *ptr += len; + + if (f->blf_type == XFS_LI_BUF) { + blkno = f->blf_blkno; + size = f->blf_size; + blen = f->blf_len; + map_size = f->blf_map_size; + struct_size = sizeof(xfs_buf_log_format_t); + } else { + old_f = (xfs_buf_log_format_v1_t*)f; + blkno = old_f->blf_blkno; + size = old_f->blf_size; + blen = old_f->blf_len; + map_size = old_f->blf_map_size; + struct_size = sizeof(xfs_buf_log_format_v1_t); + } + switch (f->blf_type) { + case XFS_LI_BUF: + printf("BUF: "); + break; + case XFS_LI_6_1_BUF: + 
printf("6.1 BUF: "); + break; + case XFS_LI_5_3_BUF: + printf("5.3 BUF: "); + break; + case XFS_LI_DQUOT: + printf("DQUOT BUF: "); + break; + default: + printf("UNKNOWN BUF: "); + break; + } + if (len >= struct_size) { + ASSERT((len - sizeof(struct_size)) % sizeof(int) == 0); + printf("#regs: %d start blkno: %lld (0x%llx) len: %d bmap size: %d\n", + size, blkno, blkno, blen, map_size); + if (blkno == 0) + super_block = 1; + } else { + ASSERT(len >= 4); /* must have at least 4 bytes if != 0 */ + printf("#regs: %d Not printing rest of data\n", f->blf_size); + return size; + } + num = size-1; + + /* Check if all regions in this log item were in the given LR ptr */ + if (*i+num > num_ops-1) { + skip = num - (num_ops-1-*i); + num = num_ops-1-*i; + } else { + skip = 0; + } + while (num-- > 0) { + (*i)++; + head = (xlog_op_header_t *)*ptr; + xlog_print_op_header(head, *i, ptr); + if (super_block) { + printf("SUPER BLOCK Buffer: "); + if (INT_GET(head->oh_len, ARCH_CONVERT) < 4*8) { + printf("Out of space\n"); + } else { + printf("\n"); + /* + * bcopy because *ptr may not be 8-byte aligned + */ + bcopy(*ptr, &x, sizeof(long long)); + bcopy(*ptr+8, &y, sizeof(long long)); + printf("icount: %lld ifree: %lld ", + INT_GET(x, ARCH_CONVERT), + INT_GET(y, ARCH_CONVERT)); + bcopy(*ptr+16, &x, sizeof(long long)); + bcopy(*ptr+24, &y, sizeof(long long)); + printf("fdblks: %lld frext: %lld\n", + INT_GET(x, ARCH_CONVERT), + INT_GET(y, ARCH_CONVERT)); + } + super_block = 0; + } else if (INT_GET(*(uint *)(*ptr), ARCH_CONVERT) == XFS_AGI_MAGIC) { + agi = (xfs_agi_t *)(*ptr); + printf("AGI Buffer: XAGI "); + if (INT_GET(head->oh_len, ARCH_CONVERT) < + sizeof(xfs_agi_t) - + XFS_AGI_UNLINKED_BUCKETS*sizeof(xfs_agino_t)) { + printf("out of space\n"); + } else { + printf("\n"); + printf("ver: %d ", + INT_GET(agi->agi_versionnum, ARCH_CONVERT)); + printf("seq#: %d len: %d cnt: %d root: %d\n", + INT_GET(agi->agi_seqno, ARCH_CONVERT), + INT_GET(agi->agi_length, ARCH_CONVERT), + 
INT_GET(agi->agi_count, ARCH_CONVERT), + INT_GET(agi->agi_root, ARCH_CONVERT)); + printf("level: %d free#: 0x%x newino: 0x%x\n", + INT_GET(agi->agi_level, ARCH_CONVERT), + INT_GET(agi->agi_freecount, ARCH_CONVERT), + INT_GET(agi->agi_newino, ARCH_CONVERT)); + if (INT_GET(head->oh_len, ARCH_CONVERT) == 128) { + buckets = 17; + } else if (INT_GET(head->oh_len, ARCH_CONVERT) == 256) { + buckets = 32 + 17; + } else { + buckets = XFS_AGI_UNLINKED_BUCKETS; + } + for (bucket = 0; bucket < buckets;) { + printf("bucket[%d - %d]: ", bucket, bucket+3); + for (col = 0; col < 4; col++, bucket++) { + if (bucket < buckets) { + printf("0x%x ", + INT_GET(agi->agi_unlinked[bucket], ARCH_CONVERT)); + } + } + printf("\n"); + } + } + } else if (INT_GET(*(uint *)(*ptr), ARCH_CONVERT) == XFS_AGF_MAGIC) { + agf = (xfs_agf_t *)(*ptr); + printf("AGF Buffer: XAGF "); + if (INT_GET(head->oh_len, ARCH_CONVERT) < sizeof(xfs_agf_t)) { + printf("Out of space\n"); + } else { + printf("\n"); + printf("ver: %d seq#: %d len: %d \n", + INT_GET(agf->agf_versionnum, ARCH_CONVERT), + INT_GET(agf->agf_seqno, ARCH_CONVERT), + INT_GET(agf->agf_length, ARCH_CONVERT)); + printf("root BNO: %d CNT: %d\n", + INT_GET(agf->agf_roots[XFS_BTNUM_BNOi], + ARCH_CONVERT), + INT_GET(agf->agf_roots[XFS_BTNUM_CNTi], + ARCH_CONVERT)); + printf("level BNO: %d CNT: %d\n", + INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], + ARCH_CONVERT), + INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], + ARCH_CONVERT)); + printf("1st: %d last: %d cnt: %d " + "freeblks: %d longest: %d\n", + INT_GET(agf->agf_flfirst, ARCH_CONVERT), + INT_GET(agf->agf_fllast, ARCH_CONVERT), + INT_GET(agf->agf_flcount, ARCH_CONVERT), + INT_GET(agf->agf_freeblks, ARCH_CONVERT), + INT_GET(agf->agf_longest, ARCH_CONVERT)); + } + } else if (INT_GET(*(uint *)(*ptr), ARCH_CONVERT) == XFS_DQUOT_MAGIC) { + dq = (xfs_disk_dquot_t *)(*ptr); + printf("DQUOT Buffer: DQ "); + if (INT_GET(head->oh_len, ARCH_CONVERT) < + sizeof(xfs_disk_dquot_t)) { + printf("Out of space\n"); + } + else { + 
printf("\n"); + printf("ver: %d flags: 0x%x id: %d \n", + INT_GET(dq->d_version, ARCH_CONVERT), + INT_GET(dq->d_flags, ARCH_CONVERT), + INT_GET(dq->d_id, ARCH_CONVERT)); + printf("blk limits hard: %llu soft: %llu\n", + INT_GET(dq->d_blk_hardlimit, ARCH_CONVERT), + INT_GET(dq->d_blk_softlimit, ARCH_CONVERT)); + printf("blk count: %llu warns: %d timer: %d\n", + INT_GET(dq->d_bcount, ARCH_CONVERT), + INT_GET(dq->d_bwarns, ARCH_CONVERT), + INT_GET(dq->d_btimer, ARCH_CONVERT)); + printf("ino limits hard: %llu soft: %llu\n", + INT_GET(dq->d_ino_hardlimit, ARCH_CONVERT), + INT_GET(dq->d_ino_softlimit, ARCH_CONVERT)); + printf("ino count: %llu warns: %d timer: %d\n", + INT_GET(dq->d_icount, ARCH_CONVERT), + INT_GET(dq->d_iwarns, ARCH_CONVERT), + INT_GET(dq->d_itimer, ARCH_CONVERT)); + } + } else { + printf("BUF DATA\n"); + if (print_data) { + uint *dp = (uint *)*ptr; + int nums = INT_GET(head->oh_len, ARCH_CONVERT) >> 2; + int i = 0; + + while (i < nums) { + if ((i % 8) == 0) + printf("%2x ", i); + printf("%8x ", *dp); + dp++; + i++; + if ((i % 8) == 0) + printf("\n"); + } + printf("\n"); + } + } + *ptr += INT_GET(head->oh_len, ARCH_CONVERT); + } + if (head && head->oh_flags & XLOG_CONTINUE_TRANS) + skip++; + return skip; +} /* xlog_print_trans_buffer */ + + +int +xlog_print_trans_efd(xfs_caddr_t *ptr, uint len) +{ + xfs_efd_log_format_t *f; + xfs_extent_t *ex; + int i; + xfs_efd_log_format_t lbuf; + + /* + * bcopy to ensure 8-byte alignment for the long longs in + * xfs_efd_log_format_t structure + */ + bcopy(*ptr, &lbuf, sizeof(xfs_efd_log_format_t)); + f = &lbuf; + *ptr += len; + if (len >= sizeof(xfs_efd_log_format_t)) { + printf("EFD: #regs: %d num_extents: %d id: 0x%llx\n", + f->efd_size, f->efd_nextents, f->efd_efi_id); + ex = f->efd_extents; + for (i=0; i< f->efd_size; i++) { + printf("(s: 0x%llx, l: %d) ", ex->ext_start, ex->ext_len); + if (i % 4 == 3) printf("\n"); + ex++; + } + if (i % 4 != 0) printf("\n"); + return 0; + } else { + printf("EFD: Not enough data 
to decode further\n"); + return 1; + } +} /* xlog_print_trans_efd */ + + +int +xlog_print_trans_efi(xfs_caddr_t *ptr, uint len) +{ + xfs_efi_log_format_t *f; + xfs_extent_t *ex; + int i; + xfs_efi_log_format_t lbuf; + + /* + * bcopy to ensure 8-byte alignment for the long longs in + * xfs_efi_log_format_t structure + */ + bcopy(*ptr, &lbuf, sizeof(xfs_efi_log_format_t)); + f = &lbuf; + *ptr += len; + if (len >= sizeof(xfs_efi_log_format_t)) { + printf("EFI: #regs: %d num_extents: %d id: 0x%llx\n", + f->efi_size, f->efi_nextents, f->efi_id); + ex = f->efi_extents; + for (i=0; i< f->efi_size; i++) { + printf("(s: 0x%llx, l: %d) ", ex->ext_start, ex->ext_len); + if (i % 4 == 3) printf("\n"); + ex++; + } + if (i % 4 != 0) printf("\n"); + return 0; + } else { + printf("EFI: Not enough data to decode further\n"); + return 1; + } +} /* xlog_print_trans_efi */ + + +/* ARGSUSED */ +void +xlog_print_trans_inode_core(xfs_dinode_core_t *ip) +{ + printf("INODE CORE\n"); + printf("magic 0x%hx mode 0%ho version %d format %d\n", + ip->di_magic, ip->di_mode, (int)ip->di_version, + (int)ip->di_format); + printf("nlink %hd uid %d gid %d\n", + ip->di_nlink, ip->di_uid, ip->di_gid); + printf("atime 0x%x mtime 0x%x ctime 0x%x\n", + ip->di_atime.t_sec, ip->di_mtime.t_sec, ip->di_ctime.t_sec); + printf("size 0x%llx nblocks 0x%llx extsize 0x%x nextents 0x%x\n", + ip->di_size, ip->di_nblocks, ip->di_extsize, ip->di_nextents); + printf("naextents 0x%x forkoff %d dmevmask 0x%x dmstate 0x%hx\n", + ip->di_anextents, (int)ip->di_forkoff, ip->di_dmevmask, + ip->di_dmstate); + printf("flags 0x%x gen 0x%x\n", + ip->di_flags, ip->di_gen); +} + +void +xlog_print_dir_sf(xfs_dir_shortform_t *sfp, int size) +{ + xfs_ino_t ino; + int count; + int i; + char namebuf[257]; + xfs_dir_sf_entry_t *sfep; + + /* XXX need to determine whether this is v1 or v2, then + print appropriate structure */ + + printf("SHORTFORM DIRECTORY size %d\n", + size); + /* bail out for now */ + + return; + + printf("SHORTFORM 
DIRECTORY size %d count %d\n", + size, sfp->hdr.count); + bcopy(&(sfp->hdr.parent), &ino, sizeof(ino)); + printf(".. ino 0x%llx\n", INT_GET(ino, ARCH_CONVERT)); + + count = (uint)(sfp->hdr.count); + sfep = &(sfp->list[0]); + for (i = 0; i < count; i++) { + bcopy(&(sfep->inumber), &ino, sizeof(ino)); + bcopy((sfep->name), namebuf, sfep->namelen); + namebuf[sfep->namelen] = '\0'; + printf("%s ino 0x%llx namelen %d\n", + namebuf, ino, sfep->namelen); + sfep = XFS_DIR_SF_NEXTENTRY(sfep); + } +} + +int +xlog_print_trans_inode(xfs_caddr_t *ptr, int len, int *i, int num_ops) +{ + xfs_inode_log_format_t *f; + xfs_inode_log_format_t_v1 *old_f; + xfs_dinode_core_t dino; + xlog_op_header_t *op_head; + int version; + xfs_inode_log_format_t lbuf = {0}; + int mode; + int size; + + /* + * print inode type header region + * + * bcopy to ensure 8-byte alignment for the long longs in + * xfs_inode_log_format_t structure + * + * len can be smaller than xfs_inode_log_format_t sometimes... (?) + */ + bcopy(*ptr, &lbuf, MIN(sizeof(xfs_inode_log_format_t), len)); + version = lbuf.ilf_type; + f = &lbuf; + (*i)++; /* bump index */ + *ptr += len; + if (version == XFS_LI_5_3_INODE) { + old_f = (xfs_inode_log_format_t_v1 *)f; + if (len == sizeof(xfs_inode_log_format_t_v1)) { + printf("5.3 INODE: #regs: %d ino: 0x%llx flags: 0x%x dsize: %d\n", + old_f->ilf_size, old_f->ilf_ino, + old_f->ilf_fields, old_f->ilf_dsize); + } else { + ASSERT(len >= 4); /* must have at least 4 bytes if != 0 */ + printf("5.3 INODE: #regs: %d Not printing rest of data\n", + old_f->ilf_size); + return old_f->ilf_size; + } + } else { + if (len == sizeof(xfs_inode_log_format_t)) { + if (version == XFS_LI_6_1_INODE) + printf("6.1 INODE: "); + else printf("INODE: "); + printf("#regs: %d ino: 0x%llx flags: 0x%x dsize: %d\n", + f->ilf_size, f->ilf_ino, f->ilf_fields, f->ilf_dsize); + printf(" blkno: %lld len: %d boff: %d\n", + f->ilf_blkno, f->ilf_len, f->ilf_boffset); + } else { + ASSERT(len >= 4); /* must have at least 4 
bytes if != 0 */ + printf("INODE: #regs: %d Not printing rest of data\n", + f->ilf_size); + return f->ilf_size; + } + } + + if (*i >= num_ops) /* end of LR */ + return f->ilf_size-1; + + /* core inode comes 2nd */ + op_head = (xlog_op_header_t *)*ptr; + xlog_print_op_header(op_head, *i, ptr); + + if (XLOG_SET(op_head->oh_flags, XLOG_CONTINUE_TRANS)) { + return f->ilf_size-1; + } + + bcopy(*ptr, &dino, sizeof(dino)); + mode = dino.di_mode & IFMT; + size = (int)dino.di_size; + xlog_print_trans_inode_core(&dino); + *ptr += sizeof(xfs_dinode_core_t); + + if (*i == num_ops-1 && f->ilf_size == 3) { + return 1; + } + + /* does anything come next */ + op_head = (xlog_op_header_t *)*ptr; + switch (f->ilf_fields & XFS_ILOG_NONCORE) { + case XFS_ILOG_DEXT: { + ASSERT(f->ilf_size == 3); + (*i)++; + xlog_print_op_header(op_head, *i, ptr); + printf("EXTENTS inode data\n"); + *ptr += INT_GET(op_head->oh_len, ARCH_CONVERT); + if (XLOG_SET(op_head->oh_flags, XLOG_CONTINUE_TRANS)) { + return 1; + } + break; + } + case XFS_ILOG_DBROOT: { + ASSERT(f->ilf_size == 3); + (*i)++; + xlog_print_op_header(op_head, *i, ptr); + printf("BTREE inode data\n"); + *ptr += INT_GET(op_head->oh_len, ARCH_CONVERT); + if (XLOG_SET(op_head->oh_flags, XLOG_CONTINUE_TRANS)) { + return 1; + } + break; + } + case XFS_ILOG_DDATA: { + ASSERT(f->ilf_size == 3); + (*i)++; + xlog_print_op_header(op_head, *i, ptr); + printf("LOCAL inode data\n"); + if (mode == IFDIR) { + xlog_print_dir_sf((xfs_dir_shortform_t*)*ptr, size); + } + *ptr += INT_GET(op_head->oh_len, ARCH_CONVERT); + if (XLOG_SET(op_head->oh_flags, XLOG_CONTINUE_TRANS)) + return 1; + break; + } + case XFS_ILOG_DEV: { + ASSERT(f->ilf_size == 2); + printf("DEV inode: no extra region\n"); + break; + } + case XFS_ILOG_UUID: { + ASSERT(f->ilf_size == 2); + printf("UUID inode: no extra region\n"); + break; + } + case 0: { + ASSERT(f->ilf_size == 2); + break; + } + default: { + xlog_panic("xlog_print_trans_inode: illegal inode type"); + } + } + return 0; +} /* 
xlog_print_trans_inode */ + + + +/****************************************************************************** + * + * Log print routines + * + ****************************************************************************** + */ + +void +xlog_print_lseek(xlog_t *log, int fd, xfs_daddr_t blkno, int whence) +{ +#define BBTOOFF64(bbs) (((xfs_off_t)(bbs)) << BBSHIFT) + xfs_off_t offset; + + if (whence == SEEK_SET) + offset = BBTOOFF64(blkno+log->l_logBBstart); + else + offset = BBTOOFF64(blkno); + if (lseek64(fd, offset, whence) < 0) { + fprintf(stderr, "%s: lseek64 to %llu failed: %s\n", + progname, offset, strerror(errno)); + exit(1); + } +} /* xlog_print_lseek */ + + +void +print_lsn(xfs_caddr_t string, + xfs_lsn_t *lsn, + xfs_arch_t arch) +{ + printf("%s: %u,%u", string, + CYCLE_LSN(*lsn, arch), BLOCK_LSN(*lsn, arch)); +} + + +int +xlog_print_record(int fd, + int num_ops, + int len, + int *read_type, + xfs_caddr_t *partial_buf, + xlog_rec_header_t *rhead) +{ + xlog_op_header_t *op_head; + xlog_rec_header_t *rechead; + xfs_caddr_t buf, ptr; + int read_len, skip; + int ret, n, i; + + if (print_no_print) + return NO_ERROR; + + if (!len) { + printf("\n"); + return NO_ERROR; + } + + /* read_len must read up to some block boundary */ + read_len = (int) BBTOB(BTOBB(len)); + + /* read_type => don't malloc() new buffer, use old one */ + if (*read_type == FULL_READ) { + if ((ptr = buf = (xfs_caddr_t)malloc(read_len)) == NULL) { + fprintf(stderr, "xlog_print_record: malloc failed\n"); + exit(1); + } + } else { + read_len -= *read_type; + buf = (xfs_caddr_t)((__psint_t)(*partial_buf) + (__psint_t)(*read_type)); + ptr = *partial_buf; + } + if ((ret = (int) read(fd, buf, read_len)) == -1) { + fprintf(stderr, "xlog_print_record: read error\n"); + exit(1); + } + /* Did we overflow the end? 
*/ + if (*read_type == FULL_READ && + BLOCK_LSN(rhead->h_lsn, ARCH_CONVERT)+BTOBB(read_len) >= logBBsize) { + *read_type = BBTOB(logBBsize-BLOCK_LSN(rhead->h_lsn, ARCH_CONVERT)-1); + *partial_buf = buf; + return PARTIAL_READ; + } + + /* Did we read everything? */ + if ((ret == 0 && read_len != 0) || ret != read_len) { + *read_type = ret; + *partial_buf = buf; + return PARTIAL_READ; + } + if (*read_type != FULL_READ) + read_len += *read_type; + + /* Everything read in. Start from beginning of buffer */ + buf = ptr; + for (i = 0; ptr < buf + read_len; ptr += BBSIZE, i++) { + rechead = (xlog_rec_header_t *)ptr; + if (INT_GET(rechead->h_magicno, ARCH_CONVERT) == XLOG_HEADER_MAGIC_NUM) { + xlog_print_lseek(0, fd, -read_len+i*BBSIZE, SEEK_CUR); + free(buf); + return -1; + } else { + if (INT_GET(rhead->h_cycle, ARCH_CONVERT) != + INT_GET(*(uint *)ptr, ARCH_CONVERT)) { + if (*read_type == FULL_READ) + return -1; + else if (INT_GET(rhead->h_cycle, ARCH_CONVERT) + 1 != + INT_GET(*(uint *)ptr, ARCH_CONVERT)) + return -1; + } + } + INT_SET(*(uint *)ptr, ARCH_CONVERT, + INT_GET(rhead->h_cycle_data[i], ARCH_CONVERT)); + } + ptr = buf; + for (i=0; ioh_flags, XLOG_WAS_CONT_TRANS) || + XLOG_SET(op_head->oh_flags, XLOG_CONTINUE_TRANS)) && + INT_GET(op_head->oh_len, ARCH_CONVERT) == 0)) { + for (n = 0; n < INT_GET(op_head->oh_len, ARCH_CONVERT); n++) { + printf("%c", *ptr); + ptr++; + } + printf("\n"); + continue; + } + if (xlog_print_find_tid(INT_GET(op_head->oh_tid, ARCH_CONVERT), + op_head->oh_flags & XLOG_WAS_CONT_TRANS)) { + printf("Left over region from split log item\n"); + ptr += INT_GET(op_head->oh_len, ARCH_CONVERT); + continue; + } + if (INT_GET(op_head->oh_len, ARCH_CONVERT) != 0) { + if (*(uint *)ptr == XFS_TRANS_HEADER_MAGIC) { + skip = xlog_print_trans_header(&ptr, + INT_GET(op_head->oh_len, ARCH_CONVERT)); + } else { + switch (*(unsigned short *)ptr) { + case XFS_LI_5_3_BUF: + case XFS_LI_6_1_BUF: + case XFS_LI_DQUOT: + case XFS_LI_BUF: { + skip = 
xlog_print_trans_buffer(&ptr, + INT_GET(op_head->oh_len, ARCH_CONVERT), + &i, num_ops); + break; + } + case XFS_LI_5_3_INODE: + case XFS_LI_6_1_INODE: + case XFS_LI_INODE: { + skip = xlog_print_trans_inode(&ptr, + INT_GET(op_head->oh_len, ARCH_CONVERT), + &i, num_ops); + break; + } + case XFS_LI_EFI: { + skip = xlog_print_trans_efi(&ptr, + INT_GET(op_head->oh_len, ARCH_CONVERT)); + break; + } + case XFS_LI_EFD: { + skip = xlog_print_trans_efd(&ptr, + INT_GET(op_head->oh_len, ARCH_CONVERT)); + break; + } + case XLOG_UNMOUNT_TYPE: { + printf("Unmount filesystem\n"); + skip = 0; + break; + } + default: { + fprintf(stderr, "%s: unknown log operation type (%x)\n", + progname, *(unsigned short *)ptr); + skip = 0; + ptr += INT_GET(op_head->oh_len, ARCH_CONVERT); + } + } /* switch */ + } /* else */ + if (skip != 0) + xlog_print_add_to_trans(INT_GET(op_head->oh_tid, ARCH_CONVERT), skip); + } + } + printf("\n"); + free(buf); + return NO_ERROR; +} /* xlog_print_record */ + +/* Print a log record header. Returns ZEROED_LOG for a zero magic number, BAD_HEADER for a wrong one, otherwise h_num_logops (also returned, unchecked, when print_no_print is set). */ +int +xlog_print_rec_head(xlog_rec_header_t *head, int *len) +{ + int i; + char uub[64]; + int datalen,bbs; + + if (print_no_print) + return INT_GET(head->h_num_logops, ARCH_CONVERT); + + if (INT_ISZERO(head->h_magicno, ARCH_CONVERT)) + return ZEROED_LOG; + + if (INT_GET(head->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) { + printf("Header 0x%x wanted 0x%x\n", + INT_GET(head->h_magicno, ARCH_CONVERT), + XLOG_HEADER_MAGIC_NUM); + return BAD_HEADER; + } + + datalen=INT_GET(head->h_len, ARCH_CONVERT); + bbs=(datalen/BBSIZE)+((datalen%BBSIZE)?1:0); /* datalen in basic blocks, rounded up; the inner parentheses are required because ?: binds looser than + */ + + printf("cycle: %d version: %d ", + INT_GET(head->h_cycle, ARCH_CONVERT), + INT_GET(head->h_version, ARCH_CONVERT)); + print_lsn(" lsn", &head->h_lsn, ARCH_CONVERT); + print_lsn(" tail_lsn", &head->h_tail_lsn, ARCH_CONVERT); + printf("\n"); + printf("length of Log Record: %d prev offset: %d num ops: %d\n", + datalen, + INT_GET(head->h_prev_block, ARCH_CONVERT), + INT_GET(head->h_num_logops, ARCH_CONVERT)); + + if (print_overwrite) { + printf("cycle num
overwrites: "); + for (i=0; i< bbs; i++) + printf("%d - 0x%x ", + i, + INT_GET(head->h_cycle_data[i], ARCH_CONVERT)); + printf("\n"); + } + + uuid_unparse(head->h_fs_uuid, uub); + printf("uuid: %s format: ", uub); + switch (INT_GET(head->h_fmt, ARCH_CONVERT)) { + case XLOG_FMT_UNKNOWN: + printf("unknown\n"); + break; + case XLOG_FMT_LINUX_LE: + printf("little endian linux\n"); + break; + case XLOG_FMT_LINUX_BE: + printf("big endian linux\n"); + break; + case XLOG_FMT_IRIX_BE: + printf("big endian irix\n"); + break; + default: + printf("? (%d)\n", INT_GET(head->h_fmt, ARCH_CONVERT)); + break; + } + + *len = INT_GET(head->h_len, ARCH_CONVERT); + return(INT_GET(head->h_num_logops, ARCH_CONVERT)); +} /* xlog_print_rec_head */ + +static void +print_xlog_bad_zeroed(xfs_daddr_t blkno) +{ + print_stars(); + printf("* ERROR: found data after zeroed blocks block=%-21lld *\n", + (__int64_t)blkno); + print_stars(); + if (print_exit) + xlog_exit("Bad log - data after zeroed blocks"); +} /* print_xlog_bad_zeroed */ + +static void +print_xlog_bad_header(xfs_daddr_t blkno, xfs_caddr_t buf) +{ + print_stars(); + printf("* ERROR: header cycle=%-11d block=%-21lld *\n", + GET_CYCLE(buf, ARCH_CONVERT), (__int64_t)blkno); + print_stars(); + if (print_exit) + xlog_exit("Bad log record header"); +} /* print_xlog_bad_header */ + +void +print_xlog_bad_data(xfs_daddr_t blkno) +{ + print_stars(); + printf("* ERROR: data block=%-21lld *\n", + (__int64_t)blkno); + print_stars(); + if (print_exit) + xlog_exit("Bad data in log"); +} /* print_xlog_bad_data */ + + +/* + * This code is gross and needs to be rewritten. 
+ */ +void xfs_log_print(xlog_t *log, + int fd, + int print_block_start) +{ + char hbuf[XLOG_HEADER_SIZE]; + int num_ops, len; + xfs_daddr_t block_end = 0, block_start, blkno, error; + int read_type = FULL_READ; + xfs_caddr_t partial_buf; + int zeroed = 0; + + logBBsize = log->l_logBBsize; + + /* + * Normally, block_start and block_end are the same value since we + * are printing the entire log. However, if the start block is given, + * we still end at the end of the logical log. + */ + if (error = xlog_print_find_oldest(log, &block_end)) { + fprintf(stderr, "%s: problem finding oldest LR\n", progname); + return; + } + if (print_block_start == -1) + block_start = block_end; + else + block_start = print_block_start; + xlog_print_lseek(log, fd, block_start, SEEK_SET); + blkno = block_start; + + for (;;) { + if (read(fd, hbuf, 512) == 0) { + printf("%s: physical end of log\n", progname); + print_xlog_record_line(); + break; + } + if (print_only_data) { + printf("BLKNO: %lld\n", (__int64_t)blkno); + xlog_recover_print_data(hbuf, 512); + blkno++; + goto loop; + } + num_ops = xlog_print_rec_head((xlog_rec_header_t *)hbuf, &len); + blkno++; + + if (zeroed && num_ops != ZEROED_LOG) { + printf("%s: after %d zeroed blocks\n", progname, zeroed); + /* once we find zeroed blocks - that's all we expect */ + print_xlog_bad_zeroed(blkno-1); + /* reset count since we're assuming previous zeroed blocks + * were bad + */ + zeroed = 0; + } + + if (num_ops == ZEROED_LOG || num_ops == BAD_HEADER) { + if (num_ops == ZEROED_LOG) { + zeroed++; + } else { + print_xlog_bad_header(blkno-1, hbuf); + } + + goto loop; + } + + error = xlog_print_record(fd, num_ops, len, &read_type, &partial_buf, + (xlog_rec_header_t *)hbuf); + switch (error) { + case 0: { + blkno += BTOBB(len); + if (print_block_start != -1 && + blkno >= block_end) /* If start specified, we */ + goto end; /* end early */ + break; + } + case -1: { + print_xlog_bad_data(blkno-1); + if (print_block_start != -1 && + blkno >= 
block_end) /* If start specified, */ + goto end; /* we end early */ + xlog_print_lseek(log, fd, blkno, SEEK_SET); + goto loop; + } + case PARTIAL_READ: { + print_xlog_record_line(); + printf("%s: physical end of log\n", progname); + print_xlog_record_line(); + blkno = 0; + xlog_print_lseek(log, fd, 0, SEEK_SET); + /* + * We may have hit the end of the log when we started at 0. + * In this case, just end. + */ + if (block_start == 0) + goto end; + goto partial_log_read; + } + default: xlog_panic("illegal value"); + } + print_xlog_record_line(); +loop: + if (blkno >= logBBsize) { + if (zeroed) { + printf("%s: skipped %d zeroed blocks\n", progname, zeroed); + if (zeroed == logBBsize) + printf("%s: totally zeroed log\n", progname); + + zeroed=0; + } + printf("%s: physical end of log\n", progname); + print_xlog_record_line(); + break; + } + } + + /* Do we need to print the first part of physical log? */ + if (block_start != 0) { + blkno = 0; + xlog_print_lseek(log, fd, 0, SEEK_SET); + for (;;) { + if (read(fd, hbuf, 512) == 0) { + xlog_panic("xlog_find_head: bad read"); + } + if (print_only_data) { + printf("BLKNO: %lld\n", (__int64_t)blkno); + xlog_recover_print_data(hbuf, 512); + blkno++; + goto loop2; + } + num_ops = xlog_print_rec_head((xlog_rec_header_t *)hbuf, &len); + blkno++; + + if (num_ops == ZEROED_LOG || num_ops == BAD_HEADER) { + /* we only expect zeroed log entries at the end + * of the _physical_ log, so treat them the same + * as bad blocks here + */ + print_xlog_bad_header(blkno-1, hbuf); + + if (blkno >= block_end) + break; + continue; + } +partial_log_read: + error= xlog_print_record(fd, num_ops, len, &read_type, + &partial_buf, (xlog_rec_header_t *)hbuf); + if (read_type != FULL_READ) + len -= read_type; + read_type = FULL_READ; + if (!error) + blkno += BTOBB(len); + else { + print_xlog_bad_data(blkno-1); + xlog_print_lseek(log, fd, blkno, SEEK_SET); + goto loop2; + } + print_xlog_record_line(); +loop2: + if (blkno >= block_end) + break; + } + } + 
+end: + printf("%s: logical end of log\n", progname); + print_xlog_record_line(); +} diff --git a/logprint/log_print_all.c b/logprint/log_print_all.c new file mode 100644 index 000000000..a1a81cc99 --- /dev/null +++ b/logprint/log_print_all.c @@ -0,0 +1,593 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "logprint.h" + + +/* + * Start is defined to be the block pointing to the oldest valid log record. + * Used by log print code. Don't put in cmd/xfs/logprint/xfs_log_print.c + * since most of the bread routines live in kern/fs/xfs/xfs_log_recover only. 
+ */ +int +xlog_print_find_oldest( + struct log *log, + xfs_daddr_t *last_blk) +{ + xfs_buf_t *bp; + xfs_daddr_t first_blk; + uint first_half_cycle, last_half_cycle; + int error; + + if (xlog_find_zeroed(log, &first_blk)) + return 0; + + first_blk = 0; /* read first block */ + bp = xlog_get_bp(1, log->l_mp); + xlog_bread(log, 0, 1, bp); + first_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT); + *last_blk = log->l_logBBsize-1; /* read last block */ + xlog_bread(log, *last_blk, 1, bp); + last_half_cycle = GET_CYCLE(XFS_BUF_PTR(bp), ARCH_CONVERT); + ASSERT(last_half_cycle != 0); + + if (first_half_cycle == last_half_cycle) { /* all cycle nos are same */ + *last_blk = 0; + } else { /* have 1st and last; look for middle cycle */ + error = xlog_find_cycle_start(log, bp, first_blk, + last_blk, last_half_cycle); + if (error) + return error; + } + + xlog_put_bp(bp); + return 0; +} /* xlog_print_find_oldest */ + + +void +xlog_recover_print_data( + xfs_caddr_t p, + int len) +{ + if (print_data) { + uint *dp = (uint *)p; + int nums = len >> 2; + int j = 0; + + while (j < nums) { + if ((j % 8) == 0) + printf("%2x ", j); + printf("%8x ", *dp); + dp++; + j++; + if ((j % 8) == 0) + printf("\n"); + } + printf("\n"); + } +} /* xlog_recover_print_data */ + + +STATIC void +xlog_recover_print_buffer( + xlog_recover_item_t *item) +{ + xfs_agi_t *agi; + xfs_agf_t *agf; + xfs_buf_log_format_v1_t *old_f; + xfs_buf_log_format_t *f; + xfs_caddr_t p; + int len, num, i; + xfs_daddr_t blkno; + xfs_disk_dquot_t *ddq; + + f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr; + old_f = (xfs_buf_log_format_v1_t *)f; + len = item->ri_buf[0].i_len; + printf(" "); + switch (f->blf_type) { + case XFS_LI_BUF: { + printf("BUF: "); + break; + } + case XFS_LI_6_1_BUF: { + printf("6.1 BUF: "); + break; + } + case XFS_LI_5_3_BUF: { + printf("5.3 BUF: "); + break; + } + } + if (f->blf_type == XFS_LI_BUF) { + printf("#regs:%d start blkno:0x%Lx len:%d bmap size:%d\n", + f->blf_size, f->blf_blkno, 
f->blf_len, f->blf_map_size); + blkno = (xfs_daddr_t)f->blf_blkno; + } else { + printf("#regs:%d start blkno:0x%x len:%d bmap size:%d\n", + old_f->blf_size, old_f->blf_blkno, old_f->blf_len, + old_f->blf_map_size); + blkno = (xfs_daddr_t)old_f->blf_blkno; + } + num = f->blf_size-1; + i = 1; + while (num-- > 0) { + p = item->ri_buf[i].i_addr; + len = item->ri_buf[i].i_len; + i++; + if (blkno == 0) { /* super block */ + printf(" SUPER Block Buffer:\n"); + if (!print_buffer) continue; + printf(" icount:%Ld ifree:%Ld ", + INT_GET(*(long long *)(p), ARCH_CONVERT), + INT_GET(*(long long *)(p+8), ARCH_CONVERT)); + printf("fdblks:%Ld frext:%Ld\n", + INT_GET(*(long long *)(p+16), ARCH_CONVERT), + INT_GET(*(long long *)(p+24), ARCH_CONVERT)); + printf(" sunit:%u swidth:%u\n", + INT_GET(*(uint *)(p+56), ARCH_CONVERT), + INT_GET(*(uint *)(p+60), ARCH_CONVERT)); + } else if (INT_GET(*(uint *)p, ARCH_CONVERT) == XFS_AGI_MAGIC) { + agi = (xfs_agi_t *)p; + printf(" AGI Buffer: (XAGI)\n"); + if (!print_buffer) continue; + printf(" ver:%d ", + INT_GET(agi->agi_versionnum, ARCH_CONVERT)); + printf("seq#:%d len:%d cnt:%d root:%d\n", + INT_GET(agi->agi_seqno, ARCH_CONVERT), + INT_GET(agi->agi_length, ARCH_CONVERT), + INT_GET(agi->agi_count, ARCH_CONVERT), + INT_GET(agi->agi_root, ARCH_CONVERT)); + printf(" level:%d free#:0x%x newino:0x%x\n", + INT_GET(agi->agi_level, ARCH_CONVERT), + INT_GET(agi->agi_freecount, ARCH_CONVERT), + INT_GET(agi->agi_newino, ARCH_CONVERT)); + } else if (INT_GET(*(uint *)p, ARCH_CONVERT) == XFS_AGF_MAGIC) { + agf = (xfs_agf_t *)p; + printf(" AGF Buffer: (XAGF)\n"); + if (!print_buffer) continue; + printf(" ver:%d seq#:%d len:%d \n", + INT_GET(agf->agf_versionnum, ARCH_CONVERT), + INT_GET(agf->agf_seqno, ARCH_CONVERT), + INT_GET(agf->agf_length, ARCH_CONVERT)); + printf(" root BNO:%d CNT:%d\n", + INT_GET(agf->agf_roots[XFS_BTNUM_BNOi], + ARCH_CONVERT), + INT_GET(agf->agf_roots[XFS_BTNUM_CNTi], + ARCH_CONVERT)); + printf(" level BNO:%d CNT:%d\n", + 
INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], + ARCH_CONVERT), + INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], + ARCH_CONVERT)); + printf(" 1st:%d last:%d cnt:%d " + "freeblks:%d longest:%d\n", + INT_GET(agf->agf_flfirst, ARCH_CONVERT), + INT_GET(agf->agf_fllast, ARCH_CONVERT), + INT_GET(agf->agf_flcount, ARCH_CONVERT), + INT_GET(agf->agf_freeblks, ARCH_CONVERT), + INT_GET(agf->agf_longest, ARCH_CONVERT)); + } else if (*(uint *)p == XFS_DQUOT_MAGIC) { + ddq = (xfs_disk_dquot_t *)p; + printf(" DQUOT Buffer:\n"); + if (!print_buffer) continue; + printf(" UIDs 0x%x-0x%x\n", + INT_GET(ddq->d_id, ARCH_CONVERT), + INT_GET(ddq->d_id, ARCH_CONVERT) + + (BBTOB(f->blf_len) / sizeof(xfs_dqblk_t)) - 1); + } else { + printf(" BUF DATA\n"); + if (!print_buffer) continue; + xlog_recover_print_data(p, len); + } + } +} /* xlog_recover_print_buffer */ + +STATIC void +xlog_recover_print_quotaoff( + xlog_recover_item_t *item) +{ + xfs_qoff_logformat_t *qoff_f; + char str[32] = ""; + /* str starts empty and is only appended to: both labels (20 chars) plus the NUL fit, and it is a valid string when neither flag is set */ + qoff_f = (xfs_qoff_logformat_t *)item->ri_buf[0].i_addr; + ASSERT(qoff_f); + if (qoff_f->qf_flags & XFS_UQUOTA_ACCT) + strcat(str, "USER QUOTA"); + if (qoff_f->qf_flags & XFS_PQUOTA_ACCT) + strcat(str, "PROJ QUOTA"); + printf("\tQUOTAOFF: #regs:%d type:%s\n", + qoff_f->qf_size, str); +} + + +STATIC void +xlog_recover_print_dquot( + xlog_recover_item_t *item) +{ + xfs_dq_logformat_t *f; + xfs_disk_dquot_t *d; + + f = (xfs_dq_logformat_t *)item->ri_buf[0].i_addr; + ASSERT(f); + ASSERT(f->qlf_len == 1); + d = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr; + printf("\tDQUOT: #regs:%d blkno:%Ld boffset:%u id: %d\n", + f->qlf_size, f->qlf_blkno, f->qlf_boffset, f->qlf_id); + if (!print_quota) + return; + printf("\t\tmagic 0x%x\tversion 0x%x\tID 0x%x (%d)\t\n", + INT_GET(d->d_magic, ARCH_CONVERT), + INT_GET(d->d_version, ARCH_CONVERT), + INT_GET(d->d_id, ARCH_CONVERT), + INT_GET(d->d_id, ARCH_CONVERT)); + printf("\t\tblk_hard 0x%x\tblk_soft 0x%x\tino_hard 0x%x" + "\tino_soft 0x%x\n", + (int)INT_GET(d->d_blk_hardlimit,
ARCH_CONVERT), + (int)INT_GET(d->d_blk_softlimit, ARCH_CONVERT), + (int)INT_GET(d->d_ino_hardlimit, ARCH_CONVERT), + (int)INT_GET(d->d_ino_softlimit, ARCH_CONVERT)); + printf("\t\tbcount 0x%x (%d) icount 0x%x (%d)\n", + (int)INT_GET(d->d_bcount, ARCH_CONVERT), + (int)INT_GET(d->d_bcount, ARCH_CONVERT), + (int)INT_GET(d->d_icount, ARCH_CONVERT), + (int)INT_GET(d->d_icount, ARCH_CONVERT)); + printf("\t\tbtimer 0x%x itimer 0x%x \n", + (int)INT_GET(d->d_btimer, ARCH_CONVERT), + (int)INT_GET(d->d_itimer, ARCH_CONVERT)); +} + +STATIC void +xlog_recover_print_inode_core( + xfs_dinode_core_t *di) +{ + printf(" CORE inode:\n"); + if (!print_inode) + return; + printf(" magic:%c%c mode:0x%x ver:%d format:%d " + "onlink:%d\n", + (di->di_magic>>8) & 0xff, di->di_magic & 0xff, + di->di_mode, di->di_version, di->di_format, di->di_onlink); + printf(" uid:%d gid:%d nlink:%d projid:%d\n", + di->di_uid, di->di_gid, di->di_nlink, (uint)di->di_projid); + printf(" atime:%d mtime:%d ctime:%d\n", + di->di_atime.t_sec, di->di_mtime.t_sec, di->di_ctime.t_sec); + printf(" size:0x%Lx nblks:0x%Lx exsize:%d nextents:%d" + " anextents:%d\n", + di->di_size, di->di_nblocks, di->di_extsize, di->di_nextents, + (int)di->di_anextents); + printf(" forkoff:%d dmevmask:0x%x dmstate:%d flags:0x%x " + "gen:%d\n", + (int)di->di_forkoff, di->di_dmevmask, (int)di->di_dmstate, + (int)di->di_flags, di->di_gen); +} /* xlog_recover_print_inode_core */ + + +STATIC void +xlog_recover_print_inode( + xlog_recover_item_t *item) +{ + xfs_inode_log_format_t *f; + int attr_index; + int hasdata; + int hasattr; + + f = (xfs_inode_log_format_t *)item->ri_buf[0].i_addr; + ASSERT(item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)); + printf(" INODE: #regs:%d ino:0x%Lx flags:0x%x dsize:%d\n", + f->ilf_size, f->ilf_ino, f->ilf_fields, f->ilf_dsize); + + /* core inode comes 2nd */ + ASSERT(item->ri_buf[1].i_len == sizeof(xfs_dinode_core_t)); + xlog_recover_print_inode_core((xfs_dinode_core_t *) + item->ri_buf[1].i_addr); + 
+ hasdata = (f->ilf_fields & XFS_ILOG_DFORK) != 0; + hasattr = (f->ilf_fields & XFS_ILOG_AFORK) != 0; + /* does anything come next */ + switch (f->ilf_fields & (XFS_ILOG_DFORK | XFS_ILOG_DEV | XFS_ILOG_UUID)) { + case XFS_ILOG_DEXT: { + ASSERT(f->ilf_size == 3 + hasattr); + printf(" DATA FORK EXTENTS inode data:\n"); + if (print_inode && print_data) { + xlog_recover_print_data(item->ri_buf[2].i_addr, + item->ri_buf[2].i_len); + } + break; + } + case XFS_ILOG_DBROOT: { + ASSERT(f->ilf_size == 3 + hasattr); + printf(" DATA FORK BTREE inode data:\n"); + if (print_inode && print_data) { + xlog_recover_print_data(item->ri_buf[2].i_addr, + item->ri_buf[2].i_len); + } + break; + } + case XFS_ILOG_DDATA: { + ASSERT(f->ilf_size == 3 + hasattr); + printf(" DATA FORK LOCAL inode data:\n"); + if (print_inode && print_data) { + xlog_recover_print_data(item->ri_buf[2].i_addr, + item->ri_buf[2].i_len); + } + break; + } + case XFS_ILOG_DEV: { + ASSERT(f->ilf_size == 2 + hasattr); + printf(" DEV inode: no extra region\n"); + break; + } + case XFS_ILOG_UUID: { + ASSERT(f->ilf_size == 2 + hasattr); + printf(" UUID inode: no extra region\n"); + break; + } + + + case 0: { + ASSERT(f->ilf_size == 2 + hasattr); + break; + } + default: { + xlog_panic("xlog_print_trans_inode: illegal inode type"); + } + } + + if (hasattr) { + attr_index = 2 + hasdata; + switch (f->ilf_fields & XFS_ILOG_AFORK) { + case XFS_ILOG_AEXT: { + ASSERT(f->ilf_size == 3 + hasdata); + printf(" ATTR FORK EXTENTS inode data:\n"); + if (print_inode && print_data) { + xlog_recover_print_data( + item->ri_buf[attr_index].i_addr, + item->ri_buf[attr_index].i_len); + } + break; + } + case XFS_ILOG_ABROOT: { + ASSERT(f->ilf_size == 3 + hasdata); + printf(" ATTR FORK BTREE inode data:\n"); + if (print_inode && print_data) { + xlog_recover_print_data( + item->ri_buf[attr_index].i_addr, + item->ri_buf[attr_index].i_len); + } + break; + } + case XFS_ILOG_ADATA: { + ASSERT(f->ilf_size == 3 + hasdata); + printf(" ATTR FORK LOCAL 
inode data:\n"); + if (print_inode && print_data) { + xlog_recover_print_data( + item->ri_buf[attr_index].i_addr, + item->ri_buf[attr_index].i_len); + } + break; + } + default: { + xlog_panic("xlog_print_trans_inode: " + "illegal inode log flag"); + } + } + } + +} /* xlog_recover_print_inode */ + +/* Print an EFD log item: one header line, then every extent held in efd_extents[], four per output line. */ +STATIC void +xlog_recover_print_efd( + xlog_recover_item_t *item) +{ + xfs_efd_log_format_t *f; + xfs_extent_t *ex; + int i; + + f = (xfs_efd_log_format_t *)item->ri_buf[0].i_addr; + /* + * An xfs_efd_log_format structure contains a variable length array + * as the last field. Each element is of size xfs_extent_t. + */ + ASSERT(item->ri_buf[0].i_len == + sizeof(xfs_efd_log_format_t) + sizeof(xfs_extent_t) * + (f->efd_nextents-1)); + printf(" EFD: #regs: %d num_extents: %d id: 0x%Lx\n", + f->efd_size, f->efd_nextents, f->efd_efi_id); + ex = f->efd_extents; + printf(" "); + for (i=0; i < f->efd_nextents; i++) { /* bound by the extent count the ASSERT above sized the buffer for, not by efd_size (#regs) */ + printf("(s: 0x%Lx, l: %d) ", ex->ext_start, ex->ext_len); + if (i % 4 == 3) + printf("\n"); + ex++; + } + if (i % 4 != 0) printf("\n"); + return; +} /* xlog_recover_print_efd */ + + +STATIC void +xlog_recover_print_efi( + xlog_recover_item_t *item) +{ + xfs_efi_log_format_t *f; + xfs_extent_t *ex; + int i; + + f = (xfs_efi_log_format_t *)item->ri_buf[0].i_addr; + /* + * An xfs_efi_log_format structure contains a variable length array + * as the last field. Each element is of size xfs_extent_t.
+ */ + ASSERT(item->ri_buf[0].i_len == + sizeof(xfs_efi_log_format_t) + sizeof(xfs_extent_t) * + (f->efi_nextents-1)); + + printf(" EFI: #regs:%d num_extents:%d id:0x%Lx\n", + f->efi_size, f->efi_nextents, f->efi_id); + ex = f->efi_extents; + printf(" "); + for (i=0; i< f->efi_nextents; i++) { + printf("(s: 0x%Lx, l: %d) ", ex->ext_start, ex->ext_len); + if (i % 4 == 3) printf("\n"); + ex++; + } + if (i % 4 != 0) printf("\n"); + return; +} /* xlog_recover_print_efi */ + +void +xlog_recover_print_logitem( + xlog_recover_item_t *item) +{ + switch (ITEM_TYPE(item)) { + case XFS_LI_BUF: + case XFS_LI_6_1_BUF: + case XFS_LI_5_3_BUF: { + xlog_recover_print_buffer(item); + break; + } + case XFS_LI_INODE: + case XFS_LI_6_1_INODE: + case XFS_LI_5_3_INODE: { + xlog_recover_print_inode(item); + break; + } + case XFS_LI_EFD: { + xlog_recover_print_efd(item); + break; + } + case XFS_LI_EFI: { + xlog_recover_print_efi(item); + break; + } + case XFS_LI_DQUOT: { + xlog_recover_print_dquot(item); + break; + } + case XFS_LI_QUOTAOFF: { + xlog_recover_print_quotaoff(item); + break; + } + default: { + printf("xlog_recover_print_logitem: illegal type\n"); + break; + } + } +} /* xlog_recover_print_logitem */ + +void +xlog_recover_print_item(xlog_recover_item_t *item) +{ + int i; + + switch (ITEM_TYPE(item)) { + case XFS_LI_BUF: { + printf("BUF"); + break; + } + case XFS_LI_INODE: { + printf("INO"); + break; + } + case XFS_LI_EFD: { + printf("EFD"); + break; + } + case XFS_LI_EFI: { + printf("EFI"); + break; + } + case XFS_LI_6_1_BUF: { + printf("6.1 BUF"); + break; + } + case XFS_LI_5_3_BUF: { + printf("5.3 BUF"); + break; + } + case XFS_LI_6_1_INODE: { + printf("6.1 INO"); + break; + } + case XFS_LI_5_3_INODE: { + printf("5.3 INO"); + break; + } + case XFS_LI_DQUOT: { + printf("DQ "); + break; + } + case XFS_LI_QUOTAOFF: { + printf("QOFF"); + break; + } + default: { + cmn_err(CE_PANIC, "xlog_recover_print_item: illegal type"); + break; + } + } + +/* type isn't filled in yet + 
printf("ITEM: type: %d cnt: %d total: %d ", + item->ri_type, item->ri_cnt, item->ri_total); +*/ + printf(": cnt:%d total:%d ", item->ri_cnt, item->ri_total); + for (i=0; i < item->ri_cnt; i++) { + printf("a:%p len:%d ", + item->ri_buf[i].i_addr, item->ri_buf[i].i_len); + } + printf("\n"); + xlog_recover_print_logitem(item); +} /* xlog_recover_print_item */ + +void +xlog_recover_print_trans(xlog_recover_t *trans, + xlog_recover_item_t *itemq, + int print) +{ + xlog_recover_item_t *first_item, *item; + + if (print < 3) + return; + + print_xlog_record_line(); + xlog_recover_print_trans_head(trans); + item = first_item = itemq; + do { + xlog_recover_print_item(item); + item = item->ri_next; + } while (first_item != item); +} /* xlog_recover_print_trans */ diff --git a/logprint/log_print_trans.c b/logprint/log_print_trans.c new file mode 100644 index 000000000..9b830468a --- /dev/null +++ b/logprint/log_print_trans.c @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "logprint.h" + +void +xlog_recover_print_trans_head( + xlog_recover_t *tr) +{ + printf("TRANS: tid:0x%x type:%s #items:%d trans:0x%x q:%p\n", + tr->r_log_tid, trans_type[tr->r_theader.th_type], + tr->r_theader.th_num_items, + tr->r_theader.th_tid, tr->r_itemq); +} /* xlog_recover_print_trans_head */ + +int +xlog_recover_do_trans(xlog_t *log, + xlog_recover_t *trans, + int pass) +{ + xlog_recover_print_trans(trans, trans->r_itemq, 3); + return 0; +} /* xlog_recover_do_trans */ + +static int print_record_header=0; + +void +xfs_log_print_trans(xlog_t *log, + int print_block_start) +{ + xfs_daddr_t head_blk, tail_blk; + + if (xlog_find_tail(log, &head_blk, &tail_blk, 0)) + exit(1); + + printf(" log tail: %lld head: %lld state: %s\n", + (__int64_t)tail_blk, + (__int64_t)head_blk, + (tail_blk == head_blk)?"":""); + + if (print_block_start != -1) { + printf(" override tail: %lld\n", + (__int64_t)print_block_start); + tail_blk = print_block_start; + } + printf("\n"); + + print_record_header=1; + if (xlog_do_recovery_pass(log, head_blk, tail_blk, XLOG_RECOVER_PASS1)) + exit(1); + +} /* xfs_log_print_trans */ + +static int +header_check_uuid(xfs_mount_t *mp, xlog_rec_header_t *head) +{ + char uu_log[64], uu_sb[64]; + + if (!uuid_compare(mp->m_sb.sb_uuid, head->h_fs_uuid)) return 0; + + uuid_unparse(mp->m_sb.sb_uuid, uu_sb); + uuid_unparse(head->h_fs_uuid, uu_log); + + printf("* ERROR: mismatched uuid in log\n" + "* SB : %s\n* log: %s\n", + uu_sb, uu_log); + + return 1; +} + +int 
+xlog_header_check_recover(xfs_mount_t *mp, xlog_rec_header_t *head) +{ + if (print_record_header) + printf("\nLOG REC AT LSN cycle %d block %d (0x%x, 0x%x)\n", + CYCLE_LSN(head->h_lsn, ARCH_CONVERT), + BLOCK_LSN(head->h_lsn, ARCH_CONVERT), + CYCLE_LSN(head->h_lsn, ARCH_CONVERT), + BLOCK_LSN(head->h_lsn, ARCH_CONVERT)); + + if (INT_GET(head->h_magicno, ARCH_CONVERT) != XLOG_HEADER_MAGIC_NUM) { + + printf("* ERROR: bad magic number in log header: 0x%x\n", + INT_GET(head->h_magicno, ARCH_CONVERT)); + + } else if (header_check_uuid(mp, head)) { + + /* failed - fall through */ + + } else if (INT_GET(head->h_fmt, ARCH_CONVERT) != XLOG_FMT) { + + printf("* ERROR: log format incompatible (log=%d, ours=%d)\n", + INT_GET(head->h_fmt, ARCH_CONVERT), XLOG_FMT); + + } else { + /* everything is ok */ + return 0; + } + + /* bail out now or just carry on regardless */ + if (print_exit) + xlog_exit("Bad log"); + + return 0; +} + +int +xlog_header_check_mount(xfs_mount_t *mp, xlog_rec_header_t *head) +{ + if (uuid_is_null(head->h_fs_uuid)) return 0; + if (header_check_uuid(mp, head)) { + /* bail out now or just carry on regardless */ + if (print_exit) + xlog_exit("Bad log"); + } + return 0; +} diff --git a/logprint/logprint.c b/logprint/logprint.c new file mode 100644 index 000000000..16a652d56 --- /dev/null +++ b/logprint/logprint.c @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. 
Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "logprint.h" +#include +#include + +int print_data; +int print_only_data; +int print_inode; +int print_quota; +int print_buffer; +int print_transactions; +int print_overwrite; +int print_no_data; +int print_no_print; +int print_exit = 1; /* -e is now default. specify -c to override */ + +libxfs_init_t x; +xfs_mount_t mp; + +void +usage(void) +{ + fprintf(stderr, "Usage: %s [options...] \n\n\ +Options:\n\ + -c try to continue if error found in log\n\ + -l filename of external log\n\ + -n don't try and interpret log data\n\ + -o print buffer data in hex\n\ + -s block # to start printing\n\ + -v print \"overwrite\" data\n\ + -t print out transactional view\n\ + -b in transactional view, extract buffer info\n\ + -i in transactional view, extract inode info\n\ + -q in transactional view, extract quota info\n\ + -D print only data; no decoding\n\ + -V print version information\n", + progname); + exit(1); +} + +int +logstat(libxfs_init_t *x) +{ + int fd; + char buf[BBSIZE]; + xfs_sb_t *sb; + + /* On Linux we always read the superblock of the + * filesystem. We need this to get the length of the + * log. Otherwise we end up seeking forever. 
-- mkp + */ + if ((fd = open(x->dname, O_RDONLY)) == -1) { + fprintf(stderr, " Can't open device %s: %s\n", + x->dname, strerror(errno)); + exit(1); + } + lseek64(fd, 0, SEEK_SET); + if (read(fd, buf, sizeof(buf)) != sizeof(buf)) { + fprintf(stderr, " read of XFS superblock failed\n"); + exit(1); + } + close (fd); + + /* + * Conjure up a mount structure + */ + libxfs_xlate_sb(buf, &(mp.m_sb), 1, ARCH_CONVERT, XFS_SB_ALL_BITS); + sb = &(mp.m_sb); + mp.m_blkbb_log = sb->sb_blocklog - BBSHIFT; + + x->logBBsize = XFS_FSB_TO_BB(&mp, sb->sb_logblocks); + x->logBBstart = XFS_FSB_TO_DADDR(&mp, sb->sb_logstart); + + if (!x->logname && sb->sb_logstart == 0) { + fprintf(stderr, " external log device not specified\n\n"); + usage(); + /*NOTREACHED*/ + } + + if (x->logname && *x->logname) { /* External log */ + if ((fd = open(x->logname, O_RDONLY)) == -1) { + fprintf(stderr, "Can't open file %s: %s\n", + x->logname, strerror(errno)); + exit(1); + } + close(fd); + } else { /* Internal log */ + x->logdev = x->ddev; + } + + return 0; +} + +int +main(int argc, char **argv) +{ + int print_start = -1; + int c; + int logfd; + xlog_t log = {0}; + + progname = basename(argv[0]); + while ((c = getopt(argc, argv, "bel:iqnors:tDVvc")) != EOF) { + switch (c) { + case 'D': { + print_only_data++; + print_data++; + break; + } + case 'b': { + print_buffer++; + break; + } + case 'l': { + x.logname = optarg; + x.lisfile = 1; + break; + } + case 'c': { + /* default is to stop on error. + * -c turns this off. 
+ */ + print_exit=0; + break; + } + case 'e': { + /* -e is now default + */ + print_exit++; + break; + } + case 'i': { + print_inode++; + break; + } + case 'q': { + print_quota++; + break; + } + case 'n': { + print_no_data++; + break; + } + case 'o': { + print_data++; + break; + } + case 's': { + print_start = atoi(optarg); + break; + } + case 't': { + print_transactions++; + break; + } + case 'V': { + printf("%s version %s\n", progname, VERSION); + break; + } + case 'v': { + print_overwrite++; + break; + } + case '?': { + usage(); + } + } + } + + if (argc - optind != 1) + usage(); + + x.dname = argv[optind]; + + if (x.dname == NULL) + usage(); + + x.notvolok = 1; + x.isreadonly = LIBXFS_ISINACTIVE; + x.notvolmsg = "You should never see this message.\n"; + + printf("xfs_logprint:\n"); + if (!libxfs_init(&x)) + exit(1); + + logstat(&x); + + logfd=(x.logfd<0)?(x.dfd):(x.logfd); + + printf(" data device: 0x%Lx\n", x.ddev); + + if (x.logname) { + printf(" log file: \"%s\" ", x.logname); + } else { + printf(" log device: 0x%Lx ", x.logdev); + } + + printf("daddr: %Ld length: %Ld\n\n", + (__int64_t)x.logBBstart, (__int64_t)x.logBBsize); + + ASSERT(x.logBBstart <= INT_MAX); + + /* init log structure */ + log.l_dev = x.logdev; + log.l_logsize = BBTOB(x.logBBsize); + log.l_logBBstart = x.logBBstart; + log.l_logBBsize = x.logBBsize; + log.l_mp = &mp; + + if (print_transactions) + xfs_log_print_trans(&log, print_start); + else + xfs_log_print(&log, logfd, print_start); + + exit(0); +} diff --git a/logprint/logprint.h b/logprint/logprint.h new file mode 100644 index 000000000..17eb3ba46 --- /dev/null +++ b/logprint/logprint.h @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef XFS_LOGPRINT_H +#define XFS_LOGPRINT_H + +#include +#include +#include + +/* + * define the userlevel xlog_t to be the subset of the kernel's + * xlog_t that we actually need to get our work done, avoiding + * the need to define any exotic kernel types in userland. 
+ */ +typedef struct log { + xfs_lsn_t l_tail_lsn; /* lsn of 1st LR w/ unflush buffers */ + xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ + xfs_mount_t *l_mp; /* mount point */ + dev_t l_dev; /* dev_t of log */ + xfs_daddr_t l_logBBstart; /* start block of log */ + int l_logsize; /* size of log in bytes */ + int l_logBBsize; /* size of log in 512 byte chunks */ + int l_roundoff; /* round off error of all iclogs */ + int l_curr_cycle; /* Cycle number of log writes */ + int l_prev_cycle; /* Cycle # b4 last block increment */ + int l_curr_block; /* current logical block of log */ + int l_prev_block; /* previous logical block of log */ + int l_iclog_size; /* size of log in bytes */ + int l_iclog_size_log;/* log power size of log */ + int l_iclog_bufs; /* number of iclog buffers */ + int l_grant_reserve_cycle; /* */ + int l_grant_reserve_bytes; /* */ + int l_grant_write_cycle; /* */ + int l_grant_write_bytes; /* */ +} xlog_t; + +#include +#include +#include +#include +#include + + +/* + * macros mapping kernel code to user code + */ +#define STATIC static +#define EFSCORRUPTED EIO +#define XFS_ERROR(e) (e) + +#define xlog_warn(fmt,args...) \ + ( fprintf(stderr,fmt,## args), fputc('\n', stderr) ) +#define cmn_err(sev,fmt,args...) \ + xlog_warn(fmt,## args) +#define xlog_exit(fmt,args...) \ + ( xlog_warn(fmt,## args), exit(1) ) +#define xlog_panic(fmt,args...) 
\ + xlog_exit(fmt,## args) + +#define xlog_get_bp(nbblks, mp) libxfs_getbuf(x.logdev, 0, (nbblks)) +#define xlog_put_bp(bp) libxfs_putbuf(bp) +#define xlog_bread(log,blkno,nbblks,bp) \ + (libxfs_readbufr(x.logdev, \ + (log)->l_logBBstart+(blkno), bp, (nbblks), 1), 0) + +#define kmem_zalloc(size, foo) calloc(size,1) +#define kmem_free(ptr, foo) free(ptr) +#define kmem_realloc(ptr, len, old, foo) realloc(ptr, len) + +/* command line flags */ +extern int print_data; +extern int print_only_data; +extern int print_inode; +extern int print_quota; +extern int print_buffer; +extern int print_transactions; +extern int print_overwrite; + +extern int print_exit; +extern int print_no_data; +extern int print_no_print; + +/* exports */ + +extern char *trans_type[]; + +/* libxfs parameters */ +extern libxfs_init_t x; + +extern void xfs_log_print_trans(xlog_t *log, + int print_block_start); + +extern void xfs_log_print( xlog_t *log, + int fd, + int print_block_start); + +extern int xlog_find_zeroed(xlog_t *log, xfs_daddr_t *blk_no); +extern int xlog_find_cycle_start(xlog_t *log, xfs_buf_t *bp, + xfs_daddr_t first_blk, xfs_daddr_t *last_blk, uint cycle); +extern int xlog_find_tail(xlog_t *log, xfs_daddr_t *head_blk, + xfs_daddr_t *tail_blk, int readonly); + +extern int xlog_test_footer(xlog_t *log); +extern int xlog_recover(xlog_t *log, int readonly); +extern void xlog_recover_print_data(xfs_caddr_t p, int len); +extern void xlog_recover_print_logitem(xlog_recover_item_t *item); +extern void xlog_recover_print_trans_head(xlog_recover_t *tr); +extern int xlog_print_find_oldest(xlog_t *log, xfs_daddr_t *last_blk); + +extern void print_xlog_op_line(void); +extern void print_xlog_record_line(void); +extern void print_stars(void); + +/* for transactional view */ +extern void xlog_recover_print_trans_head(xlog_recover_t *tr); + +extern void xlog_recover_print_trans( xlog_recover_t *trans, + xlog_recover_item_t *itemq, + int print); + +extern int xlog_do_recovery_pass( xlog_t *log, + 
xfs_daddr_t head_blk, + xfs_daddr_t tail_blk, + int pass); +extern int xlog_recover_do_trans( xlog_t *log, + xlog_recover_t *trans, + int pass); +extern int xlog_header_check_recover( xfs_mount_t *mp, + xlog_rec_header_t *head); +extern int xlog_header_check_mount( xfs_mount_t *mp, + xlog_rec_header_t *head); + +#endif /* XFS_LOGPRINT_H */ diff --git a/man/Makefile b/man/Makefile new file mode 100644 index 000000000..139d5ae85 --- /dev/null +++ b/man/Makefile @@ -0,0 +1,41 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = .. 
+include $(TOPDIR)/include/builddefs + +SUBDIRS = man1 man2 man3 man5 man8 + +default install : $(SUBDIRS) + $(SUBDIRS_MAKERULE) + +include $(BUILDRULES) diff --git a/man/man5/Makefile b/man/man5/Makefile new file mode 100644 index 000000000..8602606f0 --- /dev/null +++ b/man/man5/Makefile @@ -0,0 +1,49 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = ../.. 
+include $(TOPDIR)/include/builddefs + +MAN_SECTION = 5 + +MAN_PAGES = $(shell echo *.$(MAN_SECTION)) +MAN_DEST = $(XFS_CMDS_MAN_DIR)/man$(MAN_SECTION) +LSRCFILES = $(MAN_PAGES) + + +default : $(MAN_PAGES) + +include $(BUILDRULES) + +install : default + $(INSTALL) -m 755 -d $(MAN_DEST) + $(INSTALL_MAN) diff --git a/man/man5/xfs.5 b/man/man5/xfs.5 new file mode 100644 index 000000000..a358f4aaf --- /dev/null +++ b/man/man5/xfs.5 @@ -0,0 +1,114 @@ +.TH xfs 5 +.SH NAME +xfs \- layout of the XFS filesystem +.SH DESCRIPTION +An XFS filesystem can reside on a regular disk partition or on a +logical volume (see +.IR lvm (8)). +An XFS filesystem has up to three parts: +a data section, a log section, and a real-time section. +For disk partition filesystems, +the real-time section is absent, and +the log area is contained within the data section. +For logical volume filesystems, +the real-time section is optional, +and the log section can be separate from the data section +or contained within it. +The filesystem sections are divided into a certain number of +.IR blocks , +whose size is specified at +.IR mkfs (8) +time with the +.B \-b +option. +.PP +The data section contains all the filesystem metadata +(inodes, directories, indirect blocks) +as well as the user file data for ordinary (non-real-time) files +and the log area if the log is +.I internal +to the data section. +The data section is divided into a number of +\f2allocation groups\f1. +The number and size of the allocation groups are chosen by +.I mkfs +so that there is normally a small number of equal-sized groups. +The number of allocation groups controls the amount of parallelism +available in file and block allocation. +It should be increased from +the default if there is sufficient memory and a lot of allocation +activity. +The number of allocation groups should not be set very high, +since this can cause large amounts of CPU time to be used by +the filesystem, especially when the filesystem is nearly full. 
+More allocation groups are added (of the original size) when +.IR xfs_growfs (8) +is run. +.PP +The log section (or area, if it is internal to the data section) +is used to store changes to filesystem metadata while the +filesystem is running until those changes are made to the data +section. +It is written sequentially during normal operation and read only +during mount. +When mounting a filesystem after a crash, the log +is read to complete operations that were +in progress at the time of the crash. +.PP +The real-time section is used to store the data of real-time files. +These files had an attribute bit set through +.IR fcntl (2) +after file creation, before any data was written to the file. +The real-time section is divided into a number of +.I extents +of fixed size (specified at +.I mkfs +time). +Each file in the real-time section has an extent size that +is a multiple of the real-time section extent size. +.PP +Each allocation group contains several data structures. +The first sector contains the superblock. +For allocation groups after the first, +the superblock is just a copy and is not updated after +.IR mkfs . +The next three sectors contain information for block and inode +allocation within the allocation group. +Also contained within each allocation group are data structures +to locate free blocks and inodes; +these are located through the header structures. +.PP +Each XFS filesystem is labeled with a unique +universal identifier (UUID). +The UUID is stored in every allocation group header and +is used to help distinguish one XFS filesystem from another, +therefore you should avoid using +.I dd +or other block-by-block copying programs to copy XFS filesystems. +If two XFS filesystems on the same machine have the same UUID, +.I xfsdump +may become confused when doing incremental and resumed dumps. +(See +.IR xfsdump (8) +for more details.) +.I xfs_copy +or +.IR xfsdump / xfsrestore +are recommended for making copies of XFS filesystems. 
+.PP +All these data structures are subject to change, and the +headers that specify their layout on disk are not provided. +.SH SEE ALSO +fs(5), +mkfs.xfs(8), +xfs_bmap(8), +xfs_check(8), +xfs_copy(8), +xfs_estimate(8), +xfs_growfs(8), +xfs_logprint(8), +xfs_repair(8), +xfsdump(8), +xfsrestore(8), +fcntl(2), +lvm(8). diff --git a/man/man8/Makefile b/man/man8/Makefile new file mode 100644 index 000000000..9ccd9c446 --- /dev/null +++ b/man/man8/Makefile @@ -0,0 +1,49 @@ +#! gmake +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = ../.. 
+include $(TOPDIR)/include/builddefs + +MAN_SECTION = 8 + +MAN_PAGES = $(shell echo *.$(MAN_SECTION)) +MAN_DEST = $(XFS_CMDS_MAN_DIR)/man$(MAN_SECTION) +LSRCFILES = $(MAN_PAGES) + +default : $(MAN_PAGES) + +include $(BUILDRULES) + +install : default + $(INSTALL) -m 755 -d $(MAN_DEST) + $(INSTALL_MAN) diff --git a/man/man8/fsck.xfs.8 b/man/man8/fsck.xfs.8 new file mode 100644 index 000000000..128691bee --- /dev/null +++ b/man/man8/fsck.xfs.8 @@ -0,0 +1,23 @@ +.TH fsck.xfs 8 +.SH NAME +fsck.xfs \- do nothing, successfully +.SH SYNOPSIS +.nf +\f3fsck.xfs\f1 [ \f3...\f1] +.fi +.SH DESCRIPTION +.I fsck.xfs +is called by the generic Linux +.IR fsck (8) +program at startup to check and repair an XFS filesystem. +XFS is a journalled filesystem and performs recovery at +.IR mount (8) +time if necessary, so +.I fsck.xfs +simply exits with a zero exit status. +.SH FILES +.IR /etc/fstab . +.SH SEE ALSO +fsck(8), +fstab(5), +xfs(5). diff --git a/man/man8/mkfs.xfs.8 b/man/man8/mkfs.xfs.8 new file mode 100644 index 000000000..7366bcf6d --- /dev/null +++ b/man/man8/mkfs.xfs.8 @@ -0,0 +1,485 @@ +.TH mkfs.xfs 8 +.SH NAME +mkfs.xfs \- construct an XFS filesystem +.SH SYNOPSIS +.nf +\f3mkfs.xfs\f1 [ \f3\-b\f1 subopt=value ] \c +[ \f3\-d\f1 subopt[=value] ] [ \f3\-i\f1 subopt=value ] + [ \f3\-l\f1 subopt[=value] ] \c +[ \f3\-n\f1 subopt[=value] ] \c +[ \f3\-p\f1 protofile ] [ \f3\-q\f1 ] + [ \f3\-r\f1 subopt[=value] ] [ \f3\-C\f1 ] device +.fi +.SH DESCRIPTION +.I mkfs.xfs +constructs an XFS filesystem by writing on a special +file using the values found in the arguments of the command line. +It is invoked automatically by \f2mkfs\f1(8) when \f2mkfs\f1 is +given the \f3\-t xfs\f1 option. +.PP +In its simplest (and most commonly used) form, the size of the +filesystem is determined from the disk driver. 
As an example, to make +a filesystem with an internal log on the first partition on the first +SCSI disk, use: +.PP +.nf + mkfs.xfs /dev/sda1 +.fi +.PP +The metadata log can be placed on another device to reduce the number +of disk seeks. To create a filesystem on the first partition on the +first SCSI disk with a 10000 block log located on the first partition +on the second SCSI disk, use: +.PP +.nf + mkfs.xfs -l logdev=/dev/sdb1,size=10000b /dev/sda1 +.fi +.PP +Each of the +.I subopt=value +elements in the argument list above can be given as multiple comma-separated +.I subopt=value +suboptions if multiple suboptions apply to the same option. +Equivalently, each main option can be given multiple times with +different suboptions. +For example, +.B \-l internal,size=10000b +and +.B \-l internal \-l size=10000b +are equivalent. +.PP +In the descriptions below, sizes are given in bytes, blocks, kilobytes, megabytes, +or gigabytes. +Sizes are treated as hexadecimal if prefixed by 0x or 0X, +octal if prefixed by 0, or decimal otherwise. +If suffixed with \f3b\f1 then the size is converted by multiplying it +by the filesystem's block size. +If suffixed with \f3k\f1 then the size is converted by multiplying it by 1024. +If suffixed with \f3m\f1 then the size is converted by multiplying it by 1048576 (1024 * 1024). +If suffixed with \f3g\f1 then the size is converted by multiplying it by +1073741824 (1024 * 1024 * 1024). +.TP +.B \-b +Block size options. +.IP +This option specifies the fundamental block size of the filesystem. +The valid suboptions are: +.BI log= value +and +\f3size=\f1\f2value\f1; +only one can be supplied. +The block size is specified either as a base two logarithm value with +.BR log= , +or in bytes with +.BR size= . +The default value is 4096 bytes (4 KB). The minimum value for block +size is 512; the maximum is 65536 (64 KB). XFS on Linux currently +only supports 4KB blocks. +.TP +.B \-d +Data section options. 
+.IP +These options specify the location, size, and other parameters of the +data section of the filesystem. +The valid suboptions are: +\f3agcount=\f1\f2value\f1, +\f3file\f1[\f3=\f1\f2value\f1], +\f3name=\f1\f2value\f1, +\f3size=\f1\f2value\f1, +\f3sunit=\f1\f2value\f1, +\f3swidth=\f1\f2value\f1, +and +\f3unwritten\f1[\f3=\f1\f2value\f1]. +.IP +The +.B agcount +suboption is used to specify the number of allocation groups. +The data section of the filesystem is divided into allocation groups +to improve the performance of XFS. +More allocation groups imply that more parallelism can be achieved +when allocating blocks and inodes. +The minimum allocation group size is 16 MB; +the maximum size is just under 4 GB. +The data section of the filesystem is divided into +.I agcount +allocation groups (default value 8, unless the filesystem is smaller +than 128 MB or larger than 8 GB). +Setting +.I agcount +to a very large number should be avoided, since this causes an unreasonable +amount of CPU time to be used when the filesystem is close to full. +.IP +The +.B name +suboption can be used to specify the name of the special file containing +the filesystem. +In this case, the log section must be specified as +.B internal +(with a size, see the +.B \-l +option below) and there can be no real-time section. +Note that the default log in this case is an internal log with +at least 1000 blocks, actual size depending on the filesystem block +size and the directory block size. +.IP +The +.B file +suboption is used to specify that the file given by the +.B name +suboption is a regular file. +The suboption value is either 0 or 1, +with 1 signifying that the file is regular. +This suboption is used only to make a filesystem image +(for instance, a miniroot image). +If the value is omitted then 1 is assumed. +.IP +The +.B size +suboption is used to specify the size of the data section. +This suboption is required if +.B \-d file[=1] +is given. 
+Otherwise, it is only needed if the filesystem should occupy +less space than the size of the special file. +.IP +The +.B sunit +suboption is used to specify the stripe unit for a RAID device or a +logical volume. The suboption value has to be specified in 512-byte +block units. This suboption ensures that data allocations will be +stripe unit aligned when the current end of file is being extended and +the file size is larger than 512KB. Also inode allocations and the +internal log will be stripe unit aligned. +.IP +The +.B swidth +suboption is used to specify the stripe width for a RAID device or a +striped logical volume. +The suboption value has to be specified in 512-byte block units. +This suboption is required if +.B \-d sunit +has been specified and it has to be a multiple of the +.B \-d sunit +suboption. +The stripe width will be the preferred iosize returned in the +.IR stat (2) +system call. +.IP +The +.B unwritten +suboption is used to specify whether unwritten extents are flagged as such, +or not. +The suboption value is either 0 or 1, with 1 signifying that unwritten +extent flagging should occur. +If the suboption is omitted, unwritten extent flagging is enabled. +If unwritten extents are flagged, filesystem write performance +will be negatively affected for preallocated file extents, since +extra filesystem transactions are required to convert extent flags +for the range of the file written. +This suboption should be disabled if the filesystem +needs to be used on operating system versions which do not support the +flagging capability. +.TP +.B \-i +Inode options. +.IP +This option specifies the inode size of the filesystem, and other +inode allocation parameters. +The XFS inode contains a fixed-size part and a variable-size part. 
+The variable-size part, whose size is affected by this option, can contain: +directory data, for small directories; +attribute data, for small attribute sets; +symbolic link data, for small symbolic links; +the extent list for the file, for files with a small number of extents; +and the root of a tree describing the location of extents for the file, +for files with a large number of extents. +.IP +The valid suboptions for specifying inode size are: +\f3log=\f1\f2value\f1, +\f3perblock=\f1\f2value\f1, +and +\f3size=\f1\f2value\f1; +only one can be supplied. +The inode size is specified either as a base two logarithm value with +.BR log= , +in bytes with +.BR size= , +or as the number fitting in a filesystem block with +.BR perblock= . +The minimum (and default) value is 256 bytes. +The maximum value is 2048 (2 KB) subject to the restriction that +the inode size cannot exceed one half of the filesystem block size. +.IP +The option \f3maxpct=\f1\f2value\f1 specifies the maximum percentage +of space in the filesystem that can be allocated to inodes. +The default value is 25%. +Setting the value to 0 means that +essentially all of the filesystem can become inode blocks. +.IP +The option +.BI align[= value ] +is used to specify that inode allocation is or is not aligned. +The value is either 0 or 1, +with 1 signifying that inodes are allocated aligned. +If the value is omitted, 1 is assumed. +The default is that inodes are aligned. +Aligned inode access is normally more efficient than unaligned access; +alignment must be established at the time the filesystem is created, +since inodes are allocated at that time. +This option can be used to turn off inode alignment when the +filesystem needs to be mountable by a version of IRIX +that does not have the inode alignment feature +(any release of IRIX before 6.2, and IRIX 6.2 without XFS patches). +.TP +.B \-l +Log section options.
+.IP +These options specify the location, size, and other parameters of the +log section of the filesystem. +The valid suboptions are: +.BI internal[= value ] +and +\f3size=\f1\f2value\f1. +.IP +The +.B internal +suboption is used to specify that the log section is a piece of +the data section instead of being another device or logical volume. +The suboption value is either 0 or 1, +with 1 signifying that the log is internal. +If the value is omitted, 1 is assumed. +.IP +The +.B size +suboption is used to specify the size of the log section. +This suboption is required if +.B \-l internal[=1] +is given. +Otherwise, it is only needed if the log section of the filesystem +should occupy less space than the size of the special file. +The size is specified in bytes or blocks, with a \f3b\f1 suffix +meaning multiplication by the filesystem block size, as described above. +The overriding minimum value for size is 512 blocks. +With some combinations of filesystem block size, inode size, +and directory block size, the minimum log size is larger than 512 blocks. +.TP +.B \-n +Naming options. +.IP +These options specify the version and size parameters for the naming +(directory) area of the filesystem. +The valid suboptions are: +\f3log=\f1\f2value\f1, +\f3size=\f1\f2value\f1, +and +\f3version=\f1\f2value\f1. +The naming (directory) version is 1 or 2, +defaulting to 1 if unspecified. +With version 2 directories, +the directory block size can be any power of 2 size +from the filesystem block size up to 65536. +The block size is specified either as a base two logarithm value with +.BR log= , +or in bytes with +.BR size= . +The default size value for version 2 directories is 4096 bytes (4 KB), +unless the filesystem block size is larger than 4096, +in which case the default value is the filesystem block size. +For version 1 directories the block size is the same as the +filesystem block size. 
+.TP +\f3\-p\f1 \f2protofile\f1 +If the optional +.B \-p +.I protofile +argument is given, +.I mkfs.xfs +uses +.I protofile +as a prototype file +and takes its directions from that file. +The blocks and inodes +specifiers in the +.I protofile +are provided for backwards compatibility, but are otherwise unused. +The prototype file +contains tokens separated by spaces or +newlines. +A sample prototype specification follows (line numbers have been added to +aid in the explanation): +.nf +.sp .8v +.in +5 +\f71 /stand/\f1\f2diskboot\f1\f7 +2 4872 110 +3 d--777 3 1 +4 usr d--777 3 1 +5 sh ---755 3 1 /bin/sh +6 ken d--755 6 1 +7 $ +8 b0 b--644 3 1 0 0 +9 c0 c--644 3 1 0 0 +10 fifo p--644 3 1 +11 slink l--644 3 1 /a/symbolic/link +12 : This is a comment line +13 $ +14 $\f1 +.in -5 +.fi +.IP +Line 1 is a dummy string. +(It was formerly the bootfilename.) +It is present for backward +compatibility; boot blocks are not used on SGI systems. +.IP +Note that some string of characters must be present as the first line of +the proto file to cause it to be parsed correctly; the value +of this string is immaterial since it is ignored. +.IP +Line 2 contains two numeric values (formerly the numbers of blocks and inodes). +These are also merely for backward compatibility: two numeric values must +appear at this point for the proto file to be correctly parsed, +but their values are immaterial since they are ignored. +.IP +Lines 3-11 tell +.I mkfs.xfs +about files and directories to +be included in this filesystem. +Line 3 specifies the root directory. +Lines 4-6 and 8-10 specify other directories and files. +Note the special symbolic link syntax on line 11. +.IP +The +.B $ +on line 7 tells +.I mkfs.xfs +to end the branch of the filesystem it is on, and continue +from the next higher directory. +It must be the last character +on a line. +The colon +on line 12 introduces a comment; all characters up until the +following newline are ignored.
+Note that this means you cannot +have a file in a prototype file whose name contains a colon. +The +.B $ +on lines 13 and 14 end the process, since no additional +specifications follow. +.IP +File specifications give the mode, +the user ID, +the group ID, +and the initial contents of the file. +Valid syntax for the contents field +depends on the first character of the mode. +.IP +The mode for a file is specified by a 6-character string. +The first character +specifies the type of the file. +The character range is +.B \-bcdpl +to specify regular, block special, +character special, directory files, named pipes (fifos), and symbolic +links, respectively. +The second character of the mode +is either +.B u +or +.B \- +to specify setuserID mode or not. +The third is +.B g +or +.B \- +for the setgroupID mode. +The rest of the mode +is a three digit octal number giving the +owner, group, and other read, write, execute +permissions (see +.IR chmod (1)). +.IP +Two decimal number +tokens come after the mode; they specify the +user and group IDs of the owner of the file. +.IP +If the file is a regular file, +the next token of the specification can be a pathname +from which the contents and size are copied. +If the file is a block or character special file, +two decimal numbers +follow that give the major and minor device numbers. +If the file is a symbolic link, the next token of the specification +is used as the contents of the link. +If the file is a directory, +.I mkfs.xfs +makes the entries +.BR . "" +and +.B .. +and then +reads a list of names and +(recursively) +file specifications for the entries +in the directory. +As noted above, the scan is terminated with the +token +.BR $ . +.TP +.B \-q +Quiet option. +.IP +Normally +.I mkfs.xfs +prints the parameters of the filesystem +to be constructed; +the +.B \-q +flag suppresses this. +.TP +.B \-r +Real-time section options. 
+.IP +These options specify the location, size, and other parameters of the +real-time section of the filesystem. +The valid suboptions are: +.BI extsize= value +and +\f3size=\f1\f2value\f1. +.IP +The +.B extsize +suboption is used to specify the size of the blocks in the real-time +section of the filesystem. +This size must be a multiple of the filesystem block size. +The minimum allowed value is the filesystem block size +or 4 KB (whichever is larger); +the default value is the stripe width for striped volumes or 64 KB for +non-striped volumes; +the maximum allowed value is 1 GB. +The real-time extent size should be carefully chosen to match the +parameters of the physical media used. +.IP +The +.B size +suboption is used to specify the size of the real-time section. +This suboption is only needed if the real-time section of the +filesystem should occupy +less space than the size of the partition or logical volume containing the section. +.TP +.B \-C +Disable overlapping partition/volume checks. +.IP +By default \f2mkfs.xfs\f1 checks to see if the destination partition or logical +volume overlaps any mounted or reserved partitions in the system. If an +overlap or mount conflict is found, the user will be notified and prevented +from potentially corrupting the existing data. For systems with +a large number of disks, this additional checking may add noticeable overhead +to the command's execution time. For situations where command performance is +necessary, this switch may be used to disable the safeguards. Due to the +potential for user-error causing corrupted filesystems or other on-disk +data corruption, we strongly discourage use of this switch in normal operation. +.SH SEE ALSO +mkfs(8). +.SH BUGS +With a prototype file, it is not possible to specify hard links.
diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8 new file mode 100644 index 000000000..50cfc3e6d --- /dev/null +++ b/man/man8/xfs_admin.8 @@ -0,0 +1,68 @@ +.TH xfs_admin 8 +.SH NAME +xfs_admin \- change parameters of an XFS filesystem +.SH SYNOPSIS +.nf +\f3xfs_admin\f1 [ \f3-lu\f1] [ \f3\-L \f2label\f1 ] [ \f3\-U \f2uuid\f1 ] device +\f3xfs_admin \-f\f1 [ \f3-lu\f1] [ \f3\-L \f2label\f1 ] [ \f3\-U \f2uuid\f1 ] filename +.fi +.SH DESCRIPTION +.I xfs_admin +uses the +.IR xfs_db (8) +command to modify various parameters of a filesystem. +.PP +Devices that are mounted cannot be modified. +Administrators must unmount filesystems before +.I xfs_admin +or +.I xfs_db +can convert parameters. +A number of parameters of a mounted filesystem can be examined +and modified using the +.IR xfs_growfs (8) +command. +.SH OPTIONS +.TP 5 +\f3\-f\f1 +Specifies that the filesystem image to be processed is stored in a +regular file (see the \f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option). +.TP 5 +\f3\-l\f1 +Print the current filesystem label. +.TP 5 +\f3\-u\f1 +Print the current filesystem UUID (Universally Unique IDentifier). +.TP 5 +\f3\-L\f1 \f2label\f1 +Set the filesystem label. +XFS filesystem labels can be at most 12 characters long; if +.I label +is longer than 12 characters, +.I xfs_admin +will truncate it and print a warning message. +The filesystem label can be cleared using the special ``\c +.BR \-\- '' +value for +.IR label . +.TP 5 +\f3\-U\f1 \f2UUID\f1 +Set the UUID of the filesystem. +A sample UUID looks like this: "c1b9d5a2-f162-11cf-9ece-0020afc76f16". +The uuid may also be +.IR null , +which will set the filesystem UUID to the null UUID. +The uuid may also be +.IR generate , +which will generate a new UUID for the filesystem. +.PP +The +.IR mount (8) +manual entry describes how to mount a filesystem using its label or UUID, +rather than its block special device name. +.SH SEE ALSO +mkfs.xfs(8), +mount(8), +xfs_db(8), +xfs_growfs(8), +xfs(5). 
diff --git a/man/man8/xfs_bmap.8 b/man/man8/xfs_bmap.8 new file mode 100644 index 000000000..0d08ec362 --- /dev/null +++ b/man/man8/xfs_bmap.8 @@ -0,0 +1,54 @@ +.TH xfs_bmap 8 +.SH NAME +xfs_bmap \- print block mapping for an XFS file +.SH SYNOPSIS +.nf +\f3xfs_bmap\f1 [ \f3\-a\f1 ] [ \f3\-l\f1 ] [ \f3\-d\f1 ] [ \f3\-n \f2nnn\f1 ] file ... +.fi +.SH DESCRIPTION +.I xfs_bmap +prints the map of disk blocks used by files in an XFS filesystem. +The map lists each \f2extent\fP used by the file, as well as regions +in the file that do not have any corresponding blocks (\f2hole\f1s). +Each line of the listings takes the following form: + +.Ex +\f2extent\f1\f7: [\f1\f2startoffset\f1\f7..\f1\f2endoffset\f1\f7]: \c +\f1\f2startblock\f1\f7..\f1\f2endblock\f1 +.Ee + +Holes are marked by replacing the \f2startblock..endblock\f1 with \f2hole\fP. +All the file offsets and disk blocks are in units of 512-byte blocks, +no matter what the filesystem's block size is. +.PP +If portions of the file have been migrated offline by +a DMAPI application, a DMAPI read event will be generated to +bring those portions back online before the disk block map is +printed. However if the \f3-d\f1 option is used, no DMAPI read event +will be generated for a DMAPI file and offline portions will be reported as holes. +.PP +If the \f3-l\f1 option is used, then + +.Ex +\f1\f2nblocks\f1\f7 \f1\f2blocks\f1\f7 +.Ee + +will be appended to each line. \f1\f2Nblocks\f1\f7 is the length +of the extent described on the line in units of 512-byte blocks. +.PP +If the \f3\-a\f1 option is given, information about the file's +attribute fork is printed instead of the default data fork. +.PP +If the \f3\-n \f2nnn\f1 option is given, \f3xfs_bmap\f1 obtains the extent +list of the file in groups of \f2nnn\f1 extents. +In the absence of \f3\-n\f1, \f3xfs_bmap\f1 queries the system for +the number of extents in the file and uses that value to compute +the group size.
+.SH DIAGNOSTICS +.TP 10 +\f7fcntl(F_GETBMAPX) \f1\f2filename\f1\f7: Invalid argument\f1 +The file \f2filename\f1 is not in an XFS filesystem. +.SH SEE ALSO +fcntl(2), +lvm(8). + diff --git a/man/man8/xfs_check.8 b/man/man8/xfs_check.8 new file mode 100644 index 000000000..96480f4da --- /dev/null +++ b/man/man8/xfs_check.8 @@ -0,0 +1,177 @@ +.TH xfs_check 8 +.SH NAME +xfs_check \- check XFS filesystem consistency +.SH SYNOPSIS +.nf +\f3xfs_check\f1 [ \f3\-i\f1 ino ] ... [ \f3\-b\f1 bno ] ... \c +[ \f3\-s\f1 ] [ \f3\-v\f1 ] xfs_special +.sp .8v +\f3xfs_check\f1 \f3\-f\f1 [ \f3\-i\f1 ino ] ... [ \f3\-b\f1 bno ] ... \c +[ \f3\-s\f1 ] [ \f3\-v\f1 ] file +.fi +.SH DESCRIPTION +.I xfs_check +checks whether an XFS filesystem is consistent. +It is normally run only when there is reason to believe that the +filesystem has a consistency problem. +The filesystem to be checked is specified by the +.I xfs_special +argument, which should be the disk or volume device for the filesystem. +Filesystems stored in files can also be checked, using the \f3\-f\f1 flag. +The filesystem should normally be unmounted or read-only +during the execution of +.IR xfs_check . +Otherwise, spurious problems are reported. +.PP +The options to \f2xfs_check\f1 are: +.TP 9 +.B \-f +Specifies that the special device is actually a file (see the +\f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option). +This might happen if an image copy +of a filesystem has been made into an ordinary file. +.TP +.B \-s +Specifies that only serious errors should be reported. +Serious errors are those that make it impossible to find major data +structures in the filesystem. +This option can be used to cut down the +amount of output when there is a serious problem, when the output might make it +difficult to see what the real problem is. +.TP +.B \-v +Specifies verbose output; it is impossibly long for a +reasonably-sized filesystem. +This option is intended for internal use only. 
+.TP +.BI \-i " ino" +Specifies verbose behavior for a +specific inode. +For instance, it can be used to locate all the blocks +associated with a given inode. +.TP +.BI \-b " bno" +Specifies verbose behavior for a specific filesystem block. +For instance, it can be used to determine what a specific block +is used for. +The block number is a "file system block number". +Conversion between disk addresses (i.e. addresses reported by +.IR xfs_bmap ) +and file system blocks may be accomplished using +.IR xfs_db 's +.B convert +command. +.PP +Any non-verbose output from +.I xfs_check +means that the filesystem has an inconsistency. +The filesystem can be repaired using either +.IR xfs_repair (8) +to fix the filesystem in place, +or by using +.IR xfsdump (8) +and +.IR mkfs.xfs (8) +to dump the filesystem, +make a new filesystem, +then use +.IR xfsrestore (8) +to restore the data onto the new filesystem. +Note that xfsdump may fail on a corrupt filesystem. +However, if the filesystem is mountable, xfsdump can +be used to try and save important data before +repairing the filesystem with xfs_repair. +If the filesystem is not mountable though, xfs_repair is +the only viable option. +.SH DIAGNOSTICS +Under one circumstance, +.I xfs_check +unfortunately might dump core +rather than produce useful output. +If the filesystem is completely corrupt, a core dump might +be produced instead of the message +.Ex +\f2xxx\f1\f7 is not a valid filesystem\f1 +.Ee +.PP +If the filesystem is very large (has many files) then +.I xfs_check +might run out of memory. +In this case the message +.Ex +out of memory +.Ee +is printed. +.PP +The following is a description of the most likely problems and the associated +messages. +Most of the diagnostics produced are only meaningful with an understanding +of the structure of the filesystem.
+.TP +\f7agf_freeblks \f1\f2n\f1\f7, counted \f1\f2m\f1\f7 in ag \f1\f2a\f1 +The freeblocks count in the allocation group header for allocation group +.I a +doesn't match the number of blocks counted free. +.TP +\f7agf_longest \f1\f2n\f1\f7, counted \f1\f2m\f1\f7 in ag \f1\f2a\f1 +The longest free extent in the allocation group header for allocation group +.I a +doesn't match the longest free extent found in the allocation group. +.TP +\f7agi_count \f1\f2n\f1\f7, counted \f1\f2m\f1\f7 in ag \f1\f2a\f1 +The allocated inode count in the allocation group header for allocation group +.I a +doesn't match the number of inodes counted in the allocation group. +.TP +\f7agi_freecount \f1\f2n\f1\f7, counted \f1\f2m\f1\f7 in ag \f1\f2a\f1 +The free inode count in the allocation group header for allocation group +.I a +doesn't match the number of inodes counted free in the allocation group. +.TP +\f7block \f1\f2a/b\f1\f7 expected inum 0 got \f1\f2i\f1 +The block number is specified as a pair +(allocation group number, block in the allocation group). +The block is used multiple times (shared), between multiple inodes. +This message usually follows a message of the next type. +.TP +\f7block \f1\f2a/b\f1\f7 expected type unknown got \f1\f2y\f1 +The block is used multiple times (shared). +.TP +\f7block \f1\f2a/b\f1\f7 type unknown not expected\f1 +The block is unaccounted for (not in the freelist and not in use). +.TP +\f7link count mismatch for inode \f1\f2nnn\f1\f7 (name \f1\f2xxx\f1\f7), nlink \f1\f2m\f1\f7, counted \f1\f2n\f1 +The inode has a bad link count (number of references in directories). +.TP +\f7rtblock \f1\f2b\f1\f7 expected inum 0 got \f1\f2i\f1 +The block is used multiple times (shared), between multiple inodes. +This message usually follows a message of the next type. +.TP +\f7rtblock \f1\f2b\f1\f7 expected type unknown got \f1\f2y\f1 +The real-time block is used multiple times (shared). 
+.TP +\f7rtblock \f1\f2b\f1\f7 type unknown not expected\f1 +The real-time block is unaccounted for (not in the freelist and not in use). +.TP +\f7sb_fdblocks \f1\f2n\f1\f7, counted \f1\f2m\f1 +The number of free data blocks recorded +in the superblock doesn't match the number counted free in the filesystem. +.TP +\f7sb_frextents \f1\f2n\f1\f7, counted \f1\f2m\f1 +The number of free real-time extents recorded +in the superblock doesn't match the number counted free in the filesystem. +.TP +\f7sb_icount \f1\f2n\f1\f7, counted \f1\f2m\f1 +The number of allocated inodes recorded +in the superblock doesn't match the number allocated in the filesystem. +.TP +\f7sb_ifree \f1\f2n\f1\f7, counted \f1\f2m\f1 +The number of free inodes recorded +in the superblock doesn't match the number free in the filesystem. +.SH SEE ALSO +mkfs.xfs(8), +xfsdump(8), +xfsrestore(8), +xfs_ncheck(8), +xfs_repair(8), +xfs(5). diff --git a/man/man8/xfs_db.8 b/man/man8/xfs_db.8 new file mode 100644 index 000000000..82c2ad944 --- /dev/null +++ b/man/man8/xfs_db.8 @@ -0,0 +1,1187 @@ +.TH xfs_db 8 +.SH NAME +xfs_db \- debug an XFS filesystem +.SH SYNOPSIS +.nf +\f3xfs_db\f1 [ \f3\-c\f1 cmd ] ... [ \f3\-p\f1 prog ] [ \f3\-r\f1 ] [ \f3\-x\f1 ] xfs_special +.sp .8v +\f3xfs_db\f1 \f3\-f\f1 [ \f3\-c\f1 cmd ] ... [ \f3\-p\f1 prog ] [ \f3\-f\f1 ] [ \f3\-r\f1 ] [ \f3\-x\f1 ] file +.fi +.SH DESCRIPTION +\f2xfs_db\f1 is used to examine an XFS filesystem. +Under rare circumstances it can also be used to modify an XFS filesystem, +but that task is normally left to \f2xfs_repair\f1(8) or to +scripts such as \f2xfs_chver\f1 that run \f2xfs_db\f1. +.PP +The options to \f2xfs_db\f1 are: +.TP 10 +\f3\-c\f1 \f2cmd\f1 +\f2xfs_db\f1 commands may be run interactively (the default) +or as arguments on the command line. +Multiple \f3\-c\f1 arguments may be given. +The commands are run in the sequence given, then the program exits. +This is the mechanism used to implement \f2xfs_check\f1(8). 
+.TP +\f3\-f\f1 +Specifies that the filesystem image to be processed is stored in a +regular file +(see the \f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option). +This might happen if an image copy +of a filesystem has been made into an ordinary file with \f2xfs_copy\f1(8). +.TP +\f3\-i\f1 +Allows execution on a mounted filesystem, provided it is mounted read-only. +Useful for shell scripts such as \f2xfs_check\f1(8), which must only +operate on filesystems in a guaranteed consistent state +(either unmounted or mounted read-only). +These semantics are slightly different to that of the \f3\-r\f1 option. +.TP +\f3\-p\f1 \f2prog\f1 +Set the program name for prompts and some error messages, +the default value is \f2xfs_db\f1. +.TP +\f3\-r\f1 +Open \f2file\f1 or \f2xfs_special\f1 read-only. +This option is required if \f2xfs_special\f1 is a mounted filesystem. +It is only necessary to omit this flag if a command that changes data +(\f3write\f1, \f3blocktrash\f1) is to be used. +.TP +\f3\-x\f1 +Specifies expert mode. +This enables the \f3write\f1 command. +.SH CONCEPTS +\f2xfs_db\f1 commands can be broken up into two classes. +Most commands are for the navigation and display of data structures in +the filesystem. +Other commands are for scanning the filesystem in some way. +.PP +Commands which are used to navigate the filesystem structure take arguments +which reflect the names of filesystem structure fields. +There can be multiple field names separated by dots when the underlying +structures are nested, as in C. +The field names can be indexed (as an array index) +if the underlying field is an array. +The array indices can be specified as a range, two numbers separated by a dash. +.PP +\f2xfs_db\f1 maintains a current address in the filesystem. +The granularity of the address is a filesystem structure. +This can be a filesystem block, +an inode or quota (smaller than a filesystem block), +or a directory block (could be larger than a filesystem block).
+There are a variety of commands to set the current address. +Associated with the current address is the current data type, +which is the structural type of this data. +Commands which follow the structure of the filesystem always set the type +as well as the address. +Commands which examine pieces of an individual file (inode) need the current +inode to be set, this is done with the \f3inode\f1 command. +.PP +The current address/type information is actually maintained in a +stack that can be explicitly manipulated with the +\f3push\f1, \f3pop\f1, and \f3stack\f1 commands. +This allows for easy examination of a nested filesystem structure. +Also, the last several locations visited are stored in a ring buffer +which can be manipulated with the +\f3forward\f1, \f3back\f1, and \f3ring\f1 commands. +.PP +XFS filesystems are divided into a small number of allocation groups. +\f2xfs_db\f1 maintains a notion of the current allocation group which is +manipulated by some commands. +The initial allocation group is 0. +.SH COMMANDS +.PP +Many commands have extensive online help. +Use the \f3help\f1 command for more details on any command. +.TP 10 +\f3a\f1 +See the \f3addr\f1 command. +.TP +\f3ablock\f1 \f2filoff\f1 +Set current address to the offset \f2filoff\f1 (a filesystem block number) +in the attribute area of the current inode. +.TP +\f3addr\f1 [ \f2field-expression\f1 ] +Set current address to the value of the \f2field-expression\f1. +This is used to ``follow'' a reference in one structure to the object +being referred to. +If no argument is given the current address is printed. +.TP +\f3agf\f1 [ \f2agno\f1 ] +Set current address to the AGF block for allocation group \f2agno\f1. +If no argument is given use the current allocation group. +.TP +\f3agfl\f1 [ \f2agno\f1 ] +Set current address to the AGFL block for allocation group \f2agno\f1. +If no argument is given use the current allocation group.
+.TP +\f3agi\f1 [ \f2agno\f1 ] +Set current address to the AGI block for allocation group \f2agno\f1. +If no argument is given use the current allocation group. +.TP +\f3b\f1 +See the \f3back\f1 command. +.TP +\f3back\f1 +Move to the previous location in the position ring. +.TP +\f3blockfree\f1 +Free block usage information collected by the last execution of the +\f3blockget\f1 command. +This must be done before another \f3blockget\f1 command can be given, +presumably with different arguments than the previous one. +.TP +\f3blockget\f1 [ \f3\-npsv\f1 ] [ \f3\-b\f1 \f2bno\f1 ] ... [ \f3\-i\f1 \f2ino\f1 ] ... +Get block usage and check filesystem consistency. +The information is saved for use by a subsequent +\f3blockuse\f1, \f3ncheck\f1, or \f3blocktrash\f1 command. +See \f2xfs_check\f1(8) for more information. +.br +The \f3\-b\f1 option is used to specify filesystem block numbers +about which verbose information should be printed. +.br +The \f3\-i\f1 option is used to specify inode numbers about which +verbose information should be printed. +.br +The \f3\-n\f1 option is used to save pathnames for inodes visited, +this is used to support the \f2xfs_ncheck\f1(8) command. +It also means that pathnames will be printed for inodes that have problems. +This option uses a lot of memory so is not enabled by default. +.br +The \f3\-p\f1 option causes error messages to be prefixed with the +filesystem name being processed. +This is useful if several copies of \f2xfs_db\f1 are run in parallel. +.br +The \f3\-s\f1 option restricts output to severe errors only. +This is useful if the output is too long otherwise. +.br +The \f3\-v\f1 option enables verbose output. +Messages will be printed for every block and inode processed. +.TP +\f3blocktrash\f1 [ \f3\-n\f1 \f2c\f1 ] [ \f3\-x\f1 \f2a\f1 ] [ \f3\-y\f1 \f2b\f1 ] [ \f3\-s\f1 \f2s\f1 ] [ \f3\-0123\f1 ] [ \f3\-t\f1 \f2t\f1 ] ... +Trash randomly selected filesystem metadata blocks. 
+Trashing occurs to randomly selected bits in the chosen blocks. +This command is available only in debugging versions of \f2xfs_db\f1. +It is useful for testing \f2xfs_repair\f1(8) and \f2xfs_check\f1(8). +.br +The \f3\-0\f1, \f3\-1\f1, \f3\-2\f1, and \f3\-3\f1 options (mutually exclusive) +set the operating mode for \f3blocktrash\f1. +In \f3\-0\f1 mode, changed bits are cleared. +In \f3\-1\f1 mode, changed bits are set. +In \f3\-2\f1 mode, changed bits are inverted. +In \f3\-3\f1 mode, changed bits are randomized. +.br +The \f3\-n\f1 option supplies the count of block-trashings to perform +(default 1). +.br +The \f3\-s\f1 option supplies a seed to the random processing. +.br +The \f3\-t\f1 option gives a type of blocks to be selected +for trashing. +Multiple \f3\-t\f1 options may be given. +If no \f3\-t\f1 options are given then all metadata types can be trashed. +.br +The \f3\-x\f1 option sets the minimum size of bit range to be trashed. +The default value is 1. +.br +The \f3\-y\f1 option sets the maximum size of bit range to be trashed. +The default value is 1024. +.TP +\f3blockuse\f1 [ \f3\-n\f1 ] [ \f3\-c\f1 \f2blockcount\f1 ] +Print usage for current filesystem block(s). +For each block, the type and (if any) inode are printed. +.br +The \f3\-c\f1 option specifies a count of blocks to process. +The default value is 1 (the current block only). +.br +The \f3\-n\f1 option specifies that file names should be printed. +The prior \f3blockget\f1 command must have also specified the \f3\-n\f1 option. +.TP +\f3bmap\f1 [ \f3\-a\f1 ] [ \f3\-d\f1 ] [ \f2block\f1 [ \f2len\f1 ] ] +Show the block map for the current inode. +The map display can be restricted to an area of the file with the +\f2block\f1 and \f2len\f1 arguments. +If \f2block\f1 is given and \f2len\f1 is omitted then 1 is assumed for len. +.br +The \f3\-a\f1 and \f3\-d\f1 options are used to select the attribute or data +area of the inode, if neither option is given then both areas are shown. 
+.TP +\f3check\f1 +See the \f3blockget\f1 command. +.TP +\f3convert\f1 \f2type\f1 \f2number\f1 [ \f2type\f1 \f2number\f1 ] ... \f2type\f1 +Convert from one address form to another. +The known \f2type\f1s, with alternate names, are: +\f3agblock\f1 or \f3agbno\f1 (filesystem block within an allocation group), +\f3agino\f1 or \f3aginode\f1 (inode number within an allocation group), +\f3agnumber\f1 or \f3agno\f1 (allocation group number), +\f3bboff\f1 or \f3daddroff\f1 (byte offset in a \f3daddr\f1), +\f3blkoff\f1 or \f3fsboff\f1 or \f3agboff\f1 (byte offset in a \f3agblock\f1 +or \f3fsblock\f1), +\f3byte\f1 or \f3fsbyte\f1 (byte address in filesystem), +\f3daddr\f1 or \f3bb\f1 (disk address, 512-byte blocks), +\f3fsblock\f1 or \f3fsb\f1 or \f3fsbno\f1 (filesystem block, see the +\f3fsblock\f1 command), +\f3ino\f1 or \f3inode\f1 (inode number), +\f3inoidx\f1 or \f3offset\f1 (index of inode in filesystem block), +and \f3inooff\f1 or \f3inodeoff\f1 (byte offset in inode). +Only conversions that ``make sense'' are allowed. +The compound form (with more than three arguments) is useful for +conversions such as +\f3convert\f1 \f3agno\f1 \f2ag\f1 \f3agbno\f1 \f2agb\f1 \f3fsblock\f1. +.TP +\f3daddr\f1 [ \f2d\f1 ] +Set current address to the daddr (512 byte block) given by \f2d\f1. +If no value for \f2d\f1 is given the current address is printed, +expressed as a daddr. +The type is set to \f3data\f1 (uninterpreted). +.TP +\f3dblock\f1 \f2filoff\f1 +Set current address to the offset \f2filoff\f1 (a filesystem block number) +in the data area of the current inode. +.TP +\f3debug\f1 [ \f2flagbits\f1 ] +Set debug option bits. +These are used for debugging \f2xfs_db\f1. +If no value is given for \f2flagbits\f1, print the current debug option bits. +These are for the use of the implementor. +.TP +\f3dquot\f1 [ \f2projectid_or_userid\f1 ] +Set current address to a project or user quota block. +.TP +\f3echo\f1 [ \f2arg\f1 ] ... +Echo the arguments to the output. 
+.TP +\f3f\f1 +See the \f3forward\f1 command. +.TP +\f3forward\f1 +Move forward to the next entry in the position ring. +.TP +\f3frag\f1 [ \f3\-adflqRrv\f1 ] +Get file fragmentation data. +This prints information about fragmentation of file data in the filesystem +(as opposed to fragmentation of freespace, +for which see the \f3freesp\f1 command). +Every file in the filesystem is examined to see how far from ideal +its extent mappings are. +A summary is printed giving the totals. +.br +The \f3\-v\f1 option sets verbosity, +every inode has information printed for it. +The remaining options select which inodes and extents are examined. +If no options are given then all are assumed set, +otherwise just those given are enabled. +.br +The \f3\-a\f1 option enables processing of attribute data. +.br +The \f3\-d\f1 option enables processing of directory data. +.br +The \f3\-f\f1 option enables processing of regular file data. +.br +The \f3\-l\f1 option enables processing of symbolic link data. +.br +The \f3\-q\f1 option enables processing of quota file data. +.br +The \f3\-R\f1 option enables processing of realtime control file data. +.br +The \f3\-r\f1 option enables processing of realtime file data. +.TP +\f3freesp\f1 [ \f3\-bcds\f1 ] [ \f3\-a\f1 \f2a\f1 ] ... [ \f3\-e\f1 \f2i\f1 ] [ \f3\-h\f1 \f2h1\f1 ] ... [ \f3\-m\f1 \f2m\f1 ] +Summarize free space for the filesystem. +The free blocks are examined and totalled, +and displayed in the form of a histogram, +with a count of extents in each range of free extent sizes. +.br +The \f3\-a\f1 \f2a\f1 option adds \f2a\f1 to the list of +allocation groups to be processed. +If no \f3\-a\f1 options are given then all allocation groups are processed. +.br +The \f3\-b\f1 option specifies that the histogram buckets are binary-sized, +with the starting sizes being the powers of 2. +.br +The \f3\-c\f1 option specifies that \f3freesp\f1 will search the +by-size (cnt) space Btree instead of the default by-block (bno) space Btree. 
+.br +The \f3\-d\f1 option specifies that every free extent will be displayed. +.br +The \f3\-e\f1 \f2i\f1 option specifies that the histogram buckets are +equal-sized, with the size specified as \f2i\f1. +.br +The \f3\-h\f1 \f2h1\f1 option specifies a starting block number +for a histogram bucket as \f2h1\f1. +Multiple \f3\-h\f1 options are given to specify the complete set of buckets. +.br +The \f3\-m\f1 \f2m\f1 option specifies that the histogram +starting block numbers are powers of \f2m\f1. +This is the general case of \f3\-b\f1. +.br +The \f3\-s\f1 option specifies that a final summary of total free extents, +free blocks, and the average free extent size is printed. +.TP +\f3fsb\f1 +See the \f3fsblock\f1 command. +.TP +\f3fsblock\f1 [ \f2fsb\f1 ] +Set current address to the fsblock value given by \f2fsb\f1. +If no value for \f2fsb\f1 is given the current address is printed, +expressed as an fsb. +The type is set to \f3data\f1 (uninterpreted). +XFS filesystem block numbers are computed +((\f2agno\f1 << \f2agshift\f1) | \f2agblock\f1) +where \f2agshift\f1 depends on the size of an allocation group. +Use the \f3convert\f1 command to convert to and from this form. +Block numbers given for file blocks +(for instance from the \f3bmap\f1 command) +are in this form. +.TP +\f3hash\f1 \f2string\f1 +Prints the hash value of \f2string\f1 using the hash function of the XFS +directory and attribute implementation. +.TP +\f3help\f1 [ \f2command\f1 ] +Print help for one or all commands. +.TP +\f3inode\f1 [ \f2inode#\f1 ] +Set the current inode number. +If no \f2inode#\f1 is given, print the current inode number. +.TP +\f3log\f1 [ \f3stop\f1 | \f3start\f1 \f2filename\f1 ] +Start logging output to \f2filename\f1, stop logging, +or print the current logging status. +.TP +\f3ncheck\f1 [ \f3\-s\f1 ] [ \f3\-i\f1 \f2ino\f1 ] ... +Print name-inode pairs. +A \f3blockget -n\f1 command must be run first to gather the information. 
+.br +The \f3\-i\f1 option specifies an inode number to be printed. +If no \f3\-i\f1 options are given then all inodes are printed. +.br +The \f3\-s\f1 option specifies that only setuid and setgid files are printed. +.TP +\f3p\f1 +See the \f3print\f1 command. +.TP +\f3pop\f1 +Pop location from the stack. +.TP +\f3print\f1 [ \f2field-expression\f1 ] ... +Print field values. +If no argument is given, print all fields in the current structure. +.TP +\f3push\f1 [ \f2command\f1 ] +Push location to the stack. +If \f2command\f1 is supplied, +set the current location to the results of \f2command\f1 +after pushing the old location. +.TP +\f3q\f1 +See the \f3quit\f1 command. +.TP +\f3quit\f1 +Exit \f2xfs_db\f1. +.TP +\f3ring\f1 [ \f2index\f1 ] +Show position ring (if no \f2index\f1 argument is given), +or move to a specific entry in the position ring given by \f2index\f1. +.TP +\f3sb\f1 [ \f2agno\f1 ] +Set current address to SB header in allocation group \f2agno\f1. +If no \f2agno\f1 is given use the current allocation group number. +.TP +\f3source\f1 \f2source-file\f1 +Process commands from \f2source-file\f1. +\f3source\f1 commands can be nested. +.TP +\f3stack\f1 +View the location stack. +.TP +\f3type\f1 [ \f2type\f1 ] +Set the current data type to \f2type\f1. +If no argument is given, show the current data type. +The possible data types are: +\f3agf\f1, \f3agfl\f1, \f3agi\f1, \f3attr\f1, \f3bmapbta\f1, \f3bmapbtd\f1, +\f3bnobt\f1, \f3cntbt\f1, \f3data\f1, \f3dir\f1, \f3dir2\f1, \f3dqblk\f1, +\f3inobt\f1, \f3inode\f1, \f3log\f1, \f3rtbitmap\f1, \f3rtsummary\f1, +\f3sb\f1, and \f3symlink\f1. +See the TYPES section below for more information on these data types. +.TP +\f3write\f1 [ \f2field or value\f1 ] ... +Write a value to disk. +Specific fields can be set in structures (struct mode), +or a block can be set to data values (data mode), +or a block can be set to string values (string mode, for symlink blocks). +The operation happens immediately: there is no buffering. 
+.br +Struct mode is in effect when the current type is structural, +i.e. not data. +For struct mode, the syntax is ``\f3write\f1 \f2field\f1 \f2value\f1''. +.br +Data mode is in effect when the current type is data. +In this case the contents of the block can be shifted or rotated left or right, +or filled with a sequence, a constant value, or a random value. +In this mode \f3write\f1 with no arguments gives more information on +the allowed commands. +.SH TYPES +This section gives the fields in each structure type and their meanings. +Note that some types of block cover multiple actual structures, +for instance directory blocks. +.TP 10 +\f3agf\f1 +The AGF block is the header for block allocation information; +it is in the second 512-byte block of each allocation group. +The following fields are defined: +.br +\f3magicnum\f1: AGF block magic number, 0x58414746 ('XAGF') +.br +\f3versionnum\f1: version number, currently 1 +.br +\f3seqno\f1: sequence number starting from 0 +.br +\f3length\f1: size in filesystem blocks of the allocation group. 
+All allocation groups except the last one of the filesystem have +the superblock's \f3agblocks\f1 value here +.br +\f3bnoroot\f1: block number of the root of the Btree holding free space +information sorted by block number +.br +\f3cntroot\f1: block number of the root of the Btree holding free space +information sorted by block count +.br +\f3bnolevel\f1: number of levels in the by-block-number Btree +.br +\f3cntlevel\f1: number of levels in the by-block-count Btree +.br +\f3flfirst\f1: index into the AGFL block of the first active entry +.br +\f3fllast\f1: index into the AGFL block of the last active entry +.br +\f3flcount\f1: count of active entries in the AGFL block +.br +\f3freeblks\f1: count of blocks represented in the freespace Btrees +.br +\f3longest\f1: longest free space represented in the freespace Btrees +.TP +\f3agfl\f1 +The AGFL block contains block numbers for use of the block allocator; +it is in the fourth 512-byte block of each allocation group. +Each entry in the active list is a block number within the allocation group +that can be used for any purpose if space runs low. +The AGF block fields \f3flfirst\f1, \f3fllast\f1, and \f3flcount\f1 +designate which entries are currently active. +Entry space is allocated in a circular manner within the AGFL block. +Fields defined: +.br +\f3bno\f1: array of all block numbers. +Even those which are not active are printed +.TP +\f3agi\f1 +The AGI block is the header for inode allocation information; +it is in the third 512-byte block of each allocation group. 
+Fields defined: +.br +\f3magicnum\f1: AGI block magic number, 0x58414749 ('XAGI') +.br +\f3versionnum\f1: version number, currently 1 +.br +\f3seqno\f1: sequence number starting from 0 +.br +\f3length\f1: size in filesystem blocks of the allocation group +.br +\f3count\f1: count of inodes allocated +.br +\f3root\f1: block number of the root of the Btree holding inode allocation +information +.br +\f3level\f1: number of levels in the inode allocation Btree +.br +\f3freecount\f1: count of allocated inodes that are not in use +.br +\f3newino\f1: last inode number allocated +.br +\f3dirino\f1: unused +.br +\f3unlinked\f1: an array of inode numbers within the allocation group. +The entries in the AGI block are the heads of lists which run through the +inode \f3next_unlinked\f1 field. +These inodes are to be unlinked the next time the filesystem is mounted +.TP +\f3attr\f1 +An attribute fork is organized as a Btree with the actual data +embedded in the leaf blocks. +The root of the Btree is found in block 0 of the fork. +The index (sort order) of the Btree is the hash value of the attribute name. +All the blocks contain a \f3blkinfo\f1 structure at the beginning, +see type \f3dir\f1 for a description. +Nonleaf blocks are identical in format to those for version 1 and +version 2 directories, see type \f3dir\f1 for a description. +Leaf blocks can refer to ``local'' or ``remote'' attribute values. +Local values are stored directly in the leaf block. +Remote values are stored in an independent block in the attribute fork +(with no structure). 
+Leaf blocks contain the following fields: +.br +\f3hdr\f1: header containing +a \f3blkinfo\f1 structure \f3info\f1 (magic number 0xfbee), +a \f3count\f1 of active entries, +\f3usedbytes\f1 total bytes of names and values, +the \f3firstused\f1 byte in the name area, +\f3holes\f1 set if the block needs compaction, +and array \f3freemap\f1 as for \f3dir\f1 leaf blocks +.br +\f3entries\f1: array of structures containing +a \f3hashval\f1, +\f3nameidx\f1 (index into the block of the name), +and flags \f3incomplete\f1, +\f3root\f1, +and \f3local\f1 +.br +\f3nvlist\f1: array of structures describing the attribute names and values. +Fields always present: +\f3valuelen\f1 (length of value in bytes), +\f3namelen\f1, +and \f3name\f1. +Fields present for local values: +\f3value\f1 (value string). +Fields present for remote values: +\f3valueblk\f1 (fork block number of the block containing the value). +.TP +\f3bmapbt\f1 +Files with many extents in their data or attribute fork will have the +extents described by the contents of a Btree for that fork, +instead of being stored directly in the inode. +Each bmap Btree starts with a root block contained within the inode. +The other levels of the Btree are stored in filesystem blocks. +The blocks are linked to sibling left and right blocks at each level, +as well as by pointers from parent to child blocks. +Each block contains the following fields: +.br +\f3magic\f1: bmap Btree block magic number, 0x424d4150 ('BMAP') +.br +\f3level\f1: level of this block above the leaf level +.br +\f3numrecs\f1: number of records or keys in the block +.br +\f3leftsib\f1: left (logically lower) sibling block, 0 if none +.br +\f3rightsib\f1: right (logically higher) sibling block, 0 if none +.br +\f3recs\f1: [leaf blocks only] array of extent records. +Each record contains +\f3startoff\f1, +\f3startblock\f1, +\f3blockcount\f1, +and \f3extentflag\f1 (1 if the extent is unwritten) +.br +\f3keys\f1: [nonleaf blocks only] array of key records. 
+These are the first key value of each block in the level below this one. +Each record contains \f3startoff\f1 +.br +\f3ptrs\f1: [nonleaf blocks only] array of child block pointers. +Each pointer is a filesystem block number to the next level in the Btree +.TP +\f3bnobt\f1 +There is one set of filesystem blocks forming the by-block-number allocation +Btree for each allocation group. +The root block of this Btree is designated by the \f3bnoroot\f1 field in the +corresponding AGF block. +The blocks are linked to sibling left and right blocks at each level, +as well as by pointers from parent to child blocks. +Each block has the following fields: +.br +\f3magic\f1: BNOBT block magic number, 0x41425442 ('ABTB') +.br +\f3level\f1: level number of this block, 0 is a leaf +.br +\f3numrecs\f1: number of data entries in the block +.br +\f3leftsib\f1: left (logically lower) sibling block, 0 if none +.br +\f3rightsib\f1: right (logically higher) sibling block, 0 if none +.br +\f3recs\f1: [leaf blocks only] array of freespace records. +Each record contains +\f3startblock\f1 +and \f3blockcount\f1 +.br +\f3keys\f1: [nonleaf blocks only] array of key records. +These are the first value of each block in the level below this one. +Each record contains +\f3startblock\f1 +and \f3blockcount\f1 +.br +\f3ptrs\f1: [nonleaf blocks only] array of child block pointers. +Each pointer is a block number within the allocation group to the next level +in the Btree +.TP +\f3cntbt\f1 +There is one set of filesystem blocks forming the by-block-count allocation +Btree for each allocation group. +The root block of this Btree is designated by the \f3cntroot\f1 field in the +corresponding AGF block. +The blocks are linked to sibling left and right blocks at each level, +as well as by pointers from parent to child blocks. 
+Each block has the following fields: +.br +\f3magic\f1: CNTBT block magic number, 0x41425443 ('ABTC') +.br +\f3level\f1: level number of this block, 0 is a leaf +.br +\f3numrecs\f1: number of data entries in the block +.br +\f3leftsib\f1: left (logically lower) sibling block, 0 if none +.br +\f3rightsib\f1: right (logically higher) sibling block, 0 if none +.br +\f3recs\f1: [leaf blocks only] array of freespace records. +Each record contains +\f3startblock\f1 +and \f3blockcount\f1 +.br +\f3keys\f1: [nonleaf blocks only] array of key records. +These are the first value of each block in the level below this one. +Each record contains +\f3blockcount\f1 +and \f3startblock\f1 +.br +\f3ptrs\f1: [nonleaf blocks only] array of child block pointers. +Each pointer is a block number within the allocation group to the next level +in the Btree +.TP +\f3data\f1 +User file blocks, and other blocks whose type is unknown, +have this type for display purposes in \f2xfs_db\f1. +The block data is displayed in hexadecimal format. +.TP +\f3dir\f1 +A version 1 directory is organized as a Btree with the directory data +embedded in the leaf blocks. +The root of the Btree is found in block 0 of the file. +The index (sort order) of the Btree is the hash value of the entry name. +All the blocks contain a \f3blkinfo\f1 structure at the beginning +with the following fields: +.br +\f3forw\f1: next sibling block +.br +\f3back\f1: previous sibling block +.br +\f3magic\f1: magic number for this block type +.sp +The nonleaf (node) blocks have the following fields: +.br +\f3hdr\f1: header containing +a \f3blkinfo\f1 structure \f3info\f1 (magic number 0xfebe), +the \f3count\f1 of active entries, +and the \f3level\f1 of this block above the leaves +.br +\f3btree\f1: array of entries containing +\f3hashval\f1 and +\f3before\f1 fields. 
+The \f3before\f1 value is a block number within the directory file to the +child block, +the \f3hashval\f1 is the last hash value in that block +.sp +The leaf blocks have the following fields: +.br +\f3hdr\f1: header containing +a \f3blkinfo\f1 structure \f3info\f1 (magic number 0xfeeb), +the \f3count\f1 of active entries, +\f3namebytes\f1 (total name string bytes), +\f3holes\f1 flag (block needs compaction), +and \f3freemap\f1 (array of \f3base\f1, \f3size\f1 entries for free regions) +.br +\f3entries\f1: array of structures containing +\f3hashval\f1, +\f3nameidx\f1 (byte index into the block of the name string), +and \f3namelen\f1 +.br +\f3namelist\f1: array of structures containing +\f3inumber\f1 +and \f3name\f1 +.TP +\f3dir2\f1 +A version 2 directory has four kinds of blocks. +Data blocks start at offset 0 in the file. +There are two kinds of data blocks: single-block directories have +the leaf information embedded at the end of the block, data blocks +in multi-block directories do not. +Node and leaf blocks start at offset 32GB (with either a single +leaf block or the root node block). +Freespace blocks start at offset 64GB. +The node and leaf blocks form a Btree, with references to the data +in the data blocks. +The freespace blocks form an index of longest free spaces within the +data blocks. +.sp +A single-block directory block contains the following fields: +.br +\f3bhdr\f1: header containing +\f3magic\f1 number 0x58443242 ('XD2B') +and an array \f3bestfree\f1 of the longest 3 free spaces in the block +(\f3offset\f1, \f3length\f1) +.br +\f3bu\f1: array of union structures. +Each element is either an entry or a freespace. +For entries, there are the following fields: +\f3inumber\f1, +\f3namelen\f1, +\f3name\f1, +and \f3tag\f1. +For freespace, there are the following fields: +\f3freetag\f1 (0xffff), +\f3length\f1, +and \f3tag\f1. 
+The \f3tag\f1 value is the byte offset in the block of the start +of the entry it is contained in +.br +\f3bleaf\f1: array of leaf entries containing +\f3hashval\f1 +and \f3address\f1. +The \f3address\f1 is a 64-bit word offset into the file +.br +\f3btail\f1: tail structure containing +the total \f3count\f1 of leaf entries +and \f3stale\f1 count of unused leaf entries +.sp +A data block contains the following fields: +.br +\f3dhdr\f1: +header containing +\f3magic\f1 number 0x58443244 ('XD2D') +and an array \f3bestfree\f1 of the longest 3 free spaces in the block +(\f3offset\f1, \f3length\f1) +.br +\f3du\f1: array of union structures as for \f3bu\f1 +.sp +Leaf blocks have two possible forms. +If the Btree consists of a single leaf then the freespace information +is in the leaf block, +otherwise it is in separate blocks and the root of the Btree is +a node block. +A leaf block contains the following fields: +.br +\f3lhdr\f1: header containing +a \f3blkinfo\f1 structure \f3info\f1 (magic number 0xd2f1 for the single +leaf case, 0xd2ff for the true Btree case), +the total \f3count\f1 of leaf entries, +and \f3stale\f1 count of unused leaf entries +.br +\f3lents\f1: leaf entries, as for \f3bleaf\f1 +.br +\f3lbests\f1: [single leaf only] +array of values which represent the longest freespace +in each data block in the directory +.br +\f3ltail\f1: [single leaf only] tail structure containing +\f3bestcount\f1 count of \f3lbests\f1 +.sp +A node block is identical to that for types \f3attr\f1 and \f3dir\f1. 
+.sp +A freespace block contains the following fields: +.br +\f3fhdr\f1: header containing +\f3magic\f1 number 0x58443246 ('XD2F'), +\f3firstdb\f1 first data block number covered by this freespace block, +\f3nvalid\f1 number of valid entries, +and \f3nused\f1 number of entries representing real data blocks +.br +\f3fbests\f1: array of values as for \f3lbests\f1 +.TP +\f3dqblk\f1 +The quota information is stored in files referred to by the superblock +\f3uquotino\f1 and \f3pquotino\f1 fields. +Each filesystem block in a quota file contains a constant number of +quota entries. +The quota entry size is currently 136 bytes, +so with a 4KB filesystem block size there are 30 quota entries per block. +The \f3dquot\f1 command is used to locate these entries in the filesystem. +The file entries are indexed by the user or project identifier +to determine the block and offset. +Each quota entry has the following fields: +.br +\f3magic\f1: magic number, 0x4451 ('DQ') +.br +\f3version\f1: version number, currently 1 +.br +\f3flags\f1: flags, values include +0x01 for user quota, +0x02 for project quota +.br +\f3id\f1: user or project identifier +.br +\f3blk_hardlimit\f1: absolute limit on blocks in use +.br +\f3blk_softlimit\f1: preferred limit on blocks in use +.br +\f3ino_hardlimit\f1: absolute limit on inodes in use +.br +\f3ino_softlimit\f1: preferred limit on inodes in use +.br +\f3bcount\f1: blocks actually in use +.br +\f3icount\f1: inodes actually in use +.br +\f3itimer\f1: time when service will be refused if soft limit is violated +for inodes +.br +\f3btimer\f1: time when service will be refused if soft limit is violated +for blocks +.br +\f3iwarns\f1: number of warnings issued about inode limit violations +.br +\f3bwarns\f1: number of warnings issued about block limit violations +.br +\f3rtb_hardlimit\f1: absolute limit on realtime blocks in use +.br +\f3rtb_softlimit\f1: preferred limit on realtime blocks in use +.br +\f3rtbcount\f1: realtime blocks actually in use 
+.br +\f3rtbtimer\f1: time when service will be refused if soft limit is violated +for realtime blocks +.br +\f3rtbwarns\f1: number of warnings issued about realtime block limit violations +.TP +\f3inobt\f1 +There is one set of filesystem blocks forming the inode allocation +Btree for each allocation group. +The root block of this Btree is designated by the \f3root\f1 field in the +corresponding AGI block. +The blocks are linked to sibling left and right blocks at each level, +as well as by pointers from parent to child blocks. +Each block has the following fields: +.br +\f3magic\f1: INOBT block magic number, 0x49414254 ('IABT') +.br +\f3level\f1: level number of this block, 0 is a leaf +.br +\f3numrecs\f1: number of data entries in the block +.br +\f3leftsib\f1: left (logically lower) sibling block, 0 if none +.br +\f3rightsib\f1: right (logically higher) sibling block, 0 if none +.br +\f3recs\f1: [leaf blocks only] array of inode records. +Each record contains +\f3startino\f1 allocation-group relative inode number, +\f3freecount\f1 count of free inodes in this chunk, +and \f3free\f1 bitmap, LSB corresponds to inode 0 +.br +\f3keys\f1: [nonleaf blocks only] array of key records. +These are the first value of each block in the level below this one. +Each record contains +\f3startino\f1 +.br +\f3ptrs\f1: [nonleaf blocks only] array of child block pointers. +Each pointer is a block number within the allocation group to the next level +in the Btree +.TP +\f3inode\f1 +Inodes are allocated in ``chunks'' of 64 inodes each. +Usually a chunk is multiple filesystem blocks, although there are cases +with large filesystem blocks where a chunk is less than one block. +The inode Btree (see \f3inobt\f1 above) +refers to the inode numbers per allocation group. +The inode numbers directly reflect the location of the inode block on disk. +Use the \f3inode\f1 command to point \f2xfs_db\f1 to a specific inode. 
+Each inode contains four regions: +\f3core\f1, +\f3next_unlinked\f1, +\f3u\f1, +and \f3a\f1. +\f3core\f1 contains the fixed information. +\f3next_unlinked\f1 is separated from the core due to +journalling considerations, see type \f3agi\f1 field \f3unlinked\f1. +\f3u\f1 is a union structure that is different in size and format depending +on the type and representation of the file data (``data fork''). +\f3a\f1 is an optional union structure to describe attribute data, +that is different in size, format, and location depending on the presence +and representation of attribute data, and the size of the \f3u\f1 data +(``attribute fork''). +\f2xfs_db\f1 automatically selects the proper union members based on +information in the inode. +.br +The following are fields in the inode core: +.br +\f3magic\f1: inode magic number, 0x494e ('IN') +.br +\f3mode\f1: mode and type of file, as described in \f3chmod\f1(2), +\f3mknod\f1(2), and \f3stat\f1(2) +.br +\f3version\f1: inode version, 1 or 2 +.br +\f3format\f1: format of \f3u\f1 union data +(0: dev_t, +1: local file \- in-inode directory or symlink, +2: extent list, +3: Btree root, +4: unique id [unused]) +.br +\f3nlinkv1\f1: number of links to the file in a version 1 inode +.br +\f3nlinkv2\f1: number of links to the file in a version 2 inode +.br +\f3projid\f1: owner's project id (version 2 inode only) +.br +\f3uid\f1: owner's user id +.br +\f3gid\f1: owner's group id +.br +\f3atime\f1: time last accessed (seconds and nanoseconds) +.br +\f3mtime\f1: time last modified +.br +\f3ctime\f1: time created or inode last modified +.br +\f3size\f1: number of bytes in the file +.br +\f3nblocks\f1: total number of blocks in the file including +indirect and attribute +.br +\f3extsize\f1: basic/minimum extent size for the file, used only for realtime +.br +\f3nextents\f1: number of extents in the data fork +.br +\f3naextents\f1: number of extents in the attribute fork +.br +\f3forkoff\f1: attribute fork offset in the inode, +in 64-bit 
words from the start of \f3u\f1 +.br +\f3aformat\f1: format of \f3a\f1 data +(1: local attribute data, +2: extent list, +3: Btree root) +.br +\f3dmevmask\f1: DMAPI event mask +.br +\f3dmstate\f1: DMAPI state information +.br +\f3newrtbm\f1: file is the realtime bitmap and is ``new'' format +.br +\f3prealloc\f1: file has preallocated data space after EOF +.br +\f3realtime\f1: file data is in the realtime subvolume +.br +\f3gen\f1: inode generation number +.sp +The following fields are in the \f3u\f1 data fork union: +.br +\f3bmbt\f1: bmap Btree root. +This looks like a \f3bmapbtd\f1 block with redundant information removed +.br +\f3bmx\f1: array of extent descriptors +.br +\f3dev\f1: dev_t for the block or character device +.br +\f3sfdir\f1: shortform (in-inode) version 1 directory. +This consists of +a \f3hdr\f1 containing +the \f3parent\f1 inode number +and a \f3count\f1 of active entries in the directory, +followed by +an array \f3list\f1 of \f3hdr\f1.\f3count\f1 entries. +Each such entry contains +\f3inumber\f1, +\f3namelen\f1, +and \f3name\f1 string +.br +\f3sfdir2\f1: shortform (in-inode) version 2 directory. +This consists of +a \f3hdr\f1 containing +a \f3count\f1 of active entries in the directory, +an \f3i8count\f1 of entries with inumbers that don't fit in a 32-bit value, +and the \f3parent\f1 inode number, +followed by +an array \f3list\f1 of \f3hdr\f1.\f3count\f1 entries. +Each such entry contains +\f3namelen\f1, +a saved \f3offset\f1 used when the directory is converted to a larger form, +a \f3name\f1 string, +and the \f3inumber\f1 +.br +\f3symlink\f1: symbolic link string value +.sp +The following fields are in the \f3a\f1 attribute fork union if it exists: +.br +\f3bmbt\f1: bmap Btree root, as above +.br +\f3bmx\f1: array of extent descriptors +.br +\f3sfattr\f1: shortform (in-inode) attribute values. 
+This consists of +a \f3hdr\f1 containing +a \f3totsize\f1 (total size in bytes) +and a \f3count\f1 of active entries, +followed by +an array \f3list\f1 of \f3hdr\f1.\f3count\f1 entries. +Each such entry contains +\f3namelen\f1, +\f3valuelen\f1, +\f3root\f1 flag, +\f3name\f1, +and \f3value\f1 +.TP +\f3log\f1 +Log blocks contain the journal entries for XFS. +It's not useful to examine these with \f2xfs_db\f1, +use \f2xfs_logprint\f1(8) instead. +.TP +\f3rtbitmap\f1 +If the filesystem has a realtime subvolume, then the \f3rbmino\f1 field +in the superblock refers to a file that contains the realtime bitmap. +Each bit in the bitmap file controls the allocation of a single realtime extent +(set == free). +The bitmap is processed in 32-bit words, +the LSB of a word is used for the first extent controlled by that bitmap word. +The \f3atime\f1 field of the realtime bitmap inode contains a counter +that is used to control where the next new realtime file will start. +.TP +\f3rtsummary\f1 +If the filesystem has a realtime subvolume, +then the \f3rsumino\f1 field in the superblock refers to a file +that contains the realtime summary data. +The summary file contains a two-dimensional array of 16-bit values. +Each value counts the number of free extent runs +(consecutive free realtime extents) +of a given range of sizes that starts in a given bitmap block. +The size ranges are binary buckets (low size in the bucket is a power of 2). +There are as many size ranges as are necessary given the size of the +realtime subvolume. +The first dimension is the size range, +the second dimension is the starting bitmap block number +(adjacent entries are for the same size, adjacent bitmap blocks). +.TP +\f3sb\f1 +There is one sb (superblock) structure per allocation group. +It is the first disk block in the allocation group. 
+Only the first one (block 0 of the filesystem) is actually used; +the other blocks are redundant information for \f2xfs_repair\f1(8) +to use if the first superblock is damaged. +Fields defined: +.br +\f3magicnum\f1: superblock magic number, 0x58465342 ('XFSB') +.br +\f3blocksize\f1: filesystem block size in bytes +.br +\f3dblocks\f1: number of filesystem blocks present in the data subvolume +.br +\f3rblocks\f1: number of filesystem blocks present in the realtime subvolume +.br +\f3rextents\f1: number of realtime extents that \f3rblocks\f1 contain +.br +\f3uuid\f1: unique identifier of the filesystem +.br +\f3logstart\f1: starting filesystem block number of the log (journal). +If this value is 0 the log is ``external'' +.br +\f3rootino\f1: root inode number +.br +\f3rbmino\f1: realtime bitmap inode number +.br +\f3rsumino\f1: realtime summary data inode number +.br +\f3rextsize\f1: realtime extent size in filesystem blocks +.br +\f3agblocks\f1: size of an allocation group in filesystem blocks +.br +\f3agcount\f1: number of allocation groups +.br +\f3rbmblocks\f1: number of realtime bitmap blocks +.br +\f3logblocks\f1: number of log blocks (filesystem blocks) +.br +\f3versionnum\f1: filesystem version information. +This value is currently 1, 2, 3, or 4 in the low 4 bits. +If the low bits are 4 then the other bits have additional meanings. +1 is the original value. +2 means that attributes were used. +3 means that version 2 inodes (large link counts) were used. +4 is the bitmask version of the version number. +In this case, the other bits are used as flags +(0x0010: attributes were used, +0x0020: version 2 inodes were used, +0x0040: quotas were used, +0x0080: inode cluster alignment is in force, +0x0100: data stripe alignment is in force, +0x0200: the \f3shared_vn\f1 field is used, +0x1000: unwritten extent tracking is on, +0x2000: version 2 directories are in use) +.br +\f3sectsize\f1: sector size in bytes, currently always 512. 
+This is the size of the superblock and the other header blocks +.br +\f3inodesize\f1: inode size in bytes +.br +\f3inopblock\f1: number of inodes per filesystem block +.br +\f3fname\f1: obsolete, filesystem name +.br +\f3fpack\f1: obsolete, filesystem pack name +.br +\f3blocklog\f1: log2 of \f3blocksize\f1 +.br +\f3sectlog\f1: log2 of \f3sectsize\f1 +.br +\f3inodelog\f1: log2 of \f3inodesize\f1 +.br +\f3inopblog\f1: log2 of \f3inopblock\f1 +.br +\f3agblklog\f1: log2 of \f3agblocks\f1 (rounded up) +.br +\f3rextslog\f1: log2 of \f3rextents\f1 +.br +\f3inprogress\f1: \f2mkfs.xfs\f1(8) aborted before completing this filesystem +.br +\f3imax_pct\f1: maximum percentage of filesystem space used for inode blocks +.br +\f3icount\f1: number of allocated inodes +.br +\f3ifree\f1: number of allocated inodes that are not in use +.br +\f3fdblocks\f1: number of free data blocks +.br +\f3frextents\f1: number of free realtime extents +.br +\f3uquotino\f1: user quota inode number +.br +\f3pquotino\f1: project quota inode number; this is currently unused +.br +\f3qflags\f1: quota status flags +(0x01: user quota accounting is on, +0x02: user quota limits are enforced, +0x04: quotacheck has been run on user quotas, +0x08: project quota accounting is on, +0x10: project quota limits are enforced, +0x20: quotacheck has been run on project quotas) +.br +\f3flags\f1: random flags. +0x01: only read-only mounts are allowed +.br +\f3shared_vn\f1: shared version number (shared readonly filesystems) +.br +\f3inoalignmt\f1: inode chunk alignment in filesystem blocks +.br +\f3unit\f1: stripe or RAID unit +.br +\f3width\f1: stripe or RAID width +.br +\f3dirblklog\f1: log2 of directory block size (filesystem blocks) +.TP +\f3symlink\f1 +Symbolic link blocks are used only when the symbolic link value does +not fit inside the inode. +The block content is just the string value. +Bytes past the logical end of the symbolic link value have arbitrary values. 
+.SH DIAGNOSTICS +Many messages can come from the \f3check\f1 (\f3blockget\f1) command; +these are documented in \f2xfs_check\f1(8). +.SH SEE ALSO +mkfs.xfs(8), +xfs_check(8), +xfs_copy(8), +xfs_logprint(8), +xfs_ncheck(8), +xfs_repair(8), +chmod(2), +mknod(2), +stat(2), +xfs(5). diff --git a/man/man8/xfs_growfs.8 b/man/man8/xfs_growfs.8 new file mode 100644 index 000000000..5a2496c55 --- /dev/null +++ b/man/man8/xfs_growfs.8 @@ -0,0 +1,135 @@ +.TH xfs_growfs 8 +.SH NAME +xfs_growfs, xfs_info \- expand an XFS filesystem +.SH SYNOPSIS +.nf +\f3xfs_growfs\f1 [ \f3\-dilnrxV\f1 ] [ \f3\-D\f1 size ] [ \f3\-e\f1 rtextsize ] + [ \f3\-L\f1 size ] [ \f3\-m\f1 maxpct ] [ \f3-t\f1 mtab ] + [ \f3\-R\f1 size ] mount-point +\f3xfs_info\f1 [ \f3-t\f1 mtab ] mount-point +.fi +.SH DESCRIPTION +.I xfs_growfs +expands an existing XFS filesystem (see +.IR xfs (5)). +The +.I mount-point +argument is the pathname of the directory where the filesystem +is mounted. +The filesystem must be mounted to be grown (see +.IR mount (8)). +The existing contents of the filesystem are undisturbed, and the added space +becomes available for additional file storage. +.PP +.I xfs_info +is equivalent to invoking +.I xfs_growfs +with the +.B \-n +option (see discussion below). +.PP +The options to +.I xfs_growfs +are: +.TP +\f3\-d\f1, \f3\-D\f1 \f2size\f1 +Specifies that the data section of the filesystem should be grown. +If the +.B \-D +.I size +option is given, the data section is grown to that size, otherwise +the data section is grown to the largest size possible. +The size +is expressed in +filesystem blocks. +.TP +.B \-e +Allows the real-time extent size to be specified. +In +.IR mkfs.xfs (8) +this is specified with +.B \-r +.BI extsize= nnnn. +.TP +.B \-i +The new log is an internal log +(inside the data section). +.TP +\f3\-l\f1, \f3\-L\f1 \f2size\f1 +Specifies that the log section of the filesystem should be grown, +shrunk, or moved. 
+If the +.B \-L +.I size +option is given, the log section is changed to be that size, +if possible. +The size is expressed in +filesystem blocks. +The size of an internal log must be smaller than the size +of an allocation group (this value is printed at \f2mkfs\f1(8) time). +If neither +.B \-i +nor +.B \-x +is given with +.BR \-l , +the log continues to be internal or external as it was before. +.TP +.B \-m +Specify a new value for the maximum percentage +of space in the filesystem that can be allocated as inodes. +In +.I mkfs.xfs +this is specified with +.B -i +.BI maxpct= nn. +.TP +.B \-n +Specifies that no change to the filesystem is to be made. +The filesystem geometry is printed, and argument checking is performed, +but no growth occurs. +.TP +\f3\-r\f1, \f3\-R\f1 \f2size\f1 +Specifies that the real-time section of the filesystem should be grown. +If the +.B \-R +.I size +option is given, the real-time section is grown to that size, otherwise +the real-time section is grown to the largest size possible. +The size +is expressed in +filesystem blocks. +The filesystem does not need to have contained a real-time section before +the \f2xfs_growfs\f1 operation. +.TP +.B \-t +Specifies an alternate mount table file (default is +.IR /etc/mtab ). +This is used when working with filesystems mounted without writing to +.I /etc/mtab +file - refer to +.BR mount (8) +for further details. +.TP +.PP +.I xfs_growfs +is most often used in conjunction with +logical volumes +(see +.IR lvm (8) +). +However, it can also be used on a regular disk partition, for example if a +partition has been enlarged while retaining the same starting block. +.SH PRACTICAL USE +Filesystems normally occupy all of the space on the device where they +reside. +In order to grow a filesystem, it is necessary to provide added +space for it to occupy. +Therefore there must be at least one spare new +disk partition available. +Adding the space is done through the mechanism of +logical volumes. 
+.SH SEE ALSO +mkfs.xfs(8), +lvm(8), +mount(8). diff --git a/man/man8/xfs_logprint.8 b/man/man8/xfs_logprint.8 new file mode 100644 index 000000000..15ddc18d4 --- /dev/null +++ b/man/man8/xfs_logprint.8 @@ -0,0 +1,86 @@ +.TH xfs_logprint 8 +.SH NAME +xfs_logprint \- print the log of an XFS filesystem +.SH SYNOPSIS +.nf +\f3xfs_logprint\f1 [ options ] device-name +\f3xfs_logprint \-f\f1 [ options ] filename +.fi +.SH DESCRIPTION +.I xfs_logprint +prints the log of an XFS filesystem (see +.IR xfs (5)). +The +.I device-name +argument is the pathname of the partition or logical volume +containing the filesystem. +The contents of the filesystem remain undisturbed. +There are two major modes of operation in +.IR xfs_logprint . +.PP +One mode is better for filesystem operation debugging. +It is called the transactional view and is enabled through the \f3\-t\f1 +option. +The transactional view prints only the portion of the log that +pertains to recovery. +In other words, it prints out complete transactions between the tail +and the head. +This view tries to display each transaction without +regard to how they are split across log records. +.PP +The second mode starts printing out information from the beginning of the log. +Some error blocks might print out in the beginning because the last log +record usually overlaps the oldest log record. +A message is +printed when the physical end of the log is reached and when the +logical end of the log is reached. +A log record view is displayed +one record at a time. +Transactions that span log records may not be +decoded fully. +.PP +Common options are: +.TP +\f3\-b\f1 +Extract and print buffer information. +Only used in transactional view. +.TP +\f3\-D\f1 +Don't decode anything; +just print data. +.TP +\f3\-e\f1 +Exit when an error is found in the log. +Normally, +.I xfs_logprint +tries to continue and unwind from bad logs. +However, sometimes it just dies in bad ways. +Using this option prevents core dumps. 
+.TP +\f3\-f\f1 +The log is a file. +.TP +\f3\-i\f1 +Extract and print inode information. +Only used in transactional view. +.TP +\f3\-q\f1 +Extract and print quota information. +Only used in transactional view. +.TP +\f3\-n\f1 +Don't try and interpret log data; +just interpret log header information. +.TP +\f3\-o\f1 +Also print buffer data in hex. +Normally, buffer data is just decoded, so better information can be printed. +.TP +\f3\-s\f1 \f2start-block\f1 +Override any notion of where to start printing. +.TP +\f3\-t\f1 +Print out the transactional view. +.SH SEE ALSO +mkfs.xfs(8), +mount(8). diff --git a/man/man8/xfs_mkfile.8 b/man/man8/xfs_mkfile.8 new file mode 100644 index 000000000..2cc151741 --- /dev/null +++ b/man/man8/xfs_mkfile.8 @@ -0,0 +1,27 @@ +.TH xfs_mkfile 8 +.SH NAME +xfs_mkfile \- create an XFS file +.SH SYNOPSIS +.nf +\f3xfs_mkfile\f1 [\f3\-v\f1] [\f3\-n\f1] \c +\f2size\f1[\f3k\f1|\f3b\f1|\f3m\f1|\f3g\f1] \f2filename\f1... +.fi +.SH DESCRIPTION +.I xfs_mkfile +creates one or more files. +The file is padded with zeroes by +default. +The default size is in bytes, but it can be +flagged as kilobytes, blocks, megabytes, or gigabytes with the \f3k\f1, +\f3b\f1, \f3m\f1, or \f3g\f1 suffixes, respectively. +.SH OPTIONS +.TP +\f3\-v\f1 +Verbose. +Report the names and sizes of created files. +.TP +\f3\-n\f1 +No bytes. +Create a holey file - that is, +do not write out any data, just +seek to end of file and write a block. diff --git a/man/man8/xfs_ncheck.8 b/man/man8/xfs_ncheck.8 new file mode 100644 index 000000000..201b43e28 --- /dev/null +++ b/man/man8/xfs_ncheck.8 @@ -0,0 +1,53 @@ +.TH xfs_ncheck 8 +.SH NAME +xfs_ncheck \- generate pathnames from i-numbers for XFS +.SH SYNOPSIS +.nf +\f3xfs_ncheck\f1 [ \f3\-i\f1 ino ] ... \c +[ \f3\-s\f1 ] xfs_special +.sp .8v +\f3xfs_ncheck\f1 \f3\-f\f1 [ \f3\-i\f1 ino ] ... 
\c +[ \f3\-s\f1 ] file +.fi +.SH DESCRIPTION +.I xfs_ncheck +with no +.B \-i +arguments generates an inode number and pathname list of all +files on the given filesystem. +Names of directory files are followed by +.BR /. . +The output is not sorted in any particular order. +The filesystem to be examined is specified by the +.I xfs_special +argument, which should be the disk or volume device for the filesystem. +Filesystems stored in files can also be checked, using the \f3\-f\f1 flag. +.PP +The options to \f2xfs_ncheck\f1 are: +.TP 9 +.B \-f +Specifies that the special device is actually a file (see the +\f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option). +This might happen if an image copy +of a filesystem has been made into an ordinary file. +.TP +.B \-s +Limits the report to special files and files with setuserid mode. +This option may be used to detect violations of security policy. +.TP +.BI \-i " ino" +Limits the report to only those files whose inode numbers follow. +May be given multiple times to select multiple inode numbers. +.PP +If the filesystem is seriously corrupted, or very busy and looks +like it is corrupt, a message of the form that would be generated by +.IR xfs_check (8) +may appear. +.PP +.I xfs_ncheck +is only useful with XFS filesystems. +.SH SEE ALSO +mkfs.xfs(8), +xfs_ncheck(8), +xfs_check(8), +xfs(5). diff --git a/man/man8/xfs_repair.8 b/man/man8/xfs_repair.8 new file mode 100644 index 000000000..014620cce --- /dev/null +++ b/man/man8/xfs_repair.8 @@ -0,0 +1,353 @@ +.TH xfs_repair 8 +.SH NAME +xfs_repair \- repair an XFS filesystem +.SH SYNOPSIS +.nf +\f3xfs_repair\f1 [ \f3\-n\f1 ] [ \f3\-o\f1 subopt[=value] ] xfs_special +.sp .8v +\f3xfs_repair\f1 \f3\-f\f1 [ \f3\-n\f1 ] [ \f3\-o\f1 subopt[=value] ] ... file +.fi +.SH DESCRIPTION +.I xfs_repair +repairs corrupt or damaged XFS filesystems +(see +.IR xfs (5)). 
+The filesystem is specified using the +.I xfs_special +argument which should be the device name of the +disk partition or volume containing +the filesystem. +If given the name of a block device, +.I xfs_repair +will attempt to find the raw device associated +with the specified block device and will use the raw device +instead. +.PP +Regardless, the filesystem to be repaired +must be unmounted, +otherwise, the resulting filesystem may be inconsistent or corrupt. +.PP +The options to \f2xfs_repair\f1 are: +.TP +.B \-f +Specifies that the special device is actually a file (see the +\f2mkfs.xfs\f1 \f3\-d\f1 \f2file\f1 option). +This might happen if an image copy +of a filesystem has been copied or written into an ordinary file. +.TP +.B \-n +No modify mode. +Specifies that +.I xfs_repair +should not modify the filesystem but should only scan the +filesystem and indicate what repairs would have been made. +.TP +.B \-o +Override what the program might conclude about the filesystem +if left to its own devices. +.IP +The +.B assume_xfs +suboption +specifies that the filesystem is an XFS filesystem. +Normally, if +.I xfs_repair +cannot find an XFS superblock, it checks to see if the +filesystem is an EFS filesystem before it tries to +regenerate the XFS superblock. +If the +.B assume_xfs +option is in effect, +.I xfs_repair +will assume that the filesystem is an XFS filesystem and +will ignore an EFS superblock if one is found. +.SS Checks Performed +Inconsistencies corrected include the following: +.TP +1. +Inode and inode blockmap (addressing) checks: +bad magic number in inode, +bad magic numbers in inode blockmap blocks, +extents out of order, +incorrect number of records in inode blockmap blocks, +blocks claimed that are not in a legal data area of the filesystem, +blocks that are claimed by more than one inode. +.TP +2. 
+Inode allocation map checks: +bad magic number in inode map blocks, +inode state as indicated by map (free or in-use) inconsistent +with state indicated by the inode, +inodes referenced by the filesystem that do not appear in +the inode allocation map, +inode allocation map referencing blocks that do not appear +to contain inodes. +.TP +3. +Size checks: +number of blocks claimed by inode inconsistent with inode size, +directory size not block aligned, +inode size not consistent with inode format. +.TP +4. +Directory checks: +bad magic numbers in directory blocks, +incorrect number of entries in a directory block, +bad freespace information in a directory leaf block, +entry pointing to an unallocated (free) or out +of range inode, +overlapping entries, +missing or incorrect dot and dotdot entries, +entries out of hashvalue order, +incorrect internal directory pointers, +directory type not consistent with inode format and size. +.TP +5. +Pathname checks: +files or directories not referenced by a pathname starting from +the filesystem root, +illegal pathname components. +.TP +6. +Link count checks: +link counts that do not agree with the number of +directory references to the inode. +.TP +7. +Freemap checks: +blocks claimed free by the freemap but also claimed by an inode, +blocks unclaimed by any inode but not appearing in the freemap. +.TP +8. +Super Block checks: +total free block and/or free i-node count incorrect, +filesystem geometry inconsistent, +secondary and primary superblocks contradictory. +.PP +Orphaned files and directories (allocated, in-use but unreferenced) are +reconnected by placing them in the +.I lost+found +directory. +The name assigned is the inode number. +.SS Disk Errors +.I xfs_repair +aborts on most disk I/O errors. 
+Therefore, if you are trying +to repair a filesystem that was damaged due to a disk drive failure, +steps should be taken to ensure that +all blocks in the filesystem are readable and writeable +before attempting to use +.I xfs_repair +to repair the filesystem. +A possible method is using +.IR dd (8) +to copy the data onto a good disk. +.SS lost+found +The directory +.I lost+found +does not have to already exist in the filesystem being repaired. +If the directory does not exist, it is automatically created. +If the \f2lost+found\f1 directory already exists, +the \f2lost+found\f1 +directory is deleted and recreated every time \f2xfs_repair\f1 +runs. +This ensures that there are no name conflicts in \f2lost+found\f1. +However, if you rename a file in \f2lost+found\f1 and leave it there, +if \f2xfs_repair\f1 is run again, that file is renamed back to +its inode number. +.SS Corrupted Superblocks +XFS has both primary and secondary superblocks. +\f2xfs_repair\f1 uses information in the primary superblock +to automatically find and validate the primary superblock +against the secondary superblocks before proceeding. +Should the primary be too corrupted to be useful in locating +the secondary superblocks, the program scans the filesystem +until it finds and validates some secondary superblocks. +At that point, it generates a primary superblock. +.SS Quotas +If quotas are in use, it is possible that \f2xfs_repair\f1 will clear +some or all of the filesystem quota information. +If so, the program issues a warning just before it terminates. +If all quota information is lost, quotas are disabled and the +program issues a warning to that effect. +.PP +Note that \f2xfs_repair\f1 does not check the validity of quota limits. +It is recommended that you check the quota limit information manually +after \f2xfs_repair\f1. 
+Also, space usage information is automatically regenerated the +next time the filesystem is mounted with quotas turned on, so the +next quota mount of the filesystem may take some time. +.SH DIAGNOSTICS +.I xfs_repair +issues informative messages as it proceeds +indicating what it has found that is abnormal or any corrective +action that it has taken. +Most of the messages are completely understandable only to those +who are knowledgeable about the structure of the filesystem. +Some of the more common messages are explained here. +Note that the language of the messages is slightly different +if \f2xfs_repair\f1 is run in no-modify mode because the program is not +changing anything on disk. +No-modify mode indicates what it would do to repair the filesystem +if run without the no-modify flag. +.PP +disconnected inode \f3xxxx\f1, moving to \f2lost+found\f1 +.IP +An inode numbered +.B xxxx +was not connected to the filesystem +directory tree and was reconnected to the \f2lost+found\f1 directory. +The inode is assigned the name of its inode number (i-number). +If a \f2lost+found\f1 directory does not exist, it is automatically +created. +.PP +disconnected dir inode \f3xxxx\f1, moving to \f2lost+found\f1 +.IP +As above only the inode is a directory inode. +If a directory inode is attached to \f2lost+found\f1, all of its +children (if any) stay attached to the directory and therefore +get automatically reconnected when the directory is reconnected. +.PP +imap claims in-use inode \f3xxxx\f1 is free, correcting imap +.IP +The inode allocation map thinks that inode \f3xxxx\f1 is +free whereas examination of the inode indicates that the +inode may be in use (although it may be disconnected). +The program updates the inode allocation map. +.PP +imap claims free inode \f3xxxx\f1 is in use, correcting imap +.IP +The inode allocation map thinks that inode \f3xxxx\f1 is +in use whereas examination of the inode indicates that the +inode is not in use and therefore is free. 
+The program updates the inode allocation map. +.PP +resetting inode \f3xxxx\f1 nlinks from \f3x\f1 to \f3y\f1 +.IP +The program detected a mismatch between the +number of valid directory entries referencing inode \f3xxxx\f1 +and the number of references recorded in the inode and corrected +the number in the inode. +.PP +\f3fork-type\f1 fork in ino \f3xxxx\f1 claims used block \f3yyyy\f1 +.IP +Inode \f3xxxx\f1 claims a block \f3yyyy\f1 that is used (claimed) +by either another inode or the filesystem itself for metadata storage. +The \f3fork-type\f1 is either \f3data\f1 or \f3attr\f1 +indicating whether the problem lies in the portion of the +inode that tracks regular data or the portion of the inode +that stores XFS attributes. +If the inode is a real-time (rt) inode, the message says so. +Any inode that claims blocks used by the filesystem is deleted. +If two or more inodes claim the same block, they are both deleted. +.PP +\f3fork-type\f1 fork in ino \f3xxxx\f1 claims dup extent ... +.IP +Inode \f3xxxx\f1 claims a block in an extent known to be +claimed more than once. +The offset in the inode, start and length of the extent are given. +The message is slightly different +if the inode is a real-time (rt) inode and the extent is therefore +a real-time (rt) extent. +.PP +inode \f3xxxx\f1 - bad extent ... +.IP +An extent record in the blockmap of inode \f3xxxx\f1 claims +blocks that are out of the legal range of the filesystem. +The message supplies the start, end, and file offset of +the extent. +The message is slightly different +if the extent is a real-time (rt) extent. +.PP +bad \f3fork-type\f1 fork in inode \f3xxxx\f1 +.IP +There was something structurally wrong or inconsistent with the +data structures that map offsets to filesystem blocks. +.PP +cleared inode \f3xxxx\f1 +.IP +There was something wrong with the inode that +was uncorrectable so the program freed the inode.
+This usually happens because the inode claims +blocks that are used by something else or the inode itself +is badly corrupted. +Typically, this message +is preceded by one or more messages indicating why the +inode needed to be cleared. +.PP +bad attribute fork in inode \f3xxxx\f1, clearing attr fork +.IP +There was something wrong with the portion of the inode that +stores XFS attributes (the attribute fork) so the program reset +the attribute fork. +As a result of this, all attributes on that inode are lost. +.PP +correcting nextents for inode \f3xxxx\f1, was \f3x\f1 - counted \f3y\f1 +.IP +The program found that the number of extents used to store +the data in the inode is wrong and corrected the number. +The message refers to nextents if the count is wrong +on the number of extents used to store attribute information. +.PP +entry \f3"name"\f1 in dir \f3xxxx\f1 not consistent +with .. +value (\f3yyyy\f1) in dir ino \f3xxxx\f1, +junking entry \f3"name"\f1 in directory inode \f3xxxx\f1 +.IP +The entry \f3"name"\f1 in directory inode \f3xxxx\f1 references a +directory inode \f3yyyy\f1. +However, the ..\& entry in directory \f3yyyy\f1 does not point +back to directory \f3xxxx\f1, +so the program deletes the entry \f3"name"\f1 in directory inode +\f3xxxx\f1. +If the directory inode \f3yyyy\f1 winds up becoming a disconnected +inode as a result of this, it is moved to \f2lost+found\f1 later. +.PP +entry \f3"name"\f1 in dir \f3xxxx\f1 references already +connected dir ino \f3yyyy\f1, +junking entry \f3"name"\f1 in directory inode \f3xxxx\f1 +.IP +The entry \f3"name"\f1 in directory inode \f3xxxx\f1 points to a +directory inode \f3yyyy\f1 that is known to be a child of another +directory. +Therefore, the entry is invalid and is deleted. +This message refers to an entry in a small directory. +If this were a large directory, the last phrase would read +"will clear entry". 
+.PP +entry references free inode \f3xxxx\f1 in directory \f3yyyy\f1, +will clear entry +.IP +An entry in directory inode \f3yyyy\f1 references an inode \f3xxxx\f1 +that is known to be free. +The entry is therefore invalid and is deleted. +This message refers to a large directory. +If the directory were small, the message would read "junking entry ...". +.SH EXIT STATUS +.I xfs_repair -n +(no modify mode) +will return a status of 1 if filesystem corruption was detected and +0 if no filesystem corruption was detected. +.I xfs_repair +run without the -n option will always return a status code of 0. +.SH BUGS +.I xfs_repair +does not do a thorough job on XFS extended attributes. +The structure of the attribute fork will be consistent, +but only the contents of attribute forks that will fit into +an inode are checked. +This limitation will be fixed in the future. +.PP +The no-modify mode (\f3\-n\f1 option) is not completely +accurate. +It does not catch inconsistencies in the freespace and inode +maps, particularly lost blocks or subtly corrupted maps (trees). +.PP +The no-modify mode can generate repeated warnings about +the same problems because it cannot fix the problems as they +are encountered. +.SH SEE ALSO +dd(1), +mkfs.xfs(8), +xfs_check(8), +xfs(5). diff --git a/mkfile/Makefile b/mkfile/Makefile new file mode 100644 index 000000000..fc274e821 --- /dev/null +++ b/mkfile/Makefile @@ -0,0 +1,45 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = .. +include $(TOPDIR)/include/builddefs + +CMDTARGET = xfs_mkfile +CFILES = xfs_mkfile.c + +default: $(CMDTARGET) + +include $(BUILDRULES) + +install: default + $(INSTALL) -m 755 -d $(XFS_CMDS_BIN_DIR) + $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_BIN_DIR) diff --git a/mkfile/xfs_mkfile.c b/mkfile/xfs_mkfile.c new file mode 100644 index 000000000..f880d932c --- /dev/null +++ b/mkfile/xfs_mkfile.c @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. 
Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * Make file utility for xfs. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef O_DIRECT +#define O_DIRECT 0 /* nathans TODO - remove this when direct IO done */ + +#define MAXBUFFERSIZE (256 * 1024) + +static void usage(void); + +int +main(int argc, char **argv) /* creates each named file at the requested size; exit status is non-zero if any file failed */ +{ + int fd; + loff_t result; + loff_t size = 0; + loff_t mult = 0; + int bytes = 0; + loff_t wrote = 0; + int len = 0; + int c; + int errflg = 0; + int errs = 0; + int nobytes = 0; + int prealloc = 0; + int verbose = 0; + struct dioattr da; + char *progname; + void *buf = NULL; + int buflen = 0, nbuflen; + int bufalign = 0, nbufalign, bufmin; + int oflags; + xfs_flock64_t flck; + + progname = basename(argv[0]); + while ((c = getopt(argc, argv, "npvV")) != EOF) { + switch(c) { + case 'n': + nobytes++; + break; + case 'p': + prealloc++; + break; + case 'v': + verbose++; + break; + case 'V': + printf("%s version %s\n", progname, VERSION); + break; + default: + errflg++; + break; + } + } + + if (argc < optind + 2 || errflg) + usage(); + + mult = 1; + + len = strlen(argv[optind]); + + if (len > 0 && isalpha((unsigned char)argv[optind][len-1])) { /* skip suffix parsing for an empty size argument; ctype functions need an unsigned char value */ + switch (argv[optind][len-1]) { + case 'k': + case 'K': + mult = 1024; + break; + case 'b': + case 'B': + mult = 
512; + break; + case 'm': + case 'M': + mult = 1024; + mult *= 1024; + break; + case 'g': + case 'G': + mult = 1024; + mult *= 1024; + mult *= 1024; + break; + default: + fprintf(stderr, "unknown size %s\n", argv[optind]); + usage(); + } + + argv[optind][len-1] = '\0'; + } + + size = atoll(argv[optind]) * mult; + + optind++; + + while (optind < argc) { + if (verbose) + fprintf(stdout, "%s %lld bytes %s\n", + argv[optind], size, + prealloc + ? "(pre-allocated)" + : ""); + + oflags = O_CREAT|O_TRUNC|O_WRONLY|(nobytes ? 0 : O_DIRECT); + + fd = open(argv[optind], oflags, 0600); + + if ( (oflags & O_DIRECT) + && ( (fd < 0 && errno == EINVAL) + || ioctl(fd, XFS_IOC_DIOINFO, &da) < 0)) { + + close(fd); + + oflags &= ~O_DIRECT; + + fd = open(argv[optind], oflags, 0600); + } + + if (fd < 0) { + perror(argv[optind]); + optind++; + errs++; + continue; + } + + if (size == 0) { + close(fd); + optind++; + continue; + } + + if ((result = lseek64(fd, size - 1, SEEK_SET)) < 0LL) { + /* + * This check doesn't actually work for 6.2 + * efs and nfs2, although it should. 
+ */ + fprintf(stderr, + "lseek64 error, result = %lld\n", result); + if (errno) + perror(argv[optind]); + errs++; + } else if (nobytes) { + if (write(fd, "", 1) < 0) { + perror(argv[optind]); + errs++; + } + } else { + flck.l_whence = SEEK_SET; + flck.l_start = 0LL; + flck.l_len = size; +#if 0 + (void)ioctl(fd, XFS_IOC_RESVSP64, &flck); + + if (prealloc) { + if ( close(fd) < 0 ) { + perror(argv[optind]); + unlink(argv[optind]); + errs++; + } + + optind++; + + continue; + } +#endif + if (oflags & O_DIRECT) { + nbufalign = da.d_mem; + + if ( da.d_miniosz <= MAXBUFFERSIZE + && MAXBUFFERSIZE <= da.d_maxiosz) + nbuflen = MAXBUFFERSIZE; + else if (da.d_maxiosz < MAXBUFFERSIZE) + nbuflen = da.d_maxiosz; + else + nbuflen = da.d_miniosz; + + bufmin = da.d_miniosz; + } else { + nbuflen = MAXBUFFERSIZE; + nbufalign = sizeof(long); + bufmin = 0; + } + + if (nbuflen > buflen || nbufalign > bufalign) { + if (buf) + free(buf); + buf = memalign(nbufalign, nbuflen); + buflen = nbuflen; + bzero(buf, nbuflen); + bufalign = nbufalign; /* record the alignment of the buffer just allocated so later files can reuse it */ + } + + wrote = 0; + + lseek64(fd, 0LL, SEEK_SET); + + while (wrote < size) { + if (size - wrote >= buflen) + bytes = buflen; + else if (bufmin) + bytes = roundup(size - wrote, bufmin); + else + bytes = size - wrote; + + len = write(fd, buf, bytes); + + if (len < 0) { + perror(argv[optind]); + unlink(argv[optind]); + errs++; + break; + } + + wrote += len; + } + + if (wrote > size && ftruncate64(fd, size) < 0) { + perror(argv[optind]); + unlink(argv[optind]); + errs++; + } + } + + if ( close(fd) < 0 ) { + perror(argv[optind]); + unlink(argv[optind]); + errs++; + } + + optind++; + } + + return errs != 0; +} + +static void +usage(void) +{ + fprintf(stderr, "mkfile: [-npv] <size> <file> ...\n"); + exit(2); +} diff --git a/mkfs/Makefile b/mkfs/Makefile new file mode 100644 index 000000000..d6f813d13 --- /dev/null +++ b/mkfs/Makefile @@ -0,0 +1,59 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = .. 
+include $(TOPDIR)/include/builddefs + +CMDTARGET = mkfs.xfs +CMDDEPS = $(LIBXFS) +MAXTRRES = maxtrres + +CFILES = xfs_mkfs.c mountinfo.c proto.c +HFILES = xfs_mkfs.h mountinfo.h proto.h volume.h +LLDLIBS = $(LIBXFS) $(LIBUUID) $(LIBLVM) +MAXTRLIBS = $(LIBXFS) $(LIBUUID) +LSRCFILES = $(MAXTRRES).c +LDIRT = $(MAXTRRES) $(MAXTRRES).h + +default: $(MAXTRRES).h $(CMDTARGET) + +include $(BUILDRULES) + +install: default + $(INSTALL) -m 755 -d $(XFS_CMDS_SBIN_DIR) + $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_SBIN_DIR) + +$(MAXTRRES): + $(CCF) $@.c -o $@ $(LDFLAGS) $(MAXTRLIBS) + +$(MAXTRRES).h: $(MAXTRRES) + ./$(MAXTRRES) > $@ || ( rm -f $@ && exit 1 ) diff --git a/mkfs/maxtrres.c b/mkfs/maxtrres.c new file mode 100644 index 000000000..638d945d4 --- /dev/null +++ b/mkfs/maxtrres.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * maxtrres
+ *
+ * Compute the maximum transaction reservation for every legal
+ * combination of block size, inode size, directory version,
+ * and directory block size.
+ * Generates a table compiled into mkfs, to control the default
+ * and minimum log sizes.
+ */
+
+#include <libxfs.h>
+#include "xfs_mkfs.h"
+
+/*
+ * Log count for each transaction type.  max_trans_res() walks this
+ * structure in step with mp->m_reservations, one uint at a time, so
+ * the entries must stay in the same order as the reservation fields.
+ */
+xfs_trans_reservations_t tr_count = {
+	XFS_WRITE_LOG_COUNT,		/* extent alloc trans */
+	XFS_ITRUNCATE_LOG_COUNT,	/* truncate trans */
+	XFS_RENAME_LOG_COUNT,		/* rename trans */
+	XFS_LINK_LOG_COUNT,		/* link trans */
+	XFS_REMOVE_LOG_COUNT,		/* unlink trans */
+	XFS_SYMLINK_LOG_COUNT,		/* symlink trans */
+	XFS_CREATE_LOG_COUNT,		/* create trans */
+	XFS_MKDIR_LOG_COUNT,		/* mkdir trans */
+	XFS_DEFAULT_LOG_COUNT,		/* inode free trans */
+	XFS_DEFAULT_LOG_COUNT,		/* inode update trans */
+	XFS_DEFAULT_LOG_COUNT,		/* fs data section grow trans */
+	XFS_DEFAULT_LOG_COUNT,		/* sync write inode trans */
+	XFS_ADDAFORK_LOG_COUNT,		/* cvt inode to attributed trans */
+	XFS_DEFAULT_LOG_COUNT,		/* write setuid/setgid file */
+	XFS_ATTRINVAL_LOG_COUNT,	/* attr fork buffer invalidation */
+	XFS_ATTRSET_LOG_COUNT,		/* set/create an attribute */
+	XFS_ATTRRM_LOG_COUNT,		/* remove an attribute */
+	XFS_DEFAULT_LOG_COUNT,		/* clear bad agi unlinked ino bucket */
+	XFS_DEFAULT_PERM_LOG_COUNT,	/* grow realtime allocations */
+	XFS_DEFAULT_LOG_COUNT,		/* grow realtime zeroing */
+	XFS_DEFAULT_LOG_COUNT,		/* grow realtime freeing */
+};
+
+/*
+ * Return the largest transaction reservation for the geometry in mp.
+ *
+ * mp->m_reservations.tr_attrset is first recomputed for an attribute
+ * with a MAXNAMELEN name and a 65536 byte value, then every
+ * reservation is scanned.  *mul receives the tr_count entry that
+ * belongs to the largest reservation, or 0 if no reservation is
+ * positive.
+ */
+static int
+max_trans_res(
+	xfs_mount_t			*mp,
+	int				*mul)
+{
+	uint				*p;
+	uint				*q;
+	int				rval;
+	xfs_trans_reservations_t	*tr;
+	xfs_da_args_t			args;
+	int				local;
+	int				size;
+	int				nblks;
+	int				res;
+
+	/* Never hand an indeterminate log factor back to the caller. */
+	*mul = 0;
+
+	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
+
+	/*
+	 * Fill in the arg structure for this request.
+	 */
+	memset(&args, 0, sizeof(args));
+	args.name = NULL;
+	args.namelen = MAXNAMELEN;
+	args.value = NULL;
+	args.valuelen = 65536;
+	args.flags = 0;
+	args.hashval = 0;
+	args.dp = NULL;
+	args.firstblock = NULL;
+	args.flist = NULL;
+	args.whichfork = XFS_ATTR_FORK;
+	args.oknoent = 1;
+
+	/*
+	 * Determine space new attribute will use, and if it will be
+	 * inline or out of line.
+	 */
+	size = libxfs_attr_leaf_newentsize(
+			&args, mp->m_sb.sb_blocksize, &local);
+
+	if (local) {
+		/*
+		 * stdout is redirected into the generated maxtrres.h by
+		 * the mkfs Makefile, so diagnostics must go to stderr.
+		 */
+		fprintf(stderr, "Uh-oh.. attribute is local\n");
+	} else {
+		/* Out of line attribute, cannot double split, but make
+		 * room for the attribute value itself.
+		 */
+		nblks += XFS_B_TO_FSB(mp, size);
+		nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK);
+	}
+	res = XFS_ATTRSET_LOG_RES(mp, nblks);
+	mp->m_reservations.tr_attrset = res;
+
+	/*
+	 * Walk the reservation structure and tr_count in lock step,
+	 * treating both as arrays of uint.
+	 */
+	for (rval = 0, tr = &mp->m_reservations, p = (uint *)tr,
+	     q = (uint *)&tr_count;
+	     p < (uint *)(tr + 1);
+	     p++, q++) {
+		if ((int)*p > rval) {
+			rval = (int)*p;
+			*mul = (int)*q;
+		}
+	}
+	return rval;
+}
+
+/*
+ * Print one "#define MAXTRRES_B<bl>_I<il>_D<dl>_V<dv>" line on stdout
+ * for every combination of block size log (bl), inode size log (il),
+ * directory block size log (dl) and directory version (dv) accepted by
+ * the loops below.  Version 1 directories are only emitted for
+ * dl == bl.  The value is the maximum reservation converted to
+ * filesystem blocks; the log factor is appended as a comment.
+ */
+int
+main(int argc, char **argv)
+{
+	int		bl;
+	int		dl;
+	int		dv;
+	int		i;
+	int		il;
+	xfs_mount_t	m;
+	xfs_sb_t	*sbp;
+	int		mul;
+
+	progname = basename(argv[0]);
+	if (argc > 1) {
+		fprintf(stderr, "Usage: %s\n", progname);
+		return 1;
+	}
+	memset(&m, 0, sizeof(m));
+	sbp = &m.m_sb;
+	sbp->sb_magicnum = XFS_SB_MAGIC;
+	sbp->sb_sectlog = 9;
+	sbp->sb_sectsize = 1 << sbp->sb_sectlog;
+	for (bl = XFS_MIN_BLOCKSIZE_LOG; bl <= XFS_MAX_BLOCKSIZE_LOG; bl++) {
+		sbp->sb_blocklog = bl;
+		sbp->sb_blocksize = 1 << bl;
+		sbp->sb_agblocks = XFS_AG_MIN_BYTES / (1 << bl);
+		for (il = XFS_DINODE_MIN_LOG; il <= XFS_DINODE_MAX_LOG; il++) {
+			/* skip inode sizes too large for this block size */
+			if ((1 << il) > (1 << bl) / XFS_MIN_INODE_PERBLOCK)
+				continue;
+			sbp->sb_inodelog = il;
+			sbp->sb_inopblog = bl - il;
+			sbp->sb_inodesize = 1 << il;
+			sbp->sb_inopblock = 1 << (bl - il);
+			for (dl = bl; dl <= XFS_MAX_BLOCKSIZE_LOG; dl++) {
+				sbp->sb_dirblklog = dl - bl;
+				for (dv = 1; dv <= 2; dv++) {
+					if (dv == 1 && dl != bl)
+						continue;
+					sbp->sb_versionnum =
+						XFS_SB_VERSION_4 |
+						(dv == 2 ?
+						 XFS_SB_VERSION_DIRV2BIT :
+						 0);
+					libxfs_mount(&m, sbp, 0, 0, 0, 0);
+					i = max_trans_res(&m, &mul);
+					/*
+					 * Cast so the argument matches
+					 * %lld whatever integer type the
+					 * macro yields.
+					 */
+					printf(
+			"#define\tMAXTRRES_B%d_I%d_D%d_V%d\t%lld\t"
+			"/* LOG_FACTOR %d */\n",
+						bl, il, dl, dv,
+						(long long)XFS_B_TO_FSB(&m, i),
+						mul);
+					libxfs_umount(&m);
+				}
+			}
+		}
+	}
+	return 0;
+}
diff --git a/mkfs/proto.c b/mkfs/proto.c
new file mode 100644
index 000000000..8570d140d
--- /dev/null
+++ b/mkfs/proto.c
@@ -0,0 +1,769 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include +#include "proto.h" + +/* + * Prototypes for internal functions. + */ +extern long long cvtnum(int blocksize, char *s); +extern void parseproto(xfs_mount_t *mp, xfs_inode_t *pip, char **pp, + char *name); +static long getnum(char **pp); +static char *getstr(char **pp); +static void fail(char *msg, int i); +static void getres(xfs_trans_t *tp, uint blocks); +static void rsvfile(xfs_mount_t *mp, xfs_inode_t *ip, long long len); +static int newfile(xfs_trans_t *tp, xfs_inode_t *ip, xfs_bmap_free_t *flist, + xfs_fsblock_t *first, int dolocal, int logit, char *buf, int len); +static char *newregfile(char **pp, int *len); +static void rtinit(xfs_mount_t *mp); +static long filesize(int fd); + +/* + * Use this for block reservations needed for mkfs's conditions + * (basically no fragmentation). 
+ *
+ * Both macros expand a variable named mp from the enclosing scope.
+ */
+#define	MKFS_BLOCKRES_INODE	\
+	((uint)(XFS_IALLOC_BLOCKS(mp) + (XFS_IN_MAXLEVELS(mp) - 1)))
+#define	MKFS_BLOCKRES(rb)	\
+	((uint)(MKFS_BLOCKRES_INODE + XFS_DA_NODE_MAXDEPTH + \
+	(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1) + (rb)))
+
+
+/*
+ * Load the prototype description.
+ *
+ * With fname == NULL a built-in default string is returned.  Otherwise
+ * the whole file is read into a heap buffer, which must be non-empty
+ * and end in a newline, and the returned pointer is positioned after
+ * the three leading compatibility fields.  Any failure prints a
+ * message and exits.
+ */
+char *
+setup_proto(
+	char	*fname)
+{
+	char		*buf;
+	static char	dflt[] = "d--755 0 0 $";
+	int		fd;
+	long		size;
+
+	if (!fname)
+		return dflt;
+	if ((fd = open(fname, O_RDONLY)) < 0 || (size = filesize(fd)) < 0) {
+		fprintf(stderr, "%s: failed to open %s: %s\n",
+			progname, fname, strerror(errno));
+		exit(1);
+	}
+	buf = malloc(size + 1);
+	if (buf == NULL) {
+		fprintf(stderr, "%s: cannot allocate memory for %s\n",
+			progname, fname);
+		exit(1);
+	}
+	if (read(fd, buf, size) < size) {
+		fprintf(stderr, "%s: read failed on %s: %s\n",
+			progname, fname, strerror(errno));
+		exit(1);
+	}
+	close(fd);
+	/* size == 0 would otherwise inspect buf[-1] */
+	if (size == 0 || buf[size - 1] != '\n') {
+		fprintf(stderr, "%s: proto file %s premature EOF\n",
+			progname, fname);
+		exit(1);
+	}
+	buf[size] = '\0';
+	/*
+	 * Skip past the stuff there for compatibility, a string and 2 numbers.
+	 */
+	(void)getstr(&buf);	/* boot image name */
+	(void)getnum(&buf);	/* block count */
+	(void)getnum(&buf);	/* inode count */
+	return buf;
+}
+
+/*
+ * Consume the next token with getstr() and convert it with atol().
+ */
+static long
+getnum(
+	char	**pp)
+{
+	char	*s;
+
+	s = getstr(pp);
+	return atol(s);
+}
+
+/*
+ * Print "progname: msg i" on stderr and terminate; never returns.
+ */
+static void
+fail(
+	char	*msg,
+	int	i)
+{
+	fprintf(stderr, "%s: %s %d\n", progname, msg, i);
+	ASSERT(0);
+	exit(1);
+}
+
+/*
+ * Reserve blocks for transaction tp.
+ *
+ * Starts at MKFS_BLOCKRES(blocks) and retries with one block less each
+ * time until the reservation succeeds or a request for exactly
+ * `blocks` has failed, in which case res_failed() is called.
+ */
+static void
+getres(
+	xfs_trans_t	*tp,
+	uint		blocks)
+{
+	int		i;
+	xfs_mount_t	*mp;
+	uint		r;
+
+	mp = tp->t_mountp;
+	i = 0;
+	r = MKFS_BLOCKRES(blocks);
+	/*
+	 * r is unsigned: counting down with "r >= blocks" never ends
+	 * when blocks is 0 because r wraps, so stop explicitly once the
+	 * minimum request has been tried.
+	 */
+	while (r >= blocks) {
+		i = libxfs_trans_reserve(tp, r, 0, 0, 0, 0);
+		if (i == 0)
+			return;
+		if (r == blocks)
+			break;
+		r--;
+	}
+	res_failed(i);
+	/* NOTREACHED */
+}
+
+/*
+ * Return the next whitespace-delimited token of the prototype text.
+ *
+ * Blanks, tabs and newlines separate tokens; a ':' where a token would
+ * start discards the rest of that line.  The token is NUL-terminated
+ * in place and *pp is advanced past it, but never beyond the NUL that
+ * ends the text.  Running out of input prints an error and exits.
+ */
+static char *
+getstr(
+	char	**pp)
+{
+	int	c;
+	char	*p;
+	char	*rval;
+
+	p = *pp;
+	while ((c = *p) != '\0') {
+		switch (c) {
+		case ' ':
+		case '\t':
+		case '\n':
+			p++;
+			continue;
+		case ':':
+			/* comment: drop the rest of the line, stop at NUL */
+			p++;
+			while (*p != '\0' && *p++ != '\n')
+				;
+			continue;
+		default:
+			rval = p;
+			while (c != ' ' && c != '\t' && c != '\n' && c != '\0')
+				c = *++p;
+			/*
+			 * Only step over a separator we overwrite; if the
+			 * token ran up to the final NUL, leave *pp on it.
+			 */
+			if (c != '\0')
+				*p++ = '\0';
+			*pp = p;
+			return rval;
+		}
+	}
+	fprintf(stderr, "%s: premature EOF in prototype file\n",
+		progname);
+	exit(1);
+	return NULL;	/* NOTREACHED */
+}
+
+/*
+ * Preallocate llen bytes for ip, then in a separate transaction clear
+ * the setuid bit (and setgid when group execute is set), update the
+ * timestamps and set XFS_DIFLAG_PREALLOC.
+ */
+static void
+rsvfile(
+	xfs_mount_t	*mp,
+	xfs_inode_t	*ip,
+	long long	llen)
+{
+	int		error;
+	xfs_trans_t	*tp;
+
+	error = libxfs_alloc_file_space(ip, 0, llen, 1, 0);
+
+	if (error)
+		fail("error reserving space for a file", error);
+
+	/*
+	 * update the inode timestamp, mode, and prealloc flag bits
+	 */
+	tp = libxfs_trans_alloc(mp, 0);
+
+	libxfs_trans_ijoin(tp, ip, 0);
+	libxfs_trans_ihold(tp, ip);
+
+	ip->i_d.di_mode &= ~ISUID;
+
+	/*
+	 * Note that we don't have to worry about mandatory
+	 * file locking being disabled here because we only
+	 * clear the ISGID bit if the Group execute bit is
+	 * on, but if it was on then mandatory locking wouldn't
+	 * have been enabled.
+	 */
+	if (ip->i_d.di_mode & (IEXEC >> 3))
+		ip->i_d.di_mode &= ~ISGID;
+
+	libxfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+	ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
+
+	libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	libxfs_trans_commit(tp, 0, NULL);
+}
+
+/*
+ * Store len bytes from buf as the contents of ip.
+ *
+ * When dolocal is set and the data fits in the inode data fork it is
+ * kept inline; otherwise a single extent is allocated and the data is
+ * copied into a buffer, zero padded, and either logged (logit) or
+ * written directly.  Returns extra inode log flags for the caller.
+ */
+static int
+newfile(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,
+	xfs_bmap_free_t	*flist,
+	xfs_fsblock_t	*first,
+	int		dolocal,
+	int		logit,
+	char		*buf,
+	int		len)
+{
+	xfs_buf_t	*bp;
+	xfs_daddr_t	d;
+	int		error;
+	int		flags;
+	xfs_bmbt_irec_t	map;
+	xfs_mount_t	*mp;
+	xfs_extlen_t	nb;
+	int		nmap;
+
+	flags = 0;
+	mp = ip->i_mount;
+	if (dolocal && len <= XFS_IFORK_DSIZE(ip)) {
+		libxfs_idata_realloc(ip, len, XFS_DATA_FORK);
+		if (buf)
+			bcopy(buf, ip->i_df.if_u1.if_data, len);
+		ip->i_d.di_size = len;
+		ip->i_df.if_flags &= ~XFS_IFEXTENTS;
+		ip->i_df.if_flags |= XFS_IFINLINE;
+		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
+		flags = XFS_ILOG_DDATA;
+	} else if (len > 0) {
+		nb = XFS_B_TO_FSB(mp, len);
+		nmap = 1;
+		error = libxfs_bmapi(tp, ip, 0, nb, XFS_BMAPI_WRITE, first, nb,
+				&map, &nmap, flist);
+		if (error) {
+			fail("error allocating space for a file", error);
+		}
+		if (nmap != 1) {
+			fprintf(stderr, "%s: cannot allocate space for file\n",
+				progname);
+			exit(1);
+		}
+		d = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+		bp = libxfs_trans_get_buf(logit ? tp : 0, mp->m_dev, d,
+			nb << mp->m_blkbb_log, 0);
+		bcopy(buf, XFS_BUF_PTR(bp), len);
+		if (len < XFS_BUF_COUNT(bp))
+			bzero(XFS_BUF_PTR(bp) + len, XFS_BUF_COUNT(bp) - len);
+		if (logit)
+			libxfs_trans_log_buf(tp, bp, 0, XFS_BUF_COUNT(bp) - 1);
+		else
+			libxfs_writebuf(bp, 1);
+	}
+	ip->i_d.di_size = len;
+	return flags;
+}
+
+/*
+ * Read the file named by the next prototype token into a heap buffer.
+ *
+ * *len receives the file size; the return value is the buffer, or 0
+ * for an empty file.  The caller frees the buffer.  Errors print a
+ * message and exit.
+ */
+static char *
+newregfile(
+	char	**pp,
+	int	*len)
+{
+	char	*buf;
+	int	fd;
+	char	*fname;
+	long	size;
+
+	fname = getstr(pp);
+	if ((fd = open(fname, O_RDONLY)) < 0 || (size = filesize(fd)) < 0) {
+		fprintf(stderr, "%s: cannot open %s: %s\n",
+			progname, fname, strerror(errno));
+		exit(1);
+	}
+	if ((*len = (int)size) != 0) {
+		buf = malloc(size);
+		if (buf == NULL) {
+			fprintf(stderr, "%s: cannot allocate memory for %s\n",
+				progname, fname);
+			exit(1);
+		}
+		if (read(fd, buf, size) < size) {
+			fprintf(stderr, "%s: read failed on %s: %s\n",
+				progname, fname, strerror(errno));
+			exit(1);
+		}
+	} else
+		buf = 0;
+	close(fd);
+	return buf;
+}
+
+/*
+ * Add the entry name -> inum to directory pip, using the version 2
+ * directory code when the superblock has it enabled.  Exits through
+ * fail() on error.
+ */
+static void
+newdirent(
+	xfs_mount_t	*mp,
+	xfs_trans_t	*tp,
+	xfs_inode_t	*pip,
+	char		*name,
+	int		namelen,
+	xfs_ino_t	inum,
+	xfs_fsblock_t	*first,
+	xfs_bmap_free_t	*flist,
+	xfs_extlen_t	total)
+{
+	int	error;
+
+	if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+		error = libxfs_dir2_createname(tp, pip, name, namelen,
+				inum, first, flist, total);
+	else
+		error = libxfs_dir_createname(tp, pip, name, namelen,
+				inum, first, flist, total);
+	if (error)
+		fail("directory createname error", error);
+}
+
+/*
+ * Initialise dp as a new directory with parent pdp, using the
+ * directory version selected by the superblock.  Exits through fail()
+ * on error.
+ */
+static void
+newdirectory(
+	xfs_mount_t	*mp,
+	xfs_trans_t	*tp,
+	xfs_inode_t	*dp,
+	xfs_inode_t	*pdp)
+{
+	int	error;
+
+	if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb))
+		error = libxfs_dir2_init(tp, dp, pdp);
+	else
+		error = libxfs_dir_init(tp, dp, pdp);
+	if (error)
+		fail("directory create error", error);
+}
+
+/*
+ * Create the object described at *pp as entry `name` of directory pip.
+ *
+ * The description starts with a six character mode string: a type
+ * letter (- r b c d l p), 'u' or '-' for setuid, 'g' or '-' for
+ * setgid, and three octal permission digits.  It is followed by uid
+ * and gid and then type-specific arguments.  A directory recurses over
+ * its entries until a "$" token.  When pip is NULL the new directory
+ * becomes the root directory and the realtime inodes are set up right
+ * after it.
+ */
+void
+parseproto(
+	xfs_mount_t	*mp,
+	xfs_inode_t	*pip,
+	char		**pp,
+	char		*name)
+{
+#define	IF_REGULAR	0
+#define	IF_RESERVED	1
+#define	IF_BLOCK	2
+#define	IF_CHAR		3
+#define	IF_DIRECTORY	4
+#define	IF_SYMLINK	5
+#define	IF_FIFO		6
+
+	char		*buf;
+	int		committed;
+	int		error;
+	xfs_fsblock_t	first;
+	int		flags;
+	xfs_bmap_free_t	flist;
+	int		fmt;
+	int		i;
+	xfs_inode_t	*ip;
+	int		len;
+	long long	llen;
+	int		majdev;
+	int		mindev;
+	int		mode;
+	char		*mstr;
+	xfs_trans_t	*tp;
+	int		val;
+	int		isroot = 0;
+	cred_t		creds;
+	char		*value;
+
+	bzero(&creds, sizeof(creds));
+	mstr = getstr(pp);
+	switch (mstr[0]) {
+	case '-':
+		fmt = IF_REGULAR;
+		break;
+	case 'r':
+		fmt = IF_RESERVED;
+		break;
+	case 'b':
+		fmt = IF_BLOCK;
+		break;
+	case 'c':
+		fmt = IF_CHAR;
+		break;
+	case 'd':
+		fmt = IF_DIRECTORY;
+		break;
+	case 'l':
+		fmt = IF_SYMLINK;
+		break;
+	case 'p':
+		fmt = IF_FIFO;
+		break;
+	default:
+		fprintf(stderr, "%s: bad format string %s\n", progname, mstr);
+		exit(1);
+	}
+	mode = 0;
+	switch (mstr[1]) {
+	case '-':
+		break;
+	case 'u':
+		mode |= ISUID;
+		break;
+	default:
+		fprintf(stderr, "%s: bad format string %s\n", progname, mstr);
+		exit(1);
+	}
+	switch (mstr[2]) {
+	case '-':
+		break;
+	case 'g':
+		mode |= ISGID;
+		break;
+	default:
+		fprintf(stderr, "%s: bad format string %s\n", progname, mstr);
+		exit(1);
+	}
+	/* three octal permission digits; a short string fails on its NUL */
+	val = 0;
+	for (i = 3; i < 6; i++) {
+		if (mstr[i] < '0' || mstr[i] > '7') {
+			fprintf(stderr, "%s: bad format string %s\n",
+				progname, mstr);
+			exit(1);
+		}
+		val = val * 8 + mstr[i] - '0';
+	}
+	mode |= val;
+	creds.cr_uid = (int)getnum(pp);
+	creds.cr_gid = (int)getnum(pp);
+	tp = libxfs_trans_alloc(mp, 0);
+	flags = XFS_ILOG_CORE;
+	XFS_BMAP_INIT(&flist, &first);
+	switch (fmt) {
+	case IF_REGULAR:
+		buf = newregfile(pp, &len);
+		getres(tp, XFS_B_TO_FSB(mp, len));
+		error = libxfs_inode_alloc(&tp, pip, mode|IFREG, 1,
+				mp->m_dev, &creds, &ip);
+		if (error)
+			fail("Inode allocation failed", error);
+		flags |= newfile(tp, ip, &flist, &first, 0, 0, buf, len);
+		if (buf)
+			free(buf);
+		libxfs_trans_ijoin(tp, pip, 0);
+		i = strlen(name);
+		newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+		libxfs_trans_ihold(tp, pip);
+		break;
+
+	case IF_RESERVED:	/* pre-allocated space only */
+		value = getstr(pp);
+		llen = cvtnum(mp->m_sb.sb_blocksize, value);
+		getres(tp, XFS_B_TO_FSB(mp, llen));
+
+		error = libxfs_inode_alloc(&tp, pip, mode|IFREG, 1,
+				mp->m_dev, &creds, &ip);
+		if (error)
+			fail("Inode pre-allocation failed", error);
+
+		libxfs_trans_ijoin(tp, pip, 0);
+
+		i = strlen(name);
+		newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+		libxfs_trans_ihold(tp, pip);
+		libxfs_trans_log_inode(tp, ip, flags);
+
+		error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+		if (error)
+			fail("Pre-allocated file creation failed", error);
+		libxfs_trans_commit(tp, 0, NULL);
+		rsvfile(mp, ip, llen);
+		return;
+
+	case IF_BLOCK:
+		getres(tp, 0);
+		majdev = (int)getnum(pp);
+		mindev = (int)getnum(pp);
+		error = libxfs_inode_alloc(&tp, pip, mode|IFBLK, 1,
+				makedev(majdev, mindev), &creds, &ip);
+		if (error) {
+			fail("Inode allocation failed", error);
+		}
+		libxfs_trans_ijoin(tp, pip, 0);
+		i = strlen(name);
+		newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+		libxfs_trans_ihold(tp, pip);
+		flags |= XFS_ILOG_DEV;
+		break;
+
+	case IF_CHAR:
+		getres(tp, 0);
+		majdev = (int)getnum(pp);
+		mindev = (int)getnum(pp);
+		error = libxfs_inode_alloc(&tp, pip, mode|IFCHR, 1,
+				makedev(majdev, mindev), &creds, &ip);
+		if (error)
+			fail("Inode allocation failed", error);
+		libxfs_trans_ijoin(tp, pip, 0);
+		i = strlen(name);
+		newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+		libxfs_trans_ihold(tp, pip);
+		flags |= XFS_ILOG_DEV;
+		break;
+
+	case IF_FIFO:
+		getres(tp, 0);
+		error = libxfs_inode_alloc(&tp, pip, mode|IFIFO, 1,
+				mp->m_dev, &creds, &ip);
+		if (error)
+			fail("Inode allocation failed", error);
+		libxfs_trans_ijoin(tp, pip, 0);
+		i = strlen(name);
+		newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+		libxfs_trans_ihold(tp, pip);
+		break;
+	case IF_SYMLINK:
+		buf = getstr(pp);
+		len = (int)strlen(buf);
+		getres(tp, XFS_B_TO_FSB(mp, len));
+		error = libxfs_inode_alloc(&tp, pip, mode|IFLNK, 1,
+				mp->m_dev, &creds, &ip);
+		if (error)
+			fail("Inode allocation failed", error);
+		flags |= newfile(tp, ip, &flist, &first, 1, 1, buf, len);
+		libxfs_trans_ijoin(tp, pip, 0);
+		i = strlen(name);
+		newdirent(mp, tp, pip, name, i, ip->i_ino, &first, &flist, 1);
+		libxfs_trans_ihold(tp, pip);
+		break;
+	case IF_DIRECTORY:
+		getres(tp, 0);
+		error = libxfs_inode_alloc(&tp, pip, mode|IFDIR, 1,
+				mp->m_dev, &creds, &ip);
+		if (error)
+			fail("Inode allocation failed", error);
+		ip->i_d.di_nlink++;		/* account for . */
+		if (!pip) {
+			pip = ip;
+			mp->m_sb.sb_rootino = ip->i_ino;
+			libxfs_mod_sb(tp, XFS_SB_ROOTINO);
+			mp->m_rootip = ip;
+			isroot = 1;
+		} else {
+			libxfs_trans_ijoin(tp, pip, 0);
+			i = strlen(name);
+			newdirent(mp, tp, pip, name, i, ip->i_ino,
+				&first, &flist, 1);
+			pip->i_d.di_nlink++;
+			libxfs_trans_ihold(tp, pip);
+			libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE);
+		}
+		newdirectory(mp, tp, ip, pip);
+		libxfs_trans_log_inode(tp, ip, flags);
+		error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+		if (error)
+			fail("Directory creation failed", error);
+		libxfs_trans_ihold(tp, ip);
+		libxfs_trans_commit(tp, 0, NULL);
+		/*
+		 * RT initialization. Do this here to ensure that
+		 * the RT inodes get placed after the root inode.
+		 */
+		if (isroot)
+			rtinit(mp);
+		tp = NULL;
+		for (;;) {
+			name = getstr(pp);
+			if (strcmp(name, "$") == 0)
+				break;
+			parseproto(mp, ip, pp, name);
+		}
+		libxfs_iput(ip, 0);
+		return;
+	}
+	libxfs_trans_log_inode(tp, ip, flags);
+	error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+	if (error) {
+		fail("Error encountered creating file from prototype", error);
+	}
+	libxfs_trans_commit(tp, 0, NULL);
+}
+
+/*
+ * Allocate the realtime bitmap and summary inodes, and fill in data if any.
+ *
+ * Both inodes are allocated in one transaction, each file is then
+ * given zeroed blocks in a transaction of its own, and finally all
+ * realtime extents are freed, one transaction per bitmap block.
+ * Any failure prints a message and exits.
+ */
+static void
+rtinit(
+	xfs_mount_t	*mp)
+{
+	xfs_dfiloff_t	bno;
+	int		committed;
+	xfs_dfiloff_t	ebno;
+	xfs_bmbt_irec_t	*ep;
+	int		error;
+	xfs_fsblock_t	first;
+	xfs_bmap_free_t	flist;
+	int		i;
+	xfs_bmbt_irec_t	map[XFS_BMAP_MAX_NMAP];
+	xfs_extlen_t	nsumblocks;
+	int		nmap;
+	xfs_inode_t	*rbmip;
+	xfs_inode_t	*rsumip;
+	xfs_trans_t	*tp;
+	cred_t		creds;
+
+	/*
+	 * First, allocate the inodes.
+	 */
+	tp = libxfs_trans_alloc(mp, 0);
+	i = libxfs_trans_reserve(tp, MKFS_BLOCKRES_INODE, 0, 0, 0, 0);
+	if (i != 0)
+		res_failed(i);
+	bzero(&creds, sizeof(creds));
+	error = libxfs_inode_alloc(&tp, mp->m_rootip, IFREG, 1,
+			mp->m_dev, &creds, &rbmip);
+	if (error) {
+		fail("Realtime bitmap inode allocation failed", error);
+	}
+	/*
+	 * Do our thing with rbmip before allocating rsumip,
+	 * because the next call to ialloc() may
+	 * commit the transaction in which rbmip was allocated.
+	 */
+	mp->m_sb.sb_rbmino = rbmip->i_ino;
+	rbmip->i_d.di_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
+	rbmip->i_d.di_flags = XFS_DIFLAG_NEWRTBM;
+	*(__uint64_t *)&rbmip->i_d.di_atime = 0;
+	libxfs_trans_log_inode(tp, rbmip, XFS_ILOG_CORE);
+	libxfs_mod_sb(tp, XFS_SB_RBMINO);
+	libxfs_trans_ihold(tp, rbmip);
+	mp->m_rbmip = rbmip;
+	error = libxfs_inode_alloc(&tp, mp->m_rootip, IFREG, 1,
+			mp->m_dev, &creds, &rsumip);
+	if (error) {
+		fail("Realtime summary inode allocation failed", error);
+	}
+	mp->m_sb.sb_rsumino = rsumip->i_ino;
+	rsumip->i_d.di_size = mp->m_rsumsize;
+	libxfs_trans_log_inode(tp, rsumip, XFS_ILOG_CORE);
+	libxfs_mod_sb(tp, XFS_SB_RSUMINO);
+	libxfs_trans_ihold(tp, rsumip);
+	libxfs_trans_commit(tp, 0, NULL);
+	mp->m_rsumip = rsumip;
+	/*
+	 * Next, give the bitmap file some zero-filled blocks.
+	 */
+	tp = libxfs_trans_alloc(mp, 0);
+	i = libxfs_trans_reserve(tp, mp->m_sb.sb_rbmblocks +
+			(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1), 0, 0, 0, 0);
+	if (i != 0)
+		res_failed(i);
+	libxfs_trans_ijoin(tp, rbmip, 0);
+	bno = 0;
+	XFS_BMAP_INIT(&flist, &first);
+	while (bno < mp->m_sb.sb_rbmblocks) {
+		nmap = XFS_BMAP_MAX_NMAP;
+		error = libxfs_bmapi(tp, rbmip, bno,
+				(xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno),
+				XFS_BMAPI_WRITE, &first, mp->m_sb.sb_rbmblocks,
+				map, &nmap, &flist);
+		if (error) {
+			fail("Allocation of the realtime bitmap failed", error);
+		}
+		for (i = 0, ep = map; i < nmap; i++, ep++) {
+			libxfs_device_zero(mp->m_dev,
+				XFS_FSB_TO_DADDR(mp, ep->br_startblock),
+				XFS_FSB_TO_BB(mp, ep->br_blockcount));
+			bno += ep->br_blockcount;
+		}
+	}
+
+	error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+	if (error) {
+		fail("Allocation of the realtime bitmap failed", error);
+	}
+	libxfs_trans_commit(tp, 0, NULL);
+	/*
+	 * Give the summary file some zero-filled blocks.
+	 */
+	tp = libxfs_trans_alloc(mp, 0);
+	nsumblocks = mp->m_rsumsize >> mp->m_sb.sb_blocklog;
+	i = libxfs_trans_reserve(tp,
+			nsumblocks + (XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1),
+			0, 0, 0, 0);
+	if (i != 0)
+		res_failed(i);
+	libxfs_trans_ijoin(tp, rsumip, 0);
+	bno = 0;
+	XFS_BMAP_INIT(&flist, &first);
+	while (bno < nsumblocks) {
+		nmap = XFS_BMAP_MAX_NMAP;
+		error = libxfs_bmapi(tp, rsumip, bno,
+				(xfs_extlen_t)(nsumblocks - bno),
+				XFS_BMAPI_WRITE, &first, nsumblocks,
+				map, &nmap, &flist);
+		if (error) {
+			fail("Allocation of the realtime summary failed", error);
+		}
+		for (i = 0, ep = map; i < nmap; i++, ep++) {
+			libxfs_device_zero(mp->m_dev,
+				XFS_FSB_TO_DADDR(mp, ep->br_startblock),
+				XFS_FSB_TO_BB(mp, ep->br_blockcount));
+			bno += ep->br_blockcount;
+		}
+	}
+	error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+	if (error) {
+		fail("Allocation of the realtime summary failed", error);
+	}
+	libxfs_trans_commit(tp, 0, NULL);
+	/*
+	 * Free the whole area using transactions.
+	 * Do one transaction per bitmap block.
+	 */
+	for (bno = 0; bno < mp->m_sb.sb_rextents; bno = ebno) {
+		tp = libxfs_trans_alloc(mp, 0);
+		i = libxfs_trans_reserve(tp, 0, 0, 0, 0, 0);
+		if (i != 0)
+			res_failed(i);
+		XFS_BMAP_INIT(&flist, &first);
+		/* one bitmap block covers NBBY * blocksize extents */
+		ebno = XFS_RTMIN(mp->m_sb.sb_rextents,
+			bno + NBBY * mp->m_sb.sb_blocksize);
+		error = libxfs_rtfree_extent(tp, bno, (xfs_extlen_t)(ebno-bno));
+		if (error) {
+			fail("Error initializing the realtime bitmap", error);
+		}
+		error = libxfs_bmap_finish(&tp, &flist, first, &committed);
+		if (error) {
+			fail("Error initializing the realtime bitmap", error);
+		}
+		libxfs_trans_commit(tp, 0, NULL);
+	}
+}
+
+/*
+ * Report that a block reservation failed and terminate.  The error
+ * code is accepted for the caller's convenience but is not printed.
+ */
+void
+res_failed(
+	int	err)
+{
+	fprintf(stderr, "%s: ran out of disk space!\n", progname);
+	ASSERT(0);
+	exit(1);
+}
+
+/*
+ * Return the size in bytes of the file open on fd, narrowed to long,
+ * or -1 if fstat64() fails.
+ */
+static long
+filesize(
+	int	fd)
+{
+	struct stat64	stb;
+
+	if (fstat64(fd, &stb) < 0)
+		return -1;
+	return (long)stb.st_size;
+}
diff --git a/mkfs/proto.h b/mkfs/proto.h
new file mode 100644
index 000000000..e588e4805
--- /dev/null
+++ b/mkfs/proto.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +char *setup_proto(char *fname); +void parseproto(xfs_mount_t *mp, xfs_inode_t *pip, char **pp, char *name); +void res_failed(int err); diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c new file mode 100644 index 000000000..13132b979 --- /dev/null +++ b/mkfs/xfs_mkfs.c @@ -0,0 +1,1944 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include +#include "xfs_mkfs.h" +#include "proto.h" +#include "volume.h" +#include "maxtrres.h" +#include "mountinfo.h" + +#if HAVE_LIBLVM + #include "lvm_user.h" + + char *cmd; /* Not used. liblvm is broken */ + int opt_d; /* Same thing */ +#endif + +/* + * Prototypes for internal functions. + */ +static void conflict(char opt, char *tab[], int oldidx, int newidx); +static void illegal(char *value, char *opt); +static void reqval(char opt, char *tab[], int idx); +static void respec(char opt, char *tab[], int idx); +static void unknown(char opt, char *s); +static int ispow2(unsigned int i); +static int max_trans_res(xfs_mount_t *mp); + +/* + * option tables for getsubopt calls + */ +char *bopts[] = { +#define B_LOG 0 + "log", +#define B_SIZE 1 + "size", + NULL +}; + +char *dopts[] = { +#define D_AGCOUNT 0 + "agcount", +#define D_FILE 1 + "file", +#define D_NAME 2 + "name", +#define D_SIZE 3 + "size", +#define D_SUNIT 4 + "sunit", +#define D_SWIDTH 5 + "swidth", +#define D_UNWRITTEN 6 + "unwritten", + NULL +}; + +char *iopts[] = { +#define I_ALIGN 0 + "align", +#define I_LOG 1 + "log", +#define I_MAXPCT 2 + "maxpct", +#define I_PERBLOCK 3 + "perblock", +#define I_SIZE 4 + "size", + NULL +}; + +char *lopts[] = { +#define L_AGNUM 0 + "agnum", +#define L_INTERNAL 1 + "internal", +#define L_SIZE 2 + "size", +#define L_DEV 3 + "logdev", +#ifdef MKFS_SIMULATION +#define L_FILE 4 + "file", +#define L_NAME 5 + "name", +#endif + NULL +}; + +char *nopts[] = { +#define N_LOG 0 + "log", +#define N_SIZE 1 + "size", +#define N_VERSION 2 + "version", + NULL, +}; + +char *ropts[] = { +#define R_EXTSIZE 0 + "extsize", +#define R_SIZE 1 + "size", +#define R_DEV 2 + "rtdev", +#ifdef MKFS_SIMULATION 
+#define R_FILE 3 + "file", +#define R_NAME 4 + "name", +#endif + NULL +}; + +/* + * max transaction reservation values + * version 1: + * first dimension log(blocksize) (base XFS_MIN_BLOCKSIZE_LOG) + * second dimension log(inodesize) (base XFS_DINODE_MIN_LOG) + * version 2: + * first dimension log(blocksize) (base XFS_MIN_BLOCKSIZE_LOG) + * second dimension log(inodesize) (base XFS_DINODE_MIN_LOG) + * third dimension log(dirblocksize) (base XFS_MIN_BLOCKSIZE_LOG) + */ +#define DFL_B (XFS_MAX_BLOCKSIZE_LOG + 1 - XFS_MIN_BLOCKSIZE_LOG) +#define DFL_I (XFS_DINODE_MAX_LOG + 1 - XFS_DINODE_MIN_LOG) +#define DFL_D (XFS_MAX_BLOCKSIZE_LOG + 1 - XFS_MIN_BLOCKSIZE_LOG) + +static const int max_trres_v1[DFL_B][DFL_I] = { + { MAXTRRES_B9_I8_D9_V1, 0, 0, 0 }, + { MAXTRRES_B10_I8_D10_V1, MAXTRRES_B10_I9_D10_V1, 0, 0 }, + { MAXTRRES_B11_I8_D11_V1, MAXTRRES_B11_I9_D11_V1, + MAXTRRES_B11_I10_D11_V1, 0 }, + { MAXTRRES_B12_I8_D12_V1, MAXTRRES_B12_I9_D12_V1, + MAXTRRES_B12_I10_D12_V1, MAXTRRES_B12_I11_D12_V1 }, + { MAXTRRES_B13_I8_D13_V1, MAXTRRES_B13_I9_D13_V1, + MAXTRRES_B13_I10_D13_V1, MAXTRRES_B13_I11_D13_V1 }, + { MAXTRRES_B14_I8_D14_V1, MAXTRRES_B14_I9_D14_V1, + MAXTRRES_B14_I10_D14_V1, MAXTRRES_B14_I11_D14_V1 }, + { MAXTRRES_B15_I8_D15_V1, MAXTRRES_B15_I9_D15_V1, + MAXTRRES_B15_I10_D15_V1, MAXTRRES_B15_I11_D15_V1 }, + { MAXTRRES_B16_I8_D16_V1, MAXTRRES_B16_I9_D16_V1, + MAXTRRES_B16_I10_D16_V1, MAXTRRES_B16_I11_D16_V1 }, +}; + +static const int max_trres_v2[DFL_B][DFL_I][DFL_D] = { + { { MAXTRRES_B9_I8_D9_V2, MAXTRRES_B9_I8_D10_V2, MAXTRRES_B9_I8_D11_V2, + MAXTRRES_B9_I8_D12_V2, MAXTRRES_B9_I8_D13_V2, MAXTRRES_B9_I8_D14_V2, + MAXTRRES_B9_I8_D15_V2, MAXTRRES_B9_I8_D16_V2 }, + { 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, 0, 0, 0, 0 } }, + { { 0, MAXTRRES_B10_I8_D10_V2, MAXTRRES_B10_I8_D11_V2, + MAXTRRES_B10_I8_D12_V2, MAXTRRES_B10_I8_D13_V2, + MAXTRRES_B10_I8_D14_V2, MAXTRRES_B10_I8_D15_V2, + MAXTRRES_B10_I8_D16_V2 }, + { 0, MAXTRRES_B10_I9_D10_V2, 
MAXTRRES_B10_I9_D11_V2, + MAXTRRES_B10_I9_D12_V2, MAXTRRES_B10_I9_D13_V2, + MAXTRRES_B10_I9_D14_V2, MAXTRRES_B10_I9_D15_V2, + MAXTRRES_B10_I9_D16_V2 }, + { 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, MAXTRRES_B11_I8_D11_V2, MAXTRRES_B11_I8_D12_V2, + MAXTRRES_B11_I8_D13_V2, MAXTRRES_B11_I8_D14_V2, + MAXTRRES_B11_I8_D15_V2, MAXTRRES_B11_I8_D16_V2 }, + { 0, 0, MAXTRRES_B11_I9_D11_V2, MAXTRRES_B11_I9_D12_V2, + MAXTRRES_B11_I9_D13_V2, MAXTRRES_B11_I9_D14_V2, + MAXTRRES_B11_I9_D15_V2, MAXTRRES_B11_I9_D16_V2 }, + { 0, 0, MAXTRRES_B11_I10_D11_V2, MAXTRRES_B11_I10_D12_V2, + MAXTRRES_B11_I10_D13_V2, MAXTRRES_B11_I10_D14_V2, + MAXTRRES_B11_I10_D15_V2, MAXTRRES_B11_I10_D16_V2 }, + { 0, 0, 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, MAXTRRES_B12_I8_D12_V2, MAXTRRES_B12_I8_D13_V2, + MAXTRRES_B12_I8_D14_V2, MAXTRRES_B12_I8_D15_V2, + MAXTRRES_B12_I8_D16_V2 }, + { 0, 0, 0, MAXTRRES_B12_I9_D12_V2, MAXTRRES_B12_I9_D13_V2, + MAXTRRES_B12_I9_D14_V2, MAXTRRES_B12_I9_D15_V2, + MAXTRRES_B12_I9_D16_V2 }, + { 0, 0, 0, MAXTRRES_B12_I10_D12_V2, MAXTRRES_B12_I10_D13_V2, + MAXTRRES_B12_I10_D14_V2, MAXTRRES_B12_I10_D15_V2, + MAXTRRES_B12_I10_D16_V2 }, + { 0, 0, 0, MAXTRRES_B12_I11_D12_V2, MAXTRRES_B12_I11_D13_V2, + MAXTRRES_B12_I11_D14_V2, MAXTRRES_B12_I11_D15_V2, + MAXTRRES_B12_I11_D16_V2 } }, + { { 0, 0, 0, 0, MAXTRRES_B13_I8_D13_V2, MAXTRRES_B13_I8_D14_V2, + MAXTRRES_B13_I8_D15_V2, MAXTRRES_B13_I8_D16_V2 }, + { 0, 0, 0, 0, MAXTRRES_B13_I9_D13_V2, MAXTRRES_B13_I9_D14_V2, + MAXTRRES_B13_I9_D15_V2, MAXTRRES_B13_I9_D16_V2 }, + { 0, 0, 0, 0, MAXTRRES_B13_I10_D13_V2, MAXTRRES_B13_I10_D14_V2, + MAXTRRES_B13_I10_D15_V2, MAXTRRES_B13_I10_D16_V2 }, + { 0, 0, 0, 0, MAXTRRES_B13_I11_D13_V2, MAXTRRES_B13_I11_D14_V2, + MAXTRRES_B13_I11_D15_V2, MAXTRRES_B13_I11_D16_V2 } }, + { { 0, 0, 0, 0, 0, MAXTRRES_B14_I8_D14_V2, MAXTRRES_B14_I8_D15_V2, + MAXTRRES_B14_I8_D16_V2 }, + { 0, 0, 0, 0, 0, MAXTRRES_B14_I9_D14_V2, MAXTRRES_B14_I9_D15_V2, + MAXTRRES_B14_I9_D16_V2 }, + { 0, 0, 0, 0, 0, 
MAXTRRES_B14_I10_D14_V2, MAXTRRES_B14_I10_D15_V2, + MAXTRRES_B14_I10_D16_V2 }, + { 0, 0, 0, 0, 0, MAXTRRES_B14_I11_D14_V2, MAXTRRES_B14_I11_D15_V2, + MAXTRRES_B14_I11_D16_V2 } }, + { { 0, 0, 0, 0, 0, 0, MAXTRRES_B15_I8_D15_V2, MAXTRRES_B15_I8_D16_V2 }, + { 0, 0, 0, 0, 0, 0, MAXTRRES_B15_I9_D15_V2, MAXTRRES_B15_I9_D16_V2 }, + { 0, 0, 0, 0, 0, 0, MAXTRRES_B15_I10_D15_V2, + MAXTRRES_B15_I10_D16_V2 }, + { 0, 0, 0, 0, 0, 0, MAXTRRES_B15_I11_D15_V2, + MAXTRRES_B15_I11_D16_V2 } }, + { { 0, 0, 0, 0, 0, 0, 0, MAXTRRES_B16_I8_D16_V2 }, + { 0, 0, 0, 0, 0, 0, 0, MAXTRRES_B16_I9_D16_V2 }, + { 0, 0, 0, 0, 0, 0, 0, MAXTRRES_B16_I10_D16_V2 }, + { 0, 0, 0, 0, 0, 0, 0, MAXTRRES_B16_I11_D16_V2, } }, +}; + +/* + * Use this before we have a superblock, else would use XFS_DTOBT + */ +#define DTOBT(d) ((xfs_drfsbno_t)((d) >> (blocklog - BBSHIFT))) + +/* + * Use this for block reservations needed for mkfs's conditions + * (basically no fragmentation). + */ +#define MKFS_BLOCKRES_INODE \ + ((uint)(XFS_IALLOC_BLOCKS(mp) + (XFS_IN_MAXLEVELS(mp) - 1))) +#define MKFS_BLOCKRES(rb) \ + ((uint)(MKFS_BLOCKRES_INODE + XFS_DA_NODE_MAXDEPTH + \ + (XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1) + (rb))) + +static void +get_subvol_stripe_wrapper(char *dfile, int type, int *sunit, int *swidth) +{ + struct stat64 sb; +#if HAVE_LIBLVM + lv_t *lv; + char *vgname; +#endif + + if (!dfile) + return; + + if (stat64 (dfile, &sb)) { + fprintf (stderr, "Could not stat %s\n", dfile); + usage(); + } + +#if HAVE_LIBLVM + /* If this is not an LVM volume, just bail out */ + if (sb.st_rdev >> 8 != LVM_BLK_MAJOR) + return; + + /* Find volume group */ + if (! (vgname = vg_name_of_lv (dfile))) { + fprintf (stderr, "Can't find volume group for %s\n", dfile); + usage(); + } + + /* Logical volume */ + if (! 
lvm_tab_lv_check_exist (dfile)) { + fprintf (stderr, "Logical volume %s doesn't exist!\n", dfile); + usage(); + } + + /* Get status */ + if (lv_status_byname (vgname, dfile, &lv) < 0 || lv == NULL) { + fprintf (stderr, "Could not get status info from %s\n", dfile); + usage(); + } + + /* Check that data is consistent */ + if (lv_check_consistency (lv) < 0) { + fprintf (stderr, "Logical volume %s is inconsistent\n", dfile); + usage(); + } + + /* Update sizes */ + *sunit = lv->lv_stripesize; + *swidth = lv->lv_stripes * lv->lv_stripesize; + +#endif /* HAVE_LIBLVM */ +} + + +static int +get_default_blocksize(void) +{ + size_t pagesize = getpagesize(); + int i; + + /* default is between 4K and 16K */ + for (i = 12; i <= 16; i++) + if ((1 << i) == pagesize) + return pagesize; + return (1 << XFS_DFL_BLOCKSIZE_LOG); +} + + +int +main(int argc, char **argv) +{ + __uint64_t agcount; + xfs_agf_t *agf; + xfs_agi_t *agi; + xfs_agnumber_t agno; + __uint64_t agsize; + xfs_alloc_rec_t *arec; + xfs_btree_sblock_t *block; + int blflag; + int blocklog; + int blocksize; + int bsflag; + int bsize; + xfs_buf_t *buf; + int c; + int daflag; + xfs_drfsbno_t dblocks; + char *dfile; + int dirblocklog; + int dirblocksize; + int dirversion; + int do_overlap_checks; + char *dsize; + int dsunit; + int dswidth; + int extent_flagging; + int force_fs_overwrite; + int i; + int iaflag; + int ilflag; + int imaxpct; + int imflag; + int inodelog; + int inopblock; + int ipflag; + int isflag; + int isize; + int laflag; + int lalign; + int ldflag; + int liflag; + xfs_agnumber_t logagno; + xfs_drfsbno_t logblocks; + char *logfile; + int loginternal; + char *logsize; + xfs_dfsbno_t logstart; + int lsflag; + int min_logblocks; + mnt_check_state_t *mnt_check_state; + int mnt_partition_count; + xfs_mount_t *mp; + xfs_mount_t mbuf; + xfs_extlen_t nbmblocks; + int nlflag; + int nodsflag; + xfs_alloc_rec_t *nrec; + int nsflag; + int nvflag; + char *p; + char *protofile; + char *protostring; + int qflag; + 
xfs_drfsbno_t rtblocks; + xfs_extlen_t rtextblocks; + xfs_drtbno_t rtextents; + char *rtextsize; + char *rtfile; + char *rtsize; + xfs_sb_t *sbp; + int sectlog; + __uint64_t tmp_agsize; + uuid_t uuid; + int worst_freelist; + libxfs_init_t xi; + int xlv_dsunit; + int xlv_dswidth; + + progname = basename(argv[0]); + agcount = 8; + blflag = bsflag = 0; + blocksize = get_default_blocksize(); + blocklog = libxfs_highbit32(blocksize); + agsize = daflag = dblocks = 0; + ilflag = imflag = ipflag = isflag = 0; + liflag = laflag = lsflag = ldflag = 0; + loginternal = 1; + logagno = logblocks = rtblocks = 0; + nlflag = nsflag = nvflag = 0; + dirblocklog = dirblocksize = dirversion = 0; + qflag = 0; + imaxpct = inodelog = inopblock = isize = 0; + iaflag = XFS_IFLAG_ALIGN; + bzero(&xi, sizeof(xi)); + xi.notvolok = 1; + dfile = logfile = rtfile = NULL; + dsize = logsize = rtsize = rtextsize = protofile = NULL; + opterr = 0; + dsunit = dswidth = nodsflag = lalign = 0; + do_overlap_checks = 1; + extent_flagging = 0; + force_fs_overwrite = 0; + worst_freelist = 0; + + while ((c = getopt(argc, argv, "b:d:i:l:n:p:qr:CfV")) != EOF) { + switch (c) { + case 'C': + do_overlap_checks = 0; + break; + case 'f': + force_fs_overwrite = 1; + break; + case 'b': + p = optarg; + while (*p != '\0') { + char *value; + + switch (getsubopt(&p, (constpp)bopts, &value)) { + case B_LOG: + if (!value) + reqval('b', bopts, B_LOG); + if (blflag) + respec('b', bopts, B_LOG); + if (bsflag) + conflict('b', bopts, B_SIZE, + B_LOG); + blocklog = atoi(value); + if (blocklog <= 0) + illegal(value, "b log"); + blocksize = 1 << blocklog; + blflag = 1; + break; + case B_SIZE: + if (!value) + reqval('b', bopts, B_SIZE); + if (bsflag) + respec('b', bopts, B_SIZE); + if (blflag) + conflict('b', bopts, B_LOG, + B_SIZE); + blocksize = cvtnum(0, value); + if (blocksize <= 0 || + !ispow2(blocksize)) + illegal(value, "b size"); + blocklog = libxfs_highbit32(blocksize); + bsflag = 1; + break; + default: + unknown('b', 
value); + } + } + break; + case 'd': + p = optarg; + while (*p != '\0') { + char *value; + + switch (getsubopt(&p, (constpp)dopts, &value)) { + case D_AGCOUNT: + if (!value) + reqval('d', dopts, D_AGCOUNT); + if (daflag) + respec('d', dopts, D_AGCOUNT); + agcount = (__uint64_t)atoll(value); + if ((__int64_t)agcount <= 0) + illegal(value, "d agcount"); + daflag = 1; + break; + case D_FILE: + if (!value) + value = "1"; + xi.disfile = atoi(value); + if (xi.disfile < 0 || xi.disfile > 1) + illegal(value, "d file"); + if (xi.disfile) + xi.dcreat = 1; + break; + case D_NAME: + if (!value) + reqval('d', dopts, D_NAME); + if (xi.dname) + respec('d', dopts, D_NAME); + xi.dname = value; + break; + case D_SIZE: + if (!value) + reqval('d', dopts, D_SIZE); + if (dsize) + respec('d', dopts, D_SIZE); + dsize = value; + break; + case D_SUNIT: + if (!value) + reqval('d', dopts, D_SUNIT); + if (dsunit) + respec('d', dopts, D_SUNIT); + dsunit = cvtnum(0, value); + break; + case D_SWIDTH: + if (!value) + reqval('d', dopts, D_SWIDTH); + if (dswidth) + respec('d', dopts, D_SWIDTH); + dswidth = cvtnum(0, value); + break; + case D_UNWRITTEN: + if (!value) + reqval('d', dopts, D_UNWRITTEN); + i = atoi(value); + if (i < 0 || i > 1) + illegal(value, "d unwritten"); + extent_flagging = i; + break; + default: + unknown('d', value); + } + } + break; + case 'i': + p = optarg; + while (*p != '\0') { + char *value; + + switch (getsubopt(&p, (constpp)iopts, &value)) { + case I_ALIGN: + if (!value) + value = "1"; + iaflag = atoi(value); + if (iaflag < 0 || iaflag > 1) + illegal(value, "i align"); + break; + case I_LOG: + if (!value) + reqval('i', iopts, I_LOG); + if (ilflag) + respec('i', iopts, I_LOG); + if (ipflag) + conflict('i', iopts, I_PERBLOCK, + I_LOG); + if (isflag) + conflict('i', iopts, I_SIZE, + I_LOG); + inodelog = atoi(value); + if (inodelog <= 0) + illegal(value, "i log"); + isize = 1 << inodelog; + ilflag = 1; + break; + case I_MAXPCT: + if (!value) + reqval('i', iopts, I_MAXPCT); + 
if (imflag) + respec('i', iopts, I_MAXPCT); + imaxpct = atoi(value); + if (imaxpct < 0 || imaxpct > 100) + illegal(value, "i maxpct"); + imflag = 1; + break; + case I_PERBLOCK: + if (!value) + reqval('i', iopts, I_PERBLOCK); + if (ilflag) + conflict('i', iopts, I_LOG, + I_PERBLOCK); + if (ipflag) + respec('i', iopts, I_PERBLOCK); + if (isflag) + conflict('i', iopts, I_SIZE, + I_PERBLOCK); + inopblock = atoi(value); + if (inopblock < + XFS_MIN_INODE_PERBLOCK || + !ispow2(inopblock)) + illegal(value, "i perblock"); + ipflag = 1; + break; + case I_SIZE: + if (!value) + reqval('i', iopts, I_SIZE); + if (ilflag) + conflict('i', iopts, I_LOG, + I_SIZE); + if (ipflag) + conflict('i', iopts, I_PERBLOCK, + I_SIZE); + if (isflag) + respec('i', iopts, I_SIZE); + isize = cvtnum(0, value); + if (isize <= 0 || !ispow2(isize)) + illegal(value, "i size"); + inodelog = libxfs_highbit32(isize); + isflag = 1; + break; + default: + unknown('i', value); + } + } + break; + case 'l': + p = optarg; + while (*p != '\0') { + char *value; + + switch (getsubopt(&p, (constpp)lopts, &value)) { + case L_AGNUM: + if (laflag) + respec('l', lopts, L_AGNUM); + + if (ldflag) + conflict('l', lopts, L_AGNUM, L_DEV); + + logagno = atoi(value); + laflag = 1; + break; + case L_DEV: + if (!value) { + fprintf (stderr, "Must specify log device\n"); + usage(); + } + + if (laflag) + conflict('l', lopts, L_AGNUM, L_DEV); + + if (liflag) + conflict('l', lopts, L_INTERNAL, L_DEV); + + ldflag = 1; + loginternal = 0; + logfile = value; + xi.logname = value; + break; +#ifdef HAVE_VOLUME_MANAGER + case L_FILE: + if (!value) + value = "1"; + if (loginternal) + conflict('l', lopts, L_INTERNAL, + L_FILE); + xi.lisfile = atoi(value); + if (xi.lisfile < 0 || xi.lisfile > 1) + illegal(value, "l file"); + if (xi.lisfile) + xi.lcreat = 1; + break; +#endif + case L_INTERNAL: + if (!value) + value = "1"; + + if (ldflag) + conflict('l', lopts, L_INTERNAL, L_DEV); +#ifdef HAVE_VOLUME_MANAGER + if (xi.logname) + conflict('l', 
lopts, L_NAME, + L_INTERNAL); + if (xi.lisfile) + conflict('l', lopts, L_FILE, + L_INTERNAL); +#endif + if (liflag) + respec('l', lopts, L_INTERNAL); + loginternal = atoi(value); + if (loginternal < 0 || loginternal > 1) + illegal(value, "l internal"); + liflag = 1; + break; +#ifdef HAVE_VOLUME_MANAGER + case L_NAME: + if (!value) + reqval('l', lopts, L_NAME); + if (loginternal) + conflict('l', lopts, L_INTERNAL, + L_NAME); + if (xi.logname) + respec('l', lopts, L_NAME); + xi.logname = value; + break; +#endif + case L_SIZE: + if (!value) + reqval('l', lopts, L_SIZE); + if (logsize) + respec('l', lopts, L_SIZE); + logsize = value; + lsflag = 1; + break; + default: + unknown('l', value); + } + } + break; + case 'n': + p = optarg; + while (*p != '\0') { + char *value; + + switch (getsubopt(&p, (constpp)nopts, &value)) { + case N_LOG: + if (!value) + reqval('n', nopts, N_LOG); + if (nlflag) + respec('n', nopts, N_LOG); + if (nsflag) + conflict('n', nopts, N_SIZE, + N_LOG); + dirblocklog = atoi(value); + if (dirblocklog <= 0) + illegal(value, "n log"); + dirblocksize = 1 << dirblocklog; + nlflag = 1; + break; + case N_SIZE: + if (!value) + reqval('n', nopts, N_SIZE); + if (nsflag) + respec('n', nopts, N_SIZE); + if (nlflag) + conflict('n', nopts, N_LOG, + N_SIZE); + dirblocksize = cvtnum(0, value); + if (dirblocksize <= 0 || + !ispow2(dirblocksize)) + illegal(value, "n size"); + dirblocklog = + libxfs_highbit32(dirblocksize); + nsflag = 1; + break; + case N_VERSION: + if (!value) + reqval('n', nopts, N_VERSION); + if (nvflag) + respec('n', nopts, N_VERSION); + dirversion = atoi(value); + if (dirversion < 1 || dirversion > 2) + illegal(value, "n version"); + nvflag = 1; + break; + default: + unknown('n', value); + } + } + break; + case 'p': + if (protofile) + respec('p', 0, 0); + protofile = optarg; + break; + case 'q': + qflag = 1; + break; + case 'r': + p = optarg; + while (*p != '\0') { + char *value; + + switch (getsubopt(&p, (constpp)ropts, &value)) { + case 
R_EXTSIZE: + if (!value) + reqval('r', ropts, R_EXTSIZE); + if (rtextsize) + respec('r', ropts, R_EXTSIZE); + rtextsize = value; + break; + case R_DEV: + if (!value) + reqval('r', ropts, R_DEV); + xi.rtname = value; + break; +#ifdef HAVE_VOLUME_MANAGER + case R_FILE: + if (!value) + value = "1"; + xi.risfile = atoi(value); + if (xi.risfile < 0 || xi.risfile > 1) + illegal(value, "r file"); + if (xi.risfile) + xi.rcreat = 1; + break; + case R_NAME: + if (!value) + reqval('r', ropts, R_NAME); + if (xi.rtname) + respec('r', ropts, R_NAME); + xi.rtname = value; + break; +#endif + case R_SIZE: + if (!value) + reqval('r', ropts, R_SIZE); + if (rtsize) + respec('r', ropts, R_SIZE); + rtsize = value; + break; + + default: + unknown('r', value); + } + } + break; + case 'V': + printf("%s version %s\n", progname, VERSION); + break; + case '?': + unknown(optopt, ""); + } + } + if (argc - optind > 1) { + fprintf(stderr, "extra arguments\n"); + usage(); + } else if (argc - optind == 1) { + dfile = xi.volname = argv[optind]; + if (xi.dname) { + fprintf(stderr, + "cannot specify both %s and -d name=%s\n", + xi.volname, xi.dname); + usage(); + } + } else + dfile = xi.dname; + /* option post-processing */ + if (blocksize < XFS_MIN_BLOCKSIZE || blocksize > XFS_MAX_BLOCKSIZE) { + fprintf(stderr, "illegal block size %d\n", blocksize); + usage(); + } + if (!nvflag) + dirversion = (nsflag || nlflag) ? 
2 : XFS_DFL_DIR_VERSION; + switch (dirversion) { + case 1: + if ((nsflag || nlflag) && dirblocklog != blocklog) { + fprintf(stderr, "illegal directory block size %d\n", + dirblocksize); + usage(); + } + break; + case 2: + if (nsflag || nlflag) { + if (dirblocksize < blocksize || + dirblocksize > XFS_MAX_BLOCKSIZE) { + fprintf(stderr, + "illegal directory block size %d\n", + dirblocksize); + usage(); + } + } else { + if (blocksize < (1 << XFS_MIN_REC_DIRSIZE)) + dirblocklog = XFS_MIN_REC_DIRSIZE; + else + dirblocklog = blocklog; + dirblocksize = 1 << dirblocklog; + } + break; + } + if (!daflag) + agcount = 8; + + if (xi.disfile && (!dsize || !xi.dname)) { + fprintf(stderr, + "if -d file then -d name and -d size are required\n"); + usage(); + } + if (dsize) { + __uint64_t dbytes; + + dbytes = cvtnum(blocksize, dsize); + if (dbytes % XFS_MIN_BLOCKSIZE) { + fprintf(stderr, + "illegal data length %lld, not a multiple of %d\n", + dbytes, XFS_MIN_BLOCKSIZE); + usage(); + } + dblocks = (xfs_drfsbno_t)(dbytes >> blocklog); + if (dbytes % blocksize) + fprintf(stderr, + "warning: data length %lld not a multiple of %d, truncated to %lld\n", + dbytes, blocksize, dblocks << blocklog); + } + if (ipflag) { + inodelog = blocklog - libxfs_highbit32(inopblock); + isize = 1 << inodelog; + } else if (!ilflag && !isflag) { + inodelog = XFS_DINODE_DFL_LOG; + isize = 1 << inodelog; + } +#ifdef HAVE_VOLUME_MANAGER + if (xi.lisfile && (!logsize || !xi.logname)) { + fprintf(stderr, + "if -l file then -l name and -l size are required\n"); + usage(); + } +#endif + if (logsize) { + __uint64_t logbytes; + + logbytes = cvtnum(blocksize, logsize); + if (logbytes % XFS_MIN_BLOCKSIZE) { + fprintf(stderr, + "illegal log length %lld, not a multiple of %d\n", + logbytes, XFS_MIN_BLOCKSIZE); + usage(); + } + logblocks = (xfs_drfsbno_t)(logbytes >> blocklog); + if (logbytes % blocksize) + fprintf(stderr, + "warning: log length %lld not a multiple of %d, truncated to %lld\n", + logbytes, blocksize, 
logblocks << blocklog); + } +#ifdef HAVE_VOLUME_MANAGER + if (xi.risfile && (!rtsize || !xi.rtname)) { + fprintf(stderr, + "if -r file then -r name and -r size are required\n"); + usage(); + } +#endif + if (rtsize) { + __uint64_t rtbytes; + + rtbytes = cvtnum(blocksize, rtsize); + if (rtbytes % XFS_MIN_BLOCKSIZE) { + fprintf(stderr, + "illegal rt length %lld, not a multiple of %d\n", + rtbytes, XFS_MIN_BLOCKSIZE); + usage(); + } + rtblocks = (xfs_drfsbno_t)(rtbytes >> blocklog); + if (rtbytes % blocksize) + fprintf(stderr, + "warning: rt length %lld not a multiple of %d, truncated to %lld\n", + rtbytes, blocksize, rtblocks << blocklog); + } + /* + * If specified, check rt extent size against its constraints. + */ + if (rtextsize) { + __uint64_t rtextbytes; + + rtextbytes = cvtnum(blocksize, rtextsize); + if (rtextbytes % blocksize) { + fprintf(stderr, + "illegal rt extent size %lld, not a multiple of %d\n", + rtextbytes, blocksize); + usage(); + } + if (rtextbytes > XFS_MAX_RTEXTSIZE) { + fprintf(stderr, + "rt extent size %s too large, maximum %d\n", + rtextsize, XFS_MAX_RTEXTSIZE); + usage(); + } + if (rtextbytes < XFS_MIN_RTEXTSIZE) { + fprintf(stderr, + "rt extent size %s too small, minimum %d\n", + rtextsize, XFS_MIN_RTEXTSIZE); + usage(); + } + rtextblocks = (xfs_extlen_t)(rtextbytes >> blocklog); + } else { + /* + * If realtime extsize has not been specified by the user, + * and the underlying volume is striped, then set rtextblocks + * to the stripe width. 
+ */ + int dummy1, rswidth; + __uint64_t rtextbytes; + dummy1 = rswidth = 0; + + if (!xi.disfile) + get_subvol_stripe_wrapper(dfile, SVTYPE_RT, &dummy1, + &rswidth); + + /* check that rswidth is a multiple of fs blocksize */ + if (rswidth && !(BBTOB(rswidth) % blocksize)) { + rswidth = DTOBT(rswidth); + rtextbytes = rswidth << blocklog; + if (XFS_MIN_RTEXTSIZE <= rtextbytes && + (rtextbytes <= XFS_MAX_RTEXTSIZE)) { + rtextblocks = rswidth; + } else { + rtextblocks = XFS_DFL_RTEXTSIZE >> blocklog; + } + } else + rtextblocks = XFS_DFL_RTEXTSIZE >> blocklog; + } + + /* + * Check some argument sizes against mins, maxes. + */ + if (isize > blocksize / XFS_MIN_INODE_PERBLOCK || + isize < XFS_DINODE_MIN_SIZE || + isize > XFS_DINODE_MAX_SIZE) { + int maxsz; + + fprintf(stderr, "illegal inode size %d\n", isize); + maxsz = MIN(blocksize / XFS_MIN_INODE_PERBLOCK, + XFS_DINODE_MAX_SIZE); + if (XFS_DINODE_MIN_SIZE == maxsz) + fprintf(stderr, + "allowable inode size with %d byte blocks is %d\n", + blocksize, XFS_DINODE_MIN_SIZE); + else + fprintf(stderr, + "allowable inode size with %d byte blocks is between %d and %d\n", + blocksize, XFS_DINODE_MIN_SIZE, maxsz); + usage(); + } + + if (dsunit && !dswidth || !dsunit && dswidth) { + fprintf(stderr, +"both sunit and swidth options have to be specified\n"); + usage(); + } + + if (dsunit && dswidth % dsunit != 0) { + fprintf(stderr, +"mount: stripe width (%d) has to be a multiple of the stripe unit (%d)\n", + dswidth, dsunit); + return 1; + } + + /* other global variables */ + sectlog = 9; /* i.e. 512 bytes */ + + /* + * Initialize. This will open the log and rt devices as well. + */ + if (!libxfs_init(&xi)) + usage(); + if (!xi.ddev) { + fprintf(stderr, "no device name given in argument list\n"); + usage(); + } + + /* + * Check whether this partition contains a known filesystem. 
+ */ + + if (force_fs_overwrite == 0) { + char *fstyp; + int fsfound = 0; + + fstyp = (char *) mnt_known_fs_type (dfile); + + if (fstyp != NULL) { + fprintf(stderr, "%s: " + "%s appears to contain an existing filesystem (%s).\n", + progname, dfile, fstyp); + fsfound = 1; + } + + if (logfile && *logfile) { + fstyp = (char *) mnt_known_fs_type (logfile); + + if (fstyp != NULL) { + fprintf(stderr, "%s: " + "%s appears to contain an existing filesystem (%s).\n", + progname, logfile, fstyp); + fsfound = 1; + } + } + + if (xi.rtname && *xi.rtname) { + fstyp = (char *) mnt_known_fs_type (xi.rtname); + + if (fstyp != NULL) { + fprintf(stderr, "%s: " + "%s appears to contain an existing filesystem (%s).\n", + progname, xi.rtname, fstyp); + fsfound = 1; + } + } + + if (fsfound) { + fprintf(stderr, "%s: " + "Use the -f option to force overwrite\n", + progname); + exit(1); + } + } + + if (!xi.disfile && do_overlap_checks) { + /* + * do partition overlap check + * If this is a straight file we assume that it's been created + * before the call to mnt_check_init() + */ + + if (mnt_check_init(&mnt_check_state) == -1) { + fprintf(stderr, + "unable to initialize mount checking " + "routines, bypassing protection checks.\n"); + } else { + mnt_partition_count = mnt_find_mount_conflicts( + mnt_check_state, dfile); + + /* + * ignore -1 return codes, since 3rd party devices + * may not be part of hinv. 
+ */ + if (mnt_partition_count > 0) { + if (mnt_causes_test(mnt_check_state, MNT_CAUSE_MOUNTED)) { + fprintf(stderr, "%s: " + "%s is already in use.\n", + progname, dfile); + } else if (mnt_causes_test(mnt_check_state, MNT_CAUSE_OVERLAP)) { + fprintf(stderr, "%s: " + "%s overlaps partition(s) " + "already in use.\n", + progname, dfile); + } else { + mnt_causes_show(mnt_check_state, stderr, progname); + } + fprintf(stderr, "\n"); + fflush(stderr); + mnt_plist_show(mnt_check_state, stderr, progname); + fprintf(stderr, "\n"); + } + mnt_check_end(mnt_check_state); + if (mnt_partition_count > 0) { + usage(); + } + } + } + + if (!liflag && !ldflag) + loginternal = xi.logdev == 0; + if (xi.logname) + logfile = xi.logname; + else if (loginternal) + logfile = "internal log"; + else if (xi.volname && xi.logdev) + logfile = "volume log"; + else if (!ldflag) { + fprintf(stderr, "no log subvolume or internal log\n"); + usage(); + } + if (xi.rtname) + rtfile = xi.rtname; + else + if (xi.volname && xi.rtdev) + rtfile = "volume rt"; + else if (!xi.rtdev) + rtfile = "none"; + if (dsize && xi.dsize > 0 && dblocks > DTOBT(xi.dsize)) { + fprintf(stderr, +"size %s specified for data subvolume is too large, maximum is %lld blocks\n", + dsize, DTOBT(xi.dsize)); + usage(); + } else if (!dsize && xi.dsize > 0) + dblocks = DTOBT(xi.dsize); + else if (!dsize) { + fprintf(stderr, "can't get size of data subvolume\n"); + usage(); + } + if (dblocks < XFS_MIN_DATA_BLOCKS) { + fprintf(stderr, + "size %lld of data subvolume is too small, minimum %d blocks\n", + dblocks, XFS_MIN_DATA_BLOCKS); + usage(); + } + if (xi.logdev && loginternal) { + fprintf(stderr, "can't have both external and internal logs\n"); + usage(); + } + if (dirversion == 1) + i = max_trres_v1[blocklog - XFS_MIN_BLOCKSIZE_LOG] + [inodelog - XFS_DINODE_MIN_LOG]; + else + i = max_trres_v2[blocklog - XFS_MIN_BLOCKSIZE_LOG] + [inodelog - XFS_DINODE_MIN_LOG] + [dirblocklog - XFS_MIN_BLOCKSIZE_LOG]; + min_logblocks = 
MAX(XFS_MIN_LOG_BLOCKS, i * XFS_MIN_LOG_FACTOR); + if (logsize && xi.logBBsize > 0 && logblocks > DTOBT(xi.logBBsize)) { + fprintf(stderr, +"size %s specified for log subvolume is too large, maximum is %lld blocks\n", + logsize, DTOBT(xi.logBBsize)); + usage(); + } else if (!logsize && xi.logBBsize > 0) + logblocks = DTOBT(xi.logBBsize); + else if (logsize && !xi.logdev && !loginternal) { + fprintf(stderr, + "size specified for non-existent log subvolume\n"); + usage(); + } else if (loginternal && logsize && logblocks >= dblocks) { + fprintf(stderr, "size %lld too large for internal log\n", + logblocks); + usage(); + } else if (!loginternal && !xi.logdev) + logblocks = 0; + else if (loginternal && !logsize) + logblocks = MAX(XFS_DFL_LOG_SIZE, i * XFS_DFL_LOG_FACTOR); + if (logblocks < min_logblocks) { + fprintf(stderr, + "log size %lld blocks too small, minimum size is %d blocks\n", + logblocks, min_logblocks); + usage(); + } + if (logblocks > XFS_MAX_LOG_BLOCKS) { + fprintf(stderr, + "log size %lld blocks too large, maximum size is %d blocks\n", + logblocks, XFS_MAX_LOG_BLOCKS); + usage(); + } + if ((logblocks << blocklog) > XFS_MAX_LOG_BYTES) { + fprintf(stderr, + "log size %lld bytes too large, maximum size is %d bytes\n", + logblocks << blocklog, XFS_MAX_LOG_BYTES); + usage(); + } + if (rtsize && xi.rtsize > 0 && rtblocks > DTOBT(xi.rtsize)) { + fprintf(stderr, +"size %s specified for rt subvolume is too large, maximum is %lld blocks\n", + rtsize, DTOBT(xi.rtsize)); + usage(); + } else if (!rtsize && xi.rtsize > 0) + rtblocks = DTOBT(xi.rtsize); + else if (rtsize && !xi.rtdev) { + fprintf(stderr, + "size specified for non-existent rt subvolume\n"); + usage(); + } + if (xi.rtdev) { + rtextents = rtblocks / rtextblocks; + nbmblocks = (xfs_extlen_t)howmany(rtextents, NBBY * blocksize); + } else { + rtextents = rtblocks = 0; + nbmblocks = 0; + } + agsize = dblocks / agcount + (dblocks % agcount != 0); + + /* + * If the ag size is too small, complain if agcount was 
specified, + * and fix it otherwise. + */ + if (agsize < XFS_AG_MIN_BLOCKS(blocklog)) { + if (daflag) { + fprintf(stderr, + "too many allocation groups for size\n"); + fprintf(stderr, "need at most %lld allocation groups\n", + dblocks / XFS_AG_MIN_BLOCKS(blocklog) + + (dblocks % XFS_AG_MIN_BLOCKS(blocklog) != 0)); + usage(); + } + agsize = XFS_AG_MIN_BLOCKS(blocklog); + if (dblocks < agsize) + agcount = 1; + else { + agcount = dblocks / agsize; + agsize = dblocks / agcount + (dblocks % agcount != 0); + } + } + /* + * If the ag size is too large, complain if agcount was specified, + * and fix it otherwise. + */ + else if (agsize > XFS_AG_MAX_BLOCKS(blocklog)) { + if (daflag) { + fprintf(stderr, "too few allocation groups for size\n"); + fprintf(stderr, + "need at least %lld allocation groups\n", + dblocks / XFS_AG_MAX_BLOCKS(blocklog) + + (dblocks % XFS_AG_MAX_BLOCKS(blocklog) != 0)); + usage(); + } + agsize = XFS_AG_MAX_BLOCKS(blocklog); + agcount = dblocks / agsize + (dblocks % agsize != 0); + agsize = dblocks / agcount + (dblocks % agcount != 0); + } + /* + * If agcount was not specified, and agsize is larger than + * we'd like, make it the size we want. + */ + if (!daflag && agsize > XFS_AG_BEST_BLOCKS(blocklog)) { + agsize = XFS_AG_BEST_BLOCKS(blocklog); + agcount = dblocks / agsize + (dblocks % agsize != 0); + agsize = dblocks / agcount + (dblocks % agcount != 0); + } + /* + * If agcount is too large, make it smaller. + */ + if (agcount > XFS_MAX_AGNUMBER + 1) { + agcount = XFS_MAX_AGNUMBER + 1; + agsize = dblocks / agcount + (dblocks % agcount != 0); + if (agsize > XFS_AG_MAX_BLOCKS(blocklog)) { + /* + * We're confused. 
+ */ + fprintf(stderr, "%s: can't compute agsize/agcount\n", + progname); + exit(1); + } + } + + xlv_dsunit = xlv_dswidth = 0; + if (!xi.disfile) + get_subvol_stripe_wrapper(dfile, SVTYPE_DATA, &xlv_dsunit, + &xlv_dswidth); + if (dsunit) { + + if (xlv_dsunit && xlv_dsunit != dsunit) { + fprintf(stderr, "%s: " + "Specified data stripe unit %d is not the same as the xlv stripe unit %d\n", + progname, dsunit, xlv_dsunit); + exit(1); + } + if (xlv_dswidth && xlv_dswidth != dswidth) { + fprintf(stderr, "%s: " +"Specified data stripe width (%d) is not the same as the xlv stripe width (%d)\n", + progname, dswidth, xlv_dswidth); + exit(1); + } + } else { + dsunit = xlv_dsunit; + dswidth = xlv_dswidth; + nodsflag = 1; + } + + /* + * If dsunit is a multiple of fs blocksize, then check that is a + * multiple of the agsize too + */ + if (dsunit && !(BBTOB(dsunit) % blocksize) && + dswidth && !(BBTOB(dswidth) % blocksize)) { + + /* convert from 512 byte blocks to fs blocksize */ + dsunit = DTOBT(dsunit); + dswidth = DTOBT(dswidth); + + /* + * agsize is not a multiple of dsunit + */ + if ((agsize % dsunit) != 0) { + /* + * round up to stripe unit boundary. 
Also make sure + * that agsize is still larger than + * XFS_AG_MIN_BLOCKS(blocklog) + */ + tmp_agsize = ((agsize + (dsunit - 1))/ dsunit) * dsunit; + if ((tmp_agsize >= XFS_AG_MIN_BLOCKS(blocklog)) && + (tmp_agsize <= XFS_AG_MAX_BLOCKS(blocklog)) && + !daflag) { + agsize = tmp_agsize; + agcount = dblocks/agsize + + (dblocks % agsize != 0); + } else { + if (nodsflag) + dsunit = dswidth = 0; + else { + fprintf(stderr, +"Allocation group size %lld is not a multiple of the stripe unit %d\n", + agsize, dsunit); + exit(1); + } + } + } + } else { + if (nodsflag) + dsunit = dswidth = 0; + else { + fprintf(stderr, "%s: " +"Stripe unit(%d) or stripe width(%d) is not a multiple of the block size(%d)\n", + progname, dsunit, dswidth, blocksize); + exit(1); + } + } + + protostring = setup_proto(protofile); + bsize = 1 << (blocklog - BBSHIFT); + buf = libxfs_getbuf(xi.ddev, XFS_SB_DADDR, 1); + mp = &mbuf; + sbp = &mp->m_sb; + bzero(mp, sizeof(xfs_mount_t)); + sbp->sb_blocklog = (__uint8_t)blocklog; + sbp->sb_agblklog = (__uint8_t)libxfs_log2_roundup((unsigned int)agsize); + mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; + if (loginternal) { + if (logblocks > agsize - XFS_PREALLOC_BLOCKS(mp)) { + fprintf(stderr, + "internal log size %lld too large, must fit in allocation group\n", + logblocks); + usage(); + } + if (laflag) { + if (logagno >= agcount) { + fprintf(stderr, + "log ag number %d too large, must be less than %lld\n", + logagno, agcount); + usage(); + } + } else + logagno = (xfs_agnumber_t)(agcount / 2); + + logstart = XFS_AGB_TO_FSB(mp, logagno, XFS_PREALLOC_BLOCKS(mp)); + /* + * Align the logstart at stripe unit boundary. 
+ */ + if (dsunit && ((logstart % dsunit) != 0)) { + logstart = ((logstart + (dsunit - 1))/dsunit) * dsunit; + + /* + * Make sure that the log size is a multiple of the + * stripe unit + */ + if ((logblocks % dsunit) != 0) + if (!lsflag) + logblocks = ((logblocks + (dsunit - 1)) + /dsunit) * dsunit; + else { + fprintf(stderr, + "internal log size %lld is not a multiple of the stripe unit %d\n", + logblocks, dsunit); + usage(); + } + + if (logblocks > agsize-XFS_FSB_TO_AGBNO(mp,logstart)) { + fprintf(stderr, + "Due to stripe alignment, the internal log size %lld is too large.\n" + "Must fit in allocation group\n", + logblocks); + usage(); + } + lalign = 1; + } + } else + logstart = 0; + sbp->sb_magicnum = XFS_SB_MAGIC; + sbp->sb_blocksize = blocksize; + sbp->sb_dblocks = dblocks; + sbp->sb_rblocks = rtblocks; + sbp->sb_rextents = rtextents; + uuid_generate(uuid); + uuid_copy(sbp->sb_uuid, uuid); + sbp->sb_logstart = logstart; + sbp->sb_rootino = sbp->sb_rbmino = sbp->sb_rsumino = NULLFSINO; + sbp->sb_rextsize = rtextblocks; + sbp->sb_agblocks = (xfs_agblock_t)agsize; + sbp->sb_agcount = (xfs_agnumber_t)agcount; + sbp->sb_rbmblocks = nbmblocks; + sbp->sb_logblocks = (xfs_extlen_t)logblocks; + sbp->sb_sectsize = 1 << sectlog; + sbp->sb_inodesize = (__uint16_t)isize; + sbp->sb_inopblock = (__uint16_t)(blocksize / isize); + sbp->sb_sectlog = (__uint8_t)sectlog; + sbp->sb_inodelog = (__uint8_t)inodelog; + sbp->sb_inopblog = (__uint8_t)(blocklog - inodelog); + sbp->sb_rextslog = + (__uint8_t)(rtextents ? + libxfs_highbit32((unsigned int)rtextents) : 0); + sbp->sb_inprogress = 1; /* mkfs is in progress */ + sbp->sb_imax_pct = imflag ? imaxpct : XFS_DFL_IMAXIMUM_PCT; + sbp->sb_icount = 0; + sbp->sb_ifree = 0; + sbp->sb_fdblocks = dblocks - agcount * XFS_PREALLOC_BLOCKS(mp) - + (loginternal ? 
logblocks : 0); + sbp->sb_frextents = 0; /* will do a free later */ + sbp->sb_uquotino = sbp->sb_pquotino = 0; + sbp->sb_qflags = 0; + sbp->sb_unit = dsunit; + sbp->sb_width = dswidth; + if (dirversion == 2) + sbp->sb_dirblklog = dirblocklog - blocklog; + if (iaflag) { + sbp->sb_inoalignmt = XFS_INODE_BIG_CLUSTER_SIZE >> blocklog; + iaflag = sbp->sb_inoalignmt != 0; + } else + sbp->sb_inoalignmt = 0; + sbp->sb_versionnum = + XFS_SB_VERSION_MKFS(iaflag, dsunit != 0, extent_flagging, + dirversion == 2); + + bzero(XFS_BUF_PTR(buf), BBSIZE); + libxfs_xlate_sb(XFS_BUF_PTR(buf), sbp, -1, ARCH_CONVERT, + XFS_SB_ALL_BITS); + libxfs_writebuf(buf, 1); + + if (!qflag) + printf( + "meta-data=%-22s isize=%-6d agcount=%lld, agsize=%lld blks\n" + "data =%-22s bsize=%-6d blocks=%lld, imaxpct=%d\n" + " =%-22s sunit=%-6d swidth=%d blks, unwritten=%d\n" + "naming =version %-14d bsize=%-6d\n" + "log =%-22s bsize=%-6d blocks=%lld\n" + "realtime =%-22s extsz=%-6d blocks=%lld, rtextents=%lld\n", + dfile, isize, agcount, agsize, + "", blocksize, dblocks, sbp->sb_imax_pct, + "", dsunit, dswidth, extent_flagging, + dirversion, dirversion == 1 ? blocksize : dirblocksize, + logfile, 1 << blocklog, logblocks, + rtfile, rtextblocks << blocklog, rtblocks, rtextents); + /* + * If the data area is a file, then grow it out to its final size + * so that the reads for the end of the device in the mount code + * will succeed. + */ + if (xi.disfile && ftruncate64(xi.dfd, dblocks * blocksize) < 0) { + fprintf(stderr, "%s: Growing the data section file failed\n", + progname); + exit(1); + } + /* + * Zero the log if there is one. 
+ */ + if (loginternal) + xi.logdev = xi.ddev; + if (xi.logdev) + libxfs_log_clear( + xi.logdev, + XFS_FSB_TO_DADDR(mp, logstart), + (xfs_extlen_t)XFS_FSB_TO_BB(mp, logblocks), + &sbp->sb_uuid, + XLOG_FMT); + + mp = libxfs_mount(mp, sbp, xi.ddev, xi.logdev, xi.rtdev, 1); + if (!mp) { + fprintf(stderr, "%s: mount initialization failed\n", progname); + exit(1); + } + if (xi.logdev && + XFS_FSB_TO_B(mp, logblocks) < + XFS_MIN_LOG_FACTOR * max_trans_res(mp)) { + fprintf(stderr, "%s: log size (%lld) is too small for " + "transaction reservations\n", + progname, logblocks); + exit(1); + } + + for (agno = 0; agno < agcount; agno++) { + /* + * Superblock. + */ + buf = libxfs_getbuf(xi.ddev, + XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), 1); + bzero(XFS_BUF_PTR(buf), BBSIZE); + libxfs_xlate_sb(XFS_BUF_PTR(buf), sbp, -1, ARCH_CONVERT, + XFS_SB_ALL_BITS); + libxfs_writebuf(buf, 1); + + /* + * AG header block: freespace + */ + buf = libxfs_getbuf(mp->m_dev, + XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), 1); + agf = XFS_BUF_TO_AGF(buf); + bzero(agf, BBSIZE); + if (agno == agcount - 1) + agsize = dblocks - (xfs_drfsbno_t)(agno * agsize); + INT_SET(agf->agf_magicnum, ARCH_CONVERT, XFS_AGF_MAGIC); + INT_SET(agf->agf_versionnum, ARCH_CONVERT, XFS_AGF_VERSION); + INT_SET(agf->agf_seqno, ARCH_CONVERT, agno); + INT_SET(agf->agf_length, ARCH_CONVERT, (xfs_agblock_t)agsize); + INT_SET(agf->agf_roots[XFS_BTNUM_BNOi], ARCH_CONVERT, + XFS_BNO_BLOCK(mp)); + INT_SET(agf->agf_roots[XFS_BTNUM_CNTi], ARCH_CONVERT, + XFS_CNT_BLOCK(mp)); + INT_SET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT, 1); + INT_SET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT, 1); + INT_SET(agf->agf_flfirst, ARCH_CONVERT, 0); + INT_SET(agf->agf_fllast, ARCH_CONVERT, XFS_AGFL_SIZE - 1); + INT_SET(agf->agf_flcount, ARCH_CONVERT, 0); + nbmblocks = (xfs_extlen_t)(agsize - XFS_PREALLOC_BLOCKS(mp)); + INT_SET(agf->agf_freeblks, ARCH_CONVERT, nbmblocks); + INT_SET(agf->agf_longest, ARCH_CONVERT, nbmblocks); + if (loginternal && agno == 
logagno) { + INT_MOD(agf->agf_freeblks, ARCH_CONVERT, -logblocks); + INT_SET(agf->agf_longest, ARCH_CONVERT, agsize - + XFS_FSB_TO_AGBNO(mp, logstart) - logblocks); + } + if (XFS_MIN_FREELIST(agf, mp) > worst_freelist) + worst_freelist = XFS_MIN_FREELIST(agf, mp); + libxfs_writebuf(buf, 1); + + /* + * AG header block: inodes + */ + buf = libxfs_getbuf(mp->m_dev, + XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), 1); + agi = XFS_BUF_TO_AGI(buf); + bzero(agi, BBSIZE); + INT_SET(agi->agi_magicnum, ARCH_CONVERT, XFS_AGI_MAGIC); + INT_SET(agi->agi_versionnum, ARCH_CONVERT, XFS_AGI_VERSION); + INT_SET(agi->agi_seqno, ARCH_CONVERT, agno); + INT_SET(agi->agi_length, ARCH_CONVERT, (xfs_agblock_t)agsize); + INT_SET(agi->agi_count, ARCH_CONVERT, 0); + INT_SET(agi->agi_root, ARCH_CONVERT, XFS_IBT_BLOCK(mp)); + INT_SET(agi->agi_level, ARCH_CONVERT, 1); + INT_SET(agi->agi_freecount, ARCH_CONVERT, 0); + INT_SET(agi->agi_newino, ARCH_CONVERT, NULLAGINO); + INT_SET(agi->agi_dirino, ARCH_CONVERT, NULLAGINO); + for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) + INT_SET(agi->agi_unlinked[i], ARCH_CONVERT, NULLAGINO); + libxfs_writebuf(buf, 1); + + /* + * BNO btree root block + */ + buf = libxfs_getbuf(mp->m_dev, + XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), + bsize); + block = XFS_BUF_TO_SBLOCK(buf); + bzero(block, blocksize); + INT_SET(block->bb_magic, ARCH_CONVERT, XFS_ABTB_MAGIC); + INT_SET(block->bb_level, ARCH_CONVERT, 0); + INT_SET(block->bb_numrecs, ARCH_CONVERT, 1); + INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK); + INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK); + arec = XFS_BTREE_REC_ADDR(blocksize, xfs_alloc, block, 1, + XFS_BTREE_BLOCK_MAXRECS(blocksize, xfs_alloc, 1)); + INT_SET(arec->ar_startblock, ARCH_CONVERT, + XFS_PREALLOC_BLOCKS(mp)); + if (loginternal && agno == logagno) { + if (lalign) { + /* + * Have to insert two records + */ + INT_SET(arec->ar_blockcount, ARCH_CONVERT, + (xfs_extlen_t)(XFS_FSB_TO_AGBNO( + mp, logstart) + - (INT_GET(arec->ar_startblock, + 
ARCH_CONVERT)))); + nrec = arec + 1; + INT_SET(nrec->ar_startblock, ARCH_CONVERT, + INT_GET(arec->ar_startblock, + ARCH_CONVERT) + + INT_GET(arec->ar_blockcount, + ARCH_CONVERT)); + arec = nrec; + INT_MOD(block->bb_numrecs, ARCH_CONVERT, 1); + } + INT_MOD(arec->ar_startblock, ARCH_CONVERT, logblocks); + } + INT_SET(arec->ar_blockcount, ARCH_CONVERT, + (xfs_extlen_t)(agsize - + INT_GET(arec->ar_startblock, ARCH_CONVERT))); + libxfs_writebuf(buf, 1); + + /* + * CNT btree root block + */ + buf = libxfs_getbuf(mp->m_dev, + XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), + bsize); + block = XFS_BUF_TO_SBLOCK(buf); + bzero(block, blocksize); + INT_SET(block->bb_magic, ARCH_CONVERT, XFS_ABTC_MAGIC); + INT_SET(block->bb_level, ARCH_CONVERT, 0); + INT_SET(block->bb_numrecs, ARCH_CONVERT, 1); + INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK); + INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK); + arec = XFS_BTREE_REC_ADDR(blocksize, xfs_alloc, block, 1, + XFS_BTREE_BLOCK_MAXRECS(blocksize, xfs_alloc, 1)); + INT_SET(arec->ar_startblock, ARCH_CONVERT, + XFS_PREALLOC_BLOCKS(mp)); + if (loginternal && agno == logagno) { + if (lalign) { + INT_SET(arec->ar_blockcount, ARCH_CONVERT, + (xfs_extlen_t)( XFS_FSB_TO_AGBNO( + mp, logstart) - (INT_GET( + arec->ar_startblock, ARCH_CONVERT)) ) + ); + nrec = arec + 1; + INT_SET(nrec->ar_startblock, ARCH_CONVERT, + INT_GET(arec->ar_startblock, ARCH_CONVERT) + + INT_GET(arec->ar_blockcount, ARCH_CONVERT)); + arec = nrec; + INT_MOD(block->bb_numrecs, ARCH_CONVERT, 1); + } + INT_MOD(arec->ar_startblock, ARCH_CONVERT, logblocks); + } + INT_SET(arec->ar_blockcount, ARCH_CONVERT, (xfs_extlen_t) + (agsize - INT_GET(arec->ar_startblock, ARCH_CONVERT))); + libxfs_writebuf(buf, 1); + /* + * INO btree root block + */ + buf = libxfs_getbuf(mp->m_dev, + XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), + bsize); + block = XFS_BUF_TO_SBLOCK(buf); + bzero(block, blocksize); + INT_SET(block->bb_magic, ARCH_CONVERT, XFS_IBT_MAGIC); + 
INT_SET(block->bb_level, ARCH_CONVERT, 0); + INT_SET(block->bb_numrecs, ARCH_CONVERT, 0); + INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK); + INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK); + libxfs_writebuf(buf, 1); + } + + /* + * Touch last block, make fs the right size if it's a file. + */ + buf = libxfs_getbuf(mp->m_dev, + (xfs_daddr_t)XFS_FSB_TO_BB(mp, dblocks - 1LL), bsize); + bzero(XFS_BUF_PTR(buf), blocksize); + libxfs_writebuf(buf, 1); + + /* + * Make sure we can write the last block in the realtime area. + */ + if (mp->m_rtdev && rtblocks > 0) { + buf = libxfs_getbuf(mp->m_rtdev, + XFS_FSB_TO_BB(mp, rtblocks - 1LL), bsize); + bzero(XFS_BUF_PTR(buf), blocksize); + libxfs_writebuf(buf, 1); + } + /* + * BNO, CNT free block list + */ + for (agno = 0; agno < agcount; agno++) { + xfs_alloc_arg_t args; + xfs_trans_t *tp; + + bzero(&args, sizeof(args)); + args.tp = tp = libxfs_trans_alloc(mp, 0); + args.mp = mp; + args.agno = agno; + args.alignment = 1; + args.minalignslop = UINT_MAX; + args.pag = &mp->m_perag[agno]; + if (i = libxfs_trans_reserve(tp, worst_freelist, 0, 0, 0, 0)) + res_failed(i); + libxfs_alloc_fix_freelist(&args, 0); + libxfs_trans_commit(tp, 0, NULL); + } + /* + * Allocate the root inode and anything else in the proto file. 
+ */ + mp->m_rootip = NULL; + parseproto(mp, NULL, &protostring, NULL); + + /* + * protect ourselves against possible stupidity + */ + if (XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino) != 0) { + fprintf(stderr, "%s: root inode not created in AG 0, " + "created in AG %u", + progname, XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino)); + exit(1); + } + + /* + * write out multiple copies of superblocks with the rootinode field set + */ + if (mp->m_sb.sb_agcount > 1) { + /* + * the last superblock + */ + buf = libxfs_readbuf(mp->m_dev, + XFS_AGB_TO_DADDR(mp, mp->m_sb.sb_agcount-1, + XFS_SB_DADDR), + BTOBB(mp->m_sb.sb_sectsize), 1); + INT_SET((XFS_BUF_TO_SBP(buf))->sb_rootino, + ARCH_CONVERT, mp->m_sb.sb_rootino); + libxfs_writebuf(buf, 1); + /* + * and one in the middle for luck + */ + if (mp->m_sb.sb_agcount > 2) { + buf = libxfs_readbuf(mp->m_dev, + XFS_AGB_TO_DADDR(mp, (mp->m_sb.sb_agcount-1)/2, + XFS_SB_DADDR), + BTOBB(mp->m_sb.sb_sectsize), 1); + INT_SET((XFS_BUF_TO_SBP(buf))->sb_rootino, + ARCH_CONVERT, mp->m_sb.sb_rootino); + libxfs_writebuf(buf, 1); + } + } + + /* + * Mark the filesystem ok. 
+ */ + buf = libxfs_getsb(mp, 1); + (XFS_BUF_TO_SBP(buf))->sb_inprogress = 0; + libxfs_writebuf(buf, 1); + + libxfs_umount(mp); + if (xi.rtdev) + libxfs_device_close(xi.rtdev); + if (xi.logdev && xi.logdev != xi.ddev) + libxfs_device_close(xi.logdev); + libxfs_device_close(xi.ddev); + + return 0; +} + +static void +conflict( + char opt, + char *tab[], + int oldidx, + int newidx) +{ + fprintf(stderr, "Cannot specify both -%c %s and -%c %s\n", + opt, tab[oldidx], opt, tab[newidx]); + usage(); +} + + +static void +illegal( + char *value, + char *opt) +{ + fprintf(stderr, "Illegal value %s for -%s option\n", value, opt); + usage(); +} + +static int +ispow2( + unsigned int i) +{ + return i != 0 && (i & (i - 1)) == 0; /* 0 is not a power of two */ +} + +static void +reqval( + char opt, + char *tab[], + int idx) +{ + fprintf(stderr, "-%c %s option requires a value\n", opt, tab[idx]); + usage(); +} + +static void +respec( + char opt, + char *tab[], + int idx) +{ + fprintf(stderr, "-%c ", opt); + if (tab) + fprintf(stderr, "%s ", tab[idx]); + fprintf(stderr, "option respecified\n"); + usage(); +} + +static void +unknown( + char opt, + char *s) +{ + fprintf(stderr, "unknown option -%c %s\n", opt, s); + usage(); +} + +static int +max_trans_res( + xfs_mount_t *mp) +{ + uint *p; + int rval; + xfs_trans_reservations_t *tr; + + tr = &mp->m_reservations; + + for (rval = 0, p = (uint *)tr; p < (uint *)(tr + 1); p++) { + if ((int)*p > rval) + rval = (int)*p; + } + return rval; +} + +long long +cvtnum( + int blocksize, + char *s) +{ + long long i; + char *sp; + extern void usage(void); + + i = strtoll(s, &sp, 0); + if (i == 0 && sp == s) + return -1LL; + if (*sp == '\0') + return i; + + if (*sp == 'b' && sp[1] == '\0') { + if (blocksize) + return i * blocksize; + + fprintf(stderr, "blocksize not available yet.\n"); + usage(); + } + + if (*sp == 'k' && sp[1] == '\0') + return 1024LL * i; + if (*sp == 'm' && sp[1] == '\0') + return 1024LL * 1024LL * i; + if (*sp == 'g' && sp[1] == '\0') + return 1024LL * 1024LL * 1024LL * i;
+ return -1LL; +} + +void +usage(void) +{ + fprintf(stderr, "Usage: %s\n\ +/* blocksize */ [-b log=n|size=num]\n\ +/* data subvol */ [-d agcount=n,agsize=n,file,name=xxx,size=num,\n\ + sunit=value,swidth=value,unwritten=0|1]\n\ +/* inode size */ [-i log=n|perblock=n|size=num,maxpct=n]\n\ +/* log subvol */ [-l agnum=n,internal,size=num,logdev=xxx]\n\ +/* naming */ [-n log=n|size=num|version=n]\n\ +/* prototype file */ [-p fname]\n\ +/* quiet */ [-q]\n\ +/* version */ [-V]\n\ +/* realtime subvol */ [-r extsize=num,size=num,rtdev=xxx]\n\ + devicename\n\ +devicename is required unless -d name=xxx is given\n\ +internal 1000 block log is default unless overridden or using a volume \ +manager with log\n\ +num is xxx (bytes), or xxxb (blocks), or xxxk (xxx KB), or xxxm (xxx MB)\n\ +value is xxx (512 blocks)\n", + progname); + exit(1); +} diff --git a/mkfs/xfs_mkfs.h b/mkfs/xfs_mkfs.h new file mode 100644 index 000000000..6dcd004d0 --- /dev/null +++ b/mkfs/xfs_mkfs.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever.
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_MKFS_H__ +#define __XFS_MKFS_H__ + +#define XFS_DFL_BLOCKSIZE_LOG 12 /* 4096 byte blocks */ +#define XFS_DINODE_DFL_LOG 8 /* 256 byte inodes */ +#define XFS_MIN_DATA_BLOCKS 100 +#define XFS_MIN_INODE_PERBLOCK 2 /* min inodes per block */ +#define XFS_DFL_IMAXIMUM_PCT 25 /* max % of space for inodes */ +#define XFS_IFLAG_ALIGN 1 /* -i align defaults on */ +#define XFS_MIN_REC_DIRSIZE 12 /* 4096 byte dirblocks (V2) */ +#define XFS_DFL_DIR_VERSION 2 /* default directory version */ +#define XFS_DFL_LOG_SIZE 1000 /* default log size, blocks */ +#define XFS_MIN_LOG_FACTOR 3 /* min log size factor */ +#define XFS_DFL_LOG_FACTOR 16 /* default log size, factor */ + /* with max trans reservation */ +extern void usage (void); +extern long long cvtnum (int blocksize, char *s); + +#endif /* __XFS_MKFS_H__ */ diff --git a/repair/Makefile b/repair/Makefile new file mode 100644 index 000000000..96f81d7c4 --- /dev/null +++ b/repair/Makefile @@ -0,0 +1,72 @@ +# +# Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, +# Mountain View, CA 94043, or: +# +# http://www.sgi.com +# +# For further information regarding this notice, see: +# +# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ +# + +TOPDIR = .. +include $(TOPDIR)/include/builddefs + +CMDTARGET = xfs_repair +CMDDEPS = $(LIBXFS) + +HFILES = agheader.h attr_repair.h avl.h avl64.h bmap.h dinode.h dir.h \ + dir2.h dir_stack.h err_protos.h globals.h incore.h protos.h rt.h \ + scan.h versions.h + +CFILES = agheader.c attr_repair.c avl.c avl64.c bmap.c dino_chunks.c \ + dinode.c dir.c dir2.c dir_stack.c globals.c incore.c \ + incore_bmc.c init.c incore_ext.c incore_ino.c io.c phase1.c \ + phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c rt.c sb.c \ + scan.c versions.c xfs_repair.c + +LLDLIBS = $(LIBXFS) $(LIBUUID) + +default: $(CMDTARGET) + +include $(BUILDRULES) + +# +# Tracing flags: +# -DXR_BMAP_DBG incore block bitmap debugging +# -DXR_INODE_TRACE inode processing +# -DXR_BMAP_TRACE bmap btree processing +# -DXR_DIR_TRACE directory processing +# -DXR_DUP_TRACE duplicate extent processing +# -DXR_BCNT_TRACE incore bcnt freespace btree building +# -DXR_BLD_FREE_TRACE building on-disk freespace (bcnt/bno) btrees +# -DXR_BLD_INO_TRACE building on-disk inode allocation btrees +# -DXR_BLD_ADD_EXTENT track phase 5 block extent creation +# 
-DXR_BCKPTR_DBG parent list debugging info +# +CFLAGS += -DAVL_USER_MODE -DAVL_FUTURE_ENHANCEMENTS + +install: default + $(INSTALL) -m 755 -d $(XFS_CMDS_SBIN_DIR) + $(INSTALL) -m 755 $(CMDTARGET) $(XFS_CMDS_SBIN_DIR) diff --git a/repair/README b/repair/README new file mode 100644 index 000000000..c007af9f9 --- /dev/null +++ b/repair/README @@ -0,0 +1,718 @@ +A living document. The basic algorithm. + +TODO: (D == DONE) + +0) Need to bring some sanity into the case of flags that can + be set in the secondaries at mkfs time but reset or cleared + in the primary later in the filesystem's life. + +0) Clear the persistent read-only bit if set. Clear the + shared bit if set and the version number is zero. This + brings the filesystem back to a known state. + +0) make sure that superblock geometry code checks the logstart + value against whether or not we have an internal log. + If we have an internal log and a logdev, that's ok. + (Maybe we just aren't using it). If we have an external + log (logstart == 0) but no logdev, that's right out. + +0) write secondary superblock search code. Rewrite initial + superblock parsing code to be less complicated. Just + use variables to indicate primary, secondary, etc., + and use a function to get the SB given a specific location + or something. + +2) For inode alignment, if the SB bit is set and the + inode alignment size field in the SB is set, then + believe that the fs inodes MUST be aligned and + disallow any non-aligned inodes. Likewise, if + the SB bit isn't set (or earlier version) and + the inode alignment size field is zero, then + never set the bit even if the inodes are aligned. + Note that the bits and alignment values are + replicated in the secondary superblocks. + +0) add feature specification options to parse_arguments + +0) add logic to add_inode_ref(), add_inode_reached() + to detect nlink overflows in cases where the fs + (or user had indicated fs) doesn't support new nlinks. 
+ +6) check to make sure that the inodes containing btree blocks + with # recs < minrecs aren't legit -- e.g. the only + descendant of a root block. + +7) inode di_size value sanity checking -- should always be less than + the biggest filebno offset mentioned in the bmaps. Doesn't + have to be equal though since we're allowed to overallocate + (it just wastes a little space). This is for both regular + files and directories (have to modify the existing directory + check). + + Add tracking of largest offset in bmap scanning code. Compare + value against di_size. Should be >= di_size. + + Alternatively, you could pass the inode into down through + the extent record processing layer and make the checks + there. + + Add knowledge of quota inodes. size of quota inode is + always zero. We should maintain that. + +8) Basic quota stuff. + + Invariants + if quota feature bit is set, the quota inodes + if set, should point to disconnected, 0 len inodes. + +D - if quota inodes exist, the quota bits must be + turned on. It's ok for the quota flags to be + zeroed but they should be in a legal state + (see xfs_quota.h). + +D - if the quota flags are non-zero, the corresponding + quota inodes must exist. + + quota inodes are never deleted, only their space + is freed. + + if quotas are being downgraded, then check quota inodes + at the end of phase 3. If they haven't been cleared yet, + clear them. Regardless, then clear sb flags (quota inode + fields, quota flags, and quota bit). + + +5) look at verify_inode_chunk(). it's probably really broken. + + +9) Complicated quota stuff. Add code to bmap scan code to + track used blocks. Add another pair of AVL trees + to track user and project quota limits. Set AVL + trees up at the beginning of phase 3. Quota inodes + can be rebuilt or corrected later if damaged. + + +D - 0) fix directory processing. phase 3, if an entry references + a free inode, *don't* mark it used. wait for the rest of + phase 3 processing to hit that inode. 
If it looks like it's + in use, we'll mark in use then. If not, we'll clear it and + mark the inode map. then in phase 4, you can depend on the + inode map. should probably set the parent info in phase 4. + So we have a check_dups flag. Maybe we should change the + name of check_dir to discover_inodes. During phase 3 + (discover_inodes == 1), uncertain inodes are added to list. + During phase 4 (discover_inodes == 0), they aren't. And + we never mark inodes in use from the directory code. + During phase 4, we shouldn't complain about names with + a leading '/' since we made those names in phase 3. + + Have to change dino_chunks.c (parent setting), dinode.c + and dir.c. + +D - 0) make sure we don't screw up filesystems with real-time inodes. + remember to initialize real-time map with all blocks XR_E_FREE. + +D - 4) check contents of symlinks as well as lengths in process_symlinks() + in dinode.c. Right now, we only check lengths. + + +D - 1) Feature mismatches -- for quotas and attributes, + if the stuff exists in the filesystem, set the + superblock version bits. + +D - 0) rewrite directory leaf block holemap comparison code. + probably should just check the leaf block hole info + against our incore bitmap. If the hole flag is not + set, then we know that there can only be one hole and + it has to be between the entry table and the top of heap. + If the hole flag is set, then it's ok if the on-disk + holemap doesn't describe everything as long as what + it does describe doesn't conflict with reality. + +D - 0) rewrite setting nlinks handling -- for version 1 + inodes, set both nlinks and onlinks (zero projid + and pad) if we have to change anything. For + version 2, I think we're ok. + +D - 0) Put awareness of quota inode into mark_standalone_inodes. + + +D - 8) redo handling of superblocks with bad version numbers. need + to bail out (without harming) fs's that have sbs that + are newer than we are. 
+ +D - 0) How do we handle feature mismatches between fs and + superblock? For nlink, check each inode after you + know it's good. If onlinks is 0 and nlinks is > 0 + and it's a version 2 inode, then it really is a version + 2 inode and the nlinks flag in the SB needs to be set. + If it's a version 2 inode and the SB agrees but onlink + is non-zero, then clear onlink. + +D - 3) keep cumulative counts of freeblocks, inodes, etc. to set in + the superblock at the end of phase 5. Remember that + agf freeblock counters don't include blocks used by + the non-root levels of the freespace trees but that + the sb free block counters include those. + +D - 0) Do parent setting in directory code (called by phase 3). + actually, I put it in process_inode_set and propagated + the parent up to it from the process_dinode/process_dir + routines. seemed cleaner than pushing the irec down + and letting them bang on it. + +D - 0) If we clear a file in phase 4, make sure that if it's + a directory that the parent info is cleared also. + +D - 0) put inode tree flashover (call to add_ino_backptrs) into phase 5. + +D - 0) do set/get_inode_parent functions in incore_ino.c. + also do is/set/ inode_processed. + +D - 0) do a versions.c to extract feature info and set global vars + from the superblock version number and possibly feature bits + +D - 0) change longform_dir_entry_check + shortform_dir_entry_check + to return a count of how many illegal '/' entries exist. + if > 0, then process_dirstack needs to call prune_dir_entry + with a hash value of 0 to delete the entries. + +D - 0) add the "processed" bitfield + to the backptrs_t struct that gets attached after + phase 4. + +D- ) Phase 6 !!! + +D - 0) look at usage of XFS_MAKE_IPTR(). It does the right + arithmetic assuming you count your offsets from the + beginning of the buffer. + + +D - 0) look at references to XFS_INODES_PER_CHUNK. 
change the + ones that really mean sizeof(__uint64_t)*NBBY to + something else (like that only defined as a constant + INOS_PER_IREC). this isn't as important since + XFS_INODES_PER_CHUNK will never change + + +D - 0) look at junk_zerolen_dir_leaf_entries() to make sure it isn't hosing + the freemap since it assumed that bytes between the + end of the table and firstused didn't show up in the + freemap when they actually do. + +D - 0) track down XFS_INO_TO_OFFSET() usage. I don't think I'm + using it right. (e.g. I think + it gives you the offset of an inode into a block but + on small block filesystems, I may be reading in inodes + in multiblock buffers and working from the start of + the buffer plus I'm using it to get offsets into + my ino_rec's which may not be a good idea since I + use 64-inode ino_rec's whereas the offset macro + works off blocksize). + +D - 0.0) put buffer -> dirblock conversion macros into xfs kernel code + +D - 0.2) put in sibling pointer checking and path fixup into + bmap (long form) scan routines in scan.c +D - 0.3) find out if bmap btrees with only root blocks are legal. I'm + betting that they're not because they'd be extent inodes + instead. If that's the case, rip some code out of + process_btinode() + + +Algorithm (XXX means not done yet): + +Phase 1 -- get a superblock and zero log + + get a superblock -- either read in primary or + find a secondary (ag header), check ag headers + + To find secondary: + + Go for brute force and read in the filesystem N meg + at a time looking for a superblock. as a + slight optimization, we could maybe skip + ahead some number of blocks to try and get + towards the end of the first ag. + + After you find a secondary, try and find at least + other ags as a verification that the + secondary is a good superblock. + +XXX - Ugh. Have to take growfs'ed filesystems into account. + The root superblock geometry info may not be right if + recovery hasn't run or it's been trashed.
The old ag's + may or may not be right since the system could have crashed + during growfs or the bwrite() to the superblocks could have + failed and the buffer been reused. So we need to check + to see if another ag exists beyond the "last" ag + to see if a growfs happened. If not, then we know that + the geometry info is good and treat the fs as a non-growfs'ed + fs. If we do have inconsistencies, then the smaller geometry + is the old fs and the larger the new. We can check the + new superblocks to see if they're good. If not, then we + know the system crashed at or soon after the growfs and + we can choose to either accept the new geometry info or + trash it and truncate the fs back to the old geometry + parameters. + + Cross-check geometry information in secondary sb's with + primary to ensure that it's correct. + + Use sim code to allow mount filesystems *without* reading + in root inode. This sets up the xfs_mount_t structure + and allows us to use XFS_* macros that we wouldn't + otherwise be able to use. + + Note, I split phase 1 and 2 into separate pieces because I want + to initialize the xfs_repair incore data structures after phase 1. + + parse superblock version and feature flags and set appropriate + global vars to reflect the flags (attributes, quotas, etc.) + + Workaround for the mkfs "not zeroing the superblock buffer" bug. + Determine what field is the last valid non-zero field in + the superblock. The trick here is to be able to differentiate + the last valid non-zero field in the primary superblock and + secondaries because they may not be the same. Fields in + the primary can be set as the filesystem gets upgraded but + the upgrades won't touch the secondaries. This means that + we need to find some number of secondaries and check them. + So we do the checking here and the setting in phase2. 
+ +Phase 2 -- check integrity of allocation group allocation structures + + zero the log if in no modify mode + + sanity check ag headers -- superblocks match, agi isn't + trashed -- the agf and agfl + don't really matter because we can + just recreate them later. + + Zero part of the superblock buffer if necessary + + Walk the freeblock trees to get an + initial idea of what the fs thinks is free. + Files that disagree (claim free'd blocks) + can be salvaged or deleted. If the btree is + internally inconsistent, when in doubt, mark + blocks free. If they're used, they'll be stolen + back later. don't have to check sibling pointers + for each level since we're going to regenerate + all the trees anyway. + Walk the inode allocation trees and + make sure they're ok, otherwise the sim + inode routines will probably just barf. + mark inode allocation tree blocks and ag header + blocks as used blocks. If the trees are + corrupted, this phase will generate "uncertain" + inode chunks. Those chunks go on a list and + will have to be verified later. Record the blocks + that are used to detect corruption and multiply + claimed blocks. These trees will be regenerated + later. Mark the blocks containing inodes referenced + by uncorrupted inode trees as being used by inodes. + The other blocks will get marked when/if the inodes + are verified. + + calculate root and realtime inode numbers from the + filesystem geometry, fix up mount structure's + incore superblock if they're wrong. + +ASSUMPTION: at end of phase 2, we've got superblocks and ag headers + that are not garbage (some data in them like counters and the + freeblock and inode trees may be inconsistent but the header + is readable and otherwise makes sense). + +XXX if in no_modify mode, check for blocks claimed by one freespace + btree and not the other + +Phase 3 -- traverse inodes to make the inodes, bmaps and freespace maps + consistent. For each ag, use either the incore inode map or + scan the ag for inodes.
+ Let's use the incore inode map, now that we've made one + up in phase2. If we lose the maps, we'll locate inodes + when we traverse the directory hierarchy. If we lose both, + we could scan the disk. Ugh. Maybe make that a command-line + option that we support later. + + ASSUMPTION: we know if the ag allocation btrees are intact (phase 2) + + First - Walk and clear the ag unlinked lists. We'll process + the inodes later. Check and make sure that the unlinked + lists reference known inodes. If not, add to the list + of uncertain inodes. + + Second, check the uncertain inode list generated in phase2 and + above and get them into the inode tree if they're good. + The incore inode cluster tree *always* has good + clusters (alignment, etc.) in it. + + Third, make sure that the root inode is known. If not, + and we know the inode number from the superblock, + discover that inode and its chunk. + + Then, walk the incore inode-cluster tree. + + Maintain an in-core bitmap over the entire fs for block allocation. + + traverse each inode, make sure inode mode field matches free/allocated + bit in the incore inode allocation tree. If there's a mismatch, + assume that the inode is in use. + + - for each in-use inode, traverse each bmap/dir/attribute + map or tree. Maintain a map (extent list?) for the + current inode. + + - For each block marked as used, check to see if already known + (referenced by another file or directory) and sanity + check the contents of the block as well if possible + (in the case of meta-blocks). + + - if the inode claims already used blocks, mark the blocks + as multiply claimed (duplicate) and go on. the inode + will be cleared in phase 4. + + - if metablocks are garbaged, clear the inode after + traversing what you can of the bmap and + proceed to next inode. We don't have to worry + about trashing the maps or trees in cleared inodes + because the blocks will show up as free in the + ag freespace trees that we set up in phase 5.
+ + - clear the di_next_unlinked pointer -- all unlinked + but active files go bye-bye. + + - All blocks start out unknown. We need the last state + in case we run into a case where we need to step + on a block to store filesystem meta-data and it + turns out later that it's referenced by some inode's + bmap. In that case, the inode loses because we've + already trashed the block. This shouldn't happen + in the first version unless some inode has a bogus + bmap referencing blocks in the ag header but the + 4th state will keep us from inadvertently doing + something stupid in that case. + + - If inode is allocated, mark all blocks allocated to the + current inode as allocated in the incore freespace + bitmap. + + - If inode is good and a directory, scan through it to + find leaf entries and discover any unknown inodes. + + For shortform, we correct what we can. + + If the directory is corrupt, we try and fix it in + place. If it has zero good entries, then we blast it. + + All unknown inodes get put onto the uncertain inode + list. This is safe because we only put inodes onto + the list when we're processing known inodes so the + uncertain inode list isn't in use. + + We fix only one problem -- an entry that has + a mathematically invalid inode number in it. + If that's the case, we replace the inode number + with NULLFSINO and we'll fix up the entry in + phase 6. + + That info may conflict with the inode information, + but we'll straighten out any inconsistencies there + in phase4 when we process the inodes again. + + Errors involving bogus forward/back links, + zero-length entries make the directory get + trashed. + + if an entry references a free inode, ignore that + fact for now. wait for the rest of phase 3 + processing to hit that inode. If it looks like it's + in use, we'll mark in use then. If not, we'll + clear it and mark the inode map. then in phase + 4, you can depend on the inode map.
+ + Entries that point to non-existent or free + inodes, and extra blocks in the directory + will get fixed in place in a later pass. + + Entries that point to a quota inode are + marked TBD. + + If the directory internally points to the same + block twice, the directory gets blown away. + + Note that processing uncertain inodes can add more inodes + to the uncertain list if they're directories. So we loop + until the uncertain list is empty. + + During inode verification, if the inode blocks are unknown, + mark them as in-use by inodes. + +XXX HEURISTIC -- if we blow an inode away that has space, + assume that the freespace btree is now out of whack. + If it was ok earlier, it's certain to be wrong now. + And the odds of this space free cancelling out the + existing error is so small I'm willing to ignore it. + Should probably do this via a global var and complain + about this later. + +Assumption: All known inodes are now marked as in-use or free. Any + inodes that we haven't found by now are hosed (lost) since + we can't reach them via either the inode btrees or via directory + entries. + + Directories are semi-clean. All '.' entries are good. + Root '..' entry is good if root inode exists. All entries + referencing non-existent inodes, free inodes, etc. + +XXX verify that either quota inode is 0 or NULLFSINO or + if sb quota flag is non zero, verify that quota inode + is NULLFSINO or is referencing a used, but disconnected + inode. + +XXX if in no_modify mode, check for unclaimed blocks + +- Phase 4 - Check for inodes referencing duplicate blocks + + At this point, all known duplicate blocks are marked in + the block map. However, some of the claimed blocks in + the bmap may in fact be free because they belong to inodes + that have to be cleared either due to being a trashed + directory or because it's the first inode to claim a + block that was then claimed later.
There's a similar + problem with meta-data blocks that are referenced by + inode bmaps that are going to be freed once the inode + (or directory) gets cleared. + + So at this point, we collect the duplicate blocks into + extents and put them into the duplicate extent list. + + Mark the ag header blocks as in use. + + We then process each inode twice -- the first time + we check to see if the inode claims a duplicate extent + and we do NOT set the block bitmap. If the inode claims + a duplicate extent, we clear the inode. Since the bitmap + hasn't been set, that automatically frees all blocks associated + with the cleared inode. If the inode is ok, process it a second + time and set the bitmap since we know that this inode will live. + + The unlinked list gets cleared in every inode at this point as + well. We no longer need to preserve it since we've discovered + every inode we're going to find from it. + + verify existence of root inode. if it exists, check for + existence of "lost+found". If it exists, mark the entry + to be deleted, and clear the inode. All the inodes that + were connected to the lost+found will be reconnected later. + +XXX HEURISTIC -- if we blow an inode away that has space, + assume that the freespace btree is now out of whack. + If it was ok earlier, it's certain to be wrong now. + And the odds of this space free cancelling out the + existing error is so small I'm willing to ignore it. + Should probably do this via a global var and complain + about this later. + + Clear the quota inodes if the inode btree says that + they're not in use. The space freed will get picked + up by phase 5. + +XXX Clear the quota inodes if the filesystem is being downgraded. + +- Phase 5 - Build inode allocation trees, freespace trees and + agfl's for each ag. After this, we should be able to + unmount the filesystem and remount it for real. + + For each ag: (if not in no_modify mode) + + scan bitmap first to figure out number of extents.
+ + calculate space required for all trees. Start with inode trees. + Setup the btree cursor which includes the list of preallocated + blocks. As a by-product, this will delete the extents required + for the inode tree from the incore extent tree. + + Calculate how many extents will be required to represent the + remaining free extent tree on disk (twice, one for bybno and + one for bycnt). You have to iterate on this because consuming + extents can alter the number of blocks required to represent + the remaining extents. If there's slop left over, you can + put it in the agfl though. + + Then, manually build the trees, agi, agfs, and agfls. + +XXX if in no_modify mode, scan the on-disk inode allocation + trees and compare against the incore versions. Don't have + to scan the freespace trees because we caught the problems + there in phase2 and phase3. But if we cleared any inodes + with space during phases 3 or 4, now is the time to complain. + +XXX - Free duplicate extent lists. ??? + +Assumptions: at this point, sim code having to do with inode + creation/modification/deletion and space allocation + work because the inode maps, space maps, and bmaps + for all files in the filesystem are good. The only + structures that are screwed up are the directory contents, + which means that lookup may not work for beans, the + root inode which exists but may be completely bogus and + the link counts on all inodes which may also be bogus. + + Free the bitmap, the freespace tree. + + Flash the incore inode tree over from parent list to having + full backpointers. + + realtime processing, if any -- + + (Skip to below if running in no_modify mode). + + Generate the realtime bitmap from the incore realtime + extent map and slam the info into the realtime bitmap + inode. Generate summary info from the realtime extent map. + +XXX if in no_modify mode, compare contents of realtime bitmap + inode to the incore realtime extent map. 
generate the + summary info from the incore realtime extent map. + compare against the contents of the realtime summary inode. + complain if bad. + + reset superblock counters, sync version numbers + +- Phase 6 - directory traversal -- check reference counts, + attach disconnected inodes, fix up bogus directories + + Assumptions: all on-disk space and inode trees are structurally + sound. Incore and on-disk inode trees agree on whether + an inode is in use. + + Directories are structurally sound. All hashvalues + are monotonically increasing and interior nodes are + correct so lookups work. All legal directory entries + point to inodes that are in use and exist. Shortform + directories are fine except that the links haven't been + checked for conflicts (cycles, ".." being correct, etc.). + Longform directories haven't been checked for those problems + either PLUS longform directories may still contain + entries beginning with '/'. No zero-length entries + exist (they've been deleted or converted to '/'). + + Root directory may or may not exist. orphanage may + or may not exist. Contents of either may be completely + bogus. + + Entries may point to free or non-existent inodes. + + At this point, we may need new incore structures and + may be able to trash an old one (like the filesystem + block map) + + If '/' is trashed, then reinitialize it. + + If no realtime inodes, make them and if necessary, slam the + summary info into the realtime summary + inode. Ditto with the realtime bitmap inode. + + Make orphanage (lost+found ???). + + Traverse each directory from '/' (unless it was created). + Check directory structure and each directory entry. + If the entry is bogus (points to a non-existent or + free inode, for example), mark that entry TBD. Maintain + link counts on all inodes. Currently, traversal is + depth-first. + + Mark every inode reached as "reached" (includes + bumping up link counts). + + If an entry points to a directory but the parent (..) 
+ disagrees, then blow away the entry. if the directory + being pointed to winds up disconnected, it'll be moved + to the orphanage (and the link count incremented to + account for the link and the reached bit set then). + + If an entry points to a directory that we've already + reached, then some entry is bad and should be blown + away. It's easiest to blow away the current entry + plus since presumably the parent entry in the + reached directory points to another directory, + then it's far more likely that the current + entry is bogus (otherwise the parent should point + at it). + + If an entry points to a non-existent or free inode, + blow the entry away. + + Every time a good entry is encountered update the + link count for the inode that the entry points to. + + After traversal, scan incore inode map for directories not + reached. Go to first one and try and find its root + by following .. entries. Once at root, run traversal + algorithm. When algorithm terminates, move subtree + root inode to the orphanage. Repeat as necessary + until all disconnected directories are attached. + + Move all disconnected inodes to orphanage. + +- Phase 7: reset reference counts if required. + + Now traverse the on-disk inodes again, and make sure on-disk + reference counts are correct. Reset if necessary. + + SKIP all unused inodes -- that also makes us + skip the orphanage inode which we think is + unused but is really used. However, the ref counts + on that should be right so that's ok. + +--- + +multiple TB xfs_repair + +modify above to work in a couple of AGs at a time. The bitmaps +should span only the current set of AGs. + +The key is to scan the inode bmaps and keep a list of inodes +that span multiple AG sets and keep the list in a data structure +that's keyed off AG set # as well as inode # and also has a bit +to indicate whether or not the inode will be cleared. 
+ +Then in each AG set, when doing duplicate extent processing, +you have to process all multi-AG-set inodes that claim blocks in +the current AG set. If there's a conflict, you mark clear the +inode in the current AG and you mark the multi-AG inode as +"to be cleared". + +After going through all AGs, you can clear the to-be-cleared +multi-AG-set inodes and pull them off the list. + +When building up the AG freespace trees, you walk the bmaps +of all multi-AG-set inodes that are in the AG-set and include +blocks claimed in the AG by the inode as used. + +This probably involves adding a phase 3-0 which would have to +check all the inodes to see which ones are multi-AG-set inodes +and set up the multi-AG-set inode data structure. Plus the +process_dinode routines may have to be altered just a bit +to do the right thing if running in tera-byte mode (call +out to routines that check the multi-AG-set inodes when +appropriate). + +To make things go faster, phase 3-0 could probably run +in parallel. It should be possible to run phases 2-5 +in parallel as well once the appropriate synchronization +is added to the incore routines and the static directory +leaf block bitmap is changed to be on the stack. + +Phase 7 probably can be in parallel as well. + +By in parallel, I mean that assuming that an AG-set +contains 4 AGs, you could run 4 threads, 1 per AG +in parallel to process the AG set. + +I don't see how phase 6 can be run in parallel though. + +And running Phase 8 in parallel is just silly. + diff --git a/repair/agheader.c b/repair/agheader.c new file mode 100644 index 000000000..0a4200f7c --- /dev/null +++ b/repair/agheader.c @@ -0,0 +1,432 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "globals.h" +#include "agheader.h" +#include "protos.h" +#include "err_protos.h" + +int +verify_set_agf(xfs_mount_t *mp, xfs_agf_t *agf, xfs_agnumber_t i) +{ + xfs_drfsbno_t agblocks; + int retval = 0; + + /* check common fields */ + + if (INT_GET(agf->agf_magicnum, ARCH_CONVERT) != XFS_AGF_MAGIC) { + retval = XR_AG_AGF; + do_warn("bad magic # 0x%x for agf %d\n", INT_GET(agf->agf_magicnum, ARCH_CONVERT), i); + + if (!no_modify) + INT_SET(agf->agf_magicnum, ARCH_CONVERT, XFS_AGF_MAGIC); + } + + if (!XFS_AGF_GOOD_VERSION(INT_GET(agf->agf_versionnum, ARCH_CONVERT))) { + retval = XR_AG_AGF; + do_warn("bad version # %d for agf %d\n", + INT_GET(agf->agf_versionnum, ARCH_CONVERT), i); + + if (!no_modify) + INT_SET(agf->agf_versionnum, ARCH_CONVERT, XFS_AGF_VERSION); + } + + if (INT_GET(agf->agf_seqno, ARCH_CONVERT) != i) { + retval = XR_AG_AGF; + do_warn("bad sequence # %d for 
agf %d\n", INT_GET(agf->agf_seqno, ARCH_CONVERT), i); + + if (!no_modify) + INT_SET(agf->agf_seqno, ARCH_CONVERT, i); + } + + if (INT_GET(agf->agf_length, ARCH_CONVERT) != mp->m_sb.sb_agblocks) { + if (i != mp->m_sb.sb_agcount - 1) { + retval = XR_AG_AGF; + do_warn("bad length %d for agf %d, should be %d\n", + INT_GET(agf->agf_length, ARCH_CONVERT), i, mp->m_sb.sb_agblocks); + if (!no_modify) + INT_SET(agf->agf_length, ARCH_CONVERT, mp->m_sb.sb_agblocks); + } else { + agblocks = mp->m_sb.sb_dblocks - + (xfs_drfsbno_t) mp->m_sb.sb_agblocks * i; + + if (INT_GET(agf->agf_length, ARCH_CONVERT) != agblocks) { + retval = XR_AG_AGF; + do_warn( + "bad length %d for agf %d, should be %llu\n", + INT_GET(agf->agf_length, ARCH_CONVERT), i, agblocks); + if (!no_modify) + INT_SET(agf->agf_length, ARCH_CONVERT, (xfs_agblock_t) agblocks); + } + } + } + + /* + * check first/last AGF fields. if need be, lose the free + * space in the AGFL, we'll reclaim it later. + */ + if (INT_GET(agf->agf_flfirst, ARCH_CONVERT) >= XFS_AGFL_SIZE) { + do_warn("flfirst %d in agf %d too large (max = %d)\n", + INT_GET(agf->agf_flfirst, ARCH_CONVERT), i, XFS_AGFL_SIZE); + if (!no_modify) + INT_ZERO(agf->agf_flfirst, ARCH_CONVERT); + } + + if (INT_GET(agf->agf_fllast, ARCH_CONVERT) >= XFS_AGFL_SIZE) { + do_warn("fllast %d in agf %d too large (max = %d)\n", + INT_GET(agf->agf_fllast, ARCH_CONVERT), i, XFS_AGFL_SIZE); + if (!no_modify) + INT_ZERO(agf->agf_fllast, ARCH_CONVERT); + } + + /* don't check freespace btrees -- will be checked by caller */ + + return(retval); +} + +int +verify_set_agi(xfs_mount_t *mp, xfs_agi_t *agi, xfs_agnumber_t i) +{ + xfs_drfsbno_t agblocks; + int retval = 0; + + /* check common fields */ + + if (INT_GET(agi->agi_magicnum, ARCH_CONVERT) != XFS_AGI_MAGIC) { + retval = XR_AG_AGI; + do_warn("bad magic # 0x%x for agi %d\n", INT_GET(agi->agi_magicnum, ARCH_CONVERT), i); + + if (!no_modify) + INT_SET(agi->agi_magicnum, ARCH_CONVERT, XFS_AGI_MAGIC); + } + + if 
(!XFS_AGI_GOOD_VERSION(INT_GET(agi->agi_versionnum, ARCH_CONVERT))) { + retval = XR_AG_AGI; + do_warn("bad version # %d for agi %d\n", + INT_GET(agi->agi_versionnum, ARCH_CONVERT), i); + + if (!no_modify) + INT_SET(agi->agi_versionnum, ARCH_CONVERT, XFS_AGI_VERSION); + } + + if (INT_GET(agi->agi_seqno, ARCH_CONVERT) != i) { + retval = XR_AG_AGI; + do_warn("bad sequence # %d for agi %d\n", INT_GET(agi->agi_seqno, ARCH_CONVERT), i); + + if (!no_modify) + INT_SET(agi->agi_seqno, ARCH_CONVERT, i); + } + + if (INT_GET(agi->agi_length, ARCH_CONVERT) != mp->m_sb.sb_agblocks) { + if (i != mp->m_sb.sb_agcount - 1) { + retval = XR_AG_AGI; + do_warn("bad length # %d for agi %d, should be %d\n", + INT_GET(agi->agi_length, ARCH_CONVERT), i, mp->m_sb.sb_agblocks); + if (!no_modify) + INT_SET(agi->agi_length, ARCH_CONVERT, mp->m_sb.sb_agblocks); + } else { + agblocks = mp->m_sb.sb_dblocks - + (xfs_drfsbno_t) mp->m_sb.sb_agblocks * i; + + if (INT_GET(agi->agi_length, ARCH_CONVERT) != agblocks) { + retval = XR_AG_AGI; + do_warn( + "bad length # %d for agi %d, should be %llu\n", + INT_GET(agi->agi_length, ARCH_CONVERT), i, agblocks); + if (!no_modify) + INT_SET(agi->agi_length, ARCH_CONVERT, (xfs_agblock_t) agblocks); + } + } + } + + /* don't check inode btree -- will be checked by caller */ + + return(retval); +} + +/* + * superblock comparison - compare arbitrary superblock with + * filesystem mount-point superblock + * + * the verified fields include id and geometry. + + * the inprogress fields, version numbers, and counters + * are allowed to differ as well as all fields after the + * counters to cope with the pre-6.5 mkfs non-bzeroed + * secondary superblock sectors. 
+ */ + +int +compare_sb(xfs_mount_t *mp, xfs_sb_t *sb) +{ + fs_geometry_t fs_geo, sb_geo; + + get_sb_geometry(&fs_geo, &mp->m_sb); + get_sb_geometry(&sb_geo, sb); + + if (memcmp(&fs_geo, &sb_geo, + (char *) &fs_geo.sb_shared_vn - (char *) &fs_geo)) + return(XR_SB_GEO_MISMATCH); + + return(XR_OK); +} + +/* + * possible fields that may have been set at mkfs time, + * sb_inoalignmt, sb_unit, sb_width. We know that + * the quota inode fields in the secondaries should be zero. + * Likewise, the sb_flags and sb_shared_vn should also be + * zero and the shared version bit should be cleared for + * current mkfs's. + * + * And everything else in the buffer beyond sb_width should + * be zeroed. + */ +int +secondary_sb_wack(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb, + xfs_agnumber_t i) +{ + int do_bzero; + int size; + int *ip; + int rval; + + rval = do_bzero = 0; + + /* + * mkfs's that stamped a feature bit besides the ones in the mask + * (e.g. were pre-6.5 beta) could leave garbage in the secondary + * superblock sectors. Anything stamping the shared fs bit or better + * into the secondaries is ok and should generate clean secondary + * superblock sectors. so only run the bzero check on the + * potentially garbaged secondaries. + */ + if (pre_65_beta || + (sb->sb_versionnum & XR_GOOD_SECSB_VNMASK) == 0 || + sb->sb_versionnum < XFS_SB_VERSION_4) { + /* + * check for garbage beyond the last field set by the + * pre-6.5 mkfs's. Don't blindly use sizeof(sb). + * Use field addresses instead so this code will still + * work against older filesystems when the superblock + * gets rev'ed again with new fields appended. 
+ */ + size = (__psint_t)&sb->sb_width + sizeof(sb->sb_width) + - (__psint_t)sb; + for (ip = (int *)((__psint_t)sb + size); + ip < (int *)((__psint_t)sb + mp->m_sb.sb_sectsize); + ip++) { + if (*ip) { + do_bzero = 1; + break; + } + } + + if (do_bzero) { + rval |= XR_AG_SB_SEC; + if (!no_modify) { + do_warn( + "zeroing unused portion of secondary superblock %d sector\n", + i); + bzero((void *)((__psint_t)sb + size), + mp->m_sb.sb_sectsize - size); + } else + do_warn( + "would zero unused portion of secondary superblock %d sector\n", + i); + } + } + + /* + * now look for the fields we can manipulate directly. + * if we did a bzero and that bzero could have included + * the field in question, just silently reset it. otherwise, + * complain. + * + * for now, just zero the flags field since only + * the readonly flag is used + */ + if (sb->sb_flags) { + if (!no_modify) + sb->sb_flags = 0; + if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero) { + rval |= XR_AG_SB; + do_warn("bad flags field in superblock %d\n", i); + } else + rval |= XR_AG_SB_SEC; + } + + /* + * quota inodes and flags in secondary superblocks + * are never set by mkfs. However, they could be set + * in a secondary if a fs with quotas was growfs'ed since + * growfs copies the new primary into the secondaries. 
+ */ + if (sb->sb_inprogress == 1 && sb->sb_uquotino) { + if (!no_modify) + sb->sb_uquotino = 0; + if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero) { + rval |= XR_AG_SB; + do_warn( + "non-null user quota inode field in superblock %d\n", + i); + } else + rval |= XR_AG_SB_SEC; + } + + if (sb->sb_inprogress == 1 && sb->sb_pquotino) { + if (!no_modify) + sb->sb_pquotino = 0; + if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero) { + rval |= XR_AG_SB; + do_warn( + "non-null project quota inode field in superblock %d\n", + i); + } else + rval |= XR_AG_SB_SEC; + } + + if (sb->sb_inprogress == 1 && sb->sb_qflags) { + if (!no_modify) + sb->sb_qflags = 0; + if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero) { + rval |= XR_AG_SB; + do_warn("non-null quota flags in superblock %d\n", i); + } else + rval |= XR_AG_SB_SEC; + } + + /* + * if the secondaries agree on a stripe unit/width or inode + * alignment, those fields ought to be valid since they are + * written at mkfs time (and the corresponding sb version bits + * are set). 
+ */ + if (!XFS_SB_VERSION_HASSHARED(sb) && sb->sb_shared_vn != 0) { + if (!no_modify) + sb->sb_shared_vn = 0; + if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero) { + rval |= XR_AG_SB; + do_warn("bad shared version number in superblock %d\n", + i); + } else + rval |= XR_AG_SB_SEC; + } + + if (!XFS_SB_VERSION_HASALIGN(sb) && sb->sb_inoalignmt != 0) { + if (!no_modify) + sb->sb_inoalignmt = 0; + if (sb->sb_versionnum & XR_PART_SECSB_VNMASK || !do_bzero) { + rval |= XR_AG_SB; + do_warn("bad inode alignment field in superblock %d\n", + i); + } else + rval |= XR_AG_SB_SEC; + } + + if (!XFS_SB_VERSION_HASDALIGN(sb) && + (sb->sb_unit != 0 || sb->sb_width != 0)) { + if (!no_modify) + sb->sb_unit = sb->sb_width = 0; + if (sb->sb_versionnum & XR_GOOD_SECSB_VNMASK || !do_bzero) { + rval |= XR_AG_SB; + do_warn( + "bad stripe unit/width fields in superblock %d\n", + i); + } else + rval |= XR_AG_SB_SEC; + } + + return(rval); +} + +/* + * verify and reset the ag header if required. + * + * lower 4 bits of rval are set depending on what got modified. + * (see agheader.h for more details) + * + * NOTE -- this routine does not tell the user that it has + * altered things. Rather, it is up to the caller to do so + * using the bits encoded into the return value. 
+ */ + +int +verify_set_agheader(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb, + xfs_agf_t *agf, xfs_agi_t *agi, xfs_agnumber_t i) +{ + int rval = 0; + int status = XR_OK; + int status_sb = XR_OK; + + status = verify_sb(sb, (i == 0)); + + if (status != XR_OK) { + do_warn("bad on-disk superblock %d - %s\n", + i, err_string(status)); + } + + status_sb = compare_sb(mp, sb); + + if (status_sb != XR_OK) { + do_warn("primary and secondary superblock %d conflict - %s\n", + i, err_string(status_sb)); + } + + if (status != XR_OK || status_sb != XR_OK) { + if (!no_modify) { + *sb = mp->m_sb; + + /* + * clear the more transient fields + */ + sb->sb_inprogress = 1; + + sb->sb_icount = 0; + sb->sb_ifree = 0; + sb->sb_fdblocks = 0; + sb->sb_frextents = 0; + + sb->sb_qflags = 0; + } + + rval |= XR_AG_SB; + } + + rval |= secondary_sb_wack(mp, sbuf, sb, i); + + rval |= verify_set_agf(mp, agf, i); + rval |= verify_set_agi(mp, agi, i); + + return(rval); +} diff --git a/repair/agheader.h b/repair/agheader.h new file mode 100644 index 000000000..48326f7e8 --- /dev/null +++ b/repair/agheader.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +typedef struct fs_geometry { + /* + * these types should match the superblock types + */ + __uint32_t sb_blocksize; /* blocksize (bytes) */ + xfs_drfsbno_t sb_dblocks; /* # data blocks */ + xfs_drfsbno_t sb_rblocks; /* # realtime blocks */ + xfs_drtbno_t sb_rextents; /* # realtime extents */ + uuid_t sb_uuid; /* fs uuid */ + xfs_dfsbno_t sb_logstart; /* starting log block # */ + xfs_agblock_t sb_rextsize; /* realtime extent size (blocks )*/ + xfs_agblock_t sb_agblocks; /* # of blocks per ag */ + xfs_agnumber_t sb_agcount; /* # of ags */ + xfs_extlen_t sb_rbmblocks; /* # of rt bitmap blocks */ + xfs_extlen_t sb_logblocks; /* # of log blocks */ + __uint16_t sb_sectsize; /* volume sector size (bytes) */ + __uint16_t sb_inodesize; /* inode size (bytes) */ + __uint8_t sb_imax_pct; /* max % of fs for inode space */ + + /* + * these don't have to match the superblock types but are placed + * before sb_shared_vn because these values don't have to be + * checked manually. These variables will be set only on + * filesystems with dependably good (fully initialized) + * secondary superblock sectors, will be stamped in all + * superblocks at mkfs time, and are features that cannot + * be downgraded unless all superblocks in the filesystem + * are rewritten. 
+ */ + int sb_extflgbit; /* extent flag feature bit set */ + + /* + * fields after this point have to be checked manually in compare_sb() + */ + __uint8_t sb_shared_vn; /* shared version number */ + xfs_extlen_t sb_inoalignmt; /* inode chunk alignment, fsblocks */ + __uint32_t sb_unit; /* stripe or raid unit */ + __uint32_t sb_width; /* stripe or width unit */ + + /* + * these don't have to match, they track superblock properties + * that could have been upgraded and/or downgraded during + * run-time so that the primary superblock has them but the + * secondaries do not. + * Plus, they have associated data fields whose data fields may + * be corrupt in cases where the filesystem was made on a + * pre-6.5 campus alpha mkfs and the feature was enabled on + * the filesystem later. + */ + int sb_ialignbit; /* sb has inode alignment bit set */ + int sb_salignbit; /* sb has stripe alignment bit set */ + int sb_sharedbit; /* sb has inode alignment bit set */ + + int sb_fully_zeroed; /* has zeroed secondary sb sectors */ +} fs_geometry_t; + +typedef struct fs_geo_list { + struct fs_geo_list *next; + int refs; + int index; + fs_geometry_t geo; +} fs_geo_list_t; + +/* + * fields for sb_last_nonzero + */ + +#define XR_SB_COUNTERS 0x0001 +#define XR_SB_INOALIGN 0x0002 +#define XR_SB_SALIGN 0x0004 + +/* + * what got modified by verify_set_* routines + */ + +#define XR_AG_SB 0x1 +#define XR_AG_AGF 0x2 +#define XR_AG_AGI 0x4 +#define XR_AG_SB_SEC 0x8 + + diff --git a/repair/attr_repair.c b/repair/attr_repair.c new file mode 100644 index 000000000..d64230b09 --- /dev/null +++ b/repair/attr_repair.c @@ -0,0 +1,1067 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include + +#include "globals.h" +#include "err_protos.h" +#include "dir.h" +#include "dinode.h" +#include "bmap.h" + +static int acl_valid(struct acl *aclp); +static int mac_valid(mac_t lp); + + +/* + * For attribute repair, there are 3 formats to worry about. First, is + * shortform attributes which reside in the inode. Second is the leaf + * form, and lastly the btree. Much of this models after the directory + * structure so code resembles the directory repair cases. + * For shortform case, if an attribute looks corrupt, it is removed. + * If that leaves the shortform down to 0 attributes, it's okay and + * will appear to just have a null attribute fork. Some checks are done + * for validity of the value field based on what the security needs are. + * Calls will be made out to mac_valid or acl_valid libc libraries if + * the security attributes exist. 
They will be cleared if invalid. No + * other values will be checked. The DMF folks do not have current + * requirements, but may in the future. + * + * For leaf block attributes, it requires more processing. One sticky + * point is that the attributes can be local (within the leaf) or + * remote (outside the leaf in other blocks). Thinking of local only + * if you get a bad attribute, and want to delete just one, its a-okay + * if it remains large enough to still be a leaf block attribute. Otherwise, + * it may have to be converted to shortform. How to convert this and when + * is an issue. This call is happening in Phase3. Phase5 will capture empty + * blocks, but Phase6 allows you to use the simulation library which knows + * how to handle attributes in the kernel for converting formats. What we + * could do is mark an attribute to be cleared now, but in phase6 somehow + * have it cleared for real and then the format changed to shortform if + * applicable. Since this requires more work than I anticipate can be + * accomplished for the next release, we will instead just say any bad + * attribute in the leaf block will make the entire attribute fork be + * cleared. The simplest way to do that is to ignore the leaf format, and + * call clear_dinode_attr to just make a shortform attribute fork with + * zero entries. + * + * Another issue with handling repair on leaf attributes is the remote + * blocks. To make sure that they look good and are not used multiple times + * by the attribute fork, some mechanism to keep track of all them is necessary. + * Do this in the future, time permitting. For now, note that there is no + * check for remote blocks and their allocations. + * + * For btree formatted attributes, the model can follow directories. That + * would mean go down the tree to the leftmost leaf. From there moving down + * the links and processing each. They would call back up the tree, to verify + * that the tree structure is okay. 
Any problems will result in the attribute + * fork being emptied and put in shortform format. + */ + +/* + * This routine just checks what security needs are for attribute values + * only called when root flag is set, otherwise these names could exist in + * in user attribute land without a conflict. + * If value is non-zero, then a remote attribute is being passed in + */ + +int +valuecheck(char *namevalue, char *value, int namelen, int valuelen) +{ + /* for proper alignment issues, get the structs and bcopy the values */ + mac_label macl; + struct acl thisacl; + void *valuep; + int clearit = 0; + + if ((strncmp(namevalue, SGI_ACL_FILE, SGI_ACL_FILE_SIZE) == 0) || + (strncmp(namevalue, SGI_ACL_DEFAULT, + SGI_ACL_DEFAULT_SIZE) == 0)) { + if (value == NULL) { + bzero(&thisacl, sizeof(struct acl)); + bcopy(namevalue+namelen, &thisacl, valuelen); + valuep = &thisacl; + } else + valuep = value; + + if (acl_valid((struct acl *) valuep) != 0) { /* 0 means valid */ + clearit = 1; + do_warn("entry contains illegal value in attribute named SGI_ACL_FILE or SGI_ACL_DEFAULT\n"); + } + } else if (strncmp(namevalue, SGI_MAC_FILE, SGI_MAC_FILE_SIZE) == 0) { + if (value == NULL) { + bzero(&macl, sizeof(mac_label)); + bcopy(namevalue+namelen, &macl, valuelen); + valuep = &macl; + } else + valuep = value; + + if (mac_valid((mac_label *) valuep) != 1) { /* 1 means valid */ + /* + *if sysconf says MAC enabled, + * temp = mac_from_text("msenhigh/mintlow", NULL) + * copy it to value, update valuelen, totsize + * This causes pushing up or down of all following + * attributes, forcing a attribute format change!! 
+ * else clearit = 1; + */ + clearit = 1; + do_warn("entry contains illegal value in attribute named SGI_MAC_LABEL\n"); + } + } else if (strncmp(namevalue, SGI_CAP_FILE, SGI_CAP_FILE_SIZE) == 0) { + if ( valuelen != sizeof(cap_set_t)) { + clearit = 1; + do_warn("entry contains illegal value in attribute named SGI_CAP_FILE\n"); + } + } + + return(clearit); +} + + +/* + * this routine validates the attributes in shortform format. + * a non-zero return repair value means certain attributes are bogus + * and were cleared if possible. Warnings do not generate error conditions + * if you cannot modify the structures. repair is set to 1, if anything + * was fixed. + */ +int +process_shortform_attr( + xfs_ino_t ino, + xfs_dinode_t *dip, + int *repair) +{ + xfs_attr_shortform_t *asf; + xfs_attr_sf_entry_t *currententry, *nextentry, *tempentry; + int i, junkit; + int currentsize, remainingspace; + + *repair = 0; + + asf = (xfs_attr_shortform_t *) XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT); + + /* Assumption: hdr.totsize is less than a leaf block and was checked + * by lclinode for valid sizes. Check the count though. + */ + if (INT_GET(asf->hdr.count, ARCH_CONVERT) == 0) + /* then the total size should just be the header length */ + if (INT_GET(asf->hdr.totsize, ARCH_CONVERT) != sizeof(xfs_attr_sf_hdr_t)) { + /* whoops there's a discrepancy. 
Clear the hdr */ + if (!no_modify) { + do_warn("there are no attributes in the fork for inode %llu \n", ino); + INT_SET(asf->hdr.totsize, ARCH_CONVERT, + sizeof(xfs_attr_sf_hdr_t)); + *repair = 1; + return(1); + } else { + do_warn("would junk the attribute fork since the count is 0 for inode %llu\n",ino); + return(1); + } + } + + currentsize = sizeof(xfs_attr_sf_hdr_t); + remainingspace = INT_GET(asf->hdr.totsize, ARCH_CONVERT) - currentsize; + nextentry = &asf->list[0]; + for (i = 0; i < INT_GET(asf->hdr.count, ARCH_CONVERT); i++) { + currententry = nextentry; + junkit = 0; + + /* don't go off the end if the hdr.count was off */ + if ((currentsize + (sizeof(xfs_attr_sf_entry_t) - 1)) > + INT_GET(asf->hdr.totsize, ARCH_CONVERT)) + break; /* get out and reset count and totSize */ + + /* if the namelen is 0, can't get to the rest of the entries */ + if (INT_GET(currententry->namelen, ARCH_CONVERT) == 0) { + do_warn("zero length name entry in attribute fork, "); + if (!no_modify) { + do_warn("truncating attributes for inode %llu to %d \n", ino, i); + *repair = 1; + break; /* and then update hdr fields */ + } else { + do_warn("would truncate attributes for inode %llu to %d \n", ino, i); + break; + } + } else { + /* It's okay to have a 0 length valuelen, but do a + * rough check to make sure we haven't gone outside of + * totsize. + */ + if ((remainingspace < INT_GET(currententry->namelen, ARCH_CONVERT)) || + ((remainingspace - INT_GET(currententry->namelen, ARCH_CONVERT)) + < INT_GET(currententry->valuelen, ARCH_CONVERT))) { + do_warn("name or value attribute lengths are too large, \n"); + if (!no_modify) { + do_warn(" truncating attributes for inode %llu to %d \n", ino, i); + *repair = 1; + break; /* and then update hdr fields */ + } else { + do_warn(" would truncate attributes for inode %llu to %d \n", ino, i); + break; + } + } + } + + /* namecheck checks for / and null terminated for file names. + * attributes names currently follow the same rules. 
+ */ + if (namecheck((char *)&currententry->nameval[0], + INT_GET(currententry->namelen, ARCH_CONVERT))) { + do_warn("entry contains illegal character in shortform attribute name\n"); + junkit = 1; + } + + if (INT_GET(currententry->flags, ARCH_CONVERT) & XFS_ATTR_INCOMPLETE) { + do_warn("entry has INCOMPLETE flag on in shortform attribute\n"); + junkit = 1; + } + + /* Only check values for root security attributes */ + if (INT_GET(currententry->flags, ARCH_CONVERT) & XFS_ATTR_ROOT) + junkit = valuecheck((char *)&currententry->nameval[0], NULL, + INT_GET(currententry->namelen, ARCH_CONVERT), INT_GET(currententry->valuelen, ARCH_CONVERT)); + + remainingspace = remainingspace - + XFS_ATTR_SF_ENTSIZE(currententry); + + if (junkit) { + if (!no_modify) { + /* get rid of only this entry */ + do_warn("removing attribute entry %d for inode %llu \n", i, ino); + tempentry = (xfs_attr_sf_entry_t *) + ((__psint_t) currententry + + XFS_ATTR_SF_ENTSIZE(currententry)); + memmove(currententry,tempentry,remainingspace); + INT_MOD(asf->hdr.count, ARCH_CONVERT, -1); + i--; /* no worries, it will wrap back to 0 */ + *repair = 1; + continue; /* go back up now */ + } else { + do_warn("would remove attribute entry %d for inode %llu \n", i, ino); + } + } + + /* Let's get ready for the next entry... 
*/ + nextentry = (xfs_attr_sf_entry_t *) + ((__psint_t) nextentry + + XFS_ATTR_SF_ENTSIZE(currententry)); + currentsize = currentsize + XFS_ATTR_SF_ENTSIZE(currententry); + + } /* end the loop */ + + + if (INT_GET(asf->hdr.count, ARCH_CONVERT) != i) { + if (no_modify) { + do_warn("would have corrected attribute entry count in inode %llu from %d to %d\n", + ino, INT_GET(asf->hdr.count, ARCH_CONVERT), i); + } else { + do_warn("corrected attribute entry count in inode %llu, was %d, now %d\n", + ino, INT_GET(asf->hdr.count, ARCH_CONVERT), i); + INT_SET(asf->hdr.count, ARCH_CONVERT, i); + *repair = 1; + } + } + + /* ASSUMPTION: currentsize <= totsize */ + if (INT_GET(asf->hdr.totsize, ARCH_CONVERT) != currentsize) { + if (no_modify) { + do_warn("would have corrected attribute totsize in inode %llu from %d to %d\n", + ino, INT_GET(asf->hdr.totsize, ARCH_CONVERT), currentsize); + } else { + do_warn("corrected attribute entry totsize in inode %llu, was %d, now %d\n", + ino, INT_GET(asf->hdr.totsize, ARCH_CONVERT), currentsize); + INT_SET(asf->hdr.totsize, ARCH_CONVERT, currentsize); + *repair = 1; + } + } + + return(*repair); +} + +/* This routine brings in blocks from disk one by one and assembles them + * in the value buffer. If get_bmapi gets smarter later to return an extent + * or list of extents, that would be great. For now, we don't expect too + * many blocks per remote value, so one by one is sufficient. 
+ */
+/*
+ * Parameters:
+ *	mp	 - mount structure; supplies the device and block geometry
+ *	ino	 - inode number, used only in warning messages
+ *	blkmap	 - attribute fork block map, queried with blkmap_get()
+ *	blocknum - first attribute-fork block of the remote value
+ *	valuelen - number of bytes to copy
+ *	value	 - destination buffer; valuelen bytes are written to it
+ *
+ * Returns 0 when the whole value was copied and 1 when a block could not
+ * be mapped or read.
+ */
+static int
+rmtval_get(xfs_mount_t *mp, xfs_ino_t ino, blkmap_t *blkmap,
+		xfs_dablk_t blocknum, int valuelen, char* value)
+{
+	xfs_dfsbno_t	bno;
+	xfs_buf_t	*bp;
+	int		clearit = 0, i = 0, length = 0, amountdone = 0;
+
+	/* ASSUMPTION: valuelen is a valid number, so use it for looping */
+	/* Note that valuelen is not a multiple of blocksize */
+	while (amountdone < valuelen) {
+		bno = blkmap_get(blkmap, blocknum + i);
+		if (bno == NULLDFSBNO) {
+			do_warn("remote block for attributes of inode %llu"
+				" is missing\n", ino);
+			clearit = 1;
+			break;
+		}
+		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno),
+				XFS_FSB_TO_BB(mp, 1), 0);
+		if (!bp) {
+			do_warn("can't read remote block for attributes"
+				" of inode %llu\n", ino);
+			clearit = 1;
+			break;
+		}
+		ASSERT(mp->m_sb.sb_blocksize == XFS_BUF_COUNT(bp));
+		/* the last block normally contributes only a partial copy */
+		length = MIN(XFS_BUF_COUNT(bp), valuelen - amountdone);
+		bcopy(XFS_BUF_PTR(bp), value, length);
+		amountdone += length;
+		value += length;
+		i++;
+		libxfs_putbuf(bp);
+	}
+	return (clearit);
+}
+
+/*
+ * freespace map for directory and attribute leaf blocks (1 bit per byte)
+ * 1 == used, 0 == free
+ */
+static da_freemap_t attr_freemap[DA_BMAP_SIZE];
+
+/* The block is read in. The magic number and forward / backward
+ * links are checked by the caller process_leaf_attr.
+ * If any problems occur the routine returns with non-zero. In
+ * this case the next step is to clear the attribute fork, by
+ * changing it to shortform and zeroing it out. Forkoff need not
+ * be changed.
+ *
+ * Parameters:
+ *	mp		- mount structure (block size, device)
+ *	leaf		- the attribute leaf block, already in memory
+ *	da_bno		- attribute-fork block number, used in messages
+ *	ino		- inode number, used in messages
+ *	blkmap		- attribute fork block map, used to fetch remote values
+ *	last_hashval	- hash value every entry in this block must not
+ *			  sort below
+ *	current_hashval	- out: hash value of the last entry that passed the
+ *			  per-entry checks; not written if no entry did
+ *	repair		- out: set to 1 when a header field was rewritten
+ *
+ * Every entry is checked for a sane name index, the INCOMPLETE flag,
+ * overlapping space claims (via attr_freemap), a valid name, a matching
+ * and ordered hash value and, for root attributes, a valid value.  If all
+ * entries pass, hdr.firstused and hdr.usedbytes are verified and, unless
+ * no_modify is set, corrected.
+ */
+
+int
+process_leaf_attr_block(
+	xfs_mount_t	*mp,
+	xfs_attr_leafblock_t *leaf,
+	xfs_dablk_t	da_bno,
+	xfs_ino_t	ino,
+	blkmap_t	*blkmap,
+	xfs_dahash_t	last_hashval,
+	xfs_dahash_t	*current_hashval,
+	int		*repair)
+{
+	xfs_attr_leaf_entry_t *entry;
+	xfs_attr_leaf_name_local_t *local;
+	xfs_attr_leaf_name_remote_t *remotep;
+	int  i, start, stop, clearit, usedbs, firstb, thissize;
+
+	clearit = usedbs = 0;
+	*repair = 0;
+	firstb = mp->m_sb.sb_blocksize;
+	stop = sizeof(xfs_attr_leaf_hdr_t);
+
+	/* does the count look sorta valid? */
+	if (INT_GET(leaf->hdr.count, ARCH_CONVERT)
+				* sizeof(xfs_attr_leaf_entry_t)
+				+ sizeof(xfs_attr_leaf_hdr_t)
+							> XFS_LBSIZE(mp)) {
+		do_warn("bad attribute count %d in attr block %u, inode %llu\n",
+			(int) INT_GET(leaf->hdr.count, ARCH_CONVERT),
+						da_bno, ino);
+		return (1);
+	}
+
+	/* start from an empty map with only the block header claimed */
+	init_da_freemap(attr_freemap);
+	(void) set_da_freemap(mp, attr_freemap, 0, stop);
+
+	/* go thru each entry checking for problems */
+	for (i = 0, entry = &leaf->entries[0];
+			i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
+						i++, entry++) {
+
+		/*
+		 * check if index is within some boundary.  Valid byte
+		 * offsets run from 0 to XFS_LBSIZE(mp) - 1, so an index
+		 * equal to the block size already points past the block
+		 * and must be rejected before the name is dereferenced.
+		 */
+		if (INT_GET(entry->nameidx, ARCH_CONVERT) >= XFS_LBSIZE(mp)) {
+			do_warn("bad attribute nameidx %d in attr block %u, inode %llu\n",
+				(int)INT_GET(entry->nameidx, ARCH_CONVERT),
+				da_bno,ino);
+			clearit = 1;
+			break;
+		}
+
+		if (INT_GET(entry->flags, ARCH_CONVERT) & XFS_ATTR_INCOMPLETE) {
+			/* we are inconsistent state.  get rid of us */
+			do_warn("attribute entry #%d in attr block %u, inode %llu is INCOMPLETE\n",
+				i, da_bno, ino);
+			clearit = 1;
+			break;
+		}
+
+		/* mark the entry used */
+		start = (__psint_t)&leaf->entries[i] - (__psint_t)leaf;
+		stop = start + sizeof(xfs_attr_leaf_entry_t);
+		if (set_da_freemap(mp, attr_freemap, start, stop)) {
+			do_warn("attribute entry %d in attr block %u, inode %llu claims already used space\n",
+				i,da_bno,ino);
+			clearit = 1;
+			break;	/* got an overlap */
+		}
+
+		if (INT_GET(entry->flags, ARCH_CONVERT) & XFS_ATTR_LOCAL) {
+
+			local = XFS_ATTR_LEAF_NAME_LOCAL(leaf, i);
+			if ((INT_GET(local->namelen, ARCH_CONVERT) == 0) ||
+				(namecheck((char *)&local->nameval[0],
+					INT_GET(local->namelen, ARCH_CONVERT)))) {
+				do_warn("attribute entry %d in attr block %u, inode %llu has bad name (namelen = %d)\n",
+					i, da_bno, ino, (int) INT_GET(local->namelen, ARCH_CONVERT));
+
+				clearit = 1;
+				break;
+			}
+
+			/* Check on the hash value. Checking ordering of hash values
+			 * is not necessary, since one wrong one clears the whole
+			 * fork. If the ordering's wrong, it's caught here or
+			 * the kernel code has a bug with transaction logging
+			 * or attributes itself. For paranoia reasons, let's check
+			 * ordering anyway in case both the name value and the
+			 * hashvalue were wrong but matched. Unlikely, however.
+			 */
+			if (INT_GET(entry->hashval, ARCH_CONVERT) !=
+				libxfs_da_hashname((char *)&local->nameval[0],
+					INT_GET(local->namelen, ARCH_CONVERT)) ||
+				(INT_GET(entry->hashval, ARCH_CONVERT)
+							< last_hashval)) {
+				do_warn("bad hashvalue for attribute entry %d in attr block %u, inode %llu\n",
+					i, da_bno, ino);
+				clearit = 1;
+				break;
+			}
+
+			/* Only check values for root security attributes */
+			if (INT_GET(entry->flags, ARCH_CONVERT) & XFS_ATTR_ROOT) {
+				if (valuecheck((char *)&local->nameval[0], NULL,
+						INT_GET(local->namelen, ARCH_CONVERT), INT_GET(local->valuelen, ARCH_CONVERT))) {
+					do_warn("bad security value for attribute entry %d in attr block %u, inode %llu\n",
+						i,da_bno,ino);
+					clearit = 1;
+					break;
+				}
+			}
+			thissize = XFS_ATTR_LEAF_ENTSIZE_LOCAL(
+					INT_GET(local->namelen, ARCH_CONVERT), INT_GET(local->valuelen, ARCH_CONVERT));
+
+		} else {
+			/* do the remote case */
+			remotep = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i);
+			thissize = XFS_ATTR_LEAF_ENTSIZE_REMOTE(
+					INT_GET(remotep->namelen, ARCH_CONVERT));
+
+			if ((INT_GET(remotep->namelen, ARCH_CONVERT) == 0) ||
+				   (namecheck((char *)&remotep->name[0],
+					INT_GET(remotep->namelen, ARCH_CONVERT))) ||
+				   (INT_GET(entry->hashval, ARCH_CONVERT)
+						!= libxfs_da_hashname(
+					(char *)&remotep->name[0],
+					 INT_GET(remotep->namelen, ARCH_CONVERT))) ||
+				   (INT_GET(entry->hashval, ARCH_CONVERT)
+						 < last_hashval) ||
+				   (INT_GET(remotep->valueblk, ARCH_CONVERT) == 0)) {
+				do_warn("inconsistent remote attribute entry %d in attr block %u, ino %llu\n",
+					i, da_bno, ino);
+				clearit = 1;
+				break;
+			}
+
+			if (INT_GET(entry->flags, ARCH_CONVERT) & XFS_ATTR_ROOT) {
+				char*	value;
+				if ((value = malloc(INT_GET(remotep->valuelen, ARCH_CONVERT)))==NULL){
+					do_warn("cannot malloc enough for remotevalue attribute for inode %llu\n",ino);
+					do_warn("SKIPPING this remote attribute\n");
+					/* entry is left unchecked and unaccounted */
+					continue;
+				}
+				if (rmtval_get(mp, ino, blkmap,
+						INT_GET(remotep->valueblk, ARCH_CONVERT),
+						INT_GET(remotep->valuelen, ARCH_CONVERT), value)) {
+					do_warn("remote attribute get failed for entry %d, inode %llu\n", i,ino);
+					clearit = 1;
+					free(value);
+					break;
+				}
+				if (valuecheck((char *)&remotep->name[0], value,
+						INT_GET(remotep->namelen, ARCH_CONVERT), INT_GET(remotep->valuelen, ARCH_CONVERT))){
+					do_warn("remote attribute value check failed for entry %d, inode %llu\n", i, ino);
+					clearit = 1;
+					free(value);
+					break;
+				}
+				free(value);
+			}
+		}
+
+		/* entry accepted: it becomes the new lower bound for hashes */
+		*current_hashval = last_hashval
+				 = INT_GET(entry->hashval, ARCH_CONVERT);
+
+		/* claim the name/value bytes; an overlap means corruption */
+		if (set_da_freemap(mp, attr_freemap, INT_GET(entry->nameidx, ARCH_CONVERT),
+				INT_GET(entry->nameidx, ARCH_CONVERT) + thissize)) {
+			do_warn("attribute entry %d in attr block %u, inode %llu claims used space\n",
+				i, da_bno, ino);
+			clearit = 1;
+			break;	/* got an overlap */
+		}
+		usedbs += thissize;
+		if (INT_GET(entry->nameidx, ARCH_CONVERT) < firstb)
+			firstb = INT_GET(entry->nameidx, ARCH_CONVERT);
+
+	} /* end the loop */
+
+	if (!clearit) {
+		/* verify the header information is correct */
+
+		/* if the holes flag is set, don't reset first_used unless it's
+		 * pointing to used bytes.  we're being conservative here
+		 * since the block will get compacted anyhow by the kernel.
+		 */
+
+		if (  (INT_GET(leaf->hdr.holes, ARCH_CONVERT) == 0
+		    && firstb != INT_GET(leaf->hdr.firstused, ARCH_CONVERT))
+		    || INT_GET(leaf->hdr.firstused, ARCH_CONVERT) > firstb) {
+			if (!no_modify) {
+				do_warn("- resetting first used heap value from %d to %d in block %u of attribute fork of inode %llu\n",
+					(int)INT_GET(leaf->hdr.firstused,
+						ARCH_CONVERT), firstb,
+						da_bno, ino);
+				INT_SET(leaf->hdr.firstused,
+						ARCH_CONVERT, firstb);
+				*repair = 1;
+			} else {
+				do_warn("- would reset first used value from %d to %d in block %u of attribute fork of inode %llu\n",
+					(int)INT_GET(leaf->hdr.firstused,
+						ARCH_CONVERT), firstb,
+						da_bno, ino);
+			}
+		}
+
+		if (usedbs != INT_GET(leaf->hdr.usedbytes, ARCH_CONVERT)) {
+			if (!no_modify) {
+				do_warn("- resetting usedbytes cnt from %d to %d in block %u of attribute fork of inode %llu\n",
+					(int)INT_GET(leaf->hdr.usedbytes,
+						ARCH_CONVERT), usedbs, da_bno, ino);
+				INT_SET(leaf->hdr.usedbytes,
+						ARCH_CONVERT, usedbs);
+				*repair = 1;
+			} else {
+				do_warn("- would reset usedbytes cnt from %d to %d in block %u of attribute fork of %llu\n",
+					(int)INT_GET(leaf->hdr.usedbytes,
+						ARCH_CONVERT), usedbs,da_bno,ino);
+			}
+		}
+
+		/* there's a lot of work in process_leaf_dir_block to go thru
+		 * checking for holes and compacting if appropriate. I don't think
+		 * attributes need all that, so let's just leave the holes. If
+		 * we discover later that this is a good place to do compaction
+		 * we can add it then.
+		 */
+	}
+	return (clearit);	/* and repair */
+}
+
+
+/*
+ * returns 0 if the attribute fork is ok, 1 if it has to be junked.
+ */
+/*
+ * Walks the attribute leaf blocks left to right, starting at the block
+ * recorded in da_cursor->level[0].bno and following hdr.info.forw until
+ * it is 0.  Each block is mapped, read, checked for the leaf magic number,
+ * run through process_leaf_attr_block, checked for a back pointer that
+ * matches the previous block, and verified against the interior path with
+ * verify_da_path.  A block that was repaired is written back unless
+ * no_modify is set; otherwise the buffer is simply released.
+ *
+ *	mp	  - mount structure
+ *	da_cursor - btree cursor; level[0] is updated for every leaf block
+ */
+int
+process_leaf_attr_level(xfs_mount_t	*mp,
+			da_bt_cursor_t	*da_cursor)
+{
+	int			repair;
+	xfs_attr_leafblock_t	*leaf;
+	xfs_buf_t		*bp;
+	xfs_ino_t		ino;
+	xfs_dfsbno_t		dev_bno;
+	xfs_dablk_t		da_bno;
+	xfs_dablk_t		prev_bno;
+	xfs_dahash_t		current_hashval = 0;
+	xfs_dahash_t		greatest_hashval;
+
+	da_bno = da_cursor->level[0].bno;
+	ino = da_cursor->ino;
+	prev_bno = 0;
+
+	do {
+		repair = 0;
+		dev_bno = blkmap_get(da_cursor->blkmap, da_bno);
+		/*
+		 * 0 is the root block and no block
+		 * pointer can point to the root block of the btree
+		 */
+		ASSERT(da_bno != 0);
+
+		if (dev_bno == NULLDFSBNO) {
+			do_warn("can't map block %u for attribute fork "
+				"for inode %llu\n", da_bno, ino);
+			goto error_out;
+		}
+
+		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, dev_bno),
+					XFS_FSB_TO_BB(mp, 1), 0);
+		if (!bp) {
+			do_warn("can't read file block %u (fsbno %llu) for"
+				" attribute fork of inode %llu\n",
+				da_bno, dev_bno, ino);
+			goto error_out;
+		}
+
+		leaf = (xfs_attr_leafblock_t *)XFS_BUF_PTR(bp);
+
+		/*
+		 * check magic number for leaf directory btree block.
+		 * Report the value through INT_GET, the same way it is
+		 * compared, so the message shows the converted number.
+		 */
+		if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)
+					!= XFS_ATTR_LEAF_MAGIC) {
+			do_warn("bad attribute leaf magic %#x for inode %llu\n",
+				 INT_GET(leaf->hdr.info.magic, ARCH_CONVERT),
+				 ino);
+			libxfs_putbuf(bp);
+			goto error_out;
+		}
+
+		/*
+		 * process_leaf_attr_block stores a hash value only after it
+		 * has accepted an entry.  Seed the output with the running
+		 * value so a block that yields none (e.g. hdr.count == 0)
+		 * does not leave greatest_hashval uninitialized.
+		 */
+		greatest_hashval = current_hashval;
+
+		/*
+		 * for each block, process the block, verify its path,
+		 * then get next block.  update cursor values along the way
+		 */
+		if (process_leaf_attr_block(mp, leaf, da_bno, ino,
+				da_cursor->blkmap, current_hashval,
+				&greatest_hashval, &repair)) {
+			libxfs_putbuf(bp);
+			goto error_out;
+		}
+
+		/*
+		 * index can be set to hdr.count so match the
+		 * indexes of the interior blocks -- which at the
+		 * end of the block will point to 1 after the final
+		 * real entry in the block
+		 */
+		da_cursor->level[0].hashval = greatest_hashval;
+		da_cursor->level[0].bp = bp;
+		da_cursor->level[0].bno = da_bno;
+		da_cursor->level[0].index
+				= INT_GET(leaf->hdr.count, ARCH_CONVERT);
+		da_cursor->level[0].dirty = repair;
+
+		if (INT_GET(leaf->hdr.info.back, ARCH_CONVERT) != prev_bno) {
+			do_warn("bad sibling back pointer for block %u in "
+				"attribute fork for inode %llu\n", da_bno, ino);
+			libxfs_putbuf(bp);
+			goto error_out;
+		}
+
+		prev_bno = da_bno;
+		da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT);
+
+		if (da_bno != 0 && verify_da_path(mp, da_cursor, 0)) {
+			libxfs_putbuf(bp);
+			goto error_out;
+		}
+
+		current_hashval = greatest_hashval;
+
+		if (repair && !no_modify) {
+			libxfs_writebuf(bp, 0);
+		}
+		else {
+			libxfs_putbuf(bp);
+		}
+	} while (da_bno != 0);
+
+	if (verify_final_da_path(mp, da_cursor, 0)) {
+		/*
+		 * verify the final path up (right-hand-side) if still ok
+		 */
+		do_warn("bad hash path in attribute fork for inode %llu\n",
+			da_cursor->ino);
+		goto error_out;
+	}
+
+	/* releases all buffers holding interior btree blocks */
+	release_da_cursor(mp, da_cursor, 0);
+	return(0);
+
+error_out:
+	/* release all buffers holding interior btree blocks */
+	err_release_da_cursor(mp, da_cursor, 0);
+	return(1);
+}
+
+
+/*
+ * a node directory is a true btree -- where the attribute fork
+ * has gotten big enough that it is represented as a non-trivial (e.g.
+ * has more than just a block) btree.
+ *
+ * Note that if we run into any problems, we will trash the attribute fork.
+ *
+ * Returns 0 when the fork checks out and 1 when it is bad.
+ * The structure follows process_node_dir.
+ *
+ *	mp	- mount structure
+ *	ino	- inode number, recorded in the cursor
+ *	dip	- on-disk inode, recorded in the cursor
+ *	blkmap	- attribute fork block map, recorded in the cursor
+ */
+int
+process_node_attr(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	blkmap_t	*blkmap)
+{
+	da_bt_cursor_t	cursor;
+	xfs_dablk_t	leftmost_bno;
+
+	/*
+	 * Descend the left edge of the tree to the left-most leaf block,
+	 * filling in the btree cursor on the way down.  The leaf blocks
+	 * are then walked left-to-right, with the parent path verified
+	 * each time a block is crossed.
+	 *
+	 * Zeroing the cursor also clears active, type and greatest_bno;
+	 * only the fields with real values are assigned afterwards.
+	 */
+	bzero(&cursor, sizeof(cursor));
+	cursor.ino = ino;
+	cursor.dip = dip;
+	cursor.blkmap = blkmap;
+
+	/*
+	 * Interior nodes first.  No buffers are held on this path, and a
+	 * return of 0 from the traversal means it did not succeed.
+	 */
+	if (traverse_int_dablock(mp, &cursor, &leftmost_bno,
+				XFS_ATTR_FORK) == 0)
+		return 1;
+
+	/*
+	 * Hand the cursor to the leaf-level routine, which checks the
+	 * interior paths up to the root, including the final right-most
+	 * path.
+	 */
+	return process_leaf_attr_level(mp, &cursor);
+}
+
+/*
+ * Start processing for a leaf or fuller btree.
+ * A leaf directory is one where the attribute fork is too big for
+ * the inode but is small enough to fit into one btree block
+ * outside the inode. This code is modelled after process_leaf_dir_block.
+ *
+ * returns 0 if things are ok, 1 if bad (attributes needs to be junked)
+ * repair is set, if anything was changed, but attributes can live thru it
+ *
+ *	mp	- mount structure
+ *	ino	- inode number, used in messages
+ *	dip	- on-disk inode; its attribute extent count and format decide
+ *		  whether a missing block 0 is acceptable
+ *	blkmap	- attribute fork block map
+ *	repair	- out: 1 if something was fixed
+ *
+ * Block 0 of the fork is read and its sibling pointers are cleared if
+ * set (reported only when no_modify is on).  The magic number then selects
+ * leaf processing in place or a hand-off to process_node_attr.
+ */
+
+int
+process_longform_attr(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	blkmap_t	*blkmap,
+	int		*repair)	/* out - 1 if something was fixed */
+{
+	xfs_attr_leafblock_t	*leaf;
+	xfs_dfsbno_t	bno;
+	xfs_buf_t	*bp;
+	xfs_dahash_t	next_hashval;
+	int		repairlinks = 0;
+
+	*repair = 0;
+
+	bno = blkmap_get(blkmap, 0);
+
+	if ( bno == NULLDFSBNO ) {
+		if (INT_GET(dip->di_core.di_anextents, ARCH_CONVERT) == 0  &&
+		    dip->di_core.di_aformat == XFS_DINODE_FMT_EXTENTS )
+			/* it's okay the kernel can handle this state */
+			return(0);
+		else	{
+			do_warn("block 0 of inode %llu attribute fork"
+				" is missing\n", ino);
+			return(1);
+		}
+	}
+	/*
+	 * FIX FOR bug 653709 -- EKN
+	 * AG numbers run from 0 to sb_agcount - 1, so a block whose AG
+	 * number equals sb_agcount is already outside the filesystem.
+	 */
+	if (mp->m_sb.sb_agcount <= XFS_FSB_TO_AGNO(mp, bno)) {
+		do_warn("agno of attribute fork of inode %llu out of "
+			"regular partition\n", ino);
+		return(1);
+	}
+
+	bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno),
+				XFS_FSB_TO_BB(mp, 1), 0);
+	if (!bp) {
+		do_warn("can't read block 0 of inode %llu attribute fork\n",
+			ino);
+		return(1);
+	}
+
+	/* verify leaf block */
+	leaf = (xfs_attr_leafblock_t *)XFS_BUF_PTR(bp);
+
+	/* check sibling pointers in leaf block or root block 0 before
+	 * we have to release the btree block
+	 */
+	if (   INT_GET(leaf->hdr.info.forw, ARCH_CONVERT) != 0
+	    || INT_GET(leaf->hdr.info.back, ARCH_CONVERT) != 0) {
+		if (!no_modify) {
+			do_warn("clearing forw/back pointers in block 0 "
+				"for attributes in inode %llu\n", ino);
+			repairlinks = 1;
+			INT_SET(leaf->hdr.info.forw, ARCH_CONVERT, 0);
+			INT_SET(leaf->hdr.info.back, ARCH_CONVERT, 0);
+		} else {
+			do_warn("would clear forw/back pointers in block 0 "
+				"for attributes in inode %llu\n", ino);
+		}
+	}
+
+	/*
+	 * use magic number to tell us what type of attribute this is.
+	 * it's possible to have a node or leaf attribute in either an
+	 * extent format or btree format attribute fork.
+	 */
+	switch (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT)) {
+	case XFS_ATTR_LEAF_MAGIC:	/* leaf-form attribute */
+		if (process_leaf_attr_block(mp, leaf, 0, ino, blkmap,
+				0, &next_hashval, repair)) {
+			/* the block is bad.  lose the attribute fork. */
+			libxfs_putbuf(bp);
+			return(1);
+		}
+		*repair = *repair || repairlinks;
+		break;
+
+	case XFS_DA_NODE_MAGIC:		/* btree-form attribute */
+		/* must do this now, to release block 0 before the traversal */
+		if (repairlinks) {
+			*repair = 1;
+			libxfs_writebuf(bp, 0);
+		} else
+			libxfs_putbuf(bp);
+		return (process_node_attr(mp, ino, dip, blkmap)); /* + repair */
+	default:
+		do_warn("bad attribute leaf magic # %#x for dir ino %llu\n",
+			INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), ino);
+		libxfs_putbuf(bp);
+		return(1);
+	}
+
+	/* leaf case only: write the block back if anything was changed */
+	if (*repair && !no_modify)
+		libxfs_writebuf(bp, 0);
+	else
+		libxfs_putbuf(bp);
+
+	return(0);	/* repair may be set */
+}
+
+
+/*
+ * returns 1 if attributes got cleared
+ * and 0 if things are ok.
+ */
+/*
+ * Dispatches on the attribute fork format stored in the inode core:
+ * local forks go to process_shortform_attr, extent and btree forks go to
+ * process_longform_attr, and any other format is reported and treated
+ * as bad.
+ */
+int
+process_attributes(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	blkmap_t	*blkmap,
+	int		*repair)  /* returned if we did repair */
+{
+	xfs_dinode_core_t	*core = &dip->di_core;
+	/* REFERENCED */
+	xfs_attr_shortform_t	*sf_fork;
+	int			junk_fork;
+
+	sf_fork = (xfs_attr_shortform_t *) XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT);
+
+	switch (core->di_aformat) {
+	case XFS_DINODE_FMT_LOCAL:
+		ASSERT(INT_GET(sf_fork->hdr.totsize, ARCH_CONVERT) <= XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_CONVERT));
+		junk_fork = process_shortform_attr(ino, dip, repair);
+		break;
+
+	case XFS_DINODE_FMT_EXTENTS:
+	case XFS_DINODE_FMT_BTREE:
+		/* on error: convert this to shortform and clear it */
+		/* on repair without error: it's taken care of */
+		junk_fork = process_longform_attr(mp, ino, dip, blkmap,
+				repair);
+		break;
+
+	default:
+		do_warn("illegal attribute format %d, ino %llu\n",
+			core->di_aformat, ino);
+		junk_fork = 1;
+		break;
+	}
+
+	return junk_fork;	/* and repair */
+}
+
+/*
+ * Validate an ACL
+ *
+ * Returns 0 for a valid ACL.  Otherwise sets errno to EINVAL and
+ * returns -1.  An ACL is rejected when it is NULL, has more than
+ * ACL_MAX_ENTRIES entries, repeats a USER_OBJ / GROUP_OBJ / OTHER_OBJ /
+ * MASK entry, repeats a USER or GROUP entry with the same id, carries an
+ * unknown tag, lacks any of the three *_OBJ entries, or has USER / GROUP
+ * entries without a MASK entry.
+ */
+static int
+acl_valid (struct acl *aclp)
+{
+	struct acl_entry *cur, *later;
+	int seen_user = 0, seen_group = 0, seen_other = 0, seen_mask = 0;
+	int need_mask = 0;
+	int idx, scan;
+
+	if (aclp == NULL || aclp->acl_cnt > ACL_MAX_ENTRIES)
+		goto bad_acl;
+
+	for (idx = 0; idx < aclp->acl_cnt; idx++) {
+		cur = &aclp->acl_entry[idx];
+
+		switch (cur->ae_tag) {
+		case ACL_USER_OBJ:
+			if (seen_user)
+				goto bad_acl;
+			seen_user = 1;
+			break;
+		case ACL_GROUP_OBJ:
+			if (seen_group)
+				goto bad_acl;
+			seen_group = 1;
+			break;
+		case ACL_OTHER_OBJ:
+			if (seen_other)
+				goto bad_acl;
+			seen_other = 1;
+			break;
+		case ACL_MASK:
+			if (seen_mask)
+				goto bad_acl;
+			seen_mask = 1;
+			break;
+		case ACL_USER:
+		case ACL_GROUP:
+			/* no later entry may share both tag and id */
+			for (scan = idx + 1; scan < aclp->acl_cnt; scan++) {
+				later = &aclp->acl_entry[scan];
+				if (later->ae_id == cur->ae_id &&
+				    later->ae_tag == cur->ae_tag)
+					goto bad_acl;
+			}
+			need_mask = 1;
+			break;
+		default:
+			goto bad_acl;
+		}
+	}
+
+	if (seen_user && seen_group && seen_other &&
+	    (!need_mask || seen_mask))
+		return 0;
+
+bad_acl:
+	errno = EINVAL;
+	return (-1);
+}
+
+/*
+ * Check a category or division set to ensure that all values are in
+ * strictly ascending order, so that each division or category appears
+ * only once.  Returns 0 when the set is ordered and -1 otherwise.
+ */
+static int
+__check_setvalue(const unsigned short *list, unsigned short count)
+{
+	unsigned short pos = 1;
+
+	while (pos < count) {
+		if (list[pos - 1] >= list[pos])
+			return -1;
+		pos++;
+	}
+	return 0;
+}
+
+
+/*
+ * mac_valid(lp)
+ * check the validity of a mac label
+ *
+ * Returns 1 when the label is valid and 0 when it is not.
+ */
+static int
+mac_valid(mac_t lp)
+{
+	if (lp == NULL)
+		return 0;
+
+	/*
+	 * the category set and division set together may not hold more
+	 * than MAC_MAX_SETS entries
+	 */
+	if ((lp->ml_catcount + lp->ml_divcount) > MAC_MAX_SETS)
+		return 0;
+
+	/*
+	 * the msen type must be known, and the level / category fields
+	 * must suit that type
+	 */
+	switch (lp->ml_msen_type) {
+	case MSEN_TCSEC_LABEL:
+	case MSEN_MLD_LABEL:
+		/* categories are optional but must be ordered */
+		if (lp->ml_catcount > 0 &&
+		    __check_setvalue(lp->ml_list, lp->ml_catcount) == -1)
+			return 0;
+		break;
+	case MSEN_ADMIN_LABEL:
+	case MSEN_EQUAL_LABEL:
+	case MSEN_HIGH_LABEL:
+	case MSEN_MLD_HIGH_LABEL:
+	case MSEN_LOW_LABEL:
+	case MSEN_MLD_LOW_LABEL:
+		/* these types carry neither a level nor categories */
+		if (!(lp->ml_level == 0 && !(lp->ml_catcount > 0)))
+			return 0;
+		break;
+	case MSEN_UNKNOWN_LABEL:
+	default:
+		return 0;
+	}
+
+	/*
+	 * the mint type must be known, and the grade / division fields
+	 * must suit that type
+	 */
+	switch (lp->ml_mint_type) {
+	case MINT_EQUAL_LABEL:
+	case MINT_HIGH_LABEL:
+	case MINT_LOW_LABEL:
+		/* these types carry neither a grade nor divisions */
+		if (!(lp->ml_grade == 0 && !(lp->ml_divcount > 0)))
+			return 0;
+		break;
+	case MINT_BIBA_LABEL:
+		/* divisions follow the categories in ml_list */
+		if (lp->ml_divcount > 0 &&
+		    __check_setvalue(lp->ml_list + lp->ml_catcount,
+				     lp->ml_divcount) == -1)
+			return 0;
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
diff --git a/repair/attr_repair.h b/repair/attr_repair.h
new file mode 100644
index 000000000..61d3f212a
--- /dev/null
+++ b/repair/attr_repair.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#ifndef _XR_ATTRREPAIR_H +#define _XR_ATTRREPAIR_H + +struct blkmap; + +int +process_attributes( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + struct blkmap *blkmap, + int *repair); + + +#endif /* _XR_ATTRREPAIR_H */ diff --git a/repair/avl.c b/repair/avl.c new file mode 100644 index 000000000..4d1a4ac82 --- /dev/null +++ b/repair/avl.c @@ -0,0 +1,1465 @@ +/************************************************************************** + * * + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + * * + **************************************************************************/ + +#include + +#if defined(STAND_ALONE_DEBUG) || defined(AVL_USER_MODE_DEBUG) +#define AVL_DEBUG +#endif + +#include "avl.h" + +#define CERT ASSERT + +#ifdef AVL_DEBUG + +static void +avl_checknode( + register avltree_desc_t *tree, + register avlnode_t *np) +{ + register avlnode_t *back = np->avl_back; + register avlnode_t *forw = np->avl_forw; + register avlnode_t *nextino = np->avl_nextino; + register int bal = np->avl_balance; + + ASSERT(bal != AVL_BALANCE || (!back && !forw) || (back && forw)); + ASSERT(bal != AVL_FORW || forw); + ASSERT(bal != AVL_BACK || back); + + if (forw) { + ASSERT(AVL_START(tree, np) < AVL_START(tree, forw)); + ASSERT(np->avl_forw->avl_parent == np); + ASSERT(back || bal == AVL_FORW); + } else { + ASSERT(bal != AVL_FORW); + ASSERT(bal == AVL_BALANCE || back); + ASSERT(bal == AVL_BACK || !back); + } + + if (back) { + ASSERT(AVL_START(tree, np) > AVL_START(tree, back)); + ASSERT(np->avl_back->avl_parent == np); + ASSERT(forw || bal == AVL_BACK); + } else { + ASSERT(bal != AVL_BACK); + ASSERT(bal == AVL_BALANCE || forw); + ASSERT(bal == AVL_FORW || !forw); + } + + if (nextino == NULL) + ASSERT(forw == NULL); + else + ASSERT(AVL_END(tree, np) <= AVL_START(tree, nextino)); +} + +static void +avl_checktree( + register avltree_desc_t *tree, + register avlnode_t *root) +{ + register avlnode_t *nlast, *nnext, *np; + __psunsigned_t offset = 0; + __psunsigned_t end; + + nlast = nnext = root; + + ASSERT(!nnext || nnext->avl_parent == NULL); + + while (nnext) { + + avl_checknode(tree, nnext); + end = AVL_END(tree, nnext); + + if (end <= offset) { + if ((np = nnext->avl_forw) && np != nlast) { + nlast = nnext; + 
nnext = np; + } else { + nlast = nnext; + nnext = nnext->avl_parent; + } + continue; + } + + nlast = nnext; + if (np = nnext->avl_back) { + if (AVL_END(tree, np) > offset) { + nnext = np; + continue; + } + } + + np = nnext; + nnext = nnext->avl_forw; + if (!nnext) + nnext = np->avl_parent; + + offset = end; + } +} +#else /* ! AVL_DEBUG */ +#define avl_checktree(t,x) +#endif /* AVL_DEBUG */ + + +/* + * Reset balance for np up through tree. + * ``direction'' is the way that np's balance + * is headed after the deletion of one of its children -- + * e.g., deleting a avl_forw child sends avl_balance toward AVL_BACK. + * Called only when deleting a node from the tree. + */ +static void +retreat( + avltree_desc_t *tree, + register avlnode_t *np, + register int direction) +{ + register avlnode_t **rootp = &tree->avl_root; + register avlnode_t *parent; + register avlnode_t *child; + register avlnode_t *tmp; + register int bal; + + do { + ASSERT(direction == AVL_BACK || direction == AVL_FORW); + + if (np->avl_balance == AVL_BALANCE) { + np->avl_balance = direction; + return; + } + + parent = np->avl_parent; + + /* + * If balance is being restored, no local node + * reorganization is necessary, but may be at + * a higher node. Reset direction and continue. + */ + if (direction != np->avl_balance) { + np->avl_balance = AVL_BALANCE; + if (parent) { + if (parent->avl_forw == np) + direction = AVL_BACK; + else + direction = AVL_FORW; + + np = parent; + continue; + } + return; + } + + /* + * Imbalance. If a avl_forw node was removed, direction + * (and, by reduction, np->avl_balance) is/was AVL_BACK. + */ + if (np->avl_balance == AVL_BACK) { + + ASSERT(direction == AVL_BACK); + child = np->avl_back; + bal = child->avl_balance; + + if (bal != AVL_FORW) /* single LL */ { + /* + * np gets pushed down to lesser child's + * avl_forw branch. 
+ * + * np-> -D +B + * / \ / \ + * child-> B deleted A -D + * / \ / + * A C C + */ +#ifdef AVL_PRINT + if (!(tree->avl_flags & AVLF_DUPLICITY)) + cmn_err(CE_CONT, "!LL delete b 0x%x c 0x%x\n", + np, child); +#endif + np->avl_back = child->avl_forw; + if (child->avl_forw) + child->avl_forw->avl_parent = np; + child->avl_forw = np; + + if (parent) { + if (parent->avl_forw == np) { + parent->avl_forw = child; + direction = AVL_BACK; + } else { + ASSERT(parent->avl_back == np); + parent->avl_back = child; + direction = AVL_FORW; + } + } else { + ASSERT(*rootp == np); + *rootp = child; + } + np->avl_parent = child; + child->avl_parent = parent; + + if (bal == AVL_BALANCE) { + np->avl_balance = AVL_BACK; + child->avl_balance = AVL_FORW; + return; + } else { + np->avl_balance = AVL_BALANCE; + child->avl_balance = AVL_BALANCE; + np = parent; + avl_checktree(tree, *rootp); + continue; + } + } + + /* child->avl_balance == AVL_FORW double LR rotation + * + * child's avl_forw node gets promoted up, along with + * its avl_forw subtree + * + * np-> -G C + * / \ / \ + * child-> +B H -B G + * / \ \ / / \ + * A +C deleted A D H + * \ + * D + */ +#ifdef AVL_PRINT + if (!(tree->avl_flags & AVLF_DUPLICITY)) + cmn_err(CE_CONT, "!LR delete b 0x%x c 0x%x t 0x%x\n", + np, child, child->avl_forw); +#endif + tmp = child->avl_forw; + bal = tmp->avl_balance; + + child->avl_forw = tmp->avl_back; + if (tmp->avl_back) + tmp->avl_back->avl_parent = child; + + tmp->avl_back = child; + child->avl_parent = tmp; + + np->avl_back = tmp->avl_forw; + if (tmp->avl_forw) + tmp->avl_forw->avl_parent = np; + tmp->avl_forw = np; + + if (bal == AVL_FORW) + child->avl_balance = AVL_BACK; + else + child->avl_balance = AVL_BALANCE; + + if (bal == AVL_BACK) + np->avl_balance = AVL_FORW; + else + np->avl_balance = AVL_BALANCE; + + goto next; + } + + ASSERT(np->avl_balance == AVL_FORW && direction == AVL_FORW); + + child = np->avl_forw; + bal = child->avl_balance; + + if (bal != AVL_BACK) /* single RR */ { + /* + * 
np gets pushed down to greater child's + * avl_back branch. + * + * np-> +B -D + * / \ / \ + * deleted D <-child +B E + * / \ \ + * C E C + */ +#ifdef AVL_PRINT + if (!(tree->avl_flags & AVLF_DUPLICITY)) + cmn_err(CE_CONT, "!RR delete b 0x%x c 0x%x\n", + np, child); +#endif + np->avl_forw = child->avl_back; + if (child->avl_back) + child->avl_back->avl_parent = np; + child->avl_back = np; + + if (parent) { + if (parent->avl_forw == np) { + parent->avl_forw = child; + direction = AVL_BACK; + } else { + ASSERT(parent->avl_back == np); + parent->avl_back = child; + direction = AVL_FORW; + } + } else { + ASSERT(*rootp == np); + *rootp = child; + } + np->avl_parent = child; + child->avl_parent = parent; + + if (bal == AVL_BALANCE) { + np->avl_balance = AVL_FORW; + child->avl_balance = AVL_BACK; + return; + } else { + np->avl_balance = AVL_BALANCE; + child->avl_balance = AVL_BALANCE; + np = parent; + avl_checktree(tree, *rootp); + continue; + } + } + + /* child->avl_balance == AVL_BACK double RL rotation */ +#ifdef AVL_PRINT + if (!(tree->avl_flags & AVLF_DUPLICITY)) + cmn_err(CE_CONT, "!RL delete b 0x%x c 0x%x t 0x%x\n", + np, child, child->avl_back); +#endif + tmp = child->avl_back; + bal = tmp->avl_balance; + + child->avl_back = tmp->avl_forw; + if (tmp->avl_forw) + tmp->avl_forw->avl_parent = child; + + tmp->avl_forw = child; + child->avl_parent = tmp; + + np->avl_forw = tmp->avl_back; + if (tmp->avl_back) + tmp->avl_back->avl_parent = np; + tmp->avl_back = np; + + if (bal == AVL_BACK) + child->avl_balance = AVL_FORW; + else + child->avl_balance = AVL_BALANCE; + + if (bal == AVL_FORW) + np->avl_balance = AVL_BACK; + else + np->avl_balance = AVL_BALANCE; +next: + np->avl_parent = tmp; + tmp->avl_balance = AVL_BALANCE; + tmp->avl_parent = parent; + + if (parent) { + if (parent->avl_forw == np) { + parent->avl_forw = tmp; + direction = AVL_BACK; + } else { + ASSERT(parent->avl_back == np); + parent->avl_back = tmp; + direction = AVL_FORW; + } + } else { + ASSERT(*rootp 
== np); + *rootp = tmp; + return; + } + + np = parent; + avl_checktree(tree, *rootp); + } while (np); +} + +/* + * Remove node from tree. + * avl_delete does the local tree manipulations, + * calls retreat() to rebalance tree up to its root. + */ +void +avl_delete( + register avltree_desc_t *tree, + register avlnode_t *np) +{ + register avlnode_t *forw = np->avl_forw; + register avlnode_t *back = np->avl_back; + register avlnode_t *parent = np->avl_parent; + register avlnode_t *nnext; + + + if (np->avl_back) { + /* + * a left child exists, so the greatest left descendant's nextino + * is pointing to np; make it point to np->nextino. + */ + nnext = np->avl_back; + while (nnext) { + if (!nnext->avl_forw) + break; /* can't find anything bigger */ + nnext = nnext->avl_forw; + } + } else + if (np->avl_parent) { + /* + * find nearest ancestor with lesser value. That ancestor's + * nextino is pointing to np; make it point to np->nextino + */ + nnext = np->avl_parent; + while (nnext) { + if (AVL_END(tree, nnext) <= AVL_END(tree, np)) + break; + nnext = nnext->avl_parent; + } + } else + nnext = NULL; + + if (nnext) { + ASSERT(nnext->avl_nextino == np); + nnext->avl_nextino = np->avl_nextino; + /* + * Something precedes np; np cannot be firstino. + */ + ASSERT(tree->avl_firstino != np); + } + else { + /* + * Nothing precedes np; after deletion, np's nextino + * is firstino of tree. + */ + ASSERT(tree->avl_firstino == np); + tree->avl_firstino = np->avl_nextino; + } + + + /* + * Degenerate cases... 
+ */ + if (forw == NULL) { + forw = back; + goto attach; + } + + if (back == NULL) { +attach: + if (forw) + forw->avl_parent = parent; + if (parent) { + if (parent->avl_forw == np) { + parent->avl_forw = forw; + retreat(tree, parent, AVL_BACK); + } else { + ASSERT(parent->avl_back == np); + parent->avl_back = forw; + retreat(tree, parent, AVL_FORW); + } + } else { + ASSERT(tree->avl_root == np); + tree->avl_root = forw; + } + avl_checktree(tree, tree->avl_root); + return; + } + + /* + * Harder case: children on both sides. + * If back's avl_forw pointer is null, just have back + * inherit np's avl_forw tree, remove np from the tree + * and adjust balance counters starting at back. + * + * np-> xI xH (befor retreat()) + * / \ / \ + * back-> H J G J + * / / \ / \ + * G ? ? ? ? + * / \ + * ? ? + */ + if ((forw = back->avl_forw) == NULL) { + /* + * AVL_FORW retreat below will set back's + * balance to AVL_BACK. + */ + back->avl_balance = np->avl_balance; + back->avl_forw = forw = np->avl_forw; + forw->avl_parent = back; + back->avl_parent = parent; + + if (parent) { + if (parent->avl_forw == np) + parent->avl_forw = back; + else { + ASSERT(parent->avl_back == np); + parent->avl_back = back; + } + } else { + ASSERT(tree->avl_root == np); + tree->avl_root = back; + } + + /* + * back is taking np's place in the tree, and + * has therefore lost a avl_back node (itself). + */ + retreat(tree, back, AVL_FORW); + avl_checktree(tree, tree->avl_root); + return; + } + + /* + * Hardest case: children on both sides, and back's + * avl_forw pointer isn't null. Find the immediately + * inferior buffer by following back's avl_forw line + * to the end, then have it inherit np's avl_forw tree. + * + * np-> xI xH + * / \ / \ + * G J back-> G J (before retreat()) + * / \ / \ + * F ?... F ?1 + * / \ + * ? H <-forw + * / + * ?1 + */ + while (back = forw->avl_forw) + forw = back; + + /* + * Will be adjusted by retreat() below. 
+ */ + forw->avl_balance = np->avl_balance; + + /* + * forw inherits np's avl_forw... + */ + forw->avl_forw = np->avl_forw; + np->avl_forw->avl_parent = forw; + + /* + * ... forw's parent gets forw's avl_back... + */ + back = forw->avl_parent; + back->avl_forw = forw->avl_back; + if (forw->avl_back) + forw->avl_back->avl_parent = back; + + /* + * ... forw gets np's avl_back... + */ + forw->avl_back = np->avl_back; + np->avl_back->avl_parent = forw; + + /* + * ... and forw gets np's parent. + */ + forw->avl_parent = parent; + + if (parent) { + if (parent->avl_forw == np) + parent->avl_forw = forw; + else + parent->avl_back = forw; + } else { + ASSERT(tree->avl_root == np); + tree->avl_root = forw; + } + + /* + * What used to be forw's parent is the starting + * point for rebalancing. It has lost a avl_forw node. + */ + retreat(tree, back, AVL_BACK); + avl_checktree(tree, tree->avl_root); +} + + +/* + * avl_findanyrange: + * + * Given range r [start, end), find any range which is contained in r. + * if checklen is non-zero, then only ranges of non-zero length are + * considered in finding a match. + */ +avlnode_t * +avl_findanyrange( + register avltree_desc_t *tree, + register __psunsigned_t start, + register __psunsigned_t end, + int checklen) +{ + register avlnode_t *np = tree->avl_root; + + /* np = avl_findadjacent(tree, start, AVL_SUCCEED); */ + while (np) { + if (start < AVL_START(tree, np)) { + if (np->avl_back) { + np = np->avl_back; + continue; + } + /* if we were to add node with start, would + * have a growth of AVL_BACK + */ + /* if succeeding node is needed, this is it. + */ + break; + } + if (start >= AVL_END(tree, np)) { + if (np->avl_forw) { + np = np->avl_forw; + continue; + } + /* if we were to add node with start, would + * have a growth of AVL_FORW; + */ + /* we are looking for a succeeding node; + * this is nextino. 
+ */ + np = np->avl_nextino; + break; + } + /* AVL_START(tree, np) <= start < AVL_END(tree, np) */ + break; + } + if (np) { + if (checklen == AVL_INCLUDE_ZEROLEN) { + if (end <= AVL_START(tree, np)) { + /* something follows start, but it + * is entirely after the range (end) + */ + return(NULL); + } + /* np may straddle [start, end) */ + return(np); + } + /* + * find non-zero length region + */ + while (np && (AVL_END(tree, np) - AVL_START(tree, np) == 0) + && (AVL_START(tree, np) < end)) + np = np->avl_nextino; + + if ((np == NULL) || (AVL_START(tree, np) >= end)) + return NULL; + return(np); + } + /* + * nothing succeeds start, all existing ranges are before start. + */ + return NULL; +} + + +/* + * Returns a pointer to the range [start, end) which contains value, or NULL if none does. + */ +avlnode_t * +avl_findrange( + register avltree_desc_t *tree, + register __psunsigned_t value) +{ + register avlnode_t *np = tree->avl_root; + + while (np) { + if (value < AVL_START(tree, np)) { + np = np->avl_back; + continue; + } + if (value >= AVL_END(tree, np)) { + np = np->avl_forw; + continue; + } + ASSERT(AVL_START(tree, np) <= value && + value < AVL_END(tree, np)); + return np; + } + return NULL; +} + + +/* + * Returns a pointer to the node whose AVL_START equals value exactly, or NULL if there is none. + */ +avlnode_t * +avl_find( + register avltree_desc_t *tree, + register __psunsigned_t value) +{ + register avlnode_t *np = tree->avl_root; + register __psunsigned_t nvalue; + + while (np) { + nvalue = AVL_START(tree, np); + if (value < nvalue) { + np = np->avl_back; + continue; + } + if (value == nvalue) { + return np; + } + np = np->avl_forw; + } + return NULL; +} + + +/* + * Balance buffer AVL tree after attaching a new node to root. + * Called by avl_insert and avl_insert_immediate. + */ +static void +avl_balance( + register avlnode_t **rootp, + register avlnode_t *np, + register int growth) +{ + /* + * At this point, np points to the node to which + * a new node has been attached. All that remains is to + * propagate avl_balance up the tree. 
+ */ + for ( ; ; ) { + register avlnode_t *parent = np->avl_parent; + register avlnode_t *child; + + CERT(growth == AVL_BACK || growth == AVL_FORW); + + /* + * If the buffer was already balanced, set avl_balance + * to the new direction. Continue if there is a + * parent after setting growth to reflect np's + * relation to its parent. + */ + if (np->avl_balance == AVL_BALANCE) { + np->avl_balance = growth; + if (parent) { + if (parent->avl_forw == np) + growth = AVL_FORW; + else { + ASSERT(parent->avl_back == np); + growth = AVL_BACK; + } + + np = parent; + continue; + } + break; + } + + if (growth != np->avl_balance) { + /* + * Subtree is now balanced -- no net effect + * in the size of the subtree, so leave. + */ + np->avl_balance = AVL_BALANCE; + break; + } + + if (growth == AVL_BACK) { + + child = np->avl_back; + CERT(np->avl_balance == AVL_BACK && child); + + if (child->avl_balance == AVL_BACK) { /* single LL */ + /* + * ``A'' just got inserted; + * np points to ``E'', child to ``C'', + * and it is already AVL_BACK -- + * child will get promoted to top of subtree. + + np-> -E C + / \ / \ + child-> -C F -B E + / \ / / \ + -B D A D F + / + A + + Note that child->avl_parent and + avl_balance get set in common code. + */ + np->avl_parent = child; + np->avl_balance = AVL_BALANCE; + np->avl_back = child->avl_forw; + if (child->avl_forw) + child->avl_forw->avl_parent = np; + child->avl_forw = np; + } else { + /* + * double LR + * + * child's avl_forw node gets promoted to + * the top of the subtree. 
+ + np-> -E C + / \ / \ + child-> +B F -B E + / \ / / \ + A +C A D F + \ + D + + */ + register avlnode_t *tmp = child->avl_forw; + + CERT(child->avl_balance == AVL_FORW && tmp); + + child->avl_forw = tmp->avl_back; + if (tmp->avl_back) + tmp->avl_back->avl_parent = child; + + tmp->avl_back = child; + child->avl_parent = tmp; + + np->avl_back = tmp->avl_forw; + if (tmp->avl_forw) + tmp->avl_forw->avl_parent = np; + + tmp->avl_forw = np; + np->avl_parent = tmp; + + if (tmp->avl_balance == AVL_BACK) + np->avl_balance = AVL_FORW; + else + np->avl_balance = AVL_BALANCE; + + if (tmp->avl_balance == AVL_FORW) + child->avl_balance = AVL_BACK; + else + child->avl_balance = AVL_BALANCE; + + /* + * Set child to point to tmp since it is + * now the top of the subtree, and will + * get attached to the subtree parent in + * the common code below. + */ + child = tmp; + } + + } else /* growth == AVL_FORW */ { + + /* + * This code is the mirror image of the AVL_BACK case above. + */ + + child = np->avl_forw; + CERT(np->avl_balance == AVL_FORW && child); + + if (child->avl_balance == AVL_FORW) { /* single RR */ + np->avl_parent = child; + np->avl_balance = AVL_BALANCE; + np->avl_forw = child->avl_back; + if (child->avl_back) + child->avl_back->avl_parent = np; + child->avl_back = np; + } else { + /* + * double RL + */ + register avlnode_t *tmp = child->avl_back; + + ASSERT(child->avl_balance == AVL_BACK && tmp); + + child->avl_back = tmp->avl_forw; + if (tmp->avl_forw) + tmp->avl_forw->avl_parent = child; + + tmp->avl_forw = child; + child->avl_parent = tmp; + + np->avl_forw = tmp->avl_back; + if (tmp->avl_back) + tmp->avl_back->avl_parent = np; + + tmp->avl_back = np; + np->avl_parent = tmp; + + if (tmp->avl_balance == AVL_FORW) + np->avl_balance = AVL_BACK; + else + np->avl_balance = AVL_BALANCE; + + if (tmp->avl_balance == AVL_BACK) + child->avl_balance = AVL_FORW; + else + child->avl_balance = AVL_BALANCE; + + child = tmp; + } + } + + child->avl_parent = parent; + child->avl_balance = 
AVL_BALANCE; + + if (parent) { + if (parent->avl_back == np) + parent->avl_back = child; + else + parent->avl_forw = child; + } else { + ASSERT(*rootp == np); + *rootp = child; + } + + break; + } +} + +static +avlnode_t * +avl_insert_find_growth( + register avltree_desc_t *tree, + register __psunsigned_t start, /* range start at start, */ + register __psunsigned_t end, /* exclusive */ + register int *growthp) /* OUT */ +{ + avlnode_t *root = tree->avl_root; + register avlnode_t *np; + + np = root; + ASSERT(np); /* caller ensures that there is atleast one node in tree */ + + for ( ; ; ) { + CERT(np->avl_parent || root == np); + CERT(!np->avl_parent || root != np); + CERT(!(np->avl_back) || np->avl_back->avl_parent == np); + CERT(!(np->avl_forw) || np->avl_forw->avl_parent == np); + CERT(np->avl_balance != AVL_FORW || np->avl_forw); + CERT(np->avl_balance != AVL_BACK || np->avl_back); + CERT(np->avl_balance != AVL_BALANCE || + np->avl_back == NULL || np->avl_forw); + CERT(np->avl_balance != AVL_BALANCE || + np->avl_forw == NULL || np->avl_back); + + if (AVL_START(tree, np) >= end) { + if (np->avl_back) { + np = np->avl_back; + continue; + } + *growthp = AVL_BACK; + break; + } + + if (AVL_END(tree, np) <= start) { + if (np->avl_forw) { + np = np->avl_forw; + continue; + } + *growthp = AVL_FORW; + break; + } + /* found exact match -- let caller decide if it is an error */ + return(NULL); + } + return(np); +} + + +static void +avl_insert_grow( + register avltree_desc_t *tree, + register avlnode_t *parent, + register avlnode_t *newnode, + register int growth) +{ + register avlnode_t *nnext; + register __psunsigned_t start = AVL_START(tree, newnode); + + if (growth == AVL_BACK) { + + parent->avl_back = newnode; + /* + * we are growing to the left; previous in-order to newnode is + * closest ancestor with lesser value. Before this + * insertion, this ancestor will be pointing to + * newnode's parent. After insertion, next in-order to newnode + * is the parent. 
+ */ + newnode->avl_nextino = parent; + nnext = parent; + while (nnext) { + if (AVL_END(tree, nnext) <= start) + break; + nnext = nnext->avl_parent; + } + if (nnext) { + /* + * nnext will be null if newnode is + * the least element, and hence very first in the list. + */ + ASSERT(nnext->avl_nextino == parent); + nnext->avl_nextino = newnode; + } + } + else { + parent->avl_forw = newnode; + newnode->avl_nextino = parent->avl_nextino; + parent->avl_nextino = newnode; + } +} + + +avlnode_t * +avl_insert( + register avltree_desc_t *tree, + register avlnode_t *newnode) +{ + register avlnode_t *np; + register __psunsigned_t start = AVL_START(tree, newnode); + register __psunsigned_t end = AVL_END(tree, newnode); + int growth; + + ASSERT(newnode); + ASSERT(start <= end); + + /* + * Clean all pointers for sanity; some will be reset as necessary. + */ + newnode->avl_nextino = NULL; + newnode->avl_parent = NULL; + newnode->avl_forw = NULL; + newnode->avl_back = NULL; + newnode->avl_balance = AVL_BALANCE; + + if ((np = tree->avl_root) == NULL) { /* degenerate case... */ + tree->avl_root = newnode; + tree->avl_firstino = newnode; + return newnode; + } + + if ((np = avl_insert_find_growth(tree, start, end, &growth)) == NULL) { + if (start != end) { /* non-zero length range */ +#ifdef AVL_USER_MODE + printf( + "avl_insert: Warning! duplicate range [0x%x,0x%x)\n", + start, end); +#else + /* + * lockmetering tree can't afford printfs here. + */ + if (!(tree->avl_flags & AVLF_DUPLICITY)) + cmn_err(CE_CONT, + "!avl_insert: Warning! duplicate range [0x%x,0x%x)\n", + start, end); +#endif + } + return(NULL); + } + + avl_insert_grow(tree, np, newnode, growth); + if (growth == AVL_BACK) { + /* + * Growing to left. if np was firstino, newnode will be firstino + */ + if (tree->avl_firstino == np) + tree->avl_firstino = newnode; + } +#ifdef notneeded + else + if (growth == AVL_FORW) + /* + * Cannot possibly be firstino; there is somebody to our left. 
+ */ + ; +#endif + + newnode->avl_parent = np; + CERT(np->avl_forw == newnode || np->avl_back == newnode); + + avl_balance(&tree->avl_root, np, growth); + + avl_checktree(tree, tree->avl_root); + + return newnode; +} + +/* + * + * avl_insert_immediate(tree, afterp, newnode): + * insert newnode immediately into tree immediately after afterp. + * after insertion, newnode is right child of afterp. + */ +void +avl_insert_immediate( + avltree_desc_t *tree, + avlnode_t *afterp, + avlnode_t *newnode) +{ + /* + * Clean all pointers for sanity; some will be reset as necessary. + */ + newnode->avl_nextino = NULL; + newnode->avl_parent = NULL; + newnode->avl_forw = NULL; + newnode->avl_back = NULL; + newnode->avl_balance = AVL_BALANCE; + + if (afterp == NULL) { + tree->avl_root = newnode; + tree->avl_firstino = newnode; + return; + } + + ASSERT(afterp->avl_forw == NULL); + avl_insert_grow(tree, afterp, newnode, AVL_FORW); /* grow to right */ + CERT(afterp->avl_forw == newnode); + avl_balance(&tree->avl_root, afterp, AVL_FORW); + avl_checktree(tree, tree->avl_root); +} + + +/* + * Returns first in order node + */ +avlnode_t * +avl_firstino(register avlnode_t *root) +{ + register avlnode_t *np; + + if ((np = root) == NULL) + return NULL; + + while (np->avl_back) + np = np->avl_back; + return np; +} + +#ifdef AVL_USER_MODE +/* + * leave this as a user-mode only routine until someone actually + * needs it in the kernel + */ + +/* + * Returns last in order node + */ +avlnode_t * +avl_lastino(register avlnode_t *root) +{ + register avlnode_t *np; + + if ((np = root) == NULL) + return NULL; + + while (np->avl_forw) + np = np->avl_forw; + return np; +} +#endif + +void +avl_init_tree(avltree_desc_t *tree, avlops_t *ops) +{ + tree->avl_root = NULL; + tree->avl_firstino = NULL; + tree->avl_ops = ops; +} + +#ifdef AVL_DEBUG +static void +avl_printnode(avltree_desc_t *tree, avlnode_t *np, int nl) +{ + printf("[%d-%d]%c", AVL_START(tree, np), + (AVL_END(tree, np) - 1), nl ? 
'\n' : ' '); +} +#endif +#ifdef STAND_ALONE_DEBUG + +struct avl_debug_node { + avlnode_t avl_node; + xfs_off_t avl_start; + unsigned int avl_size; +} + +avlops_t avl_debug_ops = { + avl_debug_start, + avl_debug_end, +} + +static __psunsigned_t +avl_debug_start(avlnode_t *node) +{ + return (__psunsigned_t)(struct avl_debug_node *)node->avl_start; +} + +static __psunsigned_t +avl_debug_end(avlnode_t *node) +{ + return (__psunsigned_t) + ((struct avl_debug_node *)node->avl_start + + (struct avl_debug_node *)node->avl_size); +} + +avl_debug_node freenodes[100]; +avl_debug_node *freehead = &freenodes[0]; + +static avlnode_t * +alloc_avl_debug_node() +{ + freehead->avl_balance = AVL_BALANCE; + freehead->avl_parent = freehead->avl_forw = freehead->avl_back = NULL; + return(freehead++); +} + +static void +avl_print(avltree_desc_t *tree, avlnode_t *root, int depth) +{ + int i; + + if (!root) + return; + if (root->avl_forw) + avl_print(tree, root->avl_forw, depth+5); + for (i = 0; i < depth; i++) + putchar((int) ' '); + avl_printnode(tree, root,1); + if (root->avl_back) + avl_print(tree, root->avl_back, depth+5); +} + +main() +{ + int i, j; + avlnode_t *np; + avltree_desc_t tree; + char linebuf[256], cmd[256]; + + avl_init_tree(&tree, &avl_debug_ops); + + for (i = 100; i > 0; i = i - 10) + { + np = alloc__debug_avlnode(); + ASSERT(np); + np->avl_start = i; + np->avl_size = 10; + avl_insert(&tree, np); + } + avl_print(&tree, tree.avl_root, 0); + + for (np = tree.avl_firstino; np != NULL; np = np->avl_nextino) + avl_printnode(&tree, np, 0); + printf("\n"); + + while (1) { + printf("Command [fpdir] : "); + fgets(linebuf, 256, stdin); + if (feof(stdin)) break; + cmd[0] = NULL; + if (sscanf(linebuf, "%[fpdir]%d", cmd, &i) != 2) + continue; + switch (cmd[0]) { + case 'd': + case 'f': + printf("end of range ? 
"); + fgets(linebuf, 256, stdin); + j = atoi(linebuf); + + if (i == j) j = i+1; + np = avl_findinrange(&tree,i,j); + if (np) { + avl_printnode(&tree, np, 1); + if (cmd[0] == 'd') + avl_delete(&tree, np); + } else + printf("Cannot find %d\n", i); + break; + case 'p': + avl_print(&tree, tree.avl_root, 0); + for (np = tree.avl_firstino; + np != NULL; np = np->avl_nextino) + avl_printnode(&tree, np, 0); + printf("\n"); + break; + case 'i': + np = alloc_avlnode(); + ASSERT(np); + np->avl_start = i; + printf("size of range ? "); + fgets(linebuf, 256, stdin); + j = atoi(linebuf); + + np->avl_size = j; + avl_insert(&tree, np); + break; + case 'r': { + avlnode_t *b, *e, *t; + int checklen; + + printf("End of range ? "); + fgets(linebuf, 256, stdin); + j = atoi(linebuf); + + printf("checklen 0/1 ? "); + fgets(linebuf, 256, stdin); + checklen = atoi(linebuf); + + + b = avl_findanyrange(&tree, i, j, checklen); + if (b) { + printf("Found something\n"); + t = b; + while (t) { + if (t != b && + AVL_START(&tree, t) >= j) + break; + avl_printnode(&tree, t, 0); + t = t->avl_nextino; + } + printf("\n"); + } + } + } + } +} +#endif + +/* + * Given a tree, find value; will find return range enclosing value, + * or range immediately succeeding value, + * or range immediately preceeding value. + */ +avlnode_t * +avl_findadjacent( + register avltree_desc_t *tree, + register __psunsigned_t value, + register int dir) +{ + register avlnode_t *np = tree->avl_root; + + while (np) { + if (value < AVL_START(tree, np)) { + if (np->avl_back) { + np = np->avl_back; + continue; + } + /* if we were to add node with value, would + * have a growth of AVL_BACK + */ + if (dir == AVL_SUCCEED) { + /* if succeeding node is needed, this is it. + */ + return(np); + } + if (dir == AVL_PRECEED) { + /* + * find nearest ancestor with lesser value. 
+ */ + np = np->avl_parent; + while (np) { + if (AVL_END(tree, np) <= value) + break; + np = np->avl_parent; + } + return(np); + } + ASSERT(dir == AVL_SUCCEED || dir == AVL_PRECEED); + break; + } + if (value >= AVL_END(tree, np)) { + if (np->avl_forw) { + np = np->avl_forw; + continue; + } + /* if we were to add node with value, would + * have a growth of AVL_FORW; + */ + if (dir == AVL_SUCCEED) { + /* we are looking for a succeeding node; + * this is nextino. + */ + return(np->avl_nextino); + } + if (dir == AVL_PRECEED) { + /* looking for a preceeding node; this is it. */ + return(np); + } + ASSERT(dir == AVL_SUCCEED || dir == AVL_PRECEED); + } + /* AVL_START(tree, np) <= value < AVL_END(tree, np) */ + return(np); + } + return NULL; +} + + +#ifdef AVL_FUTURE_ENHANCEMENTS +/* + * avl_findranges: + * + * Given range r [start, end), find all ranges in tree which are contained + * in r. At return, startp and endp point to first and last of + * a chain of elements which describe the contained ranges. Elements + * in startp ... endp are in sort order, and can be accessed by + * using avl_nextino. + */ + +void +avl_findranges( + register avltree_desc_t *tree, + register __psunsigned_t start, + register __psunsigned_t end, + avlnode_t **startp, + avlnode_t **endp) +{ + register avlnode_t *np; + + np = avl_findadjacent(tree, start, AVL_SUCCEED); + if (np == NULL /* nothing succeding start */ + || (np && (end <= AVL_START(tree, np)))) + /* something follows start, + but... 
is entirely after end */ + { + *startp = NULL; + *endp = NULL; + return; + } + + *startp = np; + + /* see if end is in this region itself */ + if (end <= AVL_END(tree, np) || + np->avl_nextino == NULL || + (np->avl_nextino && + (end <= AVL_START(tree, np->avl_nextino)))) { + *endp = np; + return; + } + /* have to munge for end */ + /* + * note: have to look for (end - 1), since + * findadjacent will look for exact value, and does not + * care about the fact that end is actually one more + * than the value actually being looked for; thus feed it one less. + */ + *endp = avl_findadjacent(tree, (end-1), AVL_PRECEED); + ASSERT(*endp); +} + +#endif /* AVL_FUTURE_ENHANCEMENTS */ diff --git a/repair/avl.h b/repair/avl.h new file mode 100644 index 000000000..a6d53f5b9 --- /dev/null +++ b/repair/avl.h @@ -0,0 +1,143 @@ +/************************************************************************** + * * + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + * * + **************************************************************************/ +#ifndef __SYS_AVL_H__ +#define __SYS_AVL_H__ + + +typedef struct avlnode { + struct avlnode *avl_forw; /* pointer to right child (> parent) */ + struct avlnode *avl_back; /* pointer to left child (< parent) */ + struct avlnode *avl_parent; /* parent pointer */ + struct avlnode *avl_nextino; /* next in-order; NULL terminated list*/ + char avl_balance; /* tree balance */ +} avlnode_t; + +/* + * avl-tree operations + */ +typedef struct avlops { + __psunsigned_t (*avl_start)(avlnode_t *); + __psunsigned_t (*avl_end)(avlnode_t *); +} avlops_t; + +#define AVL_START(tree, n) (*(tree)->avl_ops->avl_start)(n) +#define AVL_END(tree, n) (*(tree)->avl_ops->avl_end)(n) + +/* + * tree descriptor: + * root points to the root of the tree. + * firstino points to the first in the ordered list. 
+ */ +typedef struct avltree_desc { + avlnode_t *avl_root; + avlnode_t *avl_firstino; + avlops_t *avl_ops; + short avl_flags; +} avltree_desc_t; + +/* possible values for avl_balance */ + +#define AVL_BACK 1 +#define AVL_BALANCE 0 +#define AVL_FORW 2 + +/* possible values for avl_flags */ + +#define AVLF_DUPLICITY 0x0001 /* no warnings on insert dups */ + +/* + * 'Exported' avl tree routines + */ +avlnode_t +*avl_insert( + avltree_desc_t *tree, + avlnode_t *newnode); + +void +avl_delete( + avltree_desc_t *tree, + avlnode_t *np); + +void +avl_insert_immediate( + avltree_desc_t *tree, + avlnode_t *afterp, + avlnode_t *newnode); + +void +avl_init_tree( + avltree_desc_t *tree, + avlops_t *ops); + +avlnode_t * +avl_findrange( + avltree_desc_t *tree, + __psunsigned_t value); + +avlnode_t * +avl_find( + avltree_desc_t *tree, + __psunsigned_t value); + +avlnode_t * +avl_findanyrange( + avltree_desc_t *tree, + __psunsigned_t start, + __psunsigned_t end, + int checklen); + + +avlnode_t * +avl_findadjacent( + avltree_desc_t *tree, + __psunsigned_t value, + int dir); + +#ifdef AVL_FUTURE_ENHANCEMENTS +void +avl_findranges( + register avltree_desc_t *tree, + register __psunsigned_t start, + register __psunsigned_t end, + avlnode_t **startp, + avlnode_t **endp); +#endif + +#define AVL_PRECEED 0x1 +#define AVL_SUCCEED 0x2 + +#define AVL_INCLUDE_ZEROLEN 0x0000 +#define AVL_EXCLUDE_ZEROLEN 0x0001 + +#endif /* __SYS_AVL_H__ */ diff --git a/repair/avl64.c b/repair/avl64.c new file mode 100644 index 000000000..091bc8180 --- /dev/null +++ b/repair/avl64.c @@ -0,0 +1,1458 @@ +/************************************************************************** + * * + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + * * + **************************************************************************/ + +/* to allow use by user-level utilities */ + +#ifdef STAND_ALONE_DEBUG +#define AVL_USER_MODE +#endif + +#if defined(STAND_ALONE_DEBUG) || defined(AVL_USER_MODE_DEBUG) +#define AVL_DEBUG +#endif + +#include +#include +#include "avl64.h" + +#define CERT ASSERT + +#ifdef AVL_DEBUG + +static void +avl64_checknode( + register avl64tree_desc_t *tree, + register avl64node_t *np) +{ + register avl64node_t *back = np->avl_back; + register avl64node_t *forw = np->avl_forw; + register avl64node_t *nextino = np->avl_nextino; + register int bal = np->avl_balance; + + ASSERT(bal != AVL_BALANCE || (!back && !forw) || (back && forw)); + ASSERT(bal != AVL_FORW || forw); + ASSERT(bal != AVL_BACK || back); + + if (forw) { + ASSERT(AVL_START(tree, np) < AVL_START(tree, forw)); + ASSERT(np->avl_forw->avl_parent == np); + ASSERT(back 
|| bal == AVL_FORW); + } else { + ASSERT(bal != AVL_FORW); + ASSERT(bal == AVL_BALANCE || back); + ASSERT(bal == AVL_BACK || !back); + } + + if (back) { + ASSERT(AVL_START(tree, np) > AVL_START(tree, back)); + ASSERT(np->avl_back->avl_parent == np); + ASSERT(forw || bal == AVL_BACK); + } else { + ASSERT(bal != AVL_BACK); + ASSERT(bal == AVL_BALANCE || forw); + ASSERT(bal == AVL_FORW || !forw); + } + + if (nextino == NULL) + ASSERT(forw == NULL); + else + ASSERT(AVL_END(tree, np) <= AVL_START(tree, nextino)); +} + +static void +avl64_checktree( + register avl64tree_desc_t *tree, + register avl64node_t *root) +{ + register avl64node_t *nlast, *nnext, *np; + __uint64_t offset = 0; + __uint64_t end; + + nlast = nnext = root; + + ASSERT(!nnext || nnext->avl_parent == NULL); + + while (nnext) { + + avl64_checknode(tree, nnext); + end = AVL_END(tree, nnext); + + if (end <= offset) { + if ((np = nnext->avl_forw) && np != nlast) { + nlast = nnext; + nnext = np; + } else { + nlast = nnext; + nnext = nnext->avl_parent; + } + continue; + } + + nlast = nnext; + if (np = nnext->avl_back) { + if (AVL_END(tree, np) > offset) { + nnext = np; + continue; + } + } + + np = nnext; + nnext = nnext->avl_forw; + if (!nnext) + nnext = np->avl_parent; + + offset = end; + } +} +#else /* ! AVL_DEBUG */ +#define avl64_checktree(t,x) +#endif /* AVL_DEBUG */ + + +/* + * Reset balance for np up through tree. + * ``direction'' is the way that np's balance + * is headed after the deletion of one of its children -- + * e.g., deleting a avl_forw child sends avl_balance toward AVL_BACK. + * Called only when deleting a node from the tree. 
+ */
+static void
+retreat(
+	avl64tree_desc_t *tree,
+	register avl64node_t *np,
+	register int direction)
+{
+	register avl64node_t **rootp = &tree->avl_root;
+	register avl64node_t *parent;
+	register avl64node_t *child;
+	register avl64node_t *tmp;
+	register int	bal;
+
+	/*
+	 * Convention used throughout: np is a node one of whose subtrees
+	 * has just become one level shorter, and direction names the
+	 * OTHER side, i.e. the side np leans towards as a result
+	 * (avl64_delete() passes AVL_BACK after unlinking an avl_forw
+	 * child and AVL_FORW after unlinking an avl_back child).
+	 *
+	 * Note that "continue" inside this do/while jumps to the
+	 * "while (np)" test, so continuing with np == NULL (no parent)
+	 * simply ends the walk.
+	 */
+	do {
+		ASSERT(direction == AVL_BACK || direction == AVL_FORW);
+
+		/*
+		 * np was balanced: it now leans towards direction, but
+		 * the height of the subtree rooted at np is unchanged,
+		 * so nothing above needs adjusting.
+		 */
+		if (np->avl_balance == AVL_BALANCE) {
+			np->avl_balance = direction;
+			return;
+		}
+
+		parent = np->avl_parent;
+
+		/*
+		 * If balance is being restored, no local node
+		 * reorganization is necessary, but may be at
+		 * a higher node.  Reset direction and continue.
+		 */
+		if (direction != np->avl_balance) {
+			np->avl_balance = AVL_BALANCE;
+			if (parent) {
+				/* np's side of parent got shorter */
+				if (parent->avl_forw == np)
+					direction = AVL_BACK;
+				else
+					direction = AVL_FORW;
+
+				np = parent;
+				continue;
+			}
+			return;
+		}
+
+		/*
+		 * Imbalance.  If a avl_forw node was removed, direction
+		 * (and, by reduction, np->avl_balance) is/was AVL_BACK.
+		 */
+		if (np->avl_balance == AVL_BACK) {
+
+			ASSERT(direction == AVL_BACK);
+			child = np->avl_back;
+			bal = child->avl_balance;
+
+			if (bal != AVL_FORW) /* single LL */ {
+				/*
+				 * np gets pushed down to lesser child's
+				 * avl_forw branch.
+				 *
+				 *  np->    -D                 +B
+				 *          / \                / \
+				 * child-> B   deleted        A   -D
+				 *        / \                     /
+				 *       A   C                   C
+				cmn_err(CE_CONT, "!LL delete b 0x%x c 0x%x\n",
+					np, child);
+				 */
+
+				np->avl_back = child->avl_forw;
+				if (child->avl_forw)
+					child->avl_forw->avl_parent = np;
+				child->avl_forw = np;
+
+				if (parent) {
+					if (parent->avl_forw == np) {
+						parent->avl_forw = child;
+						direction = AVL_BACK;
+					} else {
+						ASSERT(parent->avl_back == np);
+						parent->avl_back = child;
+						direction = AVL_FORW;
+					}
+				} else {
+					ASSERT(*rootp == np);
+					*rootp = child;
+				}
+				np->avl_parent = child;
+				child->avl_parent = parent;
+
+				/*
+				 * A balanced child leaves the rotated
+				 * subtree at its old height, so stop here;
+				 * otherwise the subtree got shorter and the
+				 * walk moves up to parent.
+				 */
+				if (bal == AVL_BALANCE) {
+					np->avl_balance = AVL_BACK;
+					child->avl_balance = AVL_FORW;
+					return;
+				} else {
+					np->avl_balance = AVL_BALANCE;
+					child->avl_balance = AVL_BALANCE;
+					np = parent;
+					avl64_checktree(tree, *rootp);
+					continue;
+				}
+			}
+
+			/* child->avl_balance == AVL_FORW  double LR rotation
+			 *
+			 * child's avl_forw node gets promoted up, along with
+			 * its avl_forw subtree
+			 *
+			 *  np->      -G                  C
+			 *            / \                / \
+			 * child->  +B   H             -B   G
+			 *          / \   \            /   / \
+			 *         A  +C   deleted    A   D   H
+			 *              \
+			 *               D
+			cmn_err(CE_CONT, "!LR delete b 0x%x c 0x%x t 0x%x\n",
+				np, child, child->avl_forw);
+			 */
+
+			tmp = child->avl_forw;
+			bal = tmp->avl_balance;
+
+			child->avl_forw = tmp->avl_back;
+			if (tmp->avl_back)
+				tmp->avl_back->avl_parent = child;
+
+			tmp->avl_back = child;
+			child->avl_parent = tmp;
+
+			np->avl_back = tmp->avl_forw;
+			if (tmp->avl_forw)
+				tmp->avl_forw->avl_parent = np;
+			tmp->avl_forw = np;
+
+			if (bal == AVL_FORW)
+				child->avl_balance = AVL_BACK;
+			else
+				child->avl_balance = AVL_BALANCE;
+
+			if (bal == AVL_BACK)
+				np->avl_balance = AVL_FORW;
+			else
+				np->avl_balance = AVL_BALANCE;
+
+			/* tail shared with the double RL rotation below */
+			goto next;
+		}
+
+		ASSERT(np->avl_balance == AVL_FORW && direction == AVL_FORW);
+
+		child = np->avl_forw;
+		bal = child->avl_balance;
+
+		if (bal != AVL_BACK) /* single RR */ {
+			/*
+			 * np gets pushed down to greater child's
+			 * avl_back branch.
+			 *
+			 *  np->    +B                    -D
+			 *          / \                   / \
+			 *   deleted   D <-child        +B   E
+			 *            / \                 \
+			 *           C   E                 C
+			cmn_err(CE_CONT, "!RR delete b 0x%x c 0x%x\n",
+				np, child);
+			 */
+
+			np->avl_forw = child->avl_back;
+			if (child->avl_back)
+				child->avl_back->avl_parent = np;
+			child->avl_back = np;
+
+			if (parent) {
+				if (parent->avl_forw == np) {
+					parent->avl_forw = child;
+					direction = AVL_BACK;
+				} else {
+					ASSERT(parent->avl_back == np);
+					parent->avl_back = child;
+					direction = AVL_FORW;
+				}
+			} else {
+				ASSERT(*rootp == np);
+				*rootp = child;
+			}
+			np->avl_parent = child;
+			child->avl_parent = parent;
+
+			/* same height reasoning as the single LL case */
+			if (bal == AVL_BALANCE) {
+				np->avl_balance = AVL_FORW;
+				child->avl_balance = AVL_BACK;
+				return;
+			} else {
+				np->avl_balance = AVL_BALANCE;
+				child->avl_balance = AVL_BALANCE;
+				np = parent;
+				avl64_checktree(tree, *rootp);
+				continue;
+			}
+		}
+
+		/* child->avl_balance == AVL_BACK  double RL rotation
+		cmn_err(CE_CONT, "!RL delete b 0x%x c 0x%x t 0x%x\n",
+			np, child, child->avl_back);
+		*/
+
+		tmp = child->avl_back;
+		bal = tmp->avl_balance;
+
+		child->avl_back = tmp->avl_forw;
+		if (tmp->avl_forw)
+			tmp->avl_forw->avl_parent = child;
+
+		tmp->avl_forw = child;
+		child->avl_parent = tmp;
+
+		np->avl_forw = tmp->avl_back;
+		if (tmp->avl_back)
+			tmp->avl_back->avl_parent = np;
+		tmp->avl_back = np;
+
+		if (bal == AVL_BACK)
+			child->avl_balance = AVL_FORW;
+		else
+			child->avl_balance = AVL_BALANCE;
+
+		if (bal == AVL_FORW)
+			np->avl_balance = AVL_BACK;
+		else
+			np->avl_balance = AVL_BALANCE;
+next:
+		/*
+		 * Common tail of both double rotations: tmp is the new top
+		 * of the subtree and is always left balanced.  Hook it to
+		 * parent (or make it the root and stop) and carry on
+		 * upwards with direction set to the side opposite the one
+		 * tmp hangs from.
+		 */
+		np->avl_parent = tmp;
+		tmp->avl_balance = AVL_BALANCE;
+		tmp->avl_parent = parent;
+
+		if (parent) {
+			if (parent->avl_forw == np) {
+				parent->avl_forw = tmp;
+				direction = AVL_BACK;
+			} else {
+				ASSERT(parent->avl_back == np);
+				parent->avl_back = tmp;
+				direction = AVL_FORW;
+			}
+		} else {
+			ASSERT(*rootp == np);
+			*rootp = tmp;
+			return;
+		}
+
+		np = parent;
+		avl64_checktree(tree, *rootp);
+	} while (np);
+}
+
+/*
+ *	Remove node from tree.
+ *	avl_delete does the local tree manipulations,
+ *	calls retreat() to rebalance tree up to its root.
+ *
+ *	Besides the tree links, the in-order list (avl_nextino chain and
+ *	tree->avl_firstino) is updated so that np's predecessor, if any,
+ *	points past np.  np's own fields are not cleared.
+ */
+void
+avl64_delete(
+	register avl64tree_desc_t *tree,
+	register avl64node_t *np)
+{
+	register avl64node_t *forw = np->avl_forw;
+	register avl64node_t *back = np->avl_back;
+	register avl64node_t *parent = np->avl_parent;
+	register avl64node_t *nnext;
+
+
+	/*
+	 * Step 1: find np's in-order predecessor (nnext) so the
+	 * avl_nextino chain can be spliced around np.
+	 */
+	if (np->avl_back) {
+		/*
+		 * a left child exists, then greatest left descendant's nextino
+		 * is pointing to np; make it point to np->nextino.
+		 */
+		nnext = np->avl_back;
+		while (nnext) {
+			if (!nnext->avl_forw)
+				break; /* can't find anything bigger */
+			nnext = nnext->avl_forw;
+		}
+	} else
+	if (np->avl_parent) {
+		/*
+		 * find nearest ancestor with lesser value. That ancestor's
+		 * nextino is pointing to np; make it point to np->nextino
+		 */
+		nnext = np->avl_parent;
+		while (nnext) {
+			if (AVL_END(tree, nnext) <= AVL_END(tree, np))
+				break;
+			nnext = nnext->avl_parent;
+		}
+	} else
+		nnext = NULL;
+
+	if (nnext) {
+		ASSERT(nnext->avl_nextino == np);
+		nnext->avl_nextino = np->avl_nextino;
+		/*
+		 *	Something precedes np; np cannot be firstino.
+		 */
+		ASSERT(tree->avl_firstino != np);
+	}
+	else {
+		/*
+		 *	Nothing preceding np; after deletion, np's nextino
+		 *	is firstino of tree.
+		 */
+		ASSERT(tree->avl_firstino == np);
+		tree->avl_firstino = np->avl_nextino;
+	}
+
+
+	/*
+	 * Degenerate cases...
+	 *
+	 * At most one child: that child (possibly NULL) replaces np.
+	 * The "no avl_forw child" case jumps into the body of the
+	 * "no avl_back child" test below, with forw holding the
+	 * replacement in both cases.
+	 */
+	if (forw == NULL) {
+		forw = back;
+		goto attach;
+	}
+
+	if (back == NULL) {
+attach:
+		if (forw)
+			forw->avl_parent = parent;
+		if (parent) {
+			if (parent->avl_forw == np) {
+				parent->avl_forw = forw;
+				retreat(tree, parent, AVL_BACK);
+			} else {
+				ASSERT(parent->avl_back == np);
+				parent->avl_back = forw;
+				retreat(tree, parent, AVL_FORW);
+			}
+		} else {
+			ASSERT(tree->avl_root == np);
+			tree->avl_root = forw;
+		}
+		avl64_checktree(tree, tree->avl_root);
+		return;
+	}
+
+	/*
+	 * Harder case: children on both sides.
+	 * If back's avl_forw pointer is null, just have back
+	 * inherit np's avl_forw tree, remove np from the tree
+	 * and adjust balance counters starting at back.
+	 *
+	 * np->	    xI              xH	(before retreat())
+	 *	    / \             / \
+	 * back->  H   J           G   J
+	 *	  /   / \             / \
+	 *       G   ?   ?           ?   ?
+	 *      / \
+	 *     ?   ?
+	 */
+	if ((forw = back->avl_forw) == NULL) {
+		/*
+		 * AVL_FORW retreat below will set back's
+		 * balance to AVL_BACK.
+		 */
+		back->avl_balance = np->avl_balance;
+		back->avl_forw = forw = np->avl_forw;
+		forw->avl_parent = back;
+		back->avl_parent = parent;
+
+		if (parent) {
+			if (parent->avl_forw == np)
+				parent->avl_forw = back;
+			else {
+				ASSERT(parent->avl_back == np);
+				parent->avl_back = back;
+			}
+		} else {
+			ASSERT(tree->avl_root == np);
+			tree->avl_root = back;
+		}
+
+		/*
+		 * back is taking np's place in the tree, and
+		 * has therefore lost a avl_back node (itself).
+		 */
+		retreat(tree, back, AVL_FORW);
+		avl64_checktree(tree, tree->avl_root);
+		return;
+	}
+
+	/*
+	 * Hardest case: children on both sides, and back's
+	 * avl_forw pointer isn't null.  Find the immediately
+	 * inferior buffer by following back's avl_forw line
+	 * to the end, then have it inherit np's avl_forw tree.
+	 *
+	 * np->	    xI                        xH
+	 *	    / \                       / \
+	 *	   G   J             back->  G   J   (before retreat())
+	 *	  / \                       / \
+	 *	 F   ?...                  F   ?1
+	 *	/     \
+	 *     ?       H  <-forw
+	 *	      /
+	 *	     ?1
+	 */
+	/* the assignment in the loop test is intentional */
+	while (back = forw->avl_forw)
+		forw = back;
+
+	/*
+	 * Will be adjusted by retreat() below.
+	 */
+	forw->avl_balance = np->avl_balance;
+
+	/*
+	 * forw inherits np's avl_forw...
+	 */
+	forw->avl_forw = np->avl_forw;
+	np->avl_forw->avl_parent = forw;
+
+	/*
+	 * ... forw's parent gets forw's avl_back...
+	 */
+	back = forw->avl_parent;
+	back->avl_forw = forw->avl_back;
+	if (forw->avl_back)
+		forw->avl_back->avl_parent = back;
+
+	/*
+	 * ... forw gets np's avl_back...
+	 */
+	forw->avl_back = np->avl_back;
+	np->avl_back->avl_parent = forw;
+
+	/*
+	 * ... and forw gets np's parent.
+	 */
+	forw->avl_parent = parent;
+
+	if (parent) {
+		if (parent->avl_forw == np)
+			parent->avl_forw = forw;
+		else
+			parent->avl_back = forw;
+	} else {
+		ASSERT(tree->avl_root == np);
+		tree->avl_root = forw;
+	}
+
+	/*
+	 * What used to be forw's parent is the starting
+	 * point for rebalancing.  It has lost a avl_forw node.
+	 */
+	retreat(tree, back, AVL_BACK);
+	avl64_checktree(tree, tree->avl_root);
+}
+
+
+/*
+ *	avl_findanyrange:
+ *
+ *	Given range r [start, end), find any range which is contained in r.
+ *	if checklen is non-zero, then only ranges of non-zero length are
+ *	considered in finding a match.
+ *
+ *	The search first locates the node containing start or, failing
+ *	that, the first node after start, and then checks that node
+ *	(or, when zero-length nodes are excluded, its successors on the
+ *	avl_nextino chain) against end.  Returns NULL when nothing
+ *	qualifies.
+ */
+avl64node_t *
+avl64_findanyrange(
+	register avl64tree_desc_t *tree,
+	register __uint64_t start,
+	register __uint64_t end,
+	int	checklen)
+{
+	register avl64node_t *np = tree->avl_root;
+
+	/* np = avl64_findadjacent(tree, start, AVL_SUCCEED); */
+	while (np) {
+		if (start < AVL_START(tree, np)) {
+			if (np->avl_back) {
+				np = np->avl_back;
+				continue;
+			}
+			/* if we were to add node with start, would
+			 * have a growth of AVL_BACK
+			 */
+			/* if succeeding node is needed, this is it.
+			 */
+			break;
+		}
+		if (start >= AVL_END(tree, np)) {
+			if (np->avl_forw) {
+				np = np->avl_forw;
+				continue;
+			}
+			/* if we were to add node with start, would
+			 * have a growth of AVL_FORW;
+			 */
+			/* we are looking for a succeeding node;
+			 * this is nextino.
+			 */
+			np = np->avl_nextino;
+			break;
+		}
+		/* AVL_START(tree, np) <= start < AVL_END(tree, np) */
+		break;
+	}
+	if (np) {
+		if (checklen == AVL_INCLUDE_ZEROLEN) {
+			if (end <= AVL_START(tree, np)) {
+				/* something follows start, but it
+				 * is entirely after the range (end)
+				 */
+				return(NULL);
+			}
+			/* np may straddle [start, end) */
+			return(np);
+		}
+		/*
+		 * find non-zero length region
+		 */
+		while (np && (AVL_END(tree, np) - AVL_START(tree, np) == 0)
+			&& (AVL_START(tree, np) < end))
+			np = np->avl_nextino;
+
+		if ((np == NULL) || (AVL_START(tree, np) >= end))
+			return NULL;
+		return(np);
+	}
+	/*
+	 * nothing succeeds start, all existing ranges are before start.
+	 */
+	return NULL;
+}
+
+
+/*
+ * Returns a pointer to range which contains value,
+ * or NULL if no node's [start, end) covers it.
+ */
+avl64node_t *
+avl64_findrange(
+	register avl64tree_desc_t *tree,
+	register __uint64_t value)
+{
+	register avl64node_t *np = tree->avl_root;
+
+	while (np) {
+		if (value < AVL_START(tree, np)) {
+			np = np->avl_back;
+			continue;
+		}
+		if (value >= AVL_END(tree, np)) {
+			np = np->avl_forw;
+			continue;
+		}
+		ASSERT(AVL_START(tree, np) <= value &&
+		       value < AVL_END(tree, np));
+		return np;
+	}
+	return NULL;
+}
+
+
+/*
+ * Returns a pointer to node which contains exact value,
+ * i.e. whose start equals value; only AVL_START is consulted.
+ * Returns NULL if there is no such node.
+ */
+avl64node_t *
+avl64_find(
+	register avl64tree_desc_t *tree,
+	register __uint64_t value)
+{
+	register avl64node_t *np = tree->avl_root;
+	register __uint64_t nvalue;
+
+	while (np) {
+		nvalue = AVL_START(tree, np);
+		if (value < nvalue) {
+			np = np->avl_back;
+			continue;
+		}
+		if (value == nvalue) {
+			return np;
+		}
+		np = np->avl_forw;
+	}
+	return NULL;
+}
+
+
+/*
+ * Balance buffer AVL tree after attaching a new node to root.
+ * Called only by avl_insert.
+ *
+ * growth is the side of np (AVL_BACK or AVL_FORW) on which the
+ * subtree has just become one level taller.  At most one single or
+ * double rotation is performed; every rotation path ends in the
+ * common code at the bottom of the loop and then breaks out.
+ */
+static void
+avl64_balance(
+	register avl64node_t **rootp,
+	register avl64node_t *np,
+	register int growth)
+{
+	/*
+	 * At this point, np points to the node to which
+	 * a new node has been attached.  All that remains is to
+	 * propagate avl_balance up the tree.
+	 */
+	for ( ; ; ) {
+		register avl64node_t *parent = np->avl_parent;
+		register avl64node_t *child;
+
+		CERT(growth == AVL_BACK || growth == AVL_FORW);
+
+		/*
+		 * If the buffer was already balanced, set avl_balance
+		 * to the new direction.  Continue if there is a
+		 * parent after setting growth to reflect np's
+		 * relation to its parent.
+		 */
+		if (np->avl_balance == AVL_BALANCE) {
+			np->avl_balance = growth;
+			if (parent) {
+				if (parent->avl_forw == np)
+					growth = AVL_FORW;
+				else {
+					ASSERT(parent->avl_back == np);
+					growth = AVL_BACK;
+				}
+
+				np = parent;
+				continue;
+			}
+			break;
+		}
+
+		if (growth != np->avl_balance) {
+			/*
+			 * Subtree is now balanced -- no net effect
+			 * in the size of the subtree, so leave.
+			 */
+			np->avl_balance = AVL_BALANCE;
+			break;
+		}
+
+		if (growth == AVL_BACK) {
+
+			child = np->avl_back;
+			CERT(np->avl_balance == AVL_BACK && child);
+
+			if (child->avl_balance == AVL_BACK) { /* single LL */
+				/*
+				 * ``A'' just got inserted;
+				 * np points to ``E'', child to ``C'',
+				 * and it is already AVL_BACK --
+				 * child will get promoted to top of subtree.
+
+				np->	     -E			C
+					     / \	       / \
+				child->	   -C   F	     -B   E
+					   / \		     /   / \
+					 -B   D		    A   D   F
+					 /
+					A
+
+					Note that child->avl_parent and
+					avl_balance get set in common code.
+				 */
+				np->avl_parent = child;
+				np->avl_balance = AVL_BALANCE;
+				np->avl_back = child->avl_forw;
+				if (child->avl_forw)
+					child->avl_forw->avl_parent = np;
+				child->avl_forw = np;
+			} else {
+				/*
+				 * double LR
+				 *
+				 * child's avl_forw node gets promoted to
+				 * the top of the subtree.
+
+				np->	     -E		      C
+					     / \	     / \
+				child->	   +B   F	   -B   E
+					   / \		   /   / \
+					  A  +C		  A   D   F
+					       \
+						D
+
+				 */
+				register avl64node_t *tmp = child->avl_forw;
+
+				CERT(child->avl_balance == AVL_FORW && tmp);
+
+				child->avl_forw = tmp->avl_back;
+				if (tmp->avl_back)
+					tmp->avl_back->avl_parent = child;
+
+				tmp->avl_back = child;
+				child->avl_parent = tmp;
+
+				np->avl_back = tmp->avl_forw;
+				if (tmp->avl_forw)
+					tmp->avl_forw->avl_parent = np;
+
+				tmp->avl_forw = np;
+				np->avl_parent = tmp;
+
+				if (tmp->avl_balance == AVL_BACK)
+					np->avl_balance = AVL_FORW;
+				else
+					np->avl_balance = AVL_BALANCE;
+
+				if (tmp->avl_balance == AVL_FORW)
+					child->avl_balance = AVL_BACK;
+				else
+					child->avl_balance = AVL_BALANCE;
+
+				/*
+				 * Set child to point to tmp since it is
+				 * now the top of the subtree, and will
+				 * get attached to the subtree parent in
+				 * the common code below.
+				 */
+				child = tmp;
+			}
+
+		} else /* growth == AVL_FORW */ {
+
+			/*
+			 * This code is the mirror image of AVL_BACK above.
+			 */
+
+			child = np->avl_forw;
+			CERT(np->avl_balance == AVL_FORW && child);
+
+			if (child->avl_balance == AVL_FORW) { /* single RR */
+				np->avl_parent = child;
+				np->avl_balance = AVL_BALANCE;
+				np->avl_forw = child->avl_back;
+				if (child->avl_back)
+					child->avl_back->avl_parent = np;
+				child->avl_back = np;
+			} else {
+				/*
+				 * double RL
+				 */
+				register avl64node_t *tmp = child->avl_back;
+
+				ASSERT(child->avl_balance == AVL_BACK && tmp);
+
+				child->avl_back = tmp->avl_forw;
+				if (tmp->avl_forw)
+					tmp->avl_forw->avl_parent = child;
+
+				tmp->avl_forw = child;
+				child->avl_parent = tmp;
+
+				np->avl_forw = tmp->avl_back;
+				if (tmp->avl_back)
+					tmp->avl_back->avl_parent = np;
+
+				tmp->avl_back = np;
+				np->avl_parent = tmp;
+
+				if (tmp->avl_balance == AVL_FORW)
+					np->avl_balance = AVL_BACK;
+				else
+					np->avl_balance = AVL_BALANCE;
+
+				if (tmp->avl_balance == AVL_BACK)
+					child->avl_balance = AVL_FORW;
+				else
+					child->avl_balance = AVL_BALANCE;
+
+				child = tmp;
+			}
+		}
+
+		/*
+		 * Common code: child is the new top of the rotated
+		 * subtree; hang it where np used to be.
+		 */
+		child->avl_parent = parent;
+		child->avl_balance = AVL_BALANCE;
+
+		if (parent) {
+			if (parent->avl_back == np)
+				parent->avl_back = child;
+			else
+				parent->avl_forw = child;
+		} else {
+			ASSERT(*rootp == np);
+			*rootp = child;
+		}
+
+		break;
+	}
+}
+
+/*
+ * Walk down from the root looking for the node under which a new
+ * range [start, end) has to be attached.  On success that node is
+ * returned and *growthp is set to the side (AVL_BACK or AVL_FORW) on
+ * which the new node must be hung.  NULL is returned, with *growthp
+ * untouched, when a node is met that lies neither entirely at/after
+ * end nor entirely at/before start.
+ */
+static
+avl64node_t *
+avl64_insert_find_growth(
+		register avl64tree_desc_t *tree,
+		register __uint64_t start,	/* range start at start, */
+		register __uint64_t end,	/* exclusive */
+		register int   *growthp)	/* OUT */
+{
+	avl64node_t *root = tree->avl_root;
+	register avl64node_t *np;
+
+	np = root;
+	ASSERT(np); /* caller ensures that there is at least one node in tree */
+
+	for ( ; ; ) {
+		CERT(np->avl_parent || root == np);
+		CERT(!np->avl_parent || root != np);
+		CERT(!(np->avl_back) || np->avl_back->avl_parent == np);
+		CERT(!(np->avl_forw) || np->avl_forw->avl_parent == np);
+		CERT(np->avl_balance != AVL_FORW || np->avl_forw);
+		CERT(np->avl_balance != AVL_BACK || np->avl_back);
+		CERT(np->avl_balance != AVL_BALANCE ||
+		     np->avl_back == NULL || np->avl_forw);
+		CERT(np->avl_balance != AVL_BALANCE ||
+		     np->avl_forw == NULL || np->avl_back);
+
+		if (AVL_START(tree, np) >= end) {
+			if (np->avl_back) {
+				np = np->avl_back;
+				continue;
+			}
+			*growthp = AVL_BACK;
+			break;
+		}
+
+		if (AVL_END(tree, np) <= start) {
+			if (np->avl_forw) {
+				np = np->avl_forw;
+				continue;
+			}
+			*growthp = AVL_FORW;
+			break;
+		}
+		/* found exact match -- let caller decide if it is an error */
+		return(NULL);
+	}
+	return(np);
+}
+
+
+/*
+ * Hang newnode off parent on the side given by growth and thread it
+ * into the in-order (avl_nextino) list.  newnode->avl_parent,
+ * tree->avl_firstino and the balance factors are NOT touched here;
+ * the callers in this file take care of those.
+ */
+static void
+avl64_insert_grow(
+	register avl64tree_desc_t *tree,
+	register avl64node_t *parent,
+	register avl64node_t *newnode,
+	register int growth)
+{
+	register avl64node_t *nnext;
+	register __uint64_t start = AVL_START(tree, newnode);
+
+	if (growth == AVL_BACK) {
+
+		parent->avl_back = newnode;
+		/*
+		 * we are growing to the left; previous in-order to newnode is
+		 * closest ancestor with lesser value. Before this
+		 * insertion, this ancestor will be pointing to
+		 * newnode's parent. After insertion, next in-order to newnode
+		 * is the parent.
+		 */
+		newnode->avl_nextino = parent;
+		nnext = parent;
+		while (nnext) {
+			if (AVL_END(tree, nnext) <= start)
+				break;
+			nnext = nnext->avl_parent;
+		}
+		if (nnext)  {
+			/*
+			 * nnext will be null if newnode is
+			 * the least element, and hence very first in the list.
+			 */
+			ASSERT(nnext->avl_nextino == parent);
+			nnext->avl_nextino = newnode;
+		}
+	}
+	else {
+		/* growing to the right: newnode directly follows parent */
+		parent->avl_forw = newnode;
+		newnode->avl_nextino = parent->avl_nextino;
+		parent->avl_nextino = newnode;
+	}
+}
+
+
+/*
+ * Insert newnode, whose range is obtained through the tree's
+ * avl_start/avl_end operations, into tree.
+ *
+ * Returns newnode on success.  Returns NULL without modifying the
+ * tree when avl64_insert_find_growth() rejects the range; a warning
+ * is printed in that case only if the range has non-zero length.
+ *
+ * NOTE(review): ASSERT(newnode) runs after newnode has already been
+ * handed to AVL_START/AVL_END in the initialisers above it.
+ */
+avl64node_t *
+avl64_insert(
+	register avl64tree_desc_t *tree,
+	register avl64node_t *newnode)
+{
+	register avl64node_t *np;
+	register __uint64_t start = AVL_START(tree, newnode);
+	register __uint64_t end = AVL_END(tree, newnode);
+	int growth;
+
+	ASSERT(newnode);
+	/*
+	 * Clean all pointers for sanity; some will be reset as necessary.
+	 */
+	newnode->avl_nextino = NULL;
+	newnode->avl_parent = NULL;
+	newnode->avl_forw = NULL;
+	newnode->avl_back = NULL;
+	newnode->avl_balance = AVL_BALANCE;
+
+	if ((np = tree->avl_root) == NULL) { /* degenerate case... */
+		tree->avl_root = newnode;
+		tree->avl_firstino = newnode;
+		return newnode;
+	}
+
+	if ((np = avl64_insert_find_growth(tree, start, end, &growth))
+			== NULL) {
+		if (start != end)  { /* non-zero length range */
+#ifdef AVL_USER_MODE
+		printf("avl_insert: Warning! duplicate range [0x%llx,0x%llx)\n",
+			start, end);
+#else
+			cmn_err(CE_CONT,
+	"!avl_insert: Warning! duplicate range [0x%llx,0x%llx)\n",
+				start, end);
+#endif
+		}
+		return(NULL);
+	}
+
+	avl64_insert_grow(tree, np, newnode, growth);
+	if (growth == AVL_BACK) {
+		/*
+		 * Growing to left. if np was firstino, newnode will be firstino
+		 */
+		if (tree->avl_firstino == np)
+			tree->avl_firstino = newnode;
+	}
+#ifdef notneeded
+	else
+	if (growth == AVL_FORW)
+		/*
+		 * Cannot possibly be firstino; there is somebody to our left.
+		 */
+		;
+#endif
+
+	newnode->avl_parent = np;
+	CERT(np->avl_forw == newnode || np->avl_back == newnode);
+
+	avl64_balance(&tree->avl_root, np, growth);
+
+	avl64_checktree(tree, tree->avl_root);
+
+	return newnode;
+}
+
+/*
+ *
+ * avl64_insert_immediate(tree, afterp, newnode):
+ *	insert newnode immediately into tree immediately after afterp.
+ *	after insertion, newnode is right child of afterp.
+ *
+ *	A NULL afterp makes newnode the root and first node of the tree.
+ *	Otherwise afterp must have no avl_forw child (asserted).
+ *
+ *	NOTE(review): unlike avl64_insert(), newnode->avl_parent is left
+ *	NULL here before avl64_balance() runs -- confirm this is intended.
+ */
+void
+avl64_insert_immediate(
+	avl64tree_desc_t *tree,
+	avl64node_t *afterp,
+	avl64node_t *newnode)
+{
+	/*
+	 * Clean all pointers for sanity; some will be reset as necessary.
+	 */
+	newnode->avl_nextino = NULL;
+	newnode->avl_parent = NULL;
+	newnode->avl_forw = NULL;
+	newnode->avl_back = NULL;
+	newnode->avl_balance = AVL_BALANCE;
+
+	if (afterp == NULL) {
+		tree->avl_root = newnode;
+		tree->avl_firstino = newnode;
+		return;
+	}
+
+	ASSERT(afterp->avl_forw == NULL);
+	avl64_insert_grow(tree, afterp, newnode, AVL_FORW); /* grow to right */
+	CERT(afterp->avl_forw == newnode);
+	avl64_balance(&tree->avl_root, afterp, AVL_FORW);
+	avl64_checktree(tree, tree->avl_root);
+}
+
+
+/*
+ *	Returns first in order node
+ *	(the leftmost node below root), or NULL for an empty tree.
+ */
+avl64node_t *
+avl64_firstino(register avl64node_t *root)
+{
+	register avl64node_t *np;
+
+	if ((np = root) == NULL)
+		return NULL;
+
+	while (np->avl_back)
+		np = np->avl_back;
+	return np;
+}
+
+#ifdef AVL_USER_MODE
+/*
+ * leave this as a user-mode only routine until someone actually
+ * needs it in the kernel
+ */
+
+/*
+ *	Returns last in order node
+ *	(the rightmost node below root), or NULL for an empty tree.
+ */
+avl64node_t *
+avl64_lastino(register avl64node_t *root)
+{
+	register avl64node_t *np;
+
+	if ((np = root) == NULL)
+		return NULL;
+
+	while (np->avl_forw)
+		np = np->avl_forw;
+	return np;
+}
+#endif
+
+/*
+ * Initialise an empty tree descriptor that uses ops to obtain the
+ * start and end of each node's range.
+ */
+void
+avl64_init_tree(avl64tree_desc_t *tree, avl64ops_t *ops)
+{
+	tree->avl_root = NULL;
+	tree->avl_firstino = NULL;
+	tree->avl_ops = ops;
+}
+
+#ifdef AVL_DEBUG
+/*
+ * Print np's range as [start-last], followed by a newline if nl is
+ * non-zero and by a space otherwise.
+ *
+ * NOTE(review): the values printed are __uint64_t but the format
+ * uses %d -- confirm before enabling AVL_DEBUG.
+ */
+static void
+avl64_printnode(avl64tree_desc_t *tree, avl64node_t *np, int nl)
+{
+	printf("[%d-%d]%c", AVL_START(tree, np),
+		(AVL_END(tree, np) - 1), nl ? '\n' : ' ');
+}
+#endif
+#ifdef STAND_ALONE_DEBUG
+
+/*
+ * Interactive test harness, compiled only with STAND_ALONE_DEBUG.
+ *
+ * NOTE(review): as written this section does not look compilable:
+ * the struct and initialiser below lack terminating semicolons, the
+ * ops table names avl_debug_start/avl_debug_end while the functions
+ * are avl64_debug_start/avl64_debug_end, main() calls
+ * alloc__debug_avlnode(), alloc_avlnode() and avl64_findinrange(),
+ * none of which are defined in the part of the file shown here, and
+ * the casts in the two accessors bind to the member access rather
+ * than to node.  Left untouched; verify before use.
+ */
+struct avl_debug_node {
+	avl64node_t	avl_node;
+	xfs_off_t	avl_start;
+	unsigned int	avl_size;
+}
+
+avl64ops_t avl_debug_ops = {
+	avl_debug_start,
+	avl_debug_end,
+}
+
+static __uint64_t
+avl64_debug_start(avl64node_t *node)
+{
+	return (__uint64_t)(struct avl_debug_node *)node->avl_start;
+}
+
+static __uint64_t
+avl64_debug_end(avl64node_t *node)
+{
+	return (__uint64_t)
+		((struct avl_debug_node *)node->avl_start +
+		 (struct avl_debug_node *)node->avl_size);
+}
+
+avl_debug_node	freenodes[100];
+avl_debug_node	*freehead = &freenodes[0];
+
+/* hand out the next unused element of freenodes[]; no bounds check */
+static avl64node_t *
+alloc_avl64_debug_node()
+{
+	freehead->avl_balance = AVL_BALANCE;
+	freehead->avl_parent = freehead->avl_forw = freehead->avl_back = NULL;
+	return(freehead++);
+}
+
+/*
+ * Print the tree sideways: avl_forw subtree first, then the node
+ * indented by depth spaces, then the avl_back subtree.
+ */
+static void
+avl64_print(avl64tree_desc_t *tree, avl64node_t *root, int depth)
+{
+	int i;
+
+	if (!root)
+		return;
+	if (root->avl_forw)
+		avl64_print(tree, root->avl_forw, depth+5);
+	for (i = 0; i < depth; i++)
+		putchar((int) ' ');
+	avl64_printnode(tree, root,1);
+	if (root->avl_back)
+		avl64_print(tree, root->avl_back, depth+5);
+}
+
+/*
+ * Builds a tree of ten ranges and then reads single-letter commands
+ * (f, p, d, i, r) followed by a number from stdin.
+ */
+main()
+{
+	int		i, j;
+	avl64node_t	*np;
+	avl64tree_desc_t	tree;
+	char		linebuf[256], cmd[256];
+
+	avl64_init_tree(&tree, &avl_debug_ops);
+
+	for (i = 100; i > 0; i = i - 10)
+	{
+		np = alloc__debug_avlnode();
+		ASSERT(np);
+		np->avl_start = i;
+		np->avl_size = 10;
+		avl64_insert(&tree, np);
+	}
+	avl64_print(&tree, tree.avl_root, 0);
+
+	for (np = tree.avl_firstino; np != NULL; np = np->avl_nextino)
+		avl64_printnode(&tree, np, 0);
+	printf("\n");
+
+	while (1) {
+		printf("Command [fpdir] : ");
+		fgets(linebuf, 256, stdin);
+		if (feof(stdin)) break;
+		cmd[0] = NULL;
+		if (sscanf(linebuf, "%[fpdir]%d", cmd, &i) != 2)
+			continue;
+		switch (cmd[0]) {
+		case 'd':
+		case 'f':
+			printf("end of range ? ");
+			fgets(linebuf, 256, stdin);
+			j = atoi(linebuf);
+
+			if (i == j) j = i+1;
+			np = avl64_findinrange(&tree,i,j);
+			if (np) {
+				avl64_printnode(&tree, np, 1);
+				if (cmd[0] == 'd')
+					avl64_delete(&tree, np);
+			} else
+				printf("Cannot find %d\n", i);
+			break;
+		case 'p':
+			avl64_print(&tree, tree.avl_root, 0);
+			for (np = tree.avl_firstino;
+					np != NULL; np = np->avl_nextino)
+				avl64_printnode(&tree, np, 0);
+			printf("\n");
+			break;
+		case 'i':
+			np = alloc_avlnode();
+			ASSERT(np);
+			np->avl_start = i;
+			printf("size of range ? ");
+			fgets(linebuf, 256, stdin);
+			j = atoi(linebuf);
+
+			np->avl_size = j;
+			avl64_insert(&tree, np);
+			break;
+		case 'r': {
+			avl64node_t	*b, *e, *t;
+			int		checklen;
+
+			printf("End of range ? ");
+			fgets(linebuf, 256, stdin);
+			j = atoi(linebuf);
+
+			printf("checklen 0/1 ? ");
+			fgets(linebuf, 256, stdin);
+			checklen = atoi(linebuf);
+
+
+			b = avl64_findanyrange(&tree, i, j, checklen);
+			if (b) {
+				printf("Found something\n");
+				t = b;
+				while (t)  {
+					if (t != b &&
+						AVL_START(&tree, t) >= j)
+						break;
+					avl64_printnode(&tree, t, 0);
+					t = t->avl_nextino;
+				}
+				printf("\n");
+			}
+		     }
+		}
+	}
+}
+#endif
+
+/*
+ *	Given a tree, find value; will find return range enclosing value,
+ *	or range immediately succeeding value,
+ *	or range immediately preceding value.
+ *
+ *	dir (AVL_SUCCEED or AVL_PRECEED) only matters when no node
+ *	encloses value; NULL is returned when there is no neighbour on
+ *	the requested side or the tree is empty.
+ *
+ *	NOTE(review): in the "value >= AVL_END" arm an unexpected dir is
+ *	only caught by ASSERT and then falls through to return(np),
+ *	whereas the "value < AVL_START" arm breaks and returns NULL.
+ */
+avl64node_t *
+avl64_findadjacent(
+	register avl64tree_desc_t *tree,
+	register __uint64_t value,
+	register int		dir)
+{
+	register avl64node_t *np = tree->avl_root;
+
+	while (np) {
+		if (value < AVL_START(tree, np)) {
+			if (np->avl_back) {
+				np = np->avl_back;
+				continue;
+			}
+			/* if we were to add node with value, would
+			 * have a growth of AVL_BACK
+			 */
+			if (dir == AVL_SUCCEED) {
+				/* if succeeding node is needed, this is it.
+				 */
+				return(np);
+			}
+			if (dir == AVL_PRECEED) {
+				/*
+				 * find nearest ancestor with lesser value.
+				 */
+				np = np->avl_parent;
+				while (np) {
+					if (AVL_END(tree, np) <= value)
+						break;
+					np = np->avl_parent;
+				}
+				return(np);
+			}
+			ASSERT(dir == AVL_SUCCEED || dir == AVL_PRECEED);
+			break;
+		}
+		if (value >= AVL_END(tree, np)) {
+			if (np->avl_forw) {
+				np = np->avl_forw;
+				continue;
+			}
+			/* if we were to add node with value, would
+			 * have a growth of AVL_FORW;
+			 */
+			if (dir == AVL_SUCCEED) {
+				/* we are looking for a succeeding node;
+				 * this is nextino.
+				 */
+				return(np->avl_nextino);
+			}
+			if (dir == AVL_PRECEED) {
+				/* looking for a preceding node; this is it. */
+				return(np);
+			}
+			ASSERT(dir == AVL_SUCCEED || dir == AVL_PRECEED);
+		}
+		/* AVL_START(tree, np) <= value < AVL_END(tree, np) */
+		return(np);
+	}
+	return NULL;
+}
+
+
+#ifdef AVL_FUTURE_ENHANCEMENTS
+/*
+ *	avl_findranges:
+ *
+ *	Given range r [start, end), find all ranges in tree which are contained
+ *	in r. At return, startp and endp point to first and last of
+ *	a chain of elements which describe the contained ranges. Elements
+ *	in startp ... endp are in sort order, and can be accessed by
+ *	using avl_nextino.
+ *
+ *	Both *startp and *endp are set to NULL when nothing is found.
+ */
+
+void
+avl64_findranges(
+	register avl64tree_desc_t *tree,
+	register __uint64_t start,
+	register __uint64_t end,
+	avl64node_t	        **startp,
+	avl64node_t		**endp)
+{
+	register avl64node_t *np;
+
+	np = avl64_findadjacent(tree, start, AVL_SUCCEED);
+	if (np == NULL				/* nothing succeeding start */
+		|| (np && (end <= AVL_START(tree, np))))
+						/* something follows start,
+						   but... is entirely after end */
+	{
+		*startp = NULL;
+		*endp = NULL;
+		return;
+	}
+
+	*startp = np;
+
+	/* see if end is in this region itself */
+	if (end <= AVL_END(tree, np) ||
+	    np->avl_nextino == NULL ||
+	    (np->avl_nextino &&
+	     (end <= AVL_START(tree, np->avl_nextino)))) {
+		*endp = np;
+		return;
+	}
+	/* have to munge for end */
+	/*
+	 * note: have to look for (end - 1), since
+	 * findadjacent will look for exact value, and does not
+	 * care about the fact that end is actually one more
+	 * than the value actually being looked for; thus feed it one less.
+	 */
+	*endp = avl64_findadjacent(tree, (end-1), AVL_PRECEED);
+	ASSERT(*endp);
+}
+
+#endif /* AVL_FUTURE_ENHANCEMENTS */
diff --git a/repair/avl64.h b/repair/avl64.h
new file mode 100644
index 000000000..26ed977c3
--- /dev/null
+++ b/repair/avl64.h
@@ -0,0 +1,151 @@
+/**************************************************************************
+ *									  *
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ *									  *
+ **************************************************************************/
+#ifndef __XR_AVL64_H__
+#define __XR_AVL64_H__
+
+#include	/* NOTE(review): header name missing here (angle-bracket operand apparently lost in extraction) -- restore from upstream */
+
+typedef struct	avl64node {
+	struct	avl64node	*avl_forw;	/* pointer to right child  (> parent) */
+	struct	avl64node *avl_back;	/* pointer to left child  (< parent) */
+	struct	avl64node *avl_parent;	/* parent pointer */
+	struct	avl64node *avl_nextino;	/* next in-order; NULL terminated list*/
+	char		 avl_balance;	/* tree balance: AVL_BACK, AVL_BALANCE or AVL_FORW */
+} avl64node_t;
+
+/*
+ * avl-tree operations
+ *
+ * Callbacks giving the range covered by a node: avl_start is the
+ * first value, avl_end is one past the last (ranges are [start, end)).
+ */
+typedef struct avl64ops {
+	__uint64_t	(*avl_start)(avl64node_t *);
+	__uint64_t	(*avl_end)(avl64node_t *);
+} avl64ops_t;
+
+/*
+ * avoid complaints about multiple def's since these are only used by
+ * the avl code internally
+ */
+#ifndef AVL_START
+#define	AVL_START(tree, n)	(*(tree)->avl_ops->avl_start)(n)
+#define	AVL_END(tree, n)	(*(tree)->avl_ops->avl_end)(n)
+#endif
+
+/*
+ * tree descriptor:
+ *	root points to the root of the tree.
+ *	firstino points to the first in the ordered list.
+ *	ops holds the start/end callbacks used by AVL_START/AVL_END.
+ */
+typedef struct avl64tree_desc {
+	avl64node_t	*avl_root;
+	avl64node_t	*avl_firstino;
+	avl64ops_t	*avl_ops;
+} avl64tree_desc_t;
+
+/* possible values for avl_balance */
+
+#define AVL_BACK	1	/* avl_back (left) side is the taller one */
+#define AVL_BALANCE	0	/* both sides have the same height */
+#define AVL_FORW	2	/* avl_forw (right) side is the taller one */
+
+/*
+ * 'Exported' avl tree routines
+ */
+avl64node_t
+*avl64_insert(
+	avl64tree_desc_t *tree,
+	avl64node_t *newnode);
+
+void
+avl64_delete(
+	avl64tree_desc_t *tree,
+	avl64node_t *np);
+
+void
+avl64_insert_immediate(
+	avl64tree_desc_t *tree,
+	avl64node_t *afterp,
+	avl64node_t *newnode);
+
+void
+avl64_init_tree(
+	avl64tree_desc_t  *tree,
+	avl64ops_t *ops);
+
+avl64node_t *
+avl64_findrange(
+	avl64tree_desc_t *tree,
+	__uint64_t value);
+
+avl64node_t *
+avl64_find(
+	avl64tree_desc_t *tree,
+	__uint64_t value);
+
+avl64node_t *
+avl64_findanyrange(
+	avl64tree_desc_t *tree,
+	__uint64_t start,
+	__uint64_t end,
+	int     checklen);
+
+
+avl64node_t *
+avl64_findadjacent(
+	avl64tree_desc_t *tree,
+	__uint64_t value,
+	int		dir);
+
+#ifdef AVL_FUTURE_ENHANCEMENTS
+void
+avl64_findranges(
+	register avl64tree_desc_t *tree,
+	register __uint64_t start,
+	register __uint64_t end,
+	avl64node_t	        **startp,
+	avl64node_t		**endp);
+#endif
+
+/*
+ * avoid complaints about multiple def's since these are only used by
+ * the avl code internally
+ */
+#ifndef AVL_PRECEED
+#define	AVL_PRECEED	0x1	/* dir for avl64_findadjacent() */
+#define	AVL_SUCCEED	0x2	/* dir for avl64_findadjacent() */
+
+#define	AVL_INCLUDE_ZEROLEN	0x0000	/* checklen for avl64_findanyrange() */
+#define	AVL_EXCLUDE_ZEROLEN	0x0001	/* checklen for avl64_findanyrange() */
+#endif
+
+#endif /* __XR_AVL64_H__ */
diff --git a/repair/bmap.c b/repair/bmap.c
new file mode 100644
index 000000000..47c8bbf1c
--- /dev/null
+++ b/repair/bmap.c
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include	/* NOTE(review): header name missing here (angle-bracket operand apparently lost in extraction) -- restore from upstream */
+#include "err_protos.h"
+#include "bmap.h"
+
+/*
+ * Block mapping code taken from xfs_db.
+ */
+
+/*
+ * Append an extent to the block entry.
+ *
+ * Grows *entp by c blocks and records filesystem blocks b .. b+c-1
+ * after the blocks it already holds.  The entry may move, so *entp is
+ * rewritten.  If the reallocation fails a warning is issued, the old
+ * entry is released (it used to be leaked) and *entp is set to NULL,
+ * which is what callers saw on failure before.
+ */
+void
+blkent_append(
+	blkent_t	**entp,
+	xfs_dfsbno_t	b,
+	xfs_dfilblks_t	c)
+{
+	blkent_t	*ent;
+	blkent_t	*grown;
+	size_t		size;
+	int		i;
+
+	ent = *entp;
+	size = BLKENT_SIZE(c + ent->nblks);
+	/* keep the old pointer until realloc is known to have worked */
+	if ((grown = realloc(ent, size)) == NULL) {
+		/* size is a size_t: cast so it matches the conversion */
+		do_warn("realloc failed in blkent_append (%lu bytes)\n",
+			(unsigned long)size);
+		free(ent);
+		*entp = NULL;
+		return;
+	}
+	*entp = ent = grown;
+	for (i = 0; i < c; i++)
+		ent->blks[ent->nblks + i] = b + i;
+	ent->nblks += c;
+}
+
+/*
+ * Make a new block entry.
+ *
+ * The entry starts at file offset o and maps c consecutive blocks,
+ * b .. b+c-1.  Returns NULL (after a warning) if memory cannot be
+ * obtained.
+ */
+blkent_t *
+blkent_new(
+	xfs_dfiloff_t	o,
+	xfs_dfsbno_t	b,
+	xfs_dfilblks_t	c)
+{
+	blkent_t	*ent;
+	int		i;
+
+	if ((ent = malloc(BLKENT_SIZE(c))) == NULL) {
+		/* cast so the argument matches the conversion */
+		do_warn("malloc failed in blkent_new (%lu bytes)\n",
+			(unsigned long)BLKENT_SIZE(c));
+		return ent;
+	}
+	ent->nblks = c;
+	ent->startoff = o;
+	for (i = 0; i < c; i++)
+		ent->blks[i] = b + i;
+	return ent;
+}
+
+/*
+ * Prepend an extent to the block entry.
+ *
+ * Builds a replacement entry that starts c blocks earlier, holds
+ * blocks b .. b+c-1 followed by the old entry's blocks, frees the old
+ * entry and stores the replacement in *entp.  If allocation fails a
+ * warning is issued, the old entry is released (it used to be leaked)
+ * and *entp is set to NULL, as before.
+ */
+void
+blkent_prepend(
+	blkent_t	**entp,
+	xfs_dfsbno_t	b,
+	xfs_dfilblks_t	c)
+{
+	int		i;
+	blkent_t	*newent;
+	blkent_t	*oldent;
+
+	oldent = *entp;
+	if ((newent = malloc(BLKENT_SIZE(oldent->nblks + c))) == NULL) {
+		do_warn("malloc failed in blkent_prepend (%lu bytes)\n",
+			(unsigned long)BLKENT_SIZE(oldent->nblks + c));
+		free(oldent);
+		*entp = newent;
+		return;
+	}
+	newent->nblks = oldent->nblks + c;
+	newent->startoff = oldent->startoff - c;
+	/*
+	 * The prepended extent is b, b+1, ... b+c-1, exactly as in
+	 * blkent_new(); this loop used to store b + c in every slot.
+	 */
+	for (i = 0; i < c; i++)
+		newent->blks[i] = b + i;
+	for (; i < oldent->nblks + c; i++)
+		newent->blks[i] = oldent->blks[i - c];
+	free(oldent);
+	*entp = newent;
+}
+
+/*
+ * Allocate a block map.
+ *
+ * Room is reserved for nex entries (at least one); the map starts out
+ * with no entries in use.  Returns NULL (after a warning) if memory
+ * cannot be obtained.
+ */
+blkmap_t *
+blkmap_alloc(
+	xfs_extnum_t	nex)
+{
+	blkmap_t	*blkmap;
+
+	if (nex < 1)
+		nex = 1;
+	if ((blkmap = malloc(BLKMAP_SIZE(nex))) == NULL) {
+		do_warn("malloc failed in blkmap_alloc (%lu bytes)\n",
+			(unsigned long)BLKMAP_SIZE(nex));
+		return blkmap;
+	}
+	blkmap->naents = nex;
+	blkmap->nents = 0;
+	return blkmap;
+}
+
+/*
+ * Free a block map.
+ *
+ * Releases every entry in use and then the map itself.  A NULL map is
+ * accepted and ignored.
+ */
+void
+blkmap_free(
+	blkmap_t	*blkmap)
+{
+	blkent_t	**entp;
+	xfs_extnum_t	i;
+
+	if (blkmap == NULL)
+		return;
+	for (i = 0, entp = blkmap->ents; i < blkmap->nents; i++, entp++)
+		free(*entp);
+	free(blkmap);
+}
+
+/*
+ * Get one entry from a block map.
+ *
+ * Returns the block number recorded for file offset o, or NULLDFSBNO
+ * if no entry in the map covers that offset.
+ */
+xfs_dfsbno_t
+blkmap_get(
+	blkmap_t	*blkmap,
+	xfs_dfiloff_t	o)
+{
+	blkent_t	*ent;
+	blkent_t	**entp;
+	int		i;
+
+	for (i = 0, entp = blkmap->ents; i < blkmap->nents; i++, entp++) {
+		ent = *entp;
+		if (o >= ent->startoff && o < ent->startoff + ent->nblks)
+			return ent->blks[o - ent->startoff];
+	}
+	return NULLDFSBNO;
+}
+
+/*
+ * Get a chunk of entries from a block map.
+ *
+ * Collects the mapped blocks in the file offset range [o, o + nb) into
+ * an array of extents, merging neighbours that are contiguous both in
+ * file offset and in block number.  The array is allocated with realloc
+ * and handed back through *bmpp (NULL when nothing is mapped); the
+ * return value is the number of extents in it.
+ *
+ * If the array cannot be grown a warning is printed, the partial array
+ * is freed, *bmpp is set to NULL and 0 is returned.  The old code
+ * overwrote the array pointer with the NULL realloc result, still
+ * counted the extent, and so could return uninitialised elements.
+ */
+int
+blkmap_getn(
+	blkmap_t	*blkmap,
+	xfs_dfiloff_t	o,
+	xfs_dfilblks_t	nb,
+	bmap_ext_t	**bmpp)
+{
+	bmap_ext_t	*bmp;
+	bmap_ext_t	*newbmp;
+	blkent_t	*ent;
+	xfs_dfiloff_t	ento;
+	blkent_t	**entp;
+	int		i;
+	int		nex;
+
+	for (i = nex = 0, bmp = NULL, entp = blkmap->ents;
+	     i < blkmap->nents;
+	     i++, entp++) {
+		ent = *entp;
+		/* entries are visited in order; stop once past the range */
+		if (ent->startoff >= o + nb)
+			break;
+		if (ent->startoff + ent->nblks <= o)
+			continue;
+		for (ento = ent->startoff;
+		     ento < ent->startoff + ent->nblks && ento < o + nb;
+		     ento++) {
+			if (ento < o)
+				continue;
+			if (bmp &&
+			    bmp[nex - 1].startoff + bmp[nex - 1].blockcount ==
+				    ento &&
+			    bmp[nex - 1].startblock + bmp[nex - 1].blockcount ==
+				    ent->blks[ento - ent->startoff])
+				bmp[nex - 1].blockcount++;
+			else {
+				/* grow via a temporary; count only on success */
+				newbmp = realloc(bmp, (nex + 1) * sizeof(*bmp));
+				if (newbmp == NULL) {
+					do_warn("realloc failed in blkmap_getn"
+						" (%lu bytes)\n",
+						(unsigned long)
+						((nex + 1) * sizeof(*bmp)));
+					free(bmp);
+					*bmpp = NULL;
+					return 0;
+				}
+				bmp = newbmp;
+				nex++;
+				bmp[nex - 1].startoff = ento;
+				bmp[nex - 1].startblock =
+					ent->blks[ento - ent->startoff];
+				bmp[nex - 1].blockcount = 1;
+				bmp[nex - 1].flag = 0;
+			}
+		}
+	}
+	*bmpp = bmp;
+	return nex;
+}
+
+/*
+ * Make a block map larger.
+ *
+ * Inserts newent into the map at the slot entp points to (which may be
+ * one past the last used slot), shifting later entries up by one.  The
+ * map is reallocated by one slot when it is full, so *blkmapp may
+ * change.
+ *
+ * If the reallocation fails a warning is printed, newent is freed
+ * (nothing can own it any more) and *blkmapp keeps the original,
+ * unmodified map.  The old code stored the NULL realloc result in
+ * *blkmapp, leaking both the map and newent.
+ */
+void
+blkmap_grow(
+	blkmap_t	**blkmapp,
+	blkent_t	**entp,
+	blkent_t	*newent)
+{
+	blkmap_t	*blkmap;
+	blkmap_t	*newmap;
+	size_t		size;
+	int		i;
+	int		idx;
+
+	blkmap = *blkmapp;
+	/* remember the slot as an index: entp dangles if the map moves */
+	idx = (int)(entp - blkmap->ents);
+	if (blkmap->naents == blkmap->nents) {
+		size = BLKMAP_SIZE(blkmap->nents + 1);
+		if ((newmap = realloc(blkmap, size)) == NULL) {
+			/* size is a size_t: cast for a portable conversion */
+			do_warn("realloc failed in blkmap_grow (%lu bytes)\n",
+				(unsigned long)size);
+			free(newent);
+			return;
+		}
+		*blkmapp = blkmap = newmap;
+		blkmap->naents++;
+	}
+	for (i = blkmap->nents; i > idx; i--)
+		blkmap->ents[i] = blkmap->ents[i - 1];
+	blkmap->ents[idx] = newent;
+	blkmap->nents++;
+}
+
+/*
+ * Return the last offset in a block map.
+ *
+ * The value is startoff + nblks of the final entry, i.e. one past the
+ * highest mapped file offset; NULLDFILOFF is returned for an empty map.
+ */
+xfs_dfiloff_t
+blkmap_last_off(
+	blkmap_t	*blkmap)
+{
+	blkent_t	*ent;
+
+	if (!blkmap->nents)
+		return NULLDFILOFF;
+	ent = blkmap->ents[blkmap->nents - 1];
+	return ent->startoff + ent->nblks;
+}
+
+/*
+ * Return the next offset in a block map.
+ *
+ * Iterator over the mapped file offsets.  Pass o == NULLDFILOFF to
+ * start: *t is reset to 0 and the first entry's startoff is returned.
+ * On later calls o is the previously returned offset and *t the index
+ * of the entry holding it; *t is advanced when the walk crosses into
+ * the next entry.  Returns NULLDFILOFF when the map is empty or the
+ * walk is finished.
+ */
+xfs_dfiloff_t
+blkmap_next_off(
+	blkmap_t	*blkmap,
+	xfs_dfiloff_t	o,
+	int		*t)
+{
+	blkent_t	*ent;
+	blkent_t	**entp;
+
+	if (!blkmap->nents)
+		return NULLDFILOFF;
+	if (o == NULLDFILOFF) {
+		*t = 0;
+		ent = blkmap->ents[0];
+		return ent->startoff;
+	}
+	entp = &blkmap->ents[*t];
+	ent = *entp;
+	if (o < ent->startoff + ent->nblks - 1)
+		return o + 1;
+	entp++;
+	if (entp >= &blkmap->ents[blkmap->nents])
+		return NULLDFILOFF;
+	(*t)++;
+	ent = *entp;
+	return ent->startoff;
+}
+
+/*
+ * Set a block value in a block map.
+ *
+ * Records block b for file offset o.  Walking the entries in order,
+ * the offset is handled by the first case that applies:
+ *  - more than one block before an entry: insert a new one-block entry;
+ *  - immediately before an entry: prepend to it;
+ *  - inside an entry: overwrite that slot;
+ *  - immediately after an entry: append to it and, if that closes the
+ *    gap to the following entry, merge the two;
+ *  - past every entry: add a new entry at the end.
+ * *blkmapp may change because the map can be reallocated.
+ *
+ * Allocation failures are reported by the helpers; this routine then
+ * simply leaves the map without the new information.
+ */
+void
+blkmap_set_blk(
+	blkmap_t	**blkmapp,
+	xfs_dfiloff_t	o,
+	xfs_dfsbno_t	b)
+{
+	blkmap_t	*blkmap;
+	blkent_t	*ent;
+	blkent_t	**entp;
+	blkent_t	*merged;
+	blkent_t	*nextent;
+	xfs_dfilblks_t	i;
+
+	blkmap = *blkmapp;
+	for (entp = blkmap->ents; entp < &blkmap->ents[blkmap->nents]; entp++) {
+		ent = *entp;
+		/*
+		 * Compare with "o + 1" rather than "startoff - 1": if
+		 * xfs_dfiloff_t is unsigned (its definition is outside
+		 * this file -- TODO confirm) the subtraction wraps for an
+		 * entry starting at offset 0 and the test is always true.
+		 */
+		if (o + 1 < ent->startoff) {
+			/* never insert a NULL entry into the map */
+			if ((ent = blkent_new(o, b, 1)) != NULL)
+				blkmap_grow(blkmapp, entp, ent);
+			return;
+		}
+		if (o + 1 == ent->startoff) {
+			blkent_prepend(entp, b, 1);
+			return;
+		}
+		if (o >= ent->startoff && o < ent->startoff + ent->nblks) {
+			ent->blks[o - ent->startoff] = b;
+			return;
+		}
+		if (o > ent->startoff + ent->nblks)
+			continue;
+		blkent_append(entp, b, 1);
+		if (entp == &blkmap->ents[blkmap->nents - 1])
+			return;
+		/* the append may have failed and left no entry behind */
+		if ((ent = *entp) == NULL)
+			return;
+		nextent = entp[1];
+		if (ent->startoff + ent->nblks < nextent->startoff)
+			return;
+		/*
+		 * The two entries now touch: fold the next one into this
+		 * one.  Its blocks are copied slot by slot because an
+		 * entry can hold discontiguous block numbers (file-offset
+		 * adjacent extents share an entry), so regenerating them
+		 * as blks[0] + i would record wrong blocks.
+		 */
+		merged = realloc(ent, BLKENT_SIZE(ent->nblks + nextent->nblks));
+		if (merged == NULL) {
+			do_warn("realloc failed in blkmap_set_blk (%lu bytes)\n",
+				(unsigned long)
+				BLKENT_SIZE(ent->nblks + nextent->nblks));
+			return;
+		}
+		for (i = 0; i < nextent->nblks; i++)
+			merged->blks[merged->nblks + i] = nextent->blks[i];
+		merged->nblks += nextent->nblks;
+		*entp = merged;
+		blkmap_shrink(blkmap, &entp[1]);
+		return;
+	}
+	if ((ent = blkent_new(o, b, 1)) != NULL)
+		blkmap_grow(blkmapp, entp, ent);
+}
+
+/*
+ * Set an extent into a block map.
+ *
+ * Records the c blocks starting at block b for the file offsets
+ * starting at o.  An extent that begins exactly at, or beyond, the end
+ * of the last entry is appended or added as a new last entry; anything
+ * else (overlapping or preceding the last entry) is applied one block
+ * at a time through blkmap_set_blk().  *blkmapp may change because the
+ * map can be reallocated.
+ */
+void
+blkmap_set_ext(
+	blkmap_t	**blkmapp,
+	xfs_dfiloff_t	o,
+	xfs_dfsbno_t	b,
+	xfs_dfilblks_t	c)
+{
+	blkmap_t	*blkmap;
+	blkent_t	*ent;
+	blkent_t	**entp;
+	xfs_extnum_t	i;
+
+	blkmap = *blkmapp;
+	if (!blkmap->nents) {
+		/* only count the entry if it was actually allocated */
+		if ((ent = blkent_new(o, b, c)) == NULL)
+			return;
+		blkmap->ents[0] = ent;
+		blkmap->nents = 1;
+		return;
+	}
+	entp = &blkmap->ents[blkmap->nents - 1];
+	ent = *entp;
+	if (ent->startoff + ent->nblks == o) {
+		blkent_append(entp, b, c);
+		return;
+	}
+	if (ent->startoff + ent->nblks < o) {
+		if ((ent = blkent_new(o, b, c)) != NULL)
+			blkmap_grow(blkmapp, &blkmap->ents[blkmap->nents], ent);
+		return;
+	}
+	/* stop if a helper lost the map so it is never dereferenced */
+	for (i = 0; i < c && *blkmapp != NULL; i++)
+		blkmap_set_blk(blkmapp, o + i, b + i);
+}
+
+/*
+ * Make a block map smaller.
+ *
+ * Removes the entry that entp points to: the entry is freed and every
+ * later entry pointer is moved down one slot to close the gap.  The
+ * map's allocation is not reduced, only its count of used entries.
+ */
+void
+blkmap_shrink(
+	blkmap_t	*blkmap,
+	blkent_t	**entp)
+{
+	int		i;
+	int		idx;
+
+	free(*entp);
+	idx = (int)(entp - blkmap->ents);
+	/*
+	 * Shift the tail down over the removed slot.  (The old loop
+	 * copied ents[i - 1] into ents[i], which spread the freed
+	 * pointer over the tail instead of discarding it.)
+	 */
+	for (i = idx + 1; i < blkmap->nents; i++)
+		blkmap->ents[i - 1] = blkmap->ents[i];
+	blkmap->nents--;
+}
diff --git a/repair/bmap.h b/repair/bmap.h
new file mode 100644
index 000000000..0b184ee6e
--- /dev/null
+++ b/repair/bmap.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * Block mapping code taken from xfs_db.
+ */
+
+/*
+ * Block map entry.
+ */
+typedef struct blkent {
+	xfs_dfiloff_t	startoff;	/* file offset mapped by blks[0] */
+	xfs_dfilblks_t	nblks;		/* number of slots in use in blks[] */
+	xfs_dfsbno_t	blks[1];	/* block number per file offset; really nblks long */
+} blkent_t;
+#define	BLKENT_SIZE(n)	\
+	(offsetof(blkent_t, blks) + (sizeof(xfs_dfsbno_t) * (n)))
+
+/*
+ * Block map.
+ */
+typedef struct blkmap {
+	int		naents;		/* entry pointer slots allocated in ents[] */
+	int		nents;		/* entry pointer slots in use */
+	blkent_t	*ents[1];	/* entry pointers; really naents long */
+} blkmap_t;
+#define	BLKMAP_SIZE(n)	\
+	(offsetof(blkmap_t, ents) + (sizeof(blkent_t *) * (n)))
+
+/*
+ * Extent descriptor.
+ */
+typedef struct bmap_ext {
+	xfs_dfiloff_t	startoff;	/* first file offset of the extent */
+	xfs_dfsbno_t	startblock;	/* block number mapped at startoff */
+	xfs_dfilblks_t	blockcount;	/* length of the extent in blocks */
+	int		flag;		/* blkmap_getn() always stores 0 here */
+} bmap_ext_t;
+
+void		blkent_append(blkent_t **entp, xfs_dfsbno_t b,
+			      xfs_dfilblks_t c);
+blkent_t	*blkent_new(xfs_dfiloff_t o, xfs_dfsbno_t b, xfs_dfilblks_t c);
+void		blkent_prepend(blkent_t **entp, xfs_dfsbno_t b,
+			       xfs_dfilblks_t c);
+blkmap_t	*blkmap_alloc(xfs_extnum_t);
+void		blkmap_free(blkmap_t *blkmap);
+xfs_dfsbno_t	blkmap_get(blkmap_t *blkmap, xfs_dfiloff_t o);
+int		blkmap_getn(blkmap_t *blkmap, xfs_dfiloff_t o,
+			    xfs_dfilblks_t nb, bmap_ext_t **bmpp);
+void		blkmap_grow(blkmap_t **blkmapp, blkent_t **entp,
+			    blkent_t *newent);
+xfs_dfiloff_t	blkmap_last_off(blkmap_t *blkmap);
+xfs_dfiloff_t	blkmap_next_off(blkmap_t *blkmap, xfs_dfiloff_t o, int *t);
+void		blkmap_set_blk(blkmap_t **blkmapp, xfs_dfiloff_t o,
+			       xfs_dfsbno_t b);
+void		blkmap_set_ext(blkmap_t **blkmapp, xfs_dfiloff_t o,
+			       xfs_dfsbno_t b, xfs_dfilblks_t c);
+void		blkmap_shrink(blkmap_t *blkmap, blkent_t **entp);
diff --git a/repair/dino_chunks.c b/repair/dino_chunks.c
new file mode 100644
index 000000000..90d2e9f93
--- /dev/null
+++ b/repair/dino_chunks.c
@@ -0,0 +1,1178 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include <libxfs.h>
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dir.h"
+#include "dinode.h"
+#include "versions.h"
+
+/*
+ * validates inode block or chunk, returns # of good inodes
+ * the dinodes are verified using verify_uncertain_dinode() which
+ * means only the basic inode info is checked, no fork checks.
+ *
+ * Reads the single filesystem block agbno of allocation group agno and
+ * counts the inode slots in it for which verify_uncertain_dinode()
+ * returns 0.  Returns 0 (after a warning) if the block cannot be read.
+ */
+
+int
+check_aginode_block(xfs_mount_t	*mp,
+			xfs_agnumber_t	agno,
+			xfs_agblock_t	agbno)
+{
+
+	xfs_dinode_t	*dino_p;
+	int		i;
+	int		cnt = 0;
+	xfs_buf_t	*bp;
+
+	/*
+	 * it's ok to read these possible inode blocks in one at
+	 * a time because they don't belong to known inodes (if
+	 * they did, we'd know about them courtesy of the incore inode
+	 * tree and we wouldn't be here) and we stale the buffers out
+	 * so no one else will overlap them.
+	 */
+	bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, agbno),
+			XFS_FSB_TO_BB(mp, 1), 0);
+	if (!bp) {
+		do_warn("cannot read agbno (%u/%u), disk block %lld\n", agno,
+			agbno, (xfs_daddr_t)XFS_AGB_TO_DADDR(mp, agno, agbno));
+		return(0);
+	}
+
+	/* a zero return from the verifier counts the slot as good */
+	for (i = 0; i < mp->m_sb.sb_inopblock; i++) {
+		dino_p = XFS_MAKE_IPTR(mp, bp, i);
+		if (!verify_uncertain_dinode(mp, dino_p, agno,
+				XFS_OFFBNO_TO_AGINO(mp, agbno, i)))
+			cnt++;
+	}
+
+	libxfs_putbuf(bp);
+	return(cnt);
+}
+
+/*
+ * same check for the block holding filesystem-wide inode number ino:
+ * splits ino into its ag number and ag block and returns the result of
+ * check_aginode_block() for that block.
+ */
+int
+check_inode_block(xfs_mount_t		*mp,
+			xfs_ino_t	ino)
+{
+	return(check_aginode_block(mp, XFS_INO_TO_AGNO(mp, ino),
+					XFS_INO_TO_AGBNO(mp, ino)));
+}
+
+/*
+ * tries to establish if the inode really exists in a valid
+ * inode chunk. returns number of new inodes if things are good
+ * and 0 if bad. start is the start of the discovered inode chunk.
+ * routine assumes that ino is a legal inode number
+ * (verified by verify_inum()). If the inode chunk turns out
+ * to be good, this routine will put the inode chunk into
+ * the good inode chunk tree if required.
+ *
+ * the verify_(ag)inode* family of routines are utility
+ * routines called by check_uncertain_aginodes() and
+ * process_uncertain_aginodes().
+ *
+ * *start_ino is set to NULLFSINO on entry and to the first inode of
+ * the discovered chunk on success.  Three layouts are handled in turn:
+ * one allocation block per chunk allocation (XFS_IALLOC_BLOCKS == 1),
+ * aligned-inode filesystems (fs_aligned_inodes), and the general case
+ * which searches the disk around the inode's block.
+ */
+int
+verify_inode_chunk(xfs_mount_t		*mp,
+			xfs_ino_t		ino,
+			xfs_ino_t		*start_ino)
+{
+	xfs_agnumber_t	agno;
+	xfs_agino_t	agino;
+	xfs_agino_t	start_agino;
+	xfs_agblock_t	agbno;
+	xfs_agblock_t	start_agbno = 0;
+	xfs_agblock_t	end_agbno;
+	xfs_agblock_t	max_agbno;
+	xfs_agblock_t	cur_agbno;
+	xfs_agblock_t	chunk_start_agbno;
+	xfs_agblock_t	chunk_stop_agbno;
+	ino_tree_node_t *irec_before_p = NULL;
+	ino_tree_node_t *irec_after_p = NULL;
+	ino_tree_node_t *irec_p;
+	ino_tree_node_t *irec_next_p;
+	int		irec_cnt;
+	int		ino_cnt = 0;
+	int		num_blks;
+	int		i;
+	int		j;
+	int		state;
+
+	agno = XFS_INO_TO_AGNO(mp, ino);
+	agino = XFS_INO_TO_AGINO(mp, ino);
+	agbno = XFS_INO_TO_AGBNO(mp, ino);
+	*start_ino = NULLFSINO;
+
+	ASSERT(XFS_IALLOC_BLOCKS(mp) > 0);
+
+	/* the last ag may be shorter than sb_agblocks */
+	if (agno == mp->m_sb.sb_agcount - 1)
+		max_agbno = mp->m_sb.sb_dblocks -
+			(xfs_drfsbno_t) mp->m_sb.sb_agblocks * agno;
+	else
+		max_agbno = mp->m_sb.sb_agblocks;
+
+	/*
+	 * is the inode beyond the end of the AG?
+	 */
+	if (agbno >= max_agbno)
+		return(0);
+
+	/*
+	 * check for the easy case, inodes per block >= XFS_INODES_PER_CHUNK
+	 * (multiple chunks per block)
+	 */
+	if (XFS_IALLOC_BLOCKS(mp) == 1) {
+		/* NOTE(review): cannot be true after the >= test above */
+		if (agbno > max_agbno)
+			return(0);
+
+		if (check_inode_block(mp, ino) == 0)
+			return(0);
+
+		switch (state = get_agbno_state(mp, agno, agbno)) {
+		case XR_E_INO:
+			do_warn("uncertain inode block %d/%d already known\n",
+				agno, agbno);
+			break;
+		case XR_E_UNKNOWN:
+		case XR_E_FREE1:
+		case XR_E_FREE:
+			set_agbno_state(mp, agno, agbno, XR_E_INO);
+			break;
+		case XR_E_MULT:
+		case XR_E_INUSE:
+		case XR_E_INUSE_FS:
+		case XR_E_FS_MAP:
+			/*
+			 * if block is already claimed, forget it.
+			 */
+			do_warn(
+			"inode block %d/%d multiply claimed, (state %d)\n",
+				agno, agbno, state);
+			set_agbno_state(mp, agno, agbno, XR_E_MULT);
+			return(0);
+		default:
+			do_warn("inode block %d/%d bad state, (state %d)\n",
+				agno, agbno, state);
+			set_agbno_state(mp, agno, agbno, XR_E_INO);
+			break;
+		}
+
+		start_agino = XFS_OFFBNO_TO_AGINO(mp, agbno, 0);
+		*start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
+
+		/*
+		 * put new inode record(s) into inode tree
+		 */
+		for (j = 0; j < chunks_pblock; j++) {
+			if ((irec_p = find_inode_rec(agno, start_agino))
+					== NULL) {
+				irec_p = set_inode_free_alloc(agno,
+							start_agino);
+				for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
+					set_inode_free(irec_p, i);
+			}
+			/* only the inode that led us here is marked used */
+			if (start_agino <= agino && agino <
+					start_agino + XFS_INODES_PER_CHUNK)
+				set_inode_used(irec_p, agino - start_agino);
+
+			start_agino += XFS_INODES_PER_CHUNK;
+			ino_cnt += XFS_INODES_PER_CHUNK;
+		}
+
+		return(ino_cnt);
+	} else if (fs_aligned_inodes) {
+		/*
+		 * next easy case -- aligned inode filesystem.
+		 * just check out the chunk
+		 */
+		start_agbno = rounddown(XFS_INO_TO_AGBNO(mp, ino),
+					fs_ino_alignment);
+		end_agbno = start_agbno + XFS_IALLOC_BLOCKS(mp);
+
+		/*
+		 * if this fs has aligned inodes but the end of the
+		 * chunk is beyond the end of the ag, this is a bad
+		 * chunk
+		 */
+		if (end_agbno > max_agbno)
+			return(0);
+
+		/*
+		 * check out all blocks in chunk
+		 */
+		ino_cnt = 0;
+		for (cur_agbno = start_agbno; cur_agbno < end_agbno;
+						cur_agbno++) {
+			ino_cnt += check_aginode_block(mp, agno, cur_agbno);
+		}
+
+		/*
+		 * if we lose either 2 blocks worth of inodes or >25% of
+		 * the chunk, just forget it.
+		 */
+		if (ino_cnt < XFS_INODES_PER_CHUNK - 2 * mp->m_sb.sb_inopblock
+				|| ino_cnt < XFS_INODES_PER_CHUNK - 16)
+			return(0);
+
+		/*
+		 * ok, put the record into the tree. we know that it's
+		 * not already there since the inode is guaranteed
+		 * not to be in the tree.
+		 */
+		start_agino = XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0);
+		*start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
+
+		irec_p = set_inode_free_alloc(agno,
+				XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0));
+
+		for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
+			set_inode_free(irec_p, i);
+
+		ASSERT(start_agino <= agino &&
+				start_agino + XFS_INODES_PER_CHUNK > agino);
+
+		set_inode_used(irec_p, agino - start_agino);
+
+		return(XFS_INODES_PER_CHUNK);
+	}
+
+	/*
+	 * hard case -- pre-6.3 filesystem.
+	 * set default start/end agbnos and ensure agbnos are legal.
+	 * we're setting a range [start_agbno, end_agbno) such that
+	 * a discovered inode chunk completely within that range
+	 * would include the inode passed into us.
+	 */
+	if (XFS_IALLOC_BLOCKS(mp) > 1) {
+		if (agino > XFS_IALLOC_INODES(mp))
+			start_agbno = agbno - XFS_IALLOC_BLOCKS(mp) + 1;
+		else
+			start_agbno = 1;
+	}
+
+	end_agbno = agbno + XFS_IALLOC_BLOCKS(mp);
+
+	if (end_agbno > max_agbno)
+		end_agbno = max_agbno;
+
+	/*
+	 * search tree for known inodes within +/- 1 inode chunk range
+	 */
+	irec_before_p = irec_after_p = NULL;
+
+	find_inode_rec_range(agno, XFS_OFFBNO_TO_AGINO(mp, start_agbno, 0),
+		XFS_OFFBNO_TO_AGINO(mp, end_agbno, mp->m_sb.sb_inopblock - 1),
+		&irec_before_p, &irec_after_p);
+
+	/*
+	 * if we have known inode chunks in our search range, establish
+	 * their start and end-points to tighten our search range. range
+	 * is [start, end) -- e.g. max/end agbno is one beyond the
+	 * last block to be examined. the avl routines work this way.
+	 */
+	if (irec_before_p) {
+		/*
+		 * only one inode record in the range, move one boundary in
+		 */
+		if (irec_before_p == irec_after_p) {
+			if (irec_before_p->ino_startnum < agino)
+				start_agbno = XFS_AGINO_TO_AGBNO(mp,
+						irec_before_p->ino_startnum +
+						XFS_INODES_PER_CHUNK);
+			else
+				end_agbno = XFS_AGINO_TO_AGBNO(mp,
+						irec_before_p->ino_startnum);
+		}
+
+		/*
+		 * find the start of the gap in the search range (which
+		 * should contain our unknown inode). if the only irec
+		 * within +/- 1 chunks starts after the inode we're
+		 * looking for, skip this stuff since the end_agbno
+		 * of the range has already been trimmed in to not
+		 * include that irec.
+		 */
+		if (irec_before_p->ino_startnum < agino) {
+			irec_p = irec_before_p;
+			irec_next_p = next_ino_rec(irec_p);
+
+			/* skip over a run of back-to-back known chunks */
+			while(irec_next_p != NULL &&
+				irec_p->ino_startnum + XFS_INODES_PER_CHUNK ==
+					irec_next_p->ino_startnum) {
+				irec_p = irec_next_p;
+				irec_next_p = next_ino_rec(irec_next_p);
+			}
+
+			start_agbno = XFS_AGINO_TO_AGBNO(mp,
+						irec_p->ino_startnum) +
+						XFS_IALLOC_BLOCKS(mp);
+
+			/*
+			 * we know that the inode we're trying to verify isn't
+			 * in an inode chunk so the next ino_rec marks the end
+			 * of the gap -- is it within the search range?
+			 */
+			if (irec_next_p != NULL &&
+					agino + XFS_IALLOC_INODES(mp) >=
+						irec_next_p->ino_startnum)
+				end_agbno = XFS_AGINO_TO_AGBNO(mp,
+						irec_next_p->ino_startnum);
+		}
+
+		ASSERT(start_agbno < end_agbno);
+	}
+
+	/*
+	 * if the gap is too small to contain a chunk, we lose.
+	 * this means that inode chunks known to be good surround
+	 * the inode in question and that the space between them
+	 * is too small for a legal inode chunk
+	 */
+	if (end_agbno - start_agbno < XFS_IALLOC_BLOCKS(mp))
+		return(0);
+
+	/*
+	 * now grunge around the disk, start at the inode block and
+	 * go in each direction until you hit a non-inode block or
+	 * run into a range boundary. A non-inode block is block
+	 * with *no* good inodes in it. Unfortunately, we can't
+	 * co-opt bad blocks into inode chunks (which might take
+	 * care of disk blocks that turn into zeroes) because the
+	 * filesystem could very well allocate two inode chunks
+	 * with a one block file in between and we'd zap the file.
+	 * We're better off just losing the rest of the
+	 * inode chunk instead.
+	 */
+	for (cur_agbno = agbno; cur_agbno >= start_agbno; cur_agbno--) {
+		/*
+		 * if the block has no inodes, it's a bad block so
+		 * break out now without decrementing cur_agbno so
+		 * chunk start blockno will be set to the last good block
+		 */
+		if (!(irec_cnt = check_aginode_block(mp, agno, cur_agbno)))
+			break;
+		ino_cnt += irec_cnt;
+	}
+
+	chunk_start_agbno = cur_agbno + 1;
+
+	for (cur_agbno = agbno + 1; cur_agbno < end_agbno; cur_agbno++) {
+		/*
+		 * if the block has no inodes, it's a bad block so
+		 * break out now without incrementing cur_agbno so
+		 * chunk start blockno will be set to the block
+		 * immediately after the last good block.
+		 */
+		if (!(irec_cnt = check_aginode_block(mp, agno, cur_agbno)))
+			break;
+		ino_cnt += irec_cnt;
+	}
+
+	chunk_stop_agbno = cur_agbno;
+
+	num_blks = chunk_stop_agbno - chunk_start_agbno;
+
+	if (num_blks < XFS_IALLOC_BLOCKS(mp) || ino_cnt == 0)
+		return(0);
+
+	/*
+	 * XXX - later - if the entire range is selected and they're all
+	 * good inodes, keep searching in either direction.
+	 * until you the range of inodes end, then split into chunks
+	 * for now, just take one chunk's worth starting at the lowest
+	 * possible point and hopefully we'll pick the rest up later.
+	 *
+	 * XXX - if we were going to fix up an inode chunk for
+	 * any good inodes in the chunk, this is where we would
+	 * do it. For now, keep it simple and lose the rest of
+	 * the chunk
+	 */
+
+	if (num_blks % XFS_IALLOC_BLOCKS(mp) != 0) {
+		num_blks = rounddown(num_blks, XFS_IALLOC_BLOCKS(mp));
+		chunk_stop_agbno = chunk_start_agbno + num_blks;
+	}
+
+	/*
+	 * ok, we've got a candidate inode chunk. now we have to
+	 * verify that we aren't trying to use blocks that are already
+	 * in use. If so, mark them as multiply claimed since odds
+	 * are very low that we found this chunk by stumbling across
+	 * user data -- we're probably here as a result of a directory
+	 * entry or an iunlinked pointer
+	 */
+	for (j = 0, cur_agbno = chunk_start_agbno;
+			cur_agbno < chunk_stop_agbno; cur_agbno++) {
+		switch (state = get_agbno_state(mp, agno, cur_agbno)) {
+		case XR_E_MULT:
+		case XR_E_INUSE:
+		case XR_E_INUSE_FS:
+		case XR_E_FS_MAP:
+			do_warn(
+		"inode block %d/%d multiply claimed, (state %d)\n",
+				agno, cur_agbno, state);
+			set_agbno_state(mp, agno, cur_agbno, XR_E_MULT);
+			j = 1;
+			break;
+		case XR_E_INO:
+			do_error(
+		"uncertain inode block overlap, agbno = %d, ino = %llu\n",
+				agbno, ino);
+			break;
+		default:
+			break;
+		}
+
+		/* j is the "a block was already claimed" flag set above */
+		if (j)
+			return(0);
+	}
+
+	/*
+	 * ok, chunk is good. put the record into the tree if required,
+	 * and fill in the bitmap. All inodes will be marked as "free"
+	 * except for the one that led us to discover the chunk. That's
+	 * ok because we'll override the free setting later if the
+	 * contents of the inode indicate it's in use.
+	 */
+	start_agino = XFS_OFFBNO_TO_AGINO(mp, chunk_start_agbno, 0);
+	*start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino);
+
+	ASSERT(find_inode_rec(agno, start_agino) == NULL);
+
+	irec_p = set_inode_free_alloc(agno, start_agino);
+	for (i = 1; i < XFS_INODES_PER_CHUNK; i++)
+		set_inode_free(irec_p, i);
+
+	ASSERT(start_agino <= agino &&
+			start_agino + XFS_INODES_PER_CHUNK > agino);
+
+	set_inode_used(irec_p, agino - start_agino);
+
+	for (cur_agbno = chunk_start_agbno;
+			cur_agbno < chunk_stop_agbno; cur_agbno++) {
+		switch (state = get_agbno_state(mp, agno, cur_agbno)) {
+		case XR_E_INO:
+			do_error("uncertain inode block %llu already known\n",
+				XFS_AGB_TO_FSB(mp, agno, cur_agbno));
+			break;
+		case XR_E_UNKNOWN:
+		case XR_E_FREE1:
+		case XR_E_FREE:
+			set_agbno_state(mp, agno, cur_agbno, XR_E_INO);
+			break;
+		case XR_E_MULT:
+		case XR_E_INUSE:
+		case XR_E_INUSE_FS:
+		case XR_E_FS_MAP:
+			do_error(
+		"inode block %d/%d multiply claimed, (state %d)\n",
+				agno, cur_agbno, state);
+			break;
+		default:
+			do_warn("inode block %d/%d bad state, (state %d)\n",
+				agno, cur_agbno, state);
+			set_agbno_state(mp, agno, cur_agbno, XR_E_INO);
+			break;
+		}
+	}
+
+	/*
+	 * NOTE(review): ino_cnt was accumulated over every good block
+	 * found before num_blks was rounded down -- confirm callers only
+	 * test it for non-zero.
+	 */
+	return(ino_cnt);
+}
+
+/*
+ * same as above only for ag inode chunks
+ *
+ * agino is relative to allocation group agno.  *agino_start receives
+ * the ag-relative start of the discovered chunk, or NULLAGINO when
+ * verify_inode_chunk() returns 0; that return value is passed through.
+ */
+int
+verify_aginode_chunk(xfs_mount_t	*mp,
+			xfs_agnumber_t	agno,
+			xfs_agino_t	agino,
+			xfs_agino_t	*agino_start)
+{
+	xfs_ino_t	ino;
+	int		res;
+
+	res = verify_inode_chunk(mp, XFS_AGINO_TO_INO(mp, agno, agino), &ino);
+
+	if (res)
+		*agino_start = XFS_INO_TO_AGINO(mp, ino);
+	else
+		*agino_start = NULLAGINO;
+
+	return(res);
+}
+
+/*
+ * this does the same as the two above only it returns a pointer
+ * to the inode record in the good inode tree
+ *
+ * NULL is returned when the chunk could not be verified.
+ */
+ino_tree_node_t *
+verify_aginode_chunk_irec(xfs_mount_t	*mp,
+			xfs_agnumber_t	agno,
+			xfs_agino_t	agino)
+{
+	xfs_agino_t start_agino;
+	ino_tree_node_t *irec = NULL;
+
+	if (verify_aginode_chunk(mp, agno, agino, &start_agino))
+		irec = find_inode_rec(agno, start_agino);
+
+	return(irec);
+}
+
+
+
+/*
+ * processes an inode allocation chunk/block, returns 1 on I/O errors,
+ * 0 otherwise
+ *
+ * *bogus is set to 1 if the entire set of inodes is bad.
+ *
+ * first_irec is the incore record of the first chunk in the
+ * allocation; further records are fetched with next_ino_rec() when one
+ * allocation holds several chunks.  With ino_discovery set, a first
+ * pass runs verify_dinode() over every inode and gives up (setting
+ * *bogus) if none passes; the second pass runs process_dinode() on
+ * each inode and updates the incore used/free, is-a-directory and
+ * parent state.  num_inos is not referenced in the body.
+ */
+/* ARGSUSED */
+int
+process_inode_chunk(xfs_mount_t *mp, xfs_agnumber_t agno, int num_inos,
+			ino_tree_node_t *first_irec, int ino_discovery,
+			int check_dups, int extra_attr_check, int *bogus)
+{
+	xfs_ino_t		parent;
+	ino_tree_node_t		*ino_rec;
+	xfs_buf_t		*bp;
+	xfs_dinode_t		*dino;
+	int			icnt;
+	int			status;
+	int			is_used;
+	int			state;
+	int			done;
+	int			ino_dirty;
+	int			irec_offset;
+	int			ibuf_offset;
+	xfs_agino_t		agino;
+	xfs_agblock_t		agbno;
+	int			dirty = 0;
+	int			cleared = 0;
+	int			isa_dir = 0;
+
+	ASSERT(first_irec != NULL);
+	ASSERT(XFS_AGINO_TO_OFFSET(mp, first_irec->ino_startnum) == 0);
+
+	*bogus = 0;
+	ASSERT(XFS_IALLOC_BLOCKS(mp) > 0);
+
+	/*
+	 * get all blocks required to read in this chunk (may wind up
+	 * having to process more chunks in a multi-chunk per block fs)
+	 */
+	agbno = XFS_AGINO_TO_AGBNO(mp, first_irec->ino_startnum);
+
+	bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, agbno),
+			XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)), 0);
+	if (!bp) {
+		do_warn("cannot read inode %llu, disk block %lld, cnt %d\n",
+			XFS_AGINO_TO_INO(mp, agno, first_irec->ino_startnum),
+			XFS_AGB_TO_DADDR(mp, agno, agbno),
+			(int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)));
+		return(1);
+	}
+
+	/*
+	 * set up first irec
+	 */
+	ino_rec = first_irec;
+	/*
+	 * initialize counters
+	 */
+	irec_offset = 0;	/* inode index within the current irec */
+	ibuf_offset = 0;	/* inode index within the current block */
+	icnt = 0;		/* inode index within the whole buffer */
+	status = 0;
+	done = 0;
+
+	/*
+	 * verify inode chunk if necessary
+	 */
+	if (ino_discovery) {
+		while (!done) {
+			/*
+			 * make inode pointer
+			 */
+			dino = XFS_MAKE_IPTR(mp, bp, icnt);
+			agino = irec_offset + ino_rec->ino_startnum;
+
+			/*
+			 * we always think that the root and realtime
+			 * inodes are verified even though we may have
+			 * to reset them later to keep from losing the
+			 * chunk that they're in
+			 */
+			if (verify_dinode(mp, dino, agno, agino) == 0 ||
+					agno == 0 &&
+					(mp->m_sb.sb_rootino == agino ||
+					 mp->m_sb.sb_rsumino == agino ||
+					 mp->m_sb.sb_rbmino == agino))
+				status++;
+
+			irec_offset++;
+			icnt++;
+
+			if (icnt == XFS_IALLOC_INODES(mp) &&
+					irec_offset == XFS_INODES_PER_CHUNK) {
+				/*
+				 * done! - finished up irec and block
+				 * simultaneously
+				 */
+				libxfs_putbuf(bp);
+				done = 1;
+				break;
+			} else if (irec_offset == XFS_INODES_PER_CHUNK) {
+				/*
+				 * get new irec (multiple chunks per block fs)
+				 */
+				ino_rec = next_ino_rec(ino_rec);
+				ASSERT(ino_rec->ino_startnum == agino + 1);
+				irec_offset = 0;
+			}
+		}
+
+		/*
+		 * if chunk/block is bad, blow it off. the inode records
+		 * will be deleted by the caller if appropriate.
+		 */
+		if (!status) {
+			*bogus = 1;
+			/*
+			 * NOTE(review): the loop above only exits with
+			 * done set and the buffer released, so this
+			 * branch looks unreachable.
+			 */
+			if (!done) /* already free'd */
+				libxfs_putbuf(bp);
+			return(0);
+		}
+
+		/*
+		 * reset irec and counters
+		 */
+		ino_rec = first_irec;
+
+		irec_offset = 0;
+		ibuf_offset = 0;
+		icnt = 0;
+		status = 0;
+		done = 0;
+
+		/* nathans TODO ... memory leak here?: */
+
+		/*
+		 * get first block
+		 */
+		bp = libxfs_readbuf(mp->m_dev,
+				XFS_AGB_TO_DADDR(mp, agno, agbno),
+				XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)), 0);
+		if (!bp) {
+			do_warn("can't read inode %llu, disk block %lld, "
+				"cnt %d\n", XFS_AGINO_TO_INO(mp, agno, agino),
+				XFS_AGB_TO_DADDR(mp, agno, agbno),
+				(int)XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)));
+			return(1);
+		}
+	}
+
+	/*
+	 * mark block as an inode block in the incore bitmap
+	 */
+	switch (state = get_agbno_state(mp, agno, agbno)) {
+	case XR_E_INO:	/* already marked */
+		break;
+	case XR_E_UNKNOWN:
+	case XR_E_FREE:
+	case XR_E_FREE1:
+		set_agbno_state(mp, agno, agbno, XR_E_INO);
+		break;
+	case XR_E_BAD_STATE:
+		do_error("bad state in block map %d\n", state);
+		break;
+	default:
+		set_agbno_state(mp, agno, agbno, XR_E_MULT);
+		do_warn("inode block %llu multiply claimed, state was %d\n",
+			XFS_AGB_TO_FSB(mp, agno, agbno), state);
+		break;
+	}
+
+	while (!done) {
+		/*
+		 * make inode pointer
+		 */
+		dino = XFS_MAKE_IPTR(mp, bp, icnt);
+		agino = irec_offset + ino_rec->ino_startnum;
+
+		/* 3 is a sentinel: process_dinode() must overwrite it */
+		is_used = 3;
+		ino_dirty = 0;
+		parent = 0;
+
+		status = process_dinode(mp, dino, agno, agino,
+				is_inode_free(ino_rec, irec_offset),
+				&ino_dirty, &cleared, &is_used,
+				ino_discovery, check_dups,
+				extra_attr_check, &isa_dir, &parent);
+
+		ASSERT(is_used != 3);
+		if (ino_dirty)
+			dirty = 1;
+		/*
+		 * XXX - if we want to try and keep
+		 * track of whether we need to bang on
+		 * the inode maps (instead of just
+		 * blindly reconstructing them like
+		 * we do now, this is where to start.
+		 */
+		if (is_used) {
+			if (is_inode_free(ino_rec, irec_offset)) {
+				if (verbose || no_modify ||
+				    XFS_AGINO_TO_INO(mp, agno, agino) !=
+							old_orphanage_ino) {
+					do_warn("imap claims in-use inode %llu"
+						" is free, ",
+						XFS_AGINO_TO_INO(mp, agno,
+						agino));
+				}
+
+				if (verbose || (!no_modify &&
+				    XFS_AGINO_TO_INO(mp, agno, agino) !=
+						old_orphanage_ino))
+					do_warn("correcting imap\n");
+				else
+					do_warn("would correct imap\n");
+			}
+			set_inode_used(ino_rec, irec_offset);
+		} else {
+			set_inode_free(ino_rec, irec_offset);
+		}
+
+		/*
+		 * if we lose the root inode, or it turns into
+		 * a non-directory, that allows us to double-check
+		 * later whether or not we need to reinitialize it.
+		 */
+		if (isa_dir) {
+			set_inode_isadir(ino_rec, irec_offset);
+			/*
+			 * we always set the parent but
+			 * we may as well wait until
+			 * phase 4 (no inode discovery)
+			 * because the parent info will
+			 * be solid then.
+			 */
+			if (!ino_discovery) {
+				ASSERT(parent != 0);
+				set_inode_parent(ino_rec, irec_offset, parent);
+				ASSERT(parent ==
+					get_inode_parent(ino_rec, irec_offset));
+			}
+		} else {
+			clear_inode_isadir(ino_rec, irec_offset);
+		}
+
+		/*
+		 * non-zero status: report the inode as cleared (or as
+		 * "would clear" under no_modify) and flag the special
+		 * inodes via the need_* globals
+		 */
+		if (status) {
+			if (mp->m_sb.sb_rootino ==
+					XFS_AGINO_TO_INO(mp, agno, agino)) {
+				need_root_inode = 1;
+
+				if (!no_modify) {
+					do_warn("cleared root inode %llu\n",
+						XFS_AGINO_TO_INO(mp, agno,
+						agino));
+				} else {
+					do_warn("would clear root inode %llu\n",
+						XFS_AGINO_TO_INO(mp, agno,
+						agino));
+				}
+			} else if (mp->m_sb.sb_rbmino ==
+					XFS_AGINO_TO_INO(mp, agno, agino)) {
+				need_rbmino = 1;
+
+				if (!no_modify) {
+					do_warn("cleared realtime bitmap "
+						"inode %llu\n",
+						XFS_AGINO_TO_INO(mp, agno,
+						agino));
+				} else {
+					do_warn("would clear realtime bitmap "
+						"inode %llu\n",
+						XFS_AGINO_TO_INO(mp, agno,
+						agino));
+				}
+			} else if (mp->m_sb.sb_rsumino ==
+					XFS_AGINO_TO_INO(mp, agno, agino)) {
+				need_rsumino = 1;
+
+				if (!no_modify) {
+					do_warn("cleared realtime summary "
+						"inode %llu\n",
+						XFS_AGINO_TO_INO(mp, agno,
+						agino));
+				} else {
+					do_warn("would clear realtime summary "
+						"inode %llu\n",
+						XFS_AGINO_TO_INO(mp, agno,
+						agino));
+				}
+			} else if (!no_modify) {
+				do_warn("cleared inode %llu\n",
+					XFS_AGINO_TO_INO(mp, agno, agino));
+			} else {
+				do_warn("would have cleared inode %llu\n",
+					XFS_AGINO_TO_INO(mp, agno, agino));
+			}
+		}
+
+		irec_offset++;
+		ibuf_offset++;
+		icnt++;
+
+		if (icnt == XFS_IALLOC_INODES(mp) &&
+				irec_offset == XFS_INODES_PER_CHUNK) {
+			/*
+			 * done! - finished up irec and block simultaneously
+			 */
+			/* write back only if changed and modifying is allowed */
+			if (dirty && !no_modify)
+				libxfs_writebuf(bp, 0);
+			else
+				libxfs_putbuf(bp);
+
+			done = 1;
+			break;
+		} else if (ibuf_offset == mp->m_sb.sb_inopblock) {
+			/*
+			 * mark block as an inode block in the incore bitmap
+			 * and reset inode buffer offset counter
+			 */
+			ibuf_offset = 0;
+			agbno++;
+
+			switch (state = get_agbno_state(mp, agno, agbno)) {
+			case XR_E_INO:	/* already marked */
+				break;
+			case XR_E_UNKNOWN:
+			case XR_E_FREE:
+			case XR_E_FREE1:
+				set_agbno_state(mp, agno, agbno, XR_E_INO);
+				break;
+			case XR_E_BAD_STATE:
+				do_error( "bad state in block map %d\n",
+					state);
+				break;
+			default:
+				set_agbno_state(mp, agno, agbno, XR_E_MULT);
+				do_warn("inode block %llu multiply claimed, "
+					"state was %d\n",
+					XFS_AGB_TO_FSB(mp, agno, agbno), state);
+				break;
+			}
+
+		} else if (irec_offset == XFS_INODES_PER_CHUNK) {
+			/*
+			 * get new irec (multiple chunks per block fs)
+			 */
+			ino_rec = next_ino_rec(ino_rec);
+			ASSERT(ino_rec->ino_startnum == agino + 1);
+			irec_offset = 0;
+		}
+	}
+	return(0);
+}
+
+/*
+ * check all inodes mentioned in the ag's incore inode maps.
+ * the map may be incomplete. If so, we'll catch the missing
+ * inodes (hopefully) when we traverse the directory tree.
+ * check_dirs is set to 1 if directory inodes should be
+ * processed for internal consistency, parent setting and
+ * discovery of unknown inodes. this only happens
+ * in phase 3. check_dups is set to 1 if we're looking for
+ * inodes that reference duplicate blocks so we can trash
+ * the inode right then and there. this is set only in
+ * phase 4 after we've run through and set the bitmap once.
+ */
+/*
+ * Implementation summary: starting from findfirst_inode_rec(agno), the
+ * incore inode records are consumed in runs that add up to
+ * XFS_IALLOC_INODES(mp) inodes.  Each run is handed to
+ * process_inode_chunk() together with ino_discovery, check_dups and
+ * extra_attr_check, which are passed through unchanged.  When
+ * process_inode_chunk() reports the run as bogus, every record of the
+ * run is pulled out of the tree and freed.  A non-zero return from
+ * process_inode_chunk() aborts the program.
+ *
+ * NOTE(review): the comment above talks about "check_dirs"; the
+ * parameter that exists here is ino_discovery - presumably the same
+ * flag under an older name, confirm.
+ */
+void
+process_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno,
+	int ino_discovery, int check_dups, int extra_attr_check)
+{
+	int num_inos, bogus;
+	ino_tree_node_t *ino_rec, *first_ino_rec, *prev_ino_rec;
+
+	/* first_ino_rec marks the start of the run, ino_rec its last record */
+	first_ino_rec = ino_rec = findfirst_inode_rec(agno);
+	while (ino_rec != NULL) {
+		/*
+		 * paranoia - step through inode records until we step
+		 * through a full allocation of inodes. this could
+		 * be an issue in big-block filesystems where a block
+		 * can hold more than one inode chunk. make sure to
+		 * grab the record corresponding to the beginning of
+		 * the next block before we call the processing routines.
+		 */
+		num_inos = XFS_INODES_PER_CHUNK;
+		while (num_inos < XFS_IALLOC_INODES(mp) && ino_rec != NULL) {
+			ASSERT(ino_rec != NULL);
+			/*
+			 * inodes chunks will always be aligned and sized
+			 * correctly
+			 */
+			if ((ino_rec = next_ino_rec(ino_rec)) != NULL)
+				num_inos += XFS_INODES_PER_CHUNK;
+		}
+
+		/* a short run (tree ended early) trips this assertion */
+		ASSERT(num_inos == XFS_IALLOC_INODES(mp));
+
+		if (process_inode_chunk(mp, agno, num_inos, first_ino_rec,
+				ino_discovery, check_dups, extra_attr_check, &bogus)) {
+			/* XXX - i/o error, we've got a problem */
+			abort();
+		}
+
+		if (!bogus)
+			/* good run: continue with the record after it */
+			first_ino_rec = ino_rec = next_ino_rec(ino_rec);
+		else {
+			/*
+			 * inodes pointed to by this record are
+			 * completely bogus, blow the records for
+			 * this chunk out.
+			 * the inode block(s) will get reclaimed
+			 * in phase 4 when the block map is
+			 * reconstructed after inodes claiming
+			 * duplicate blocks are deleted.
+			 */
+			num_inos = 0;
+			ino_rec = first_ino_rec;
+			while (num_inos < XFS_IALLOC_INODES(mp) &&
+					ino_rec != NULL) {
+				prev_ino_rec = ino_rec;
+
+				/*
+				 * fetch the successor before the current
+				 * record is removed and freed below
+				 */
+				if ((ino_rec = next_ino_rec(ino_rec)) != NULL)
+					num_inos += XFS_INODES_PER_CHUNK;
+
+				get_inode_rec(agno, prev_ino_rec);
+				free_inode_rec(agno, prev_ino_rec);
+			}
+
+			first_ino_rec = ino_rec;
+		}
+	}
+}
+
+/*
+ * verify the uncertain inode list for an ag.
+ * Good inodes get moved into the good inode tree.
+ * Nothing is returned (the routine is void).  As a side-effect the
+ * entire uncertain inode tree for the ag is torn down: every record
+ * is removed and freed once its confirmed inodes have been looked at.
+ */
+void
+check_uncertain_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
+{
+	ino_tree_node_t	*uncertain;	/* uncertain record being drained */
+	ino_tree_node_t	*known = NULL;	/* last lookup in the good tree */
+	xfs_agino_t	chunk_start;	/* filled in by verify_aginode_chunk */
+	xfs_agino_t	slot;
+	xfs_agino_t	agino;
+	int		got_some = 0;
+
+	clear_uncertain_ino_cache(agno);
+
+	uncertain = findfirst_uncertain_inode_rec(agno);
+	if (uncertain == NULL)
+		return;
+
+	/*
+	 * For each suspected inode: skip it if its number is bad or if it
+	 * lies inside the good-tree record found by the previous lookup.
+	 * Otherwise look it up in the good tree, and if it is not there,
+	 * let verify_aginode_chunk() check the chunk it belongs to.
+	 */
+	do_warn("found inodes not in the inode allocation tree\n");
+
+	while (uncertain != NULL) {
+		/*
+		 * "confirmed" here only means we really suspect the
+		 * slot to be an inode
+		 */
+		for (slot = 0; slot < XFS_INODES_PER_CHUNK; slot++) {
+			if (!is_inode_confirmed(uncertain, slot))
+				continue;
+
+			agino = slot + uncertain->ino_startnum;
+
+			if (verify_aginum(mp, agno, agino))
+				continue;
+
+			if (known != NULL &&
+			    agino >= known->ino_startnum &&
+			    agino < known->ino_startnum + XFS_INODES_PER_CHUNK)
+				continue;
+
+			known = find_inode_rec(agno, agino);
+			if (known == NULL &&
+			    !verify_aginum(mp, agno, agino) &&
+			    verify_aginode_chunk(mp, agno, agino, &chunk_start))
+				got_some = 1;
+		}
+
+		/* done with this record: unlink it and give it back */
+		get_uncertain_inode_rec(agno, uncertain);
+		free_inode_rec(agno, uncertain);
+
+		uncertain = findfirst_uncertain_inode_rec(agno);
+	}
+
+	if (got_some)
+		do_warn("found inodes not in the inode allocation tree\n");
+}
+
+/*
+ * verify and process the uncertain inodes for an ag.
+ * this is different from check_uncertain_aginodes() in that we can't just
+ * move the good inodes into the good inode tree and let
+ * process_aginodes() deal with them because this gets called
+ * after process_aginodes() has been run on the ag inode tree.
+ * So we have to process the inodes as well as verify since
+ * we don't want to rerun process_aginodes() on a tree that has
+ * mostly been processed.
+ *
+ * Note that if this routine does process some inodes, it can
+ * add uncertain inodes to any ag which would require that
+ * the routine be called again to process those newly-added
+ * uncertain inodes.
+ *
+ * returns 0 if no inodes were processed and 1 if inodes
+ * were processed (and it is possible that new uncertain
+ * inodes were discovered).
+ *
+ * NOTE(review): as written the code returns 0 only when the uncertain
+ * tree is empty on entry; otherwise it returns 1 even if got_some
+ * stayed 0.  Confirm which behaviour callers rely on.
+ *
+ * as a side-effect, this routine tears down the uncertain
+ * inode tree for the ag.
+ */
+int
+process_uncertain_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno)
+{
+	ino_tree_node_t *irec;
+	ino_tree_node_t *nrec;
+	xfs_agino_t agino;
+	int i;
+	int bogus;
+	int cnt;
+	int got_some;
+
+#ifdef XR_INODE_TRACE
+	fprintf(stderr, "in process_uncertain_aginodes, agno = %d\n", agno);
+#endif
+
+	got_some = 0;
+
+	clear_uncertain_ino_cache(agno);
+
+	if ((irec = findfirst_uncertain_inode_rec(agno)) == NULL)
+		return(0);
+
+	/* nrec caches the most recent good-tree record we hit or created */
+	nrec = NULL;
+
+	do {
+		/*
+		 * check every confirmed inode
+		 */
+		for (cnt = i = 0; i < XFS_INODES_PER_CHUNK; i++) {
+			if (!is_inode_confirmed(irec, i))
+				continue;
+			cnt++;
+			agino = i + irec->ino_startnum;
+#ifdef XR_INODE_TRACE
+			fprintf(stderr, "ag inode = %d (0x%x)\n", agino, agino);
+#endif
+			/*
+			 * skip over inodes already processed (in the
+			 * good tree), bad inode numbers, and inode numbers
+			 * pointing to bogus inodes
+			 */
+			if (verify_aginum(mp, agno, agino))
+				continue;
+
+			/* falls inside the record cached in nrec */
+			if (nrec != NULL && nrec->ino_startnum <= agino &&
+					agino < nrec->ino_startnum +
+					XFS_INODES_PER_CHUNK)
+				continue;
+
+			if ((nrec = find_inode_rec(agno, agino)) != NULL)
+				continue;
+
+			/*
+			 * verify the chunk.  if good, it will be
+			 * added to the good inode tree.
+			 */
+			if ((nrec = verify_aginode_chunk_irec(mp,
+						agno, agino)) == NULL)
+				continue;
+
+			got_some = 1;
+
+			/*
+			 * process the inode record we just added
+			 * to the good inode tree.  The inode
+			 * processing may add more records to the
+			 * uncertain inode lists.
+			 */
+			if (process_inode_chunk(mp, agno, XFS_IALLOC_INODES(mp),
+						nrec, 1, 0, 0, &bogus)) {
+				/* XXX - i/o error, we've got a problem */
+				abort();
+			}
+		}
+
+		/* an uncertain record with no confirmed inode is unexpected */
+		ASSERT(cnt != 0);
+		/*
+		 * now return the uncertain inode record to the free pool
+		 * and pull another one off the list for processing
+		 */
+		get_uncertain_inode_rec(agno, irec);
+		free_inode_rec(agno, irec);
+
+		irec = findfirst_uncertain_inode_rec(agno);
+	} while (irec != NULL);
+
+	if (got_some)
+		do_warn("found inodes not in the inode allocation tree\n");
+
+	/* reached whenever at least one uncertain record existed on entry */
+	return(1);
+}
diff --git a/repair/dinode.c b/repair/dinode.c
new file mode 100644
index 000000000..2dcd98298
--- /dev/null
+++ b/repair/dinode.c
@@ -0,0 +1,2914 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+/*
+ * NOTE(review): the header name of the first #include below is missing
+ * in this copy of the patch (an angle-bracket name was presumably lost
+ * in extraction) - restore it from the original commit.
+ */
+#include
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dir.h"
+#include "dir2.h"
+#include "dinode.h"
+#include "scan.h"
+#include "versions.h"
+#include "attr_repair.h"
+#include "bmap.h"
+
+/*
+ * inode clearing routines
+ */
+
+/*
+ * return the offset into the inode where the attribute fork starts
+ *
+ * The result is the byte offset of di_u inside the on-disk inode plus
+ * a size that depends on the data fork format (di_format).  An unknown
+ * format is reported through do_error() and followed by abort().
+ * mp is not used.
+ */
+/* ARGSUSED */
+int
+calc_attr_offset(xfs_mount_t *mp, xfs_dinode_t *dino)
+{
+	xfs_dinode_core_t *dinoc = &dino->di_core;
+	/* byte distance from the start of the dinode to the start of di_u */
+	int offset = ((__psint_t) &dino->di_u)
+			- (__psint_t)dino;
+
+	/*
+	 * don't worry about alignment when calculating offset
+	 * because the data fork is already 8-byte aligned
+	 */
+	switch (dinoc->di_format) {
+	case XFS_DINODE_FMT_DEV:
+		offset += sizeof(dev_t);
+		break;
+	case XFS_DINODE_FMT_LOCAL:
+		/* inline data: fork length is the file size */
+		offset += INT_GET(dinoc->di_size, ARCH_CONVERT);
+		break;
+	case XFS_DINODE_FMT_UUID:
+		offset += sizeof(uuid_t);
+		break;
+	case XFS_DINODE_FMT_EXTENTS:
+		/* one extent record per data extent */
+		offset += INT_GET(dinoc->di_nextents, ARCH_CONVERT) * sizeof(xfs_bmbt_rec_32_t);
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		/* sized from the record count stored in the btree root */
+		offset += INT_GET(dino->di_u.di_bmbt.bb_numrecs, ARCH_CONVERT) * sizeof(xfs_bmbt_rec_32_t);
+		break;
+	default:
+		do_error("Unknown inode format.\n");
+		abort();
+		break;
+	}
+
+	return(offset);
+}
+
+/*
+ * Remove the attribute fork from an on-disk inode.
+ *
+ * A message naming ino_num is written to stderr.  When no_modify is
+ * set nothing is changed.  Otherwise di_anextents is zeroed, di_aformat
+ * is set to XFS_DINODE_FMT_EXTENTS, an empty shortform attribute header
+ * is written where the attribute fork lives, and di_forkoff is cleared.
+ * The caller must only pass inodes with a non-zero di_forkoff (asserted).
+ * Always returns 1.  mp is not used.
+ */
+/* ARGSUSED */
+int
+clear_dinode_attr(xfs_mount_t *mp, xfs_dinode_t *dino, xfs_ino_t ino_num)
+{
+	xfs_dinode_core_t *dinoc = &dino->di_core;
+
+	ASSERT(dinoc->di_forkoff != 0);
+
+	if (!no_modify)
+		fprintf(stderr, "clearing inode %llu attributes \n", ino_num);
+	else
+		fprintf(stderr, "would have cleared inode %llu attributes\n",
+			ino_num);
+
+	if (INT_GET(dinoc->di_anextents, ARCH_CONVERT) != 0) {
+		if (no_modify)
+			return(1);
+		INT_ZERO(dinoc->di_anextents, ARCH_CONVERT);
+	}
+
+	if (dinoc->di_aformat != XFS_DINODE_FMT_EXTENTS) {
+		if (no_modify)
+			return(1);
+		dinoc->di_aformat = XFS_DINODE_FMT_EXTENTS;
+	}
+
+	/* get rid of the fork by clearing forkoff */
+
+	/* Originally, when the attr repair code was added, the fork was cleared
+	 * by turning it into shortform status.  This meant clearing the
+	 * hdr.totsize/count fields and also changing aformat to LOCAL
+	 * (vs EXTENTS).  Over various fixes, the aformat and forkoff have
+	 * been updated to not show an attribute fork at all, however.
+	 * It could be possible that resetting totsize/count are not needed,
+	 * but just to be safe, leave it in for now.
+	 */
+
+	if (!no_modify) {
+		xfs_attr_shortform_t *asf = (xfs_attr_shortform_t *)
+				XFS_DFORK_APTR_ARCH(dino, ARCH_CONVERT);
+		INT_SET(asf->hdr.totsize, ARCH_CONVERT,
+			sizeof(xfs_attr_sf_hdr_t));
+		INT_SET(asf->hdr.count, ARCH_CONVERT, 0);
+		dinoc->di_forkoff = 0;  /* got to do this after asf is set */
+	}
+
+	/*
+	 * always returns 1 since the fork gets zapped
+	 */
+	return(1);
+}
+
+/*
+ * Reset the inode core to the "cleared" state.
+ *
+ * Each field is compared with its cleared value (valid magic, a version
+ * acceptable for this filesystem, EXTENTS fork formats, everything else
+ * zero).  In no_modify mode the function returns 1 at the first field
+ * that differs without touching anything.  Otherwise every differing
+ * field is rewritten and the return value is 1 if at least one field
+ * was changed, 0 if the core was already clear.  ino_num is not used.
+ */
+/* ARGSUSED */
+int
+clear_dinode_core(xfs_dinode_core_t *dinoc, xfs_ino_t ino_num)
+{
+	int dirty = 0;
+
+	if (INT_GET(dinoc->di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) {
+		dirty = 1;
+
+		if (no_modify)
+			return(1);
+
+		INT_SET(dinoc->di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
+	}
+
+	/* version 2 inodes are only acceptable when fs_inode_nlink is set */
+	if (!XFS_DINODE_GOOD_VERSION(dinoc->di_version) ||
+	    (!fs_inode_nlink && dinoc->di_version > XFS_DINODE_VERSION_1)) {
+		dirty = 1;
+
+		if (no_modify)
+			return(1);
+
+		dinoc->di_version = (fs_inode_nlink) ? XFS_DINODE_VERSION_2
+						: XFS_DINODE_VERSION_1;
+	}
+
+	if (INT_GET(dinoc->di_mode, ARCH_CONVERT) != 0) {
+		dirty = 1;
+
+		if (no_modify)
+			return(1);
+
+		INT_ZERO(dinoc->di_mode, ARCH_CONVERT);
+	}
+
+	if (INT_GET(dinoc->di_flags, ARCH_CONVERT) != 0) {
+		dirty = 1;
+
+		if (no_modify)
+			return(1);
+
+		INT_ZERO(dinoc->di_flags, ARCH_CONVERT);
+	}
+
+	if (INT_GET(dinoc->di_dmevmask, ARCH_CONVERT) != 0) {
+		dirty = 1;
+
+		if (no_modify)
+			return(1);
+
+		INT_ZERO(dinoc->di_dmevmask, ARCH_CONVERT);
+	}
+
+	if (dinoc->di_forkoff != 0) {
+		dirty = 1;
+
+		if (no_modify)
+			return(1);
+
+		dinoc->di_forkoff = 0;
+	}
+
+	if (dinoc->di_format != XFS_DINODE_FMT_EXTENTS) {
+		dirty = 1;
+
+		if (no_modify)
+			return(1);
+
+		dinoc->di_format = XFS_DINODE_FMT_EXTENTS;
+	}
+
+	if (dinoc->di_aformat != XFS_DINODE_FMT_EXTENTS) {
+		dirty = 1;
+
+		if (no_modify)
+			return(1);
+
+		dinoc->di_aformat = XFS_DINODE_FMT_EXTENTS;
+	}
+
+	if (INT_GET(dinoc->di_size, ARCH_CONVERT) != 0) {
+		dirty = 1;
+
+		if (no_modify)
+			return(1);
+
+		INT_ZERO(dinoc->di_size, ARCH_CONVERT);
+	}
+
+	if (INT_GET(dinoc->di_nblocks, ARCH_CONVERT) != 0) {
+		dirty = 1;
+
+		if (no_modify)
+			return(1);
+
+		INT_ZERO(dinoc->di_nblocks, ARCH_CONVERT);
+	}
+
+	if (INT_GET(dinoc->di_onlink, ARCH_CONVERT) != 0) {
+		dirty = 1;
+
+		if (no_modify)
+			return(1);
+
+		INT_ZERO(dinoc->di_onlink, ARCH_CONVERT);
+	}
+
+	if (INT_GET(dinoc->di_nextents, ARCH_CONVERT) != 0) {
+		dirty = 1;
+
+		if (no_modify)
+			return(1);
+
+		INT_ZERO(dinoc->di_nextents, ARCH_CONVERT);
+	}
+
+	if (INT_GET(dinoc->di_anextents, ARCH_CONVERT) != 0) {
+		dirty = 1;
+
+		if (no_modify)
+			return(1);
+
+		INT_ZERO(dinoc->di_anextents, ARCH_CONVERT);
+	}
+
+	/* di_nlink is only looked at for inodes newer than version 1 */
+	if (dinoc->di_version > XFS_DINODE_VERSION_1 &&
+			INT_GET(dinoc->di_nlink, ARCH_CONVERT) != 0) {
+		dirty = 1;
+
+		if (no_modify)
+			return(1);
+
+		INT_ZERO(dinoc->di_nlink, ARCH_CONVERT);
+	}
+
+	return(dirty);
+}
+
+/* ARGSUSED */
+int
+clear_dinode_unlinked(xfs_mount_t *mp, xfs_dinode_t *dino)
+{
+
+	if
(dino->di_next_unlinked != NULLAGINO) { + if (!no_modify) + dino->di_next_unlinked = NULLAGINO; + return(1); + } + + return(0); +} + +/* + * this clears the unlinked list too so it should not be called + * until after the agi unlinked lists are walked in phase 3. + * returns > zero if the inode has been altered while being cleared + */ +int +clear_dinode(xfs_mount_t *mp, xfs_dinode_t *dino, xfs_ino_t ino_num) +{ + int dirty; + + dirty = clear_dinode_core(&dino->di_core, ino_num); + dirty += clear_dinode_unlinked(mp, dino); + + /* and clear the forks */ + + if (dirty && !no_modify) + bzero(&dino->di_u, XFS_LITINO(mp)); + + return(dirty); +} + + +/* + * misc. inode-related utility routines + */ + +/* + * returns 0 if inode number is valid, 1 if bogus + */ +int +verify_inum(xfs_mount_t *mp, + xfs_ino_t ino) +{ + xfs_agnumber_t agno; + xfs_agino_t agino; + xfs_agblock_t agbno; + xfs_sb_t *sbp = &mp->m_sb;; + + /* range check ag #, ag block. range-checking offset is pointless */ + + agno = XFS_INO_TO_AGNO(mp, ino); + agino = XFS_INO_TO_AGINO(mp, ino); + agbno = XFS_AGINO_TO_AGBNO(mp, agino); + + if (ino == 0 || ino == NULLFSINO) + return(1); + + if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) + return(1); + + if (agno >= sbp->sb_agcount || + (agno < sbp->sb_agcount && agbno >= sbp->sb_agblocks) || + (agno == sbp->sb_agcount && agbno >= sbp->sb_dblocks - + (sbp->sb_agcount-1) * sbp->sb_agblocks) || + (agbno == 0)) + return(1); + + return(0); +} + +/* + * have a separate routine to ensure that we don't accidentally + * lose illegally set bits in the agino by turning it into an FSINO + * to feed to the above routine + */ +int +verify_aginum(xfs_mount_t *mp, + xfs_agnumber_t agno, + xfs_agino_t agino) +{ + xfs_agblock_t agbno; + xfs_sb_t *sbp = &mp->m_sb;; + + /* range check ag #, ag block. 
range-checking offset is pointless */ + + if (agino == 0 || agino == NULLAGINO) + return(1); + + /* + * agino's can't be too close to NULLAGINO because the min blocksize + * is 9 bits and at most 1 bit of that gets used for the inode offset + * so if the agino gets shifted by the # of offset bits and compared + * to the legal agbno values, a bogus agino will be too large. there + * will be extra bits set at the top that shouldn't be set. + */ + agbno = XFS_AGINO_TO_AGBNO(mp, agino); + + if (agno >= sbp->sb_agcount || + (agno < sbp->sb_agcount && agbno >= sbp->sb_agblocks) || + (agno == sbp->sb_agcount && agbno >= sbp->sb_dblocks - + (sbp->sb_agcount-1) * sbp->sb_agblocks) || + (agbno == 0)) + return(1); + + return(0); +} + +/* + * return 1 if block number is good, 0 if out of range + */ +int +verify_dfsbno(xfs_mount_t *mp, + xfs_dfsbno_t fsbno) +{ + xfs_agnumber_t agno; + xfs_agblock_t agbno; + xfs_sb_t *sbp = &mp->m_sb;; + + /* range check ag #, ag block. range-checking offset is pointless */ + + agno = XFS_FSB_TO_AGNO(mp, fsbno); + agbno = XFS_FSB_TO_AGBNO(mp, fsbno); + + if (agno >= sbp->sb_agcount || + (agno < sbp->sb_agcount && agbno >= sbp->sb_agblocks) || + (agno == sbp->sb_agcount && agbno >= sbp->sb_dblocks - + (sbp->sb_agcount-1) * sbp->sb_agblocks)) + return(0); + + return(1); +} + +int +verify_agbno(xfs_mount_t *mp, + xfs_agnumber_t agno, + xfs_agblock_t agbno) +{ + xfs_sb_t *sbp = &mp->m_sb;; + + /* range check ag #, ag block. 
range-checking offset is pointless */ + + if (agno >= sbp->sb_agcount || + (agno < sbp->sb_agcount && agbno >= sbp->sb_agblocks) || + (agno == sbp->sb_agcount && agbno >= sbp->sb_dblocks - + (sbp->sb_agcount-1) * sbp->sb_agblocks)) + return(0); + + return(1); +} + +void +convert_extent( + xfs_bmbt_rec_32_t *rp, + xfs_dfiloff_t *op, /* starting offset (blockno in file) */ + xfs_dfsbno_t *sp, /* starting block (fs blockno) */ + xfs_dfilblks_t *cp, /* blockcount */ + int *fp) /* extent flag */ +{ + xfs_bmbt_irec_t irec, *s = &irec; + /* Just use the extent parsing routine from the kernel */ + libxfs_bmbt_get_all((xfs_bmbt_rec_t *)rp, s); + + if (fs_has_extflgbit) { + if (s->br_state == XFS_EXT_UNWRITTEN) { + *fp = 1; + } else { + *fp = 0; + } + } else { + *fp = 0; + } + *op = s->br_startoff; + *sp = s->br_startblock; + *cp = s->br_blockcount; +} + +/* + * return address of block fblock if it's within the range described + * by the extent list. Otherwise, returns a null address. + */ +/* ARGSUSED */ +xfs_dfsbno_t +get_bmbt_reclist( + xfs_mount_t *mp, + xfs_bmbt_rec_32_t *rp, + int numrecs, + xfs_dfiloff_t fblock) +{ + int i; + xfs_dfilblks_t cnt; + xfs_dfiloff_t off_bno; + xfs_dfsbno_t start; + int flag; + + for (i = 0; i < numrecs; i++, rp++) { + convert_extent(rp, &off_bno, &start, &cnt, &flag); + if (off_bno >= fblock && off_bno + cnt < fblock) + return(start + fblock - off_bno); + } + + return(NULLDFSBNO); +} + +/* + * return 1 if inode should be cleared, 0 otherwise + * if check_dups should be set to 1, that implies that + * the primary purpose of this call is to see if the + * file overlaps with any duplicate extents (in the + * duplicate extent list). 
+ */ +/* ARGSUSED */ +int +process_bmbt_reclist_int( + xfs_mount_t *mp, + xfs_bmbt_rec_32_t *rp, + int numrecs, + int type, + xfs_ino_t ino, + xfs_drfsbno_t *tot, + blkmap_t **blkmapp, + xfs_dfiloff_t *first_key, + xfs_dfiloff_t *last_key, + int check_dups, + int whichfork) +{ + xfs_dfsbno_t b; + xfs_drtbno_t ext; + xfs_dfilblks_t c; /* count */ + xfs_dfilblks_t cp = 0; /* prev count */ + xfs_dfsbno_t s; /* start */ + xfs_dfsbno_t sp = 0; /* prev start */ + xfs_dfiloff_t o = 0; /* offset */ + xfs_dfiloff_t op = 0; /* prev offset */ + char *ftype; + char *forkname; + int i; + int state; + int flag; /* extent flag */ + + if (whichfork == XFS_DATA_FORK) + forkname = "data"; + else + forkname = "attr"; + + if (type == XR_INO_RTDATA) + ftype = "real-time"; + else + ftype = "regular"; + + for (i = 0; i < numrecs; i++, rp++) { + convert_extent(rp, &o, &s, &c, &flag); + if (i == 0) + *last_key = *first_key = o; + else + *last_key = o; + if (i > 0 && op + cp > o) { + do_warn( +"bmap rec out of order, inode %llu entry %d [o s c] [%llu %llu %llu], %d [%llu %llu %llu]\n", + ino, i, o, s, c, i-1, op, sp, cp); + return(1); + } + op = o; + cp = c; + sp = s; + + /* + * check numeric validity of the extent + */ + if (c == 0) { + do_warn( + "zero length extent (off = %llu, fsbno = %llu) in ino %llu\n", + o, s, ino); + return(1); + } + if (type == XR_INO_RTDATA) { + if (s >= mp->m_sb.sb_rblocks) { + do_warn( +"inode %llu - bad rt extent starting block number %llu, offset %llu\n", + ino, s, o); + return(1); + } + if (s + c - 1 >= mp->m_sb.sb_rblocks) { + do_warn( +"inode %llu - bad rt extent last block number %llu, offset %llu\n", + ino, s + c - 1, o); + return(1); + } + if (s + c - 1 < s) { + do_warn( +"inode %llu - bad rt extent overflows - start %llu, end %llu, offset %llu\n", + ino, s, s + c - 1, o); + return(1); + } + } else { + if (!verify_dfsbno(mp, s)) { + do_warn( +"inode %llu - bad extent starting block number %llu, offset %llu\n", + ino, s, o); + return(1); + } + if 
(!verify_dfsbno(mp, s + c - 1)) { + do_warn( +"inode %llu - bad extent last block number %llu, offset %llu\n", + ino, s + c - 1, o); + return(1); + } + if (s + c - 1 < s) { + do_warn( +"inode %llu - bad extent overflows - start %llu, end %llu, offset %llu\n", + ino, s, s + c - 1, o); + return(1); + } + if (o >= fs_max_file_offset) { + do_warn( +"inode %llu - extent offset too large - start %llu, count %llu, offset %llu\n", + ino, s, c, o); + return(1); + } + } + + /* + * realtime file data fork + */ + if (type == XR_INO_RTDATA && whichfork == XFS_DATA_FORK) { + /* + * XXX - verify that the blocks listed in the record + * are multiples of an extent + */ + if (s % mp->m_sb.sb_rextsize != 0 || + c % mp->m_sb.sb_rextsize != 0) { + do_warn( +"malformed rt inode extent [%llu %llu] (fs rtext size = %u)\n", + s, c, mp->m_sb.sb_rextsize); + return(1); + } + + /* + * XXX - set the appropriate number of extents + */ + for (b = s; b < s + c; b += mp->m_sb.sb_rextsize) { + ext = (xfs_drtbno_t) b / mp->m_sb.sb_rextsize; + + if (check_dups == 1) { + if (search_rt_dup_extent(mp, ext)) { + do_warn( +"data fork in rt ino %llu claims dup rt extent, off - %llu, start - %llu, count %llu\n", + ino, o, s, c); + return(1); + } + continue; + } + + state = get_rtbno_state(mp, ext); + + switch (state) { + case XR_E_FREE: +/* XXX - turn this back on after we + run process_rtbitmap() in phase2 + do_warn( + "%s fork in rt ino %llu claims free rt block %llu\n", + forkname, ino, ext); +*/ + /* fall through ... 
*/ + case XR_E_UNKNOWN: + set_rtbno_state(mp, ext, XR_E_INUSE); + break; + case XR_E_BAD_STATE: + do_error( + "bad state in rt block map %llu\n", ext); + abort(); + break; + case XR_E_FS_MAP: + case XR_E_INO: + case XR_E_INUSE_FS: + do_error( + "%s fork in rt inode %llu found metadata block %llu in %s bmap\n", + forkname, ino, ext, ftype); + case XR_E_INUSE: + case XR_E_MULT: + set_rtbno_state(mp, ext, XR_E_MULT); + do_warn( + "%s fork in rt inode %llu claims used rt block %llu\n", + forkname, ino, ext); + return(1); + case XR_E_FREE1: + default: + do_error( + "illegal state %d in %s block map %llu\n", + state, ftype, b); + } + } + + /* + * bump up the block counter + */ + *tot += c; + + /* + * skip rest of loop processing since that's + * all for regular file forks and attr forks + */ + continue; + } + + + /* + * regular file data fork or attribute fork + */ + if (blkmapp && *blkmapp) + blkmap_set_ext(blkmapp, o, s, c); + for (b = s; b < s + c; b++) { + if (check_dups == 1) { + /* + * if we're just checking the bmap for dups, + * return if we find one, otherwise, continue + * checking each entry without setting the + * block bitmap + */ + if (search_dup_extent(mp, + XFS_FSB_TO_AGNO(mp, b), + XFS_FSB_TO_AGBNO(mp, b))) { + do_warn( +"%s fork in ino %llu claims dup extent, off - %llu, start - %llu, cnt %llu\n", + forkname, ino, o, s, c); + return(1); + } + continue; + } + + /* FIX FOR BUG 653709 -- EKN + * realtime attribute fork, should be valid block number + * in regular data space, not realtime partion. + */ + if (type == XR_INO_RTDATA && whichfork == XFS_ATTR_FORK) { + if (mp->m_sb.sb_agcount < XFS_FSB_TO_AGNO(mp, b)) + return(1); + } + + state = get_fsbno_state(mp, b); + switch (state) { + case XR_E_FREE: + case XR_E_FREE1: + do_warn( + "%s fork in ino %llu claims free block %llu\n", + forkname, ino, (__uint64_t) b); + /* fall through ... 
*/ + case XR_E_UNKNOWN: + set_fsbno_state(mp, b, XR_E_INUSE); + break; + case XR_E_BAD_STATE: + do_error("bad state in block map %llu\n", b); + abort(); + break; + case XR_E_FS_MAP: + case XR_E_INO: + case XR_E_INUSE_FS: + do_warn( + "%s fork in inode %llu claims metadata block %llu\n", + forkname, ino, (__uint64_t) b); + return(1); + case XR_E_INUSE: + case XR_E_MULT: + set_fsbno_state(mp, b, XR_E_MULT); + do_warn( + "%s fork in %s inode %llu claims used block %llu\n", + forkname, ftype, ino, (__uint64_t) b); + return(1); + default: + do_error("illegal state %d in block map %llu\n", + state, b); + abort(); + } + } + *tot += c; + } + + return(0); +} + +/* + * return 1 if inode should be cleared, 0 otherwise, sets block bitmap + * as a side-effect + */ +int +process_bmbt_reclist( + xfs_mount_t *mp, + xfs_bmbt_rec_32_t *rp, + int numrecs, + int type, + xfs_ino_t ino, + xfs_drfsbno_t *tot, + blkmap_t **blkmapp, + xfs_dfiloff_t *first_key, + xfs_dfiloff_t *last_key, + int whichfork) +{ + return(process_bmbt_reclist_int(mp, rp, numrecs, type, ino, tot, + blkmapp, first_key, last_key, 0, + whichfork)); +} + +/* + * return 1 if inode should be cleared, 0 otherwise, does not set + * block bitmap + */ +int +scan_bmbt_reclist( + xfs_mount_t *mp, + xfs_bmbt_rec_32_t *rp, + int numrecs, + int type, + xfs_ino_t ino, + xfs_drfsbno_t *tot, + int whichfork) +{ + xfs_dfiloff_t first_key = 0; + xfs_dfiloff_t last_key = 0; + + return(process_bmbt_reclist_int(mp, rp, numrecs, type, ino, tot, + NULL, &first_key, &last_key, 1, + whichfork)); +} + +/* + * these two are meant for routines that read and work with inodes + * one at a time where the inodes may be in any order (like walking + * the unlinked lists to look for inodes). the caller is responsible + * for writing/releasing the buffer. 
+ */
+/*
+ * Read the inode chunk that holds (agno, agino).
+ *
+ * The chunk is located through the incore inode tree; NULL is returned
+ * when agino is not covered by any record there or when the read fails
+ * (the latter also emits a warning).  On success *dipp is pointed at
+ * the requested inode inside the returned buffer.
+ */
+xfs_buf_t *
+get_agino_buf(xfs_mount_t *mp,
+		xfs_agnumber_t agno,
+		xfs_agino_t agino,
+		xfs_dinode_t **dipp)
+{
+	ino_tree_node_t	*chunk_rec;
+	xfs_buf_t	*buf;
+	xfs_agblock_t	chunk_agbno;	/* ag block where the chunk starts */
+	int		nbbs;		/* read length in basic blocks */
+
+	chunk_rec = find_inode_rec(agno, agino);
+	if (chunk_rec == NULL)
+		return(NULL);
+
+	chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec->ino_startnum);
+
+	/* at least one fs block, else as many as a whole chunk spans */
+	nbbs = XFS_FSB_TO_BB(mp, MAX(1, XFS_INODES_PER_CHUNK/inodes_per_block));
+
+	buf = libxfs_readbuf(mp->m_dev,
+			XFS_AGB_TO_DADDR(mp, agno, chunk_agbno), nbbs, 0);
+	if (buf == NULL) {
+		do_warn("cannot read inode (%u/%u), disk block %lld\n",
+			agno, chunk_rec->ino_startnum,
+			XFS_AGB_TO_DADDR(mp, agno, chunk_agbno));
+		return(NULL);
+	}
+
+	/* index of agino counted from the first inode of the chunk's block */
+	*dipp = XFS_MAKE_IPTR(mp, buf,
+			agino - XFS_OFFBNO_TO_AGINO(mp, chunk_agbno, 0));
+
+	return(buf);
+}
+
+/*
+ * these next routines return the filesystem blockno of the
+ * block containing the block "bno" in the file whose bmap
+ * tree (or extent list) is rooted by "rootblock".
+ *
+ * the next routines are utility routines for the third
+ * routine, get_bmapi().
+ */
+/*
+ * Map file block bno through an extent-list fork.
+ *
+ * Scans the extent records stored in the whichfork fork of dip and
+ * returns the filesystem block that backs bno, or NULLDFSBNO when no
+ * extent covers it.  mp and ino are not used.
+ */
+/* ARGSUSED */
+xfs_dfsbno_t
+getfunc_extlist(xfs_mount_t *mp,
+		xfs_ino_t ino,
+		xfs_dinode_t *dip,
+		xfs_dfiloff_t bno,
+		int whichfork)
+{
+	xfs_dfiloff_t fbno;
+	xfs_dfilblks_t bcnt;
+	xfs_dfsbno_t fsbno;
+	xfs_dfsbno_t final_fsbno = NULLDFSBNO;
+	xfs_bmbt_rec_32_t *rootblock = (xfs_bmbt_rec_32_t *)
+				XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+	xfs_extnum_t nextents = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_CONVERT);
+	int i;
+	int flag;
+
+	for (i = 0; i < nextents; i++) {
+		convert_extent(rootblock + i, &fbno, &fsbno, &bcnt, &flag);
+
+		if (fbno <= bno && bno < fbno + bcnt) {
+			final_fsbno = bno - fbno + fsbno;
+			break;
+		}
+	}
+
+	return(final_fsbno);
+}
+
+/*
+ * Map file block bno through a btree-format fork.
+ *
+ * Starting at the root stored in the whichfork fork of dip, the key
+ * arrays are searched for the child covering bno, interior blocks are
+ * read one level at a time, and the leaf's extent records are finally
+ * scanned.  Returns the filesystem block backing bno, or NULLDFSBNO if
+ * a block cannot be read, a block holds more records than allowed, or
+ * no extent covers bno.
+ *
+ * The root block lives in the on-disk inode, so its bb_level,
+ * bb_numrecs and key offsets are read through INT_GET(.., ARCH_CONVERT)
+ * exactly like the fields of the interior and leaf blocks below; the
+ * original read them raw.
+ */
+xfs_dfsbno_t
+getfunc_btree(xfs_mount_t *mp,
+		xfs_ino_t ino,
+		xfs_dinode_t *dip,
+		xfs_dfiloff_t bno,
+		int whichfork)
+{
+	int i;
+	int prev_level;
+	int root_numrecs;
+	int flag;
+	int found;
+	xfs_bmbt_rec_32_t *rec;
+	xfs_bmbt_ptr_t *pp;
+	xfs_bmbt_key_t *key;
+	xfs_bmdr_key_t *rkey;
+	xfs_bmdr_ptr_t *rp;
+	xfs_dfiloff_t fbno;
+	xfs_dfsbno_t fsbno;
+	xfs_dfilblks_t bcnt;
+	xfs_buf_t *bp;
+	xfs_dfsbno_t final_fsbno = NULLDFSBNO;
+	xfs_bmbt_block_t *block;
+	xfs_bmdr_block_t *rootblock = (xfs_bmdr_block_t *)
+				XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT);
+
+	ASSERT(INT_GET(rootblock->bb_level, ARCH_CONVERT) != 0);
+	/*
+	 * deal with root block, it's got a slightly different
+	 * header structure than interior nodes.  We know that
+	 * a btree should have at least 2 levels otherwise it
+	 * would be an extent list.
+	 */
+	rkey = XFS_BTREE_KEY_ADDR(
+			XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT),
+			xfs_bmdr, rootblock, 1,
+			XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE_ARCH(dip,
+					mp, whichfork, ARCH_CONVERT),
+				xfs_bmdr, 1));
+	rp = XFS_BTREE_PTR_ADDR(
+			XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT),
+			xfs_bmdr, rootblock, 1,
+			XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE_ARCH(dip,
+					mp, whichfork, ARCH_CONVERT),
+				xfs_bmdr, 1));
+	root_numrecs = INT_GET(rootblock->bb_numrecs, ARCH_CONVERT);
+
+	/*
+	 * key i covers [key[i], key[i+1]); the last key covers everything
+	 * from its start offset upwards
+	 */
+	for (found = -1, i = 0; i < root_numrecs - 1; i++) {
+		if (INT_GET(rkey[i].br_startoff, ARCH_CONVERT) <= bno
+				&& bno < INT_GET(rkey[i+1].br_startoff,
+						ARCH_CONVERT)) {
+			found = i;
+			break;
+		}
+	}
+	if (i == root_numrecs - 1 &&
+			bno >= INT_GET(rkey[i].br_startoff, ARCH_CONVERT))
+		found = i;
+
+	ASSERT(found != -1);
+
+	fsbno = INT_GET(rp[found], ARCH_CONVERT);
+
+	ASSERT(verify_dfsbno(mp, fsbno));
+
+	bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+				XFS_FSB_TO_BB(mp, 1), 0);
+	if (!bp) {
+		do_error("cannot read bmap block %llu\n", fsbno);
+		return(NULLDFSBNO);
+	}
+	block = XFS_BUF_TO_BMBT_BLOCK(bp);
+
+	/*
+	 * ok, now traverse any interior btree nodes
+	 */
+	prev_level = INT_GET(rootblock->bb_level, ARCH_CONVERT);
+
+	while (INT_GET(block->bb_level, ARCH_CONVERT) > 0) {
+		/* levels must strictly decrease on the way down */
+		ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) < prev_level);
+
+		prev_level = INT_GET(block->bb_level, ARCH_CONVERT);
+
+		if (INT_GET(block->bb_numrecs, ARCH_CONVERT) >
+						mp->m_bmap_dmxr[1]) {
+			do_warn("# of bmap records in inode %llu exceeds max "
+				"(%u, max - %u)\n",
+				ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+				mp->m_bmap_dmxr[1]);
+			libxfs_putbuf(bp);
+			return(NULLDFSBNO);
+		}
+		if (verbose && INT_GET(block->bb_numrecs, ARCH_CONVERT) <
+						mp->m_bmap_dmnr[1]) {
+			do_warn("- # of bmap records in inode %llu < than min "
+				"(%u, min - %u), proceeding ...\n",
+				ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+				mp->m_bmap_dmnr[1]);
+		}
+		key = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize,
+			xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+		pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
+			xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+		for (	found = -1, i = 0;
+			i < INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1;
+			i++) {
+			if (INT_GET(key[i].br_startoff, ARCH_CONVERT) <= bno &&
+			    bno < INT_GET(key[i+1].br_startoff, ARCH_CONVERT)) {
+				found = i;
+				break;
+			}
+		}
+		if (i == INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1 &&
+		    bno >= INT_GET(key[i].br_startoff, ARCH_CONVERT))
+			found = i;
+
+		ASSERT(found != -1);
+		fsbno = INT_GET(pp[found], ARCH_CONVERT);
+
+		ASSERT(verify_dfsbno(mp, fsbno));
+
+		/*
+		 * release current btree block and read in the
+		 * next btree block to be traversed
+		 */
+		libxfs_putbuf(bp);
+		bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
+					XFS_FSB_TO_BB(mp, 1), 0);
+		if (!bp) {
+			do_error("cannot read bmap block %llu\n", fsbno);
+			return(NULLDFSBNO);
+		}
+		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+	}
+
+	/*
+	 * current block must be a leaf block
+	 */
+	ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) == 0);
+	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_bmap_dmxr[0]) {
+		do_warn("# of bmap records in inode %llu greater than max "
+			"(%u, max - %u)\n",
+			ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+			mp->m_bmap_dmxr[0]);
+		libxfs_putbuf(bp);
+		return(NULLDFSBNO);
+	}
+	if (verbose && INT_GET(block->bb_numrecs, ARCH_CONVERT) <
+					mp->m_bmap_dmnr[0])
+		do_warn("- # of bmap records in inode %llu < min "
+			"(%u, min - %u), continuing...\n",
+			ino, INT_GET(block->bb_numrecs, ARCH_CONVERT),
+			mp->m_bmap_dmnr[0]);
+
+	rec = (xfs_bmbt_rec_32_t *)XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize,
+		xfs_bmbt, block, 1, mp->m_bmap_dmxr[0]);
+	for (i = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
+		convert_extent(rec + i, &fbno, &fsbno, &bcnt, &flag);
+
+		if (fbno <= bno && bno < fbno + bcnt) {
+			final_fsbno = bno - fbno + fsbno;
+			break;
+		}
+	}
+	libxfs_putbuf(bp);
+
+	if (final_fsbno == NULLDFSBNO)
+		do_warn("could not map block %llu\n", bno);
+
+	return(final_fsbno);
+}
+
+/*
+ * this could be smarter.
maybe we should have an open inode + * routine that would get the inode buffer and return back + * an inode handle. I'm betting for the moment that this + * is used only by the directory and attribute checking code + * and that the avl tree find and buffer cache search are + * relatively cheap. If they're too expensive, we'll just + * have to fix this and add an inode handle to the da btree + * cursor. + * + * caller is responsible for checking doubly referenced blocks + * and references to holes + */ +xfs_dfsbno_t +get_bmapi(xfs_mount_t *mp, xfs_dinode_t *dino_p, + xfs_ino_t ino_num, xfs_dfiloff_t bno, int whichfork) +{ + xfs_dfsbno_t fsbno; + + switch (XFS_DFORK_FORMAT_ARCH(dino_p, whichfork, ARCH_CONVERT)) { + case XFS_DINODE_FMT_EXTENTS: + fsbno = getfunc_extlist(mp, ino_num, dino_p, bno, whichfork); + break; + case XFS_DINODE_FMT_BTREE: + fsbno = getfunc_btree(mp, ino_num, dino_p, bno, whichfork); + break; + case XFS_DINODE_FMT_LOCAL: + do_error("get_bmapi() called for local inode %llu\n", ino_num); + fsbno = NULLDFSBNO; + break; + default: + /* + * shouldn't happen + */ + do_error("bad inode format for inode %llu\n", ino_num); + fsbno = NULLDFSBNO; + } + + return(fsbno); +} + +/* + * higher level inode processing stuff starts here: + * first, one utility routine for each type of inode + */ + +/* + * return 1 if inode should be cleared, 0 otherwise + */ +/* ARGSUSED */ +int +process_btinode( + xfs_mount_t *mp, + xfs_agnumber_t agno, + xfs_agino_t ino, + xfs_dinode_t *dip, + int type, + int *dirty, + xfs_drfsbno_t *tot, + __uint64_t *nex, + blkmap_t **blkmapp, + int whichfork, + int check_dups) +{ + xfs_bmdr_block_t *dib; + xfs_dfiloff_t last_key; + xfs_dfiloff_t first_key = 0; + xfs_ino_t lino; + xfs_bmbt_ptr_t *pp; + xfs_bmbt_key_t *pkey; + char *forkname; + int i; + bmap_cursor_t cursor; + + dib = (xfs_bmdr_block_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT); + lino = XFS_AGINO_TO_INO(mp, agno, ino); + *tot = 0; + *nex = 0; + + if (whichfork == 
XFS_DATA_FORK) + forkname = "data"; + else + forkname = "attr"; + + if (INT_GET(dib->bb_level, ARCH_CONVERT) == 0) { + /* + * This should never happen since a btree inode + * has to have at least one other block in the + * bmap in addition to the root block in the + * inode's data fork. + * + * XXX - if we were going to fix up the inode, + * we'd try to treat the fork as an interior + * node and see if we could get an accurate + * level value from one of the blocks pointed + * to by the pointers in the fork. For now + * though, we just bail (and blow out the inode). + */ + do_warn("bad level 0 in inode %llu bmap btree root block\n", + XFS_AGINO_TO_INO(mp, agno, ino)); + return(1); + } + /* + * use bmdr/dfork_dsize since the root block is in the data fork + */ + init_bm_cursor(&cursor, INT_GET(dib->bb_level, ARCH_CONVERT) + 1); + + if (XFS_BMDR_SPACE_CALC(INT_GET(dib->bb_numrecs, ARCH_CONVERT)) > + ((whichfork == XFS_DATA_FORK) ? + XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT) : + XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_CONVERT))) { + do_warn( +"indicated size of %s btree root (%d bytes) > space in inode %llu %s fork\n", + forkname, XFS_BMDR_SPACE_CALC(INT_GET(dib->bb_numrecs, ARCH_CONVERT)), + lino, forkname); + return(1); + } + + pp = XFS_BTREE_PTR_ADDR(XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT), + xfs_bmdr, dib, 1, + XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT), + xfs_bmdr, 0)); + pkey = XFS_BTREE_KEY_ADDR(XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT), + xfs_bmdr, dib, 1, + XFS_BTREE_BLOCK_MAXRECS(XFS_DFORK_SIZE_ARCH(dip, mp, whichfork, ARCH_CONVERT), + xfs_bmdr, 0)); + + last_key = NULLDFILOFF; + + for (i = 0; i < INT_GET(dib->bb_numrecs, ARCH_CONVERT); i++) { + /* + * XXX - if we were going to do more to fix up the inode + * btree, we'd do it right here. For now, if there's a + * problem, we'll bail out and presumably clear the inode. 
+ */ + if (!verify_dfsbno(mp, INT_GET(pp[i], ARCH_CONVERT))) { + do_warn("bad bmap btree ptr 0x%llx in ino %llu\n", + INT_GET(pp[i], ARCH_CONVERT), lino); + return(1); + } + + if (scan_lbtree((xfs_dfsbno_t)INT_GET(pp[i], ARCH_CONVERT), INT_GET(dib->bb_level, ARCH_CONVERT), + scanfunc_bmap, type, whichfork, + lino, tot, nex, blkmapp, &cursor, + 1, check_dups)) + return(1); + /* + * fix key (offset) mismatches between the keys in root + * block records and the first key of each child block. + * fixes cases where entries have been shifted between + * blocks but the parent hasn't been updated + */ + if (check_dups == 0 && + cursor.level[INT_GET(dib->bb_level, ARCH_CONVERT)-1].first_key != + INT_GET(pkey[i].br_startoff, ARCH_CONVERT)) { + if (!no_modify) { + do_warn( +"correcting key in bmbt root (was %llu, now %llu) in inode %llu %s fork\n", + INT_GET(pkey[i].br_startoff, ARCH_CONVERT), + cursor.level[INT_GET(dib->bb_level, ARCH_CONVERT)-1].first_key, + XFS_AGINO_TO_INO(mp, agno, ino), + forkname); + *dirty = 1; + INT_SET(pkey[i].br_startoff, ARCH_CONVERT, cursor.level[INT_GET(dib->bb_level, ARCH_CONVERT)-1].first_key); + } else { + do_warn( +"bad key in bmbt root (is %llu, would reset to %llu) in inode %llu %s fork\n", + INT_GET(pkey[i].br_startoff, ARCH_CONVERT), + cursor.level[INT_GET(dib->bb_level, ARCH_CONVERT)-1].first_key, + XFS_AGINO_TO_INO(mp, agno, ino), + forkname); + } + } + /* + * make sure that keys are in ascending order. blow out + * inode if the ordering doesn't hold + */ + if (check_dups == 0) { + if (last_key != NULLDFILOFF && last_key >= + cursor.level[INT_GET(dib->bb_level, ARCH_CONVERT)-1].first_key) { + do_warn( + "out of order bmbt root key %llu in inode %llu %s fork\n", + first_key, + XFS_AGINO_TO_INO(mp, agno, ino), + forkname); + return(1); + } + last_key = cursor.level[INT_GET(dib->bb_level, ARCH_CONVERT)-1].first_key; + } + } + /* + * Check that the last child block's forward sibling pointer + * is NULL. 
+ */ + if (check_dups == 0 && + cursor.level[0].right_fsbno != NULLDFSBNO) { + do_warn( + "bad fwd (right) sibling pointer (saw %llu should be NULLDFSBNO)\n", + cursor.level[0].right_fsbno); + do_warn( + "\tin inode %u (%s fork) bmap btree block %llu\n", + XFS_AGINO_TO_INO(mp, agno, ino), forkname, + cursor.level[0].fsbno); + return(1); + } + + return(0); +} + +/* + * return 1 if inode should be cleared, 0 otherwise + */ +/* ARGSUSED */ +int +process_exinode( + xfs_mount_t *mp, + xfs_agnumber_t agno, + xfs_agino_t ino, + xfs_dinode_t *dip, + int type, + int *dirty, + xfs_drfsbno_t *tot, + __uint64_t *nex, + blkmap_t **blkmapp, + int whichfork, + int check_dups) +{ + xfs_ino_t lino; + xfs_bmbt_rec_32_t *rp; + xfs_dfiloff_t first_key; + xfs_dfiloff_t last_key; + + lino = XFS_AGINO_TO_INO(mp, agno, ino); + rp = (xfs_bmbt_rec_32_t *)XFS_DFORK_PTR_ARCH(dip, whichfork, ARCH_CONVERT); + *tot = 0; + *nex = XFS_DFORK_NEXTENTS_ARCH(dip, whichfork, ARCH_CONVERT); + /* + * XXX - if we were going to fix up the btree record, + * we'd do it right here. For now, if there's a problem, + * we'll bail out and presumably clear the inode. 
+ */ + if (check_dups == 0) + return(process_bmbt_reclist(mp, rp, *nex, type, lino, + tot, blkmapp, &first_key, &last_key, + whichfork)); + else + return(scan_bmbt_reclist(mp, rp, *nex, type, lino, tot, + whichfork)); +} + +/* + * return 1 if inode should be cleared, 0 otherwise + */ +/* ARGSUSED */ +int +process_lclinode( + xfs_mount_t *mp, + xfs_agnumber_t agno, + xfs_agino_t ino, + xfs_dinode_t *dip, + int type, + int *dirty, + xfs_drfsbno_t *tot, + __uint64_t *nex, + blkmap_t **blkmapp, + int whichfork, + int check_dups) +{ + xfs_attr_shortform_t *asf; + xfs_dinode_core_t *dic; + xfs_ino_t lino; + + *tot = 0; + *nex = 0; /* local inodes have 0 extents */ + + dic = &dip->di_core; + lino = XFS_AGINO_TO_INO(mp, agno, ino); + if (whichfork == XFS_DATA_FORK && + INT_GET(dic->di_size, ARCH_CONVERT) > XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT)) { + do_warn( + "local inode %llu data fork is too large (size = %lld, max = %d)\n", + lino, INT_GET(dic->di_size, ARCH_CONVERT), XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT)); + return(1); + } else if (whichfork == XFS_ATTR_FORK) { + asf = (xfs_attr_shortform_t *) XFS_DFORK_APTR_ARCH(dip, ARCH_CONVERT); + if (INT_GET(asf->hdr.totsize, ARCH_CONVERT) > XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_CONVERT)) { + do_warn( + "local inode %llu attr fork too large (size %d, max = %d)\n", + lino, INT_GET(asf->hdr.totsize, ARCH_CONVERT), + XFS_DFORK_ASIZE_ARCH(dip, mp, ARCH_CONVERT)); + return(1); + } + if (INT_GET(asf->hdr.totsize, ARCH_CONVERT) < sizeof(xfs_attr_sf_hdr_t)) { + do_warn( + "local inode %llu attr too small (size = %d, min size = %d)\n", + lino, INT_GET(asf->hdr.totsize, ARCH_CONVERT), + sizeof(xfs_attr_sf_hdr_t)); + return(1); + } + } + + return(0); +} + +int +process_symlink_extlist(xfs_mount_t *mp, xfs_ino_t lino, xfs_dinode_t *dino) +{ + xfs_dfsbno_t start; /* start */ + xfs_dfilblks_t cnt; /* count */ + xfs_dfiloff_t offset; /* offset */ + xfs_dfiloff_t expected_offset; + xfs_bmbt_rec_32_t *rp; + int numrecs; + int i; + int 
max_blocks; + int whichfork = XFS_DATA_FORK; + int flag; + + if (INT_GET(dino->di_core.di_size, ARCH_CONVERT) <= XFS_DFORK_SIZE_ARCH(dino, mp, whichfork, ARCH_CONVERT)) { + if (dino->di_core.di_format == XFS_DINODE_FMT_LOCAL) { + return(0); + } else { + do_warn( +"mismatch between format (%d) and size (%lld) in symlink ino %llu\n", + dino->di_core.di_format, + INT_GET(dino->di_core.di_size, ARCH_CONVERT), + lino); + return(1); + } + } else if (dino->di_core.di_format == XFS_DINODE_FMT_LOCAL) { + do_warn( +"mismatch between format (%d) and size (%lld) in symlink inode %llu\n", + dino->di_core.di_format, + INT_GET(dino->di_core.di_size, ARCH_CONVERT), + lino); + return(1); + } + + rp = (xfs_bmbt_rec_32_t *)XFS_DFORK_PTR_ARCH(dino, whichfork, ARCH_CONVERT); + numrecs = XFS_DFORK_NEXTENTS_ARCH(dino, whichfork, ARCH_CONVERT); + + /* + * the max # of extents in a symlink inode is equal to the + * number of max # of blocks required to store the symlink + */ + if (numrecs > max_symlink_blocks) { + do_warn( + "bad number of extents (%d) in symlink %llu data fork\n", + numrecs, lino); + return(1); + } + + max_blocks = max_symlink_blocks; + expected_offset = 0; + + for (i = 0; numrecs > 0; i++, numrecs--) { + convert_extent(rp, &offset, &start, &cnt, &flag); + + if (offset != expected_offset) { + do_warn( + "bad extent #%d offset (%llu) in symlink %llu data fork\n", + i, offset, lino); + return(1); + } + if (cnt == 0 || cnt > max_blocks) { + do_warn( + "bad extent #%d count (%llu) in symlink %llu data fork\n", + i, cnt, lino); + return(1); + } + + max_blocks -= cnt; + expected_offset += cnt; + } + + return(0); +} + +/* + * takes a name and length and returns 1 if the name contains + * a \0, returns 0 otherwise + */ +int +null_check(char *name, int length) +{ + int i; + + ASSERT(length < MAXPATHLEN); + + for (i = 0; i < length; i++, name++) { + if (*name == '\0') + return(1); + } + + return(0); +} + +/* + * like usual, returns 0 if everything's ok and 1 if something's + * 
bogus + */
int
process_symlink(xfs_mount_t *mp, xfs_ino_t lino, xfs_dinode_t *dino,
		blkmap_t *blkmap)
{
	xfs_dinode_core_t	*dinoc = &dino->di_core;
	xfs_dfsbno_t		fsbno;
	xfs_buf_t		*bp = NULL;
	char			*component;	/* start of current path component */
	char			*slash;		/* next '/' at or after component */
	char			*dst;		/* where the next chunk is copied to */
	int			fileblk;	/* file block being read */
	int			chunk;		/* bytes taken from that block */
	int			copied;		/* bytes gathered so far */
	char			data[MAXPATHLEN];

	/*
	 * check size against kernel symlink limits.  we know
	 * size is consistent with inode storage format -- e.g.
	 * the inode is structurally ok so we don't have to check
	 * for that
	 */
	if (INT_GET(dinoc->di_size, ARCH_CONVERT) >= MAXPATHLEN)  {
		do_warn("symlink in inode %llu too long (%lld chars)\n",
			lino, INT_GET(dinoc->di_size, ARCH_CONVERT));
		return(1);
	}

	/*
	 * have to check symlink component by component.
	 * get symlink contents into data area
	 */
	if (INT_GET(dinoc->di_size, ARCH_CONVERT)
			<= XFS_DFORK_DSIZE_ARCH(dino, mp, ARCH_CONVERT))  {
		/*
		 * local symlink, just copy the symlink out of the
		 * inode into the data area
		 */
		bcopy((char *)XFS_DFORK_DPTR_ARCH(dino, ARCH_CONVERT),
			data, INT_GET(dinoc->di_size, ARCH_CONVERT));
	} else  {
		/*
		 * stored in a meta-data file, have to bmap one block
		 * at a time and copy the symlink into the data area
		 */
		dst = data;
		copied = 0;

		for (fileblk = 0;
		     copied < INT_GET(dinoc->di_size, ARCH_CONVERT);
		     fileblk++)  {
			fsbno = blkmap_get(blkmap, fileblk);

			/* an unmapped block counts as a failed read */
			bp = NULL;
			if (fsbno != NULLDFSBNO)
				bp = libxfs_readbuf(mp->m_dev,
						XFS_FSB_TO_DADDR(mp, fsbno),
						XFS_FSB_TO_BB(mp, 1), 0);
			if (bp == NULL)  {
				do_warn("cannot read inode %llu, file block %d,"
					" disk block %llu\n", lino, fileblk, fsbno);
				return(1);
			}

			/* take what is still missing, one fs block at most */
			chunk = MIN(INT_GET(dinoc->di_size, ARCH_CONVERT)
				- copied, (int)XFS_FSB_TO_BB(mp, 1)*BBSIZE);
			bcopy((char *)XFS_BUF_PTR(bp), dst, chunk);
			dst += chunk;
			copied += chunk;
			libxfs_putbuf(bp);
		}
	}
	data[INT_GET(dinoc->di_size, ARCH_CONVERT)] = '\0';

	/*
	 * check for nulls
	 */
	if (null_check(data, (int) INT_GET(dinoc->di_size, ARCH_CONVERT)))  {
		do_warn("found illegal null character in symlink inode %llu\n",
			lino);
		return(1);
	}

	/*
	 * check for any component being too long.  a symlink shorter
	 * than MAXNAMELEN can't contain a component that long.
	 */
	if (INT_GET(dinoc->di_size, ARCH_CONVERT) < MAXNAMELEN)
		return(0);

	component = data;
	for (;;)  {
		slash = strchr(component, '/');
		if (slash == NULL)
			break;

		if (slash - component >= MAXNAMELEN)  {
			do_warn(
			"component of symlink in inode %llu too long\n",
				lino);
			return(1);
		}
		component = slash + 1;
	}

	/* whatever follows the last '/' (or the whole string) */
	if (strlen(component) >= MAXNAMELEN)  {
		do_warn("component of symlink in inode %llu too long\n",
			lino);
		return(1);
	}

	return(0);
}

+/* + * called to process the set of misc inode special inode types + * that have no associated data storage (fifos, pipes, devices, etc.). + */ +/* ARGSUSED */ +int +process_misc_ino_types(xfs_mount_t *mp, + xfs_dinode_t *dino, + xfs_ino_t lino, + int type) +{ + /* + * disallow mountpoint inodes until such time as the + * kernel actually allows them to be created (will + * probably require a superblock version rev, sigh).
+ */ + if (type == XR_INO_MOUNTPOINT) { + do_warn("inode %llu has bad inode type (IFMNT)\n", lino); + return(1); + } + + /* + * must also have a zero size + */ + if (INT_GET(dino->di_core.di_size, ARCH_CONVERT) != 0) { + switch (type) { + case XR_INO_CHRDEV: + do_warn("size of character device inode %llu != 0 " + "(%lld bytes)\n", lino, + INT_GET(dino->di_core.di_size, ARCH_CONVERT)); + break; + case XR_INO_BLKDEV: + do_warn("size of block device inode %llu != 0 " + "(%lld bytes)\n", lino, + INT_GET(dino->di_core.di_size, ARCH_CONVERT)); + break; + case XR_INO_SOCK: + do_warn("size of socket inode %llu != 0 " + "(%lld bytes)\n", lino, + INT_GET(dino->di_core.di_size, ARCH_CONVERT)); + break; + case XR_INO_FIFO: + do_warn("size of fifo inode %llu != 0 " + "(%lld bytes)\n", lino, + INT_GET(dino->di_core.di_size, ARCH_CONVERT)); + break; + default: + do_warn("Internal error - process_misc_ino_types, " + "illegal type %d\n", type); + abort(); + } + + return(1); + } + + return(0); +} + +int +process_misc_ino_types_blocks(xfs_drfsbno_t totblocks, xfs_ino_t lino, int type) +{ + /* + * you can not enforce all misc types have zero data fork blocks + * by checking dino->di_core.di_nblocks because atotblocks (attribute + * blocks) are part of nblocks. We must check this later when atotblocks + * has been calculated or by doing a simple check that anExtents == 0. + * We must also guarantee that totblocks is 0. Thus nblocks checking + * will be done later in process_dinode_int for misc types. 
+ */ + + if (totblocks != 0) { + switch (type) { + case XR_INO_CHRDEV: + do_warn( + "size of character device inode %llu != 0 (%llu blocks)\n", + lino, totblocks); + break; + case XR_INO_BLKDEV: + do_warn( + "size of block device inode %llu != 0 (%llu blocks)\n", + lino, totblocks); + break; + case XR_INO_SOCK: + do_warn( + "size of socket inode %llu != 0 (%llu blocks)\n", + lino, totblocks); + break; + case XR_INO_FIFO: + do_warn( + "size of fifo inode %llu != 0 (%llu blocks)\n", + lino, totblocks); + break; + default: + return(0); + } + return(1); + } + return (0); +} + +/* + * returns 0 if the inode is ok, 1 if the inode is corrupt + * check_dups can be set to 1 *only* when called by the + * first pass of the duplicate block checking of phase 4. + * *dirty is set > 0 if the dinode has been altered and + * needs to be written out. + * + * for detailed, info, look at process_dinode() comments. + */ +/* ARGSUSED */ +int +process_dinode_int(xfs_mount_t *mp, + xfs_dinode_t *dino, + xfs_agnumber_t agno, + xfs_agino_t ino, + int was_free, /* 1 if inode is currently free */ + int *dirty, /* out == > 0 if inode is now dirty */ + int *cleared, /* out == 1 if inode was cleared */ + int *used, /* out == 1 if inode is in use */ + int verify_mode, /* 1 == verify but don't modify inode */ + int uncertain, /* 1 == inode is uncertain */ + int ino_discovery, /* 1 == check dirs for unknown inodes */ + int check_dups, /* 1 == check if inode claims + * duplicate blocks */ + int extra_attr_check, /* 1 == do attribute format and value checks */ + int *isa_dir, /* out == 1 if inode is a directory */ + xfs_ino_t *parent) /* out -- parent if ino is a dir */ +{ + xfs_drfsbno_t totblocks = 0; + xfs_drfsbno_t atotblocks = 0; + xfs_dinode_core_t *dinoc; + char *rstring; + int type; + int rtype; + int do_rt; + int err; + int retval = 0; + __uint64_t nextents; + __uint64_t anextents; + xfs_ino_t lino; + const int is_free = 0; + const int is_used = 1; + int repair = 0; + blkmap_t *ablkmap = 
NULL; + blkmap_t *dblkmap = NULL; + static char okfmts[] = { + 0, /* free inode */ + 1 << XFS_DINODE_FMT_DEV, /* FIFO */ + 1 << XFS_DINODE_FMT_DEV, /* CHR */ + 0, /* type 3 unused */ + (1 << XFS_DINODE_FMT_LOCAL) | + (1 << XFS_DINODE_FMT_EXTENTS) | + (1 << XFS_DINODE_FMT_BTREE), /* DIR */ + 0, /* type 5 unused */ + 1 << XFS_DINODE_FMT_DEV, /* BLK */ + 0, /* type 7 unused */ + (1 << XFS_DINODE_FMT_EXTENTS) | + (1 << XFS_DINODE_FMT_BTREE), /* REG */ + 0, /* type 9 unused */ + (1 << XFS_DINODE_FMT_LOCAL) | + (1 << XFS_DINODE_FMT_EXTENTS), /* LNK */ + 0, /* type 11 unused */ + 1 << XFS_DINODE_FMT_DEV, /* SOCK */ + 0, /* type 13 unused */ + 1 << XFS_DINODE_FMT_UUID, /* MNT */ + 0 /* type 15 unused */ + }; + + retval = 0; + totblocks = atotblocks = 0; + *dirty = *isa_dir = *cleared = 0; + *used = is_used; + type = rtype = XR_INO_UNKNOWN; + rstring = NULL; + do_rt = 0; + + dinoc = &dino->di_core; + lino = XFS_AGINO_TO_INO(mp, agno, ino); + + /* + * if in verify mode, don't modify the inode. + * + * if correcting, reset stuff that has known values + * + * if in uncertain mode, be silent on errors since we're + * trying to find out if these are inodes as opposed + * to assuming that they are. Just return the appropriate + * return code in that case. 
+ */ + + if (INT_GET(dinoc->di_magic, ARCH_CONVERT) != XFS_DINODE_MAGIC) { + retval++; + if (!verify_mode) { + do_warn("bad magic number 0x%x on inode %llu, ", + INT_GET(dinoc->di_magic, ARCH_CONVERT), lino); + if (!no_modify) { + do_warn("resetting magic number\n"); + *dirty = 1; + INT_SET(dinoc->di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC); + } else { + do_warn("would reset magic number\n"); + } + } else if (!uncertain) { + do_warn("bad magic number 0x%x on inode %llu\n", + INT_GET(dinoc->di_magic, ARCH_CONVERT), lino); + } + } + + if (!XFS_DINODE_GOOD_VERSION(dinoc->di_version) || + (!fs_inode_nlink && dinoc->di_version > XFS_DINODE_VERSION_1)) { + retval++; + if (!verify_mode) { + do_warn("bad version number 0x%x on inode %llu, ", + dinoc->di_version, lino); + if (!no_modify) { + do_warn("resetting version number\n"); + *dirty = 1; + dinoc->di_version = (fs_inode_nlink) ? + XFS_DINODE_VERSION_2 : + XFS_DINODE_VERSION_1; + } else { + do_warn("would reset version number\n"); + } + } else if (!uncertain) { + do_warn("bad version number 0x%x on inode %llu\n", + dinoc->di_version, lino); + } + } + + /* + * blow out of here if the inode size is < 0 + */ + if (INT_GET(dinoc->di_size, ARCH_CONVERT) < 0) { + retval++; + if (!verify_mode) { + do_warn("bad (negative) size %lld on inode %llu\n", + INT_GET(dinoc->di_size, ARCH_CONVERT), lino); + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + *cleared = 1; + } else { + *dirty = 1; + *cleared = 1; + } + *used = is_free; + } else if (!uncertain) { + do_warn("bad (negative) size %lld on inode %llu\n", + INT_GET(dinoc->di_size, ARCH_CONVERT), lino); + } + + return(1); + } + + /* + * was_free value is not meaningful if we're in verify mode + */ + if (!verify_mode && INT_GET(dinoc->di_mode, ARCH_CONVERT) == 0 && was_free == 1) { + /* + * easy case, inode free -- inode and map agree, clear + * it just in case to ensure that format, etc. 
are + * set correctly + */ + if (!no_modify) { + err = clear_dinode(mp, dino, lino); + if (err) { + *dirty = 1; + *cleared = 1; + } + } + *used = is_free; + return(0); + } else if (!verify_mode && INT_GET(dinoc->di_mode, ARCH_CONVERT) == 0 && was_free == 0) { + /* + * the inode looks free but the map says it's in use. + * clear the inode just to be safe and mark the inode + * free. + */ + do_warn("imap claims a free inode %llu is in use, ", lino); + + if (!no_modify) { + do_warn("correcting imap and clearing inode\n"); + + err = clear_dinode(mp, dino, lino); + if (err) { + retval++; + *dirty = 1; + *cleared = 1; + } + } else { + do_warn("would correct imap and clear inode\n"); + + *dirty = 1; + *cleared = 1; + } + + *used = is_free; + + return(retval > 0 ? 1 : 0); + } + + /* + * because of the lack of any write ordering guarantee, it's + * possible that the core got updated but the forks didn't. + * so rather than be ambitious (and probably incorrect), + * if there's an inconsistency, we get conservative and + * just pitch the file. blow off checking formats of + * free inodes since technically any format is legal + * as we reset the inode when we re-use it. + */ + if (INT_GET(dinoc->di_mode, ARCH_CONVERT) != 0 && + ((((INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT) >> 12) > 15) || + dinoc->di_format < XFS_DINODE_FMT_DEV || + dinoc->di_format > XFS_DINODE_FMT_UUID || + (!(okfmts[(INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT) >> 12] & + (1 << dinoc->di_format))))) { + /* bad inode format */ + retval++; + if (!uncertain) + do_warn("bad inode format in inode %llu\n", lino); + if (!verify_mode) { + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + ASSERT(*dirty > 0); + } + } + *cleared = 1; + *used = is_free; + + return(retval > 0 ? 1 : 0); + } + + if (verify_mode) + return(retval > 0 ? 1 : 0); + + /* + * clear the next unlinked field if necessary on a good + * inode only during phase 4 -- when checking for inodes + * referencing duplicate blocks. 
then it's safe because + * we've done the inode discovery and have found all the inodes + * we're going to find. check_dups is set to 1 only during + * phase 4. Ugly. + */ + if (check_dups && !no_modify) + *dirty += clear_dinode_unlinked(mp, dino); + + /* set type and map type info */ + + switch (INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT) { + case IFDIR: + type = XR_INO_DIR; + *isa_dir = 1; + break; + case IFREG: + if (INT_GET(dinoc->di_flags, ARCH_CONVERT) & XFS_DIFLAG_REALTIME) + type = XR_INO_RTDATA; + else if (lino == mp->m_sb.sb_rbmino) + type = XR_INO_RTBITMAP; + else if (lino == mp->m_sb.sb_rsumino) + type = XR_INO_RTSUM; + else + type = XR_INO_DATA; + break; + case IFLNK: + type = XR_INO_SYMLINK; + break; + case IFCHR: + type = XR_INO_CHRDEV; + break; + case IFBLK: + type = XR_INO_BLKDEV; + break; + case IFSOCK: + type = XR_INO_SOCK; + break; + case IFIFO: + type = XR_INO_FIFO; + break; + case IFMNT: + type = XR_INO_MOUNTPOINT; + break; + default: + type = XR_INO_UNKNOWN; + do_warn("Unexpected inode type %#o inode %llu\n", + (int) (INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT), lino); + abort(); + break; + } + + /* + * type checks for root, realtime inodes, and quota inodes + */ + if (lino == mp->m_sb.sb_rootino && type != XR_INO_DIR) { + do_warn("bad inode type for root inode %llu, ", lino); + type = XR_INO_DIR; + + if (!no_modify) { + do_warn("resetting to directory\n"); + INT_MOD_EXPR(dinoc->di_mode, ARCH_CONVERT, &= ~(INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT)); + INT_MOD_EXPR(dinoc->di_mode, ARCH_CONVERT, |= INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFDIR); + } else { + do_warn("would reset to directory\n"); + } + } else if (lino == mp->m_sb.sb_rsumino) { + do_rt = 1; + rstring = "summary"; + rtype = XR_INO_RTSUM; + } else if (lino == mp->m_sb.sb_rbmino) { + do_rt = 1; + rstring = "bitmap"; + rtype = XR_INO_RTBITMAP; + } else if (lino == mp->m_sb.sb_uquotino) { + if (type != XR_INO_DATA) { + do_warn("user quota inode has bad type 0x%x\n", + 
INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT); + + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + ASSERT(*dirty > 0); + } + + *cleared = 1; + *used = is_free; + *isa_dir = 0; + + mp->m_sb.sb_uquotino = NULLFSINO; + + return(1); + } + } else if (lino == mp->m_sb.sb_pquotino) { + if (type != XR_INO_DATA) { + do_warn("project quota inode has bad type 0x%x\n", + INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT); + + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + ASSERT(*dirty > 0); + } + + *cleared = 1; + *used = is_free; + *isa_dir = 0; + + mp->m_sb.sb_pquotino = NULLFSINO; + + return(1); + } + } + + if (do_rt && type != rtype) { + type = XR_INO_DATA; + + do_warn("bad inode type for realtime %s inode %llu, ", + rstring, lino); + + if (!no_modify) { + do_warn("resetting to regular file\n"); + INT_MOD_EXPR(dinoc->di_mode, ARCH_CONVERT, &= ~(INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFMT)); + INT_MOD_EXPR(dinoc->di_mode, ARCH_CONVERT, |= INT_GET(dinoc->di_mode, ARCH_CONVERT) & IFREG); + } else { + do_warn("would reset to regular file\n"); + } + } + + /* + * only realtime inodes should have extsize set + */ + if (type != XR_INO_RTDATA && INT_GET(dinoc->di_extsize, ARCH_CONVERT) != 0) { + do_warn( +"bad non-zero extent size value %u for non-realtime inode %llu,", + INT_GET(dinoc->di_extsize, ARCH_CONVERT), lino); + + if (!no_modify) { + do_warn("resetting to zero\n"); + INT_ZERO(dinoc->di_extsize, ARCH_CONVERT); + *dirty = 1; + } else { + do_warn("would reset to zero\n"); + } + } + + /* + * for realtime inodes, check sizes to see that + * they are consistent with the # of realtime blocks. + * also, verify that they contain only one extent and + * are extent format files. If anything's wrong, clear + * the inode -- we'll recreate it in phase 6. 
+ */ + if (do_rt && INT_GET(dinoc->di_size, ARCH_CONVERT) + != mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize) { + do_warn("bad size %llu for realtime %s inode %llu\n", + INT_GET(dinoc->di_size, ARCH_CONVERT), rstring, lino); + + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + ASSERT(*dirty > 0); + } + + *cleared = 1; + *used = is_free; + *isa_dir = 0; + + return(1); + } + + if (do_rt && mp->m_sb.sb_rblocks == 0 && INT_GET(dinoc->di_nextents, ARCH_CONVERT) != 0) { + do_warn("bad # of extents (%u) for realtime %s inode %llu\n", + INT_GET(dinoc->di_nextents, ARCH_CONVERT), rstring, lino); + + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + ASSERT(*dirty > 0); + } + + *cleared = 1; + *used = is_free; + *isa_dir = 0; + + return(1); + } + + /* + * Setup nextents and anextents for blkmap_alloc calls. + */ + nextents = INT_GET(dinoc->di_nextents, ARCH_CONVERT); + if (nextents > INT_GET(dinoc->di_nblocks, ARCH_CONVERT) || nextents > XFS_MAX_INCORE_EXTENTS) + nextents = 1; + anextents = INT_GET(dinoc->di_anextents, ARCH_CONVERT); + if (anextents > INT_GET(dinoc->di_nblocks, ARCH_CONVERT) || anextents > XFS_MAX_INCORE_EXTENTS) + anextents = 1; + + /* + * general size/consistency checks: + * + * if the size <= size of the data fork, directories must be + * local inodes unlike regular files which would be extent inodes. + * all the other mentioned types have to have a zero size value. + * + * if the size and format don't match, get out now rather than + * risk trying to process a non-existent extents or btree + * type data fork. 
+ */ + switch (type) { + case XR_INO_DIR: + if (INT_GET(dinoc->di_size, ARCH_CONVERT) <= XFS_DFORK_DSIZE_ARCH(dino, mp, ARCH_CONVERT) + && dinoc->di_format != XFS_DINODE_FMT_LOCAL) { + do_warn( +"mismatch between format (%d) and size (%lld) in directory ino %llu\n", + dinoc->di_format, + INT_GET(dinoc->di_size, ARCH_CONVERT), + lino); + + if (!no_modify) { + *dirty += clear_dinode(mp, + dino, lino); + ASSERT(*dirty > 0); + } + + *cleared = 1; + *used = is_free; + *isa_dir = 0; + + return(1); + } + if (dinoc->di_format != XFS_DINODE_FMT_LOCAL) + dblkmap = blkmap_alloc(nextents); + break; + case XR_INO_SYMLINK: + if (process_symlink_extlist(mp, lino, dino)) { + do_warn("bad data fork in symlink %llu\n", lino); + + if (!no_modify) { + *dirty += clear_dinode(mp, + dino, lino); + ASSERT(*dirty > 0); + } + + *cleared = 1; + *used = is_free; + *isa_dir = 0; + + return(1); + } + if (dinoc->di_format != XFS_DINODE_FMT_LOCAL) + dblkmap = blkmap_alloc(nextents); + break; + case XR_INO_CHRDEV: /* fall through to FIFO case ... */ + case XR_INO_BLKDEV: /* fall through to FIFO case ... */ + case XR_INO_SOCK: /* fall through to FIFO case ... */ + case XR_INO_MOUNTPOINT: /* fall through to FIFO case ... 
*/ + case XR_INO_FIFO: + if (process_misc_ino_types(mp, dino, lino, type)) { + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + ASSERT(*dirty > 0); + } + + *cleared = 1; + *used = is_free; + *isa_dir = 0; + + return(1); + } + break; + case XR_INO_RTDATA: + /* + * if we have no realtime blocks, any inode claiming + * to be a real-time file is bogus + */ + if (mp->m_sb.sb_rblocks == 0) { + do_warn( + "found inode %llu claiming to be a real-time file\n", + lino); + + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + ASSERT(*dirty > 0); + } + + *cleared = 1; + *used = is_free; + *isa_dir = 0; + + return(1); + } + break; + case XR_INO_RTBITMAP: + if (INT_GET(dinoc->di_size, ARCH_CONVERT) != (__int64_t) mp->m_sb.sb_rbmblocks * + mp->m_sb.sb_blocksize) { + do_warn( + "realtime bitmap inode %llu has bad size %lld (should be %lld)\n", + lino, INT_GET(dinoc->di_size, ARCH_CONVERT), + (__int64_t) mp->m_sb.sb_rbmblocks * + mp->m_sb.sb_blocksize); + + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + ASSERT(*dirty > 0); + } + + *cleared = 1; + *used = is_free; + *isa_dir = 0; + + return(1); + } + dblkmap = blkmap_alloc(nextents); + break; + case XR_INO_RTSUM: + if (INT_GET(dinoc->di_size, ARCH_CONVERT) != mp->m_rsumsize) { + do_warn( + "realtime summary inode %llu has bad size %lld (should be %d)\n", + lino, INT_GET(dinoc->di_size, ARCH_CONVERT), mp->m_rsumsize); + + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + ASSERT(*dirty > 0); + } + + *cleared = 1; + *used = is_free; + *isa_dir = 0; + + return(1); + } + dblkmap = blkmap_alloc(nextents); + break; + default: + break; + } + + /* + * check for illegal values of forkoff + */ + err = 0; + if (dinoc->di_forkoff != 0) { + switch (dinoc->di_format) { + case XFS_DINODE_FMT_DEV: + if (dinoc->di_forkoff != + (roundup(sizeof(dev_t), 8) >> 3)) { + do_warn( + "bad attr fork offset %d in dev inode %llu, should be %d\n", + (int) dinoc->di_forkoff, + lino, + (int) (roundup(sizeof(dev_t), 
8) >> 3)); + err = 1; + } + break; + case XFS_DINODE_FMT_UUID: + if (dinoc->di_forkoff != + (roundup(sizeof(uuid_t), 8) >> 3)) { + do_warn( + "bad attr fork offset %d in uuid inode %llu, should be %d\n", + (int) dinoc->di_forkoff, + lino, + (int)(roundup(sizeof(uuid_t), 8) >> 3)); + err = 1; + } + break; + case XFS_DINODE_FMT_LOCAL: /* fall through ... */ + case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ + case XFS_DINODE_FMT_BTREE: + if (dinoc->di_forkoff != mp->m_attroffset >> 3) { + do_warn( + "bad attr fork offset %d in inode %llu, should be %d\n", + (int) dinoc->di_forkoff, + lino, + (int) (mp->m_attroffset >> 3)); + err = 1; + } + break; + default: + do_error("unexpected inode format %d\n", + (int) dinoc->di_format); + break; + } + } + + if (err) { + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + ASSERT(*dirty > 0); + } + + *cleared = 1; + *used = is_free; + *isa_dir = 0; + blkmap_free(dblkmap); + return(1); + } + + /* + * check data fork -- if it's bad, clear the inode + */ + nextents = 0; + switch (dinoc->di_format) { + case XFS_DINODE_FMT_LOCAL: + err = process_lclinode(mp, agno, ino, dino, type, + dirty, &totblocks, &nextents, &dblkmap, + XFS_DATA_FORK, check_dups); + break; + case XFS_DINODE_FMT_EXTENTS: + err = process_exinode(mp, agno, ino, dino, type, + dirty, &totblocks, &nextents, &dblkmap, + XFS_DATA_FORK, check_dups); + break; + case XFS_DINODE_FMT_BTREE: + err = process_btinode(mp, agno, ino, dino, type, + dirty, &totblocks, &nextents, &dblkmap, + XFS_DATA_FORK, check_dups); + break; + case XFS_DINODE_FMT_DEV: /* fall through */ + case XFS_DINODE_FMT_UUID: + err = 0; + break; + default: + do_error("unknown format %d, ino %llu (mode = %d)\n", + dinoc->di_format, lino, INT_GET(dinoc->di_mode, ARCH_CONVERT)); + } + + if (err) { + /* + * problem in the data fork, clear out the inode + * and get out + */ + do_warn("bad data fork in inode %llu\n", lino); + + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + ASSERT(*dirty 
> 0); + } + + *cleared = 1; + *used = is_free; + *isa_dir = 0; + blkmap_free(dblkmap); + + return(1); + } + + if (check_dups) { + /* + * if check_dups was non-zero, we have to + * re-process data fork to set bitmap since the + * bitmap wasn't set the first time through + */ + switch (dinoc->di_format) { + case XFS_DINODE_FMT_LOCAL: + err = process_lclinode(mp, agno, ino, dino, type, + dirty, &totblocks, &nextents, &dblkmap, + XFS_DATA_FORK, 0); + break; + case XFS_DINODE_FMT_EXTENTS: + err = process_exinode(mp, agno, ino, dino, type, + dirty, &totblocks, &nextents, &dblkmap, + XFS_DATA_FORK, 0); + break; + case XFS_DINODE_FMT_BTREE: + err = process_btinode(mp, agno, ino, dino, type, + dirty, &totblocks, &nextents, &dblkmap, + XFS_DATA_FORK, 0); + break; + case XFS_DINODE_FMT_DEV: /* fall through */ + case XFS_DINODE_FMT_UUID: + err = 0; + break; + default: + do_error("unknown format %d, ino %llu (mode = %d)\n", + dinoc->di_format, lino, INT_GET(dinoc->di_mode, ARCH_CONVERT)); + } + + if (no_modify && err != 0) { + *cleared = 1; + *used = is_free; + *isa_dir = 0; + blkmap_free(dblkmap); + + return(1); + } + + ASSERT(err == 0); + } + + /* + * check attribute fork if necessary. attributes are + * always stored in the regular filesystem. 
+ */ + + if (!XFS_DFORK_Q_ARCH(dino, ARCH_CONVERT) && dinoc->di_aformat != XFS_DINODE_FMT_EXTENTS) { + do_warn("bad attribute format %d in inode %llu, ", + dinoc->di_aformat, lino); + if (!no_modify) { + do_warn("resetting value\n"); + dinoc->di_aformat = XFS_DINODE_FMT_EXTENTS; + *dirty = 1; + } else + do_warn("would reset value\n"); + anextents = 0; + } else if (XFS_DFORK_Q_ARCH(dino, ARCH_CONVERT)) { + switch (dinoc->di_aformat) { + case XFS_DINODE_FMT_LOCAL: + anextents = 0; + err = process_lclinode(mp, agno, ino, dino, + type, dirty, &atotblocks, &anextents, &ablkmap, + XFS_ATTR_FORK, check_dups); + break; + case XFS_DINODE_FMT_EXTENTS: + ablkmap = blkmap_alloc(anextents); + anextents = 0; + err = process_exinode(mp, agno, ino, dino, + type, dirty, &atotblocks, &anextents, &ablkmap, + XFS_ATTR_FORK, check_dups); + break; + case XFS_DINODE_FMT_BTREE: + ablkmap = blkmap_alloc(anextents); + anextents = 0; + err = process_btinode(mp, agno, ino, dino, + type, dirty, &atotblocks, &anextents, &ablkmap, + XFS_ATTR_FORK, check_dups); + break; + default: + anextents = 0; + do_warn("illegal attribute format %d, ino %llu\n", + dinoc->di_aformat, lino); + err = 1; + break; + } + + if (err) { + /* + * clear the attribute fork if necessary. we can't + * clear the inode because we've already put the + * inode space info into the blockmap. 
+ * + * XXX - put the inode onto the "move it" list and + * log the the attribute scrubbing + */ + do_warn("bad attribute fork in inode %llu", lino); + + if (!no_modify) { + if (delete_attr_ok) { + do_warn(", clearing attr fork\n"); + *dirty += clear_dinode_attr(mp, + dino, lino); + } else { + do_warn("\n"); + *dirty += clear_dinode(mp, + dino, lino); + } + ASSERT(*dirty > 0); + } else { + do_warn(", would clear attr fork\n"); + } + + atotblocks = 0; + anextents = 0; + + if (delete_attr_ok) { + if (!no_modify) + dinoc->di_aformat = XFS_DINODE_FMT_LOCAL; + } else { + *cleared = 1; + *used = is_free; + *isa_dir = 0; + blkmap_free(dblkmap); + blkmap_free(ablkmap); + } + return(1); + + } else if (check_dups) { + switch (dinoc->di_aformat) { + case XFS_DINODE_FMT_LOCAL: + err = process_lclinode(mp, agno, ino, dino, + type, dirty, &atotblocks, &anextents, + &ablkmap, XFS_ATTR_FORK, 0); + break; + case XFS_DINODE_FMT_EXTENTS: + err = process_exinode(mp, agno, ino, dino, + type, dirty, &atotblocks, &anextents, + &ablkmap, XFS_ATTR_FORK, 0); + break; + case XFS_DINODE_FMT_BTREE: + err = process_btinode(mp, agno, ino, dino, + type, dirty, &atotblocks, &anextents, + &ablkmap, XFS_ATTR_FORK, 0); + break; + default: + do_error("illegal attribute fmt %d, ino %llu\n", + dinoc->di_aformat, lino); + } + + if (no_modify && err != 0) { + *cleared = 1; + *used = is_free; + *isa_dir = 0; + blkmap_free(dblkmap); + blkmap_free(ablkmap); + + return(1); + } + + ASSERT(err == 0); + } + + /* + * do attribute semantic-based consistency checks now + */ + + /* get this only in phase 3, not in both phase 3 and 4 */ + if (extra_attr_check) { + if ((err = process_attributes(mp, lino, dino, ablkmap, + &repair))) { + do_warn("problem with attribute contents in inode %llu\n",lino); + if(!repair) { + /* clear attributes if not done already */ + if (!no_modify) { + *dirty += clear_dinode_attr( + mp, dino, lino); + dinoc->di_aformat = + XFS_DINODE_FMT_LOCAL; + } else { + do_warn("would clear attr 
fork\n"); + } + atotblocks = 0; + anextents = 0; + } + else { + *dirty = 1; /* it's been repaired */ + } + } + } + blkmap_free(ablkmap); + + } else + anextents = 0; + + /* + * enforce totblocks is 0 for misc types + */ + if (process_misc_ino_types_blocks(totblocks, lino, type)) { + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + ASSERT(*dirty > 0); + } + *cleared = 1; + *used = is_free; + *isa_dir = 0; + blkmap_free(dblkmap); + + return(1); + } + + /* + * correct space counters if required + */ + if (totblocks + atotblocks != INT_GET(dinoc->di_nblocks, ARCH_CONVERT)) { + if (!no_modify) { + do_warn("correcting nblocks for inode %llu, was %llu - counted %llu\n", + lino, INT_GET(dinoc->di_nblocks, ARCH_CONVERT), + totblocks + atotblocks); + *dirty = 1; + INT_SET(dinoc->di_nblocks, ARCH_CONVERT, totblocks + atotblocks); + } else { + do_warn( + "bad nblocks %llu for inode %llu, would reset to %llu\n", + INT_GET(dinoc->di_nblocks, ARCH_CONVERT), lino, + totblocks + atotblocks); + } + } + + if (nextents > MAXEXTNUM) { + do_warn("too many data fork extents (%llu) in inode %llu\n", + nextents, lino); + + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + ASSERT(*dirty > 0); + } + *cleared = 1; + *used = is_free; + *isa_dir = 0; + blkmap_free(dblkmap); + + return(1); + } + if (nextents != INT_GET(dinoc->di_nextents, ARCH_CONVERT)) { + if (!no_modify) { + do_warn("correcting nextents for inode %llu, was %d - counted %llu\n", + lino, INT_GET(dinoc->di_nextents, ARCH_CONVERT), nextents); + *dirty = 1; + INT_SET(dinoc->di_nextents, ARCH_CONVERT, (xfs_extnum_t) nextents); + } else { + do_warn( + "bad nextents %d for inode %llu, would reset to %llu\n", + INT_GET(dinoc->di_nextents, ARCH_CONVERT), lino, nextents); + } + } + + if (anextents > MAXAEXTNUM) { + do_warn("too many attr fork extents (%llu) in inode %llu\n", + anextents, lino); + + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + ASSERT(*dirty > 0); + } + *cleared = 1; + *used = 
is_free; + *isa_dir = 0; + blkmap_free(dblkmap); + + return(1); + } + if (anextents != INT_GET(dinoc->di_anextents, ARCH_CONVERT)) { + if (!no_modify) { + do_warn("correcting anextents for inode %llu, was %d - counted %llu\n", + lino, INT_GET(dinoc->di_anextents, ARCH_CONVERT), anextents); + *dirty = 1; + INT_SET(dinoc->di_anextents, ARCH_CONVERT, (xfs_aextnum_t) anextents); + } else { + do_warn( + "bad anextents %d for inode %llu, would reset to %llu\n", + INT_GET(dinoc->di_anextents, ARCH_CONVERT), lino, anextents); + } + } + + /* + * do any semantic type-based checking here + */ + switch (type) { + case XR_INO_DIR: + if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) + err = process_dir2(mp, lino, dino, ino_discovery, + dirty, "", parent, dblkmap); + else + err = process_dir(mp, lino, dino, ino_discovery, + dirty, "", parent, dblkmap); + if (err) + do_warn( + "problem with directory contents in inode %llu\n", + lino); + break; + case XR_INO_RTBITMAP: + /* process_rtbitmap XXX */ + err = 0; + break; + case XR_INO_RTSUM: + /* process_rtsummary XXX */ + err = 0; + break; + case XR_INO_SYMLINK: + if ((err = process_symlink(mp, lino, dino, dblkmap))) + do_warn("problem with symbolic link in inode %llu\n", + lino); + break; + case XR_INO_DATA: /* fall through to FIFO case ... */ + case XR_INO_RTDATA: /* fall through to FIFO case ... */ + case XR_INO_CHRDEV: /* fall through to FIFO case ... */ + case XR_INO_BLKDEV: /* fall through to FIFO case ... */ + case XR_INO_SOCK: /* fall through to FIFO case ... 
*/ + case XR_INO_FIFO: + err = 0; + break; + default: + printf("Unexpected inode type\n"); + abort(); + } + + blkmap_free(dblkmap); + + if (err) { + /* + * problem in the inode type-specific semantic + * checking, clear out the inode and get out + */ + if (!no_modify) { + *dirty += clear_dinode(mp, dino, lino); + ASSERT(*dirty > 0); + } + *cleared = 1; + *used = is_free; + *isa_dir = 0; + + return(1); + } + + /* + * check nlinks feature, if it's a version 1 inode, + * just leave nlinks alone. even if it's set wrong, + * it'll be reset when read in. + */ + if (dinoc->di_version > XFS_DINODE_VERSION_1 && !fs_inode_nlink) { + /* + * do we have a fs/inode version mismatch with a valid + * version 2 inode here that has to stay version 2 or + * lose links? + */ + if (INT_GET(dinoc->di_nlink, ARCH_CONVERT) > XFS_MAXLINK_1) { + /* + * yes. are nlink inodes allowed? + */ + if (fs_inode_nlink_allowed) { + /* + * yes, update status variable which will + * cause sb to be updated later. + */ + fs_inode_nlink = 1; + do_warn( + "version 2 inode %llu claims > %u links,", + lino, XFS_MAXLINK_1); + if (!no_modify) { + do_warn( + "updating superblock version number\n"); + } else { + do_warn( + "would update superblock version number\n"); + } + } else { + /* + * no, have to convert back to onlinks + * even if we lose some links + */ + do_warn( + "WARNING: version 2 inode %llu claims > %u links,", + lino, XFS_MAXLINK_1); + if (!no_modify) { + do_warn( + "converting back to version 1,\n\tthis may destroy %d links\n", + INT_GET(dinoc->di_nlink, ARCH_CONVERT) + - XFS_MAXLINK_1); + + dinoc->di_version = + XFS_DINODE_VERSION_1; + INT_SET(dinoc->di_nlink, ARCH_CONVERT, XFS_MAXLINK_1); + INT_SET(dinoc->di_onlink, ARCH_CONVERT, XFS_MAXLINK_1); + + *dirty = 1; + } else { + do_warn( + "would convert back to version 1,\n\tthis might destroy %d links\n", + INT_GET(dinoc->di_nlink, ARCH_CONVERT) + - XFS_MAXLINK_1); + } + } + } else { + /* + * do we have a v2 inode that we could convert back + * to 
v1 without losing any links? if we do and + * we have a mismatch between superblock bits and the + * version bit, alter the version bit in this case. + * + * the case where we lost links was handled above. + */ + do_warn("found version 2 inode %llu, ", lino); + if (!no_modify) { + do_warn("converting back to version 1\n"); + + dinoc->di_version = + XFS_DINODE_VERSION_1; + INT_SET(dinoc->di_onlink, ARCH_CONVERT, INT_GET(dinoc->di_nlink, ARCH_CONVERT)); + + *dirty = 1; + } else { + do_warn("would convert back to version 1\n"); + } + } + } + + /* + * ok, if it's still a version 2 inode, it's going + * to stay a version 2 inode. it should have a zero + * onlink field, so clear it. + */ + if (dinoc->di_version > XFS_DINODE_VERSION_1 && + INT_GET(dinoc->di_onlink, ARCH_CONVERT) > 0 && fs_inode_nlink > 0) { + if (!no_modify) { + do_warn( +"clearing obsolete nlink field in version 2 inode %llu, was %d, now 0\n", + lino, INT_GET(dinoc->di_onlink, ARCH_CONVERT)); + INT_ZERO(dinoc->di_onlink, ARCH_CONVERT); + *dirty = 1; + } else { + do_warn( +"would clear obsolete nlink field in version 2 inode %llu, currently %d\n", + lino, INT_GET(dinoc->di_onlink, ARCH_CONVERT)); + *dirty = 1; + } + } + + return(retval > 0 ? 1 : 0); +} + +/* + * returns 1 if inode is used, 0 if free. + * performs any necessary salvaging actions. + * note that we leave the generation count alone + * because nothing we could set it to would be + * guaranteed to be correct so the best guess for + * the correct value is just to leave it alone. + * + * The trick is detecting empty files. For those, + * the core and the forks should all be in the "empty" + * or zero-length state -- a zero or possibly minimum length + * (in the case of dirs) extent list -- although inline directories + * and symlinks might be handled differently. So it should be + * possible to sanity check them against each other. + * + * If the forks are an empty extent list though, then forget it. 
+ * The file is toast anyway since we can't recover its storage.
+ *
+ * Parameters:
+ *	Ins:
+ *		mp -- mount structure
+ *		dino -- pointer to on-disk inode structure
+ *		agno/ino -- inode number, given as an (agno, agino) pair
+ *		was_free -- whether the map thinks the inode is free (1 == free)
+ *		ino_discovery -- whether we should examine directory
+ *				contents to discover new inodes
+ *		check_dups -- whether we should check to see if the
+ *				inode references duplicate blocks
+ *				if so, we compare the inode's claimed
+ *				blocks against the contents of the
+ *				duplicate extent list but we don't
+ *				set the bitmap.  If not, we set the
+ *				bitmap and try and detect multiply
+ *				claimed blocks using the bitmap.
+ *		extra_attr_check -- handed straight to process_dinode_int,
+ *				where it gates the semantic check of the
+ *				attribute fork contents
+ *	Outs:
+ *		dirty -- whether we changed the inode (1 == yes)
+ *		cleared -- whether we cleared the inode (1 == yes).  In
+ *				no modify mode, if we would have cleared it
+ *		used -- 1 if the inode is used, 0 if free.  In no modify
+ *			mode, whether the inode should be used or free
+ *		isa_dir -- 1 if the inode is a directory, 0 if not.  In
+ *				no modify mode, if the inode would be a dir or not.
+ *		parent -- handed straight to process_dinode_int, which
+ *				gives it to the directory checker
+ *
+ *	Return value -- 0 if the inode is good, 1 if it is/was corrupt
+ */
+
+int
+process_dinode(xfs_mount_t	*mp,
+		xfs_dinode_t	*dino,
+		xfs_agnumber_t	agno,
+		xfs_agino_t	ino,
+		int		was_free,
+		int		*dirty,
+		int		*cleared,
+		int		*used,
+		int		ino_discovery,
+		int		check_dups,
+		int		extra_attr_check,
+		int		*isa_dir,
+		xfs_ino_t	*parent)
+{
+#ifdef XR_INODE_TRACE
+	fprintf(stderr, "processing inode %d/%d\n", agno, ino);
+#endif
+	/*
+	 * full processing pass: this is neither the cursory verify
+	 * mode nor an inode taken from the uncertain list, so both
+	 * of those switches are off.
+	 */
+	return process_dinode_int(mp, dino, agno, ino, was_free,
+			dirty, cleared, used,
+			0,		/* verify_mode */
+			0,		/* uncertain */
+			ino_discovery, check_dups, extra_attr_check,
+			isa_dir, parent);
+}
+
+/*
+ * a more cursory check, check inode core, *DON'T* check forks
+ * this basically just verifies whether the inode is an inode
+ * and whether or not it has been totally trashed.
returns 0
+ * if the inode passes the cursory sanity check, 1 otherwise.
+ */
+int
+verify_dinode(xfs_mount_t	*mp,
+		xfs_dinode_t	*dino,
+		xfs_agnumber_t	agno,
+		xfs_agino_t	ino)
+{
+	/* scratch outputs -- only the verdict is handed back */
+	xfs_ino_t	parent;
+	int		dirty = 0;
+	int		cleared = 0;
+	int		used = 0;
+	int		isa_dir = 0;
+
+	return process_dinode_int(mp, dino, agno, ino,
+			0,		/* was_free */
+			&dirty, &cleared, &used,
+			1,		/* verify_mode */
+			0,		/* uncertain */
+			0,		/* ino_discovery */
+			0,		/* check_dups */
+			0,		/* extra_attr_check */
+			&isa_dir, &parent);
+}
+
+/*
+ * same cursory check as verify_dinode, but for an inode that came
+ * off the uncertain list: the uncertain flag is switched on, which
+ * makes process_dinode_int quieter.
+ * returns 0 if the inode passes the cursory sanity check, 1 otherwise.
+ */
+int
+verify_uncertain_dinode(xfs_mount_t	*mp,
+		xfs_dinode_t	*dino,
+		xfs_agnumber_t	agno,
+		xfs_agino_t	ino)
+{
+	/* scratch outputs -- only the verdict is handed back */
+	xfs_ino_t	parent;
+	int		dirty = 0;
+	int		cleared = 0;
+	int		used = 0;
+	int		isa_dir = 0;
+
+	return process_dinode_int(mp, dino, agno, ino,
+			0,		/* was_free */
+			&dirty, &cleared, &used,
+			1,		/* verify_mode */
+			1,		/* uncertain */
+			0,		/* ino_discovery */
+			0,		/* check_dups */
+			0,		/* extra_attr_check */
+			&isa_dir, &parent);
+}
diff --git a/repair/dinode.h b/repair/dinode.h
new file mode 100644
index 000000000..196068af2
--- /dev/null
+++ b/repair/dinode.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef _XR_DINODE_H +#define _XR_DINODE_H + +struct blkmap; + +int +verify_agbno(xfs_mount_t *mp, + xfs_agnumber_t agno, + xfs_agblock_t agbno); + +int +verify_dfsbno(xfs_mount_t *mp, + xfs_dfsbno_t fsbno); + +void +convert_extent( + xfs_bmbt_rec_32_t *rp, + xfs_dfiloff_t *op, /* starting offset (blockno in file) */ + xfs_dfsbno_t *sp, /* starting block (fs blockno) */ + xfs_dfilblks_t *cp, /* blockcount */ + int *fp); /* extent flag */ + +int +process_bmbt_reclist(xfs_mount_t *mp, + xfs_bmbt_rec_32_t *rp, + int numrecs, + int type, + xfs_ino_t ino, + xfs_drfsbno_t *tot, + struct blkmap **blkmapp, + __uint64_t *first_key, + __uint64_t *last_key, + int whichfork); + +int +scan_bmbt_reclist( + xfs_mount_t *mp, + xfs_bmbt_rec_32_t *rp, + int numrecs, + int type, + xfs_ino_t ino, + xfs_drfsbno_t *tot, + int whichfork); + +int +verify_inode_chunk(xfs_mount_t *mp, + xfs_ino_t ino, + xfs_ino_t *start_ino); + +int verify_aginode_chunk(xfs_mount_t *mp, + xfs_agnumber_t agno, + xfs_agino_t agino, + xfs_agino_t *agino_start); + +int +clear_dinode(xfs_mount_t *mp, xfs_dinode_t *dino, xfs_ino_t 
ino_num); + +void +update_rootino(xfs_mount_t *mp); + +int +process_dinode(xfs_mount_t *mp, + xfs_dinode_t *dino, + xfs_agnumber_t agno, + xfs_agino_t ino, + int was_free, + int *dirty, + int *tossit, + int *used, + int check_dirs, + int check_dups, + int extra_attr_check, + int *isa_dir, + xfs_ino_t *parent); + +int +verify_dinode(xfs_mount_t *mp, + xfs_dinode_t *dino, + xfs_agnumber_t agno, + xfs_agino_t ino); + +int +verify_uncertain_dinode(xfs_mount_t *mp, + xfs_dinode_t *dino, + xfs_agnumber_t agno, + xfs_agino_t ino); + +int +verify_inum(xfs_mount_t *mp, + xfs_ino_t ino); + +int +verify_aginum(xfs_mount_t *mp, + xfs_agnumber_t agno, + xfs_agino_t agino); + +int +process_uncertain_aginodes(xfs_mount_t *mp, + xfs_agnumber_t agno); +void +process_aginodes(xfs_mount_t *mp, + xfs_agnumber_t agno, + int check_dirs, + int check_dups, + int extra_attr_check); + +void +check_uncertain_aginodes(xfs_mount_t *mp, + xfs_agnumber_t agno); + +xfs_buf_t * +get_agino_buf(xfs_mount_t *mp, + xfs_agnumber_t agno, + xfs_agino_t agino, + xfs_dinode_t **dipp); + +xfs_dfsbno_t +get_bmapi(xfs_mount_t *mp, + xfs_dinode_t *dip, + xfs_ino_t ino_num, + xfs_dfiloff_t bno, + int whichfork ); + +#endif /* _XR_DINODE_H */ diff --git a/repair/dir.c b/repair/dir.c new file mode 100644 index 000000000..4854b54e5 --- /dev/null +++ b/repair/dir.c @@ -0,0 +1,3033 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. 
Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include
+#include "avl.h"
+#include "globals.h"
+#include "agheader.h"
+#include "incore.h"
+#include "protos.h"
+#include "err_protos.h"
+#include "dinode.h"
+#include "dir.h"
+#include "bmap.h"
+
+#if XFS_DIR_LEAF_MAPSIZE >= XFS_ATTR_LEAF_MAPSIZE
+#define XR_DA_LEAF_MAPSIZE	XFS_DIR_LEAF_MAPSIZE
+#else
+#define XR_DA_LEAF_MAPSIZE	XFS_ATTR_LEAF_MAPSIZE
+#endif
+
+
+
+typedef struct da_hole_map {
+	int	lost_holes;
+	int	num_holes;
+	struct {
+		int	base;
+		int	size;
+	} hentries[XR_DA_LEAF_MAPSIZE];
+} da_hole_map_t;
+
+/*
+ * scans the first "length" bytes of "name" (which need not be
+ * null-terminated).  returns 1 as soon as a '/' or a \0 byte is
+ * seen, 0 if the whole span is clean.  length must be below
+ * MAXNAMELEN (asserted).
+ */
+int
+namecheck(char *name, int length)
+{
+	int	pos;
+
+	ASSERT(length < MAXNAMELEN);
+
+	for (pos = 0; pos < length; pos++) {
+		switch (name[pos]) {
+		case '/':
+		case '\0':
+			return(1);
+		default:
+			break;
+		}
+	}
+
+	return(0);
+}
+
+/*
+ * this routine performs inode discovery and tries to fix things
+ * in place.  available redundancy -- inode data size should match
+ * used directory space in inode.  the function body has a single
+ * return statement, return(0); repairs are reported through the
+ * *repair and *dino_dirty out-parameters rather than the return
+ * value.  (an earlier version of this note promised the number
+ * of valid directory entries, with non-zero meaning the directory
+ * is bogus and should be blasted.
+ */
+/*
+ * parameter notes for process_shortform_dir:
+ *	ino_discovery -- non-zero: an entry naming an inode we have no
+ *		record of is put on the uncertain list.  zero: such
+ *		entries, and entries naming free inodes, are junked.
+ *	dino_dirty -- set to 1 whenever the inode buffer is modified
+ *	parent -- receives the ".." inode number, or NULLFSINO when the
+ *		stored value is invalid or a non-root dir points at itself
+ *	dirname -- not used by this routine
+ *	repair -- reset to 0 on entry, set to 1 when something was fixed
+ *
+ * when the global no_modify is set nothing is written; the warnings
+ * describe what would have been done.  always returns 0.
+ */
+/* ARGSUSED */
+int
+process_shortform_dir(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino,
+	xfs_dinode_t	*dip,
+	int		ino_discovery,
+	int		*dino_dirty,	/* out - 1 if dinode buffer dirty? */
+	xfs_ino_t	*parent,	/* out - NULLFSINO if entry doesn't exist */
+	char		*dirname,	/* directory pathname */
+	int		*repair)	/* out - 1 if dir was fixed up */
+{
+	xfs_dir_shortform_t	*sf;
+	xfs_dir_sf_entry_t	*sf_entry, *next_sfe, *tmp_sfe;
+	xfs_ino_t		lino;
+	int			max_size;
+	__int64_t		ino_dir_size;
+	int			num_entries;
+	int			ino_off;
+	int			namelen;
+	int			i;
+	int			junkit;
+	int			tmp_len;
+	int			tmp_elen;
+	int			bad_sfnamelen;
+	ino_tree_node_t		*irec_p;
+	char			name[MAXNAMELEN + 1];
+
+#ifdef XR_DIR_TRACE
+	fprintf(stderr, "process_shortform_dir - inode %llu\n", ino);
+#endif
+
+	sf = &dip->di_u.di_dirsf;
+
+	max_size = XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT);
+	num_entries = INT_GET(sf->hdr.count, ARCH_CONVERT);
+	ino_dir_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT);
+	*repair = 0;
+
+	ASSERT(ino_dir_size <= max_size);
+
+	/*
+	 * check for bad entry count.  an implausible count is replaced
+	 * by 0xFF so that the walk below is bounded by the directory
+	 * size instead of by the count.
+	 */
+	if (num_entries * sizeof(xfs_dir_sf_entry_t) + sizeof(xfs_dir_sf_hdr_t)
+			> max_size || num_entries == 0)
+		num_entries = 0xFF;
+
+	/*
+	 * run through entries, stop at first bad entry, don't need
+	 * to check for .. since that's encoded in its own field
+	 */
+	sf_entry = next_sfe = &sf->list[0];
+	for (i = 0; i < num_entries && ino_dir_size >
+				(__psint_t)next_sfe - (__psint_t)sf; i++) {
+		tmp_sfe = NULL;
+		sf_entry = next_sfe;
+		junkit = 0;
+		bad_sfnamelen = 0;
+		XFS_DIR_SF_GET_DIRINO_ARCH(&sf_entry->inumber, &lino, ARCH_CONVERT);
+
+		/*
+		 * if entry points to self, junk it since only '.' or '..'
+		 * should do that and shortform dirs don't contain either
+		 * entry.  if inode number is invalid, trash entry.
+		 * if entry points to special inodes, trash it.
+		 * if inode is unknown but number is valid,
+		 * add it to the list of uncertain inodes.  don't
+		 * have to worry about an entry pointing to a
+		 * deleted lost+found inode because the entry was
+		 * deleted at the same time that the inode was cleared.
+		 */
+		if (lino == ino) {
+			junkit = 1;
+		} else if (verify_inum(mp, lino)) {
+			/*
+			 * junk the entry, mark lino as NULL since it's bad
+			 */
+			do_warn("invalid inode number %llu in directory %llu\n",
+				lino, ino);
+			lino = NULLFSINO;
+			junkit = 1;
+		} else if (lino == mp->m_sb.sb_rbmino) {
+			do_warn(
+	"entry in shorform dir %llu references realtime bitmap inode %llu\n",
+				ino, lino);
+			junkit = 1;
+		} else if (lino == mp->m_sb.sb_rsumino) {
+			do_warn(
+	"entry in shorform dir %llu references realtime summary inode %llu\n",
+				ino, lino);
+			junkit = 1;
+		} else if (lino == mp->m_sb.sb_uquotino) {
+			do_warn(
+	"entry in shorform dir %llu references user quota inode %llu\n",
+				ino, lino);
+			junkit = 1;
+		} else if (lino == mp->m_sb.sb_pquotino) {
+			do_warn(
+	"entry in shorform dir %llu references proj quota inode %llu\n",
+				ino, lino);
+			junkit = 1;
+		} else if ((irec_p = find_inode_rec(XFS_INO_TO_AGNO(mp, lino),
+					XFS_INO_TO_AGINO(mp, lino))) != NULL) {
+			/*
+			 * if inode is marked free and we're in inode
+			 * discovery mode, leave the entry alone for now.
+			 * if the inode turns out to be used, we'll figure
+			 * that out when we scan it.  If the inode really
+			 * is free, we'll hit this code again in phase 4
+			 * after we've finished inode discovery and blow
+			 * out the entry then.
+			 */
+			ino_off = XFS_INO_TO_AGINO(mp, lino) -
+					irec_p->ino_startnum;
+			ASSERT(is_inode_confirmed(irec_p, ino_off));
+
+			if (!ino_discovery && is_inode_free(irec_p, ino_off)) {
+				do_warn(
+	"entry references free inode %llu in shortform directory %llu\n",
+					lino, ino);
+				junkit = 1;
+			}
+		} else if (ino_discovery) {
+			/*
+			 * put the inode on the uncertain list.  we'll
+			 * pull the inode off the list and check it later.
+			 * if the inode turns out be bogus, we'll delete
+			 * this entry in phase 6.
+			 */
+			add_inode_uncertain(mp, lino, 0);
+		} else {
+			/*
+			 * blow the entry out.  we know about all
+			 * undiscovered entries now (past inode discovery
+			 * phase) so this is clearly a bogus entry.
+			 */
+			do_warn(
+	"entry references non-existent inode %llu in shortform dir %llu\n",
+				lino, ino);
+			junkit = 1;
+		}
+
+		namelen = sf_entry->namelen;
+
+		if (namelen == 0) {
+			/*
+			 * if we're really lucky, this is
+			 * the last entry in which case we
+			 * can use the dir size to set the
+			 * namelen value.  otherwise, forget
+			 * it because we're not going to be
+			 * able to find the next entry.
+			 */
+			bad_sfnamelen = 1;
+
+			if (i == num_entries - 1) {
+				namelen = ino_dir_size -
+					((__psint_t) &sf_entry->name[0] -
+					 (__psint_t) sf);
+				if (!no_modify) {
+					do_warn(
+	"zero length entry in shortform dir %llu, resetting to %d\n",
+						ino, namelen);
+					sf_entry->namelen = namelen;
+					/*
+					 * the inode buffer was just
+					 * changed, so flag it the same
+					 * way the overflow case below
+					 * does
+					 */
+					*dino_dirty = 1;
+				} else {
+					do_warn(
+	"zero length entry in shortform dir %llu, would set to %d\n",
+						ino, namelen);
+				}
+			} else {
+				do_warn(
+	"zero length entry in shortform dir %llu",
+					ino);
+				if (!no_modify)
+					do_warn(", junking %d entries\n",
+						num_entries - i);
+				else
+					do_warn(", would junk %d entries\n",
+						num_entries - i);
+				/*
+				 * don't process the rest of the directory,
+				 * break out of processing loop
+				 */
+				break;
+			}
+		} else if ((__psint_t) sf_entry - (__psint_t) sf
+				+ XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)
+				> ino_dir_size) {
+			bad_sfnamelen = 1;
+
+			if (i == num_entries - 1) {
+				namelen = ino_dir_size -
+					((__psint_t) &sf_entry->name[0] -
+					 (__psint_t) sf);
+				do_warn(
+	"size of last entry overflows space left in in shortform dir %llu, ",
+					ino);
+				if (!no_modify) {
+					do_warn("resetting to %d\n",
+						namelen);
+					sf_entry->namelen = namelen;
+					*dino_dirty = 1;
+				} else {
+					do_warn("would reset to %d\n",
+						namelen);
+				}
+			} else {
+				/*
+				 * not the last entry (that case was taken
+				 * above), so more than one entry is lost
+				 * and only the plural message applies.
+				 */
+				do_warn(
+	"size of entry #%d overflows space left in in shortform dir %llu\n",
+					i, ino);
+				if (!no_modify)
+					do_warn(
+					"junking %d entries\n",
+						num_entries - i);
+				else
+					do_warn(
+					"would junk %d entries\n",
+						num_entries - i);
+
+				break;
+			}
+		}
+
+		/*
+		 * check for illegal chars in name.
+		 * no need to check for bad length because
+		 * the length value is stored in a byte
+		 * so it can't be too big, it can only wrap
+		 */
+		if (namecheck((char *)&sf_entry->name[0], namelen)) {
+			/*
+			 * junk entry
+			 */
+			do_warn(
+	"entry contains illegal character in shortform dir %llu\n",
+				ino);
+			junkit = 1;
+		}
+
+		/*
+		 * junk the entry by copying up the rest of the
+		 * fork over the current entry and decrementing
+		 * the entry count.  if we're in no_modify mode,
+		 * just issue the warning instead.  then continue
+		 * the loop with the next_sfe pointer set to the
+		 * correct place in the fork and other counters
+		 * properly set to reflect the deletion if it
+		 * happened.
+		 */
+		if (junkit) {
+			bcopy(sf_entry->name, name, namelen);
+			name[namelen] = '\0';
+
+			if (!no_modify) {
+				tmp_elen = XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry);
+				INT_MOD(dip->di_core.di_size, ARCH_CONVERT, -(tmp_elen));
+				ino_dir_size -= tmp_elen;
+
+				tmp_sfe = (xfs_dir_sf_entry_t *)
+					((__psint_t) sf_entry + tmp_elen);
+				tmp_len = max_size - ((__psint_t) tmp_sfe
+							- (__psint_t) sf);
+
+				memmove(sf_entry, tmp_sfe, tmp_len);
+
+				INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
+				num_entries--;
+				bzero((void *) ((__psint_t) sf_entry + tmp_len),
+					tmp_elen);
+
+				/*
+				 * reset the tmp value to the current
+				 * pointer so we'll process the entry
+				 * we just moved up
+				 */
+				tmp_sfe = sf_entry;
+
+				/*
+				 * WARNING:  drop the index i by one
+				 * so it matches the decremented count
+				 * for accurate comparisons later
+				 */
+				i--;
+
+				*dino_dirty = 1;
+				*repair = 1;
+
+				do_warn(
+	"junking entry \"%s\" in directory inode %llu\n",
+					name, ino);
+			} else {
+				do_warn(
+	"would have junked entry \"%s\" in directory inode %llu\n",
+					name, ino);
+			}
+		}
+
+		/*
+		 * go onto next entry unless we've just junked an
+		 * entry in which the current entry pointer points
+		 * to an unprocessed entry.  have to take zero-len
+		 * entries into account in no modify mode since we
+		 * calculate size based on next_sfe.
+		 */
+		next_sfe = (tmp_sfe == NULL)
+			? (xfs_dir_sf_entry_t *) ((__psint_t) sf_entry
+				+ ((!bad_sfnamelen)
+					? XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)
+					: sizeof(xfs_dir_sf_entry_t) - 1
+						+ namelen))
+			: tmp_sfe;
+	}
+
+	/* sync up sizes and entry counts */
+
+	if (INT_GET(sf->hdr.count, ARCH_CONVERT) != i) {
+		if (no_modify) {
+do_warn("would have corrected entry count in directory %llu from %d to %d\n",
+			ino, INT_GET(sf->hdr.count, ARCH_CONVERT), i);
+		} else {
+do_warn("corrected entry count in directory %llu, was %d, now %d\n",
+			ino, INT_GET(sf->hdr.count, ARCH_CONVERT), i);
+			INT_SET(sf->hdr.count, ARCH_CONVERT, i);
+			*dino_dirty = 1;
+			*repair = 1;
+		}
+	}
+
+	if ((__psint_t) next_sfe - (__psint_t) sf != ino_dir_size) {
+		if (no_modify) {
+			do_warn(
+	"would have corrected directory %llu size from %lld to %lld\n",
+				ino, (__int64_t) ino_dir_size,
+			(__int64_t)((__psint_t) next_sfe - (__psint_t) sf));
+		} else {
+			do_warn(
+	"corrected directory %llu size, was %lld, now %lld\n",
+				ino, (__int64_t) ino_dir_size,
+			(__int64_t)((__psint_t) next_sfe - (__psint_t) sf));
+
+			INT_SET(dip->di_core.di_size, ARCH_CONVERT, (xfs_fsize_t)
+					((__psint_t) next_sfe - (__psint_t) sf));
+			*dino_dirty = 1;
+			*repair = 1;
+		}
+	}
+	/*
+	 * check parent (..) entry
+	 */
+	XFS_DIR_SF_GET_DIRINO_ARCH(&sf->hdr.parent, parent, ARCH_CONVERT);
+
+	/*
+	 * if parent entry is bogus, null it out.  we'll fix it later.
+	 */
+	if (verify_inum(mp, *parent)) {
+		/*
+		 * report the bad value first -- once *parent has been
+		 * overwritten the warning could only print NULLFSINO
+		 */
+		do_warn(
+	"bogus .. inode number (%llu) in directory inode %llu,",
+				*parent, ino);
+
+		*parent = NULLFSINO;
+
+		if (!no_modify) {
+			do_warn("clearing inode number\n");
+
+			XFS_DIR_SF_PUT_DIRINO_ARCH(parent, &sf->hdr.parent, ARCH_CONVERT);
+			*dino_dirty = 1;
+			*repair = 1;
+		} else {
+			do_warn("would clear inode number\n");
+		}
+	} else if (ino == mp->m_sb.sb_rootino && ino != *parent) {
+		/*
+		 * root directories must have .. == .
+		 */
+		if (!no_modify) {
+			do_warn(
+	"corrected root directory %llu .. entry, was %llu, now %llu\n",
+				ino, *parent, ino);
+			*parent = ino;
+			XFS_DIR_SF_PUT_DIRINO_ARCH(parent, &sf->hdr.parent, ARCH_CONVERT);
+			*dino_dirty = 1;
+			*repair = 1;
+		} else {
+			do_warn(
+	"would have corrected root directory %llu .. entry from %llu to %llu\n",
+				ino, *parent, ino);
+		}
+	} else if (ino == *parent && ino != mp->m_sb.sb_rootino) {
+		/*
+		 * likewise, non-root directories can't have .. pointing
+		 * to .
+		 */
+		*parent = NULLFSINO;
+		do_warn("bad .. entry in dir ino %llu, points to self,",
+			ino);
+		if (!no_modify) {
+			do_warn(" clearing inode number\n");
+
+			XFS_DIR_SF_PUT_DIRINO_ARCH(parent, &sf->hdr.parent, ARCH_CONVERT);
+			*dino_dirty = 1;
+			*repair = 1;
+		} else {
+			do_warn(" would clear inode number\n");
+		}
+	}
+
+	return(0);
+}
+
+/*
+ * freespace map for directory leaf blocks (1 bit per byte)
+ * 1 == used, 0 == free
+ */
+static da_freemap_t dir_freemap[DA_BMAP_SIZE];
+
+#if 0
+unsigned char *
+alloc_da_freemap(xfs_mount_t *mp)
+{
+	unsigned char *freemap;
+
+	if ((freemap = malloc(mp->m_sb.sb_blocksize)) == NULL)
+		return(NULL);
+
+	bzero(freemap, mp->m_sb.sb_blocksize/NBBY);
+
+	return(freemap);
+}
+#endif
+
+void
+init_da_freemap(da_freemap_t *dir_freemap)
+{
+	bzero(dir_freemap, sizeof(da_freemap_t) * DA_BMAP_SIZE);
+}
+
+/*
+ * sets directory freemap, returns 1 if there is a conflict
+ * returns 0 if everything's good.  the range [start, stop) is set.
+ * right now, we just use the static array since only one directory + * block will be processed at once even though the interface allows + * you to pass in arbitrary da_freemap_t array's. + * + * Within a char, the lowest bit of the char represents the byte with + * the smallest address + */ +int +set_da_freemap(xfs_mount_t *mp, da_freemap_t *map, int start, int stop) +{ + const da_freemap_t mask = 0x1; + int i; + + if (start > stop) { + /* + * allow == relation since [x, x) claims 1 byte + */ + do_warn("bad range claimed [%d, %d) in da block\n", + start, stop); + return(1); + } + + if (stop > mp->m_sb.sb_blocksize) { + do_warn( + "byte range end [%d %d) in da block larger than blocksize %d\n", + start, stop, mp->m_sb.sb_blocksize); + return(1); + } + + for (i = start; i < stop; i ++) { + if (map[i / NBBY] & (mask << i % NBBY)) { + do_warn("multiply claimed byte %d in da block\n", i); + return(1); + } + map[i / NBBY] |= (mask << i % NBBY); + } + + return(0); +} + +/* + * returns 0 if holemap is consistent with reality (as expressed by + * the da_freemap_t). returns 1 if there's a conflict. 
+ */ +int +verify_da_freemap(xfs_mount_t *mp, da_freemap_t *map, da_hole_map_t *holes, + xfs_ino_t ino, xfs_dablk_t da_bno) +{ + int i, j, start, len; + const da_freemap_t mask = 0x1; + + for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; i++) { + if (holes->hentries[i].size == 0) + continue; + + start = holes->hentries[i].base; + len = holes->hentries[i].size; + + if (start >= mp->m_sb.sb_blocksize || + start + len > mp->m_sb.sb_blocksize) { + do_warn( + "hole (start %d, len %d) out of range, block %d, dir ino %llu\n", + start, len, da_bno, ino); + return(1); + } + + for (j = start; j < start + len; j++) { + if ((map[j / NBBY] & (mask << (j % NBBY))) != 0) { + /* + * bad news -- hole claims a used byte is free + */ + do_warn( + "hole claims used byte %d, block %d, dir ino %llu\n", + j, da_bno, ino); + return(1); + } + } + } + + return(0); +} + +void +process_da_freemap(xfs_mount_t *mp, da_freemap_t *map, da_hole_map_t *holes) +{ + int i, j, in_hole, start, length, smallest, num_holes; + const da_freemap_t mask = 0x1; + + num_holes = in_hole = start = length = 0; + + for (i = 0; i < mp->m_sb.sb_blocksize; i++) { + if ((map[i / NBBY] & (mask << (i % NBBY))) == 0) { + /* + * byte is free (unused) + */ + if (in_hole == 1) + continue; + /* + * start of a new hole + */ + in_hole = 1; + start = i; + } else { + /* + * byte is used + */ + if (in_hole == 0) + continue; + /* + * end of a hole + */ + in_hole = 0; + /* + * if the hole disappears, throw it away + */ + length = i - start; + + if (length <= 0) + continue; + + num_holes++; + + for (smallest = j = 0; j < XR_DA_LEAF_MAPSIZE; j++) { + if (holes->hentries[j].size < + holes->hentries[smallest].size) + smallest = j; + + } + if (length > holes->hentries[smallest].size) { + holes->hentries[smallest].base = start; + holes->hentries[smallest].size = length; + } + } + } + + /* + * see if we have a big hole at the end + */ + if (in_hole == 1) { + /* + * duplicate of hole placement code above + */ + length = i - start; + + if (length > 0) 
{ + num_holes++; + + for (smallest = j = 0; j < XR_DA_LEAF_MAPSIZE; j++) { + if (holes->hentries[j].size < + holes->hentries[smallest].size) + smallest = j; + + } + if (length > holes->hentries[smallest].size) { + holes->hentries[smallest].base = start; + holes->hentries[smallest].size = length; + } + } + } + + holes->lost_holes = MAX(num_holes - XR_DA_LEAF_MAPSIZE, 0); + holes->num_holes = num_holes; + + return; +} + +/* + * returns 1 if the hole info doesn't match, 0 if it does + */ +/* ARGSUSED */ +int +compare_da_freemaps(xfs_mount_t *mp, da_hole_map_t *holemap, + da_hole_map_t *block_hmap, int entries, + xfs_ino_t ino, xfs_dablk_t da_bno) +{ + int i, k, res, found; + + res = 0; + + /* + * we chop holemap->lost_holes down to being two-valued + * value (1 or 0) for the test because the filesystem + * value is two-valued + */ + if ((holemap->lost_holes > 0 ? 1 : 0) != block_hmap->lost_holes) { + if (verbose) { + do_warn( + "- derived hole value %d, saw %d, block %d, dir ino %llu\n", + holemap->lost_holes, block_hmap->lost_holes, + da_bno, ino); + res = 1; + } else + return(1); + } + + for (i = 0; i < entries; i++) { + for (found = k = 0; k < entries; k++) { + if (holemap->hentries[i].base == + block_hmap->hentries[k].base + && holemap->hentries[i].size == + block_hmap->hentries[k].size) + found = 1; + } + if (!found) { + if (verbose) { + do_warn( +"- derived hole (base %d, size %d) in block %d, dir inode %llu not found\n", + holemap->hentries[i].base, + holemap->hentries[i].size, + da_bno, ino); + res = 1; + } else + return(1); + } + } + + return(res); +} + +#if 0 +void +test(xfs_mount_t *mp) +{ + int i = 0; + da_hole_map_t holemap; + + init_da_freemap(dir_freemap); + bzero(&holemap, sizeof(da_hole_map_t)); + + set_da_freemap(mp, dir_freemap, 0, 50); + set_da_freemap(mp, dir_freemap, 100, 126); + set_da_freemap(mp, dir_freemap, 126, 129); + set_da_freemap(mp, dir_freemap, 130, 131); + set_da_freemap(mp, dir_freemap, 150, 160); + process_da_freemap(mp, 
dir_freemap, &holemap); + + return; +} +#endif + + +/* + * walk tree from root to the left-most leaf block reading in + * blocks and setting up cursor. passes back file block number of the + * left-most leaf block if successful (bno). returns 1 if successful, + * 0 if unsuccessful. + */ +int +traverse_int_dablock(xfs_mount_t *mp, + da_bt_cursor_t *da_cursor, + xfs_dablk_t *rbno, + int whichfork) +{ + xfs_dablk_t bno; + int i; + xfs_da_intnode_t *node; + xfs_dfsbno_t fsbno; + xfs_buf_t *bp; + + /* + * traverse down left-side of tree until we hit the + * left-most leaf block setting up the btree cursor along + * the way. + */ + bno = 0; + i = -1; + node = NULL; + da_cursor->active = 0; + + do { + /* + * read in each block along the way and set up cursor + */ + fsbno = blkmap_get(da_cursor->blkmap, bno); + + if (fsbno == NULLDFSBNO) + goto error_out; + + bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno), + XFS_FSB_TO_BB(mp, 1), 0); + if (!bp) { + if (whichfork == XFS_DATA_FORK) + do_warn("can't read block %u (fsbno %llu) for " + "directory inode %llu\n", + bno, fsbno, da_cursor->ino); + else + do_warn("can't read block %u (fsbno %llu) for " + "attrbute fork of inode %llu\n", + bno, fsbno, da_cursor->ino); + goto error_out; + } + + node = (xfs_da_intnode_t *)XFS_BUF_PTR(bp); + + if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC) { + do_warn("bad dir/attr magic number in inode %llu, file " + "bno = %u, fsbno = %llu\n", da_cursor->ino, bno, fsbno); + libxfs_putbuf(bp); + goto error_out; + } + if (INT_GET(node->hdr.count, ARCH_CONVERT) > XFS_DA_NODE_ENTRIES(mp)) { + do_warn("bad record count in inode %llu, count = %d, max = %d\n", + da_cursor->ino, INT_GET(node->hdr.count, ARCH_CONVERT), + XFS_DA_NODE_ENTRIES(mp)); + libxfs_putbuf(bp); + goto error_out; + } + + /* + * maintain level counter + */ + if (i == -1) + i = da_cursor->active = INT_GET(node->hdr.level, ARCH_CONVERT); + else { + if (INT_GET(node->hdr.level, ARCH_CONVERT) == i - 1) { + 
i--; + } else { + if (whichfork == XFS_DATA_FORK) + do_warn("bad directory btree for directory " + "inode %llu\n", da_cursor->ino); + else + do_warn("bad attribute fork btree for " + "inode %llu\n", da_cursor->ino); + libxfs_putbuf(bp); + goto error_out; + } + } + + da_cursor->level[i].hashval = + INT_GET(node->btree[0].hashval, ARCH_CONVERT); + da_cursor->level[i].bp = bp; + da_cursor->level[i].bno = bno; + da_cursor->level[i].index = 0; +#ifdef XR_DIR_TRACE + da_cursor->level[i].n = XFS_BUF_TO_DA_INTNODE(bp); +#endif + + /* + * set up new bno for next level down + */ + bno = INT_GET(node->btree[0].before, ARCH_CONVERT); + } while(node != NULL && i > 1); + + /* + * now return block number and get out + */ + *rbno = da_cursor->level[0].bno = bno; + return(1); + +error_out: + while (i > 1 && i <= da_cursor->active) { + libxfs_putbuf(da_cursor->level[i].bp); + i++; + } + + return(0); +} + +/* + * blow out buffer for this level and all the rest above as well + * if error == 0, we are not expecting to encounter any unreleased + * buffers (e.g. if we do, it's a mistake). if error == 1, we're + * in an error-handling case so unreleased buffers may exist. 
+ */ +void +release_da_cursor_int(xfs_mount_t *mp, + da_bt_cursor_t *cursor, + int prev_level, + int error) +{ + int level = prev_level + 1; + + if (cursor->level[level].bp != NULL) { + if (!error) { + do_warn("release_da_cursor_int got unexpected non-null bp, " + "dabno = %u\n", cursor->level[level].bno); + } + ASSERT(error != 0); + + libxfs_putbuf(cursor->level[level].bp); + cursor->level[level].bp = NULL; + } + + if (level < cursor->active) + release_da_cursor_int(mp, cursor, level, error); + + return; +} + +void +release_da_cursor(xfs_mount_t *mp, + da_bt_cursor_t *cursor, + int prev_level) +{ + release_da_cursor_int(mp, cursor, prev_level, 0); +} + +void +err_release_da_cursor(xfs_mount_t *mp, + da_bt_cursor_t *cursor, + int prev_level) +{ + release_da_cursor_int(mp, cursor, prev_level, 1); +} + +/* + * like traverse_int_dablock only it does far less checking + * and doesn't maintain the cursor. Just gets you to the + * leftmost block in the directory. returns the fsbno + * of that block if successful, NULLDFSBNO if not. + */ +xfs_dfsbno_t +get_first_dblock_fsbno(xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dino) +{ + xfs_dablk_t bno; + int i; + xfs_da_intnode_t *node; + xfs_dfsbno_t fsbno; + xfs_buf_t *bp; + + /* + * traverse down left-side of tree until we hit the + * left-most leaf block setting up the btree cursor along + * the way. + */ + bno = 0; + i = -1; + node = NULL; + + fsbno = get_bmapi(mp, dino, ino, bno, XFS_DATA_FORK); + + if (fsbno == NULLDFSBNO) { + do_warn("bmap of block #%u of inode %llu failed\n", + bno, ino); + return(fsbno); + } + + if (INT_GET(dino->di_core.di_size, ARCH_CONVERT) <= XFS_LBSIZE(mp)) + return(fsbno); + + do { + /* + * walk down left side of btree, release buffers as you + * go. if the root block is a leaf (single-level btree), + * just return it. 
+ * + */ + + bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno), + XFS_FSB_TO_BB(mp, 1), 0); + if (!bp) { + do_warn("can't read block %u (fsbno %llu) for directory " + "inode %llu\n", bno, fsbno, ino); + return(NULLDFSBNO); + } + + node = (xfs_da_intnode_t *)XFS_BUF_PTR(bp); + + if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC) { + do_warn("bad dir/attr magic number in inode %llu, file " + "bno = %u, fsbno = %llu\n", ino, bno, fsbno); + libxfs_putbuf(bp); + return(NULLDFSBNO); + } + + if (i == -1) + i = INT_GET(node->hdr.level, ARCH_CONVERT); + bno = INT_GET(node->btree[0].before, ARCH_CONVERT); + + libxfs_putbuf(bp); + + fsbno = get_bmapi(mp, dino, ino, bno, XFS_DATA_FORK); + + if (fsbno == NULLDFSBNO) { + do_warn("bmap of block #%u of inode %llu failed\n", bno, ino); + return(NULLDFSBNO); + } + + i--; + } while(i > 0); + + return(fsbno); +} + +/* + * make sure that all entries in all blocks along the right side of + * of the tree are used and hashval's are consistent. level is the + * level of the descendent block. returns 0 if good (even if it had + * to be fixed up), and 1 if bad. The right edge of the tree is + * technically a block boundary. this routine should be used then + * instead of verify_da_path(). + */ +int +verify_final_da_path(xfs_mount_t *mp, + da_bt_cursor_t *cursor, + const int p_level) +{ + xfs_da_intnode_t *node; + int bad = 0; + int entry; + int this_level = p_level + 1; + +#ifdef XR_DIR_TRACE + fprintf(stderr, "in verify_final_da_path, this_level = %d\n", + this_level); +#endif + /* + * the index should point to the next "unprocessed" entry + * in the block which should be the final (rightmost) entry + */ + entry = cursor->level[this_level].index; + node = (xfs_da_intnode_t *)XFS_BUF_PTR(cursor->level[this_level].bp); + /* + * check internal block consistency on this level -- ensure + * that all entries are used, encountered and expected hashvals + * match, etc. 
+ */ + if (entry != INT_GET(node->hdr.count, ARCH_CONVERT) - 1) { + do_warn("directory/attribute block used/count inconsistency - %d/%hu\n", + entry, INT_GET(node->hdr.count, ARCH_CONVERT)); + bad++; + } + /* + * hash values monotonically increasing ??? + */ + if (cursor->level[this_level].hashval >= + INT_GET(node->btree[entry].hashval, ARCH_CONVERT)) { + do_warn("directory/attribute block hashvalue inconsistency, " + "expected > %u / saw %u\n", cursor->level[this_level].hashval, + INT_GET(node->btree[entry].hashval, ARCH_CONVERT)); + bad++; + } + if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) != 0) { + do_warn("bad directory/attribute forward block pointer, expected 0, " + "saw %u\n", INT_GET(node->hdr.info.forw, ARCH_CONVERT)); + bad++; + } + if (bad) { + do_warn("bad directory block in dir ino %llu\n", cursor->ino); + return(1); + } + /* + * keep track of greatest block # -- that gets + * us the length of the directory + */ + if (cursor->level[this_level].bno > cursor->greatest_bno) + cursor->greatest_bno = cursor->level[this_level].bno; + + /* + * ok, now check descendant block number against this level + */ + if (cursor->level[p_level].bno != + INT_GET(node->btree[entry].before, ARCH_CONVERT)) { +#ifdef XR_DIR_TRACE + fprintf(stderr, "bad directory btree pointer, child bno should be %d, " + "block bno is %d, hashval is %u\n", + INT_GET(node->btree[entry].before, ARCH_CONVERT), + cursor->level[p_level].bno, + cursor->level[p_level].hashval); + fprintf(stderr, "verify_final_da_path returns 1 (bad) #1a\n"); +#endif + return(1); + } + + if (cursor->level[p_level].hashval != + INT_GET(node->btree[entry].hashval, ARCH_CONVERT)) { + if (!no_modify) { + do_warn("correcting bad hashval in non-leaf dir/attr block\n"); + do_warn("\tin (level %d) in inode %llu.\n", + this_level, cursor->ino); + INT_SET(node->btree[entry].hashval, ARCH_CONVERT, + cursor->level[p_level].hashval); + cursor->level[this_level].dirty++; + } else { + do_warn("would correct bad hashval in 
non-leaf dir/attr " + "block\n\tin (level %d) in inode %llu.\n", + this_level, cursor->ino); + } + } + + /* + * release/write buffer + */ + ASSERT(cursor->level[this_level].dirty == 0 || + cursor->level[this_level].dirty && !no_modify); + + if (cursor->level[this_level].dirty && !no_modify) + libxfs_writebuf(cursor->level[this_level].bp, 0); + else + libxfs_putbuf(cursor->level[this_level].bp); + + cursor->level[this_level].bp = NULL; + + /* + * bail out if this is the root block (top of tree) + */ + if (this_level >= cursor->active) { +#ifdef XR_DIR_TRACE + fprintf(stderr, "verify_final_da_path returns 0 (ok)\n"); +#endif + return(0); + } + /* + * set hashvalue to correctl reflect the now-validated + * last entry in this block and continue upwards validation + */ + cursor->level[this_level].hashval = + INT_GET(node->btree[entry].hashval, ARCH_CONVERT); + return(verify_final_da_path(mp, cursor, this_level)); +} + +/* + * Verifies the path from a descendant block up to the root. + * Should be called when the descendant level traversal hits + * a block boundary before crossing the boundary (reading in a new + * block). + * + * the directory/attr btrees work differently to the other fs btrees. + * each interior block contains records that are + * pairs. The bno is a file bno, not a filesystem bno. The last + * hashvalue in the block will be . BUT unlike + * the freespace btrees, the *last* value in each block gets + * propagated up the tree instead of the first value in each block. + * that is, the interior records point to child blocks and the *greatest* + * hash value contained by the child block is the one the block above + * uses as the key for the child block. + * + * level is the level of the descendent block. returns 0 if good, + * and 1 if bad. The descendant block may be a leaf block. + * + * the invariant here is that the values in the cursor for the + * levels beneath this level (this_level) and the cursor index + * for this level *must* be valid. 
+ * + * that is, the hashval/bno info is accurate for all + * DESCENDANTS and match what the node[index] information + * for the current index in the cursor for this level. + * + * the index values in the cursor for the descendant level + * are allowed to be off by one as they will reflect the + * next entry at those levels to be processed. + * + * the hashvalue for the current level can't be set until + * we hit the last entry in the block so, it's garbage + * until set by this routine. + * + * bno and bp for the current block/level are always valid + * since they have to be set so we can get a buffer for the + * block. + */ +int +verify_da_path(xfs_mount_t *mp, + da_bt_cursor_t *cursor, + const int p_level) +{ + xfs_da_intnode_t *node; + xfs_da_intnode_t *newnode; + xfs_dfsbno_t fsbno; + xfs_dablk_t dabno; + xfs_buf_t *bp; + int bad; + int entry; + int this_level = p_level + 1; + + /* + * index is currently set to point to the entry that + * should be processed now in this level. + */ + entry = cursor->level[this_level].index; + node = (xfs_da_intnode_t *)XFS_BUF_PTR(cursor->level[this_level].bp); + + /* + * if this block is out of entries, validate this + * block and move on to the next block. + * and update cursor value for said level + */ + if (entry >= INT_GET(node->hdr.count, ARCH_CONVERT)) { + /* + * update the hash value for this level before + * validating it. bno value should be ok since + * it was set when the block was first read in. 
+ */ + cursor->level[this_level].hashval = + INT_GET(node->btree[entry - 1].hashval, ARCH_CONVERT); + + /* + * keep track of greatest block # -- that gets + * us the length of the directory + */ + if (cursor->level[this_level].bno > cursor->greatest_bno) + cursor->greatest_bno = cursor->level[this_level].bno; + + /* + * validate the path for the current used-up block + * before we trash it + */ + if (verify_da_path(mp, cursor, this_level)) + return(1); + /* + * ok, now get the next buffer and check sibling pointers + */ + dabno = INT_GET(node->hdr.info.forw, ARCH_CONVERT); + ASSERT(dabno != 0); + fsbno = blkmap_get(cursor->blkmap, dabno); + + if (fsbno == NULLDFSBNO) { + do_warn("can't get map info for block %u of directory " + "inode %llu\n", dabno, cursor->ino); + return(1); + } + + bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno), + XFS_FSB_TO_BB(mp, 1), 0); + if (!bp) { + do_warn("can't read block %u (%llu) for directory inode %llu\n", + dabno, fsbno, cursor->ino); + return(1); + } + + newnode = (xfs_da_intnode_t *)XFS_BUF_PTR(bp); + /* + * verify magic number and back pointer, sanity-check + * entry count, verify level + */ + bad = 0; + if (INT_GET(newnode->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC) { + do_warn("bad magic number %x in block %u (%llu) for directory " + "inode %llu\n", + INT_GET(newnode->hdr.info.magic, ARCH_CONVERT), + dabno, fsbno, cursor->ino); + bad++; + } + if (INT_GET(newnode->hdr.info.back, ARCH_CONVERT) != + cursor->level[this_level].bno) { + do_warn("bad back pointer in block %u (%llu) for directory " + "inode %llu\n", dabno, fsbno, cursor->ino); + bad++; + } + if (INT_GET(newnode->hdr.count, ARCH_CONVERT) > + XFS_DA_NODE_ENTRIES(mp)) { + do_warn("entry count %d too large in block %u (%llu) for " + "directory inode %llu\n", + INT_GET(newnode->hdr.count, ARCH_CONVERT), + dabno, fsbno, cursor->ino); + bad++; + } + if (INT_GET(newnode->hdr.level, ARCH_CONVERT) != this_level) { + do_warn("bad level %d in block %u (%llu) 
for directory inode " + "%llu\n", INT_GET(newnode->hdr.level, ARCH_CONVERT), + dabno, fsbno, cursor->ino); + bad++; + } + if (bad) { +#ifdef XR_DIR_TRACE + fprintf(stderr, "verify_da_path returns 1 (bad) #4\n"); +#endif + libxfs_putbuf(bp); + return(1); + } + /* + * update cursor, write out the *current* level if + * required. don't write out the descendant level + */ + ASSERT(cursor->level[this_level].dirty == 0 || + cursor->level[this_level].dirty && !no_modify); + + if (cursor->level[this_level].dirty && !no_modify) + libxfs_writebuf(cursor->level[this_level].bp, 0); + else + libxfs_putbuf(cursor->level[this_level].bp); + cursor->level[this_level].bp = bp; + cursor->level[this_level].dirty = 0; + cursor->level[this_level].bno = dabno; + cursor->level[this_level].hashval = + INT_GET(newnode->btree[0].hashval, ARCH_CONVERT); +#ifdef XR_DIR_TRACE + cursor->level[this_level].n = newnode; +#endif + node = newnode; + + entry = cursor->level[this_level].index = 0; + } + /* + * ditto for block numbers + */ + if (cursor->level[p_level].bno != + INT_GET(node->btree[entry].before, ARCH_CONVERT)) { +#ifdef XR_DIR_TRACE + fprintf(stderr, "bad directory btree pointer, child bno should be %d, " + "block bno is %d, hashval is %u\n", + INT_GET(node->btree[entry].before, ARCH_CONVERT), + cursor->level[p_level].bno, + cursor->level[p_level].hashval); + fprintf(stderr, "verify_da_path returns 1 (bad) #1a\n"); +#endif + return(1); + } + /* + * ok, now validate last hashvalue in the descendant + * block against the hashval in the current entry + */ + if (cursor->level[p_level].hashval != + INT_GET(node->btree[entry].hashval, ARCH_CONVERT)) { + if (!no_modify) { + do_warn("correcting bad hashval in interior dir/attr block\n"); + do_warn("\tin (level %d) in inode %llu.\n", + this_level, cursor->ino); + INT_SET(node->btree[entry].hashval, ARCH_CONVERT, + cursor->level[p_level].hashval); + cursor->level[this_level].dirty++; + } else { + do_warn("would correct bad hashval in interior 
dir/attr " + "block\n\tin (level %d) in inode %llu.\n", + this_level, cursor->ino); + } + } + /* + * increment index for this level to point to next entry + * (which should point to the next descendant block) + */ + cursor->level[this_level].index++; +#ifdef XR_DIR_TRACE + fprintf(stderr, "verify_da_path returns 0 (ok)\n"); +#endif + return(0); +} + +#if 0 +/* + * handles junking directory leaf block entries that have zero lengths + * buf_dirty is an in/out, set to 1 if the leaf was modified. + * we do NOT initialize it to zero if nothing happened because it + * may be already set by the caller. Assumes that the block + * has been compacted before calling this routine. + */ +void +junk_zerolen_dir_leaf_entries( + xfs_mount_t *mp, + xfs_dir_leafblock_t *leaf, + xfs_ino_t ino, + int *buf_dirty) +{ + xfs_dir_leaf_entry_t *entry; + xfs_dir_leaf_name_t *namest; + xfs_dir_leaf_hdr_t *hdr; + xfs_dir_leaf_map_t *map; + xfs_ino_t tmp_ino; + int bytes; + int tmp_bytes; + int current_hole = 0; + int i; + int j; + int tmp; + int start; + int before; + int after; + int smallest; + int tablesize; + + entry = &leaf->entries[0]; + hdr = &leaf->hdr; + + /* + * we can convert the entries to one character entries + * as long as we have space. Once we run out, then + * we have to delete really delete (copy over) an entry. + * however, that frees up some space that we could use ... + * + * so the idea is, we'll use up space from all the holes, + * potentially leaving each hole too small to do any good. + * then if need to, we'll delete entries and use that space + * up from the top-most byte down. that may leave a 4th hole + * but we can represent that by correctly setting the value + * of firstused. that leaves any hole between the end of + * the entry list and firstused so it doesn't have to be + * recorded in the hole map. 
+ */ + + for (bytes = i = 0; i < INT_GET(hdr->count, ARCH_CONVERT); entry++, i++) { + /* + * skip over entries that are good or already converted + */ + if (entry->namelen != 0) + continue; + + *buf_dirty = 1; +#if 0 + /* + * try and use up existing holes first until they get + * too small, then set bytes to the # of bytes between + * the current heap beginning and the last used byte + * in the entry table. + */ + if (bytes < sizeof(xfs_dir_leaf_name_t) && + current_hole < XFS_DIR_LEAF_MAPSIZE) { + /* + * skip over holes that are too small + */ + while (current_hole < XFS_DIR_LEAF_MAPSIZE && + INT_GET(hdr->freemap[current_hole].size, ARCH_CONVERT) < + sizeof(xfs_dir_leaf_name_t)) { + current_hole++; + } + + if (current_hole < XFS_DIR_LEAF_MAPSIZE) + bytes = INT_GET(hdr->freemap[current_hole].size, ARCH_CONVERT); + else + bytes = (int) INT_GET(hdr->firstused, ARCH_CONVERT) - + ((__psint_t) &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)] - + (__psint_t) leaf); + } +#endif + current_hole = 0; + + for (map = &hdr->freemap[0]; + current_hole < XFS_DIR_LEAF_MAPSIZE && + INT_GET(map->size, ARCH_CONVERT) < sizeof(xfs_dir_leaf_name_t); + map++) { + current_hole++; + } + + /* + * if we can use an existing hole, do it. otherwise, + * delete entries until the deletions create a big enough + * hole to convert another entry. then use up those bytes + * bytes until you run low. then delete entries again ... 
+ */ + if (current_hole < XFS_DIR_LEAF_MAPSIZE) { + ASSERT(sizeof(xfs_dir_leaf_name_t) <= bytes); + + do_warn("marking bad entry in directory inode %llu\n", + ino); + + entry->namelen = 1; + INT_SET(entry->nameidx, ARCH_CONVERT, INT_GET(hdr->freemap[current_hole].base, ARCH_CONVERT) + + bytes - sizeof(xfs_dir_leaf_name_t)); + + namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)); + tmp_ino = NULLFSINO; + XFS_DIR_SF_PUT_DIRINO_ARCH(&tmp_ino, &namest->inumber, ARCH_CONVERT); + namest->name[0] = '/'; + + if (INT_GET(entry->nameidx, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT)) + INT_SET(hdr->firstused, ARCH_CONVERT, INT_GET(entry->nameidx, ARCH_CONVERT)); + INT_MOD(hdr->freemap[current_hole].size, ARCH_CONVERT, -(sizeof(xfs_dir_leaf_name_t))); + INT_MOD(hdr->namebytes, ARCH_CONVERT, +1); + } else { + /* + * delete the table entry and try and account for the + * space in the holemap. don't have to update namebytes + * or firstused since we're not actually deleting any + * bytes from the heap. following code swiped from + * xfs_dir_leaf_remove() in xfs_dir_leaf.c + */ + INT_MOD(hdr->count, ARCH_CONVERT, -1); + do_warn( + "deleting zero length entry in directory inode %llu\n", + ino); + /* + * overwrite the bad entry unless it's the + * last entry in the list (highly unlikely). + * zero out the free'd bytes. 
+ */ + if (INT_GET(hdr->count, ARCH_CONVERT) - i > 0) { + memmove(entry, entry + 1, (INT_GET(hdr->count, ARCH_CONVERT) - i) * + sizeof(xfs_dir_leaf_entry_t)); + } + bzero((void *) ((__psint_t) entry + + (INT_GET(leaf->hdr.count, ARCH_CONVERT) - i - 1) * + sizeof(xfs_dir_leaf_entry_t)), + sizeof(xfs_dir_leaf_entry_t)); + + start = (__psint_t) &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)] - + (__psint_t) &leaf; + tablesize = sizeof(xfs_dir_leaf_entry_t) * + (INT_GET(hdr->count, ARCH_CONVERT) + 1) + sizeof(xfs_dir_leaf_hdr_t); + map = &hdr->freemap[0]; + tmp = INT_GET(map->size, ARCH_CONVERT); + before = after = -1; + smallest = XFS_DIR_LEAF_MAPSIZE - 1; + for (j = 0; j < XFS_DIR_LEAF_MAPSIZE; map++, j++) { + ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp)); + ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp)); + if (INT_GET(map->base, ARCH_CONVERT) == tablesize) { + INT_MOD(map->base, ARCH_CONVERT, -(sizeof(xfs_dir_leaf_entry_t))); + INT_MOD(map->size, ARCH_CONVERT, sizeof(xfs_dir_leaf_entry_t)); + } + + if ((INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT)) == start) { + before = j; + } else if (INT_GET(map->base, ARCH_CONVERT) == start + + sizeof(xfs_dir_leaf_entry_t)) { + after = j; + } else if (INT_GET(map->size, ARCH_CONVERT) < tmp) { + tmp = INT_GET(map->size, ARCH_CONVERT); + smallest = j; + } + } + + /* + * Coalesce adjacent freemap regions, + * or replace the smallest region. 
+ */ + if ((before >= 0) || (after >= 0)) { + if ((before >= 0) && (after >= 0)) { + map = &hdr->freemap[before]; + INT_MOD(map->size, ARCH_CONVERT, sizeof(xfs_dir_leaf_entry_t)); + INT_MOD(map->size, ARCH_CONVERT, INT_GET(hdr->freemap[after].size, ARCH_CONVERT)); + INT_ZERO(hdr->freemap[after].base, ARCH_CONVERT); + INT_ZERO(hdr->freemap[after].size, ARCH_CONVERT); + } else if (before >= 0) { + map = &hdr->freemap[before]; + INT_MOD(map->size, ARCH_CONVERT, sizeof(xfs_dir_leaf_entry_t)); + } else { + map = &hdr->freemap[after]; + INT_SET(map->base, ARCH_CONVERT, start); + INT_MOD(map->size, ARCH_CONVERT, sizeof(xfs_dir_leaf_entry_t)); + } + } else { + /* + * Replace smallest region + * (if it is smaller than free'd entry) + */ + map = &hdr->freemap[smallest]; + if (INT_GET(map->size, ARCH_CONVERT) < sizeof(xfs_dir_leaf_entry_t)) { + INT_SET(map->base, ARCH_CONVERT, start); + INT_SET(map->size, ARCH_CONVERT, sizeof(xfs_dir_leaf_entry_t)); + } + /* + * mark as needing compaction + */ + hdr->holes = 1; + } +#if 0 + /* + * do we have to delete stuff or is there + * room for deletions? + */ + ASSERT(current_hole == XFS_DIR_LEAF_MAPSIZE); + + /* + * here, bytes == number of unused bytes from + * end of list to top (beginning) of heap + * (firstused). It's ok to leave extra + * unused bytes in that region because they + * wind up before firstused (which we reset + * appropriately + */ + if (bytes < sizeof(xfs_dir_leaf_name_t)) { + /* + * have to delete an entry because + * we have no room to convert it to + * a bad entry + */ + do_warn( + "deleting entry in directory inode %llu\n", + ino); + /* + * overwrite the bad entry unless it's the + * last entry in the list (highly unlikely). 
+ */ + if (INT_GET(leaf->hdr.count, ARCH_CONVERT) - i - 1> 0) { + memmove(entry, entry + 1, + (INT_GET(leaf->hdr.count, ARCH_CONVERT) - i - 1) * + sizeof(xfs_dir_leaf_entry_t)); + } + bzero((void *) ((__psint_t) entry + + (INT_GET(leaf->hdr.count, ARCH_CONVERT) - i - 1) * + sizeof(xfs_dir_leaf_entry_t)), + sizeof(xfs_dir_leaf_entry_t)); + + /* + * bump up free byte count, drop other + * index vars since the table just + * shrank by one entry and we don't + * want to miss any as we walk the table + */ + bytes += sizeof(xfs_dir_leaf_entry_t); + INT_MOD(leaf->hdr.count, ARCH_CONVERT, -1); + entry--; + i--; + } else { + /* + * convert entry using the bytes in between + * the end of the entry table and the heap + */ + entry->namelen = 1; + INT_MOD(leaf->hdr.firstused, ARCH_CONVERT, -(sizeof(xfs_dir_leaf_name_t))); + INT_SET(entry->nameidx, ARCH_CONVERT, INT_GET(leaf->hdr.firstused, ARCH_CONVERT)); + + namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, + INT_GET(entry->nameidx, ARCH_CONVERT)); + tmp_ino = NULLFSINO; + XFS_DIR_SF_PUT_DIRINO_ARCH(&tmp_ino, + &namest->inumber, ARCH_CONVERT); + namest->name[0] = '/'; + + bytes -= sizeof(xfs_dir_leaf_entry_t); + } +#endif + } + } + + return; +} +#endif + +static char dirbuf[64 * 1024]; + +/* + * called by both node dir and leaf dir processing routines + * validates all contents *but* the sibling pointers (forw/back) + * and the magic number. + * + * returns 0 if the directory is ok or has been brought to the + * stage that it can be fixed up later (in phase 6), + * 1 if it has to be junked. + * + * Right now we fix a lot of things (TBD == to be deleted). + * + * incorrect . entries - inode # is corrected + * entries with mismatched hashvalue/name strings - hashvalue reset + * entries whose hashvalues are out-of-order - entry marked TBD + * .. entries with invalid inode numbers - entry marked TBD + * entries with invalid inode numbers - entry marked TBD + * multiple . 
entries - all but the first entry are marked TBD
+ * zero-length entries - entry is deleted
+ * entries with an out-of-bounds name index ptr - entry is deleted
+ *
+ * entries marked TBD have the first character of the name (which
+ * lives in the heap) have the first character in the name set
+ * to '/' -- an illegal value.
+ *
+ * entries deleted right here are deleted by blowing away the entry
+ * (but leaving the heap untouched). any space that was used
+ * by the deleted entry will be reclaimed by the block freespace
+ * (da_freemap) processing code.
+ *
+ * if two entries claim the same space in the heap (say, due to
+ * bad entry name index pointers), we lose the directory. We could
+ * try harder to fix this but it'll do for now.
+ */
+/* ARGSUSED */
+int
+process_leaf_dir_block(
+	xfs_mount_t		*mp,
+	xfs_dir_leafblock_t	*leaf,
+	xfs_dablk_t		da_bno,
+	xfs_ino_t		ino,
+	xfs_dahash_t		last_hashval,	/* last hashval encountered */
+	int			ino_discovery,
+	blkmap_t		*blkmap,
+	int			*dot,
+	int			*dotdot,
+	xfs_ino_t		*parent,
+	int			*buf_dirty,	/* is buffer dirty? */
+	xfs_dahash_t		*next_hashval)	/* greatest hashval in block */
+{
+	xfs_ino_t			lino;
+	xfs_dir_leaf_entry_t		*entry;
+	xfs_dir_leaf_entry_t		*s_entry;
+	xfs_dir_leaf_entry_t		*d_entry;
+	xfs_dir_leafblock_t		*new_leaf;
+	char				*first_byte;
+	xfs_dir_leaf_name_t		*namest;
+	ino_tree_node_t			*irec_p;
+	int				num_entries;
+	xfs_dahash_t			hashval;
+	int				i;
+	int				nm_illegal;
+	int				bytes;
+	int				start;
+	int				stop;
+	int				res = 0;
+	int				ino_off;
+	int				first_used;
+	int				bytes_used;
+	int				reset_holes;
+	int				zero_len_entries;
+	char				fname[MAXNAMELEN + 1];
+	da_hole_map_t			holemap;
+	da_hole_map_t			bholemap;
+#if 0
+	unsigned char			*dir_freemap;
+#endif
+
+#ifdef XR_DIR_TRACE
+	fprintf(stderr, "\tprocess_leaf_dir_block - ino %llu\n", ino);
+#endif
+
+	/*
+	 * clear static dir block freespace bitmap
+	 *
+	 * the local dir_freemap declaration above is compiled out
+	 * (#if 0), so the name used here refers to a variable defined
+	 * outside this function.
+	 */
+	init_da_freemap(dir_freemap);
+
+#if 0
+	/*
+	 * XXX - alternatively, do this for parallel usage.
+	 * set up block freespace map. head part of dir leaf block
+	 * including all entries are packed so we can use sizeof
+	 * and not worry about alignment.
+	 */
+
+	if ((dir_freemap = alloc_da_freemap(mp)) == NULL) {
+		do_error("couldn't allocate directory block freemap\n");
+		abort();
+	}
+#endif
+
+	*buf_dirty = 0;
+	first_used = mp->m_sb.sb_blocksize;
+	zero_len_entries = 0;
+	bytes_used = 0;
+
+	i = stop = sizeof(xfs_dir_leaf_hdr_t);
+	if (set_da_freemap(mp, dir_freemap, 0, stop)) {
+		do_warn(
+"directory block header conflicts with used space in directory inode %llu\n",
+			ino);
+		return(1);
+	}
+
+	/*
+	 * verify structure: monotonically increasing hash value for
+	 * all leaf entries, indexes for all entries must be within
+	 * this fs block (trivially true for 64K blocks). also track
+	 * used space so we can check the freespace map. check for
+	 * zero-length entries. for now, if anything's wrong, we
+	 * junk the directory and we'll pick up no-longer referenced
+	 * inodes on a later pass.
+	 *
+	 * entries can be deleted in place inside this loop; when that
+	 * happens i and entry are stepped back by one so that the loop
+	 * increment lands on whatever slid into the freed slot.
+	 *
+	 * fatal problems return 1 directly from inside the loop. res
+	 * is never assigned after its initialization to 0, so the
+	 * return at the bottom of the function always yields 0.
+	 */
+	for (i = 0, entry = &leaf->entries[0];
+			i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
+			i++, entry++) {
+		/*
+		 * check that the name index isn't out of bounds
+		 * if it is, delete the entry since we can't
+		 * grab the inode #.
+		 */
+		if (INT_GET(entry->nameidx, ARCH_CONVERT) >= mp->m_sb.sb_blocksize) {
+			if (!no_modify) {
+				*buf_dirty = 1;
+
+				if (INT_GET(leaf->hdr.count, ARCH_CONVERT) > 1) {
+					do_warn(
+"nameidx %d for entry #%d, bno %d, ino %llu > fs blocksize, deleting entry\n",
+						INT_GET(entry->nameidx, ARCH_CONVERT), i, da_bno, ino);
+					ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) > i);
+
+					bytes = (INT_GET(leaf->hdr.count, ARCH_CONVERT) - i) *
+						sizeof(xfs_dir_leaf_entry_t);
+
+					/*
+					 * compress table unless we're
+					 * only dealing with 1 entry
+					 * (the last one) in which case
+					 * just zero it.
+					 *
+					 * NOTE(review): bytes spans slots
+					 * i..count-1, so the memmove() source
+					 * range starting at entry + 1 runs one
+					 * slot past the table, and the bzero()
+					 * hits slot [count] rather than the
+					 * vacated slot [count-1] -- confirm
+					 * whether the length should be one
+					 * entry shorter.
+					 */
+					if (bytes >
+					    sizeof(xfs_dir_leaf_entry_t)) {
+						memmove(entry, entry + 1,
+							bytes);
+						bzero((void *)
+						((__psint_t) entry + bytes),
+						sizeof(xfs_dir_leaf_entry_t));
+					} else {
+						bzero(entry,
+						sizeof(xfs_dir_leaf_entry_t));
+					}
+
+					/*
+					 * sync vars to match smaller table.
+					 * don't have to worry about freespace
+					 * map since we haven't set it for
+					 * this entry yet.
+					 */
+					INT_MOD(leaf->hdr.count, ARCH_CONVERT, -1);
+					i--;
+					entry--;
+				} else {
+					do_warn(
+"nameidx %d, entry #%d, bno %d, ino %llu > fs blocksize, marking entry bad\n",
+						INT_GET(entry->nameidx, ARCH_CONVERT), i, da_bno, ino);
+					INT_SET(entry->nameidx, ARCH_CONVERT, mp->m_sb.sb_blocksize -
+						sizeof(xfs_dir_leaf_name_t));
+					namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
+						INT_GET(entry->nameidx, ARCH_CONVERT));
+					lino = NULLFSINO;
+					XFS_DIR_SF_PUT_DIRINO_ARCH(&lino,
+						&namest->inumber, ARCH_CONVERT);
+					namest->name[0] = '/';
+				}
+			} else {
+				do_warn(
+"nameidx %d, entry #%d, bno %d, ino %llu > fs blocksize, would delete entry\n",
+					INT_GET(entry->nameidx, ARCH_CONVERT), i, da_bno, ino);
+			}
+			continue;
+		}
+		/*
+		 * inode processing -- make sure the inode
+		 * is in our tree or we add it to the uncertain
+		 * list if the inode # is valid. if namelen is 0,
+		 * we can still try for the inode as long as nameidx
+		 * is ok.
+		 */
+		namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
+		XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &lino, ARCH_CONVERT);
+
+		/*
+		 * we may have to blow out an entry because of bad
+		 * inode numbers. do NOT touch the name until after
+		 * we've computed the hashvalue and done a namecheck()
+		 * on the name.
+		 */
+		if (!ino_discovery && lino == NULLFSINO) {
+			/*
+			 * don't do a damned thing. We already
+			 * found this (or did it ourselves) during
+			 * phase 3.
+			 */
+		} else if (verify_inum(mp, lino)) {
+			/*
+			 * bad inode number. clear the inode
+			 * number and the entry will get removed
+			 * later. We don't trash the directory
+			 * since it's still structurally intact.
+			 */
+			do_warn(
+"invalid ino number %llu in dir ino %llu, entry #%d, bno %d\n",
+				lino, ino, i, da_bno);
+			if (!no_modify) {
+				do_warn(
+				"\tclearing ino number in entry %d...\n", i);
+
+				lino = NULLFSINO;
+				XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+				*buf_dirty = 1;
+			} else {
+				do_warn(
+				"\twould clear ino number in entry %d...\n", i);
+			}
+		} else if (lino == mp->m_sb.sb_rbmino) {
+			do_warn(
+"entry #%d, bno %d in directory %llu references realtime bitmap inode %llu\n",
+				i, da_bno, ino, lino);
+			if (!no_modify) {
+				do_warn(
+				"\tclearing ino number in entry %d...\n", i);
+
+				lino = NULLFSINO;
+				XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+				*buf_dirty = 1;
+			} else {
+				do_warn(
+				"\twould clear ino number in entry %d...\n", i);
+			}
+		} else if (lino == mp->m_sb.sb_rsumino) {
+			do_warn(
+"entry #%d, bno %d in directory %llu references realtime summary inode %llu\n",
+				i, da_bno, ino, lino);
+			if (!no_modify) {
+				do_warn(
+				"\tclearing ino number in entry %d...\n", i);
+
+				lino = NULLFSINO;
+				XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+				*buf_dirty = 1;
+			} else {
+				do_warn(
+				"\twould clear ino number in entry %d...\n", i);
+			}
+		} else if (lino == mp->m_sb.sb_uquotino) {
+			do_warn(
+"entry #%d, bno %d in directory %llu references user quota inode %llu\n",
+				i, da_bno, ino, lino);
+			if (!no_modify) {
+				do_warn(
+				"\tclearing ino number in entry %d...\n", i);
+
+				lino = NULLFSINO;
+				XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+				*buf_dirty = 1;
+			} else {
+				do_warn(
+				"\twould clear ino number in entry %d...\n", i);
+			}
+		} else if (lino == mp->m_sb.sb_pquotino) {
+			do_warn(
+"entry #%d, bno %d in directory %llu references proj quota inode %llu\n",
+				i, da_bno, ino, lino);
+			if (!no_modify) {
+				do_warn(
+				"\tclearing ino number in entry %d...\n", i);
+
+				lino = NULLFSINO;
+				XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+				*buf_dirty = 1;
+			} else {
+				do_warn(
+				"\twould clear ino number in entry %d...\n", i);
+			}
+		} else if (lino == old_orphanage_ino) {
+			/*
+			 * do nothing, silently ignore it, entry has
+			 * already been marked TBD since old_orphanage_ino
+			 * is set non-zero.
+			 */
+		} else if ((irec_p = find_inode_rec(
+				XFS_INO_TO_AGNO(mp, lino),
+				XFS_INO_TO_AGINO(mp, lino))) != NULL) {
+			/*
+			 * inode recs should have only confirmed
+			 * inodes in them
+			 */
+			ino_off = XFS_INO_TO_AGINO(mp, lino) -
+				irec_p->ino_startnum;
+			ASSERT(is_inode_confirmed(irec_p, ino_off));
+			/*
+			 * if inode is marked free and we're in inode
+			 * discovery mode, leave the entry alone for now.
+			 * if the inode turns out to be used, we'll figure
+			 * that out when we scan it. If the inode really
+			 * is free, we'll hit this code again in phase 4
+			 * after we've finished inode discovery and blow
+			 * out the entry then.
+			 */
+			if (!ino_discovery && is_inode_free(irec_p, ino_off)) {
+				if (!no_modify) {
+					do_warn(
+"entry references free inode %llu in directory %llu, will clear entry\n",
+						lino, ino);
+					lino = NULLFSINO;
+					XFS_DIR_SF_PUT_DIRINO_ARCH(&lino,
+						&namest->inumber, ARCH_CONVERT);
+					*buf_dirty = 1;
+				} else {
+					do_warn(
+"entry references free inode %llu in directory %llu, would clear entry\n",
+						lino, ino);
+				}
+			}
+		} else if (ino_discovery) {
+			add_inode_uncertain(mp, lino, 0);
+		} else {
+			do_warn(
+	"bad ino number %llu in dir ino %llu, entry #%d, bno %d\n",
+				lino, ino, i, da_bno);
+			if (!no_modify) {
+				do_warn("clearing inode number...\n");
+				lino = NULLFSINO;
+				XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+				*buf_dirty = 1;
+			} else {
+				do_warn("would clear inode number...\n");
+			}
+		}
+		/*
+		 * if we have a zero-length entry, trash it.
+		 * we may lose the inode (chunk) if we don't
+		 * finish the repair successfully and the inode
+		 * isn't mentioned anywhere else (like in the inode
+		 * tree) but the alternative is to risk losing the
+		 * entire directory by trying to use the next byte
+		 * to turn the entry into a 1-char entry. That's
+		 * probably a safe bet but if it didn't work, we'd
+		 * lose the entire directory the way we currently do
+		 * things. (Maybe we should change that later :-).
+		 *
+		 * NOTE(review): unlike the out-of-bounds nameidx case
+		 * above, this branch does not test no_modify before
+		 * setting *buf_dirty and editing the entry table --
+		 * confirm that is intended.
+		 *
+		 * NOTE(review): there is no continue after an entry is
+		 * deleted here, so the code following this if/else runs
+		 * with i and entry already stepped back and with namest
+		 * still pointing at the deleted entry's name -- confirm.
+		 */
+		if (entry->namelen == 0) {
+			*buf_dirty = 1;
+
+			if (INT_GET(leaf->hdr.count, ARCH_CONVERT) > 1) {
+				do_warn(
+	"entry #%d, dir inode %llu, has zero-len name, deleting entry\n",
+					i, ino);
+				ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) > i);
+
+				bytes = (INT_GET(leaf->hdr.count, ARCH_CONVERT) - i) *
+					sizeof(xfs_dir_leaf_entry_t);
+
+				/*
+				 * compress table unless we're
+				 * only dealing with 1 entry
+				 * (the last one) in which case
+				 * just zero it.
+				 *
+				 * NOTE(review): bytes spans slots
+				 * i..count-1, so the memmove() source
+				 * range starting at entry + 1 runs one
+				 * slot past the table, and the bzero()
+				 * hits slot [count] rather than the
+				 * vacated slot [count-1] -- confirm.
+				 */
+				if (bytes > sizeof(xfs_dir_leaf_entry_t)) {
+					memmove(entry, entry + 1,
+						bytes);
+					bzero((void *)
+						((__psint_t) entry + bytes),
+						sizeof(xfs_dir_leaf_entry_t));
+				} else {
+					bzero(entry,
+						sizeof(xfs_dir_leaf_entry_t));
+				}
+
+				/*
+				 * sync vars to match smaller table.
+				 * don't have to worry about freespace
+				 * map since we haven't set it for
+				 * this entry yet.
+				 */
+				INT_MOD(leaf->hdr.count, ARCH_CONVERT, -1);
+				i--;
+				entry--;
+			} else {
+				/*
+				 * if it's the only entry, preserve the
+				 * inode number for now
+				 */
+				do_warn(
+	"entry #%d, dir inode %llu, has zero-len name, marking entry bad\n",
+					i, ino);
+				INT_SET(entry->nameidx, ARCH_CONVERT, mp->m_sb.sb_blocksize -
+					sizeof(xfs_dir_leaf_name_t));
+				namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
+					INT_GET(entry->nameidx, ARCH_CONVERT));
+				XFS_DIR_SF_PUT_DIRINO_ARCH(&lino, &namest->inumber, ARCH_CONVERT);
+				namest->name[0] = '/';
+			}
+		} else if (INT_GET(entry->nameidx, ARCH_CONVERT) + entry->namelen > XFS_LBSIZE(mp)) {
+			do_warn(
+"bad size, entry #%d in dir inode %llu, block %u -- entry overflows block\n",
+				i, ino, da_bno);
+
+			return(1);
+		}
+
+		start = (__psint_t)&leaf->entries[i] - (__psint_t)leaf;;
+		stop = start + sizeof(xfs_dir_leaf_entry_t);
+
+		if (set_da_freemap(mp, dir_freemap, start, stop)) {
+			do_warn(
+"dir entry slot %d in block %u conflicts with used space in dir inode %llu\n",
+				i, da_bno, ino);
+			return(1);
+		}
+
+		/*
+		 * check if the name is legal. if so, then
+		 * check that the name and hashvalues match.
+		 *
+		 * if the name is illegal, we don't check the
+		 * hashvalue computed from it. we just make
+		 * sure that the hashvalue in the entry is
+		 * monotonically increasing wrt to the previous
+		 * entry.
+		 *
+		 * Note that we do NOT have to check the length
+		 * because the length is stored in a one-byte
+		 * unsigned int which max's out at MAXNAMELEN
+		 * making it impossible for the stored length
+		 * value to be out of range.
+		 */
+		bcopy(namest->name, fname, entry->namelen);
+		fname[entry->namelen] = '\0';
+		hashval = libxfs_da_hashname(fname, entry->namelen);
+
+		/*
+		 * only complain about illegal names in phase 3 (when
+		 * inode discovery is turned on). Otherwise, we'd complain
+		 * a lot during phase 4. If the name is illegal, leave
+		 * the hash value in that entry alone.
+		 */
+		nm_illegal = namecheck(fname, entry->namelen);
+
+		if (ino_discovery && nm_illegal) {
+			/*
+			 * junk the entry, illegal name
+			 */
+			if (!no_modify) {
+				do_warn(
+	"illegal name \"%s\" in directory inode %llu, entry will be cleared\n",
+					fname, ino);
+				namest->name[0] = '/';
+				*buf_dirty = 1;
+			} else {
+				do_warn(
+	"illegal name \"%s\" in directory inode %llu, entry would be cleared\n",
+					fname, ino);
+			}
+		} else if (!nm_illegal && INT_GET(entry->hashval, ARCH_CONVERT) != hashval) {
+			/*
+			 * try resetting the hashvalue to the correct
+			 * value for the string, if the string has been
+			 * corrupted, too, that will get picked up next
+			 */
+			do_warn("\tmismatched hash value for entry \"%s\"\n",
+				fname);
+			if (!no_modify) {
+				do_warn(
+			"\t\tin directory inode %llu. resetting hash value.\n",
+					ino);
+				INT_SET(entry->hashval, ARCH_CONVERT, hashval);
+				*buf_dirty = 1;
+			} else {
+				do_warn(
+			"\t\tin directory inode %llu. would reset hash value.\n",
+					ino);
+			}
+		}
+
+		/*
+		 * now we can mark entries with NULLFSINO's bad
+		 */
+		if (!no_modify && lino == NULLFSINO) {
+			namest->name[0] = '/';
+			*buf_dirty = 1;
+		}
+
+		/*
+		 * regardless of whether the entry has or hasn't been
+		 * marked for deletion, the hash value ordering must
+		 * be maintained.
+		 */
+		if (INT_GET(entry->hashval, ARCH_CONVERT) < last_hashval) {
+			/*
+			 * blow out the entry -- set hashval to sane value
+			 * and set the first character in the string to
+			 * the illegal value '/'. Reset the hash value
+			 * to the last hashvalue so that verify_da_path
+			 * will fix up the interior pointers correctly.
+			 * the entry will be deleted later (by routines
+			 * that need only the entry #). We keep the
+			 * inode number in the entry so we can attach
+			 * the inode to the orphanage later.
+			 */
+			do_warn("\tbad hash ordering for entry \"%s\"\n",
+				fname);
+			if (!no_modify) {
+				do_warn(
+			"\t\tin directory inode %llu. will clear entry\n",
+					ino);
+				INT_SET(entry->hashval, ARCH_CONVERT, last_hashval);
+				namest->name[0] = '/';
+				*buf_dirty = 1;
+			} else {
+				do_warn(
+			"\t\tin directory inode %llu. would clear entry\n",
+					ino);
+			}
+		}
+
+		*next_hashval = last_hashval = INT_GET(entry->hashval, ARCH_CONVERT);
+
+		/*
+		 * if heap data conflicts with something,
+		 * blow it out and skip the rest of the loop
+		 *
+		 * when modifying, namelen is set to 0 here; the
+		 * compaction loop near the end of this function, if
+		 * it runs, skips entries whose namelen is 0.
+		 */
+		if (set_da_freemap(mp, dir_freemap, INT_GET(entry->nameidx, ARCH_CONVERT),
+				INT_GET(entry->nameidx, ARCH_CONVERT) + sizeof(xfs_dir_leaf_name_t) +
+				entry->namelen - 1)) {
+			do_warn(
+"name \"%s\" (block %u, slot %d) conflicts with used space in dir inode %llu\n",
+				fname, da_bno, i, ino);
+			if (!no_modify) {
+				entry->namelen = 0;
+				*buf_dirty = 1;
+
+				do_warn(
+		"will clear entry \"%s\" (#%d) in directory inode %llu\n",
+					fname, i, ino);
+			} else {
+				do_warn(
+		"would clear entry \"%s\" (#%d)in directory inode %llu\n",
+					fname, i, ino);
+			}
+			continue;
+		}
+
+		/*
+		 * keep track of heap stats (first byte used, total bytes used)
+		 */
+		if (INT_GET(entry->nameidx, ARCH_CONVERT) < first_used)
+			first_used = INT_GET(entry->nameidx, ARCH_CONVERT);
+		bytes_used += entry->namelen;
+
+		/*
+		 * special . or .. entry processing
+		 */
+		if (entry->namelen == 2 && namest->name[0] == '.' &&
+				namest->name[1] == '.') {
+			/*
+			 * the '..' case
+			 */
+			if (!*dotdot) {
+				(*dotdot)++;
+				*parent = lino;
+#ifdef XR_DIR_TRACE
+				fprintf(stderr, "process_leaf_dir_block found .. entry (parent) = %llu\n", lino);
+#endif
+				/*
+				 * what if .. == .? legal only in
+				 * the root inode. blow out entry
+				 * and set parent to NULLFSINO otherwise.
+				 */
+				if (ino == lino &&
+						ino != mp->m_sb.sb_rootino) {
+					*parent = NULLFSINO;
+					do_warn(
+	"bad .. entry in dir ino %llu, points to self",
+						ino);
+					if (!no_modify) {
+						do_warn("will clear entry\n");
+
+						namest->name[0] = '/';
+						*buf_dirty = 1;
+					} else {
+						do_warn("would clear entry\n");
+					}
+				} else if (ino != lino &&
+						ino == mp->m_sb.sb_rootino) {
+					/*
+					 * we have to make sure that . == ..
+					 * in the root inode
+					 */
+					if (!no_modify) {
+						do_warn(
+		"correcting .. entry in root inode %llu, was %llu\n",
+							ino, *parent);
+						XFS_DIR_SF_PUT_DIRINO_ARCH(
+							&ino,
+							&namest->inumber, ARCH_CONVERT);
+						*buf_dirty = 1;
+					} else {
+						do_warn(
+	"bad .. entry (%llu) in root inode %llu should be %llu\n",
+							*parent,
+							ino, ino);
+					}
+				}
+			} else {
+				/*
+				 * can't fix the directory unless we know
+				 * which .. entry is the right one. Both
+				 * have valid inode numbers, match the hash
+				 * value and the hash values are ordered
+				 * properly or we wouldn't be here. So
+				 * since both seem equally valid, trash
+				 * this one.
+				 */
+				if (!no_modify) {
+					do_warn(
+"multiple .. entries in directory inode %llu, will clear second entry\n",
+						ino);
+					namest->name[0] = '/';
+					*buf_dirty = 1;
+				} else {
+					do_warn(
+"multiple .. entries in directory inode %llu, would clear second entry\n",
+						ino);
+				}
+			}
+		} else if (entry->namelen == 1 && namest->name[0] == '.') {
+			/*
+			 * the '.' case
+			 */
+			if (!*dot) {
+				(*dot)++;
+				if (lino != ino) {
+					if (!no_modify) {
+						do_warn(
+	". in directory inode %llu has wrong value (%llu), fixing entry...\n",
+							ino, lino);
+						XFS_DIR_SF_PUT_DIRINO_ARCH(&ino,
+							&namest->inumber, ARCH_CONVERT);
+						*buf_dirty = 1;
+					} else {
+						do_warn(
+			". in directory inode %llu has wrong value (%llu)\n",
+							ino, lino);
+					}
+				}
+			} else {
+				do_warn(
+				"multiple . entries in directory inode %llu\n",
+					ino);
+				/*
+				 * mark entry as to be junked.
+				 */
+				if (!no_modify) {
+					do_warn(
+			"will clear one . entry in directory inode %llu\n",
+						ino);
+					namest->name[0] = '/';
+					*buf_dirty = 1;
+				} else {
+					do_warn(
+			"would clear one . entry in directory inode %llu\n",
+						ino);
+				}
+			}
+		} else {
+			/*
+			 * all the rest -- make sure only . references self
+			 */
+			if (lino == ino) {
+				do_warn(
+			"entry \"%s\" in directory inode %llu points to self, ",
+					fname, ino);
+				if (!no_modify) {
+					do_warn("will clear entry\n");
+					namest->name[0] = '/';
+					*buf_dirty = 1;
+				} else {
+					do_warn("would clear entry\n");
+				}
+			}
+		}
+	}
+
+	/*
+	 * compare top of heap values and reset as required. if the
+	 * holes flag is set, don't reset first_used unless it's
+	 * pointing to used bytes. we're being conservative here
+	 * since the block will get compacted anyhow by the kernel.
+	 *
+	 * && binds tighter than ||, so the test below reads as
+	 * (holes == 0 && first_used != firstused) ||
+	 * (firstused > first_used).
+	 */
+	if (leaf->hdr.holes == 0 && first_used != INT_GET(leaf->hdr.firstused, ARCH_CONVERT) ||
+			INT_GET(leaf->hdr.firstused, ARCH_CONVERT) > first_used) {
+		if (!no_modify) {
+			if (verbose)
+				do_warn(
+"- resetting first used heap value from %d to %d in block %u of dir ino %llu\n",
+					(int) INT_GET(leaf->hdr.firstused, ARCH_CONVERT), first_used,
+					da_bno, ino);
+			INT_SET(leaf->hdr.firstused, ARCH_CONVERT, first_used);
+			*buf_dirty = 1;
+		} else {
+			if (verbose)
+				do_warn(
+"- would reset first used value from %d to %d in block %u of dir ino %llu\n",
+					(int) INT_GET(leaf->hdr.firstused, ARCH_CONVERT), first_used,
+					da_bno, ino);
+		}
+	}
+
+	if (bytes_used != INT_GET(leaf->hdr.namebytes, ARCH_CONVERT)) {
+		if (!no_modify) {
+			if (verbose)
+				do_warn(
+"- resetting namebytes cnt from %d to %d in block %u of dir inode %llu\n",
+					(int) INT_GET(leaf->hdr.namebytes, ARCH_CONVERT), bytes_used,
+					da_bno, ino);
+			INT_SET(leaf->hdr.namebytes, ARCH_CONVERT, bytes_used);
+			*buf_dirty = 1;
+		} else {
+			if (verbose)
+				do_warn(
+"- would reset namebytes cnt from %d to %d in block %u of dir inode %llu\n",
+					(int) INT_GET(leaf->hdr.namebytes, ARCH_CONVERT), bytes_used,
+					da_bno, ino);
+		}
+	}
+
+	/*
+	 * If the hole flag is not set, then we know that there can
+	 * be no lost holes. If the hole flag is set, then it's ok
+	 * if the on-disk holemap doesn't describe everything as long
+	 * as what it does describe doesn't conflict with reality.
+	 *
+	 * zero_len_entries is set to 0 near the top of this function
+	 * and is not changed anywhere else in it, so the first branch
+	 * of the reset_holes decision below is never taken as the
+	 * code stands.
+	 */
+
+	reset_holes = 0;
+
+	bholemap.lost_holes = leaf->hdr.holes;
+	for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; i++) {
+		bholemap.hentries[i].base = INT_GET(leaf->hdr.freemap[i].base, ARCH_CONVERT);
+		bholemap.hentries[i].size = INT_GET(leaf->hdr.freemap[i].size, ARCH_CONVERT);
+	}
+
+	/*
+	 * Ok, now set up our own freespace list
+	 * (XFS_DIR_LEAF_MAPSIZE (3) * biggest regions)
+	 * and see if they match what's in the block
+	 */
+	bzero(&holemap, sizeof(da_hole_map_t));
+	process_da_freemap(mp, dir_freemap, &holemap);
+
+	if (zero_len_entries) {
+		reset_holes = 1;
+	} else if (leaf->hdr.holes == 0) {
+		if (holemap.lost_holes > 0) {
+			if (verbose)
+				do_warn(
+	"- found unexpected lost holes in block %u, dir inode %llu\n",
+					da_bno, ino);
+
+			reset_holes = 1;
+		} else if (compare_da_freemaps(mp, &holemap, &bholemap,
+				XFS_DIR_LEAF_MAPSIZE, ino, da_bno)) {
+			if (verbose)
+				do_warn(
+			"- hole info non-optimal in block %u, dir inode %llu\n",
+					da_bno, ino);
+			reset_holes = 1;
+		}
+	} else if (verify_da_freemap(mp, dir_freemap, &holemap, ino, da_bno)) {
+		if (verbose)
+			do_warn(
+			"- hole info incorrect in block %u, dir inode %llu\n",
+				da_bno, ino);
+		reset_holes = 1;
+	}
+
+	if (reset_holes) {
+		/*
+		 * have to reset block hole info
+		 */
+		if (verbose) {
+			do_warn(
+	"- existing hole info for block %d, dir inode %llu (base, size) - \n",
+				da_bno, ino);
+			do_warn("- \t");
+			for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; i++) {
+				do_warn(
+				"- (%d, %d) ", bholemap.hentries[i].base,
+					bholemap.hentries[i].size);
+			}
+			do_warn("- holes flag = %d\n", bholemap.lost_holes);
+		}
+
+		if (!no_modify) {
+			if (verbose)
+				do_warn(
+			"- compacting block %u in dir inode %llu\n",
+					da_bno, ino);
+
+			new_leaf = (xfs_dir_leafblock_t *) &dirbuf[0];
+
+			/*
+			 * copy leaf block header
+			 */
+			bcopy(&leaf->hdr, &new_leaf->hdr,
+				sizeof(xfs_dir_leaf_hdr_t));
+
+			/*
+			 * reset count in case we have some zero length entries
+			 * that are being junked
+			 */
+			num_entries = 0;
+			first_used = XFS_LBSIZE(mp);
+			first_byte = (char *) new_leaf
+					+ (__psint_t) XFS_LBSIZE(mp);
+
+			/*
+			 * copy entry table and pack names starting from the end
+			 * of the block
+			 */
+			for (i = 0, s_entry = &leaf->entries[0],
+					d_entry = &new_leaf->entries[0];
+					i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
+					i++, s_entry++) {
+				/*
+				 * skip zero-length entries
+				 */
+				if (s_entry->namelen == 0)
+					continue;
+
+				bytes = sizeof(xfs_dir_leaf_name_t) +
+					s_entry->namelen - 1;
+
+				if ((__psint_t) first_byte - bytes <
+						sizeof(xfs_dir_leaf_entry_t) +
+						(__psint_t) d_entry) {
+					do_warn(
+	"not enough space in block %u of dir inode %llu for all entries\n",
+						da_bno, ino);
+					break;
+				}
+
+				first_used -= bytes;
+				first_byte -= bytes;
+
+				INT_SET(d_entry->nameidx, ARCH_CONVERT, first_used);
+				INT_SET(d_entry->hashval, ARCH_CONVERT, INT_GET(s_entry->hashval, ARCH_CONVERT));
+				d_entry->namelen = s_entry->namelen;
+				d_entry->pad2 = 0;
+
+				bcopy((char *) leaf + INT_GET(s_entry->nameidx, ARCH_CONVERT),
+					first_byte, bytes);
+
+				num_entries++;
+				d_entry++;
+			}
+
+			ASSERT((char *) first_byte >= (char *) d_entry);
+			ASSERT(first_used <= XFS_LBSIZE(mp));
+
+			/*
+			 * zero space between end of table and top of heap
+			 */
+			bzero(d_entry, (__psint_t) first_byte
+					- (__psint_t) d_entry);
+
+			/*
+			 * reset header info
+			 *
+			 * NOTE(review): the second ASSERT below checks
+			 * freemap[0].base against i (source entries
+			 * walked) rather than num_entries (entries
+			 * copied); the two differ whenever a zero-length
+			 * entry was skipped or the copy loop broke out
+			 * early -- confirm.
+			 */
+			if (num_entries != INT_GET(new_leaf->hdr.count, ARCH_CONVERT))
+				INT_SET(new_leaf->hdr.count, ARCH_CONVERT, num_entries);
+
+			INT_SET(new_leaf->hdr.firstused, ARCH_CONVERT, first_used);
+			new_leaf->hdr.holes = 0;
+			new_leaf->hdr.pad1 = 0;
+
+			INT_SET(new_leaf->hdr.freemap[0].base, ARCH_CONVERT, (__psint_t) d_entry
+							- (__psint_t) new_leaf);
+			INT_SET(new_leaf->hdr.freemap[0].size, ARCH_CONVERT, (__psint_t) first_byte
+							- (__psint_t) d_entry);
+
+			ASSERT(INT_GET(new_leaf->hdr.freemap[0].base, ARCH_CONVERT) < first_used);
+			ASSERT(INT_GET(new_leaf->hdr.freemap[0].base, ARCH_CONVERT) ==
+					(__psint_t) (&new_leaf->entries[0])
+					- (__psint_t) new_leaf
+					+ i * sizeof(xfs_dir_leaf_entry_t));
+			ASSERT(INT_GET(new_leaf->hdr.freemap[0].base, ARCH_CONVERT) < XFS_LBSIZE(mp));
+			ASSERT(INT_GET(new_leaf->hdr.freemap[0].size, ARCH_CONVERT) < XFS_LBSIZE(mp));
+			ASSERT(INT_GET(new_leaf->hdr.freemap[0].base, ARCH_CONVERT) +
+				INT_GET(new_leaf->hdr.freemap[0].size, ARCH_CONVERT) == first_used);
+
+			INT_ZERO(new_leaf->hdr.freemap[1].base, ARCH_CONVERT);
+			INT_ZERO(new_leaf->hdr.freemap[1].size, ARCH_CONVERT);
+			INT_ZERO(new_leaf->hdr.freemap[2].base, ARCH_CONVERT);
+			INT_ZERO(new_leaf->hdr.freemap[2].size, ARCH_CONVERT);
+
+			/*
+			 * final step, copy block back
+			 */
+			bcopy(new_leaf, leaf, mp->m_sb.sb_blocksize);
+
+			*buf_dirty = 1;
+		} else {
+			if (verbose)
+				do_warn(
+			"- would compact block %u in dir inode %llu\n",
+					da_bno, ino);
+		}
+	}
+#if 0
+	if (!no_modify) {
+		/*
+		 * now take care of deleting or marking the entries with
+		 * zero-length namelen's
+		 */
+		junk_zerolen_dir_leaf_entries(mp, leaf, ino, buf_dirty);
+	}
+#endif
+#ifdef XR_DIR_TRACE
+	fprintf(stderr, "process_leaf_dir_block returns %d\n", res);
+#endif
+	return((res > 0) ? 1 : 0);
+}
+
+/*
+ * returns 0 if the directory is ok, 1 if it has to be junked.
+ */ +int +process_leaf_dir_level(xfs_mount_t *mp, + da_bt_cursor_t *da_cursor, + int ino_discovery, + int *repair, + int *dot, + int *dotdot, + xfs_ino_t *parent) +{ + xfs_dir_leafblock_t *leaf; + xfs_buf_t *bp; + xfs_ino_t ino; + xfs_dfsbno_t dev_bno; + xfs_dablk_t da_bno; + xfs_dablk_t prev_bno; + int res = 0; + int buf_dirty = 0; + xfs_daddr_t bd_addr; + xfs_dahash_t current_hashval = 0; + xfs_dahash_t greatest_hashval; + +#ifdef XR_DIR_TRACE + fprintf(stderr, "process_leaf_dir_level - ino %llu\n", da_cursor->ino); +#endif + *repair = 0; + da_bno = da_cursor->level[0].bno; + ino = da_cursor->ino; + prev_bno = 0; + + do { + dev_bno = blkmap_get(da_cursor->blkmap, da_bno); + /* + * directory code uses 0 as the NULL block pointer + * since 0 is the root block and no directory block + * pointer can point to the root block of the btree + */ + ASSERT(da_bno != 0); + + if (dev_bno == NULLDFSBNO) { + do_warn("can't map block %u for directory inode %llu\n", + da_bno, ino); + goto error_out; + } + + bd_addr = (xfs_daddr_t)XFS_FSB_TO_DADDR(mp, dev_bno); + + bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, dev_bno), + XFS_FSB_TO_BB(mp, 1), 0); + if (!bp) { + do_warn("can't read file block %u (fsbno %llu, daddr %lld) " + "for directory inode %llu\n", + da_bno, dev_bno, (__int64_t) bd_addr, ino); + goto error_out; + } + + leaf = (xfs_dir_leafblock_t *)XFS_BUF_PTR(bp); + + /* + * check magic number for leaf directory btree block + */ + if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) { + do_warn("bad directory leaf magic # %#x for dir ino %llu\n", + INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), ino); + libxfs_putbuf(bp); + goto error_out; + } + /* + * keep track of greatest block # -- that gets + * us the length of the directory + */ + if (da_bno > da_cursor->greatest_bno) + da_cursor->greatest_bno = da_bno; + + buf_dirty = 0; + /* + * for each block, process the block, verify it's path, + * then get next block. 
update cursor values along the way + */ + if (process_leaf_dir_block(mp, leaf, da_bno, ino, + current_hashval, ino_discovery, + da_cursor->blkmap, dot, dotdot, parent, + &buf_dirty, &greatest_hashval)) { + libxfs_putbuf(bp); + goto error_out; + } + + /* + * index can be set to hdr.count so match the + * indexes of the interior blocks -- which at the + * end of the block will point to 1 after the final + * real entry in the block + */ + da_cursor->level[0].hashval = greatest_hashval; + da_cursor->level[0].bp = bp; + da_cursor->level[0].bno = da_bno; + da_cursor->level[0].index = INT_GET(leaf->hdr.count, ARCH_CONVERT); + da_cursor->level[0].dirty = buf_dirty; + + if (INT_GET(leaf->hdr.info.back, ARCH_CONVERT) != prev_bno) { + do_warn("bad sibling back pointer for directory block %u " + "in directory inode %llu\n", da_bno, ino); + libxfs_putbuf(bp); + goto error_out; + } + + prev_bno = da_bno; + da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT); + + if (da_bno != 0) + if (verify_da_path(mp, da_cursor, 0)) { + libxfs_putbuf(bp); + goto error_out; + } + + current_hashval = greatest_hashval; + + ASSERT(buf_dirty == 0 || buf_dirty && !no_modify); + + if (buf_dirty && !no_modify) { + *repair = 1; + libxfs_writebuf(bp, 0); + } + else + libxfs_putbuf(bp); + } while (da_bno != 0 && res == 0); + + if (verify_final_da_path(mp, da_cursor, 0)) { + /* + * verify the final path up (right-hand-side) if still ok + */ + do_warn("bad hash path in directory %llu\n", da_cursor->ino); + goto error_out; + } + +#ifdef XR_DIR_TRACE + fprintf(stderr, "process_leaf_dir_level returns %d (%s)\n", + res, ((res) ? 
"bad" : "ok")); +#endif + /* + * redundant but just for testing + */ + release_da_cursor(mp, da_cursor, 0); + + return(res); + +error_out: + /* + * release all buffers holding interior btree blocks + */ + err_release_da_cursor(mp, da_cursor, 0); + + return(1); +} + +/* + * a node directory is a true btree directory -- where the directory + * has gotten big enough that it is represented as a non-trivial (e.g. + * has more than just a root block) btree. + * + * Note that if we run into any problems, we trash the + * directory. Even if it's the root directory, + * we'll be able to traverse all the disconnected + * subtrees later (phase 6). + * + * one day, if we actually fix things, we'll set repair to 1 to + * indicate that we have or that we should. + * + * dirname can be set to NULL if the name is unknown (or to + * the string representation of the inode) + * + * returns 0 if things are ok, 1 if bad (directory needs to be junked) + */ +/* ARGSUSED */ +int +process_node_dir( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + int ino_discovery, + blkmap_t *blkmap, + int *dot, + int *dotdot, + xfs_ino_t *parent, /* out - parent ino # or NULLFSINO */ + char *dirname, + int *repair) +{ + xfs_dablk_t bno; + int error = 0; + da_bt_cursor_t da_cursor; + +#ifdef XR_DIR_TRACE + fprintf(stderr, "process_node_dir - ino %llu\n", ino); +#endif + *repair = *dot = *dotdot = 0; + *parent = NULLFSINO; + + /* + * try again -- traverse down left-side of tree until we hit + * the left-most leaf block setting up the btree cursor along + * the way. Then walk the leaf blocks left-to-right, calling + * a parent-verification routine each time we traverse a block. 
+ */ + bzero(&da_cursor, sizeof(da_bt_cursor_t)); + + da_cursor.active = 0; + da_cursor.type = 0; + da_cursor.ino = ino; + da_cursor.dip = dip; + da_cursor.greatest_bno = 0; + da_cursor.blkmap = blkmap; + + /* + * now process interior node + */ + + error = traverse_int_dablock(mp, &da_cursor, &bno, XFS_DATA_FORK); + + if (error == 0) + return(1); + + /* + * now pass cursor and bno into leaf-block processing routine + * the leaf dir level routine checks the interior paths + * up to the root including the final right-most path. + */ + + error = process_leaf_dir_level(mp, &da_cursor, ino_discovery, + repair, dot, dotdot, parent); + + if (error) + return(1); + + /* + * sanity check inode size + */ + if (INT_GET(dip->di_core.di_size, ARCH_CONVERT) < + (da_cursor.greatest_bno + 1) * mp->m_sb.sb_blocksize) { + if ((xfs_fsize_t) (da_cursor.greatest_bno + * mp->m_sb.sb_blocksize) > UINT_MAX) { + do_warn( +"out of range internal directory block numbers (inode %llu)\n", + ino); + return(1); + } + + do_warn( +"setting directory inode (%llu) size to %llu bytes, was %lld bytes\n", + ino, + (xfs_dfiloff_t) (da_cursor.greatest_bno + 1) + * mp->m_sb.sb_blocksize, + INT_GET(dip->di_core.di_size, ARCH_CONVERT)); + + INT_SET(dip->di_core.di_size, ARCH_CONVERT, (xfs_fsize_t) + (da_cursor.greatest_bno + 1) * mp->m_sb.sb_blocksize); + } + return(0); +} + +/* + * a leaf directory is one where the directory is too big for + * the inode data fork but is small enough to fit into one + * directory btree block (filesystem block) outside the inode + * + * returns NULLFSINO if the directory is cannot be salvaged + * and the .. ino if things are ok (even if the directory had + * to be altered to make it ok). 
+ * + * dirname can be set to NULL if the name is unknown (or to + * the string representation of the inode) + * + * returns 0 if things are ok, 1 if bad (directory needs to be junked) + */ +/* ARGSUSED */ +int +process_leaf_dir( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + int ino_discovery, + int *dino_dirty, + blkmap_t *blkmap, + int *dot, /* out - 1 if there is a dot, else 0 */ + int *dotdot, /* out - 1 if there's a dotdot, else 0 */ + xfs_ino_t *parent, /* out - parent ino # or NULLFSINO */ + char *dirname, /* in - directory pathname */ + int *repair) /* out - 1 if something was fixed */ +{ + xfs_dir_leafblock_t *leaf; + xfs_dahash_t next_hashval; + xfs_dfsbno_t bno; + xfs_buf_t *bp; + int buf_dirty = 0; + +#ifdef XR_DIR_TRACE + fprintf(stderr, "process_leaf_dir - ino %llu\n", ino); +#endif + *repair = *dot = *dotdot = 0; + *parent = NULLFSINO; + + bno = blkmap_get(blkmap, 0); + if (bno == NULLDFSBNO) { + do_warn("block 0 for directory inode %llu is missing\n", ino); + return(1); + } + bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno), + XFS_FSB_TO_BB(mp, 1), 0); + if (!bp) { + do_warn("can't read block 0 for directory inode %llu\n", ino); + return(1); + } + /* + * verify leaf block + */ + leaf = (xfs_dir_leafblock_t *)XFS_BUF_PTR(bp); + + /* + * check magic number for leaf directory btree block + */ + if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) { + do_warn("bad directory leaf magic # %#x for dir ino %llu\n", + INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), ino); + libxfs_putbuf(bp); + return(1); + } + + if (process_leaf_dir_block(mp, leaf, 0, ino, 0, ino_discovery, blkmap, + dot, dotdot, parent, &buf_dirty, &next_hashval)) { + /* + * the block is bad. lose the directory. 
+ * XXX - later, we should try and just lose + * the block without losing the entire directory + */ + ASSERT(*dotdot == 0 || *dotdot == 1 && *parent != NULLFSINO); + libxfs_putbuf(bp); + return(1); + } + + /* + * check sibling pointers in leaf block (above doesn't do it) + */ + if (INT_GET(leaf->hdr.info.forw, ARCH_CONVERT) != 0 || + INT_GET(leaf->hdr.info.back, ARCH_CONVERT) != 0) { + if (!no_modify) { + do_warn("clearing forw/back pointers for directory inode " + "%llu\n", ino); + buf_dirty = 1; + INT_ZERO(leaf->hdr.info.forw, ARCH_CONVERT); + INT_ZERO(leaf->hdr.info.back, ARCH_CONVERT); + } else { + do_warn("would clear forw/back pointers for directory inode " + "%llu\n", ino); + } + } + + ASSERT(buf_dirty == 0 || buf_dirty && !no_modify); + + if (buf_dirty && !no_modify) + libxfs_writebuf(bp, 0); + else + libxfs_putbuf(bp); + + return(0); +} + +/* + * returns 1 if things are bad (directory needs to be junked) + * and 0 if things are ok. If ino_discovery is 1, add unknown + * inodes to uncertain inode list. + */ +int +process_dir( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + int ino_discovery, + int *dino_dirty, + char *dirname, + xfs_ino_t *parent, + blkmap_t *blkmap) +{ + int dot; + int dotdot; + int repair = 0; + int res = 0; + + *parent = NULLFSINO; + dot = dotdot = 0; + + /* + * branch off depending on the type of inode. This routine + * is only called ONCE so all the subordinate routines will + * fix '.' and junk '..' if they're bogus. 
+ */ + if (INT_GET(dip->di_core.di_size, ARCH_CONVERT) <= XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT)) { + dot = 1; + dotdot = 1; + if (process_shortform_dir(mp, ino, dip, ino_discovery, + dino_dirty, parent, dirname, &repair)) { + res = 1; + } + } else if (INT_GET(dip->di_core.di_size, ARCH_CONVERT) <= XFS_LBSIZE(mp)) { + if (process_leaf_dir(mp, ino, dip, ino_discovery, + dino_dirty, blkmap, &dot, &dotdot, + parent, dirname, &repair)) { + res = 1; + } + } else { + if (process_node_dir(mp, ino, dip, ino_discovery, + blkmap, &dot, &dotdot, + parent, dirname, &repair)) { + res = 1; + } + } + /* + * bad . entries in all directories will be fixed up in phase 6 + */ + if (dot == 0) { + do_warn("no . entry for directory %llu\n", ino); + } + + /* + * shortform dirs always have a .. entry. .. for all longform + * directories will get fixed in phase 6. .. for other shortform + * dirs also get fixed there. .. for a shortform root was + * fixed in place since we know what it should be + */ + if (dotdot == 0 && ino != mp->m_sb.sb_rootino) { + do_warn("no .. entry for directory %llu\n", ino); + } else if (dotdot == 0 && ino == mp->m_sb.sb_rootino) { + do_warn("no .. entry for root directory %llu\n", ino); + need_root_dotdot = 1; + } + +#ifdef XR_DIR_TRACE + fprintf(stderr, "(process_dir), parent of %llu is %llu\n", ino, parent); +#endif + return(res); +} diff --git a/repair/dir.h b/repair/dir.h new file mode 100644 index 000000000..9d2b069b0 --- /dev/null +++ b/repair/dir.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#ifndef _XR_DIR_H +#define _XR_DIR_H + +struct blkmap; + +/* 1 bit per byte, max XFS blocksize == 64K bits / NBBY */ +#define DA_BMAP_SIZE 8192 + +typedef unsigned char da_freemap_t; + +/* + * the cursor gets passed up and down the da btree processing + * routines. The interior block processing routines use the + * cursor to determine if the pointers to and from the preceding + * and succeeding sibling blocks are ok and whether the values in + * the current block are consistent with the entries in the parent + * nodes. When a block is traversed, a parent-verification routine + * is called to verify if the next logical entry in the next level up + * is consistent with the greatest hashval in the next block of the + * current level. The verification routine is itself recursive and + * calls itself if it has to traverse an interior block to get + * the next logical entry. The routine recurses upwards through + * the tree until it finds a block where it can simply step to + * the next entry. 
The hashval in that entry should be equal to + * the hashval being passed to it (the greatest hashval in the block + * that the entry points to). If that isn't true, then the tree + * is blown and we need to trash it, salvage and trash it, or fix it. + * Currently, we just trash it. + */ +typedef struct da_level_state { + xfs_buf_t *bp; /* block bp */ +#ifdef XR_DIR_TRACE + xfs_da_intnode_t *n; /* bp data */ +#endif + xfs_dablk_t bno; /* file block number */ + xfs_dahash_t hashval; /* last verified hashval */ + int index; /* current index in block */ + int dirty; /* is buffer dirty ? (1 == yes) */ +} da_level_state_t; + +typedef struct da_bt_cursor { + int active; /* highest level in tree (# levels-1) */ + int type; /* 0 if dir, 1 if attr */ + xfs_ino_t ino; + xfs_dablk_t greatest_bno; + xfs_dinode_t *dip; + da_level_state_t level[XFS_DA_NODE_MAXDEPTH]; + struct blkmap *blkmap; +} da_bt_cursor_t; + + +/* ROUTINES */ + +void +err_release_da_cursor( + xfs_mount_t *mp, + da_bt_cursor_t *cursor, + int prev_level); + +xfs_dfsbno_t +get_first_dblock_fsbno( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dino); + +void +init_da_freemap( + da_freemap_t *dir_freemap); + +int +namecheck( + char *name, + int length); + +int +process_shortform_dir( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + int ino_discovery, + int *dino_dirty, /* is dinode buffer dirty? 
*/ + xfs_ino_t *parent, /* out - NULLFSINO if entry doesn't exist */ + char *dirname, /* directory pathname */ + int *repair); /* out - 1 if dir was fixed up */ + +int +process_dir( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + int ino_discovery, + int *dirty, + char *dirname, + xfs_ino_t *parent, + struct blkmap *blkmap); + +void +release_da_cursor( + xfs_mount_t *mp, + da_bt_cursor_t *cursor, + int prev_level); + +int +set_da_freemap( + xfs_mount_t *mp, da_freemap_t *map, + int start, int stop); + +int +traverse_int_dablock( + xfs_mount_t *mp, + da_bt_cursor_t *da_cursor, + xfs_dablk_t *rbno, + int whichfork); + +int +verify_da_path( + xfs_mount_t *mp, + da_bt_cursor_t *cursor, + const int p_level); + +int +verify_final_da_path( + xfs_mount_t *mp, + da_bt_cursor_t *cursor, + const int p_level); + + +#endif /* _XR_DIR_H */ diff --git a/repair/dir2.c b/repair/dir2.c new file mode 100644 index 000000000..e2675df9b --- /dev/null +++ b/repair/dir2.c @@ -0,0 +1,2070 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "avl.h" +#include "globals.h" +#include "incore.h" +#include "err_protos.h" +#include "dinode.h" +#include "dir.h" +#include "dir2.h" +#include "bmap.h" + +/* + * Tag bad directory entries with this. + * We can't tag them with -1 since that will look like a + * data_unused_t instead of a data_entry_t. + */ +#define BADFSINO ((xfs_ino_t)0xfeffffffffffffffULL) + +/* + * Known bad inode list. These are seen when the leaf and node + * block linkages are incorrect. + */ +typedef struct dir2_bad { + xfs_ino_t ino; + struct dir2_bad *next; +} dir2_bad_t; +dir2_bad_t *dir2_bad_list; + +void +dir2_add_badlist( + xfs_ino_t ino) +{ + dir2_bad_t *l; + + if ((l = malloc(sizeof(dir2_bad_t))) == NULL) { + do_error("malloc failed (%u bytes) dir2_add_badlist:ino %llu\n", + sizeof(dir2_bad_t), ino); + exit(1); + } + l->next = dir2_bad_list; + dir2_bad_list = l; + l->ino = ino; +} + +int +dir2_is_badino( + xfs_ino_t ino) +{ + dir2_bad_t *l; + + for (l = dir2_bad_list; l; l = l->next) + if (l->ino == ino) + return 1; + return 0; +} + +/* + * Multibuffer handling. + * V2 directory blocks can be noncontiguous, needing multiple buffers. 
+ */ +xfs_dabuf_t * +da_read_buf( + xfs_mount_t *mp, + int nex, + bmap_ext_t *bmp) +{ + xfs_buf_t *bp; + xfs_buf_t **bplist; + xfs_dabuf_t *dabuf; + int i; + int off; + + bplist = calloc(nex, sizeof(*bplist)); + if (bplist == NULL) { + do_error("couldn't malloc dir2 buffer list\n"); + exit(1); + } + for (i = 0; i < nex; i++) { + bplist[i] = libxfs_readbuf(mp->m_dev, + XFS_FSB_TO_DADDR(mp, bmp[i].startblock), + XFS_FSB_TO_BB(mp, bmp[i].blockcount), 0); + if (!bplist[i]) + goto failed; + } + dabuf = malloc(XFS_DA_BUF_SIZE(nex)); + if (dabuf == NULL) { + do_error("couldn't malloc dir2 buffer header\n"); + exit(1); + } + dabuf->dirty = 0; + dabuf->nbuf = nex; + if (nex == 1) { + bp = bplist[0]; + dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp)); + dabuf->data = XFS_BUF_PTR(bp); + dabuf->bps[0] = bp; + } else { + for (i = 0, dabuf->bbcount = 0; i < nex; i++) { + dabuf->bps[i] = bp = bplist[i]; + dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp)); + } + dabuf->data = malloc(BBTOB(dabuf->bbcount)); + if (dabuf->data == NULL) { + do_error("couldn't malloc dir2 buffer data\n"); + exit(1); + } + for (i = off = 0; i < nex; i++, off += XFS_BUF_COUNT(bp)) { + bp = bplist[i]; + bcopy(XFS_BUF_PTR(bp), (char *)dabuf->data + off, + XFS_BUF_COUNT(bp)); + } + } + return dabuf; +failed: + for (i = 0; i < nex; i++) + libxfs_putbuf(bplist[i]); + free(bplist); + return NULL; +} + +static void +da_buf_clean( + xfs_dabuf_t *dabuf) +{ + xfs_buf_t *bp; + int i; + int off; + + if (dabuf->dirty) { + dabuf->dirty = 0; + for (i=off=0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) { + bp = dabuf->bps[i]; + bcopy((char *)dabuf->data + off, XFS_BUF_PTR(bp), + XFS_BUF_COUNT(bp)); + } + } +} + +static void +da_buf_done( + xfs_dabuf_t *dabuf) +{ + da_buf_clean(dabuf); + if (dabuf->nbuf > 1) + free(dabuf->data); + free(dabuf); +} + +int +da_bwrite( + xfs_mount_t *mp, + xfs_dabuf_t *dabuf) +{ + xfs_buf_t *bp; + xfs_buf_t **bplist; + int e; + int error; + int i; + int nbuf; + + if ((nbuf = dabuf->nbuf) == 1) { 
+ bplist = &bp; + bp = dabuf->bps[0]; + } else { + bplist = malloc(nbuf * sizeof(*bplist)); + if (bplist == NULL) { + do_error("couldn't malloc dir2 buffer list\n"); + exit(1); + } + bcopy(dabuf->bps, bplist, nbuf * sizeof(*bplist)); + } + da_buf_done(dabuf); + for (i = error = 0; i < nbuf; i++) { + e = libxfs_writebuf(bplist[i], 0); + if (e) + error = e; + } + if (bplist != &bp) + free(bplist); + return error; +} + +void +da_brelse( + xfs_dabuf_t *dabuf) +{ + xfs_buf_t *bp; + xfs_buf_t **bplist; + int i; + int nbuf; + + if ((nbuf = dabuf->nbuf) == 1) { + bplist = &bp; + bp = dabuf->bps[0]; + } else { + bplist = malloc(nbuf * sizeof(*bplist)); + if (bplist == NULL) { + do_error("couldn't malloc dir2 buffer list\n"); + exit(1); + } + bcopy(dabuf->bps, bplist, nbuf * sizeof(*bplist)); + } + da_buf_done(dabuf); + for (i = 0; i < nbuf; i++) + libxfs_putbuf(bplist[i]); + if (bplist != &bp) + free(bplist); +} + +/* + * walk tree from root to the left-most leaf block reading in + * blocks and setting up cursor. passes back file block number of the + * left-most leaf block if successful (bno). returns 1 if successful, + * 0 if unsuccessful. + */ +int +traverse_int_dir2block(xfs_mount_t *mp, + dir2_bt_cursor_t *da_cursor, + xfs_dablk_t *rbno) +{ + bmap_ext_t *bmp; + xfs_dablk_t bno; + xfs_dabuf_t *bp; + int i; + int nex; + xfs_da_intnode_t *node; + + /* + * traverse down left-side of tree until we hit the + * left-most leaf block setting up the btree cursor along + * the way. 
+ */ + bno = mp->m_dirleafblk; + i = -1; + node = NULL; + da_cursor->active = 0; + + do { + /* + * read in each block along the way and set up cursor + */ + nex = blkmap_getn(da_cursor->blkmap, bno, mp->m_dirblkfsbs, + &bmp); + + if (nex == 0) + goto error_out; + + bp = da_read_buf(mp, nex, bmp); + free(bmp); + if (bp == NULL) { + do_warn("can't read block %u for directory inode " + "%llu\n", + bno, da_cursor->ino); + goto error_out; + } + + node = bp->data; + + if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) == + XFS_DIR2_LEAFN_MAGIC) { + if ( i != -1 ) { + do_warn("found non-root LEAFN node in inode " + "%llu bno = %u\n", + da_cursor->ino, bno); + } + if (INT_GET(node->hdr.level, ARCH_CONVERT) >= 1) { + do_warn("LEAFN node level is %d inode %llu " + "bno = %u\n", + INT_GET(node->hdr.level, ARCH_CONVERT), + da_cursor->ino, bno); + } + *rbno = 0; + da_brelse(bp); + return(1); + } else if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) != + XFS_DA_NODE_MAGIC) { + da_brelse(bp); + do_warn("bad dir magic number 0x%x in inode %llu " + "bno = %u\n", + INT_GET(node->hdr.info.magic, ARCH_CONVERT), + da_cursor->ino, bno); + goto error_out; + } + if (INT_GET(node->hdr.count, ARCH_CONVERT) > + XFS_DA_NODE_ENTRIES(mp)) { + da_brelse(bp); + do_warn("bad record count in inode %llu, count = %d, " + "max = %d\n", da_cursor->ino, + INT_GET(node->hdr.count, ARCH_CONVERT), + XFS_DA_NODE_ENTRIES(mp)); + goto error_out; + } + + /* + * maintain level counter + */ + if (i == -1) + i = da_cursor->active = + INT_GET(node->hdr.level, ARCH_CONVERT); + else { + if (INT_GET(node->hdr.level, ARCH_CONVERT) == i - 1) { + i--; + } else { + do_warn("bad directory btree for directory " + "inode %llu\n", + da_cursor->ino); + da_brelse(bp); + goto error_out; + } + } + + da_cursor->level[i].hashval = + INT_GET(node->btree[0].hashval, ARCH_CONVERT); + da_cursor->level[i].bp = bp; + da_cursor->level[i].bno = bno; + da_cursor->level[i].index = 0; + + /* + * set up new bno for next level down + */ + bno = 
INT_GET(node->btree[0].before, ARCH_CONVERT); + } while (node != NULL && i > 1); + + /* + * now return block number and get out + */ + *rbno = da_cursor->level[0].bno = bno; + return(1); + +error_out: + while (i > 1 && i <= da_cursor->active) { + da_brelse(da_cursor->level[i].bp); + i++; + } + + return(0); +} + +/* + * blow out buffer for this level and all the rest above as well + * if error == 0, we are not expecting to encounter any unreleased + * buffers (e.g. if we do, it's a mistake). if error == 1, we're + * in an error-handling case so unreleased buffers may exist. + */ +void +release_dir2_cursor_int(xfs_mount_t *mp, + dir2_bt_cursor_t *cursor, + int prev_level, + int error) +{ + int level = prev_level + 1; + + if (cursor->level[level].bp != NULL) { + if (!error) { + do_warn("release_dir2_cursor_int got unexpected " + "non-null bp, dabno = %u\n", + cursor->level[level].bno); + } + ASSERT(error != 0); + + da_brelse(cursor->level[level].bp); + cursor->level[level].bp = NULL; + } + + if (level < cursor->active) + release_dir2_cursor_int(mp, cursor, level, error); + + return; +} + +void +release_dir2_cursor(xfs_mount_t *mp, + dir2_bt_cursor_t *cursor, + int prev_level) +{ + release_dir2_cursor_int(mp, cursor, prev_level, 0); +} + +void +err_release_dir2_cursor(xfs_mount_t *mp, + dir2_bt_cursor_t *cursor, + int prev_level) +{ + release_dir2_cursor_int(mp, cursor, prev_level, 1); +} + +/* + * make sure that all entries in all blocks along the right side of + * of the tree are used and hashval's are consistent. level is the + * level of the descendent block. returns 0 if good (even if it had + * to be fixed up), and 1 if bad. The right edge of the tree is + * technically a block boundary. This routine should be used then + * instead of verify_dir2_path(). 
+ */ +int +verify_final_dir2_path(xfs_mount_t *mp, + dir2_bt_cursor_t *cursor, + const int p_level) +{ + xfs_da_intnode_t *node; + int bad = 0; + int entry; + int this_level = p_level + 1; + + /* + * the index should point to the next "unprocessed" entry + * in the block which should be the final (rightmost) entry + */ + entry = cursor->level[this_level].index; + node = (xfs_da_intnode_t *)(cursor->level[this_level].bp->data); + /* + * check internal block consistency on this level -- ensure + * that all entries are used, encountered and expected hashvals + * match, etc. + */ + if (entry != INT_GET(node->hdr.count, ARCH_CONVERT) - 1) { + do_warn("directory block used/count inconsistency - %d / %hu\n", + entry, INT_GET(node->hdr.count, ARCH_CONVERT)); + bad++; + } + /* + * hash values monotonically increasing ??? + */ + if (cursor->level[this_level].hashval >= INT_GET(node->btree[entry].hashval, ARCH_CONVERT)) { + do_warn("directory/attribute block hashvalue inconsistency, " + "expected > %u / saw %u\n", + cursor->level[this_level].hashval, + INT_GET(node->btree[entry].hashval, ARCH_CONVERT)); + bad++; + } + if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) != 0) { + do_warn("bad directory/attribute forward block pointer, " + "expected 0, saw %u\n", + INT_GET(node->hdr.info.forw, ARCH_CONVERT)); + bad++; + } + if (bad) { + do_warn("bad directory block in inode %llu\n", cursor->ino); + return(1); + } + /* + * keep track of greatest block # -- that gets + * us the length of the directory + */ + if (cursor->level[this_level].bno > cursor->greatest_bno) + cursor->greatest_bno = cursor->level[this_level].bno; + + /* + * ok, now check descendant block number against this level + */ + if (cursor->level[p_level].bno != INT_GET(node->btree[entry].before, ARCH_CONVERT)) { + return(1); + } + + if (cursor->level[p_level].hashval != INT_GET(node->btree[entry].hashval, ARCH_CONVERT)) { + if (!no_modify) { + do_warn("correcting bad hashval in non-leaf dir " + "block\n"); + 
do_warn("\tin (level %d) in inode %llu.\n", + this_level, cursor->ino); + INT_SET(node->btree[entry].hashval, ARCH_CONVERT, cursor->level[p_level].hashval); + cursor->level[this_level].dirty++; + } else { + do_warn("would correct bad hashval in non-leaf dir " + "block\n"); + do_warn("\tin (level %d) in inode %llu.\n", + this_level, cursor->ino); + } + } + + /* + * release/write buffer + */ + ASSERT(cursor->level[this_level].dirty == 0 || + cursor->level[this_level].dirty && !no_modify); + + if (cursor->level[this_level].dirty && !no_modify) + da_bwrite(mp, cursor->level[this_level].bp); + else + da_brelse(cursor->level[this_level].bp); + + cursor->level[this_level].bp = NULL; + + /* + * bail out if this is the root block (top of tree) + */ + if (this_level >= cursor->active) { + return(0); + } + /* + * set hashvalue to correctl reflect the now-validated + * last entry in this block and continue upwards validation + */ + cursor->level[this_level].hashval = INT_GET(node->btree[entry].hashval, ARCH_CONVERT); + + return(verify_final_dir2_path(mp, cursor, this_level)); +} + +/* + * Verifies the path from a descendant block up to the root. + * Should be called when the descendant level traversal hits + * a block boundary before crossing the boundary (reading in a new + * block). + * + * the directory/attr btrees work differently to the other fs btrees. + * each interior block contains records that are + * pairs. The bno is a file bno, not a filesystem bno. The last + * hashvalue in the block will be . BUT unlike + * the freespace btrees, the *last* value in each block gets + * propagated up the tree instead of the first value in each block. + * that is, the interior records point to child blocks and the *greatest* + * hash value contained by the child block is the one the block above + * uses as the key for the child block. + * + * level is the level of the descendent block. returns 0 if good, + * and 1 if bad. The descendant block may be a leaf block. 
+ * + * the invariant here is that the values in the cursor for the + * levels beneath this level (this_level) and the cursor index + * for this level *must* be valid. + * + * that is, the hashval/bno info is accurate for all + * DESCENDANTS and match what the node[index] information + * for the current index in the cursor for this level. + * + * the index values in the cursor for the descendant level + * are allowed to be off by one as they will reflect the + * next entry at those levels to be processed. + * + * the hashvalue for the current level can't be set until + * we hit the last entry in the block so, it's garbage + * until set by this routine. + * + * bno and bp for the current block/level are always valid + * since they have to be set so we can get a buffer for the + * block. + */ +int +verify_dir2_path(xfs_mount_t *mp, + dir2_bt_cursor_t *cursor, + const int p_level) +{ + xfs_da_intnode_t *node; + xfs_da_intnode_t *newnode; + xfs_dablk_t dabno; + xfs_dabuf_t *bp; + int bad; + int entry; + int this_level = p_level + 1; + bmap_ext_t *bmp; + int nex; + + /* + * index is currently set to point to the entry that + * should be processed now in this level. + */ + entry = cursor->level[this_level].index; + node = cursor->level[this_level].bp->data; + + /* + * if this block is out of entries, validate this + * block and move on to the next block. + * and update cursor value for said level + */ + if (entry >= INT_GET(node->hdr.count, ARCH_CONVERT)) { + /* + * update the hash value for this level before + * validating it. bno value should be ok since + * it was set when the block was first read in. 
+ */ + cursor->level[this_level].hashval = + INT_GET(node->btree[entry - 1].hashval, ARCH_CONVERT); + + /* + * keep track of greatest block # -- that gets + * us the length of the directory + */ + if (cursor->level[this_level].bno > cursor->greatest_bno) + cursor->greatest_bno = cursor->level[this_level].bno; + + /* + * validate the path for the current used-up block + * before we trash it + */ + if (verify_dir2_path(mp, cursor, this_level)) + return(1); + /* + * ok, now get the next buffer and check sibling pointers + */ + dabno = INT_GET(node->hdr.info.forw, ARCH_CONVERT); + ASSERT(dabno != 0); + nex = blkmap_getn(cursor->blkmap, dabno, mp->m_dirblkfsbs, + &bmp); + if (nex == 0) { + do_warn("can't get map info for block %u of directory " + "inode %llu\n", + dabno, cursor->ino); + return(1); + } + + bp = da_read_buf(mp, nex, bmp); + + if (bp == NULL) { + do_warn("can't read block %u for directory inode " + "%llu\n", + dabno, cursor->ino); + return(1); + } + + newnode = bp->data; + /* + * verify magic number and back pointer, sanity-check + * entry count, verify level + */ + bad = 0; + if (INT_GET(newnode->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC) { + do_warn("bad magic number %x in block %u for directory " + "inode %llu\n", + INT_GET(newnode->hdr.info.magic, ARCH_CONVERT), dabno, cursor->ino); + bad++; + } + if (INT_GET(newnode->hdr.info.back, ARCH_CONVERT) != cursor->level[this_level].bno) { + do_warn("bad back pointer in block %u for directory " + "inode %llu\n", + dabno, cursor->ino); + bad++; + } + if (INT_GET(newnode->hdr.count, ARCH_CONVERT) > XFS_DA_NODE_ENTRIES(mp)) { + do_warn("entry count %d too large in block %u for " + "directory inode %llu\n", + INT_GET(newnode->hdr.count, ARCH_CONVERT), dabno, cursor->ino); + bad++; + } + if (INT_GET(newnode->hdr.level, ARCH_CONVERT) != this_level) { + do_warn("bad level %d in block %u for directory inode " + "%llu\n", + INT_GET(newnode->hdr.level, ARCH_CONVERT), dabno, cursor->ino); + bad++; + } + if (bad) 
{ + da_brelse(bp); + return(1); + } + /* + * update cursor, write out the *current* level if + * required. don't write out the descendant level + */ + ASSERT(cursor->level[this_level].dirty == 0 || + cursor->level[this_level].dirty && !no_modify); + + if (cursor->level[this_level].dirty && !no_modify) + da_bwrite(mp, cursor->level[this_level].bp); + else + da_brelse(cursor->level[this_level].bp); + cursor->level[this_level].bp = bp; + cursor->level[this_level].dirty = 0; + cursor->level[this_level].bno = dabno; + cursor->level[this_level].hashval = INT_GET(newnode->btree[0].hashval, ARCH_CONVERT); + node = newnode; + + entry = cursor->level[this_level].index = 0; + } + /* + * ditto for block numbers + */ + if (cursor->level[p_level].bno != INT_GET(node->btree[entry].before, ARCH_CONVERT)) { + return(1); + } + /* + * ok, now validate last hashvalue in the descendant + * block against the hashval in the current entry + */ + if (cursor->level[p_level].hashval != INT_GET(node->btree[entry].hashval, ARCH_CONVERT)) { + if (!no_modify) { + do_warn("correcting bad hashval in interior dir " + "block\n"); + do_warn("\tin (level %d) in inode %llu.\n", + this_level, cursor->ino); + INT_SET(node->btree[entry].hashval, ARCH_CONVERT, cursor->level[p_level].hashval); + cursor->level[this_level].dirty++; + } else { + do_warn("would correct bad hashval in interior dir " + "block\n"); + do_warn("\tin (level %d) in inode %llu.\n", + this_level, cursor->ino); + } + } + /* + * increment index for this level to point to next entry + * (which should point to the next descendant block) + */ + cursor->level[this_level].index++; + return(0); +} + +/* + * Fix up a shortform directory which was in long form (i8count set) + * and is now in short form (i8count clear). + * Return pointer to the end of the data when done. 
+ */ +void +process_sf_dir2_fixi8( + xfs_dir2_sf_t *sfp, + xfs_dir2_sf_entry_t **next_sfep) +{ + xfs_ino_t ino; + xfs_dir2_sf_t *newsfp; + xfs_dir2_sf_entry_t *newsfep; + xfs_dir2_sf_t *oldsfp; + xfs_dir2_sf_entry_t *oldsfep; + int oldsize; + + newsfp = sfp; + oldsize = (__psint_t)*next_sfep - (__psint_t)sfp; + oldsfp = malloc(oldsize); + if (oldsfp == NULL) { + do_error("couldn't malloc dir2 shortform copy\n"); + exit(1); + } + memmove(oldsfp, newsfp, oldsize); + INT_SET(newsfp->hdr.count, ARCH_CONVERT, INT_GET(oldsfp->hdr.count, ARCH_CONVERT)); + newsfp->hdr.i8count = 0; + ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp, &oldsfp->hdr.parent, ARCH_CONVERT); + XFS_DIR2_SF_PUT_INUMBER_ARCH(newsfp, &ino, &newsfp->hdr.parent, ARCH_CONVERT); + oldsfep = XFS_DIR2_SF_FIRSTENTRY(oldsfp); + newsfep = XFS_DIR2_SF_FIRSTENTRY(newsfp); + while ((int)((char *)oldsfep - (char *)oldsfp) < oldsize) { + newsfep->namelen = oldsfep->namelen; + XFS_DIR2_SF_PUT_OFFSET_ARCH(newsfep, + XFS_DIR2_SF_GET_OFFSET_ARCH(oldsfep, ARCH_CONVERT), ARCH_CONVERT); + memmove(newsfep->name, oldsfep->name, newsfep->namelen); + ino = XFS_DIR2_SF_GET_INUMBER_ARCH(oldsfp, + XFS_DIR2_SF_INUMBERP(oldsfep), ARCH_CONVERT); + XFS_DIR2_SF_PUT_INUMBER_ARCH(newsfp, &ino, + XFS_DIR2_SF_INUMBERP(newsfep), ARCH_CONVERT); + oldsfep = XFS_DIR2_SF_NEXTENTRY(oldsfp, oldsfep); + newsfep = XFS_DIR2_SF_NEXTENTRY(newsfp, newsfep); + } + *next_sfep = newsfep; + free(oldsfp); +} + +/* + * Regenerate legal (minimal) offsets for the shortform directory. 
+ */ +static void +process_sf_dir2_fixoff( + xfs_dinode_t *dip) +{ + int i; + int offset; + xfs_dir2_sf_entry_t *sfep; + xfs_dir2_sf_t *sfp; + + for (i = 0, sfp = &dip->di_u.di_dir2sf, + sfep = XFS_DIR2_SF_FIRSTENTRY(sfp), + offset = XFS_DIR2_DATA_FIRST_OFFSET; + i < INT_GET(sfp->hdr.count, ARCH_CONVERT); + i++, sfep = XFS_DIR2_SF_NEXTENTRY(sfp, sfep)) { + XFS_DIR2_SF_PUT_OFFSET_ARCH(sfep, offset, ARCH_CONVERT); + offset += XFS_DIR2_DATA_ENTSIZE(sfep->namelen); + } +} + +/* + * this routine performs inode discovery and tries to fix things + * in place. available redundancy -- inode data size should match + * used directory space in inode. + * a non-zero return value means the directory is bogus and should be blasted. + */ +/* ARGSUSED */ +static int +process_sf_dir2( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + int ino_discovery, + int *dino_dirty, /* out - 1 if dinode buffer dirty */ + char *dirname, /* directory pathname */ + xfs_ino_t *parent, /* out - NULLFSINO if entry not exist */ + int *repair) /* out - 1 if dir was fixed up */ +{ + int bad_offset; + int bad_sfnamelen; + int i; + int i8; + __int64_t ino_dir_size; + int ino_off; + ino_tree_node_t *irec_p; + int junkit; + char *junkreason = NULL; + xfs_ino_t lino; + int max_size; + char name[MAXNAMELEN + 1]; + int namelen; + xfs_dir2_sf_entry_t *next_sfep; + int num_entries; + int offset; + xfs_dir2_sf_t *sfp; + xfs_dir2_sf_entry_t *sfep; + int tmp_elen; + int tmp_len; + xfs_dir2_sf_entry_t *tmp_sfep; + xfs_ino_t zero = 0; + + sfp = &dip->di_u.di_dir2sf; + max_size = XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT); + num_entries = INT_GET(sfp->hdr.count, ARCH_CONVERT); + ino_dir_size = INT_GET(dip->di_core.di_size, ARCH_CONVERT); + offset = XFS_DIR2_DATA_FIRST_OFFSET; + i8 = bad_offset = *repair = 0; + + ASSERT(ino_dir_size <= max_size); + + /* + * check for bad entry count + */ + if (num_entries * XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, 1) + + XFS_DIR2_SF_HDR_SIZE(0) > max_size || + num_entries == 0) + 
num_entries = 0xFF; + + /* + * run through entries, stop at first bad entry, don't need + * to check for .. since that's encoded in its own field + */ + sfep = next_sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + for (i = 0; + i < num_entries && ino_dir_size > (char *)next_sfep - (char *)sfp; + i++) { + tmp_sfep = NULL; + sfep = next_sfep; + junkit = 0; + bad_sfnamelen = 0; + lino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT); + /* + * if entry points to self, junk it since only '.' or '..' + * should do that and shortform dirs don't contain either + * entry. if inode number is invalid, trash entry. + * if entry points to special inodes, trash it. + * if inode is unknown but number is valid, + * add it to the list of uncertain inodes. don't + * have to worry about an entry pointing to a + * deleted lost+found inode because the entry was + * deleted at the same time that the inode was cleared. + */ + if (lino == ino) { + junkit = 1; + junkreason = "current"; + } else if (verify_inum(mp, lino)) { + junkit = 1; + junkreason = "invalid"; + } else if (lino == mp->m_sb.sb_rbmino) { + junkit = 1; + junkreason = "realtime bitmap"; + } else if (lino == mp->m_sb.sb_rsumino) { + junkit = 1; + junkreason = "realtime summary"; + } else if (lino == mp->m_sb.sb_uquotino) { + junkit = 1; + junkreason = "user quota"; + } else if (lino == mp->m_sb.sb_pquotino) { + junkit = 1; + junkreason = "project quota"; + } else if ((irec_p = find_inode_rec(XFS_INO_TO_AGNO(mp, lino), + XFS_INO_TO_AGINO(mp, lino))) != NULL) { + /* + * if inode is marked free and we're in inode + * discovery mode, leave the entry alone for now. + * if the inode turns out to be used, we'll figure + * that out when we scan it. If the inode really + * is free, we'll hit this code again in phase 4 + * after we've finished inode discovery and blow + * out the entry then. 
+ */ + ino_off = XFS_INO_TO_AGINO(mp, lino) - + irec_p->ino_startnum; + ASSERT(is_inode_confirmed(irec_p, ino_off)); + if (is_inode_free(irec_p, ino_off) && !ino_discovery) { + junkit = 1; + junkreason = "free"; + } + } else if (ino_discovery) { + /* + * put the inode on the uncertain list. we'll + * pull the inode off the list and check it later. + * if the inode turns out to be bogus, we'll delete + * this entry in phase 6. + */ + add_inode_uncertain(mp, lino, 0); + } else { + /* + * blow the entry out. we know about all + * undiscovered entries now (past inode discovery + * phase) so this is clearly a bogus entry. + */ + junkit = 1; + junkreason = "non-existent"; + } + namelen = sfep->namelen; + if (junkit) + do_warn("entry \"%*.*s\" in shortform directory %llu " + "references %s inode %llu\n", + namelen, namelen, sfep->name, ino, junkreason, + lino); + if (namelen == 0) { + /* + * if we're really lucky, this is + * the last entry in which case we + * can use the dir size to set the + * namelen value. otherwise, forget + * it because we're not going to be + * able to find the next entry.
+ */ + bad_sfnamelen = 1; + + if (i == num_entries - 1) { + namelen = ino_dir_size - + ((__psint_t) &sfep->name[0] - + (__psint_t) sfp); + if (!no_modify) { + do_warn("zero length entry in " + "shortform dir %llu, resetting " + "to %d\n", + ino, namelen); + sfep->namelen = namelen; + } else { + do_warn("zero length entry in " + "shortform dir %llu, would set " + "to %d\n", + ino, namelen); + } + } else { + do_warn("zero length entry in shortform dir " + "%llu", + ino); + if (!no_modify) + do_warn(", junking %d entries\n", + num_entries - i); + else + do_warn(", would junk %d entries\n", + num_entries - i); + /* + * don't process the rest of the directory, + * break out of the processing loop + */ + break; + } + } else if ((__psint_t) sfep - (__psint_t) sfp + + + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep) + > ino_dir_size) { + bad_sfnamelen = 1; + + if (i == num_entries - 1) { + namelen = ino_dir_size - + ((__psint_t) &sfep->name[0] - + (__psint_t) sfp); + do_warn("size of last entry overflows space " + "left in in shortform dir %llu, ", + ino); + if (!no_modify) { + do_warn("resetting to %d\n", + namelen); + sfep->namelen = namelen; + *dino_dirty = 1; + } else { + do_warn("would reset to %d\n", + namelen); + } + } else { + do_warn("size of entry #%d overflows space " + "left in in shortform dir %llu\n", + i, ino); + if (!no_modify) { + if (i == num_entries - 1) + do_warn("junking entry #%d\n", + i); + else + do_warn("junking %d entries\n", + num_entries - i); + } else { + if (i == num_entries - 1) + do_warn("would junk entry " + "#%d\n", + i); + else + do_warn("would junk %d " + "entries\n", + num_entries - i); + } + + break; + } + } + + /* + * check for illegal chars in name.
+ * no need to check for bad length because + * the length value is stored in a byte + * so it can't be too big, it can only wrap + */ + if (namecheck((char *)&sfep->name[0], namelen)) { + /* + * junk entry + */ + do_warn("entry contains illegal character in shortform " + "dir %llu\n", + ino); + junkit = 1; + } + + if (XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT) < offset) { + do_warn("entry contains offset out of order in " + "shortform dir %llu\n", + ino); + bad_offset = 1; + } + offset = XFS_DIR2_SF_GET_OFFSET_ARCH(sfep, ARCH_CONVERT) + + XFS_DIR2_DATA_ENTSIZE(namelen); + + /* + * junk the entry by copying up the rest of the + * fork over the current entry and decrementing + * the entry count. if we're in no_modify mode, + * just issue the warning instead. then continue + * the loop with the next_sfep pointer set to the + * correct place in the fork and other counters + * properly set to reflect the deletion if it + * happened. + */ + if (junkit) { + bcopy(sfep->name, name, namelen); + name[namelen] = '\0'; + + if (!no_modify) { + tmp_elen = + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep); + INT_MOD(dip->di_core.di_size, ARCH_CONVERT, -(tmp_elen)); + ino_dir_size -= tmp_elen; + + tmp_sfep = (xfs_dir2_sf_entry_t *) + ((__psint_t) sfep + tmp_elen); + tmp_len = max_size - ((__psint_t) tmp_sfep + - (__psint_t) sfp); + + memmove(sfep, tmp_sfep, tmp_len); + + INT_MOD(sfp->hdr.count, ARCH_CONVERT, -1); + num_entries--; + bzero((void *) ((__psint_t) sfep + tmp_len), + tmp_elen); + + /* + * reset the tmp value to the current + * pointer so we'll process the entry + * we just moved up + */ + tmp_sfep = sfep; + + /* + * WARNING: drop the index i by one + * so it matches the decremented count + * for accurate comparisons later + */ + i--; + + *dino_dirty = 1; + *repair = 1; + + do_warn("junking entry \"%s\" in directory " + "inode %llu\n", + name, ino); + } else { + do_warn("would have junked entry \"%s\" in " + "directory inode %llu\n", + name, ino); + } + } else if (lino > 
XFS_DIR2_MAX_SHORT_INUM) + i8++; + /* + * go on to the next entry unless we've just junked an + * entry, in which case the current entry pointer points + * to an unprocessed entry. have to take zero-len + * entries into account in no modify mode since we + * calculate size based on next_sfep. + */ + next_sfep = (tmp_sfep == NULL) + ? (xfs_dir2_sf_entry_t *) ((__psint_t) sfep + + ((!bad_sfnamelen) + ? XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, + sfep) + : XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, + namelen))) + : tmp_sfep; + } + + /* sync up sizes and entry counts */ + + if (INT_GET(sfp->hdr.count, ARCH_CONVERT) != i) { + if (no_modify) { + do_warn("would have corrected entry count in directory " + "%llu from %d to %d\n", + ino, INT_GET(sfp->hdr.count, ARCH_CONVERT), i); + } else { + do_warn("corrected entry count in directory %llu, was " + "%d, now %d\n", + ino, INT_GET(sfp->hdr.count, ARCH_CONVERT), i); + INT_SET(sfp->hdr.count, ARCH_CONVERT, i); + *dino_dirty = 1; + *repair = 1; + } + } + + if (sfp->hdr.i8count != i8) { + if (no_modify) { + do_warn("would have corrected i8 count in directory " + "%llu from %d to %d\n", + ino, sfp->hdr.i8count, i8); + } else { + do_warn("corrected i8 count in directory %llu, was %d, " + "now %d\n", + ino, sfp->hdr.i8count, i8); + if (i8 == 0) + process_sf_dir2_fixi8(sfp, &next_sfep); + else + sfp->hdr.i8count = i8; + *dino_dirty = 1; + *repair = 1; + } + } + + if ((__psint_t) next_sfep - (__psint_t) sfp != ino_dir_size) { + if (no_modify) { + do_warn("would have corrected directory %llu size from " + "%lld to %lld\n", + ino, (__int64_t) ino_dir_size, + (__int64_t)((__psint_t)next_sfep - + (__psint_t)sfp)); + } else { + do_warn("corrected directory %llu size, was %lld, now " + "%lld\n", + ino, (__int64_t) ino_dir_size, + (__int64_t)((__psint_t)next_sfep - + (__psint_t)sfp)); + + INT_SET(dip->di_core.di_size, ARCH_CONVERT, (xfs_fsize_t)((__psint_t)next_sfep - + (__psint_t)sfp)); + *dino_dirty = 1; + *repair = 1; + } + } + if (offset +
(INT_GET(sfp->hdr.count, ARCH_CONVERT) + 2) * sizeof(xfs_dir2_leaf_entry_t) + + sizeof(xfs_dir2_block_tail_t) > mp->m_dirblksize) { + do_warn("directory %llu offsets too high\n", ino); + bad_offset = 1; + } + if (bad_offset) { + if (no_modify) { + do_warn("would have corrected entry offsets in " + "directory %llu\n", + ino); + } else { + do_warn("corrected entry offsets in directory %llu\n", + ino); + process_sf_dir2_fixoff(dip); + *dino_dirty = 1; + *repair = 1; + } + } + + /* + * check parent (..) entry + */ + *parent = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, &sfp->hdr.parent, ARCH_CONVERT); + + /* + * if parent entry is bogus, null it out. we'll fix it later. + */ + if (verify_inum(mp, *parent)) { + + do_warn("bogus .. inode number (%llu) in directory inode " + "%llu, ", + *parent, ino); + *parent = NULLFSINO; + if (!no_modify) { + do_warn("clearing inode number\n"); + + XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &zero, &sfp->hdr.parent, ARCH_CONVERT); + *dino_dirty = 1; + *repair = 1; + } else { + do_warn("would clear inode number\n"); + } + } else if (ino == mp->m_sb.sb_rootino && ino != *parent) { + /* + * root directories must have .. == . + */ + if (!no_modify) { + do_warn("corrected root directory %llu .. entry, was " + "%llu, now %llu\n", + ino, *parent, ino); + *parent = ino; + XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, parent, &sfp->hdr.parent, ARCH_CONVERT); + *dino_dirty = 1; + *repair = 1; + } else { + do_warn("would have corrected root directory %llu .. " + "entry from %llu to %llu\n", + ino, *parent, ino); + } + } else if (ino == *parent && ino != mp->m_sb.sb_rootino) { + /* + * likewise, non-root directories can't have .. pointing + * to . + */ + *parent = NULLFSINO; + do_warn("bad .. 
entry in directory inode %llu, points to " + "self,", + ino); + if (!no_modify) { + do_warn(" clearing inode number\n"); + + XFS_DIR2_SF_PUT_INUMBER_ARCH(sfp, &zero, &sfp->hdr.parent, ARCH_CONVERT); + *dino_dirty = 1; + *repair = 1; + } else { + do_warn(" would clear inode number\n"); + } + } + + return(0); +} + +/* + * Process one directory data block. + */ +/* ARGSUSED */ +static int +process_dir2_data( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + int ino_discovery, + char *dirname, /* directory pathname */ + xfs_ino_t *parent, /* out - NULLFSINO if entry not exist */ + xfs_dabuf_t *bp, + int *dot, /* out - 1 if there is a dot, else 0 */ + int *dotdot, /* out - 1 if there's a dotdot, else 0 */ + xfs_dablk_t da_bno, + char *endptr) +{ + int badbest; + xfs_dir2_data_free_t *bf; + int clearino; + char *clearreason = NULL; + xfs_dir2_data_t *d; + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_free_t *dfp; + xfs_dir2_data_unused_t *dup; + int freeseen; + int i; + int ino_off; + ino_tree_node_t *irec_p; + int junkit; + int lastfree; + int nm_illegal; + char *ptr; + + d = bp->data; + bf = d->hdr.bestfree; + ptr = (char *)d->u; + badbest = lastfree = freeseen = 0; + if (INT_GET(bf[0].length, ARCH_CONVERT) == 0) { + badbest |= INT_GET(bf[0].offset, ARCH_CONVERT) != 0; + freeseen |= 1 << 0; + } + if (INT_GET(bf[1].length, ARCH_CONVERT) == 0) { + badbest |= INT_GET(bf[1].offset, ARCH_CONVERT) != 0; + freeseen |= 1 << 1; + } + if (INT_GET(bf[2].length, ARCH_CONVERT) == 0) { + badbest |= INT_GET(bf[2].offset, ARCH_CONVERT) != 0; + freeseen |= 1 << 2; + } + badbest |= INT_GET(bf[0].length, ARCH_CONVERT) < INT_GET(bf[1].length, ARCH_CONVERT); + badbest |= INT_GET(bf[1].length, ARCH_CONVERT) < INT_GET(bf[2].length, ARCH_CONVERT); + while (ptr < endptr) { + dup = (xfs_dir2_data_unused_t *)ptr; + /* + * If it's unused, look for the space in the bestfree table. + * If we find it, account for that, else make sure it doesn't + * need to be there. 
+ */ + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { + if (ptr + INT_GET(dup->length, ARCH_CONVERT) > endptr || INT_GET(dup->length, ARCH_CONVERT) == 0 || + (INT_GET(dup->length, ARCH_CONVERT) & (XFS_DIR2_DATA_ALIGN - 1))) + break; + if (INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT) != + (char *)dup - (char *)d) + break; + badbest |= lastfree != 0; + dfp = xfs_dir2_data_freefind(d, dup); + if (dfp) { + i = dfp - bf; + badbest |= (freeseen & (1 << i)) != 0; + freeseen |= 1 << i; + } else + badbest |= INT_GET(dup->length, ARCH_CONVERT) > INT_GET(bf[2].length, ARCH_CONVERT); + ptr += INT_GET(dup->length, ARCH_CONVERT); + lastfree = 1; + continue; + } + dep = (xfs_dir2_data_entry_t *)ptr; + if (ptr + XFS_DIR2_DATA_ENTSIZE(dep->namelen) > endptr) + break; + if (INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT) != (char *)dep - (char *)d) + break; + ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + lastfree = 0; + } + /* + * Dropped out before we processed everything, give up. + * Phase 6 will kill this block if we don't kill the inode. + */ + if (ptr != endptr) { + do_warn("corrupt block %u in directory inode %llu\n", + da_bno, ino); + if (!no_modify) + do_warn("\twill junk block\n"); + else + do_warn("\twould junk block\n"); + return 1; + } + ptr = (char *)d->u; + /* + * Process the entries now. + */ + while (ptr < endptr) { + dup = (xfs_dir2_data_unused_t *)ptr; + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { + ptr += INT_GET(dup->length, ARCH_CONVERT); + continue; + } + dep = (xfs_dir2_data_entry_t *)ptr; + /* + * We may have to blow out an entry because of bad inode + * numbers. Do NOT touch the name until after we've computed + * the hashvalue and done a namecheck() on the name. + */ + if (!ino_discovery && INT_GET(dep->inumber, ARCH_CONVERT) == BADFSINO) { + /* + * Don't do a damned thing. We already found this + * (or did it ourselves) during phase 3. 
+ */ + clearino = 0; + } else if (verify_inum(mp, INT_GET(dep->inumber, ARCH_CONVERT))) { + /* + * Bad inode number. Clear the inode number and the + * entry will get removed later. We don't trash the + * directory since it's still structurally intact. + */ + clearino = 1; + clearreason = "invalid"; + } else if (INT_GET(dep->inumber, ARCH_CONVERT) == mp->m_sb.sb_rbmino) { + clearino = 1; + clearreason = "realtime bitmap"; + } else if (INT_GET(dep->inumber, ARCH_CONVERT) == mp->m_sb.sb_rsumino) { + clearino = 1; + clearreason = "realtime summary"; + } else if (INT_GET(dep->inumber, ARCH_CONVERT) == mp->m_sb.sb_uquotino) { + clearino = 1; + clearreason = "user quota"; + } else if (INT_GET(dep->inumber, ARCH_CONVERT) == mp->m_sb.sb_pquotino) { + clearino = 1; + clearreason = "project quota"; + } else if (INT_GET(dep->inumber, ARCH_CONVERT) == old_orphanage_ino) { + /* + * Do nothing, silently ignore it, entry has already + * been marked TBD since old_orphanage_ino is set + * non-zero. + */ + clearino = 0; + } else if ((irec_p = find_inode_rec( + XFS_INO_TO_AGNO(mp, INT_GET(dep->inumber, ARCH_CONVERT)), + XFS_INO_TO_AGINO(mp, INT_GET(dep->inumber, ARCH_CONVERT)))) != NULL) { + /* + * Inode recs should have only confirmed inodes in them. + */ + ino_off = + XFS_INO_TO_AGINO(mp, INT_GET(dep->inumber, ARCH_CONVERT)) - + irec_p->ino_startnum; + ASSERT(is_inode_confirmed(irec_p, ino_off)); + /* + * If inode is marked free and we're in inode discovery + * mode, leave the entry alone for now. If the inode + * turns out to be used, we'll figure that out when we + * scan it. If the inode really is free, we'll hit this + * code again in phase 4 after we've finished inode + * discovery and blow out the entry then. 
+ */ + if (!ino_discovery && is_inode_free(irec_p, ino_off)) { + clearino = 1; + clearreason = "free"; + } else + clearino = 0; + } else if (ino_discovery) { + add_inode_uncertain(mp, INT_GET(dep->inumber, ARCH_CONVERT), 0); + clearino = 0; + } else { + clearino = 1; + clearreason = "non-existent"; + } + if (clearino) + do_warn("entry \"%*.*s\" at block %u offset %d in " + "directory inode %llu references %s inode " + "%llu\n", + dep->namelen, dep->namelen, dep->name, + da_bno, (char *)ptr - (char *)d, ino, + clearreason, INT_GET(dep->inumber, ARCH_CONVERT)); + /* + * If the name length is 0 (illegal) make it 1 and blast + * the entry. + */ + if (dep->namelen == 0) { + do_warn("entry at block %u offset %d in directory " + "inode %llu has 0 namelength\n", + da_bno, (char *)ptr - (char *)d, ino); + if (!no_modify) + dep->namelen = 1; + clearino = 1; + } + /* + * If needed to clear the inode number, do it now. + */ + if (clearino) { + if (!no_modify) { + do_warn("\tclearing inode number in entry at " + "offset %d...\n", + (char *)ptr - (char *)d); + INT_SET(dep->inumber, ARCH_CONVERT, BADFSINO); + bp->dirty = 1; + } else { + do_warn("\twould clear inode number in entry " + "at offset %d...\n", + (char *)ptr - (char *)d); + } + } + /* + * Only complain about illegal names in phase 3 (when inode + * discovery is turned on). Otherwise, we'd complain a lot + * during phase 4. + */ + junkit = INT_GET(dep->inumber, ARCH_CONVERT) == BADFSINO; + nm_illegal = namecheck((char *)dep->name, dep->namelen); + if (ino_discovery && nm_illegal) { + do_warn("entry at block %u offset %d in directory " + "inode %llu has illegal name \"%*.*s\": ", + da_bno, (char *)ptr - (char *)d, ino, + dep->namelen, dep->namelen, dep->name); + junkit = 1; + } + /* + * Now we can mark entries with BADFSINO's bad. + */ + if (!no_modify && INT_GET(dep->inumber, ARCH_CONVERT) == BADFSINO) { + dep->name[0] = '/'; + bp->dirty = 1; + junkit = 0; + } + /* + * Special .. entry processing. 
+ */ + if (dep->namelen == 2 && + dep->name[0] == '.' && dep->name[1] == '.') { + if (!*dotdot) { + (*dotdot)++; + *parent = INT_GET(dep->inumber, ARCH_CONVERT); + /* + * What if .. == .? Legal only in the root + * inode. Blow out entry and set parent to + * NULLFSINO otherwise. + */ + if (ino == INT_GET(dep->inumber, ARCH_CONVERT) && + ino != mp->m_sb.sb_rootino) { + *parent = NULLFSINO; + do_warn("bad .. entry in directory " + "inode %llu, points to self: ", + ino); + junkit = 1; + } + /* + * We have to make sure that . == .. in the + * root inode. + */ + else if (ino != INT_GET(dep->inumber, ARCH_CONVERT) && + ino == mp->m_sb.sb_rootino) { + do_warn("bad .. entry in root " + "directory inode %llu, was " + "%llu: ", + ino, INT_GET(dep->inumber, ARCH_CONVERT)); + if (!no_modify) { + do_warn("correcting\n"); + INT_SET(dep->inumber, ARCH_CONVERT, ino); + bp->dirty = 1; + } else { + do_warn("would correct\n"); + } + } + } + /* + * Can't fix the directory unless we know which .. + * entry is the right one. Both have valid inode + * numbers or we wouldn't be here. So since both + * seem equally valid, trash this one. + */ + else { + do_warn("multiple .. entries in directory " + "inode %llu: ", + ino); + junkit = 1; + } + } + /* + * Special . entry processing. + */ + else if (dep->namelen == 1 && dep->name[0] == '.') { + if (!*dot) { + (*dot)++; + if (INT_GET(dep->inumber, ARCH_CONVERT) != ino) { + do_warn("bad . entry in directory " + "inode %llu, was %llu: ", + ino, INT_GET(dep->inumber, ARCH_CONVERT)); + if (!no_modify) { + do_warn("correcting\n"); + INT_SET(dep->inumber, ARCH_CONVERT, ino); + bp->dirty = 1; + } else { + do_warn("would correct\n"); + } + } + } else { + do_warn("multiple . entries in directory " + "inode %llu: ", + ino); + junkit = 1; + } + } + /* + * All other entries -- make sure only . references self. 
+ */ + else if (INT_GET(dep->inumber, ARCH_CONVERT) == ino) { + do_warn("entry \"%*.*s\" in directory inode %llu " + "points to self: ", + dep->namelen, dep->namelen, dep->name, ino); + junkit = 1; + } + /* + * Clear junked entries. + */ + if (junkit) { + if (!no_modify) { + dep->name[0] = '/'; + bp->dirty = 1; + do_warn("clearing entry\n"); + } else { + do_warn("would clear entry\n"); + } + } + /* + * Advance to the next entry. + */ + ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + } + /* + * Check the bestfree table. + */ + if (freeseen != 7 || badbest) { + do_warn("bad bestfree table in block %u in directory inode " + "%llu: ", + da_bno, ino); + if (!no_modify) { + do_warn("repairing table\n"); + libxfs_dir2_data_freescan(mp, d, &i, endptr); + bp->dirty = 1; + } else { + do_warn("would repair table\n"); + } + } + return 0; +} + +/* + * Process a block-format directory. + */ +/* ARGSUSED */ +static int +process_block_dir2( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + int ino_discovery, + int *dino_dirty, /* out - 1 if dinode buffer dirty */ + char *dirname, /* directory pathname */ + xfs_ino_t *parent, /* out - NULLFSINO if entry not exist */ + blkmap_t *blkmap, + int *dot, /* out - 1 if there is a dot, else 0 */ + int *dotdot, /* out - 1 if there's a dotdot, else 0 */ + int *repair) /* out - 1 if something was fixed */ +{ + xfs_dir2_block_t *block; + xfs_dir2_leaf_entry_t *blp; + bmap_ext_t *bmp; + xfs_dabuf_t *bp; + xfs_dir2_block_tail_t *btp; + int nex; + int rval; + + *repair = *dot = *dotdot = 0; + *parent = NULLFSINO; + nex = blkmap_getn(blkmap, mp->m_dirdatablk, mp->m_dirblkfsbs, &bmp); + if (nex == 0) { + do_warn("block %u for directory inode %llu is missing\n", + mp->m_dirdatablk, ino); + return 1; + } + bp = da_read_buf(mp, nex, bmp); + free(bmp); + if (bp == NULL) { + do_warn("can't read block %u for directory inode %llu\n", + mp->m_dirdatablk, ino); + return 1; + } + /* + * Verify the block + */ + block = bp->data; + if 
(INT_GET(block->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC) + do_warn("bad directory block magic # %#x in block %u for " + "directory inode %llu\n", + INT_GET(block->hdr.magic, ARCH_CONVERT), mp->m_dirdatablk, ino); + /* + * process the data area + * this also checks & fixes the bestfree + */ + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + /* + * Don't let this go past the end of the block. + */ + if ((char *)blp > (char *)btp) + blp = (xfs_dir2_leaf_entry_t *)btp; + rval = process_dir2_data(mp, ino, dip, ino_discovery, dirname, parent, + bp, dot, dotdot, mp->m_dirdatablk, (char *)blp); + if (bp->dirty && !no_modify) { + *repair = 1; + da_bwrite(mp, bp); + } else + da_brelse(bp); + return rval; +} + +/* + * Validates leaf contents, node format directories only. + * magic number and sibling pointers checked by caller. + * Returns 0 if block is ok, 1 if the block is bad. + * Looking for: an entry count that runs past the end of the + * directory block, out of order hash values (a non-stale entry + * whose hashval is below the previous entry's, starting from + * last_hashval), bad stale counts. + * *next_hashval is set to the hashval of each entry scanned, so on + * success it holds the last entry's hashval; it is not written at + * all when the block's entry count is 0. + */ +static int +process_leaf_block_dir2( + xfs_mount_t *mp, + xfs_dir2_leaf_t *leaf, + xfs_dablk_t da_bno, + xfs_ino_t ino, + xfs_dahash_t last_hashval, + xfs_dahash_t *next_hashval) +{ + int i; + int stale; + + for (i = stale = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); i++) { + if ((char *)&leaf->ents[i] >= (char *)leaf + mp->m_dirblksize) { + do_warn("bad entry count in block %u of directory " + "inode %llu\n", + da_bno, ino); + return 1; + } + if (INT_GET(leaf->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) + stale++; + else if (INT_GET(leaf->ents[i].hashval, ARCH_CONVERT) < last_hashval) { + do_warn("bad hash ordering in block %u of directory " + "inode %llu\n", + da_bno, ino); + return 1; + } + *next_hashval = last_hashval = INT_GET(leaf->ents[i].hashval, ARCH_CONVERT); + } + if (stale != INT_GET(leaf->hdr.stale, ARCH_CONVERT)) { + do_warn("bad stale count in block %u of directory inode %llu\n", + da_bno, ino); + return 1; + } + return 0; +} + +/* + * Returns 0 
if the directory is ok, 1 if it has to be rebuilt. + */ +static int +process_leaf_level_dir2( + xfs_mount_t *mp, + dir2_bt_cursor_t *da_cursor, + int *repair) +{ + bmap_ext_t *bmp; + xfs_dabuf_t *bp; + int buf_dirty; + xfs_dahash_t current_hashval; + xfs_dablk_t da_bno; + xfs_dahash_t greatest_hashval; + xfs_ino_t ino; + xfs_dir2_leaf_t *leaf; + int nex; + xfs_dablk_t prev_bno; + + da_bno = da_cursor->level[0].bno; + ino = da_cursor->ino; + prev_bno = 0; + bmp = NULL; + current_hashval = 0; + buf_dirty = 0; + + do { + nex = blkmap_getn(da_cursor->blkmap, da_bno, mp->m_dirblkfsbs, + &bmp); + /* + * Directory code uses 0 as the NULL block pointer since 0 + * is the root block and no directory block pointer can point + * to the root block of the btree. + */ + ASSERT(da_bno != 0); + + if (nex == 0) { + do_warn("can't map block %u for directory inode %llu\n", + da_bno, ino); + goto error_out; + } + bp = da_read_buf(mp, nex, bmp); + free(bmp); + bmp = NULL; + if (bp == NULL) { + do_warn("can't read file block %u for directory inode " + "%llu\n", + da_bno, ino); + goto error_out; + } + leaf = bp->data; + /* + * Check magic number for leaf directory btree block. + */ + if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR2_LEAFN_MAGIC) { + do_warn("bad directory leaf magic # %#x for directory " + "inode %llu block %u\n", + INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), ino, da_bno); + da_brelse(bp); + goto error_out; + } + buf_dirty = 0; + /* + * For each block, process the block, verify its path, + * then get next block. Update cursor values along the way. + */ + if (process_leaf_block_dir2(mp, leaf, da_bno, ino, + current_hashval, &greatest_hashval)) { + da_brelse(bp); + goto error_out; + } + /* + * Index can be set to hdr.count so match the indices of the + * interior blocks -- which at the end of the block will point + * to 1 after the final real entry in the block. 
+ */ + da_cursor->level[0].hashval = greatest_hashval; + da_cursor->level[0].bp = bp; + da_cursor->level[0].bno = da_bno; + da_cursor->level[0].index = INT_GET(leaf->hdr.count, ARCH_CONVERT); + da_cursor->level[0].dirty = buf_dirty; + + if (INT_GET(leaf->hdr.info.back, ARCH_CONVERT) != prev_bno) { + do_warn("bad sibling back pointer for block %u in " + "directory inode %llu\n", + da_bno, ino); + da_brelse(bp); + goto error_out; + } + prev_bno = da_bno; + da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT); + if (da_bno != 0) { + if (verify_dir2_path(mp, da_cursor, 0)) { + da_brelse(bp); + goto error_out; + } + } + current_hashval = greatest_hashval; + ASSERT(buf_dirty == 0 || buf_dirty && !no_modify); + if (buf_dirty && !no_modify) { + *repair = 1; + da_bwrite(mp, bp); + } else + da_brelse(bp); + } while (da_bno != 0); + if (verify_final_dir2_path(mp, da_cursor, 0)) { + /* + * Verify the final path up (right-hand-side) if still ok. + */ + do_warn("bad hash path in directory %llu\n", ino); + goto error_out; + } + /* + * Redundant but just for testing. + */ + release_dir2_cursor(mp, da_cursor, 0); + return 0; + +error_out: + /* + * Release all buffers holding interior btree blocks. + */ + err_release_dir2_cursor(mp, da_cursor, 0); + if (bmp) + free(bmp); + return 1; +} + +/* + * Return 1 if the directory's leaf/node space is corrupted and + * needs to be rebuilt, 0 if it's ok. + */ +static int +process_node_dir2( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + blkmap_t *blkmap, + int *repair) +{ + xfs_dablk_t bno; + dir2_bt_cursor_t da_cursor; + + /* + * Try again -- traverse down left-side of tree until we hit the + * left-most leaf block setting up the btree cursor along the way. + * Then walk the leaf blocks left-to-right, calling a parent + * verification routine each time we traverse a block. + */ + bzero(&da_cursor, sizeof(da_cursor)); + da_cursor.ino = ino; + da_cursor.dip = dip; + da_cursor.blkmap = blkmap; + + /* + * Now process interior node. 
+ */ + if (traverse_int_dir2block(mp, &da_cursor, &bno) == 0) + return 1; + + /* + * Skip directories with a root marked XFS_DIR2_LEAFN_MAGIC + */ + if (bno == 0) { + release_dir2_cursor(mp, &da_cursor, 0); + return 0; + } else { + /* + * Now pass cursor and bno into leaf-block processing routine. + * The leaf dir level routine checks the interior paths up to + * the root including the final right-most path. + */ + return process_leaf_level_dir2(mp, &da_cursor, repair); + } +} + +/* + * Process leaf and node directories. + * Process the data blocks then, if it's a node directory, check + * the consistency of those blocks. + */ +static int +process_leaf_node_dir2( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + int ino_discovery, + char *dirname, /* directory pathname */ + xfs_ino_t *parent, /* out - NULLFSINO if entry not exist */ + blkmap_t *blkmap, + int *dot, /* out - 1 if there is a dot, else 0 */ + int *dotdot, /* out - 1 if there's a dotdot, else 0 */ + int *repair, /* out - 1 if something was fixed */ + int isnode) /* node directory not leaf */ +{ + bmap_ext_t *bmp; + xfs_dabuf_t *bp; + xfs_dir2_data_t *data; + xfs_dfiloff_t dbno; + int good; + int i; + xfs_dfiloff_t ndbno; + int nex; + int t; + + *repair = *dot = *dotdot = good = 0; + *parent = NULLFSINO; + ndbno = NULLDFILOFF; + while ((dbno = blkmap_next_off(blkmap, ndbno, &t)) < mp->m_dirleafblk) { + nex = blkmap_getn(blkmap, dbno, mp->m_dirblkfsbs, &bmp); + ndbno = dbno + mp->m_dirblkfsbs - 1; + if (nex == 0) { + do_warn("block %llu for directory inode %llu is " + "missing\n", + dbno, ino); + continue; + } + bp = da_read_buf(mp, nex, bmp); + free(bmp); + if (bp == NULL) { + do_warn("can't read block %llu for directory inode " + "%llu\n", + dbno, ino); + continue; + } + data = bp->data; + if (INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_DATA_MAGIC) + do_warn("bad directory block magic # %#x in block %llu " + "for directory inode %llu\n", + INT_GET(data->hdr.magic, ARCH_CONVERT), dbno, 
ino); + i = process_dir2_data(mp, ino, dip, ino_discovery, dirname, + parent, bp, dot, dotdot, (xfs_dablk_t)dbno, + (char *)data + mp->m_dirblksize); + if (i == 0) + good++; + if (bp->dirty && !no_modify) { + *repair = 1; + da_bwrite(mp, bp); + } else + da_brelse(bp); + } + if (good == 0) + return 1; + if (!isnode) + return 0; + if (dir2_is_badino(ino)) + return 0; + + if (process_node_dir2(mp, ino, dip, blkmap, repair)) + dir2_add_badlist(ino); + return 0; + +} + +/* + * Returns 1 if things are bad (directory needs to be junked) + * and 0 if things are ok. If ino_discovery is 1, add unknown + * inodes to uncertain inode list. + */ +int +process_dir2( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + int ino_discovery, + int *dino_dirty, + char *dirname, + xfs_ino_t *parent, + blkmap_t *blkmap) +{ + int dot; + int dotdot; + xfs_dfiloff_t last; + int repair; + int res; + + *parent = NULLFSINO; + dot = dotdot = 0; + last = 0; + + /* + * branch off depending on the type of inode. This routine + * is only called ONCE so all the subordinate routines will + * fix '.' and junk '..' if they're bogus. 
+ */ + if (blkmap) + last = blkmap_last_off(blkmap); + if (INT_GET(dip->di_core.di_size, ARCH_CONVERT) <= XFS_DFORK_DSIZE_ARCH(dip, mp, ARCH_CONVERT) && + dip->di_core.di_format == XFS_DINODE_FMT_LOCAL) { + dot = dotdot = 1; + res = process_sf_dir2(mp, ino, dip, ino_discovery, dino_dirty, + dirname, parent, &repair); + } else if (last == mp->m_dirblkfsbs && + (dip->di_core.di_format == XFS_DINODE_FMT_EXTENTS || + dip->di_core.di_format == XFS_DINODE_FMT_BTREE)) { + res = process_block_dir2(mp, ino, dip, ino_discovery, + dino_dirty, dirname, parent, blkmap, &dot, &dotdot, + &repair); + } else if (last >= mp->m_dirleafblk + mp->m_dirblkfsbs && + (dip->di_core.di_format == XFS_DINODE_FMT_EXTENTS || + dip->di_core.di_format == XFS_DINODE_FMT_BTREE)) { + res = process_leaf_node_dir2(mp, ino, dip, ino_discovery, + dirname, parent, blkmap, &dot, &dotdot, &repair, + last > mp->m_dirleafblk + mp->m_dirblkfsbs); + } else { + do_warn("bad size/format for directory %llu\n", ino); + return 1; + } + /* + * bad . entries in all directories will be fixed up in phase 6 + */ + if (dot == 0) { + do_warn("no . entry for directory %llu\n", ino); + } + + /* + * shortform dirs always have a .. entry. .. for all longform + * directories will get fixed in phase 6. .. for other shortform + * dirs also get fixed there. .. for a shortform root was + * fixed in place since we know what it should be + */ + if (dotdot == 0 && ino != mp->m_sb.sb_rootino) { + do_warn("no .. entry for directory %llu\n", ino); + } else if (dotdot == 0 && ino == mp->m_sb.sb_rootino) { + do_warn("no .. entry for root directory %llu\n", ino); + need_root_dotdot = 1; + } + + ASSERT(ino != mp->m_sb.sb_rootino && ino != *parent || + ino == mp->m_sb.sb_rootino && + (ino == *parent || need_root_dotdot == 1)); + + return res; +} diff --git a/repair/dir2.h b/repair/dir2.h new file mode 100644 index 000000000..9583447fe --- /dev/null +++ b/repair/dir2.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. 
All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#ifndef _XR_DIR2_H +#define _XR_DIR2_H + +struct blkmap; +struct bmap_ext; + +/* + * the cursor gets passed up and down the da btree processing + * routines. The interior block processing routines use the + * cursor to determine if the pointers to and from the preceding + * and succeeding sibling blocks are ok and whether the values in + * the current block are consistent with the entries in the parent + * nodes. When a block is traversed, a parent-verification routine + * is called to verify if the next logical entry in the next level up + * is consistent with the greatest hashval in the next block of the + * current level. 
The verification routine is itself recursive and + * calls itself if it has to traverse an interior block to get + * the next logical entry. The routine recurses upwards through + * the tree until it finds a block where it can simply step to + * the next entry. The hashval in that entry should be equal to + * the hashval being passed to it (the greatest hashval in the block + * that the entry points to). If that isn't true, then the tree + * is blown and we need to trash it, salvage and trash it, or fix it. + * Currently, we just trash it. + */ +typedef struct dir2_level_state { + xfs_dabuf_t *bp; /* block bp */ + xfs_dablk_t bno; /* file block number */ + xfs_dahash_t hashval; /* last verified hashval */ + int index; /* current index in block */ + int dirty; /* is buffer dirty ? (1 == yes) */ +} dir2_level_state_t; + +typedef struct dir2_bt_cursor { + int active; /* highest level in tree (# levels-1) */ + int type; /* 0 if dir, 1 if attr */ + xfs_ino_t ino; + xfs_dablk_t greatest_bno; + xfs_dinode_t *dip; + dir2_level_state_t level[XFS_DA_NODE_MAXDEPTH]; + struct blkmap *blkmap; +} dir2_bt_cursor_t; + + +/* ROUTINES */ + +void +err_release_dir2_cursor( + xfs_mount_t *mp, + dir2_bt_cursor_t *cursor, + int prev_level); + +xfs_dabuf_t * +da_read_buf( + xfs_mount_t *mp, + int nex, + struct bmap_ext *bmp); + +int +da_bwrite( + xfs_mount_t *mp, + xfs_dabuf_t *bp); + +void +da_brelse( + xfs_dabuf_t *bp); + +int +process_dir2( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + int ino_discovery, + int *dirty, + char *dirname, + xfs_ino_t *parent, + struct blkmap *blkmap); + +void +process_sf_dir2_fixi8( + xfs_dir2_sf_t *sfp, + xfs_dir2_sf_entry_t **next_sfep); + +void +dir2_add_badlist( + xfs_ino_t ino); + +int +dir2_is_badino( + xfs_ino_t ino); + +#endif /* _XR_DIR2_H */ diff --git a/repair/dir_stack.c b/repair/dir_stack.c new file mode 100644 index 000000000..1d0aae4af --- /dev/null +++ b/repair/dir_stack.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2000 Silicon 
Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "dir_stack.h" +#include "err_protos.h" + +/* + * a directory stack for holding directories while + * we traverse filesystem hierarchy subtrees. + * names are kind of misleading as this is really + * implemented as an inode stack. so sue me... 
+ */ + +static dir_stack_t dirstack_freelist; /* popped elements are parked here and reused by push_dir */ +static int dirstack_init = 0; /* set to 1 by the first dir_stack_init call */ + +void +dir_stack_init(dir_stack_t *stack) +{ + stack->cnt = 0; /* these two stores are repeated at the end of this function */ + stack->head = NULL; + + if (dirstack_init == 0) { + dirstack_init = 1; + dir_stack_init(&dirstack_freelist); /* recurses exactly once: the flag is already 1 */ + } + + stack->cnt = 0; + stack->head = NULL; + + return; +} + +static void +dir_stack_push(dir_stack_t *stack, dir_stack_elem_t *elem) +{ + ASSERT(stack->cnt > 0 || stack->cnt == 0 && stack->head == NULL); /* && binds tighter than ||: an empty stack must have a NULL head */ + + elem->next = stack->head; + stack->head = elem; + stack->cnt++; + + return; +} + +static dir_stack_elem_t * +dir_stack_pop(dir_stack_t *stack) +{ + dir_stack_elem_t *elem; + + if (stack->cnt == 0) { + ASSERT(stack->head == NULL); + return(NULL); /* empty stack */ + } + + elem = stack->head; + + ASSERT(elem != NULL); + + stack->head = elem->next; + elem->next = NULL; + stack->cnt--; + + return(elem); +} + +void +push_dir(dir_stack_t *stack, xfs_ino_t ino) +{ + dir_stack_elem_t *elem; + + if (dirstack_freelist.cnt == 0) { /* nothing to recycle, allocate a new element */ + if ((elem = malloc(sizeof(dir_stack_elem_t))) == NULL) { + do_error( + "couldn't malloc dir stack element, try more swap\n"); + exit(1); + } + } else { + elem = dir_stack_pop(&dirstack_freelist); + } + + elem->ino = ino; + + dir_stack_push(stack, elem); + + return; +} + +xfs_ino_t +pop_dir(dir_stack_t *stack) +{ + dir_stack_elem_t *elem; + xfs_ino_t ino; + + elem = dir_stack_pop(stack); + + if (elem == NULL) + return(NULLFSINO); /* stack was empty */ + + ino = elem->ino; + elem->ino = NULLFSINO; + + dir_stack_push(&dirstack_freelist, elem); /* recycle the element instead of freeing it */ + + return(ino); +} diff --git a/repair/dir_stack.h b/repair/dir_stack.h new file mode 100644 index 000000000..9a8305be8 --- /dev/null +++ b/repair/dir_stack.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +typedef struct dir_stack_elem { + xfs_ino_t ino; + struct dir_stack_elem *next; +} dir_stack_elem_t; + +typedef struct dir_stack { + int cnt; + dir_stack_elem_t *head; +} dir_stack_t; + + +void dir_stack_init(dir_stack_t *stack); + +void push_dir(dir_stack_t *stack, xfs_ino_t ino); +xfs_ino_t pop_dir(dir_stack_t *stack); diff --git a/repair/err_protos.h b/repair/err_protos.h new file mode 100644 index 000000000..7d5aa5c52 --- /dev/null +++ b/repair/err_protos.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +void do_abort(char const *, ...); /* abort, internal error */ +void do_error(char const *, ...); /* abort, system error */ +void do_warn(char const *, ...); /* issue warning */ +void do_log(char const *, ...); /* issue log message */ diff --git a/repair/globals.c b/repair/globals.c new file mode 100644 index 000000000..206d08410 --- /dev/null +++ b/repair/globals.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include + +#define EXTERN +#include "globals.h" + diff --git a/repair/globals.h b/repair/globals.h new file mode 100644 index 000000000..5c33d5f6f --- /dev/null +++ b/repair/globals.h @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#ifndef _XFS_REPAIR_GLOBAL_H +#define _XFS_REPAIR_GLOBAL_H + +#ifndef EXTERN +#define EXTERN extern +#endif + +/* useful macros */ + +#define rounddown(x, y) (((x)/(y))*(y)) + +/* error flags */ + +#define XR_OK 0 /* good */ +#define XR_BAD_MAGIC 1 /* bad magic number */ +#define XR_BAD_BLOCKSIZE 2 /* bad block size */ +#define XR_BAD_BLOCKLOG 3 /* bad sb_blocklog field */ +#define XR_BAD_VERSION 4 /* bad version number */ +#define XR_BAD_INPROGRESS 5 /* in progress set */ +#define XR_BAD_FS_SIZE_DATA 6 /* ag sizes, number, fs size mismatch */ +#define XR_BAD_INO_SIZE_DATA 7 /* bad inode size or perblock fields */ +#define XR_BAD_SECT_SIZE_DATA 8 /* bad sector size info */ +#define XR_AGF_GEO_MISMATCH 9 /* agf info conflicts with sb */ +#define XR_AGI_GEO_MISMATCH 10 /* agi info conflicts with sb */ +#define XR_SB_GEO_MISMATCH 11 /* sb geo conflicts with fs sb geo */ +#define XR_EOF 12 /* seeked beyond EOF */ +#define XR_BAD_RT_GEO_DATA 13 /* realtime geometry inconsistent */ +#define XR_BAD_INO_MAX_PCT 14 /* max % of inodes > 100% */ +#define XR_BAD_INO_ALIGN 15 /* bad inode alignment value */ +#define XR_INSUFF_SEC_SB 16 /* not enough matching secondary sbs */ +#define XR_BAD_SB_UNIT 17 /* bad stripe unit */ +#define XR_BAD_SB_WIDTH 18 /* bad stripe width */ +#define XR_BAD_SVN 19 /* bad shared version number */ +#define XR_BAD_ERR_CODE 20 /* Bad error code */ + +/* XFS filesystem (il)legal values */ + +#define XR_LOG2BSIZE_MIN 9 /* min/max fs blocksize (log2) */ +#define XR_LOG2BSIZE_MAX
16 /* 2^XR_* == blocksize */ + +#define NUM_SBS 8 /* max # of sbs to verify */ +#define NUM_AGH_SECTS 4 /* # of components in an ag header */ + +#define MEM_ALIGN 128 /* I/O buf alignment - a cache line */ + +/* + * secondary sb mask -- if the secondary sb feature bits has a + * the partial sb mask bit set, then you depend on the fields + * in it up to and including sb_inoalignmt but the unused part of the + * sector may have trash in it. If the sb has any bits set that are in + * the good mask, then the entire sb and sector are good (was bzero'ed + * by mkfs). The third mask is for filesystems made by pre-6.5 campus + * alpha mkfs's. Those are rare so we'll check for those under + * a special option. + */ +#define XR_PART_SECSB_VNMASK 0x0F80 /* >= XFS_SB_VERSION_ALIGNBIT */ +#define XR_GOOD_SECSB_VNMASK 0x0F00 /* >= XFS_SB_VERSION_DALIGNBIT */ +#define XR_ALPHA_SECSB_VNMASK 0x0180 /* DALIGN|ALIGN bits */ + +/* global variables for xfs_repair */ + +/* arguments and argument flag variables */ + +EXTERN char *fs_name; /* name of filesystem */ +EXTERN int verbose; /* verbose flag, mostly for debugging */ + + +/* for reading stuff in manually (bypassing libsim) */ + +EXTERN char *iobuf; /* large buffer */ +EXTERN int iobuf_size; +EXTERN char *smallbuf; /* small (1-4 page) buffer */ +EXTERN int smallbuf_size; +EXTERN char *sb_bufs[NUM_SBS]; /* superblock buffers */ +EXTERN int sbbuf_size; + +/* direct I/O info */ + +EXTERN int minio_align; /* min I/O size and alignment */ +EXTERN int mem_align; /* memory alignment */ +EXTERN int max_iosize; /* max I/O size */ + +/* file descriptors */ + +EXTERN int fs_fd; /* filesystem fd */ + +/* command-line flags */ + +EXTERN int verbose; +EXTERN int no_modify; +EXTERN int isa_file; +EXTERN int dumpcore; /* abort, not exit on fatal errs */ +EXTERN int delete_attr_ok; /* can clear attrs w/o clearing files */ +EXTERN int force_geo; /* can set geo on low confidence info */ +EXTERN int assume_xfs; /* assume we have an xfs fs */ +EXTERN 
int pre_65_beta; /* fs was mkfs'ed by a version earlier * than 6.5-beta */ +EXTERN char *log_name; /* Name of log device */ +EXTERN int log_spec; /* Log dev specified as option */ + +/* misc status variables */ + +EXTERN int primary_sb_modified; +EXTERN int bad_ino_btree; +EXTERN int clear_sunit; +EXTERN int fs_is_dirty; + +/* for hunting down the root inode */ + +EXTERN int need_root_inode; +EXTERN int need_root_dotdot; + +EXTERN int need_rbmino; +EXTERN int need_rsumino; + +EXTERN int lost_quotas; +EXTERN int have_uquotino; +EXTERN int have_pquotino; +EXTERN int lost_uquotino; +EXTERN int lost_pquotino; + +EXTERN xfs_agino_t first_prealloc_ino; +EXTERN xfs_agino_t last_prealloc_ino; +EXTERN xfs_agblock_t bnobt_root; +EXTERN xfs_agblock_t bcntbt_root; +EXTERN xfs_agblock_t inobt_root; + +/* configuration vars -- fs geometry dependent */ + +EXTERN int inodes_per_block; +EXTERN int inodes_per_cluster; /* inodes per inode buffer */ +EXTERN unsigned int glob_agcount; +EXTERN int chunks_pblock; /* # of 64-ino chunks per allocation */ +EXTERN int max_symlink_blocks; +EXTERN __int64_t fs_max_file_offset; + +/* block allocation bitmaps */ + +EXTERN __uint64_t **ba_bmap; /* see incore.h */ +EXTERN __uint64_t *rt_ba_bmap; /* see incore.h */ + +/* realtime info */ + +EXTERN xfs_rtword_t *btmcompute; +EXTERN xfs_suminfo_t *sumcompute; + +/* inode tree records have full or partial backptr fields ? */ + +EXTERN int full_backptrs; /* + * if 1, use backptrs_t component + * of ino_un union, if 0, use + * parent_list_t component. 
see + * incore.h for more details + */ + +#define ORPHANAGE "lost+found" + +/* superblock counters */ + +EXTERN __uint64_t sb_icount; /* allocated (made) inodes */ +EXTERN __uint64_t sb_ifree; /* free inodes */ +EXTERN __uint64_t sb_fdblocks; /* free data blocks */ +EXTERN __uint64_t sb_frextents; /* free realtime extents */ + +EXTERN xfs_ino_t orphanage_ino; +EXTERN xfs_ino_t old_orphanage_ino; + +/* superblock geometry info */ + +EXTERN xfs_extlen_t sb_inoalignmt; +EXTERN __uint32_t sb_unit; +EXTERN __uint32_t sb_width; + +#endif /* _XFS_REPAIR_GLOBAL_H */ diff --git a/repair/incore.c b/repair/incore.c new file mode 100644 index 000000000..499854188 --- /dev/null +++ b/repair/incore.c @@ -0,0 +1,308 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include "avl.h" +#include "globals.h" +#include "incore.h" +#include "agheader.h" +#include "protos.h" +#include "err_protos.h" + +/* + * push a block allocation record onto list. assumes list + * is set to NULL if empty. NOTE: list is passed by value, so the new head assigned below is not visible to the caller. + */ +void +record_allocation(ba_rec_t *addr, ba_rec_t *list) +{ + addr->next = list; + list = addr; + + return; +} + +void +free_allocations(ba_rec_t *list) +{ + ba_rec_t *current = list; + + while (list != NULL) { + list = list->next; + free(current); + current = list; + } + + return; +} + +/* ba bmap setup stuff. setting/getting state is in incore.h */ + +void +setup_bmap(xfs_agnumber_t agno, xfs_agblock_t numblocks, xfs_drtbno_t rtblocks) +{ + int i; + xfs_drfsbno_t size; + + ba_bmap = (__uint64_t**)malloc(agno*sizeof(__uint64_t *)); + if (!ba_bmap) { + do_error("couldn't allocate block map pointers\n"); + return; + } + for (i = 0; i < agno; i++) { + int size; /* shadows the outer size; byte length of one AG's map */ + + size = roundup(numblocks * (NBBY/XR_BB),sizeof(__uint64_t)); + + ba_bmap[i] = (__uint64_t*)memalign(sizeof(__uint64_t), size); + if (!ba_bmap[i]) { + do_error("couldn't allocate block map, size = %d\n", + numblocks); /* note: prints the block count, not size */ + return; + } + bzero(ba_bmap[i], size); + } + + if (rtblocks == 0) { + rt_ba_bmap = NULL; + return; + } + + size = roundup(rtblocks * (NBBY/XR_BB), sizeof(__uint64_t)); + + rt_ba_bmap=(__uint64_t*)memalign(sizeof(__uint64_t), size); + if (!rt_ba_bmap) { + do_error( + "couldn't allocate real-time block map, size = %llu\n", + rtblocks); + return; + } + + /* + * start all real-time as free blocks + */ + set_bmap_rt(rtblocks); + + return; +} + +/* ARGSUSED */ +void +teardown_rt_bmap(xfs_mount_t *mp) +{ + if (rt_ba_bmap != NULL) { + free(rt_ba_bmap); + rt_ba_bmap = NULL; + } + + return; +} +
+/* ARGSUSED */ +void +teardown_ag_bmap(xfs_mount_t *mp, xfs_agnumber_t agno) +{ + ASSERT(ba_bmap[agno] != NULL); + + free(ba_bmap[agno]); + ba_bmap[agno] = NULL; + + return; +} + +/* ARGSUSED */ +void +teardown_bmap_finish(xfs_mount_t *mp) +{ + free(ba_bmap); + ba_bmap = NULL; + + return; +} + +void +teardown_bmap(xfs_mount_t *mp) +{ + xfs_agnumber_t i; + + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + teardown_ag_bmap(mp, i); + } + + teardown_rt_bmap(mp); + teardown_bmap_finish(mp); + + return; +} + +/* + * block map initialization routines -- realtime, log, fs + */ +void +set_bmap_rt(xfs_drtbno_t num) +{ + xfs_drtbno_t j; + xfs_drtbno_t size; + + /* + * for now, initialize all realtime blocks to be free + * (state == XR_E_FREE) + */ + size = howmany(num * (NBBY/XR_BB), sizeof(__uint64_t)); + + for (j = 0; j < size; j++) + rt_ba_bmap[j] = 0x2222222222222222LL; + + return; +} + +void +set_bmap_log(xfs_mount_t *mp) +{ + xfs_dfsbno_t logend, i; + + if (mp->m_sb.sb_logstart == 0) + return; + + logend = mp->m_sb.sb_logstart + mp->m_sb.sb_logblocks; + + for (i = mp->m_sb.sb_logstart; i < logend ; i++) { + set_fsbno_state(mp, i, XR_E_INUSE_FS); + } + + return; +} + +void +set_bmap_fs(xfs_mount_t *mp) +{ + xfs_agnumber_t i; + xfs_agblock_t j; + xfs_agblock_t end; + + /* + * AG header is 4 sectors + */ + end = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize); + + for (i = 0; i < mp->m_sb.sb_agcount; i++) + for (j = 0; j < end; j++) + set_agbno_state(mp, i, j, XR_E_INUSE_FS); + + return; +} + +#if 0 +void +set_bmap_fs_bt(xfs_mount_t *mp) +{ + xfs_agnumber_t i; + xfs_agblock_t j; + xfs_agblock_t begin; + xfs_agblock_t end; + + begin = bnobt_root; + end = inobt_root + 1; + + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + /* + * account for btree roots + */ + for (j = begin; j < end; j++) + set_agbno_state(mp, i, j, XR_E_INUSE_FS); + } + + return; +} +#endif + +void +incore_init(xfs_mount_t *mp) +{ + int agcount = mp->m_sb.sb_agcount; + extern void 
incore_ino_init(xfs_mount_t *); + extern void incore_ext_init(xfs_mount_t *); + + /* init block alloc bmap */ + + setup_bmap(agcount, mp->m_sb.sb_agblocks, mp->m_sb.sb_rextents); + incore_ino_init(mp); + incore_ext_init(mp); + + /* initialize random globals now that we know the fs geometry */ + + inodes_per_block = mp->m_sb.sb_inopblock; + + return; +} + +#if defined(XR_BMAP_TRACE) || defined(XR_BMAP_DBG) +int +get_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno, + xfs_agblock_t ag_blockno) +{ + __uint64_t *addr; + + addr = ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM; + + return((*addr >> (((ag_blockno)%XR_BB_NUM)*XR_BB)) & XR_BB_MASK); +} + +void set_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno, + xfs_agblock_t ag_blockno, int state) +{ + __uint64_t *addr; + + addr = ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM; + + *addr = (((*addr) & + (~((__uint64_t) XR_BB_MASK << (((ag_blockno)%XR_BB_NUM)*XR_BB)))) | + (((__uint64_t) (state)) << (((ag_blockno)%XR_BB_NUM)*XR_BB))); +} + +int +get_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno) +{ + return(get_agbno_state(mp, XFS_FSB_TO_AGNO(mp, blockno), + XFS_FSB_TO_AGBNO(mp, blockno))); +} + +void +set_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno, int state) +{ + set_agbno_state(mp, XFS_FSB_TO_AGNO(mp, blockno), + XFS_FSB_TO_AGBNO(mp, blockno), state); + + return; +} +#endif diff --git a/repair/incore.h b/repair/incore.h new file mode 100644 index 000000000..22ffdea4f --- /dev/null +++ b/repair/incore.h @@ -0,0 +1,564 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * contains definition information. implementation (code) + * is spread out in separate files. + */ + +/* + * block allocation lists + */ +typedef struct ba_rec { + void *addr; + struct ba_rec *next; +} ba_rec_t; + +void record_allocation(ba_rec_t *addr, ba_rec_t *list); +void free_allocations(ba_rec_t *list); + +/* + * block bit map defs -- track state of each filesystem block. + * ba_bmap is an array of bitstrings declared in the globals.h file. + * the bitstrings are broken up into 64-bit chunks. one bitstring per AG. 
+ */ +#define BA_BMAP_SIZE(x) (howmany(x, 4)) + +void set_bmap_rt(xfs_drfsbno_t numblocks); +void set_bmap_log(xfs_mount_t *mp); +void set_bmap_fs(xfs_mount_t *mp); +void teardown_bmap(xfs_mount_t *mp); + +void teardown_rt_bmap(xfs_mount_t *mp); +void teardown_ag_bmap(xfs_mount_t *mp, xfs_agnumber_t agno); +void teardown_bmap_finish(xfs_mount_t *mp); + +/* blocks are numbered from zero */ + +/* block records fit into __uint64_t's units */ + +#define XR_BB_UNIT 64 /* number of bits/unit */ +#define XR_BB 4 /* bits per block record */ +#define XR_BB_NUM (XR_BB_UNIT/XR_BB) /* number of records per unit */ +#define XR_BB_MASK 0xF /* block record mask */ + +/* + * bitstring ops -- set/get block states, either in filesystem + * bno's or in agbno's. turns out that fsbno addressing is + * more convenient when dealing with bmap extracted addresses + * and agbno addressing is more convenient when dealing with + * meta-data extracted addresses. So the fsbno versions use + * mtype (which can be one of the block map types above) to + * set the correct block map while the agbno versions assume + * you want to use the regular block map. 
+ */ + +#if defined(XR_BMAP_TRACE) || defined(XR_BMAP_DBG) +/* + * implemented as functions for debugging purposes + */ +int get_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno, + xfs_agblock_t ag_blockno); +void set_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno, + xfs_agblock_t ag_blockno, int state); + +int get_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno); +void set_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno, int state); +#else +/* + * implemented as macros for performance purposes + */ + +#define get_agbno_state(mp, agno, ag_blockno) \ + ((int) (*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) \ + >> (((ag_blockno)%XR_BB_NUM)*XR_BB)) \ + & XR_BB_MASK) +#define set_agbno_state(mp, agno, ag_blockno, state) \ + *(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) = \ + ((*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) & \ + (~((__uint64_t) XR_BB_MASK << (((ag_blockno)%XR_BB_NUM)*XR_BB)))) | \ + (((__uint64_t) (state)) << (((ag_blockno)%XR_BB_NUM)*XR_BB))) + +#define get_fsbno_state(mp, blockno) \ + get_agbno_state(mp, XFS_FSB_TO_AGNO(mp, (blockno)), \ + XFS_FSB_TO_AGBNO(mp, (blockno))) +#define set_fsbno_state(mp, blockno, state) \ + set_agbno_state(mp, XFS_FSB_TO_AGNO(mp, (blockno)), \ + XFS_FSB_TO_AGBNO(mp, (blockno)), (state)) + + +#define get_agbno_rec(mp, agno, ag_blockno) \ + (*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM)) +#endif /* XR_BMAP_TRACE */ + +/* + * these work in real-time extents (e.g. fsbno == rt extent number) + */ +#define get_rtbno_state(mp, fsbno) \ + ((*(rt_ba_bmap + (fsbno)/XR_BB_NUM) >> \ + (((fsbno)%XR_BB_NUM)*XR_BB)) & XR_BB_MASK) +#define set_rtbno_state(mp, fsbno, state) \ + *(rt_ba_bmap + (fsbno)/XR_BB_NUM) = \ + ((*(rt_ba_bmap + (fsbno)/XR_BB_NUM) & \ + (~((__uint64_t) XR_BB_MASK << (((fsbno)%XR_BB_NUM)*XR_BB)))) | \ + (((__uint64_t) (state)) << (((fsbno)%XR_BB_NUM)*XR_BB))) + + +/* + * extent tree definitions + * right now, there are 3 trees per AG, a bno tree, a bcnt tree + * and a tree for dup extents. 
If the code is modified in the + * future to use an extent tree instead of a bitmask for tracking + * fs blocks, then we could lose the dup extent tree if we labelled + * each extent with the inode that owned it. + */ + +typedef unsigned char extent_state_t; + +typedef struct extent_tree_node { + avlnode_t avl_node; + xfs_agblock_t ex_startblock; /* starting block (agbno) */ + xfs_extlen_t ex_blockcount; /* number of blocks in extent */ + extent_state_t ex_state; /* see state flags below */ + + struct extent_tree_node *next; /* for bcnt extent lists */ +#if 0 + xfs_ino_t ex_inode; /* owner, NULL if free or */ + /* multiply allocated */ +#endif +} extent_tree_node_t; + +typedef struct rt_extent_tree_node { + avlnode_t avl_node; + xfs_drtbno_t rt_startblock; /* starting realtime block */ + xfs_extlen_t rt_blockcount; /* number of blocks in extent */ + extent_state_t rt_state; /* see state flags below */ + +#if 0 + xfs_ino_t ex_inode; /* owner, NULL if free or */ + /* multiply allocated */ +#endif +} rt_extent_tree_node_t; + +/* extent states, prefix with XR_ to avoid conflict with buffer cache defines */ + +#define XR_E_UNKNOWN 0 /* unknown state */ +#define XR_E_FREE1 1 /* free block (marked by one fs space tree) */ +#define XR_E_FREE 2 /* free block (marked by both fs space trees) */ +#define XR_E_INUSE 3 /* extent used by file/dir data or metadata */ +#define XR_E_INUSE_FS 4 /* extent used by fs ag header or log */ +#define XR_E_MULT 5 /* extent is multiply referenced */ +#define XR_E_INO 6 /* extent used by inodes (inode blocks) */ +#define XR_E_FS_MAP 7 /* extent used by fs space/inode maps */ +#define XR_E_BAD_STATE 8 + +/* separate state bit, OR'ed into high (4th) bit of ex_state field; good_state() masks it off before range-checking, written()/set_written() test and set it */ + +#define XR_E_WRITTEN 0x8 /* extent has been written out, can't reclaim */ +#define good_state(state) (((state) & (~XR_E_WRITTEN)) >= XR_E_UNKNOWN && \ + ((state) & (~XR_E_WRITTEN)) < XR_E_BAD_STATE) +#define written(state) ((state) & XR_E_WRITTEN) +#define
set_written(state) ((state) |= XR_E_WRITTEN) + +/* + * bno extent tree functions + */ +void +add_bno_extent(xfs_agnumber_t agno, xfs_agblock_t startblock, + xfs_extlen_t blockcount); + +extent_tree_node_t * +findfirst_bno_extent(xfs_agnumber_t agno); + +extent_tree_node_t * +find_bno_extent(xfs_agnumber_t agno, xfs_agblock_t agbno); + +extent_tree_node_t * +findfirst_bno_extent(xfs_agnumber_t agno); + +#define findnext_bno_extent(exent_ptr) \ + ((extent_tree_node_t *) ((exent_ptr)->avl_node.avl_nextino)) + +void +get_bno_extent(xfs_agnumber_t agno, extent_tree_node_t *ext); + +/* + * bcnt tree functions + */ +void +add_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t startblock, + xfs_extlen_t blockcount); + +extent_tree_node_t * +findfirst_bcnt_extent(xfs_agnumber_t agno); + +extent_tree_node_t * +find_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t agbno); + +extent_tree_node_t * +findbiggest_bcnt_extent(xfs_agnumber_t agno); + +extent_tree_node_t * +findnext_bcnt_extent(xfs_agnumber_t agno, extent_tree_node_t *ext); + +extent_tree_node_t * +get_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t startblock, + xfs_extlen_t blockcount); + +/* + * duplicate extent tree functions + */ +void add_dup_extent(xfs_agnumber_t agno, + xfs_agblock_t startblock, + xfs_extlen_t blockcount); + +int search_dup_extent(xfs_mount_t *mp, + xfs_agnumber_t agno, + xfs_agblock_t agbno); + +void add_rt_dup_extent(xfs_drtbno_t startblock, + xfs_extlen_t blockcount); + +int search_rt_dup_extent(xfs_mount_t *mp, + xfs_drtbno_t bno); + +/* + * extent/tree recycling and deletion routines + */ + +/* + * return an extent node to the extent node free list + */ +void release_extent_tree_node(extent_tree_node_t *node); + +/* + * recycle all the nodes in the per-AG tree + */ +void release_dup_extent_tree(xfs_agnumber_t agno); +void release_agbno_extent_tree(xfs_agnumber_t agno); +void release_agbcnt_extent_tree(xfs_agnumber_t agno); + +/* + * realtime duplicate extent tree - this one actually frees the
memory + */ +void free_rt_dup_extent_tree(xfs_mount_t *mp); + +/* + * per-AG extent trees shutdown routine -- all (bno, bcnt and dup) + * at once. this one actually frees the memory instead of just recyling + * the nodes. + */ +void incore_ext_teardown(xfs_mount_t *mp); + +/* + * inode definitions + */ + +/* inode types */ + +#define XR_INO_UNKNOWN 0 /* unknown */ +#define XR_INO_DIR 1 /* directory */ +#define XR_INO_RTDATA 2 /* realtime file */ +#define XR_INO_RTBITMAP 3 /* realtime bitmap inode */ +#define XR_INO_RTSUM 4 /* realtime summary inode */ +#define XR_INO_DATA 5 /* regular file */ +#define XR_INO_SYMLINK 6 /* symlink */ +#define XR_INO_CHRDEV 7 /* character device */ +#define XR_INO_BLKDEV 8 /* block device */ +#define XR_INO_SOCK 9 /* socket */ +#define XR_INO_FIFO 10 /* fifo */ +#define XR_INO_MOUNTPOINT 11 /* mountpoint */ + +/* inode allocation tree */ + +/* + * Inodes in the inode allocation trees are allocated in chunks. + * Those groups can be easily duplicated in our trees. + * Disconnected inodes are harder. We can do one of two + * things in that case: if we know the inode allocation btrees + * are good, then we can disallow directory references to unknown + * inode chunks. If the inode allocation trees have been trashed or + * we feel like being aggressive, then as we hit unknown inodes, + * we can search on the disk for all contiguous inodes and see if + * they fit into chunks. Before putting them into the inode tree, + * we can scan each inode starting at the earliest inode to see which + * ones are good. This protects us from the pathalogical case of + * inodes appearing in user-data. We still may have to mark the + * inodes as "possibly fake" so that if a file claims the blocks, + * we decide to believe the inodes, especially if they're not + * connected. 
+ */ + +#define PLIST_CHUNK_SIZE 4 + +typedef xfs_ino_t parent_entry_t; + +typedef struct parent_list { + __uint64_t pmask; + parent_entry_t *pentries; +#ifdef DEBUG + short cnt; +#endif +} parent_list_t; + +typedef struct backptrs { + __uint64_t ino_reached; /* bit == 1 if reached */ + __uint64_t ino_processed; /* reference checked bit mask */ + __uint32_t nlinks[XFS_INODES_PER_CHUNK]; + parent_list_t *parents; +} backptrs_t; + +typedef struct ino_tree_node { + avlnode_t avl_node; + xfs_agino_t ino_startnum; /* starting inode # */ + xfs_inofree_t ir_free; /* inode free bit mask */ + __uint64_t ino_confirmed; /* confirmed bitmask */ + __uint64_t ino_isa_dir; /* bit == 1 if a directory */ + union { + backptrs_t *backptrs; + parent_list_t *plist; + } ino_un; +} ino_tree_node_t; + +#define INOS_PER_IREC (sizeof(__uint64_t) * NBBY) +void add_ino_backptrs(xfs_mount_t *mp); + +/* + * return an inode record to the free inode record pool + */ +void free_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec); + +/* + * get pulls the inode record from the good inode tree + */ +void get_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec); + +ino_tree_node_t *findfirst_inode_rec(xfs_agnumber_t agno); +ino_tree_node_t *find_inode_rec(xfs_agnumber_t agno, xfs_agino_t ino); +void find_inode_rec_range(xfs_agnumber_t agno, + xfs_agino_t start_ino, xfs_agino_t end_ino, + ino_tree_node_t **first, ino_tree_node_t **last); + +/* + * set inode states -- setting an inode to used or free also + * automatically marks it as "existing". Note -- all the inode + * add/set/get routines assume a valid inode number. + */ +ino_tree_node_t *set_inode_used_alloc(xfs_agnumber_t agno, xfs_agino_t ino); +ino_tree_node_t *set_inode_free_alloc(xfs_agnumber_t agno, xfs_agino_t ino); + +void print_inode_list(xfs_agnumber_t agno); +void print_uncertain_inode_list(xfs_agnumber_t agno); + +/* + * separate trees for uncertain inodes (they may not exist). 
+ */ +ino_tree_node_t *findfirst_uncertain_inode_rec(xfs_agnumber_t agno); +void add_inode_uncertain(xfs_mount_t *mp, + xfs_ino_t ino, int free); +void add_aginode_uncertain(xfs_agnumber_t agno, + xfs_agino_t agino, int free); +void get_uncertain_inode_rec(xfs_agnumber_t agno, + ino_tree_node_t *ino_rec); +void clear_uncertain_ino_cache(xfs_agnumber_t agno); + +/* + * return next in-order inode tree node. takes an "ino_tree_node_t *" + */ +#define next_ino_rec(ino_node_ptr) \ + ((ino_tree_node_t *) ((ino_node_ptr)->avl_node.avl_nextino)) +/* + * return the next linked inode (forward avl tree link)-- meant to be used + * by linked list routines (uncertain inode routines/records) + */ +#define next_link_rec(ino_node_ptr) \ + ((ino_tree_node_t *) ((ino_node_ptr)->avl_node.avl_forw)) + +/* + * Bit manipulations for processed field + */ +#define XFS_INOPROC_MASK(i) ((__uint64_t)1 << (i)) +#define XFS_INOPROC_MASKN(i,n) ((__uint64_t)((1 << (n)) - 1) << (i)) + +#define XFS_INOPROC_IS_PROC(rp, i) \ + (((rp)->ino_un.backptrs->ino_processed & XFS_INOPROC_MASK((i))) == 0LL \ + ? 0 : 1) +#define XFS_INOPROC_SET_PROC(rp, i) \ + ((rp)->ino_un.backptrs->ino_processed |= XFS_INOPROC_MASK((i))) +/* +#define XFS_INOPROC_CLR_PROC(rp, i) \ + ((rp)->ino_un.backptrs->ino_processed &= ~XFS_INOPROC_MASK((i))) +*/ + +/* + * same for ir_confirmed. + */ +#define XFS_INOCF_MASK(i) ((__uint64_t)1 << (i)) +#define XFS_INOCF_MASKN(i,n) ((__uint64_t)((1 << (n)) - 1) << (i)) + +#define XFS_INOCF_IS_CF(rp, i) \ + (((rp)->ino_confirmed & XFS_INOCF_MASK((i))) == 0LL \ + ? 0 : 1) +#define XFS_INOCF_SET_CF(rp, i) \ + ((rp)->ino_confirmed |= XFS_INOCF_MASK((i))) +#define XFS_INOCF_CLR_CF(rp, i) \ + ((rp)->ino_confirmed &= ~XFS_INOCF_MASK((i))) + +/* + * same for backptr->ino_reached + */ +#define XFS_INO_RCHD_MASK(i) ((__uint64_t)1 << (i)) + +#define XFS_INO_RCHD_IS_RCHD(rp, i) \ + (((rp)->ino_un.backptrs->ino_reached & XFS_INO_RCHD_MASK((i))) == 0LL \ + ? 
0 : 1) +#define XFS_INO_RCHD_SET_RCHD(rp, i) \ + ((rp)->ino_un.backptrs->ino_reached |= XFS_INO_RCHD_MASK((i))) +#define XFS_INO_RCHD_CLR_RCHD(rp, i) \ + ((rp)->ino_un.backptrs->ino_reached &= ~XFS_INO_RCHD_MASK((i))) +/* + * set/clear/test is inode a directory inode + */ +#define XFS_INO_ISADIR_MASK(i) ((__uint64_t)1 << (i)) + +#define inode_isadir(ino_rec, ino_offset) \ + (((ino_rec)->ino_isa_dir & XFS_INO_ISADIR_MASK((ino_offset))) == 0LL \ + ? 0 : 1) +#define set_inode_isadir(ino_rec, ino_offset) \ + ((ino_rec)->ino_isa_dir |= XFS_INO_ISADIR_MASK((ino_offset))) +#define clear_inode_isadir(ino_rec, ino_offset) \ + ((ino_rec)->ino_isa_dir &= ~XFS_INO_ISADIR_MASK((ino_offset))) + + +/* + * set/clear/test is inode known to be valid (although perhaps corrupt) + */ +#define clear_inode_confirmed(ino_rec, ino_offset) \ + XFS_INOCF_CLR_CF((ino_rec), (ino_offset)) + +#define set_inode_confirmed(ino_rec, ino_offset) \ + XFS_INOCF_SET_CF((ino_rec), (ino_offset)) + +#define is_inode_confirmed(ino_rec, ino_offset) \ + XFS_INOCF_IS_CF(ino_rec, ino_offset) + +/* + * set/clear/test is inode free or used + */ +#define set_inode_free(ino_rec, ino_offset) \ + XFS_INOCF_SET_CF((ino_rec), (ino_offset)), \ + XFS_INOBT_SET_FREE((ino_rec), (ino_offset),(ARCH_NOCONVERT)) + +#define set_inode_used(ino_rec, ino_offset) \ + XFS_INOCF_SET_CF((ino_rec), (ino_offset)), \ + XFS_INOBT_CLR_FREE((ino_rec), (ino_offset),(ARCH_NOCONVERT)) + +#define is_inode_used(ino_rec, ino_offset) \ + !XFS_INOBT_IS_FREE((ino_rec), (ino_offset),(ARCH_NOCONVERT)) + +#define is_inode_free(ino_rec, ino_offset) \ + XFS_INOBT_IS_FREE((ino_rec), (ino_offset),(ARCH_NOCONVERT)) + +/* + * add_inode_reached() is set on inode I only if I has been reached + * by an inode P claiming to be the parent and if I is a directory, + * the .. link in the I says that P is I's parent. 
+ * + * add_inode_ref() is called every time a link to an inode is + * detected and drop_inode_ref() is called every time a link to + * an inode that we've counted is removed. + */ + +void add_inode_reached(ino_tree_node_t *ino_rec, int ino_offset); +void add_inode_ref(ino_tree_node_t *ino_rec, int ino_offset); +void drop_inode_ref(ino_tree_node_t *ino_rec, int ino_offset); +int is_inode_reached(ino_tree_node_t *ino_rec, int ino_offset); +int is_inode_referenced(ino_tree_node_t *ino_rec, int ino_offset); +__uint32_t num_inode_references(ino_tree_node_t *ino_rec, int ino_offset); + +/* + * has an inode been processed for phase 6 (reference count checking)? + * add_inode_refchecked() is set on an inode when it gets traversed + * during the reference count phase (6). It's set so that if the inode + * is a directory, it's traversed (and it's links counted) only once. + */ +#ifndef XR_INO_REF_DEBUG +#define add_inode_refchecked(ino, ino_rec, ino_offset) \ + XFS_INOPROC_SET_PROC((ino_rec), (ino_offset)) +#define is_inode_refchecked(ino, ino_rec, ino_offset) \ + (XFS_INOPROC_IS_PROC(ino_rec, ino_offset) == 0LL ? 0 : 1) +#else +void add_inode_refchecked(xfs_ino_t ino, + ino_tree_node_t *ino_rec, int ino_offset); +int is_inode_refchecked(xfs_ino_t ino, + ino_tree_node_t *ino_rec, int ino_offset); +#endif /* XR_INO_REF_DEBUG */ + +/* + * set/get inode number of parent -- works for directory inodes only + */ +void set_inode_parent(ino_tree_node_t *irec, int ino_offset, + xfs_ino_t ino); +#if 0 +void clear_inode_parent(ino_tree_node_t *irec, int offset); +#endif +xfs_ino_t get_inode_parent(ino_tree_node_t *irec, int ino_offset); + +/* + * bmap cursor for tracking and fixing bmap btrees. All xfs btrees number + * the levels with 0 being the leaf and every level up being 1 greater. 
+ */ + +#define XR_MAX_BMLEVELS 10 /* XXX - rcc need to verify number */ + +typedef struct bm_level_state { + xfs_dfsbno_t fsbno; + xfs_dfsbno_t left_fsbno; + xfs_dfsbno_t right_fsbno; + __uint64_t first_key; + __uint64_t last_key; +/* + int level; + __uint64_t prev_last_key; + xfs_buf_t *bp; + xfs_bmbt_block_t *block; +*/ +} bm_level_state_t; + +typedef struct bm_cursor { + int num_levels; + xfs_ino_t ino; + xfs_dinode_t *dip; + bm_level_state_t level[XR_MAX_BMLEVELS]; +} bmap_cursor_t; + +void init_bm_cursor(bmap_cursor_t *cursor, int num_level); diff --git a/repair/incore_bmc.c b/repair/incore_bmc.c new file mode 100644 index 000000000..89111feb5 --- /dev/null +++ b/repair/incore_bmc.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "avl.h" +#include "globals.h" +#include "incore.h" +#include "agheader.h" +#include "protos.h" +#include "err_protos.h" + +void +init_bm_cursor(bmap_cursor_t *cursor, int num_levels) +{ + int i; + + bzero(cursor, sizeof(bmap_cursor_t)); + cursor->ino = NULLFSINO; + cursor->num_levels = num_levels; + + for (i = 0; i < XR_MAX_BMLEVELS; i++) { + cursor->level[i].fsbno = NULLDFSBNO; + cursor->level[i].right_fsbno = NULLDFSBNO; + cursor->level[i].left_fsbno = NULLDFSBNO; + cursor->level[i].first_key = NULLDFILOFF; + cursor->level[i].last_key = NULLDFILOFF; + } +} diff --git a/repair/incore_ext.c b/repair/incore_ext.c new file mode 100644 index 000000000..5c3708b66 --- /dev/null +++ b/repair/incore_ext.c @@ -0,0 +1,1000 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "avl.h" +#include "globals.h" +#include "incore.h" +#include "agheader.h" +#include "protos.h" +#include "err_protos.h" +#include "avl64.h" +#define ALLOC_NUM_EXTS 100 + +/* + * paranoia -- account for any weird padding, 64/32-bit alignment, etc. + */ +typedef struct extent_alloc_rec { + ba_rec_t alloc_rec; + extent_tree_node_t extents[ALLOC_NUM_EXTS]; +} extent_alloc_rec_t; + +typedef struct rt_extent_alloc_rec { + ba_rec_t alloc_rec; + rt_extent_tree_node_t extents[ALLOC_NUM_EXTS]; +} rt_extent_alloc_rec_t; + +/* + * note: there are 4 sets of incore things handled here: + * block bitmaps, extent trees, uncertain inode list, + * and inode tree. The tree-based code uses the AVL + * tree package used by the IRIX kernel VM code + * (sys/avl.h). The inode list code uses the same records + * as the inode tree code for convenience. The bitmaps + * and bitmap operators are mostly macros defined in incore.h. + * There are one of everything per AG except for extent + * trees. There's one duplicate extent tree, one bno and + * one bcnt extent tree per AG. Not all of the above exist + * through all phases. The duplicate extent tree gets trashed + * at the end of phase 4. The bno/bcnt trees don't appear until + * phase 5. The uncertain inode list goes away at the end of + * phase 3. The inode tree and bno/bnct trees go away after phase 5. 
+ */ +typedef struct ext_flist_s { + extent_tree_node_t *list; + int cnt; +} ext_flist_t; + +static ext_flist_t ext_flist; + +typedef struct rt_ext_flist_s { + rt_extent_tree_node_t *list; + int cnt; +} rt_ext_flist_t; + +static rt_ext_flist_t rt_ext_flist; + +static avl64tree_desc_t *rt_ext_tree_ptr; /* dup extent tree for rt */ + +static avltree_desc_t **extent_tree_ptrs; /* array of extent tree ptrs */ + /* one per ag for dups */ +static avltree_desc_t **extent_bno_ptrs; /* + * array of extent tree ptrs + * one per ag for free extents + * sorted by starting block + * number + */ +static avltree_desc_t **extent_bcnt_ptrs; /* + * array of extent tree ptrs + * one per ag for free extents + * sorted by size + */ + +/* + * list of allocated "blocks" for easy freeing later + */ +static ba_rec_t *ba_list; +static ba_rec_t *rt_ba_list; + +/* + * extent tree stuff is avl trees of duplicate extents, + * sorted in order by block number. there is one tree per ag. + */ + +static extent_tree_node_t * +mk_extent_tree_nodes(xfs_agblock_t new_startblock, + xfs_extlen_t new_blockcount, extent_state_t new_state) +{ + int i; + extent_tree_node_t *new; + extent_alloc_rec_t *rec; + + if (ext_flist.cnt == 0) { + ASSERT(ext_flist.list == NULL); + + if ((rec = malloc(sizeof(extent_alloc_rec_t))) == NULL) + do_error("couldn't allocate new extent descriptors.\n"); + + record_allocation(&rec->alloc_rec, ba_list); + + new = &rec->extents[0]; + + for (i = 0; i < ALLOC_NUM_EXTS; i++) { + new->avl_node.avl_nextino = (avlnode_t *) + ext_flist.list; + ext_flist.list = new; + ext_flist.cnt++; + new++; + } + } + + ASSERT(ext_flist.list != NULL); + + new = ext_flist.list; + ext_flist.list = (extent_tree_node_t *) new->avl_node.avl_nextino; + ext_flist.cnt--; + new->avl_node.avl_nextino = NULL; + + /* initialize node */ + + new->ex_startblock = new_startblock; + new->ex_blockcount = new_blockcount; + new->ex_state = new_state; + new->next = NULL; + + return(new); +} + +void 
+release_extent_tree_node(extent_tree_node_t *node) +{ + node->avl_node.avl_nextino = (avlnode_t *) ext_flist.list; + ext_flist.list = node; + ext_flist.cnt++; + + return; +} + +/* + * routines to recycle all nodes in a tree. it walks the tree + * and puts all nodes back on the free list so the nodes can be + * reused. the duplicate and bno/bcnt extent trees for each AG + * are recycled after they're no longer needed to save memory + */ +void +release_extent_tree(avltree_desc_t *tree) +{ + extent_tree_node_t *ext; + extent_tree_node_t *tmp; + extent_tree_node_t *lext; + extent_tree_node_t *ltmp; + + if (tree->avl_firstino == NULL) + return; + + ext = (extent_tree_node_t *) tree->avl_firstino; + + while (ext != NULL) { + tmp = (extent_tree_node_t *) ext->avl_node.avl_nextino; + + /* + * ext->next is guaranteed to be set only in bcnt trees + */ + if (ext->next != NULL) { + lext = ext->next; + while (lext != NULL) { + ltmp = lext->next; + release_extent_tree_node(lext); + lext = ltmp; + } + } + + release_extent_tree_node(ext); + ext = tmp; + } + + tree->avl_root = tree->avl_firstino = NULL; + + return; +} + +/* + * top-level (visible) routines + */ +void +release_dup_extent_tree(xfs_agnumber_t agno) +{ + release_extent_tree(extent_tree_ptrs[agno]); + + return; +} + +void +release_agbno_extent_tree(xfs_agnumber_t agno) +{ + release_extent_tree(extent_bno_ptrs[agno]); + + return; +} + +void +release_agbcnt_extent_tree(xfs_agnumber_t agno) +{ + release_extent_tree(extent_bcnt_ptrs[agno]); + + return; +} + +/* + * the next 4 routines manage the trees of free extents -- 2 trees + * per AG. The first tree is sorted by block number. The second + * tree is sorted by extent size. This is the bno tree. 
+ */ +void +add_bno_extent(xfs_agnumber_t agno, xfs_agblock_t startblock, + xfs_extlen_t blockcount) +{ + extent_tree_node_t *ext; + + ASSERT(extent_bno_ptrs != NULL); + ASSERT(extent_bno_ptrs[agno] != NULL); + + ext = mk_extent_tree_nodes(startblock, blockcount, XR_E_FREE); + + if (avl_insert(extent_bno_ptrs[agno], (avlnode_t *) ext) == NULL) { + do_error("xfs_repair: duplicate bno extent range\n"); + } +} + +extent_tree_node_t * +findfirst_bno_extent(xfs_agnumber_t agno) +{ + ASSERT(extent_bno_ptrs != NULL); + ASSERT(extent_bno_ptrs[agno] != NULL); + + return((extent_tree_node_t *) extent_bno_ptrs[agno]->avl_firstino); +} + +extent_tree_node_t * +find_bno_extent(xfs_agnumber_t agno, xfs_agblock_t startblock) +{ + ASSERT(extent_bno_ptrs != NULL); + ASSERT(extent_bno_ptrs[agno] != NULL); + + return((extent_tree_node_t *) avl_find(extent_bno_ptrs[agno], + startblock)); +} + +/* + * delete a node that's in the tree (pointer obtained by a find routine) + */ +void +get_bno_extent(xfs_agnumber_t agno, extent_tree_node_t *ext) +{ + ASSERT(extent_bno_ptrs != NULL); + ASSERT(extent_bno_ptrs[agno] != NULL); + + avl_delete(extent_bno_ptrs[agno], &ext->avl_node); + + return; +} + +/* + * normalizing constant for bcnt size -> address conversion (see avl ops) + * used by the AVL tree code to convert sizes and must be used when + * doing an AVL search in the tree (e.g. avl_findrange(s)) + */ +#define MAXBCNT 0xFFFFFFFF +#define BCNT_ADDR(cnt) ((unsigned int) MAXBCNT - (cnt)) + +/* + * the next 4 routines manage the trees of free extents -- 2 trees + * per AG. The first tree is sorted by block number. The second + * tree is sorted by extent size. This is the bcnt tree. 
+ */ +void +add_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t startblock, + xfs_extlen_t blockcount) +{ + extent_tree_node_t *ext, *prev, *current, *top; + xfs_agblock_t tmp_startblock; + xfs_extlen_t tmp_blockcount; + extent_state_t tmp_state; + + ASSERT(extent_bcnt_ptrs != NULL); + ASSERT(extent_bcnt_ptrs[agno] != NULL); + + ext = mk_extent_tree_nodes(startblock, blockcount, XR_E_FREE); + + ASSERT(ext->next == NULL); + +#ifdef XR_BCNT_TRACE + fprintf(stderr, "adding bcnt: agno = %d, start = %u, count = %u\n", + agno, startblock, blockcount); +#endif + if ((current = (extent_tree_node_t *) avl_find(extent_bcnt_ptrs[agno], + blockcount)) != NULL) { + /* + * avl tree code doesn't handle dups so insert + * onto linked list in increasing startblock order + */ + top = prev = current; + while (current != NULL && + startblock > current->ex_startblock) { + prev = current; + current = current->next; + } + + if (top == current) { + ASSERT(top == prev); + /* + * swap the values of to-be-inserted element + * and the values of the head of the list. + * then insert as the 2nd element on the list. + * + * see the comment in get_bcnt_extent() + * as to why we have to do this. 
+ */ + tmp_startblock = top->ex_startblock; + tmp_blockcount = top->ex_blockcount; + tmp_state = top->ex_state; + + top->ex_startblock = ext->ex_startblock; + top->ex_blockcount = ext->ex_blockcount; + top->ex_state = ext->ex_state; + + ext->ex_startblock = tmp_startblock; + ext->ex_blockcount = tmp_blockcount; + ext->ex_state = tmp_state; + + current = top->next; + prev = top; + } + + prev->next = ext; + ext->next = current; + + return; + } + + if (avl_insert(extent_bcnt_ptrs[agno], (avlnode_t *) ext) == NULL) { + do_error("xfs_repair: duplicate bno extent range\n"); + } + + return; +} + +extent_tree_node_t * +findfirst_bcnt_extent(xfs_agnumber_t agno) +{ + ASSERT(extent_bcnt_ptrs != NULL); + ASSERT(extent_bcnt_ptrs[agno] != NULL); + + return((extent_tree_node_t *) extent_bcnt_ptrs[agno]->avl_firstino); +} + +extent_tree_node_t * +findbiggest_bcnt_extent(xfs_agnumber_t agno) +{ + extern avlnode_t *avl_lastino(avlnode_t *root); + + ASSERT(extent_bcnt_ptrs != NULL); + ASSERT(extent_bcnt_ptrs[agno] != NULL); + + return((extent_tree_node_t *) avl_lastino(extent_bcnt_ptrs[agno]->avl_root)); +} + +extent_tree_node_t * +findnext_bcnt_extent(xfs_agnumber_t agno, extent_tree_node_t *ext) +{ + avlnode_t *nextino; + + if (ext->next != NULL) { + ASSERT(ext->ex_blockcount == ext->next->ex_blockcount); + ASSERT(ext->ex_startblock < ext->next->ex_startblock); + return(ext->next); + } else { + /* + * have to look at the top of the list to get the + * correct avl_nextino pointer since that pointer + * is maintained and altered by the AVL code. 
+ */ + nextino = avl_find(extent_bcnt_ptrs[agno], ext->ex_blockcount); + ASSERT(nextino != NULL); + if (nextino->avl_nextino != NULL) { + ASSERT(ext->ex_blockcount < ((extent_tree_node_t *) + nextino->avl_nextino)->ex_blockcount); + } + return((extent_tree_node_t *) nextino->avl_nextino); + } +} + +/* + * this is meant to be called after you walk the bno tree to + * determine exactly which extent you want (so you'll know the + * desired value for startblock when you call this routine). + */ +extent_tree_node_t * +get_bcnt_extent(xfs_agnumber_t agno, xfs_agblock_t startblock, + xfs_extlen_t blockcount) +{ + extent_tree_node_t *ext, *prev, *top; + xfs_agblock_t tmp_startblock; + xfs_extlen_t tmp_blockcount; + extent_state_t tmp_state; + + prev = NULL; + ASSERT(extent_bcnt_ptrs != NULL); + ASSERT(extent_bcnt_ptrs[agno] != NULL); + + if ((ext = (extent_tree_node_t *) avl_find(extent_bcnt_ptrs[agno], + blockcount)) == NULL) + return(NULL); + + top = ext; + + if (ext->next != NULL) { + /* + * pull it off the list + */ + while (ext != NULL && startblock != ext->ex_startblock) { + prev = ext; + ext = ext->next; + } + ASSERT(ext != NULL); + if (ext == top) { + /* + * this node is linked into the tree so we + * swap the core values so we can delete + * the next item on the list instead of + * the head of the list. This is because + * the rest of the tree undoubtedly has + * pointers to the piece of memory that + * is the head of the list so pulling + * the item out of the list and hence + * the avl tree would be a bad idea. + * + * (cheaper than the alternative, a tree + * delete of this node followed by a tree + * insert of the next node on the list). 
+ */ + tmp_startblock = ext->next->ex_startblock; + tmp_blockcount = ext->next->ex_blockcount; + tmp_state = ext->next->ex_state; + + ext->next->ex_startblock = ext->ex_startblock; + ext->next->ex_blockcount = ext->ex_blockcount; + ext->next->ex_state = ext->ex_state; + + ext->ex_startblock = tmp_startblock; + ext->ex_blockcount = tmp_blockcount; + ext->ex_state = tmp_state; + + ext = ext->next; + prev = top; + } + /* + * now, a simple list deletion + */ + prev->next = ext->next; + ext->next = NULL; + } else { + /* + * no list, just one node. simply delete + */ + avl_delete(extent_bcnt_ptrs[agno], &ext->avl_node); + } + + ASSERT(ext->ex_startblock == startblock); + ASSERT(ext->ex_blockcount == blockcount); + return(ext); +} + +/* + * the next 2 routines manage the trees of duplicate extents -- 1 tree + * per AG + */ +void +add_dup_extent(xfs_agnumber_t agno, xfs_agblock_t startblock, + xfs_extlen_t blockcount) +{ + extent_tree_node_t *first, *last, *ext, *next_ext; + xfs_agblock_t new_startblock; + xfs_extlen_t new_blockcount; + + ASSERT(agno < glob_agcount); + +#ifdef XR_DUP_TRACE + fprintf(stderr, "Adding dup extent - %d/%d %d\n", agno, startblock, blockcount); +#endif + avl_findranges(extent_tree_ptrs[agno], startblock - 1, + startblock + blockcount + 1, + (avlnode_t **) &first, (avlnode_t **) &last); + /* + * find adjacent and overlapping extent blocks + */ + if (first == NULL && last == NULL) { + /* nothing, just make and insert new extent */ + + ext = mk_extent_tree_nodes(startblock, blockcount, XR_E_MULT); + + if (avl_insert(extent_tree_ptrs[agno], + (avlnode_t *) ext) == NULL) { + do_error("xfs_repair: duplicate extent range\n"); + } + + return; + } + + ASSERT(first != NULL && last != NULL); + + /* + * find the new composite range, delete old extent nodes + * as we go + */ + new_startblock = startblock; + new_blockcount = blockcount; + + for (ext = first; + ext != (extent_tree_node_t *) last->avl_node.avl_nextino; + ext = next_ext) { + /* + * preserve the 
next inorder node + */ + next_ext = (extent_tree_node_t *) ext->avl_node.avl_nextino; + /* + * just bail if the new extent is contained within an old one + */ + if (ext->ex_startblock <= startblock && + ext->ex_blockcount >= blockcount) + return; + /* + * now check for overlaps and adjacent extents + */ + if (ext->ex_startblock + ext->ex_blockcount >= startblock + || ext->ex_startblock <= startblock + blockcount) { + + if (ext->ex_startblock < new_startblock) + new_startblock = ext->ex_startblock; + + if (ext->ex_startblock + ext->ex_blockcount > + new_startblock + new_blockcount) + new_blockcount = ext->ex_startblock + + ext->ex_blockcount - + new_startblock; + + avl_delete(extent_tree_ptrs[agno], (avlnode_t *) ext); + continue; + } + } + + ext = mk_extent_tree_nodes(new_startblock, new_blockcount, XR_E_MULT); + + if (avl_insert(extent_tree_ptrs[agno], (avlnode_t *) ext) == NULL) { + do_error("xfs_repair: duplicate extent range\n"); + } + + return; +} + +/* + * returns 1 if block is a dup, 0 if not + */ +/* ARGSUSED */ +int +search_dup_extent(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agblock_t agbno) +{ + ASSERT(agno < glob_agcount); + + if (avl_findrange(extent_tree_ptrs[agno], agbno) != NULL) + return(1); + + return(0); +} + +static __psunsigned_t +avl_ext_start(avlnode_t *node) +{ + return((__psunsigned_t) + ((extent_tree_node_t *) node)->ex_startblock); +} + +static __psunsigned_t +avl_ext_end(avlnode_t *node) +{ + return((__psunsigned_t) ( + ((extent_tree_node_t *) node)->ex_startblock + + ((extent_tree_node_t *) node)->ex_blockcount)); +} + +/* + * convert size to an address for the AVL tree code -- the bigger the size, + * the lower the address so the biggest extent will be first in the tree + */ +static __psunsigned_t +avl_ext_bcnt_start(avlnode_t *node) +{ +/* + return((__psunsigned_t) (BCNT_ADDR(((extent_tree_node_t *) + node)->ex_blockcount))); +*/ + return((__psunsigned_t) ((extent_tree_node_t *)node)->ex_blockcount); +} + +static __psunsigned_t 
+avl_ext_bcnt_end(avlnode_t *node) +{ +/* + return((__psunsigned_t) (BCNT_ADDR(((extent_tree_node_t *) + node)->ex_blockcount))); +*/ + return((__psunsigned_t) ((extent_tree_node_t *)node)->ex_blockcount); +} + +avlops_t avl_extent_bcnt_tree_ops = { + avl_ext_bcnt_start, + avl_ext_bcnt_end +}; + +avlops_t avl_extent_tree_ops = { + avl_ext_start, + avl_ext_end +}; + +/* + * for real-time extents -- have to dup code since realtime extent + * startblocks can be 64-bit values. + */ +static rt_extent_tree_node_t * +mk_rt_extent_tree_nodes(xfs_drtbno_t new_startblock, + xfs_extlen_t new_blockcount, extent_state_t new_state) +{ + int i; + rt_extent_tree_node_t *new; + rt_extent_alloc_rec_t *rec; + + if (rt_ext_flist.cnt == 0) { + ASSERT(rt_ext_flist.list == NULL); + + if ((rec = malloc(sizeof(rt_extent_alloc_rec_t))) == NULL) + do_error("couldn't allocate new extent descriptors.\n"); + + record_allocation(&rec->alloc_rec, rt_ba_list); + + new = &rec->extents[0]; + + for (i = 0; i < ALLOC_NUM_EXTS; i++) { + new->avl_node.avl_nextino = (avlnode_t *) + rt_ext_flist.list; + rt_ext_flist.list = new; + rt_ext_flist.cnt++; + new++; + } + } + + ASSERT(rt_ext_flist.list != NULL); + + new = rt_ext_flist.list; + rt_ext_flist.list = (rt_extent_tree_node_t *) new->avl_node.avl_nextino; + rt_ext_flist.cnt--; + new->avl_node.avl_nextino = NULL; + + /* initialize node */ + + new->rt_startblock = new_startblock; + new->rt_blockcount = new_blockcount; + new->rt_state = new_state; + + return(new); +} + +#if 0 +void +release_rt_extent_tree_node(rt_extent_tree_node_t *node) +{ + node->avl_node.avl_nextino = (avlnode_t *) rt_ext_flist.list; + rt_ext_flist.list = node; + rt_ext_flist.cnt++; + + return; +} + +void +release_rt_extent_tree() +{ + extent_tree_node_t *ext; + extent_tree_node_t *tmp; + extent_tree_node_t *lext; + extent_tree_node_t *ltmp; + avl64tree_desc_t *tree; + + tree = rt_extent_tree_ptr; + + if (tree->avl_firstino == NULL) + return; + + ext = (extent_tree_node_t *) 
tree->avl_firstino;
+
+	while (ext != NULL) {
+		tmp = (extent_tree_node_t *) ext->avl_node.avl_nextino;
+		release_rt_extent_tree_node(ext);
+		ext = tmp;
+	}
+
+	tree->avl_root = tree->avl_firstino = NULL;
+
+	return;
+}
+#endif
+
+/*
+ * don't need release functions for realtime tree teardown
+ * since we only have one tree, not one per AG
+ */
+/* ARGSUSED */
+void
+free_rt_dup_extent_tree(xfs_mount_t *mp)
+{
+	ASSERT(mp->m_sb.sb_rblocks != 0);
+
+	free_allocations(rt_ba_list);
+	free(rt_ext_tree_ptr);
+
+	rt_ba_list = NULL;
+	rt_ext_tree_ptr = NULL;
+
+	return;
+}
+
+/*
+ * add a duplicate real-time extent
+ *
+ * Records the range [startblock, startblock + blockcount) in the
+ * realtime duplicate-extent tree.  Extents already in the tree that
+ * overlap or abut the new range are deleted and replaced by a single
+ * composite extent covering the union of all of them.  If one existing
+ * extent already covers the whole new range, the tree is left untouched.
+ */
+void
+add_rt_dup_extent(xfs_drtbno_t startblock, xfs_extlen_t blockcount)
+{
+	rt_extent_tree_node_t *first, *last, *ext, *next_ext, *stop;
+	xfs_drtbno_t new_startblock;
+	xfs_drtbno_t new_endblock;
+	xfs_drtbno_t ext_endblock;
+
+	avl64_findranges(rt_ext_tree_ptr, startblock - 1,
+		startblock + blockcount + 1,
+		(avl64node_t **) &first, (avl64node_t **) &last);
+	/*
+	 * find adjacent and overlapping extent blocks
+	 */
+	if (first == NULL && last == NULL) {
+		/* nothing, just make and insert new extent */
+
+		ext = mk_rt_extent_tree_nodes(startblock,
+				blockcount, XR_E_MULT);
+
+		if (avl64_insert(rt_ext_tree_ptr,
+				(avl64node_t *) ext) == NULL) {
+			do_error("xfs_repair: duplicate extent range\n");
+		}
+
+		return;
+	}
+
+	ASSERT(first != NULL && last != NULL);
+
+	/*
+	 * remember where the candidate run ends before any node is
+	 * deleted, so the loops below never read the links of a node
+	 * that has already been taken out of the tree
+	 */
+	stop = (rt_extent_tree_node_t *) last->avl_node.avl_nextino;
+
+	/*
+	 * just bail if the new extent is contained within an old one.
+	 * containment means the old extent ends at or after the new one,
+	 * so compare end blocks rather than lengths, and decide before
+	 * anything has been deleted
+	 */
+	for (ext = first; ext != stop;
+	     ext = (rt_extent_tree_node_t *) ext->avl_node.avl_nextino) {
+		if (ext->rt_startblock <= startblock &&
+		    ext->rt_startblock + ext->rt_blockcount >=
+				startblock + blockcount)
+			return;
+	}
+
+	/*
+	 * find the new composite range, delete old extent nodes
+	 * as we go.  the range is tracked as [start, end) so that
+	 * lowering the start never shortens the far end
+	 */
+	new_startblock = startblock;
+	new_endblock = startblock + blockcount;
+
+	for (ext = first; ext != stop; ext = next_ext) {
+		/*
+		 * preserve the next inorder node
+		 */
+		next_ext = (rt_extent_tree_node_t *) ext->avl_node.avl_nextino;
+		ext_endblock = ext->rt_startblock + ext->rt_blockcount;
+
+		/*
+		 * now check for overlaps and adjacent extents -- both
+		 * bounds have to hold for the ranges to touch
+		 */
+		if (ext_endblock >= startblock &&
+		    ext->rt_startblock <= startblock + blockcount) {
+
+			if (ext->rt_startblock < new_startblock)
+				new_startblock = ext->rt_startblock;
+
+			if (ext_endblock > new_endblock)
+				new_endblock = ext_endblock;
+
+			avl64_delete(rt_ext_tree_ptr, (avl64node_t *) ext);
+		}
+	}
+
+	ext = mk_rt_extent_tree_nodes(new_startblock,
+			(xfs_extlen_t) (new_endblock - new_startblock),
+			XR_E_MULT);
+
+	if (avl64_insert(rt_ext_tree_ptr, (avl64node_t *) ext) == NULL) {
+		do_error("xfs_repair: duplicate extent range\n");
+	}
+
+	return;
+}
+
+/*
+ * returns 1 if block is a dup, 0 if not
+ */
+/* ARGSUSED */
+int
+search_rt_dup_extent(xfs_mount_t *mp, xfs_drtbno_t bno)
+{
+	if (avl64_findrange(rt_ext_tree_ptr, bno) != NULL)
+		return(1);
+
+	return(0);
+}
+
+/* avl64 key callback: first block of a realtime extent node */
+static __uint64_t
+avl64_rt_ext_start(avl64node_t *node)
+{
+	return(((rt_extent_tree_node_t *) node)->rt_startblock);
+}
+
+/* avl64 key callback: start block plus block count of the node */
+static __uint64_t
+avl64_ext_end(avl64node_t *node)
+{
+	return(((rt_extent_tree_node_t *) node)->rt_startblock +
+		((rt_extent_tree_node_t *) node)->rt_blockcount);
+}
+
+avl64ops_t avl64_extent_tree_ops = {
+	avl64_rt_ext_start,
+	avl64_ext_end
+};
+
+/*
+ * allocate and initialize the per-AG extent tree descriptors (dup,
+ * free-by-bno, free-by-bcnt), the single realtime dup extent tree
+ * descriptor, and reset the extent node free list
+ */
+void
+incore_ext_init(xfs_mount_t *mp)
+{
+	int i;
+	xfs_agnumber_t agcount = mp->m_sb.sb_agcount;
+
+	ba_list = NULL;
+	rt_ba_list = NULL;
+
+	if ((extent_tree_ptrs = malloc(agcount *
+					sizeof(avltree_desc_t *))) == NULL)
+		do_error("couldn't malloc dup extent tree descriptor table\n");
+
+	if ((extent_bno_ptrs = malloc(agcount *
+					sizeof(avltree_desc_t *))) == NULL)
+		do_error("couldn't malloc free by-bno extent tree descriptor table\n");
+
+	if ((extent_bcnt_ptrs = malloc(agcount *
+					sizeof(avltree_desc_t *))) == NULL)
+		do_error("couldn't malloc free by-bcnt extent tree descriptor table\n");
+
+	for (i = 0; i < agcount; i++) {
+		if ((extent_tree_ptrs[i] =
+				malloc(sizeof(avltree_desc_t))) == NULL)
+			do_error("couldn't malloc dup extent tree descriptor\n");
+		if ((extent_bno_ptrs[i] =
+				malloc(sizeof(avltree_desc_t))) == NULL)
+			do_error("couldn't malloc bno extent tree descriptor\n");
+		if ((extent_bcnt_ptrs[i] =
+				malloc(sizeof(avltree_desc_t))) == NULL)
+			do_error("couldn't malloc bcnt extent tree descriptor\n");
+	}
+
+	for (i = 0; i < agcount; i++) {
+		avl_init_tree(extent_tree_ptrs[i], &avl_extent_tree_ops);
+		avl_init_tree(extent_bno_ptrs[i], &avl_extent_tree_ops);
+		avl_init_tree(extent_bcnt_ptrs[i], &avl_extent_bcnt_tree_ops);
+	}
+
+	/*
+	 * size the descriptor from the pointer itself: it is handed to
+	 * avl64_init_tree(), so sizeof(avltree_desc_t) need not match
+	 */
+	if ((rt_ext_tree_ptr = malloc(sizeof(*rt_ext_tree_ptr))) == NULL)
+		do_error("couldn't malloc dup rt extent tree descriptor\n");
+
+	avl64_init_tree(rt_ext_tree_ptr, &avl64_extent_tree_ops);
+
+	ext_flist.cnt = 0;
+	ext_flist.list = NULL;
+
+	return;
+}
+
+/*
+ * this routine actually frees all the memory used to track per-AG trees
+ */
+void
+incore_ext_teardown(xfs_mount_t *mp)
+{
+	xfs_agnumber_t i;
+
+	free_allocations(ba_list);
+
+	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+		free(extent_tree_ptrs[i]);
+		free(extent_bno_ptrs[i]);
+		free(extent_bcnt_ptrs[i]);
+	}
+
+	free(extent_bcnt_ptrs);
+	free(extent_bno_ptrs);
+	free(extent_tree_ptrs);
+
+	extent_bcnt_ptrs = extent_bno_ptrs = extent_tree_ptrs = NULL;
+
+	return;
+}
+
+/*
+ * count the nodes in tree, walking it with findnext_bcnt_extent()
+ * when whichtree is non-zero and findnext_bno_extent() otherwise
+ */
+int
+count_extents(xfs_agnumber_t agno, avltree_desc_t *tree, int whichtree)
+{
+	extent_tree_node_t *node;
+	int i = 0;
+
+	node = (extent_tree_node_t *) tree->avl_firstino;
+
+	while (node != NULL) {
+		i++;
+		if (whichtree)
+			node = findnext_bcnt_extent(agno, node);
+		else
+			node = findnext_bno_extent(node);
+	}
+
+	return(i);
+}
+
+/*
+ * returns the number of extents in the by-bno tree of agno and stores
+ * the sum of their block counts in *numblocks
+ */
+int
+count_bno_extents_blocks(xfs_agnumber_t agno, uint *numblocks)
+{
+	__uint64_t nblocks;
+	extent_tree_node_t *node;
+	int i = 0;
+
+	ASSERT(agno < glob_agcount);
+
+	nblocks = 0;
+
+	node = (extent_tree_node_t *) extent_bno_ptrs[agno]->avl_firstino;
+
+	while (node != NULL) {
+		nblocks += node->ex_blockcount;
+		i++;
+		node = findnext_bno_extent(node);
+	}
+
+	/* NOTE(review): 64-bit sum is narrowed to uint here */
+	*numblocks = nblocks;
+	return(i);
+}
+
+int
+count_bno_extents(xfs_agnumber_t agno)
+{
+	ASSERT(agno < glob_agcount);
+	return(count_extents(agno, extent_bno_ptrs[agno], 0));
+}
+
+int
+count_bcnt_extents(xfs_agnumber_t agno)
+{
+	ASSERT(agno < glob_agcount);
+	return(count_extents(agno, extent_bcnt_ptrs[agno], 1));
+}
diff --git a/repair/incore_ino.c b/repair/incore_ino.c
new file mode 100644
index 000000000..bb14a370f
--- /dev/null
+++ b/repair/incore_ino.c
@@ -0,0 +1,834 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include "avl.h" +#include "globals.h" +#include "incore.h" +#include "agheader.h" +#include "protos.h" +#include "err_protos.h" + +extern avlnode_t *avl_firstino(avlnode_t *root); + +/* + * array of inode tree ptrs, one per ag + */ +static avltree_desc_t **inode_tree_ptrs; + +/* + * ditto for uncertain inodes + */ +static avltree_desc_t **inode_uncertain_tree_ptrs; + +#define ALLOC_NUM_INOS 100 + +/* free lists -- inode nodes and extent nodes */ + +typedef struct ino_flist_s { + ino_tree_node_t *list; + ino_tree_node_t *last; + long long cnt; +} ino_flist_t; + +static ino_flist_t ino_flist; /* free list must be initialized before use */ + +/* + * next is the uncertain inode list -- a sorted (in ascending order) + * list of inode records sorted on the starting inode number. There + * is one list per ag. + */ + +/* + * common code for creating inode records for use by trees and lists. + * called only from add_inodes and add_inodes_uncertain + * + * IMPORTANT: all inodes (inode records) start off as free and + * unconfirmed. 
+ */
+/* ARGSUSED */
+static ino_tree_node_t *
+mk_ino_tree_nodes(xfs_agino_t starting_ino)
+{
+	ino_tree_node_t *chunk;
+	ino_tree_node_t *rec;
+	int idx;
+
+	/* free list is empty: allocate a batch and thread it onto the list */
+	if (ino_flist.cnt == 0) {
+		ASSERT(ino_flist.list == NULL);
+
+		chunk = malloc(sizeof(ino_tree_node_t[ALLOC_NUM_INOS]));
+		if (chunk == NULL)
+			do_error("inode map malloc failed\n");
+
+		for (idx = 0; idx < ALLOC_NUM_INOS; idx++) {
+			rec = &chunk[idx];
+			rec->avl_node.avl_nextino =
+				(avlnode_t *) ino_flist.list;
+			ino_flist.list = rec;
+			ino_flist.cnt++;
+		}
+	}
+
+	ASSERT(ino_flist.list != NULL);
+
+	/* take the record at the head of the free list */
+	rec = ino_flist.list;
+	ino_flist.list = (ino_tree_node_t *) rec->avl_node.avl_nextino;
+	ino_flist.cnt--;
+
+	rec->avl_node.avl_back = NULL;
+	rec->avl_node.avl_forw = NULL;
+	rec->avl_node.avl_nextino = NULL;
+
+	/* initialize node: every inode starts out free and unconfirmed */
+
+	rec->ino_startnum = 0;
+	rec->ino_confirmed = 0;
+	rec->ino_isa_dir = 0;
+	rec->ir_free = (xfs_inofree_t) - 1;
+	rec->ino_un.backptrs = NULL;
+
+	return(rec);
+}
+
+/*
+ * return inode record to free list, will be initialized when
+ * it gets pulled off list
+ */
+static void
+free_ino_tree_node(ino_tree_node_t *ino_rec)
+{
+	ino_rec->avl_node.avl_forw = NULL;
+	ino_rec->avl_node.avl_back = NULL;
+
+	/* list head and count must agree on whether the list is empty */
+	if (ino_flist.list == NULL) {
+		ASSERT(ino_flist.cnt == 0);
+	} else {
+		ASSERT(ino_flist.cnt > 0);
+	}
+
+	/* push onto the head; the old head (possibly NULL) becomes next */
+	ino_rec->avl_node.avl_nextino = (avlnode_t *) ino_flist.list;
+	ino_flist.list = ino_rec;
+	ino_flist.cnt++;
+
+	/*
+	 * NOTE(review): backptrs and plist are both reached through
+	 * ino_un and look like two views of the same pointer -- confirm.
+	 * the pentries array hanging off the parent list is not freed here
+	 */
+	if (ino_rec->ino_un.backptrs != NULL) {
+		if (full_backptrs && ino_rec->ino_un.backptrs->parents != NULL)
+			free(ino_rec->ino_un.backptrs->parents);
+		if (ino_rec->ino_un.plist != NULL)
+			free(ino_rec->ino_un.plist);
+	}
+
+	return;
+}
+
+/*
+ * last referenced cache for uncertain inodes
+ */
+static ino_tree_node_t **last_rec;
+
+/*
+ * ok, the uncertain inodes are a set of trees just like the
+ * good inodes but all starting inode records are (arbitrarily)
+ * aligned on XFS_CHUNK_PER_INODE boundaries to prevent overlaps.
+ * this means we may have partial records in the tree (e.g. records
+ * without 64 confirmed uncertain inodes). Tough.
+ *
+ * free is set to 1 if the inode is thought to be free, 0 if used
+ */
+void
+add_aginode_uncertain(xfs_agnumber_t agno, xfs_agino_t ino, int free)
+{
+	ino_tree_node_t *rec;
+	xfs_agino_t chunk_start;
+	int bit;
+
+	ASSERT(agno < glob_agcount);
+	ASSERT(last_rec != NULL);
+
+	chunk_start = rounddown(ino, XFS_INODES_PER_CHUNK);
+	bit = ino - chunk_start;
+
+	rec = last_rec[agno];
+
+	if (rec == NULL || rec->ino_startnum != chunk_start) {
+		/*
+		 * cache miss -- check to see if record containing inode
+		 * is already in the tree.  if not, add it
+		 */
+		rec = (ino_tree_node_t *)
+			avl_findrange(inode_uncertain_tree_ptrs[agno],
+					chunk_start);
+
+		if (rec == NULL) {
+			rec = mk_ino_tree_nodes(chunk_start);
+			rec->ino_startnum = chunk_start;
+
+			if (avl_insert(inode_uncertain_tree_ptrs[agno],
+					(avlnode_t *) rec) == NULL) {
+				do_error("xfs_repair: duplicate inode range\n");
+			}
+		}
+	}
+
+	if (free)
+		set_inode_free(rec, bit);
+	else
+		set_inode_used(rec, bit);
+
+	/*
+	 * set cache entry (a no-op when the cache already hit)
+	 */
+	last_rec[agno] = rec;
+
+	return;
+}
+
+/*
+ * like add_aginode_uncertain() only it needs an xfs_mount_t *
+ * to perform the inode number conversion.
+ */
+void
+add_inode_uncertain(xfs_mount_t *mp, xfs_ino_t ino, int free)
+{
+	add_aginode_uncertain(XFS_INO_TO_AGNO(mp, ino),
+				XFS_INO_TO_AGINO(mp, ino), free);
+}
+
+/*
+ * pull the indicated inode record out of the uncertain inode tree
+ *
+ * The record is unlinked from the uncertain tree of agno and its tree
+ * links are cleared; it is not returned to the free list.
+ *
+ * NOTE(review): the last_rec[] cache is not touched here -- callers
+ * presumably use clear_uncertain_ino_cache() when needed; confirm.
+ */
+void
+get_uncertain_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec)
+{
+	/* check the tree that is actually modified below */
+	ASSERT(inode_uncertain_tree_ptrs != NULL);
+	ASSERT(inode_uncertain_tree_ptrs[agno] != NULL);
+
+	avl_delete(inode_uncertain_tree_ptrs[agno], &ino_rec->avl_node);
+
+	ino_rec->avl_node.avl_nextino = NULL;
+	ino_rec->avl_node.avl_forw = NULL;
+	ino_rec->avl_node.avl_back = NULL;
+}
+
+/*
+ * returns the first record of the uncertain inode tree of agno,
+ * NULL if the tree is empty
+ */
+ino_tree_node_t *
+findfirst_uncertain_inode_rec(xfs_agnumber_t agno)
+{
+	return((ino_tree_node_t *)
+			inode_uncertain_tree_ptrs[agno]->avl_firstino);
+}
+
+/*
+ * forget the last-referenced uncertain inode record for agno
+ */
+void
+clear_uncertain_ino_cache(xfs_agnumber_t agno)
+{
+	last_rec[agno] = NULL;
+
+	return;
+}
+
+
+/*
+ * next comes the inode trees. One per ag. AVL trees
+ * of inode records, each inode record tracking 64 inodes
+ */
+/*
+ * set up an inode tree record for a group of inodes that will
+ * include the requested inode.
+ *
+ * does NOT error-check for duplicate records. Caller is
+ * responsible for checking that.
+ *
+ * ino must be the start of an XFS_INODES_PER_CHUNK (64) inode chunk
+ *
+ * Each inode resides in a 64-inode chunk which can be part
+ * one or more chunks (MAX(64, inodes-per-block). The fs allocates
+ * in chunks (as opposed to 1 chunk) when a block can hold more than
+ * one chunk (inodes per block > 64). Allocating in one chunk pieces
+ * causes us problems when it takes more than one fs block to contain
+ * an inode chunk because the chunks can start on *any* block boundary.
+ * So we assume that the caller has a clue because at this level, we
+ * don't.
+ */
+static ino_tree_node_t *
+add_inode(xfs_agnumber_t agno, xfs_agino_t ino)
+{
+	ino_tree_node_t *rec;
+	avlnode_t *inserted;
+
+	/* no record exists, make some and put them into the tree */
+
+	rec = mk_ino_tree_nodes(ino);
+	rec->ino_startnum = ino;
+
+	inserted = avl_insert(inode_tree_ptrs[agno], (avlnode_t *) rec);
+	if (inserted == NULL) {
+		do_error("xfs_repair: duplicate inode range\n");
+	}
+
+	return(rec);
+}
+
+/*
+ * pull the indicated inode record out of the inode tree
+ */
+void
+get_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec)
+{
+	avlnode_t *links = &ino_rec->avl_node;
+
+	ASSERT(inode_tree_ptrs != NULL);
+	ASSERT(inode_tree_ptrs[agno] != NULL);
+
+	avl_delete(inode_tree_ptrs[agno], links);
+
+	/* the record no longer belongs to any tree */
+	links->avl_nextino = NULL;
+	links->avl_forw = NULL;
+	links->avl_back = NULL;
+}
+
+/*
+ * free the designated inode record (return it to the free pool)
+ */
+/* ARGSUSED */
+void
+free_inode_rec(xfs_agnumber_t agno, ino_tree_node_t *ino_rec)
+{
+	free_ino_tree_node(ino_rec);
+}
+
+/*
+ * returns the inode record desired containing the inode
+ * returns NULL if inode doesn't exist. The tree-based find
+ * routines do NOT pull records out of the tree.
+ */
+ino_tree_node_t *
+find_inode_rec(xfs_agnumber_t agno, xfs_agino_t ino)
+{
+	avlnode_t *hit;
+
+	hit = avl_findrange(inode_tree_ptrs[agno], ino);
+
+	return((ino_tree_node_t *) hit);
+}
+
+/*
+ * look up the records of agno for the range start_ino..end_ino with
+ * avl_findranges(); *first and *last are preset to NULL before the call
+ */
+void
+find_inode_rec_range(xfs_agnumber_t agno, xfs_agino_t start_ino,
+			xfs_agino_t end_ino, ino_tree_node_t **first,
+			ino_tree_node_t **last)
+{
+	*first = NULL;
+	*last = NULL;
+
+	avl_findranges(inode_tree_ptrs[agno], start_ino,
+		end_ino, (avlnode_t **) first, (avlnode_t **) last);
+}
+
+/*
+ * if ino doesn't exist, it must be properly aligned -- on a
+ * filesystem block boundary or XFS_INODES_PER_CHUNK boundary,
+ * whichever alignment is larger.
+ */
+ino_tree_node_t *
+set_inode_used_alloc(xfs_agnumber_t agno, xfs_agino_t ino)
+{
+	ino_tree_node_t *ino_rec;
+
+	/*
+	 * check alignment -- the only way to detect this
+	 * is to see if the chunk overlaps another chunk
+	 * already in the tree
+	 */
+	ino_rec = add_inode(agno, ino);
+
+	ASSERT(ino_rec != NULL);
+	ASSERT(ino >= ino_rec->ino_startnum &&
+		ino - ino_rec->ino_startnum < XFS_INODES_PER_CHUNK);
+
+	set_inode_used(ino_rec, ino - ino_rec->ino_startnum);
+
+	return(ino_rec);
+}
+
+/*
+ * same as set_inode_used_alloc() but marks the inode free
+ */
+ino_tree_node_t *
+set_inode_free_alloc(xfs_agnumber_t agno, xfs_agino_t ino)
+{
+	ino_tree_node_t *ino_rec;
+
+	ino_rec = add_inode(agno, ino);
+
+	ASSERT(ino_rec != NULL);
+	ASSERT(ino >= ino_rec->ino_startnum &&
+		ino - ino_rec->ino_startnum < XFS_INODES_PER_CHUNK);
+
+	set_inode_free(ino_rec, ino - ino_rec->ino_startnum);
+
+	return(ino_rec);
+}
+
+/*
+ * returns the first record of the inode tree of agno, NULL if empty
+ */
+ino_tree_node_t *
+findfirst_inode_rec(xfs_agnumber_t agno)
+{
+	return((ino_tree_node_t *) inode_tree_ptrs[agno]->avl_firstino);
+}
+
+/*
+ * debugging aid: dump every record of the good (uncertain == 0) or
+ * uncertain (uncertain != 0) inode tree of agno.  All output goes to
+ * stderr so the header line cannot be separated from the records.
+ */
+void
+print_inode_list_int(xfs_agnumber_t agno, int uncertain)
+{
+	ino_tree_node_t *ino_rec;
+
+	if (!uncertain) {
+		fprintf(stderr, "good inode list is --\n");
+		ino_rec = findfirst_inode_rec(agno);
+	} else {
+		fprintf(stderr, "uncertain inode list is --\n");
+		ino_rec = findfirst_uncertain_inode_rec(agno);
+	}
+
+	if (ino_rec == NULL) {
+		fprintf(stderr, "agno %d -- no inodes\n", agno);
+		return;
+	}
+
+	fprintf(stderr, "agno %d\n", agno);
+
+	while (ino_rec != NULL) {
+		/* casts make the arguments match %p and %llx exactly */
+		fprintf(stderr,
+	"\tptr = %p, start = 0x%x, free = 0x%llx, confirmed = 0x%llx\n",
+			(void *) ino_rec,
+			ino_rec->ino_startnum,
+			(unsigned long long) ino_rec->ir_free,
+			(unsigned long long) ino_rec->ino_confirmed);
+		ino_rec = next_ino_rec(ino_rec);
+	}
+}
+
+void
+print_inode_list(xfs_agnumber_t agno)
+{
+	print_inode_list_int(agno, 0);
+}
+
+void
+print_uncertain_inode_list(xfs_agnumber_t agno)
+{
+	print_inode_list_int(agno, 1);
+}
+
+/*
+ * set parent -- use a bitmask and a packed array.
The bitmask
+ * indicate which inodes have an entry in the array. An inode that
+ * is the Nth bit set in the mask is stored in the Nth location in
+ * the array where N starts at 0.
+ */
+/*
+ * record parent as the parent of the inode at offset within irec.
+ * offset must lie in 0 .. XFS_INODES_PER_CHUNK - 1.  The packed array
+ * is reallocated one entry larger when the inode had no entry yet.
+ * Only valid before add_ino_backptrs() has switched to full backptrs.
+ */
+void
+set_inode_parent(ino_tree_node_t *irec, int offset, xfs_ino_t parent)
+{
+	int i;
+	int cnt;
+	int target;
+	__uint64_t bitmask;
+	parent_entry_t *tmp;
+
+	ASSERT(full_backptrs == 0);
+	ASSERT(offset >= 0 && offset < XFS_INODES_PER_CHUNK);
+
+	if (irec->ino_un.plist == NULL) {
+		irec->ino_un.plist =
+			(parent_list_t*)malloc(sizeof(parent_list_t));
+		if (!irec->ino_un.plist)
+			do_error("couldn't malloc parent list table\n");
+
+		/* unsigned shift: offset 63 would overflow a signed 1LL */
+		irec->ino_un.plist->pmask = 1ULL << offset;
+		irec->ino_un.plist->pentries =
+			(xfs_ino_t*)memalign(sizeof(xfs_ino_t), sizeof(xfs_ino_t));
+		if (!irec->ino_un.plist->pentries)
+			do_error("couldn't memalign pentries table\n");
+#ifdef DEBUG
+		irec->ino_un.plist->cnt = 1;
+#endif
+		irec->ino_un.plist->pentries[0] = parent;
+
+		return;
+	}
+
+	if (irec->ino_un.plist->pmask & (1ULL << offset)) {
+		/* entry exists: its slot is the number of lower bits set */
+		bitmask = 1ULL;
+		target = 0;
+
+		for (i = 0; i < offset; i++) {
+			if (irec->ino_un.plist->pmask & bitmask)
+				target++;
+			bitmask <<= 1;
+		}
+#ifdef DEBUG
+		ASSERT(target < irec->ino_un.plist->cnt);
+#endif
+		irec->ino_un.plist->pentries[target] = parent;
+
+		return;
+	}
+
+	/*
+	 * no entry yet: count all entries (cnt) and those that sort
+	 * before the new one (target), then grow the array by one
+	 */
+	bitmask = 1ULL;
+	cnt = target = 0;
+
+	for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
+		if (irec->ino_un.plist->pmask & bitmask) {
+			cnt++;
+			if (i < offset)
+				target++;
+		}
+
+		bitmask <<= 1;
+	}
+
+#ifdef DEBUG
+	ASSERT(cnt == irec->ino_un.plist->cnt);
+#endif
+	ASSERT(cnt >= target);
+
+	tmp = (xfs_ino_t*)memalign(sizeof(xfs_ino_t), (cnt + 1) * sizeof(xfs_ino_t));
+	if (!tmp)
+		do_error("couldn't memalign pentries table\n");
+
+	/* entries below the new slot keep their position ... */
+	(void) bcopy(irec->ino_un.plist->pentries, tmp,
+			target * sizeof(parent_entry_t));
+
+	/* ... entries at or above it move up by one */
+	if (cnt > target)
+		(void) bcopy(irec->ino_un.plist->pentries + target,
+				tmp + target + 1,
+				(cnt - target) * sizeof(parent_entry_t));
+
+	free(irec->ino_un.plist->pentries);
+
+	irec->ino_un.plist->pentries = tmp;
+
+#ifdef DEBUG
+	irec->ino_un.plist->cnt++;
+#endif
+	irec->ino_un.plist->pentries[target] = parent;
+	irec->ino_un.plist->pmask |= (1ULL << offset);
+
+	return;
+}
+
+#if 0
+/*
+ * not needed for now since we don't set the parent info
+ * until phase 4 -- at which point we know that the directory
+ * inode won't be going away -- so we won't ever need to clear
+ * directory parent data that we set.
+ */
+void
+clear_inode_parent(ino_tree_node_t *irec, int offset)
+{
+	ASSERT(full_backptrs == 0);
+	ASSERT(irec->ino_un.plist != NULL);
+
+	return;
+}
+#endif
+
+/*
+ * returns the parent recorded for the inode at offset within irec,
+ * or 0 when no parent table exists or the inode has no entry in it
+ */
+xfs_ino_t
+get_inode_parent(ino_tree_node_t *irec, int offset)
+{
+	__uint64_t bitmask;
+	parent_list_t *ptbl;
+	int i;
+	int target;
+
+	ASSERT(offset >= 0 && offset < XFS_INODES_PER_CHUNK);
+
+	if (full_backptrs)
+		ptbl = irec->ino_un.backptrs->parents;
+	else
+		ptbl = irec->ino_un.plist;
+
+	/* no table was ever allocated, so no parent was ever set */
+	if (ptbl == NULL)
+		return(0LL);
+
+	if (ptbl->pmask & (1ULL << offset)) {
+		bitmask = 1ULL;
+		target = 0;
+
+		for (i = 0; i < offset; i++) {
+			if (ptbl->pmask & bitmask)
+				target++;
+			bitmask <<= 1;
+		}
+#ifdef DEBUG
+		ASSERT(target < ptbl->cnt);
+#endif
+		return(ptbl->pentries[target]);
+	}
+
+	return(0LL);
+}
+
+/*
+ * code that deals with the inode descriptor appendages -- the back
+ * pointers, link counts and reached bits for phase 6 and phase 7.
+ */
+
+/*
+ * bump the link count of the inode and mark it reached
+ */
+void
+add_inode_reached(ino_tree_node_t *ino_rec, int ino_offset)
+{
+	ASSERT(ino_rec->ino_un.backptrs != NULL);
+
+	ino_rec->ino_un.backptrs->nlinks[ino_offset]++;
+	XFS_INO_RCHD_SET_RCHD(ino_rec, ino_offset);
+
+	ASSERT(is_inode_reached(ino_rec, ino_offset));
+
+	return;
+}
+
+int
+is_inode_reached(ino_tree_node_t *ino_rec, int ino_offset)
+{
+	ASSERT(ino_rec->ino_un.backptrs != NULL);
+	return(XFS_INO_RCHD_IS_RCHD(ino_rec, ino_offset));
+}
+
+/*
+ * bump the link count of the inode without touching its reached bit
+ */
+void
+add_inode_ref(ino_tree_node_t *ino_rec, int ino_offset)
+{
+	ASSERT(ino_rec->ino_un.backptrs != NULL);
+
+	ino_rec->ino_un.backptrs->nlinks[ino_offset]++;
+
+	return;
+}
+
+/*
+ * drop one link; the reached bit is cleared when the count hits zero
+ */
+void
+drop_inode_ref(ino_tree_node_t *ino_rec, int ino_offset)
+{
+	ASSERT(ino_rec->ino_un.backptrs != NULL);
+	ASSERT(ino_rec->ino_un.backptrs->nlinks[ino_offset] > 0);
+
+	if (--ino_rec->ino_un.backptrs->nlinks[ino_offset] == 0)
+		XFS_INO_RCHD_CLR_RCHD(ino_rec, ino_offset);
+
+	return;
+}
+
+int
+is_inode_referenced(ino_tree_node_t *ino_rec, int ino_offset)
+{
+	ASSERT(ino_rec->ino_un.backptrs != NULL);
+	return(ino_rec->ino_un.backptrs->nlinks[ino_offset] > 0);
+}
+
+__uint32_t
+num_inode_references(ino_tree_node_t *ino_rec, int ino_offset)
+{
+	ASSERT(ino_rec->ino_un.backptrs != NULL);
+	return(ino_rec->ino_un.backptrs->nlinks[ino_offset]);
+}
+
+#if 0
+static backptrs_t *bptrs;
+static int bptrs_index;
+#define BPTR_ALLOC_NUM 1000
+
+backptrs_t *
+get_backptr(void)
+{
+	backptrs_t *bptr;
+
+	if (bptrs_index == BPTR_ALLOC_NUM) {
+		ASSERT(bptrs == NULL);
+
+		if ((bptrs = malloc(sizeof(backptrs_t[BPTR_ALLOC_NUM])))
+				== NULL) {
+			do_error("couldn't malloc ino rec backptrs.\n");
+		}
+
+		bptrs_index = 0;
+	}
+
+	ASSERT(bptrs != NULL);
+
+	bptr = &bptrs[bptrs_index];
+	bptrs_index++;
+
+	if (bptrs_index == BPTR_ALLOC_NUM)
+		bptrs = NULL;
+
+	bzero(bptr, sizeof(backptrs_t));
+
+	return(bptr);
+}
+#endif
+
+/*
+ * returns a newly allocated, zero-filled backptrs_t
+ */
+backptrs_t *
+get_backptr(void)
+{
+	backptrs_t *ptr;
+
+	/* calloc returns zeroed memory, replacing malloc + bzero */
+	if ((ptr = calloc(1, sizeof(*ptr))) == NULL)
+		do_error("could not malloc back pointer table\n");
+
+	return(ptr);
+}
+
+/*
+ * give every inode record in every AG a backptrs_t, moving the
+ * existing parent list (possibly NULL) into its parents field, then
+ * switch the file over to full_backptrs mode
+ */
+void
+add_ino_backptrs(xfs_mount_t *mp)
+{
+#ifdef XR_BCKPTR_DBG
+	xfs_ino_t ino;
+	int j, k;
+#endif /* XR_BCKPTR_DBG */
+	ino_tree_node_t *ino_rec;
+	parent_list_t *tmp;
+	xfs_agnumber_t i;
+
+	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+		ino_rec = findfirst_inode_rec(i);
+
+		while (ino_rec != NULL) {
+			/* read plist before backptrs overwrites ino_un */
+			tmp = ino_rec->ino_un.plist;
+			ino_rec->ino_un.backptrs = get_backptr();
+			ino_rec->ino_un.backptrs->parents = tmp;
+
+#ifdef XR_BCKPTR_DBG
+			/*
+			 * NOTE(review): pentries is indexed by inode offset
+			 * here, but set_inode_parent() stores it packed by
+			 * pmask bit position -- confirm before relying on
+			 * this dump
+			 */
+			if (tmp != NULL) {
+				k = 0;
+				for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
+					ino = XFS_AGINO_TO_INO(mp, i,
+						ino_rec->ino_startnum + j);
+					if (ino == 25165846) {
+						do_warn("THERE 1 !!!\n");
+					}
+					if (tmp->pentries[j] != 0) {
+						k++;
+						do_warn(
+						"inode %llu - parent %llu\n",
+							ino,
+							tmp->pentries[j]);
+						if (ino == 25165846) {
+							do_warn("THERE!!!\n");
+						}
+					}
+				}
+
+				if (k != tmp->cnt) {
+					do_warn(
+					"ERROR - count = %d, counted %d\n",
+						tmp->cnt, k);
+				}
+			}
+#endif /* XR_BCKPTR_DBG */
+			ino_rec = next_ino_rec(ino_rec);
+		}
+	}
+
+	full_backptrs = 1;
+
+	return;
+}
+
+/* avl key callback: first inode number covered by the record */
+static __psunsigned_t
+avl_ino_start(avlnode_t *node)
+{
+	return((__psunsigned_t) ((ino_tree_node_t *) node)->ino_startnum);
+}
+
+/* avl key callback: ino_startnum + XFS_INODES_PER_CHUNK */
+static __psunsigned_t
+avl_ino_end(avlnode_t *node)
+{
+	return((__psunsigned_t) (
+		((ino_tree_node_t *) node)->ino_startnum +
+		XFS_INODES_PER_CHUNK));
+}
+
+avlops_t avl_ino_tree_ops = {
+	avl_ino_start,
+	avl_ino_end
+};
+
+/*
+ * allocate and initialize the per-AG inode and uncertain-inode tree
+ * descriptors, reset the inode record free list, allocate the cleared
+ * per-AG last_rec cache and start out with full_backptrs off
+ */
+void
+incore_ino_init(xfs_mount_t *mp)
+{
+	int i;
+	int agcount = mp->m_sb.sb_agcount;
+
+	if ((inode_tree_ptrs = malloc(agcount *
+					sizeof(avltree_desc_t *))) == NULL)
+		do_error("couldn't malloc inode tree descriptor table\n");
+	if ((inode_uncertain_tree_ptrs = malloc(agcount *
+					sizeof(avltree_desc_t *))) == NULL)
+		do_error("couldn't malloc uncertain ino tree descriptor table\n");
+
+	for (i = 0; i < agcount; i++) {
+		if ((inode_tree_ptrs[i] =
+				malloc(sizeof(avltree_desc_t))) == NULL)
+			do_error("couldn't malloc inode tree descriptor\n");
+		if ((inode_uncertain_tree_ptrs[i] =
+				malloc(sizeof(avltree_desc_t))) == NULL)
+			do_error(
+			"couldn't malloc uncertain ino tree descriptor\n");
+	}
+	for (i = 0; i < agcount; i++) {
+		avl_init_tree(inode_tree_ptrs[i], &avl_ino_tree_ops);
+		avl_init_tree(inode_uncertain_tree_ptrs[i], &avl_ino_tree_ops);
+	}
+
+	ino_flist.cnt = 0;
+	ino_flist.list = NULL;
+
+	/* zero-filled so every AG starts with an empty cache slot */
+	if ((last_rec = calloc(agcount, sizeof(ino_tree_node_t *))) == NULL)
+		do_error("couldn't malloc uncertain inode cache area\n");
+
+	full_backptrs = 0;
+
+	return;
+}
+
+#ifdef XR_INO_REF_DEBUG
+void
+add_inode_refchecked(xfs_ino_t ino, ino_tree_node_t *ino_rec, int ino_offset)
+{
+	XFS_INOPROC_SET_PROC((ino_rec), (ino_offset));
+
+	ASSERT(is_inode_refchecked(ino, ino_rec, ino_offset));
+
+	return;
+}
+
+int
+is_inode_refchecked(xfs_ino_t ino, ino_tree_node_t *ino_rec, int ino_offset)
+{
+	return(XFS_INOPROC_IS_PROC(ino_rec, ino_offset) == 0LL ? 0 : 1);
+}
+#endif /* XR_INO_REF_DEBUG */
diff --git a/repair/init.c b/repair/init.c
new file mode 100644
index 000000000..cc61650c7
--- /dev/null
+++ b/repair/init.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.
Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include
+#include "globals.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+/*
+ * fill in *args from the global option state (isa_file, fs_name,
+ * log_spec, log_name, no_modify) and hand it to libxfs_init();
+ * a failed libxfs_init() is reported through do_error()
+ */
+void
+xfs_init(libxfs_init_t *args)
+{
+	memset(args, 0, sizeof(libxfs_init_t));
+
+	/* fs_name is either a data file or a volume, never both */
+	args->disfile = isa_file ? 1 : 0;
+	args->dname = isa_file ? fs_name : NULL;
+	args->volname = isa_file ? NULL : fs_name;
+
+	if (log_spec) {	/* External log specified */
+		args->logname = log_name;
+		/* XXX assume data file also means log file */
+		/* REVISIT: Need to do fs sanity / log validity checking */
+		args->lisfile = (isa_file?1:0);
+	}
+
+	args->notvolmsg = "you should never get this message - %s";
+	args->notvolok = 1;
+
+	if (no_modify)
+		args->isreadonly = (LIBXFS_ISREADONLY | LIBXFS_ISINACTIVE);
+
+	if (libxfs_init(args))
+		return;
+
+	do_error("couldn't initialize XFS library\n");
+}
diff --git a/repair/io.c b/repair/io.c
new file mode 100644
index 000000000..0b400ceef
--- /dev/null
+++ b/repair/io.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include
+#include
+#include "globals.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+/*
+ * open fs_name read/write into fs_fd, then size and allocate the
+ * global i/o buffers (iobuf, smallbuf and the NUM_SBS sb_bufs)
+ */
+void
+io_init(void)
+{
+	int n;
+
+	/* open up filesystem device */
+
+	ASSERT(fs_name != NULL && *fs_name != '\0');
+
+	fs_fd = open (fs_name, O_RDWR);
+	if (fs_fd < 0) {
+		do_error("couldn't open filesystem \"%s\"\n",
+			fs_name);
+	}
+
+	/* initialize i/o buffers */
+
+	iobuf_size = 1000 * 1024;
+	smallbuf_size = 4 * 4096;	/* enough for an ag */
+
+	/*
+	 * sbbuf_size must be < XFS_MIN_AG_BLOCKS (64) * smallest block size,
+	 * otherwise you might get an EOF when reading in the sb/agf from
+	 * the last ag if that ag is small
+	 */
+	sbbuf_size = 2 * 4096;		/* 2 * max sector size */
+
+	iobuf = malloc(iobuf_size);
+	if (iobuf == NULL)
+		do_error("couldn't malloc io buffer\n");
+
+	smallbuf = malloc(smallbuf_size);
+	if (smallbuf == NULL)
+		do_error("couldn't malloc secondary io buffer\n");
+
+	for (n = 0; n < NUM_SBS; n++) {
+		sb_bufs[n] = malloc(sbbuf_size);
+		if (sb_bufs[n] == NULL)
+			do_error("couldn't malloc sb io buffers\n");
+	}
+}
diff --git a/repair/phase1.c b/repair/phase1.c
new file mode 100644
index 000000000..a3bc895a9
--- /dev/null
+++ b/repair/phase1.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include
+#include
+#include "globals.h"
+#include "agheader.h"
+#include "protos.h"
+#include "err_protos.h"
+
+/*
+ * warn that no usable secondary superblock was found and exit(1)
+ */
+void
+no_sb(void)
+{
+	do_warn("Sorry, could not find valid secondary superblock\n");
+	do_warn("Exiting now.\n");
+	exit(1);
+}
+
+/*
+ * returns a MEM_ALIGN-aligned buffer of size bytes; an allocation
+ * failure is reported through do_error()
+ */
+char *
+alloc_ag_buf(int size)
+{
+	char *buf = (char *)memalign(MEM_ALIGN, size);
+
+	if (buf == NULL)
+		do_error("could not allocate ag header buffer (%d bytes)\n",
+			size);
+	return(buf);
+}
+
+/*
+ * this has got to be big enough to hold 4 sectors
+ */
+#define MAX_SECTSIZE		(512 * 1024)
+
+/*
+ * phase 1: read the primary superblock, verify it, fall back to a
+ * secondary superblock when verification fails, and write the result
+ * back unless running in no_modify mode
+ */
+/* ARGSUSED */
+void
+phase1(xfs_mount_t *mp)
+{
+	xfs_sb_t *sb;
+	char *ag_bp;
+	int rval;
+
+	io_init();
+
+	do_log("Phase 1 - find and verify superblock...\n");
+
+	primary_sb_modified = 0;
+	need_root_inode = 0;
+	need_root_dotdot = 0;
+	need_rbmino = 0;
+	need_rsumino = 0;
+	lost_quotas = 0;
+	old_orphanage_ino = (xfs_ino_t) 0;
+
+	/*
+	 * get AG 0 into ag header buf
+	 */
+	ag_bp = alloc_ag_buf(MAX_SECTSIZE);
+	sb = (xfs_sb_t *) ag_bp;
+
+	if (get_sb(sb, 0LL, MAX_SECTSIZE, 0) == XR_EOF) {
+		do_error("error reading primary superblock\n");
+	}
+
+	/*
+	 * is this really an sb, verify internal consistency.  the
+	 * second check only runs when the first one passed
+	 */
+	rval = verify_sb(sb, 1);
+	if (rval != XR_OK) {
+		do_warn("bad primary superblock - %s !!!\n",
+			err_string(rval));
+	} else {
+		rval = verify_set_primary_sb(sb, 0, &primary_sb_modified);
+		if (rval != XR_OK)
+			do_warn("couldn't verify primary superblock - %s !!!\n",
+				err_string(rval));
+	}
+
+	/* either failure is handled by hunting for a secondary sb */
+	if (rval != XR_OK) {
+		if (!find_secondary_sb(sb))
+			no_sb();
+		primary_sb_modified = 1;
+	}
+
+	if (primary_sb_modified) {
+		if (no_modify) {
+			do_warn("would write modified primary superblock\n");
+		} else {
+			do_warn("writing modified primary superblock\n");
+			write_primary_sb(sb, sb->sb_sectsize);
+		}
+	}
+
+	/*
+	 * misc. global var initialization
+	 */
+	sb_ifree = sb_icount = sb_fdblocks = sb_frextents = 0;
+
+	/* sb aliases ag_bp, the buffer from alloc_ag_buf() */
+	free(sb);
+}
diff --git a/repair/phase2.c b/repair/phase2.c
new file mode 100644
index 000000000..a906892b1
--- /dev/null
+++ b/repair/phase2.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "avl.h" +#include "globals.h" +#include "agheader.h" +#include "protos.h" +#include "err_protos.h" +#include "incore.h" + +void set_mp(xfs_mount_t *mpp); +void scan_ag(xfs_agnumber_t agno); +/* + * Clear the log with libxfs_log_clear(). The device used is + * args->logdev when sb_logstart is zero (the case phase2 reports as an + * external log) and args->ddev otherwise; the start address and length + * are derived from sb_logstart and sb_logblocks. + */ +static void +zero_log(xfs_mount_t *mp, libxfs_init_t *args) +{ + int logdev = (mp->m_sb.sb_logstart == 0) ? args->logdev : args->ddev; + + libxfs_log_clear(logdev, + XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart), + (xfs_extlen_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks), + &mp->m_sb.sb_uuid, + XLOG_FMT); +} + +/* + * ok, at this point, the fs is mounted but the root inode may be + * trashed and the ag headers haven't been checked. So we have + * a valid xfs_mount_t and superblock but that's about it. That + * means we can use macros that use mount/sb fields in calculations + * but I/O or btree routines that depend on space maps or inode maps + * being correct are verboten. + * + * phase2 insists on a log device name when the log is external, zeroes + * the log unless no_modify is set, scans every AG with scan_ag() and + * finally makes sure the chunk holding the root, realtime bitmap and + * realtime summary inodes is known and that those three inodes are + * marked in use. + */ + +void +phase2(xfs_mount_t *mp, libxfs_init_t *args) +{ + xfs_agnumber_t i; + xfs_agblock_t b; + int j; + ino_tree_node_t *ino_rec; + + /* now we can start using the buffer cache routines */ + set_mp(mp); + + /* Check whether this fs has internal or external log */ + if (mp->m_sb.sb_logstart == 0) { + if (!args->logname) { + fprintf (stderr, + "This filesystem has an external log. 
" + "Specify log device with the -l option.\n"); + exit (1); + } + + fprintf (stderr, "Phase 2 - using external log on %s\n", + args->logname); + } else + fprintf (stderr, "Phase 2 - using internal log\n"); + + /* Zero log if applicable */ + if (!no_modify) { + do_log(" - zero log...\n"); + zero_log(mp, args); + } + + do_log(" - scan filesystem freespace and inode maps...\n"); + + /* + * account for space used by ag headers and log if internal + */ + set_bmap_log(mp); + set_bmap_fs(mp); + + bad_ino_btree = 0; + + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + scan_ag(i); +#ifdef XR_INODE_TRACE + print_inode_list(i); +#endif + } + + /* + * make sure we know about the root inode chunk + */ + if ((ino_rec = find_inode_rec(0, mp->m_sb.sb_rootino)) == NULL) { + ASSERT(mp->m_sb.sb_rbmino == mp->m_sb.sb_rootino + 1 && + mp->m_sb.sb_rsumino == mp->m_sb.sb_rootino + 2); + do_warn("root inode chunk not found\n"); + + /* + * mark the first 3 used, the rest are free + */ + ino_rec = set_inode_used_alloc(0, + (xfs_agino_t) mp->m_sb.sb_rootino); + set_inode_used(ino_rec, 1); + set_inode_used(ino_rec, 2); + + for (j = 3; j < XFS_INODES_PER_CHUNK; j++) + set_inode_free(ino_rec, j); + + /* + * also mark blocks + */ + for (b = 0; b < mp->m_ialloc_blks; b++) { + set_agbno_state(mp, 0, + b + XFS_INO_TO_AGBNO(mp, mp->m_sb.sb_rootino), + XR_E_INO); + } + } else { + do_log(" - found root inode chunk\n"); + + /* + * blocks are marked, just make sure they're in use + */ + if (is_inode_free(ino_rec, 0)) { + do_warn("root inode marked free, "); + set_inode_used(ino_rec, 0); + if (!no_modify) + do_warn("correcting\n"); + else + do_warn("would correct\n"); + } + + if (is_inode_free(ino_rec, 1)) { + do_warn("realtime bitmap inode marked free, "); + set_inode_used(ino_rec, 1); + if (!no_modify) + do_warn("correcting\n"); + else + do_warn("would correct\n"); + } + + if (is_inode_free(ino_rec, 2)) { + do_warn("realtime summary inode marked free, "); + set_inode_used(ino_rec, 2); + if (!no_modify) + 
do_warn("correcting\n"); + else + do_warn("would correct\n"); + } + } +} diff --git a/repair/phase3.c b/repair/phase3.c new file mode 100644 index 000000000..e9f7af53d --- /dev/null +++ b/repair/phase3.c @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "avl.h" +#include "globals.h" +#include "agheader.h" +#include "incore.h" +#include "protos.h" +#include "err_protos.h" +#include "dinode.h" + +/* + * walks an unlinked list, returns 1 on an error (bogus pointer) or + * I/O error, 0 otherwise. + * + * Starting at start_ino in AG agno, each inode number is checked with + * verify_aginum() and its buffer fetched with get_agino_buf(); a + * failure of either returns 1. An inode that passes verify_dinode() + * but has no record in the incore inode tree is added to the uncertain + * inode list and its block is marked XR_E_INO in the block map. The + * walk ends quietly at the first inode that fails verify_dinode(). + * + * NOTE(review): di_next_unlinked is read without the INT_GET() + * conversion that process_agi_unlinked() applies to the agi fields -- + * confirm whether a byte-order conversion is needed here. + * NOTE(review): nothing here guards against a cycle in the on-disk + * list -- TODO confirm a corrupt list cannot loop forever. + */ +int +walk_unlinked_list(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agino_t start_ino) +{ + xfs_buf_t *bp; + xfs_dinode_t *dip; + xfs_agino_t current_ino = start_ino; + xfs_agblock_t agbno; + int state; + + while (current_ino != NULLAGINO) { + if (!verify_aginum(mp, agno, current_ino)) + return(1); + if ((bp = get_agino_buf(mp, agno, current_ino, &dip)) == NULL) + return(1); + /* + * if this looks like a decent inode, then continue + * following the unlinked pointers. If not, bail. + */ + if (verify_dinode(mp, dip, agno, current_ino) == 0) { + /* + * check if the unlinked list points to an unknown + * inode. if so, put it on the uncertain inode list + * and set block map appropriately. + */ + if (find_inode_rec(agno, current_ino) == NULL) { + add_aginode_uncertain(agno, current_ino, 1); + agbno = XFS_AGINO_TO_AGBNO(mp, current_ino); + + switch (state = get_agbno_state(mp, + agno, agbno)) { + case XR_E_UNKNOWN: + case XR_E_FREE: + case XR_E_FREE1: + set_agbno_state(mp, agno, agbno, + XR_E_INO); + break; + case XR_E_BAD_STATE: + do_error( + "bad state in block map %d\n", + state); + abort(); + break; + default: + /* + * the block looks like inodes + * so be conservative and try + * to scavenge what's in there. 
+ * if what's there is completely + * bogus, it'll show up later + * and the inode will be trashed + * anyway, hopefully without + * losing too much other data + */ + set_agbno_state(mp, agno, agbno, + XR_E_INO); + break; + } + } + current_ino = dip->di_next_unlinked; + } else { + current_ino = NULLAGINO;; + } + libxfs_putbuf(bp); + } + + return(0); +} +/* + * Read the AGI header of AG agno and walk every non-empty unlinked + * bucket with walk_unlinked_list(). Unless no_modify is set each + * walked bucket is reset to NULLAGINO and the AGI buffer is written + * back; otherwise the buffer is simply released. A warning is + * printed if any walk reported an error. + */ +void +process_agi_unlinked(xfs_mount_t *mp, xfs_agnumber_t agno) +{ + xfs_agnumber_t i; + xfs_buf_t *bp; + xfs_agi_t *agip; + int err = 0; + int agi_dirty = 0; + + bp = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), + mp->m_sb.sb_sectsize/BBSIZE, 0); + if (!bp) { + do_error("cannot read agi block %lld for ag %u\n", + XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), agno); + exit(1); + } + + agip = XFS_BUF_TO_AGI(bp); + + ASSERT(no_modify || INT_GET(agip->agi_seqno, ARCH_CONVERT) == agno); + + for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) { + if (INT_GET(agip->agi_unlinked[i], ARCH_CONVERT) != NULLAGINO) { + err += walk_unlinked_list(mp, agno, + INT_GET(agip->agi_unlinked[i], ARCH_CONVERT)); + /* + * clear the list + */ + if (!no_modify) { + INT_SET(agip->agi_unlinked[i], ARCH_CONVERT, NULLAGINO); + agi_dirty = 1; + } + } + } + + if (err) + do_warn("error following ag %d unlinked list\n", agno); + + ASSERT(agi_dirty == 0 || agi_dirty && !no_modify); + + if (agi_dirty && !no_modify) + libxfs_writebuf(bp, 0); + else + libxfs_putbuf(bp); +} +/* + * Phase 3: for each AG, walk (and unless no_modify is set, clear) the + * AGI unlinked lists and check the uncertain inodes; then process the + * known inodes of every AG; finally keep calling + * process_uncertain_aginodes() over all AGs until a full pass adds up + * to zero. + */ +void +phase3(xfs_mount_t *mp) +{ + int i, j; + + printf("Phase 3 - for each AG...\n"); + if (!no_modify) + printf(" - scan and clear agi unlinked lists...\n"); + else + printf(" - scan (but don't clear) agi unlinked lists...\n"); + + /* + * first, let's look at the possibly bogus inodes + */ + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + /* + * walk unlinked list to add more potential inodes to list + */ + process_agi_unlinked(mp, i); + check_uncertain_aginodes(mp, i); + } + + /* ok, now that the tree's ok, let's take a good look */ + + printf( + " - process known 
inodes and perform inode discovery...\n"); + + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + do_log(" - agno = %d\n", i); + /* + * turn on directory processing (inode discovery) and + * attribute processing (extra_attr_check) + */ + process_aginodes(mp, i, 1, 0, 1); + } + + /* + * process newly discovered inode chunks + */ + printf(" - process newly discovered inodes...\n"); + do { + /* + * have to loop until no ag has any uncertain + * inodes + */ + j = 0; + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + j += process_uncertain_aginodes(mp, i); +#ifdef XR_INODE_TRACE + fprintf(stderr, + "\t\t phase 3 - process_uncertain_inodes returns %d\n", j); +#endif + } + } while (j != 0); +} + diff --git a/repair/phase4.c b/repair/phase4.c new file mode 100644 index 000000000..d3e0bd10d --- /dev/null +++ b/repair/phase4.c @@ -0,0 +1,1337 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "avl.h" +#include "globals.h" +#include "agheader.h" +#include "incore.h" +#include "protos.h" +#include "err_protos.h" +#include "dinode.h" +#include "dir.h" +#include "bmap.h" +#include "versions.h" +#include "dir2.h" + +/* + * Scan one directory leaf block (xfs_dir_leafblock_t) for entries named + * ORPHANAGE. For every match the inode number is remembered in + * old_orphanage_ino; if the inode has a record in the incore inode + * tree its on-disk inode is cleared with clear_dinode() and the record + * is marked free. The entry itself is marked for deletion by + * overwriting the first byte of its name with a slash. + * + * The dirty flag is set when the leaf block was changed; the rbuf_dirty + * flag is set when clear_dinode() returns nonzero for an inode that + * lives in the root inode buffer (rootino_bp). Returns the number of + * entries marked. + * + * NOTE(review): use_rbuf is set on a root-buffer match and never reset + * inside the loop, so a later match that reads its own buffer would + * skip the writebuf or putbuf call below -- confirm. + */ +/* ARGSUSED */ +int +lf_block_delete_orphanage(xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dir_leafblock_t *leaf, + int *dirty, + xfs_buf_t *rootino_bp, + int *rbuf_dirty) +{ + xfs_dir_leaf_entry_t *entry; + xfs_dinode_t *dino; + xfs_buf_t *bp; + ino_tree_node_t *irec; + xfs_ino_t lino; + xfs_dir_leaf_name_t *namest; + xfs_agino_t agino; + xfs_agnumber_t agno; + xfs_agino_t root_agino; + xfs_agnumber_t root_agno; + int i; + int ino_offset; + int ino_dirty; + int use_rbuf; + int len; + char fname[MAXNAMELEN + 1]; + int res; + + entry = &leaf->entries[0]; + *dirty = 0; + use_rbuf = 0; + res = 0; + root_agno = XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino); + root_agino = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino); + + for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++) { + namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, + INT_GET(entry->nameidx, ARCH_CONVERT)); + XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &lino, ARCH_CONVERT); + bcopy(namest->name, fname, entry->namelen); + fname[entry->namelen] = '\0'; + + if (fname[0] != '/' && !strcmp(fname, ORPHANAGE)) { + agino = XFS_INO_TO_AGINO(mp, lino); + agno = XFS_INO_TO_AGNO(mp, lino); + + old_orphanage_ino = lino; + + irec = find_inode_rec(agno, agino); + + /* + * if the orphanage inode is in the tree, + * get it, clear it, and mark it free. + * the inodes in the orphanage will get + * reattached to the new orphanage. 
+ */ + if (irec != NULL) { + ino_offset = agino - irec->ino_startnum; + + /* + * check if we have to use the root inode + * buffer or read one in ourselves. Note + * that the root inode is always the first + * inode of the chunk that it's in so there + * are two possible cases where lost+found + * might be in the same buffer as the root + * inode. One case is a large block + * filesystem where the two inodes are + * in different inode chunks but wind + * up in the same block (multiple chunks + * per block) and the second case (one or + * more blocks per chunk) is where the two + * inodes are in the same chunk. Note that + * inodes are allocated on disk in units + * of MAX(XFS_INODES_PER_CHUNK,sb_inopblock). + */ + if (XFS_INO_TO_FSB(mp, mp->m_sb.sb_rootino) + == XFS_INO_TO_FSB(mp, lino) || + (agno == root_agno && + agino < root_agino + XFS_INODES_PER_CHUNK)) { + use_rbuf = 1; + bp = rootino_bp; + dino = XFS_MAKE_IPTR(mp, bp, agino - + XFS_INO_TO_AGINO(mp, + mp->m_sb.sb_rootino)); + } else { + len = (int)XFS_FSB_TO_BB(mp, + MAX(1, XFS_INODES_PER_CHUNK/ + inodes_per_block)); + bp = libxfs_readbuf(mp->m_dev, + XFS_AGB_TO_DADDR(mp, agno, + XFS_AGINO_TO_AGBNO(mp, + irec->ino_startnum)), + len, 0); + if (!bp) + do_error("couldn't read %s inode %llu\n", + ORPHANAGE, lino); + + /* + * get the agbno containing the first + * inode in the chunk. In multi-block + * chunks, this gets us the offset + * relative to the beginning of a + * properly aligned buffer. In + * multi-chunk blocks, this gets us + * the correct block number. Then + * turn the block number back into + * an agino and calculate the offset + * from there to feed to make the iptr. + * the last term in effect rounds down + * to the first agino in the buffer. 
+ */ + dino = XFS_MAKE_IPTR(mp, bp, + agino - XFS_OFFBNO_TO_AGINO(mp, + XFS_AGINO_TO_AGBNO(mp, + irec->ino_startnum), + 0)); + } + + do_warn(" - clearing existing \"%s\" inode\n", + ORPHANAGE); + + ino_dirty = clear_dinode(mp, dino, lino); + + if (!use_rbuf) { + ASSERT(ino_dirty == 0 || + ino_dirty && !no_modify); + + if (ino_dirty && !no_modify) + libxfs_writebuf(bp, 0); + else + libxfs_putbuf(bp); + } else { + if (ino_dirty) + *rbuf_dirty = 1; + } + + if (inode_isadir(irec, ino_offset)) + clear_inode_isadir(irec, ino_offset); + + set_inode_free(irec, ino_offset); + } + + /* + * regardless of whether the inode num is good or + * bad, mark the entry to be junked so the + * createname in phase 6 will succeed. + */ + namest->name[0] = '/'; + *dirty = 1; + do_warn(" - marking entry \"%s\" to be deleted\n", fname); + res++; + } + } + + return(res); +} +/* + * Walk the leaf blocks of directory inode ino, starting at the block + * returned by get_first_dblock_fsbno() and following the forw field of + * each leaf header through get_bmapi(), and run + * lf_block_delete_orphanage() on every block. A changed leaf block is + * written back unless no_modify is set. The rbuf_dirty flag is cleared + * on entry and may be set by lf_block_delete_orphanage(). Returns the + * total number of entries marked for deletion. + * + * NOTE(review): a failed get_bmapi() mapping is only caught by the + * ASSERT at the top of the loop -- confirm behaviour in non-debug + * builds. + */ +int +longform_delete_orphanage(xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dino, + xfs_buf_t *rootino_bp, + int *rbuf_dirty) +{ + xfs_dir_leafblock_t *leaf; + xfs_buf_t *bp; + xfs_dfsbno_t fsbno; + xfs_dablk_t da_bno; + int dirty; + int res; + + da_bno = 0; + *rbuf_dirty = 0; + + if ((fsbno = get_first_dblock_fsbno(mp, ino, dino)) == NULLDFSBNO) { + do_error("couldn't map first leaf block of directory inode %llu\n", ino); + exit(1); + } + + /* + * cycle through the entire directory looking to delete + * every "lost+found" entry. make sure to catch duplicate + * entries. + * + * We could probably speed this up by doing a smarter lookup + * to get us to the first block that contains the hashvalue + * of "lost+found" but what the heck. that would require a + * double lookup for each level. and how big can '/' get??? + * It's probably not worth it. 
+ */ + res = 0; + + do { + ASSERT(fsbno != NULLDFSBNO); + bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno), + XFS_FSB_TO_BB(mp, 1), 0); + if (!bp) { + do_error("can't read block %u (fsbno %llu) for directory inode " + "%llu\n", da_bno, fsbno, ino); + exit(1); + } + + leaf = (xfs_dir_leafblock_t *)XFS_BUF_PTR(bp); + + if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) { + do_error("bad magic # (0x%x) for directory leaf block " + "(bno %u fsbno %llu)\n", + INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), + da_bno, fsbno); + exit(1); + } + + da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT); + + res += lf_block_delete_orphanage(mp, ino, leaf, &dirty, + rootino_bp, rbuf_dirty); + + ASSERT(dirty == 0 || dirty && !no_modify); + + if (dirty && !no_modify) + libxfs_writebuf(bp, 0); + else + libxfs_putbuf(bp); + + if (da_bno != 0) + fsbno = get_bmapi(mp, dino, ino, da_bno, XFS_DATA_FORK); + + } while (da_bno != 0); + + return(res); +} + +/* + * returns 1 if a deletion happened, 0 otherwise. 
+ */ +/* ARGSUSED */ /* + * Remove every ORPHANAGE entry from the shortform directory stored in + * root_dino. For each match whose inode has an incore inode record + * the on-disk inode is cleared with clear_dinode() and the record is + * marked free; the entry is then removed in place by moving the + * following bytes up, and di_size and the header count are + * decremented. The ino_dirty flag is set when an entry was removed. + * + * NOTE(review): res is incremented once per removed entry, so the + * value returned is a count rather than strictly 0 or 1 as the + * comment above states -- confirm callers only test for nonzero. + * NOTE(review): use_rbuf is never reset inside the loop, so a later + * match that reads its own buffer would not release it -- confirm. + */ +int +shortform_delete_orphanage(xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *root_dino, + xfs_buf_t *rootino_bp, + int *ino_dirty) +{ + xfs_dir_shortform_t *sf; + xfs_dinode_t *dino; + xfs_dir_sf_entry_t *sf_entry, *next_sfe, *tmp_sfe; + xfs_buf_t *bp; + xfs_ino_t lino; + xfs_agino_t agino; + xfs_agino_t root_agino; + int max_size; + xfs_agnumber_t agno; + xfs_agnumber_t root_agno; + int ino_dir_size; + ino_tree_node_t *irec; + int ino_offset; + int i; + int dirty; + int tmp_len; + int tmp_elen; + int len; + int use_rbuf; + char fname[MAXNAMELEN + 1]; + int res; + + sf = &root_dino->di_u.di_dirsf; + *ino_dirty = 0; + res = 0; + irec = NULL; + ino_dir_size = INT_GET(root_dino->di_core.di_size, ARCH_CONVERT); + max_size = XFS_DFORK_DSIZE_ARCH(root_dino, mp, ARCH_CONVERT); + use_rbuf = 0; + root_agno = XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino); + root_agino = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino); + + /* + * run through entries looking for "lost+found". + */ + sf_entry = next_sfe = &sf->list[0]; + for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT) && ino_dir_size > + (__psint_t)next_sfe - (__psint_t)sf; i++) { + tmp_sfe = NULL; + sf_entry = next_sfe; + XFS_DIR_SF_GET_DIRINO_ARCH(&sf_entry->inumber, &lino, ARCH_CONVERT); + bcopy(sf_entry->name, fname, sf_entry->namelen); + fname[sf_entry->namelen] = '\0'; + + if (!strcmp(ORPHANAGE, fname)) { + agno = XFS_INO_TO_AGNO(mp, lino); + agino = XFS_INO_TO_AGINO(mp, lino); + + irec = find_inode_rec(agno, agino); + + /* + * if the orphanage inode is in the tree, + * get it, clear it, and mark it free. + * the inodes in the orphanage will get + * reattached to the new orphanage. + */ + if (irec != NULL) { + do_warn(" - clearing existing \"%s\" inode\n", + ORPHANAGE); + + ino_offset = agino - irec->ino_startnum; + + /* + * check if we have to use the root inode + * buffer or read one in ourselves. 
Note + * that the root inode is always the first + * inode of the chunk that it's in so there + * are two possible cases where lost+found + * might be in the same buffer as the root + * inode. One case is a large block + * filesystem where the two inodes are + * in different inode chunks but wind + * up in the same block (multiple chunks + * per block) and the second case (one or + * more blocks per chunk) is where the two + * inodes are in the same chunk. Note that + * inodes are allocated on disk in units + * of MAX(XFS_INODES_PER_CHUNK,sb_inopblock). + */ + if (XFS_INO_TO_FSB(mp, mp->m_sb.sb_rootino) + == XFS_INO_TO_FSB(mp, lino) || + (agno == root_agno && + agino < root_agino + XFS_INODES_PER_CHUNK)) { + use_rbuf = 1; + bp = rootino_bp; + + dino = XFS_MAKE_IPTR(mp, bp, agino - + XFS_INO_TO_AGINO(mp, + mp->m_sb.sb_rootino)); + } else { + len = (int)XFS_FSB_TO_BB(mp, + MAX(1, XFS_INODES_PER_CHUNK/ + inodes_per_block)); + bp = libxfs_readbuf(mp->m_dev, + XFS_AGB_TO_DADDR(mp, agno, + XFS_AGINO_TO_AGBNO(mp, + irec->ino_startnum)), + len, 0); + if (!bp) + do_error("could not read %s inode " + "%llu\n", ORPHANAGE, lino); + /* + * get the agbno containing the first + * inode in the chunk. In multi-block + * chunks, this gets us the offset + * relative to the beginning of a + * properly aligned buffer. In + * multi-chunk blocks, this gets us + * the correct block number. Then + * turn the block number back into + * an agino and calculate the offset + * from there to feed to make the iptr. + * the last term in effect rounds down + * to the first agino in the buffer. + */ + dino = XFS_MAKE_IPTR(mp, bp, + agino - XFS_OFFBNO_TO_AGINO(mp, + XFS_AGINO_TO_AGBNO(mp, + irec->ino_startnum), + 0)); + } + + dirty = clear_dinode(mp, dino, lino); + + ASSERT(dirty == 0 || dirty && !no_modify); + + /* + * if we read the lost+found inode in to + * it, get rid of it here. 
if the lost+found + * inode is in the root inode buffer, the + * buffer will be marked dirty anyway since + * the lost+found entry in the root inode is + * also being deleted which makes the root + * inode buffer automatically dirty. + */ + if (!use_rbuf) { + dino = NULL; + if (dirty && !no_modify) + libxfs_writebuf(bp, 0); + else + libxfs_putbuf(bp); + } + + if (inode_isadir(irec, ino_offset)) + clear_inode_isadir(irec, ino_offset); + + set_inode_free(irec, ino_offset); + } + + do_warn(" - deleting existing \"%s\" entry\n", + ORPHANAGE); + + /* + * note -- exactly the same deletion code as in + * process_shortform_dir() + */ + tmp_elen = XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry); + INT_MOD(root_dino->di_core.di_size, ARCH_CONVERT, -(tmp_elen)); + + tmp_sfe = (xfs_dir_sf_entry_t *) + ((__psint_t) sf_entry + tmp_elen); + tmp_len = max_size - ((__psint_t) tmp_sfe + - (__psint_t) sf); + + memmove(sf_entry, tmp_sfe, tmp_len); + + INT_MOD(sf->hdr.count, ARCH_CONVERT, -1); + + bzero((void *) ((__psint_t) sf_entry + tmp_len), + tmp_elen); + + /* + * set the tmp value to the current + * pointer so we'll process the entry + * we just moved up + */ + tmp_sfe = sf_entry; + + /* + * WARNING: drop the index i by one + * so it matches the decremented count for + * accurate comparisons in the loop test. + * mark root inode as dirty to make deletion + * permanent. + */ + i--; + + *ino_dirty = 1; + res++; + + } + next_sfe = (tmp_sfe == NULL) + ? 
(xfs_dir_sf_entry_t *) ((__psint_t) sf_entry + + XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)) + : tmp_sfe; + } + + return(res); +} +/* + * Scan one xfs_dir2_data_t directory block for entries named ORPHANAGE. + * For a block with XFS_DIR2_BLOCK_MAGIC the scan ends at the leaf area + * located through the block tail, otherwise at m_dirblksize. Regions + * tagged XFS_DIR2_DATA_FREE_TAG are skipped, and an unused region with + * a zero, misaligned or overlong length ends the scan. + * + * For every match the inode number is remembered in old_orphanage_ino; + * if the inode has an incore inode record its on-disk inode is cleared + * with clear_dinode() and the record is marked free. The entry is + * marked for deletion by overwriting the first byte of its name with a + * slash. The dirty flag is set when the block was changed and the + * rbuf_dirty flag when clear_dinode() returns nonzero for an inode in + * the root inode buffer. Returns the number of entries marked. + * + * NOTE(review): use_rbuf is never reset inside the loop, so a later + * match that reads its own buffer would not release it -- confirm. + */ +/* ARGSUSED */ +int +lf2_block_delete_orphanage(xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dir2_data_t *data, + int *dirty, + xfs_buf_t *rootino_bp, + int *rbuf_dirty) +{ + xfs_dinode_t *dino; + xfs_buf_t *bp; + ino_tree_node_t *irec; + xfs_ino_t lino; + xfs_agino_t agino; + xfs_agnumber_t agno; + xfs_agino_t root_agino; + xfs_agnumber_t root_agno; + int ino_offset; + int ino_dirty; + int use_rbuf; + int len; + char fname[MAXNAMELEN + 1]; + int res; + char *ptr; + char *endptr; + xfs_dir2_block_tail_t *btp; + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_unused_t *dup; + + ptr = (char *)data->u; + if (INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) { + btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)data); + endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + } else + endptr = (char *)data + mp->m_dirblksize; + *dirty = 0; + use_rbuf = 0; + res = 0; + root_agno = XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino); + root_agino = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino); + + while (ptr < endptr) { + dup = (xfs_dir2_data_unused_t *)ptr; + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { + if (ptr + INT_GET(dup->length, ARCH_CONVERT) > endptr || + INT_GET(dup->length, ARCH_CONVERT) == 0 || + (INT_GET(dup->length, ARCH_CONVERT) & + (XFS_DIR2_DATA_ALIGN - 1))) + break; + ptr += INT_GET(dup->length, ARCH_CONVERT); + continue; + } + dep = (xfs_dir2_data_entry_t *)ptr; + lino = INT_GET(dep->inumber, ARCH_CONVERT); + bcopy(dep->name, fname, dep->namelen); + fname[dep->namelen] = '\0'; + + if (fname[0] != '/' && !strcmp(fname, ORPHANAGE)) { + agino = XFS_INO_TO_AGINO(mp, lino); + agno = XFS_INO_TO_AGNO(mp, lino); + + old_orphanage_ino = lino; + + irec = find_inode_rec(agno, agino); + + /* + * if the orphanage inode is in the tree, + * get it, clear it, and mark it free. 
+ * the inodes in the orphanage will get + * reattached to the new orphanage. + */ + if (irec != NULL) { + ino_offset = agino - irec->ino_startnum; + + /* + * check if we have to use the root inode + * buffer or read one in ourselves. Note + * that the root inode is always the first + * inode of the chunk that it's in so there + * are two possible cases where lost+found + * might be in the same buffer as the root + * inode. One case is a large block + * filesystem where the two inodes are + * in different inode chunks but wind + * up in the same block (multiple chunks + * per block) and the second case (one or + * more blocks per chunk) is where the two + * inodes are in the same chunk. Note that + * inodes are allocated on disk in units + * of MAX(XFS_INODES_PER_CHUNK,sb_inopblock). + */ + if (XFS_INO_TO_FSB(mp, mp->m_sb.sb_rootino) + == XFS_INO_TO_FSB(mp, lino) || + (agno == root_agno && + agino < root_agino + XFS_INODES_PER_CHUNK)) { + use_rbuf = 1; + bp = rootino_bp; + dino = XFS_MAKE_IPTR(mp, bp, agino - + XFS_INO_TO_AGINO(mp, + mp->m_sb.sb_rootino)); + } else { + len = (int)XFS_FSB_TO_BB(mp, + MAX(1, XFS_INODES_PER_CHUNK/ + inodes_per_block)); + bp = libxfs_readbuf(mp->m_dev, + XFS_AGB_TO_DADDR(mp, agno, + XFS_AGINO_TO_AGBNO(mp, + irec->ino_startnum)), + len, 0); + if (!bp) + do_error("couldn't read %s inode %llu\n", + ORPHANAGE, lino); + + /* + * get the agbno containing the first + * inode in the chunk. In multi-block + * chunks, this gets us the offset + * relative to the beginning of a + * properly aligned buffer. In + * multi-chunk blocks, this gets us + * the correct block number. Then + * turn the block number back into + * an agino and calculate the offset + * from there to feed to make the iptr. + * the last term in effect rounds down + * to the first agino in the buffer. 
+ */ + dino = XFS_MAKE_IPTR(mp, bp, + agino - XFS_OFFBNO_TO_AGINO(mp, + XFS_AGINO_TO_AGBNO(mp, + irec->ino_startnum), + 0)); + } + + do_warn(" - clearing existing \"%s\" inode\n", + ORPHANAGE); + + ino_dirty = clear_dinode(mp, dino, lino); + + if (!use_rbuf) { + ASSERT(ino_dirty == 0 || + ino_dirty && !no_modify); + + if (ino_dirty && !no_modify) + libxfs_writebuf(bp, 0); + else + libxfs_putbuf(bp); + } else { + if (ino_dirty) + *rbuf_dirty = 1; + } + + if (inode_isadir(irec, ino_offset)) + clear_inode_isadir(irec, ino_offset); + + set_inode_free(irec, ino_offset); + + } + + /* + * regardless of whether the inode num is good or + * bad, mark the entry to be junked so the + * createname in phase 6 will succeed. + */ + dep->name[0] = '/'; + *dirty = 1; + do_warn( + " - marking entry \"%s\" to be deleted\n", + fname); + res++; + } + ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + } + + return(res); +} +/* + * Walk directory inode ino one directory block (m_dirblkfsbs filesystem + * blocks) at a time up to di_size, mapping each filesystem block with + * get_bmapi() and reading the directory block with da_read_buf(). A + * directory block with an unmapped filesystem block is skipped. Every + * block read must carry the data or block magic number, otherwise + * do_error() is called; it is then passed to + * lf2_block_delete_orphanage() and written back when changed unless + * no_modify is set. The rbuf_dirty flag is cleared on entry. Returns + * the total number of entries marked for deletion. + * + * NOTE(review): the malloc failure message prints a size_t expression + * with %u -- confirm the format is right on 64-bit builds. + */ +int +longform2_delete_orphanage(xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dino, + xfs_buf_t *rootino_bp, + int *rbuf_dirty) +{ + xfs_dir2_data_t *data; + xfs_dabuf_t *bp; + xfs_dfsbno_t fsbno; + xfs_dablk_t da_bno; + int dirty; + int res; + bmap_ext_t *bmp; + int i; + + da_bno = 0; + *rbuf_dirty = 0; + fsbno = NULLDFSBNO; + bmp = malloc(mp->m_dirblkfsbs * sizeof(*bmp)); + if (!bmp) { + do_error( + "malloc failed (%u bytes) in longform2_delete_orphanage, ino %llu\n", + mp->m_dirblkfsbs * sizeof(*bmp), ino); + exit(1); + } + + /* + * cycle through the entire directory looking to delete + * every "lost+found" entry. make sure to catch duplicate + * entries. + * + * We could probably speed this up by doing a smarter lookup + * to get us to the first block that contains the hashvalue + * of "lost+found" but what the heck. that would require a + * double lookup for each level. and how big can '/' get??? + * It's probably not worth it. 
+ */ + res = 0; + + for (da_bno = 0; + da_bno < XFS_B_TO_FSB(mp, INT_GET(dino->di_core.di_size, ARCH_CONVERT)); + da_bno += mp->m_dirblkfsbs) { + for (i = 0; i < mp->m_dirblkfsbs; i++) { + fsbno = get_bmapi(mp, dino, ino, da_bno + i, + XFS_DATA_FORK); + if (fsbno == NULLDFSBNO) + break; + bmp[i].startoff = da_bno + i; + bmp[i].startblock = fsbno; + bmp[i].blockcount = 1; + bmp[i].flag = 0; + } + if (fsbno == NULLDFSBNO) + continue; + bp = da_read_buf(mp, mp->m_dirblkfsbs, bmp); + if (bp == NULL) { + do_error( + "can't read block %u (fsbno %llu) for directory inode %llu\n", + da_bno, bmp[0].startblock, ino); + exit(1); + } + + data = (xfs_dir2_data_t *)bp->data; + + if (INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_DATA_MAGIC && + INT_GET(data->hdr.magic, ARCH_CONVERT) != XFS_DIR2_BLOCK_MAGIC) { + do_error( + "bad magic # (0x%x) for directory data block (bno %u fsbno %llu)\n", + INT_GET(data->hdr.magic, ARCH_CONVERT), da_bno, bmp[0].startblock); + exit(1); + } + + res += lf2_block_delete_orphanage(mp, ino, data, &dirty, + rootino_bp, rbuf_dirty); + + ASSERT(dirty == 0 || dirty && !no_modify); + + if (dirty && !no_modify) + da_bwrite(mp, bp); + else + da_brelse(bp); + } + free(bmp); + + return(res); +} + +/* + * returns 1 if a deletion happened, 0 otherwise. 
+ */ +/* ARGSUSED */ +int +shortform2_delete_orphanage(xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *root_dino, + xfs_buf_t *rootino_bp, + int *ino_dirty) +{ + xfs_dir2_sf_t *sf; + xfs_dinode_t *dino; + xfs_dir2_sf_entry_t *sf_entry, *next_sfe, *tmp_sfe; + xfs_buf_t *bp; + xfs_ino_t lino; + xfs_agino_t agino; + xfs_agino_t root_agino; + int max_size; + xfs_agnumber_t agno; + xfs_agnumber_t root_agno; + int ino_dir_size; + ino_tree_node_t *irec; + int ino_offset; + int i; + int dirty; + int tmp_len; + int tmp_elen; + int len; + int use_rbuf; + char fname[MAXNAMELEN + 1]; + int res; + + sf = &root_dino->di_u.di_dir2sf; + *ino_dirty = 0; + irec = NULL; + ino_dir_size = INT_GET(root_dino->di_core.di_size, ARCH_CONVERT); + max_size = XFS_DFORK_DSIZE_ARCH(root_dino, mp, ARCH_CONVERT); + use_rbuf = 0; + res = 0; + root_agno = XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino); + root_agino = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino); + + /* + * run through entries looking for "lost+found". + */ + sf_entry = next_sfe = XFS_DIR2_SF_FIRSTENTRY(sf); + for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT) && ino_dir_size > + (__psint_t)next_sfe - (__psint_t)sf; i++) { + tmp_sfe = NULL; + sf_entry = next_sfe; + lino = XFS_DIR2_SF_GET_INUMBER_ARCH(sf, + XFS_DIR2_SF_INUMBERP(sf_entry), ARCH_CONVERT); + bcopy(sf_entry->name, fname, sf_entry->namelen); + fname[sf_entry->namelen] = '\0'; + + if (!strcmp(ORPHANAGE, fname)) { + agno = XFS_INO_TO_AGNO(mp, lino); + agino = XFS_INO_TO_AGINO(mp, lino); + + irec = find_inode_rec(agno, agino); + + /* + * if the orphange inode is in the tree, + * get it, clear it, and mark it free. + * the inodes in the orphanage will get + * reattached to the new orphanage. + */ + if (irec != NULL) { + do_warn(" - clearing existing \"%s\" inode\n", + ORPHANAGE); + + ino_offset = agino - irec->ino_startnum; + + /* + * check if we have to use the root inode + * buffer or read one in ourselves. 
Note + * that the root inode is always the first + * inode of the chunk that it's in so there + * are two possible cases where lost+found + * might be in the same buffer as the root + * inode. One case is a large block + * filesystem where the two inodes are + * in different inode chunks but wind + * up in the same block (multiple chunks + * per block) and the second case (one or + * more blocks per chunk) is where the two + * inodes are in the same chunk. Note that + * inodes are allocated on disk in units + * of MAX(XFS_INODES_PER_CHUNK,sb_inopblock). + */ + if (XFS_INO_TO_FSB(mp, mp->m_sb.sb_rootino) + == XFS_INO_TO_FSB(mp, lino) || + (agno == root_agno && + agino < root_agino + XFS_INODES_PER_CHUNK)) { + use_rbuf = 1; + bp = rootino_bp; + + dino = XFS_MAKE_IPTR(mp, bp, agino - + XFS_INO_TO_AGINO(mp, + mp->m_sb.sb_rootino)); + } else { + len = (int)XFS_FSB_TO_BB(mp, + MAX(1, XFS_INODES_PER_CHUNK/ + inodes_per_block)); + bp = libxfs_readbuf(mp->m_dev, + XFS_AGB_TO_DADDR(mp, agno, + XFS_AGINO_TO_AGBNO(mp, + irec->ino_startnum)), + len, 0); + if (!bp) + do_error("could not read %s inode " + "%llu\n", ORPHANAGE, lino); + /* + * get the agbno containing the first + * inode in the chunk. In multi-block + * chunks, this gets us the offset + * relative to the beginning of a + * properly aligned buffer. In + * multi-chunk blocks, this gets us + * the correct block number. Then + * turn the block number back into + * an agino and calculate the offset + * from there to feed to make the iptr. + * the last term in effect rounds down + * to the first agino in the buffer. + */ + dino = XFS_MAKE_IPTR(mp, bp, + agino - XFS_OFFBNO_TO_AGINO(mp, + XFS_AGINO_TO_AGBNO(mp, + irec->ino_startnum), + 0)); + } + + dirty = clear_dinode(mp, dino, lino); + + ASSERT(dirty == 0 || dirty && !no_modify); + + /* + * if we read the lost+found inode in to + * it, get rid of it here. 
if the lost+found + * inode is in the root inode buffer, the + * buffer will be marked dirty anyway since + * the lost+found entry in the root inode is + * also being deleted which makes the root + * inode buffer automatically dirty. + */ + if (!use_rbuf) { + dino = NULL; + if (dirty && !no_modify) + libxfs_writebuf(bp, 0); + else + libxfs_putbuf(bp); + } + + + if (inode_isadir(irec, ino_offset)) + clear_inode_isadir(irec, ino_offset); + + set_inode_free(irec, ino_offset); + } + + do_warn(" - deleting existing \"%s\" entry\n", + ORPHANAGE); + + /* + * note -- exactly the same deletion code as in + * process_shortform_dir() + */ + tmp_elen = XFS_DIR2_SF_ENTSIZE_BYENTRY(sf, sf_entry); + INT_MOD(root_dino->di_core.di_size, ARCH_CONVERT, -(tmp_elen)); + + tmp_sfe = (xfs_dir2_sf_entry_t *) + ((__psint_t) sf_entry + tmp_elen); + tmp_len = max_size - ((__psint_t) tmp_sfe + - (__psint_t) sf); + + memmove(sf_entry, tmp_sfe, tmp_len); + + INT_MOD(sf->hdr.count, ARCH_CONVERT, -1); + if (lino > XFS_DIR2_MAX_SHORT_INUM) + sf->hdr.i8count--; + + bzero((void *) ((__psint_t) sf_entry + tmp_len), + tmp_elen); + + /* + * set the tmp value to the current + * pointer so we'll process the entry + * we just moved up + */ + tmp_sfe = sf_entry; + + /* + * WARNING: drop the index i by one + * so it matches the decremented count for + * accurate comparisons in the loop test. + * mark root inode as dirty to make deletion + * permanent. + */ + i--; + + *ino_dirty = 1; + + res++; + } + next_sfe = (tmp_sfe == NULL) + ? 
(xfs_dir2_sf_entry_t *) ((__psint_t) sf_entry + + XFS_DIR2_SF_ENTSIZE_BYENTRY(sf, sf_entry)) + : tmp_sfe; + } + + return(res); +} + +void +delete_orphanage(xfs_mount_t *mp) +{ + xfs_ino_t ino; + xfs_dinode_t *dino; + xfs_buf_t *dbp; + int dirty, res, len; + + ASSERT(!no_modify); + + dbp = NULL; + dirty = res = 0; + ino = mp->m_sb.sb_rootino; + + /* + * we know the root is in use or we wouldn't be here + */ + len = (int)XFS_FSB_TO_BB(mp, + MAX(1, XFS_INODES_PER_CHUNK/inodes_per_block)); + dbp = libxfs_readbuf(mp->m_dev, + XFS_FSB_TO_DADDR(mp, XFS_INO_TO_FSB(mp, ino)), len, 0); + if (!dbp) { + do_error("could not read buffer for root inode %llu " + "(daddr %lld, size %d)\n", ino, + XFS_FSB_TO_DADDR(mp, XFS_INO_TO_FSB(mp, ino)), + len); + } + + /* + * we also know that the root inode is always the first inode + * allocated in the system, therefore it'll be at the beginning + * of the root inode chunk + */ + dino = XFS_MAKE_IPTR(mp, dbp, 0); + + switch (dino->di_core.di_format) { + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) + res = longform2_delete_orphanage(mp, ino, dino, dbp, + &dirty); + else + res = longform_delete_orphanage(mp, ino, dino, dbp, + &dirty); + break; + case XFS_DINODE_FMT_LOCAL: + if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) + res = shortform2_delete_orphanage(mp, ino, dino, dbp, + &dirty); + else + res = shortform_delete_orphanage(mp, ino, dino, dbp, + &dirty); + ASSERT(res == 0 && dirty == 0 || res == 1 && dirty == 1); + break; + default: + break; + } + + if (res) { + switch (dino->di_core.di_version) { + case XFS_DINODE_VERSION_1: + INT_MOD(dino->di_core.di_onlink, ARCH_CONVERT, -1); + INT_SET(dino->di_core.di_nlink, ARCH_CONVERT, + INT_GET(dino->di_core.di_onlink, ARCH_CONVERT)); + break; + case XFS_DINODE_VERSION_2: + INT_MOD(dino->di_core.di_nlink, ARCH_CONVERT, -1); + break; + default: + do_error("unknown version #%d in root inode\n", + dino->di_core.di_version); + } + + dirty
= 1; + } + + if (dirty) + libxfs_writebuf(dbp, 0); + else + libxfs_putbuf(dbp); +} + +/* + * null out quota inode fields in sb if they point to non-existent inodes. + * this isn't as redundant as it looks since it's possible that the sb field + * might be set but the imap and inode(s) agree that the inode is + * free in which case they'd never be cleared so the fields wouldn't + * be cleared by process_dinode(). + */ +void +quotino_check(xfs_mount_t *mp) +{ + ino_tree_node_t *irec; + + if (mp->m_sb.sb_uquotino != NULLFSINO && mp->m_sb.sb_uquotino != 0) { + irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_uquotino), + XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino)); + + if (irec == NULL || is_inode_free(irec, + XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino) - irec->ino_startnum)) { + mp->m_sb.sb_uquotino = NULLFSINO; + lost_uquotino = 1; + } else + lost_uquotino = 0; + } + + if (mp->m_sb.sb_pquotino != NULLFSINO && mp->m_sb.sb_pquotino != 0) { + irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_pquotino), + XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino)); + + if (irec == NULL || is_inode_free(irec, + XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino) - irec->ino_startnum)) { + mp->m_sb.sb_pquotino = NULLFSINO; + lost_pquotino = 1; + } else + lost_pquotino = 0; + } +} + +void +quota_sb_check(xfs_mount_t *mp) +{ + /* + * if the sb says we have quotas and we lost both, + * signal a superblock downgrade. that will cause + * the quota flags to get zeroed. (if we only lost + * one quota inode, do nothing and complain later.) + * + * if the sb says we have quotas but we didn't start out + * with any quota inodes, signal a superblock downgrade. + * + * The sb downgrades are so that older systems can mount + * the filesystem. + * + * if the sb says we don't have quotas but it looks like + * we do have quota inodes, then signal a superblock upgrade. + * + * if the sb says we don't have quotas and we have no + * quota inodes, then leave well enough alone.
+ */ + + if (fs_quotas && + (mp->m_sb.sb_uquotino == NULLFSINO || mp->m_sb.sb_uquotino == 0) && + (mp->m_sb.sb_pquotino == NULLFSINO || mp->m_sb.sb_pquotino == 0)) { + lost_quotas = 1; + fs_quotas = 0; + } else if (!verify_inum(mp, mp->m_sb.sb_uquotino) && + !verify_inum(mp, mp->m_sb.sb_pquotino)) { + fs_quotas = 1; + } +} + + +void +phase4(xfs_mount_t *mp) +{ + ino_tree_node_t *irec; + xfs_drtbno_t bno; + xfs_drtbno_t rt_start; + xfs_extlen_t rt_len; + xfs_agnumber_t i; + xfs_agblock_t j; + xfs_agblock_t ag_end; + xfs_agblock_t extent_start; + xfs_extlen_t extent_len; + int ag_hdr_len = 4 * mp->m_sb.sb_sectsize; + int ag_hdr_block; + int bstate; + int count_bcnt_extents(xfs_agnumber_t agno); + int count_bno_extents(xfs_agnumber_t agno); + + ag_hdr_block = howmany(ag_hdr_len, mp->m_sb.sb_blocksize); + + printf("Phase 4 - check for duplicate blocks...\n"); + printf(" - setting up duplicate extent list...\n"); + + irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino), + XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino)); + + /* + * we always have a root inode, even if it's free... + * if the root is free, forget it, lost+found is already gone + */ + if (is_inode_free(irec, 0) || !inode_isadir(irec, 0)) { + need_root_inode = 1; + if (no_modify) + do_warn("root inode would be lost\n"); + else + do_warn("root inode lost\n"); + } + + /* + * have to delete lost+found first so that blocks used + * by lost+found don't show up as used + */ + if (!no_modify) { + printf(" - clear lost+found (if it exists) ...\n"); + if (!need_root_inode) + delete_orphanage(mp); + } + + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + ag_end = (i < mp->m_sb.sb_agcount - 1) ?
mp->m_sb.sb_agblocks : + mp->m_sb.sb_dblocks - + (xfs_drfsbno_t) mp->m_sb.sb_agblocks * i; + extent_start = extent_len = 0; + /* + * set up duplicate extent list for this ag + */ + for (j = ag_hdr_block; j < ag_end; j++) { + + bstate = get_agbno_state(mp, i, j); + + switch (bstate) { + case XR_E_BAD_STATE: + default: + do_warn("unknown block state, ag %d, \ +block %d\n", + i, j); + /* fall through .. */ + case XR_E_UNKNOWN: + case XR_E_FREE1: + case XR_E_FREE: + case XR_E_INUSE: + case XR_E_INUSE_FS: + case XR_E_INO: + case XR_E_FS_MAP: + if (extent_start == 0) + continue; + else { + /* + * add extent and reset extent state + */ + add_dup_extent(i, extent_start, + extent_len); + extent_start = 0; + extent_len = 0; + } + break; + case XR_E_MULT: + if (extent_start == 0) { + extent_start = j; + extent_len = 1; + } else if (extent_len == MAXEXTLEN) { + add_dup_extent(i, extent_start, + extent_len); + extent_start = j; + extent_len = 1; + } else + extent_len++; + break; + } + } + /* + * catch tail-case, extent hitting the end of the ag + */ + if (extent_start != 0) + add_dup_extent(i, extent_start, extent_len); + } + + /* + * initialize realtime bitmap + */ + rt_start = 0; + rt_len = 0; + + for (bno = 0; bno < mp->m_sb.sb_rextents; bno++) { + + bstate = get_rtbno_state(mp, bno); + + switch (bstate) { + case XR_E_BAD_STATE: + default: + do_warn("unknown rt extent state, extent %llu\n", bno); + /* fall through .. 
*/ + case XR_E_UNKNOWN: + case XR_E_FREE1: + case XR_E_FREE: + case XR_E_INUSE: + case XR_E_INUSE_FS: + case XR_E_INO: + case XR_E_FS_MAP: + if (rt_start == 0) + continue; + else { + /* + * add extent and reset extent state + */ + add_rt_dup_extent(rt_start, rt_len); + rt_start = 0; + rt_len = 0; + } + break; + case XR_E_MULT: + if (rt_start == 0) { + rt_start = bno; + rt_len = 1; + } else if (rt_len == MAXEXTLEN) { + /* + * large extent case + */ + add_rt_dup_extent(rt_start, rt_len); + rt_start = bno; + rt_len = 1; + } else + rt_len++; + break; + } + } + + /* + * catch tail-case, extent hitting the end of the ag + */ + if (rt_start != 0) + add_rt_dup_extent(rt_start, rt_len); + + /* + * initialize bitmaps for all AGs + */ + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + ag_end = (i < mp->m_sb.sb_agcount - 1) ? mp->m_sb.sb_agblocks : + mp->m_sb.sb_dblocks - + (xfs_drfsbno_t) mp->m_sb.sb_agblocks * i; + /* + * now reset the bitmap for all ags + */ + bzero(ba_bmap[i], roundup(mp->m_sb.sb_agblocks*(NBBY/XR_BB), + sizeof(__uint64_t))); + for (j = 0; j < ag_hdr_block; j++) + set_agbno_state(mp, i, j, XR_E_INUSE_FS); + } + set_bmap_rt(mp->m_sb.sb_rextents); + set_bmap_log(mp); + set_bmap_fs(mp); + + printf(" - check for inodes claiming duplicate blocks...\n"); + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + /* + * ok, now process the inodes -- signal 2-pass check per inode. + * first pass checks if the inode conflicts with a known + * duplicate extent. if so, the inode is cleared and second + * pass is skipped. second pass sets the block bitmap + * for all blocks claimed by the inode. directory + * and attribute processing is turned OFF since we did that + * already in phase 3. 
+ */ + do_log(" - agno = %d\n", i); + process_aginodes(mp, i, 0, 1, 0); + + /* + * now recycle the per-AG duplicate extent records + */ + release_dup_extent_tree(i); + } + + /* + * free up memory used to track realtime duplicate extents. NOTE(review): rt_start is still nonzero here only when the rt scan above ended inside a duplicate run -- confirm this guard is intended + */ + if (rt_start != 0) + free_rt_dup_extent_tree(mp); + + /* + * ensure consistency of quota inode pointers in superblock, + * make sure they point to real inodes + */ + quotino_check(mp); + quota_sb_check(mp); +} diff --git a/repair/phase5.c b/repair/phase5.c new file mode 100644 index 000000000..2e306bd25 --- /dev/null +++ b/repair/phase5.c @@ -0,0 +1,1633 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "avl.h" +#include "globals.h" +#include "agheader.h" +#include "incore.h" +#include "protos.h" +#include "err_protos.h" +#include "dinode.h" +#include "rt.h" +#include "versions.h" + +/* + * we maintain the current slice (path from root to leaf) + * of the btree incore. when we need a new block, we ask + * the block allocator for the address of a block on that + * level, map the block in, and set up the appropriate + * pointers (child, silbing, etc.) and keys that should + * point to the new block. + */ +typedef struct bt_stat_level { + /* + * set in setup_cursor routine and maintained in the tree-building + * routines + */ + xfs_buf_t *buf_p; /* 2 buffer pointers to ... */ + xfs_buf_t *prev_buf_p; + xfs_agblock_t agbno; /* current block being filled */ + xfs_agblock_t prev_agbno; /* previous block */ + /* + * set in calculate/init cursor routines for each btree level + */ + int num_recs_tot; /* # tree recs in level */ + int num_blocks; /* # tree blocks in level */ + int num_recs_pb; /* num_recs_tot / num_blocks */ + int modulo; /* num_recs_tot % num_blocks */ +} bt_stat_level_t; + +typedef struct bt_status { + int init; /* cursor set up once? 
*/ + int num_levels; /* # of levels in btree */ + xfs_extlen_t num_tot_blocks; /* # blocks alloc'ed for tree */ + xfs_extlen_t num_free_blocks;/* # blocks currently unused */ + + xfs_agblock_t root; /* root block */ + /* + * list of blocks to be used to set up this tree + * and pointer to the first unused block on the list + */ + xfs_agblock_t *btree_blocks; /* block list */ + xfs_agblock_t *free_btree_blocks; /* first unused block */ + /* + * per-level status info + */ + bt_stat_level_t level[XFS_BTREE_MAXLEVELS]; +} bt_status_t; + + +int +mk_incore_fstree(xfs_mount_t *mp, xfs_agnumber_t agno) +{ + int in_extent; + int num_extents; + xfs_agblock_t extent_start; + xfs_extlen_t extent_len; + xfs_agblock_t agbno; + xfs_agblock_t ag_end; + uint free_blocks; +#ifdef XR_BLD_FREE_TRACE + int old_state; + int state = XR_E_BAD_STATE; +#endif + + /* + * scan the bitmap for the ag looking for contiguous + * extents of free blocks. At this point, we know + * that blocks in the bitmap are either set to an + * "in use" state or set to unknown (0) since the + * bmaps were bzero'ed in phase 4 and only blocks + * being used by inodes, inode bmaps, ag headers, + * and the files themselves were put into the bitmap. + * + */ + ASSERT(agno < mp->m_sb.sb_agcount); + + extent_start = extent_len = 0; + in_extent = 0; + num_extents = free_blocks = 0; + + if (agno < mp->m_sb.sb_agcount - 1) + ag_end = mp->m_sb.sb_agblocks; + else + ag_end = mp->m_sb.sb_dblocks - + (xfs_drfsbno_t) mp->m_sb.sb_agblocks * (mp->m_sb.sb_agcount - 1); + + /* + * ok, now find the number of extents, keep track of the + * largest extent.
+ */ + for (agbno = 0; agbno < ag_end; agbno++) { +#if 0 + old_state = state; + state = get_agbno_state(mp, agno, agbno); + if (state != old_state) { + fprintf(stderr, "agbno %u - new state is %d\n", + agbno, state); + } +#endif + if (get_agbno_state(mp, agno, agbno) < XR_E_INUSE) { + free_blocks++; + if (in_extent == 0) { + /* + * found the start of a free extent + */ + in_extent = 1; + num_extents++; + extent_start = agbno; + extent_len = 1; + } else { + extent_len++; + } + } else { + if (in_extent) { + /* + * free extent ends here, add extent to the + * 2 incore extent (avl-to-be-B+) trees + */ + in_extent = 0; +#if defined(XR_BLD_FREE_TRACE) && defined(XR_BLD_ADD_EXTENT) + fprintf(stderr, "adding extent %u [%u %u]\n", + agno, extent_start, extent_len); +#endif + add_bno_extent(agno, extent_start, extent_len); + add_bcnt_extent(agno, extent_start, extent_len); + } + } + } + if (in_extent) { + /* + * free extent ends here + */ + in_extent = 0; +#if defined(XR_BLD_FREE_TRACE) && defined(XR_BLD_ADD_EXTENT) + fprintf(stderr, "adding extent %u [%u %u]\n", + agno, extent_start, extent_len); +#endif + add_bno_extent(agno, extent_start, extent_len); + add_bcnt_extent(agno, extent_start, extent_len); + } + + return(num_extents); +} + +/* ARGSUSED */ +xfs_agblock_t +get_next_blockaddr(xfs_agnumber_t agno, int level, bt_status_t *curs) +{ + ASSERT(curs->free_btree_blocks < curs->btree_blocks + + curs->num_tot_blocks); + ASSERT(curs->num_free_blocks > 0); + + curs->num_free_blocks--; + return(*curs->free_btree_blocks++); +} + +/* + * set up the dynamically allocated block allocation data in the btree + * cursor that depends on the info in the static portion of the cursor. + * allocates space from the incore bno/bcnt extent trees and sets up + * the first path up the left side of the tree. Also sets up the + * cursor pointer to the btree root. 
called by init_freespace_cursor() + * and init_ino_cursor() + */ +/* ARGSUSED */ +void +setup_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *curs) +{ + int j; + unsigned int u; + xfs_extlen_t big_extent_len; + xfs_agblock_t big_extent_start; + extent_tree_node_t *ext_ptr; + extent_tree_node_t *bno_ext_ptr; + xfs_extlen_t blocks_allocated; + xfs_agblock_t *agb_ptr; + + /* + * get the number of blocks we need to allocate, then + * set up block number array, set the free block pointer + * to the first block in the array, and null the array + */ + big_extent_len = curs->num_tot_blocks; + blocks_allocated = 0; + + ASSERT(big_extent_len > 0); + + if ((curs->btree_blocks = malloc(sizeof(xfs_agblock_t *) + * big_extent_len)) == NULL) { + do_error("could not set up btree block array\n"); + exit(1); + } + + agb_ptr = curs->free_btree_blocks = curs->btree_blocks; + + for (j = 0; j < curs->num_free_blocks; j++, agb_ptr++) + *agb_ptr = NULLAGBLOCK; + + /* + * grab the smallest extent and use it up, then get the + * next smallest. This mimics the init_*_cursor code. 
+ */ + if ((ext_ptr = findfirst_bcnt_extent(agno)) == NULL) { + do_error("error - not enough free space in filesystem\n"); + exit(1); + } + + agb_ptr = curs->btree_blocks; + j = curs->level[0].num_blocks; + + /* + * set up the free block array + */ + while (blocks_allocated < big_extent_len) { + /* + * use up the extent we've got + */ + for (u = 0; u < ext_ptr->ex_blockcount && + blocks_allocated < big_extent_len; u++) { + ASSERT(agb_ptr < curs->btree_blocks + + curs->num_tot_blocks); + *agb_ptr++ = ext_ptr->ex_startblock + u; + blocks_allocated++; + } + + /* + * if we only used part of this last extent, then we + * need only to reset the extent in the extent + * trees and we're done + */ + if (u < ext_ptr->ex_blockcount) { + big_extent_start = ext_ptr->ex_startblock + u; + big_extent_len = ext_ptr->ex_blockcount - u; + + ASSERT(big_extent_len > 0); + + bno_ext_ptr = find_bno_extent(agno, + ext_ptr->ex_startblock); + ASSERT(bno_ext_ptr != NULL); + get_bno_extent(agno, bno_ext_ptr); + release_extent_tree_node(bno_ext_ptr); + + ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock, + ext_ptr->ex_blockcount); + release_extent_tree_node(ext_ptr); +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, "releasing extent: %u [%u %u]\n", + agno, ext_ptr->ex_startblock, + ext_ptr->ex_blockcount); + fprintf(stderr, "blocks_allocated = %d\n", + blocks_allocated); +#endif + + add_bno_extent(agno, big_extent_start, big_extent_len); + add_bcnt_extent(agno, big_extent_start, big_extent_len); + + return; + } + /* + * delete the used-up extent from both extent trees and + * find next biggest extent + */ +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, "releasing extent: %u [%u %u]\n", + agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount); +#endif + bno_ext_ptr = find_bno_extent(agno, ext_ptr->ex_startblock); + ASSERT(bno_ext_ptr != NULL); + get_bno_extent(agno, bno_ext_ptr); + release_extent_tree_node(bno_ext_ptr); + + ext_ptr = get_bcnt_extent(agno, ext_ptr->ex_startblock, + 
ext_ptr->ex_blockcount); + ASSERT(ext_ptr != NULL); + release_extent_tree_node(ext_ptr); + + ext_ptr = findfirst_bcnt_extent(agno); + } +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, "blocks_allocated = %d\n", + blocks_allocated); +#endif +} + +void +write_cursor(bt_status_t *curs) +{ + int i; + + for (i = 0; i < curs->num_levels; i++) { +#if defined(XR_BLD_FREE_TRACE) || defined(XR_BLD_INO_TRACE) + fprintf(stderr, "writing bt block %u\n", curs->level[i].agbno); +#endif + if (curs->level[i].prev_buf_p != NULL) { + ASSERT(curs->level[i].prev_agbno != NULLAGBLOCK); + libxfs_writebuf(curs->level[i].prev_buf_p, 0); + } + libxfs_writebuf(curs->level[i].buf_p, 0); + } +} + +void +finish_cursor(bt_status_t *curs) +{ + ASSERT(curs->num_free_blocks == 0); + free(curs->btree_blocks); +} + +/* + * no-cursor versions of the XFS equivalents. The address calculators + * should be used only for interior btree nodes. + * these are adapted from xfs_alloc_btree.h and xfs_tree.h + */ +#define XR_ALLOC_KEY_ADDR(mp, bp, i) \ + (xfs_alloc_key_t *) ((char *) (bp) + sizeof(xfs_alloc_block_t) \ + + ((i)-1) * sizeof(xfs_alloc_key_t)) + +#define XR_ALLOC_PTR_ADDR(mp, bp, i) \ + (xfs_alloc_ptr_t *) ((char *) (bp) + sizeof(xfs_alloc_block_t) \ + + (mp)->m_alloc_mxr[1] * sizeof(xfs_alloc_key_t) \ + + ((i)-1) * sizeof(xfs_alloc_ptr_t)) + +#define XR_ALLOC_BLOCK_MAXRECS(mp, level) \ + XFS_BTREE_BLOCK_MAXRECS((mp)->m_sb.sb_blocksize, \ + xfs_alloc, (level) == 0) + +/* + * this calculates a freespace cursor for an ag. + * btree_curs is an in/out. returns the number of + * blocks that will show up in the AGFL. 
+ */ + +int +calculate_freespace_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, + xfs_agblock_t *extents, bt_status_t *btree_curs) +{ + xfs_extlen_t blocks_needed; /* a running count */ + xfs_extlen_t blocks_allocated_pt; /* per tree */ + xfs_extlen_t blocks_allocated_total; /* for both trees */ + xfs_agblock_t num_extents; + int i; + int extents_used; + int extra_blocks; + bt_stat_level_t *lptr; + bt_stat_level_t *p_lptr; + extent_tree_node_t *ext_ptr; + int level; +#ifdef XR_BLD_FREE_TRACE + int old_state; + int state = XR_E_BAD_STATE; +#endif +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, + "in init_freespace_cursor, agno = %d\n", agno); +#endif + + num_extents = *extents; + extents_used = 0; + + ASSERT(num_extents != 0); + + lptr = &btree_curs->level[0]; + btree_curs->init = 1; + + /* + * figure out how much space we need for the leaf level + * of the tree and set up the cursor for the leaf level + * (note that the same code is duplicated further down) + */ + lptr->num_blocks = howmany(num_extents, XR_ALLOC_BLOCK_MAXRECS(mp, 0)); + lptr->num_recs_pb = num_extents / lptr->num_blocks; + lptr->modulo = num_extents % lptr->num_blocks; + lptr->num_recs_tot = num_extents; + level = 1; + + /* + * if we need more levels, set them up. # of records + * per level is the # of blocks in the level below it + */ + if (lptr->num_blocks > 1) { + for (; btree_curs->level[level - 1].num_blocks > 1 + && level < XFS_BTREE_MAXLEVELS; + level++) { + lptr = &btree_curs->level[level]; + p_lptr = &btree_curs->level[level - 1]; + lptr->num_blocks = howmany(p_lptr->num_blocks, + XR_ALLOC_BLOCK_MAXRECS(mp, level)); + lptr->modulo = p_lptr->num_blocks + % lptr->num_blocks; + lptr->num_recs_pb = p_lptr->num_blocks + / lptr->num_blocks; + lptr->num_recs_tot = p_lptr->num_blocks; + } + } + + ASSERT(lptr->num_blocks == 1); + btree_curs->num_levels = level; + + /* + * ok, now we have a hypothetical cursor that + * will work for both the bno and bcnt trees. 
+ * now figure out if using up blocks to set up the + * trees will perturb the shape of the freespace tree. + * if so, we've over-allocated. the freespace trees + * as they will be *after* accounting for the free space + * we've used up will need fewer blocks to to represent + * than we've allocated. We can use the AGFL to hold + * XFS_AGFL_SIZE (128) blocks but that's it. + * Thus we limit things to XFS_AGFL_SIZE/2 for each of the 2 btrees. + * if the number of extra blocks is more than that, + * we'll have to be called again. + */ + for (blocks_needed = 0, i = 0; i < level; i++) { + blocks_needed += btree_curs->level[i].num_blocks; + } + + /* + * record the # of blocks we've allocated + */ + blocks_allocated_pt = blocks_needed; + blocks_needed *= 2; + blocks_allocated_total = blocks_needed; + + /* + * figure out how many free extents will be used up by + * our space allocation + */ + if ((ext_ptr = findfirst_bcnt_extent(agno)) == NULL) { + do_error("can't rebuild fs trees -- not enough free space " + "on ag %u\n", agno); + exit(1); + } + + i = 0; + while (ext_ptr != NULL && blocks_needed > 0) { + if (ext_ptr->ex_blockcount <= blocks_needed) { + blocks_needed -= ext_ptr->ex_blockcount; + extents_used++; + } else { + blocks_needed = 0; + } + + ext_ptr = findnext_bcnt_extent(agno, ext_ptr); + +#ifdef XR_BLD_FREE_TRACE + if (ext_ptr != NULL) { + fprintf(stderr, "got next extent [%u %u]\n", + ext_ptr->ex_startblock, ext_ptr->ex_blockcount); + } else { + fprintf(stderr, "out of extents\n"); + } +#endif + } + if (blocks_needed > 0) { + do_error("ag %u - not enough free space to build freespace " + "btrees\n", agno); + exit(1); + } + + ASSERT(num_extents >= extents_used); + + num_extents -= extents_used; + + /* + * see if the number of leaf blocks will change as a result + * of the number of extents changing + */ + if (howmany(num_extents, XR_ALLOC_BLOCK_MAXRECS(mp, 0)) + != btree_curs->level[0].num_blocks) { + /* + * yes -- recalculate the cursor. 
If the number of + * excess (overallocated) blocks is < XFS_AGFL_SIZE/2, we're ok. + * we can put those into the AGFL. we don't try + * and get things to converge exactly (reach a + * state with zero excess blocks) because there + * exist pathological cases which will never + * converge. first, check for the zero-case. + */ + if (num_extents == 0) { + /* + * ok, we've used up all the free blocks + * trying to lay out the leaf level. go + * to a one block (empty) btree and put the + * already allocated blocks into the AGFL + */ + if (btree_curs->level[0].num_blocks != 1) { + /* + * we really needed more blocks because + * the old tree had more than one level. + * this is bad. + */ + do_warn("not enough free blocks left to " + "describe all free blocks in AG %u\n", + agno); + } +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, + "ag %u -- no free extents, alloc'ed %d\n", + agno, blocks_allocated_pt); +#endif + lptr->num_blocks = 1; + lptr->modulo = 0; + lptr->num_recs_pb = 0; + lptr->num_recs_tot = 0; + + btree_curs->num_levels = 1; + + /* + * don't reset the allocation stats, assume + * they're all extra blocks + * don't forget to return the total block count + * not the per-tree block count. these are the + * extras that will go into the AGFL. subtract + * two for the root blocks. 
+ */ + btree_curs->num_tot_blocks = blocks_allocated_pt; + btree_curs->num_free_blocks = blocks_allocated_pt; + + *extents = 0; + + return(blocks_allocated_total - 2); + } + + lptr = &btree_curs->level[0]; + lptr->num_blocks = howmany(num_extents, + XR_ALLOC_BLOCK_MAXRECS(mp, 0)); + lptr->num_recs_pb = num_extents / lptr->num_blocks; + lptr->modulo = num_extents % lptr->num_blocks; + lptr->num_recs_tot = num_extents; + level = 1; + + /* + * if we need more levels, set them up + */ + if (lptr->num_blocks > 1) { + for (level = 1; btree_curs->level[level-1].num_blocks + > 1 && level < XFS_BTREE_MAXLEVELS; + level++) { + lptr = &btree_curs->level[level]; + p_lptr = &btree_curs->level[level-1]; + lptr->num_blocks = howmany(p_lptr->num_blocks, + XR_ALLOC_BLOCK_MAXRECS(mp, + level)); + lptr->modulo = p_lptr->num_blocks + % lptr->num_blocks; + lptr->num_recs_pb = p_lptr->num_blocks + / lptr->num_blocks; + lptr->num_recs_tot = p_lptr->num_blocks; + } + } + ASSERT(lptr->num_blocks == 1); + btree_curs->num_levels = level; + + /* + * now figure out the number of excess blocks + */ + for (blocks_needed = 0, i = 0; i < level; i++) { + blocks_needed += btree_curs->level[i].num_blocks; + } + blocks_needed *= 2; + + ASSERT(blocks_allocated_total >= blocks_needed); + extra_blocks = blocks_allocated_total - blocks_needed; + } else { + if (extents_used > 0) { + /* + * reset the leaf level geometry to account + * for consumed extents. we can leave the + * rest of the cursor alone since the number + * of leaf blocks hasn't changed. 
+ */ + lptr = &btree_curs->level[0]; + + lptr->num_recs_pb = num_extents / lptr->num_blocks; + lptr->modulo = num_extents % lptr->num_blocks; + lptr->num_recs_tot = num_extents; + } + + extra_blocks = 0; + } + + btree_curs->num_tot_blocks = blocks_allocated_pt; + btree_curs->num_free_blocks = blocks_allocated_pt; + + *extents = num_extents; + + return(extra_blocks); +} + +void +prop_freespace_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, + bt_status_t *btree_curs, xfs_agblock_t startblock, + xfs_extlen_t blockcount, int level, __uint32_t magic) +{ + xfs_alloc_block_t *bt_hdr; + xfs_alloc_key_t *bt_key; + xfs_alloc_ptr_t *bt_ptr; + xfs_agblock_t agbno; + bt_stat_level_t *lptr; + + level++; + + if (level >= btree_curs->num_levels) + return; + + lptr = &btree_curs->level[level]; + bt_hdr = XFS_BUF_TO_ALLOC_BLOCK(lptr->buf_p); + + if (INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT) == 0) { + /* + * only happens once when initializing the + * left-hand side of the tree. + */ + prop_freespace_cursor(mp, agno, btree_curs, startblock, + blockcount, level, magic); + } + + if (INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT) == + lptr->num_recs_pb + (lptr->modulo > 0)) { + /* + * write out current prev block, grab us a new block, + * and set the rightsib pointer of current block + */ +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, " %d ", lptr->prev_agbno); +#endif + if (lptr->prev_agbno != NULLAGBLOCK) { + ASSERT(lptr->prev_buf_p != NULL); + libxfs_writebuf(lptr->prev_buf_p, 0); + } + lptr->prev_agbno = lptr->agbno;; + lptr->prev_buf_p = lptr->buf_p; + agbno = get_next_blockaddr(agno, level, btree_curs); + + INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, agbno); + + lptr->buf_p = libxfs_getbuf(mp->m_dev, + XFS_AGB_TO_DADDR(mp, agno, agbno), + XFS_FSB_TO_BB(mp, 1)); + lptr->agbno = agbno; + + if (lptr->modulo) + lptr->modulo--; + + /* + * initialize block header + */ + bt_hdr = XFS_BUF_TO_ALLOC_BLOCK(lptr->buf_p); + bzero(bt_hdr, mp->m_sb.sb_blocksize); + + INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, 
magic); + INT_SET(bt_hdr->bb_level, ARCH_CONVERT, level); + INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT, lptr->prev_agbno); + INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK); + INT_ZERO(bt_hdr->bb_numrecs, ARCH_CONVERT); + + /* + * propagate extent record for first extent in new block up + */ + prop_freespace_cursor(mp, agno, btree_curs, startblock, + blockcount, level, magic); + } + /* + * add extent info to current block + */ + INT_MOD(bt_hdr->bb_numrecs, ARCH_CONVERT, +1); + + bt_key = XR_ALLOC_KEY_ADDR(mp, bt_hdr, + INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT)); + bt_ptr = XR_ALLOC_PTR_ADDR(mp, bt_hdr, + INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT)); + + INT_SET(bt_key->ar_startblock, ARCH_CONVERT, startblock); + INT_SET(bt_key->ar_blockcount, ARCH_CONVERT, blockcount); + INT_SET(*bt_ptr, ARCH_CONVERT, btree_curs->level[level-1].agbno); +} + +/* + * rebuilds a freespace tree given a cursor and magic number of type + * of tree to build (bno or bcnt). returns the number of free blocks + * represented by the tree. 
 *
 * mp         - mount structure; supplies m_dev and m_sb.sb_blocksize
 * agno       - allocation group being rebuilt
 * btree_curs - cursor describing the tree shape.  blocks are taken from
 *              it with get_next_blockaddr() and its root field is set to
 *              the block used for the top level.
 * magic      - XFS_ABTB_MAGIC walks the incore extents with
 *              findfirst/findnext_bno_extent(); any other value walks
 *              them with findfirst/findnext_bcnt_extent().  the value is
 *              also stored in every block header.
 *
 * the return value is the sum of ex_blockcount over every extent laid
 * into a leaf record.
 *
 * at the leaf level the cursor holds the current and the previous
 * buffer; the older one is written with libxfs_writebuf() when a new
 * leaf is obtained.  buffers still held in the cursor on return are not
 * written by this routine.
 */
xfs_extlen_t
build_freespace_tree(xfs_mount_t *mp, xfs_agnumber_t agno,
		bt_status_t *btree_curs, __uint32_t magic)
{
	xfs_agnumber_t		i;
	xfs_agblock_t		j;
	xfs_alloc_block_t	*bt_hdr;
	xfs_alloc_rec_t		*bt_rec;
	int			level;
	xfs_agblock_t		agbno;
	extent_tree_node_t	*ext_ptr;
	bt_stat_level_t		*lptr;
	xfs_extlen_t		freeblks;

#ifdef XR_BLD_FREE_TRACE
	fprintf(stderr, "in build_freespace_tree, agno = %d\n", agno);
#endif
	level = btree_curs->num_levels;
	freeblks = 0;

	ASSERT(level > 0);

	/*
	 * initialize the first block on each btree level
	 */
	for (i = 0; i < level; i++) {
		lptr = &btree_curs->level[i];

		agbno = get_next_blockaddr(agno, i, btree_curs);
		lptr->buf_p = libxfs_getbuf(mp->m_dev,
					XFS_AGB_TO_DADDR(mp, agno, agbno),
					XFS_FSB_TO_BB(mp, 1));

		/* the highest level's first block is the root */
		if (i == btree_curs->num_levels - 1)
			btree_curs->root = agbno;

		lptr->agbno = agbno;
		lptr->prev_agbno = NULLAGBLOCK;
		lptr->prev_buf_p = NULL;
		/*
		 * initialize block header
		 */
		bt_hdr = XFS_BUF_TO_ALLOC_BLOCK(lptr->buf_p);
		bzero(bt_hdr, mp->m_sb.sb_blocksize);

		INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, magic);
		INT_SET(bt_hdr->bb_level, ARCH_CONVERT, i);
		/*
		 * bb_rightsib is assigned NULLAGBLOCK directly (not through
		 * INT_SET); the result of that assignment is then stored
		 * in bb_leftsib through INT_SET.
		 */
		INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT,
			bt_hdr->bb_rightsib = NULLAGBLOCK);
		INT_ZERO(bt_hdr->bb_numrecs, ARCH_CONVERT);
	}
	/*
	 * run along leaf, setting up records. as we have to switch
	 * blocks, call the prop_freespace_cursor routine to set up the new
	 * pointers for the parent. that can recurse up to the root
	 * if required. set the sibling pointers for leaf level here.
	 */
	if (magic == XFS_ABTB_MAGIC)
		ext_ptr = findfirst_bno_extent(agno);
	else
		ext_ptr = findfirst_bcnt_extent(agno);

#ifdef XR_BLD_FREE_TRACE
	/* NOTE(review): ext_ptr is dereferenced here without a NULL check */
	fprintf(stderr, "bft, agno = %d, start = %u, count = %u\n",
		agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
#endif

	lptr = &btree_curs->level[0];

	for (i = 0; i < btree_curs->level[0].num_blocks; i++) {
		/*
		 * block initialization, lay in block header
		 */
		bt_hdr = XFS_BUF_TO_ALLOC_BLOCK(lptr->buf_p);
		bzero(bt_hdr, mp->m_sb.sb_blocksize);

		INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, magic);
		INT_ZERO(bt_hdr->bb_level, ARCH_CONVERT);
		INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT, lptr->prev_agbno);
		INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
		/*
		 * each leaf gets num_recs_pb records, plus one more while
		 * the remainder counter (modulo) is still positive
		 */
		INT_SET(bt_hdr->bb_numrecs, ARCH_CONVERT,
			lptr->num_recs_pb + (lptr->modulo > 0));
#ifdef XR_BLD_FREE_TRACE
		fprintf(stderr, "bft, bb_numrecs = %d\n",
				INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT));
#endif

		if (lptr->modulo > 0)
			lptr->modulo--;

		/*
		 * initialize values in the path up to the root if
		 * this is a multi-level btree
		 */
		if (btree_curs->num_levels > 1)
			prop_freespace_cursor(mp, agno, btree_curs,
					ext_ptr->ex_startblock,
					ext_ptr->ex_blockcount,
					0, magic);

		/* leaf records start immediately after the block header */
		bt_rec = (xfs_alloc_rec_t *) ((char *) bt_hdr +
						sizeof(xfs_alloc_block_t));
		for (j = 0; j < INT_GET(bt_hdr->bb_numrecs,ARCH_CONVERT); j++) {
			ASSERT(ext_ptr != NULL);
			INT_SET(bt_rec[j].ar_startblock, ARCH_CONVERT,
							ext_ptr->ex_startblock);
			INT_SET(bt_rec[j].ar_blockcount, ARCH_CONVERT,
							ext_ptr->ex_blockcount);
			freeblks += ext_ptr->ex_blockcount;
			if (magic == XFS_ABTB_MAGIC)
				ext_ptr = findnext_bno_extent(ext_ptr);
			else
				ext_ptr = findnext_bcnt_extent(agno, ext_ptr);
#if 0
#ifdef XR_BLD_FREE_TRACE
			if (ext_ptr == NULL)
				fprintf(stderr, "null extent pointer, j = %d\n",
					j);
			else
				fprintf(stderr,
				"bft, agno = %d, start = %u, count = %u\n",
					agno, ext_ptr->ex_startblock,
					ext_ptr->ex_blockcount);
#endif
#endif
		}

		if (ext_ptr != NULL) {
			/*
			 * get next leaf level block
			 */
			if (lptr->prev_buf_p != NULL) {
#ifdef XR_BLD_FREE_TRACE
				fprintf(stderr, " writing fst agbno %u\n",
					lptr->prev_agbno);
#endif
				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
				libxfs_writebuf(lptr->prev_buf_p, 0);
			}
			lptr->prev_buf_p = lptr->buf_p;
			lptr->prev_agbno = lptr->agbno;

			/*
			 * the new block address is stored in the cursor and
			 * in the finished block's right-sibling field in a
			 * single expression
			 */
			INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, lptr->agbno =
				get_next_blockaddr(agno, 0, btree_curs));

			lptr->buf_p = libxfs_getbuf(mp->m_dev,
					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
					XFS_FSB_TO_BB(mp, 1));
		}
	}

	return(freeblks);
}

/*
 * no-cursor versions of the XFS equivalents. The address calculators
 * should be used only for interior btree nodes.
 * these are adapted from xfs_ialloc_btree.h and xfs_tree.h
 *
 * in the two address macros (i) is 1-based: entry 1 sits at offset 0 of
 * its array.  the key array starts right after the block header and the
 * pointer array starts after room for (mp)->m_inobt_mxr[1] keys.
 * XR_INOBT_BLOCK_MAXRECS passes (level) == 0 as the last argument of
 * XFS_BTREE_BLOCK_MAXRECS.
 */
#define XR_INOBT_KEY_ADDR(mp, bp, i) \
	(xfs_inobt_key_t *) ((char *) (bp) + sizeof(xfs_inobt_block_t) \
				+ ((i)-1) * sizeof(xfs_inobt_key_t))

#define XR_INOBT_PTR_ADDR(mp, bp, i) \
	(xfs_inobt_ptr_t *) ((char *) (bp) + sizeof(xfs_inobt_block_t) \
				+ (mp)->m_inobt_mxr[1] * sizeof(xfs_inobt_key_t) \
				+ ((i)-1) * sizeof(xfs_inobt_ptr_t))

#define XR_INOBT_BLOCK_MAXRECS(mp, level) \
			XFS_BTREE_BLOCK_MAXRECS((mp)->m_sb.sb_blocksize, \
						xfs_inobt, (level) == 0)

/*
 * we don't have to worry here about how chewing up free extents
 * may perturb things because inode tree building happens before
 * freespace tree building.
 *
 * walks the incore inode records for agno, sizes every level of the
 * inode btree, records that shape in btree_curs and calls
 * setup_cursor().  *num_inos and *num_free_inos return the inode and
 * free-inode totals; both are left at zero when the AG has no inode
 * records.
 */
void
init_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
		__uint64_t *num_inos, __uint64_t *num_free_inos)
{
	__uint64_t		ninos;
	__uint64_t		nfinos;
	ino_tree_node_t		*ino_rec;
	int			num_recs;
	int			level;
	bt_stat_level_t		*lptr;
	bt_stat_level_t		*p_lptr;
	xfs_extlen_t		blocks_allocated;
	int			i;

	*num_inos = *num_free_inos = 0;
	ninos = nfinos = 0;

	lptr = &btree_curs->level[0];
	btree_curs->init = 1;

	if ((ino_rec = findfirst_inode_rec(agno)) == NULL) {
		/*
		 * easy corner-case -- no inode records
		 */
		lptr->num_blocks = 1;
		lptr->modulo = 0;
		lptr->num_recs_pb = 0;
		lptr->num_recs_tot = 0;

		btree_curs->num_levels = 1;
		btree_curs->num_tot_blocks = btree_curs->num_free_blocks = 1;

		setup_cursor(mp, agno, btree_curs);

		return;
	}

	/*
	 * build up statistics
	 */
	for (num_recs = 0; ino_rec != NULL; ino_rec = next_ino_rec(ino_rec)) {
		ninos += XFS_INODES_PER_CHUNK;
		num_recs++;
		for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
			ASSERT(is_inode_confirmed(ino_rec, i));
			if (is_inode_free(ino_rec, i))
				nfinos++;
		}
	}

	/*
	 * leaf level: enough blocks to hold num_recs records, spread
	 * evenly (num_recs_pb each, with modulo blocks taking one extra)
	 */
	blocks_allocated = lptr->num_blocks = howmany(num_recs,
					XR_INOBT_BLOCK_MAXRECS(mp, 0));

	lptr->modulo = num_recs % lptr->num_blocks;
	lptr->num_recs_pb = num_recs / lptr->num_blocks;
	lptr->num_recs_tot = num_recs;
	level = 1;

	if (lptr->num_blocks > 1) {
		/*
		 * each higher level holds one record per block of the
		 * level below; stop once a level fits in a single block
		 */
		for (; btree_curs->level[level-1].num_blocks > 1
				&& level < XFS_BTREE_MAXLEVELS;
				level++) {
			lptr = &btree_curs->level[level];
			p_lptr = &btree_curs->level[level - 1];
			lptr->num_blocks = howmany(p_lptr->num_blocks,
				XR_INOBT_BLOCK_MAXRECS(mp, level));
			lptr->modulo = p_lptr->num_blocks % lptr->num_blocks;
			lptr->num_recs_pb = p_lptr->num_blocks
					/ lptr->num_blocks;
			lptr->num_recs_tot = p_lptr->num_blocks;

			blocks_allocated += lptr->num_blocks;
		}
	}
	/* lptr is the top level here and must be a single (root) block */
	ASSERT(lptr->num_blocks == 1);
	btree_curs->num_levels = level;

	btree_curs->num_tot_blocks = btree_curs->num_free_blocks
			= blocks_allocated;

	setup_cursor(mp, agno, btree_curs);

	*num_inos = ninos;
	*num_free_inos = nfinos;

	return;
}

/*
 * adds a key/pointer entry for the block currently held at `level' to
 * its parent (level + 1).  the key is startino and the pointer is the
 * child level's current agbno.  when the parent block already holds its
 * share of entries a new parent block is started and the entry is
 * propagated further up by recursion.  returns without doing anything
 * once level + 1 reaches btree_curs->num_levels.
 */
void
prop_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
	xfs_agino_t startino, int level)
{
	xfs_inobt_block_t	*bt_hdr;
	xfs_inobt_key_t		*bt_key;
	xfs_inobt_ptr_t		*bt_ptr;
	xfs_agblock_t		agbno;
	bt_stat_level_t		*lptr;

	/* from here on `level' is the parent level being updated */
	level++;

	if (level >= btree_curs->num_levels)
		return;

	lptr = &btree_curs->level[level];
	bt_hdr = XFS_BUF_TO_INOBT_BLOCK(lptr->buf_p);

	if (INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT) == 0) {
		/*
		 * this only happens once to initialize the
		 * first path up the left side of the tree
		 * where the agbno's are already set up
		 */
		prop_ino_cursor(mp, agno, btree_curs, startino, level);
	}

	if (INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT) ==
				lptr->num_recs_pb + (lptr->modulo > 0)) {
		/*
		 * write out current prev block, grab us a new block,
		 * and set the rightsib pointer of current block
		 */
#ifdef XR_BLD_INO_TRACE
		fprintf(stderr, " ino prop agbno %d ", lptr->prev_agbno);
#endif
		if (lptr->prev_agbno != NULLAGBLOCK) {
			ASSERT(lptr->prev_buf_p != NULL);
			libxfs_writebuf(lptr->prev_buf_p, 0);
		}
		lptr->prev_agbno = lptr->agbno;;
		lptr->prev_buf_p = lptr->buf_p;
		agbno = get_next_blockaddr(agno, level, btree_curs);

		/* bt_hdr still refers to the block that has just filled up */
		INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, agbno);

		lptr->buf_p = libxfs_getbuf(mp->m_dev,
					XFS_AGB_TO_DADDR(mp, agno, agbno),
					XFS_FSB_TO_BB(mp, 1));
		lptr->agbno = agbno;

		if (lptr->modulo)
			lptr->modulo--;

		/*
		 * initialize block header
		 */
		bt_hdr = XFS_BUF_TO_INOBT_BLOCK(lptr->buf_p);
		bzero(bt_hdr, mp->m_sb.sb_blocksize);

		INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, XFS_IBT_MAGIC);
		INT_SET(bt_hdr->bb_level, ARCH_CONVERT, level);
		INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT, lptr->prev_agbno);
		INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
		INT_ZERO(bt_hdr->bb_numrecs, ARCH_CONVERT);
		/*
		 * propagate extent record for first extent in new block up
		 */
		prop_ino_cursor(mp, agno, btree_curs, startino, level);
	}
	/*
	 * add inode info to current block
	 */
	INT_MOD(bt_hdr->bb_numrecs, ARCH_CONVERT, +1);

	/* the freshly incremented count is the 1-based slot to fill */
	bt_key = XR_INOBT_KEY_ADDR(mp, bt_hdr,
				INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT));
	bt_ptr = XR_INOBT_PTR_ADDR(mp, bt_hdr,
				INT_GET(bt_hdr->bb_numrecs, ARCH_CONVERT));

	INT_SET(bt_key->ir_startino, ARCH_CONVERT, startino);
	INT_SET(*bt_ptr, ARCH_CONVERT, btree_curs->level[level-1].agbno);
}

/*
 * fills in a zeroed AGI sector for agno and writes it out.  root and
 * level come from btree_curs; count, freecount and first_agino are
 * stored as agi_count, agi_freecount and agi_newino.  every unlinked
 * bucket is set to NULLAGINO.
 */
void
build_agi(xfs_mount_t *mp, xfs_agnumber_t agno,
	bt_status_t *btree_curs, xfs_agino_t first_agino,
	xfs_agino_t count, xfs_agino_t freecount)
{
	xfs_buf_t	*agi_buf;
	xfs_agi_t	*agi;
	int		i;

	agi_buf = libxfs_getbuf(mp->m_dev,
			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR),
			mp->m_sb.sb_sectsize/BBSIZE);
	agi = XFS_BUF_TO_AGI(agi_buf);
	bzero(agi, mp->m_sb.sb_sectsize);

	INT_SET(agi->agi_magicnum, ARCH_CONVERT, XFS_AGI_MAGIC);
	INT_SET(agi->agi_versionnum, ARCH_CONVERT, XFS_AGI_VERSION);
	INT_SET(agi->agi_seqno, ARCH_CONVERT, agno);
	/*
	 * every AG but the last is sb_agblocks long; the last one gets
	 * whatever remains of sb_dblocks
	 */
	if (agno < mp->m_sb.sb_agcount - 1)
		INT_SET(agi->agi_length, ARCH_CONVERT, mp->m_sb.sb_agblocks);
	else
		INT_SET(agi->agi_length, ARCH_CONVERT, mp->m_sb.sb_dblocks -
			(xfs_drfsbno_t) mp->m_sb.sb_agblocks * agno);
	INT_SET(agi->agi_count, ARCH_CONVERT, count);
	INT_SET(agi->agi_root, ARCH_CONVERT, btree_curs->root);
	INT_SET(agi->agi_level, ARCH_CONVERT, btree_curs->num_levels);
	INT_SET(agi->agi_freecount, ARCH_CONVERT, freecount);
	INT_SET(agi->agi_newino, ARCH_CONVERT, first_agino);
	INT_SET(agi->agi_dirino, ARCH_CONVERT, NULLAGINO);

	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
		INT_SET(agi->agi_unlinked[i], ARCH_CONVERT, NULLAGINO);
	}

	libxfs_writebuf(agi_buf, 0);
}

/*
 * rebuilds an inode tree given a cursor.
We're lazy here and call + * the routine that builds the agi + */ +void +build_ino_tree(xfs_mount_t *mp, xfs_agnumber_t agno, + bt_status_t *btree_curs) +{ + xfs_agnumber_t i; + xfs_agblock_t j; + xfs_agblock_t agbno; + xfs_agino_t first_agino; + xfs_inobt_block_t *bt_hdr; + xfs_inobt_rec_t *bt_rec; + ino_tree_node_t *ino_rec; + bt_stat_level_t *lptr; + xfs_agino_t count = 0; + xfs_agino_t freecount = 0; + int inocnt; + int k; + int level = btree_curs->num_levels; + + for (i = 0; i < level; i++) { + lptr = &btree_curs->level[i]; + + agbno = get_next_blockaddr(agno, i, btree_curs); + lptr->buf_p = libxfs_getbuf(mp->m_dev, + XFS_AGB_TO_DADDR(mp, agno, agbno), + XFS_FSB_TO_BB(mp, 1)); + + if (i == btree_curs->num_levels - 1) + btree_curs->root = agbno; + + lptr->agbno = agbno; + lptr->prev_agbno = NULLAGBLOCK; + lptr->prev_buf_p = NULL; + /* + * initialize block header + */ + bt_hdr = XFS_BUF_TO_INOBT_BLOCK(lptr->buf_p); + bzero(bt_hdr, mp->m_sb.sb_blocksize); + + INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, XFS_IBT_MAGIC); + INT_SET(bt_hdr->bb_level, ARCH_CONVERT, i); + INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT, + bt_hdr->bb_rightsib = NULLAGBLOCK); + INT_ZERO(bt_hdr->bb_numrecs, ARCH_CONVERT); + } + /* + * run along leaf, setting up records. as we have to switch + * blocks, call the prop_ino_cursor routine to set up the new + * pointers for the parent. that can recurse up to the root + * if required. set the sibling pointers for leaf level here. 
+ */ + ino_rec = findfirst_inode_rec(agno); + + if (ino_rec != NULL) + first_agino = ino_rec->ino_startnum; + else + first_agino = NULLAGINO; + + lptr = &btree_curs->level[0]; + + for (i = 0; i < lptr->num_blocks; i++) { + /* + * block initialization, lay in block header + */ + bt_hdr = XFS_BUF_TO_INOBT_BLOCK(lptr->buf_p); + bzero(bt_hdr, mp->m_sb.sb_blocksize); + + INT_SET(bt_hdr->bb_magic, ARCH_CONVERT, XFS_IBT_MAGIC); + INT_ZERO(bt_hdr->bb_level, ARCH_CONVERT); + INT_SET(bt_hdr->bb_leftsib, ARCH_CONVERT, lptr->prev_agbno); + INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK); + INT_SET(bt_hdr->bb_numrecs, ARCH_CONVERT, + lptr->num_recs_pb + (lptr->modulo > 0)); + + if (lptr->modulo > 0) + lptr->modulo--; + + if (lptr->num_recs_pb > 0) + prop_ino_cursor(mp, agno, btree_curs, + ino_rec->ino_startnum, 0); + + bt_rec = (xfs_inobt_rec_t *) ((char *) bt_hdr + + sizeof(xfs_inobt_block_t)); + for (j = 0; j < INT_GET(bt_hdr->bb_numrecs,ARCH_CONVERT); j++) { + ASSERT(ino_rec != NULL); + INT_SET(bt_rec[j].ir_startino, ARCH_CONVERT, + ino_rec->ino_startnum); + INT_SET(bt_rec[j].ir_free, ARCH_CONVERT, + ino_rec->ir_free); + + inocnt = 0; + for (k = 0; k < sizeof(xfs_inofree_t)*NBBY; k++) { + ASSERT(is_inode_confirmed(ino_rec, k)); + inocnt += is_inode_free(ino_rec, k); + } + + INT_SET(bt_rec[j].ir_freecount, ARCH_CONVERT, inocnt); + freecount += inocnt; + count += XFS_INODES_PER_CHUNK; + ino_rec = next_ino_rec(ino_rec); + } + + if (ino_rec != NULL) { + /* + * get next leaf level block + */ + if (lptr->prev_buf_p != NULL) { +#ifdef XR_BLD_INO_TRACE + fprintf(stderr, "writing inobt agbno %u\n", + lptr->prev_agbno); +#endif + ASSERT(lptr->prev_agbno != NULLAGBLOCK); + libxfs_writebuf(lptr->prev_buf_p, 0); + } + lptr->prev_buf_p = lptr->buf_p; + lptr->prev_agbno = lptr->agbno; + + INT_SET(bt_hdr->bb_rightsib, ARCH_CONVERT, lptr->agbno= + get_next_blockaddr(agno, 0, btree_curs)); + + lptr->buf_p = libxfs_getbuf(mp->m_dev, + XFS_AGB_TO_DADDR(mp, agno, lptr->agbno), + 
XFS_FSB_TO_BB(mp, 1)); + } + } + + build_agi(mp, agno, btree_curs, first_agino, count, freecount); +} + +/* + * build both the agf and the agfl for an agno given both + * btree cursors + */ +void +build_agf_agfl(xfs_mount_t *mp, + xfs_agnumber_t agno, + bt_status_t *bno_bt, + bt_status_t *bcnt_bt, + xfs_extlen_t freeblks, /* # free blocks in tree */ + int lostblocks) /* # blocks that will be lost */ +{ + extent_tree_node_t *ext_ptr; + xfs_buf_t *agf_buf, *agfl_buf; + int i; + int j; + xfs_agfl_t *agfl; + xfs_agf_t *agf; + + agf_buf = libxfs_getbuf(mp->m_dev, + XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), + mp->m_sb.sb_sectsize/BBSIZE); + agf = XFS_BUF_TO_AGF(agf_buf); + bzero(agf, mp->m_sb.sb_sectsize); + +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, "agf = 0x%x, agf_buf->b_un.b_addr = 0x%x\n", + (__psint_t) agf, (__psint_t) agf_buf->b_un.b_addr); +#endif + + /* + * set up fixed part of agf + */ + INT_SET(agf->agf_magicnum, ARCH_CONVERT, XFS_AGF_MAGIC); + INT_SET(agf->agf_versionnum, ARCH_CONVERT, XFS_AGF_VERSION); + INT_SET(agf->agf_seqno, ARCH_CONVERT, agno); + + if (agno < mp->m_sb.sb_agcount - 1) + INT_SET(agf->agf_length, ARCH_CONVERT, mp->m_sb.sb_agblocks); + else + INT_SET(agf->agf_length, ARCH_CONVERT, mp->m_sb.sb_dblocks - + (xfs_drfsbno_t) mp->m_sb.sb_agblocks * agno); + + INT_SET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT, bno_bt->root); + INT_SET(agf->agf_levels[XFS_BTNUM_BNO], ARCH_CONVERT, + bno_bt->num_levels); + INT_SET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT, bcnt_bt->root); + INT_SET(agf->agf_levels[XFS_BTNUM_CNT], ARCH_CONVERT, + bcnt_bt->num_levels); + INT_SET(agf->agf_freeblks, ARCH_CONVERT, freeblks); + +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, "bno root = %u, bcnt root = %u, indices = %u %u\n", + INT_GET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT), + INT_GET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT), + XFS_BTNUM_BNO, + XFS_BTNUM_CNT); +#endif + + /* + * do we have left-over blocks in the btree cursors that should + * be used to fill the 
AGFL? + */ + if (bno_bt->num_free_blocks > 0 || bcnt_bt->num_free_blocks > 0) { + /* + * yes - grab the AGFL buffer + */ + agfl_buf = libxfs_getbuf(mp->m_dev, + XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR), + mp->m_sb.sb_sectsize/BBSIZE); + agfl = XFS_BUF_TO_AGFL(agfl_buf); + bzero(agfl, mp->m_sb.sb_sectsize); + /* + * ok, now grab as many blocks as we can + */ + i = j = 0; + while (bno_bt->num_free_blocks > 0 && i < XFS_AGFL_SIZE) { + INT_SET(agfl->agfl_bno[i], ARCH_CONVERT, + get_next_blockaddr(agno, 0, bno_bt)); + i++; + } + + while (bcnt_bt->num_free_blocks > 0 && i < XFS_AGFL_SIZE) { + INT_SET(agfl->agfl_bno[i], ARCH_CONVERT, + get_next_blockaddr(agno, 0, bcnt_bt)); + i++; + } + /* + * now throw the rest of the blocks away and complain + */ + while (bno_bt->num_free_blocks > 0) { + (void) get_next_blockaddr(agno, 0, bno_bt); + j++; + } + while (bcnt_bt->num_free_blocks > 0) { + (void) get_next_blockaddr(agno, 0, bcnt_bt); + j++; + } + + if (j > 0) { + if (j == lostblocks) + do_warn("lost %d blocks in ag %u\n", j, agno); + else + do_warn("thought we were going to lose %d " + "blocks in ag %u, actually lost %d\n", + lostblocks, j, agno); + } + + INT_ZERO(agf->agf_flfirst, ARCH_CONVERT); + INT_SET(agf->agf_fllast, ARCH_CONVERT, i - 1); + INT_SET(agf->agf_flcount, ARCH_CONVERT, i); + +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, "writing agfl for ag %u\n", agno); +#endif + + libxfs_writebuf(agfl_buf, 0); + } else { + INT_ZERO(agf->agf_flfirst, ARCH_CONVERT); + INT_SET(agf->agf_fllast, ARCH_CONVERT, XFS_AGFL_SIZE - 1); + INT_ZERO(agf->agf_flcount, ARCH_CONVERT); + } + + ext_ptr = findbiggest_bcnt_extent(agno); + INT_SET(agf->agf_longest, ARCH_CONVERT, + (ext_ptr != NULL) ? 
ext_ptr->ex_blockcount : 0); + + ASSERT(INT_GET(agf->agf_roots[XFS_BTNUM_BNOi], ARCH_CONVERT) != + INT_GET(agf->agf_roots[XFS_BTNUM_CNTi], ARCH_CONVERT)); + + libxfs_writebuf(agf_buf, 0); + +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, "wrote agf for ag %u, error = %d\n", agno, error); +#endif +} + +/* + * update the superblock counters, sync the sb version numbers and + * feature bits to the filesystem, and sync up the on-disk superblock + * to match the incore superblock. + */ +void +sync_sb(xfs_mount_t *mp) +{ + xfs_sb_t *sbp; + xfs_buf_t *bp; + + bp = libxfs_getsb(mp, 0); + if (!bp) + do_error("couldn't get superblock\n"); + + sbp = XFS_BUF_TO_SBP(bp); + + mp->m_sb.sb_icount = sb_icount; + mp->m_sb.sb_ifree = sb_ifree; + mp->m_sb.sb_fdblocks = sb_fdblocks; + mp->m_sb.sb_frextents = sb_frextents; + + update_sb_version(mp); + + *sbp = mp->m_sb; + libxfs_xlate_sb(XFS_BUF_PTR(bp), sbp, -1, ARCH_CONVERT, + XFS_SB_ALL_BITS); + libxfs_writebuf(bp, 0); +} + +/* + * make sure the root and realtime inodes show up allocated + * even if they've been freed. they get reinitialized in phase6. 
+ */ +void +keep_fsinos(xfs_mount_t *mp) +{ + ino_tree_node_t *irec; + int i; + + irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino), + XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino)); + + for (i = 0; i < 3; i++) + set_inode_used(irec, i); +} + +void +phase5(xfs_mount_t *mp) +{ + __uint64_t num_inos; + __uint64_t num_free_inos; + bt_status_t bno_btree_curs; + bt_status_t bcnt_btree_curs; + bt_status_t ino_btree_curs; + xfs_agnumber_t agno; + int extra_blocks = 0; + uint num_freeblocks; + xfs_extlen_t freeblks1; + xfs_extlen_t freeblks2; + xfs_agblock_t num_extents; + extern int count_bno_extents(xfs_agnumber_t); + extern int count_bno_extents_blocks(xfs_agnumber_t, uint *); +#ifdef XR_BLD_FREE_TRACE + extern int count_bcnt_extents(xfs_agnumber_t); +#endif + + do_log("Phase 5 - rebuild AG headers and trees...\n"); + +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, "inobt level 1, maxrec = %d, minrec = %d\n", + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0), + XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_inobt, 0) + ); + fprintf(stderr, "inobt level 0 (leaf), maxrec = %d, minrec = %d\n", + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1), + XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_inobt, 1) + ); + fprintf(stderr, "xr inobt level 0 (leaf), maxrec = %d\n", + XR_INOBT_BLOCK_MAXRECS(mp, 0)); + fprintf(stderr, "xr inobt level 1 (int), maxrec = %d\n", + XR_INOBT_BLOCK_MAXRECS(mp, 1)); + fprintf(stderr, "bnobt level 1, maxrec = %d, minrec = %d\n", + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0), + XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_alloc, 0)); + fprintf(stderr, "bnobt level 0 (leaf), maxrec = %d, minrec = %d\n", + XFS_BTREE_BLOCK_MAXRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1), + XFS_BTREE_BLOCK_MINRECS(mp->m_sb.sb_blocksize, xfs_alloc, 1)); +#endif + + /* + * make sure the root and realtime inodes show up allocated + */ + keep_fsinos(mp); + + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + /* + 
* build up incore bno and bcnt extent btrees + */ + num_extents = mk_incore_fstree(mp, agno); + +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, "# of bno extents is %d\n", + count_bno_extents(agno)); +#endif + + if (num_extents == 0) { + /* + * XXX - what we probably should do here is pick an + * inode for a regular file in the allocation group + * that has space allocated and shoot it by traversing + * the bmap list and putting all its extents on the + * incore freespace trees, clearing the inode, + * and clearing the in-use bit in the incore inode + * tree. Then try mk_incore_fstree() again. + */ + do_error("unable to rebuild AG %u. " + "Not enough free space in on-disk AG.\n", agno); + } + + /* + * done with the AG bitmap, toss it... + */ + teardown_ag_bmap(mp, agno); + + /* + * ok, now set up the btree cursors for the + * on-disk btrees (includs pre-allocating all + * required blocks for the trees themselves) + */ + init_ino_cursor(mp, agno, &ino_btree_curs, + &num_inos, &num_free_inos); + + sb_icount += num_inos; + sb_ifree += num_free_inos; + + num_extents = count_bno_extents_blocks(agno, &num_freeblocks); + /* + * lose two blocks per AG -- the space tree roots + * are counted as allocated since the space trees + * always have roots + */ + sb_fdblocks += num_freeblocks - 2; + + if (num_extents == 0) { + /* + * XXX - what we probably should do here is pick an + * inode for a regular file in the allocation group + * that has space allocated and shoot it by traversing + * the bmap list and putting all its extents on the + * incore freespace trees, clearing the inode, + * and clearing the in-use bit in the incore inode + * tree. Then try mk_incore_fstree() again. + */ + do_error("unable to rebuild AG %u. 
No free space.\n", + agno); + exit(1); + } + +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, "# of bno extents is %d\n", num_extents); +#endif + + /* + * track blocks that we might really lose + */ + extra_blocks = calculate_freespace_cursor(mp, agno, + &num_extents, &bno_btree_curs); + + /* + * freespace btrees live in the "free space" but + * the filesystem treats AGFL blocks as allocated + * since they aren't described by the freespace trees + */ + + /* + * see if we can fit all the extra blocks into the AGFL + */ + extra_blocks = (extra_blocks - XFS_AGFL_SIZE > 0) + ? extra_blocks - XFS_AGFL_SIZE + : 0; + + if (extra_blocks > 0) { + do_warn("lost %d blocks in agno %d, sorry.\n", + extra_blocks, agno); + sb_fdblocks -= extra_blocks; + } + + bcnt_btree_curs = bno_btree_curs; + + setup_cursor(mp, agno, &bno_btree_curs); + setup_cursor(mp, agno, &bcnt_btree_curs); + +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, "# of bno extents is %d\n", + count_bno_extents(agno)); + fprintf(stderr, "# of bcnt extents is %d\n", + count_bcnt_extents(agno)); +#endif + /* + * now rebuild the freespace trees + */ + freeblks1 = build_freespace_tree(mp, agno, &bno_btree_curs, + XFS_ABTB_MAGIC); +#ifdef XR_BLD_FREE_TRACE + fprintf(stderr, "# of free blocks == %d\n", freeblks1); +#endif + write_cursor(&bno_btree_curs); + + freeblks2 = build_freespace_tree(mp, agno, &bcnt_btree_curs, + XFS_ABTC_MAGIC); + write_cursor(&bcnt_btree_curs); + + ASSERT(freeblks1 == freeblks2); + + /* + * set up agf and agfl + */ + build_agf_agfl(mp, agno, &bno_btree_curs, + &bcnt_btree_curs, freeblks1, extra_blocks); + /* + * build inode allocation tree. 
this also build the agi + */ + build_ino_tree(mp, agno, &ino_btree_curs); + write_cursor(&ino_btree_curs); + /* + * tear down cursors + */ + finish_cursor(&bno_btree_curs); + finish_cursor(&ino_btree_curs); + finish_cursor(&bcnt_btree_curs); + /* + * release the incore per-AG bno/bcnt trees so + * the extent nodes can be recycled + */ + release_agbno_extent_tree(agno); + release_agbcnt_extent_tree(agno); + } + + if (mp->m_sb.sb_rblocks) { + do_log( + " - generate realtime summary info and bitmap...\n"); + rtinit(mp); + generate_rtinfo(mp, btmcompute, sumcompute); + teardown_rt_bmap(mp); + } + + do_log(" - reset superblock...\n"); + + /* + * sync superblock counter and set version bits correctly + */ + sync_sb(mp); + + bad_ino_btree = 0; +} diff --git a/repair/phase6.c b/repair/phase6.c new file mode 100644 index 000000000..1babc07c9 --- /dev/null +++ b/repair/phase6.c @@ -0,0 +1,3971 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include "avl.h" +#include "globals.h" +#include "agheader.h" +#include "incore.h" +#include "dir.h" +#include "dir2.h" +#include "dir_stack.h" +#include "protos.h" +#include "err_protos.h" +#include "dinode.h" +#include "versions.h" + +static cred_t zerocr; +static int orphanage_entered; + +/* + * Data structures and routines to keep track of directory entries + * and whether their leaf entry has been seen + */ +typedef struct dir_hash_ent { + struct dir_hash_ent *next; /* pointer to next entry */ + xfs_dir2_leaf_entry_t ent; /* address and hash value */ + short junkit; /* name starts with / */ + short seen; /* have seen leaf entry */ +} dir_hash_ent_t; + +typedef struct dir_hash_tab { + int size; /* size of hash table */ + dir_hash_ent_t *tab[1];/* actual hash table, variable size */ +} dir_hash_tab_t; +#define DIR_HASH_TAB_SIZE(n) \ + (offsetof(dir_hash_tab_t, tab) + (sizeof(dir_hash_ent_t *) * (n))) +#define DIR_HASH_FUNC(t,a) ((a) % (t)->size) + +/* + * Track the contents of the freespace table in a directory. 
+ */ +typedef struct freetab { + int naents; + int nents; + struct freetab_ent { + xfs_dir2_data_off_t v; + short s; + } ents[1]; +} freetab_t; +#define FREETAB_SIZE(n) \ + (offsetof(freetab_t, ents) + (sizeof(struct freetab_ent) * (n))) + +#define DIR_HASH_CK_OK 0 +#define DIR_HASH_CK_DUPLEAF 1 +#define DIR_HASH_CK_BADHASH 2 +#define DIR_HASH_CK_NODATA 3 +#define DIR_HASH_CK_NOLEAF 4 +#define DIR_HASH_CK_BADSTALE 5 + +static void +dir_hash_add( + dir_hash_tab_t *hashtab, + xfs_dahash_t hash, + xfs_dir2_dataptr_t addr, + int junk) +{ + int i; + dir_hash_ent_t *p; + + i = DIR_HASH_FUNC(hashtab, addr); + if ((p = malloc(sizeof(*p))) == NULL) { + do_error("malloc failed in dir_hash_add (%u bytes)\n", + sizeof(*p)); + exit(1); + } + p->next = hashtab->tab[i]; + hashtab->tab[i] = p; + if (!(p->junkit = junk)) + p->ent.hashval = hash; + p->ent.address = addr; + p->seen = 0; +} + +static int +dir_hash_unseen( + dir_hash_tab_t *hashtab) +{ + int i; + dir_hash_ent_t *p; + + for (i = 0; i < hashtab->size; i++) { + for (p = hashtab->tab[i]; p; p = p->next) { + if (p->seen == 0) + return 1; + } + } + return 0; +} + +static int +dir_hash_check( + dir_hash_tab_t *hashtab, + xfs_inode_t *ip, + int seeval) +{ + static char *seevalstr[] = { + "ok", + "duplicate leaf", + "hash value mismatch", + "no data entry", + "no leaf entry", + "bad stale count", + }; + + if (seeval == DIR_HASH_CK_OK && dir_hash_unseen(hashtab)) + seeval = DIR_HASH_CK_NOLEAF; + if (seeval == DIR_HASH_CK_OK) + return 0; + do_warn("bad hash table for directory inode %llu (%s): ", ip->i_ino, + seevalstr[seeval]); + if (!no_modify) + do_warn("rebuilding\n"); + else + do_warn("would rebuild\n"); + return 1; +} + +static void +dir_hash_done( + dir_hash_tab_t *hashtab) +{ + int i; + dir_hash_ent_t *n; + dir_hash_ent_t *p; + + for (i = 0; i < hashtab->size; i++) { + for (p = hashtab->tab[i]; p; p = n) { + n = p->next; + free(p); + } + } + free(hashtab); +} + +static dir_hash_tab_t * +dir_hash_init( + xfs_fsize_t size) 
+{ + dir_hash_tab_t *hashtab; + int hsize; + + hsize = size / (16 * 4); + if (hsize > 1024) + hsize = 1024; + else if (hsize < 16) + hsize = 16; + if ((hashtab = calloc(DIR_HASH_TAB_SIZE(hsize), 1)) == NULL) { + do_error("calloc failed in dir_hash_init\n"); + exit(1); + } + hashtab->size = hsize; + return hashtab; +} + +static int +dir_hash_see( + dir_hash_tab_t *hashtab, + xfs_dahash_t hash, + xfs_dir2_dataptr_t addr) +{ + int i; + dir_hash_ent_t *p; + + i = DIR_HASH_FUNC(hashtab, addr); + for (p = hashtab->tab[i]; p; p = p->next) { + if (p->ent.address != addr) + continue; + if (p->seen) + return DIR_HASH_CK_DUPLEAF; + if (p->junkit == 0 && p->ent.hashval != hash) + return DIR_HASH_CK_BADHASH; + p->seen = 1; + return DIR_HASH_CK_OK; + } + return DIR_HASH_CK_NODATA; +} + +static int +dir_hash_see_all( + dir_hash_tab_t *hashtab, + xfs_dir2_leaf_entry_t *ents, + int count, + int stale) +{ + int i; + int j; + int rval; + + for (i = j = 0; i < count; i++) { + if (INT_GET(ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) { + j++; + continue; + } + rval = dir_hash_see(hashtab, INT_GET(ents[i].hashval, ARCH_CONVERT), INT_GET(ents[i].address, ARCH_CONVERT)); + if (rval != DIR_HASH_CK_OK) + return rval; + } + return j == stale ? 
DIR_HASH_CK_OK : DIR_HASH_CK_BADSTALE; +} + + +/* + * Version 1 or 2 directory routine wrappers +*/ +static void +dir_init(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *dp, xfs_inode_t *pdp) +{ + if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) + libxfs_dir2_init(tp, dp, pdp); + else + libxfs_dir_init(tp, dp, pdp); +} + +static int +dir_createname(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *pip, + char *name, int namelen, xfs_ino_t inum, xfs_fsblock_t *first, + xfs_bmap_free_t *flist, xfs_extlen_t total) +{ + if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) + return libxfs_dir2_createname(tp, pip, name, namelen, + inum, first, flist, total); + else + return libxfs_dir_createname(tp, pip, name, namelen, + inum, first, flist, total); +} + +static int +dir_lookup(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *dp, char *name, + int namelen, xfs_ino_t *inum) +{ + if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) + return libxfs_dir2_lookup(tp, dp, name, namelen, inum); + else + return libxfs_dir_lookup(tp, dp, name, namelen, inum); +} + +static int +dir_replace(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *dp, char *name, + int namelen, xfs_ino_t inum, xfs_fsblock_t *firstblock, + xfs_bmap_free_t *flist, xfs_extlen_t total) +{ + if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) + return libxfs_dir2_replace(tp, dp, name, namelen, inum, + firstblock, flist, total); + else + return libxfs_dir_replace(tp, dp, name, namelen, inum, + firstblock, flist, total); +} + +static int +dir_removename(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *dp, char *name, + int namelen, xfs_ino_t inum, xfs_fsblock_t *firstblock, + xfs_bmap_free_t *flist, xfs_extlen_t total) +{ + if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) + return libxfs_dir2_removename(tp, dp, name, namelen, inum, + firstblock, flist, total); + else + return libxfs_dir_removename(tp, dp, name, namelen, inum, + firstblock, flist, total); +} + +static int +dir_bogus_removename(xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *dp, + char *name, xfs_fsblock_t 
*firstblock, xfs_bmap_free_t *flist, + xfs_extlen_t total, xfs_dahash_t hashval, int namelen) +{ + if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) + return libxfs_dir2_bogus_removename(tp, dp, name, firstblock, + flist, total, hashval, namelen); + else + return libxfs_dir_bogus_removename(tp, dp, name, firstblock, + flist, total, hashval, namelen); +} + + +static void +res_failed( + int err) +{ + if (err == ENOSPC) { + do_error("ran out of disk space!\n"); + } else + do_error("xfs_trans_reserve returned %d\n", err); +} + +void +mk_rbmino(xfs_mount_t *mp) +{ + xfs_trans_t *tp; + xfs_inode_t *ip; + xfs_bmbt_irec_t *ep; + xfs_fsblock_t first; + int i; + int nmap; + int committed; + int error; + xfs_bmap_free_t flist; + xfs_dfiloff_t bno; + xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP]; + + /* + * first set up inode + */ + tp = libxfs_trans_alloc(mp, 0); + + if (i = libxfs_trans_reserve(tp, 10, 0, 0, 0, 0)) + res_failed(i); + + error = libxfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip); + if (error) { + do_error("couldn't iget realtime bitmap inode -- error - %d\n", + error); + } + + bzero(&ip->i_d, sizeof(xfs_dinode_core_t)); + + ip->i_d.di_magic = XFS_DINODE_MAGIC; + ip->i_d.di_mode = IFREG; + ip->i_d.di_version = XFS_DINODE_VERSION_1; + ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; + ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; + + ip->i_d.di_nlink = 1; /* account for sb ptr */ + + /* + * now the ifork + */ + ip->i_df.if_flags = XFS_IFEXTENTS; + ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; + ip->i_df.if_u1.if_extents = NULL; + + ip->i_d.di_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize; + + /* + * commit changes + */ + libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + libxfs_trans_ihold(tp, ip); + libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, NULL); + + /* + * then allocate blocks for file and fill with zeroes (stolen + * from mkfs) + */ + tp = libxfs_trans_alloc(mp, 0); + if (error = libxfs_trans_reserve(tp, mp->m_sb.sb_rbmblocks + + 
(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1), 0, 0, 0, 0)) + res_failed(error); + + libxfs_trans_ijoin(tp, ip, 0); + bno = 0; + XFS_BMAP_INIT(&flist, &first); + while (bno < mp->m_sb.sb_rbmblocks) { + nmap = XFS_BMAP_MAX_NMAP; + error = libxfs_bmapi(tp, ip, bno, + (xfs_extlen_t)(mp->m_sb.sb_rbmblocks - bno), + XFS_BMAPI_WRITE, &first, mp->m_sb.sb_rbmblocks, + map, &nmap, &flist); + if (error) { + do_error("couldn't allocate realtime bitmap - err %d\n", + error); + } + for (i = 0, ep = map; i < nmap; i++, ep++) { + libxfs_device_zero(mp->m_dev, + XFS_FSB_TO_DADDR(mp, ep->br_startblock), + XFS_FSB_TO_BB(mp, ep->br_blockcount)); + bno += ep->br_blockcount; + } + } + error = libxfs_bmap_finish(&tp, &flist, first, &committed); + if (error) { + do_error( + "allocation of the realtime bitmap failed, error = %d\n", + error); + } + libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0); +} + +int +fill_rbmino(xfs_mount_t *mp) +{ + xfs_buf_t *bp; + xfs_trans_t *tp; + xfs_inode_t *ip; + xfs_rtword_t *bmp; + xfs_fsblock_t first; + int nmap; + int error; + xfs_dfiloff_t bno; + xfs_bmbt_irec_t map; + + bmp = btmcompute; + bno = 0; + + tp = libxfs_trans_alloc(mp, 0); + + if (error = libxfs_trans_reserve(tp, 10, 0, 0, 0, 0)) + res_failed(error); + + error = libxfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, &ip); + if (error) { + do_error("couldn't iget realtime bitmap inode -- error - %d\n", + error); + } + + while (bno < mp->m_sb.sb_rbmblocks) { + /* + * fill the file one block at a time + */ + nmap = 1; + error = libxfs_bmapi(tp, ip, bno, 1, XFS_BMAPI_WRITE, + &first, 1, &map, &nmap, NULL); + if (error || nmap != 1) { + do_error( + "couldn't map realtime bitmap block %llu - err %d\n", + bno, error); + } + + ASSERT(map.br_startblock != HOLESTARTBLOCK); + + error = libxfs_trans_read_buf( + mp, tp, mp->m_dev, + XFS_FSB_TO_DADDR(mp, map.br_startblock), + XFS_FSB_TO_BB(mp, 1), 1, &bp); + + if (error) { + do_warn( + "can't access block %llu (fsbno %llu) of realtime bitmap 
inode %llu\n", + bno, map.br_startblock, mp->m_sb.sb_rbmino); + return(1); + } + + bcopy(bmp, XFS_BUF_PTR(bp), mp->m_sb.sb_blocksize); + + libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); + + bmp = (xfs_rtword_t *)((__psint_t) bmp + mp->m_sb.sb_blocksize); + bno++; + } + + libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0); + + return(0); +} + +int +fill_rsumino(xfs_mount_t *mp) +{ + xfs_buf_t *bp; + xfs_trans_t *tp; + xfs_inode_t *ip; + xfs_suminfo_t *smp; + xfs_fsblock_t first; + int nmap; + int error; + xfs_dfiloff_t bno; + xfs_dfiloff_t end_bno; + xfs_bmbt_irec_t map; + + smp = sumcompute; + bno = 0; + end_bno = mp->m_rsumsize >> mp->m_sb.sb_blocklog; + + tp = libxfs_trans_alloc(mp, 0); + + if (error = libxfs_trans_reserve(tp, 10, 0, 0, 0, 0)) + res_failed(error); + + error = libxfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip); + if (error) { + do_error("couldn't iget realtime summary inode -- error - %d\n", + error); + } + + while (bno < end_bno) { + /* + * fill the file one block at a time + */ + nmap = 1; + error = libxfs_bmapi(tp, ip, bno, 1, XFS_BMAPI_WRITE, + &first, 1, &map, &nmap, NULL); + if (error || nmap != 1) { + do_error( + "couldn't map realtime summary inode block %llu - err %d\n", + bno, error); + } + + ASSERT(map.br_startblock != HOLESTARTBLOCK); + + error = libxfs_trans_read_buf( + mp, tp, mp->m_dev, + XFS_FSB_TO_DADDR(mp, map.br_startblock), + XFS_FSB_TO_BB(mp, 1), 1, &bp); + + if (error) { + do_warn( + "can't access block %llu (fsbno %llu) of realtime summary inode %llu\n", + bno, map.br_startblock, mp->m_sb.sb_rsumino); + return(1); + } + + bcopy(smp, XFS_BUF_PTR(bp), mp->m_sb.sb_blocksize); + + libxfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); + + smp = (xfs_suminfo_t *)((__psint_t)smp + mp->m_sb.sb_blocksize); + bno++; + } + + libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0); + + return(0); +} + +void +mk_rsumino(xfs_mount_t *mp) +{ + xfs_trans_t *tp; + xfs_inode_t *ip; + 
xfs_bmbt_irec_t *ep; + xfs_fsblock_t first; + int i; + int nmap; + int committed; + int error; + int nsumblocks; + xfs_bmap_free_t flist; + xfs_dfiloff_t bno; + xfs_bmbt_irec_t map[XFS_BMAP_MAX_NMAP]; + + /* + * first set up inode + */ + tp = libxfs_trans_alloc(mp, 0); + + if (i = libxfs_trans_reserve(tp, 10, XFS_ICHANGE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT)) + res_failed(i); + + error = libxfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, &ip); + if (error) { + do_error("couldn't iget realtime summary inode -- error - %d\n", + error); + } + + bzero(&ip->i_d, sizeof(xfs_dinode_core_t)); + + ip->i_d.di_magic = XFS_DINODE_MAGIC; + ip->i_d.di_mode = IFREG; + ip->i_d.di_version = XFS_DINODE_VERSION_1; + ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; + ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; + + ip->i_d.di_nlink = 1; /* account for sb ptr */ + + /* + * now the ifork + */ + ip->i_df.if_flags = XFS_IFEXTENTS; + ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; + ip->i_df.if_u1.if_extents = NULL; + + ip->i_d.di_size = mp->m_rsumsize; + + /* + * commit changes + */ + libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + libxfs_trans_ihold(tp, ip); + libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0); + + /* + * then allocate blocks for file and fill with zeroes (stolen + * from mkfs) + */ + tp = libxfs_trans_alloc(mp, 0); + XFS_BMAP_INIT(&flist, &first); + + nsumblocks = mp->m_rsumsize >> mp->m_sb.sb_blocklog; + if (error = libxfs_trans_reserve(tp, + mp->m_sb.sb_rbmblocks + + (XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 1), + BBTOB(128), 0, XFS_TRANS_PERM_LOG_RES, + XFS_DEFAULT_PERM_LOG_COUNT)) + res_failed(error); + + libxfs_trans_ijoin(tp, ip, 0); + bno = 0; + XFS_BMAP_INIT(&flist, &first); + while (bno < nsumblocks) { + nmap = XFS_BMAP_MAX_NMAP; + error = libxfs_bmapi(tp, ip, bno, + (xfs_extlen_t)(nsumblocks - bno), + XFS_BMAPI_WRITE, &first, nsumblocks, + map, &nmap, &flist); + if (error) { + do_error( + "couldn't allocate realtime summary 
inode - err %d\n", + error); + } + for (i = 0, ep = map; i < nmap; i++, ep++) { + libxfs_device_zero(mp->m_dev, + XFS_FSB_TO_DADDR(mp, ep->br_startblock), + XFS_FSB_TO_BB(mp, ep->br_blockcount)); + do_error("dev_zero of rtbitmap failed\n"); + bno += ep->br_blockcount; + } + } + error = libxfs_bmap_finish(&tp, &flist, first, &committed); + if (error) { + do_error( + "allocation of the realtime summary ino failed, err = %d\n", + error); + } + libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0); +} + +/* + * makes a new root directory. + */ +void +mk_root_dir(xfs_mount_t *mp) +{ + xfs_trans_t *tp; + xfs_inode_t *ip; + int i; + int error; + const mode_t mode = 0755; + + tp = libxfs_trans_alloc(mp, 0); + ip = NULL; + + if (i = libxfs_trans_reserve(tp, 10, XFS_ICHANGE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT)) + res_failed(i); + + error = libxfs_trans_iget(mp, tp, mp->m_sb.sb_rootino, 0, &ip); + if (error) { + do_error("could not iget root inode -- error - %d\n", error); + } + + /* + * take care of the core -- initialization from xfs_ialloc() + */ + bzero(&ip->i_d, sizeof(xfs_dinode_core_t)); + + ip->i_d.di_magic = XFS_DINODE_MAGIC; + ip->i_d.di_mode = (__uint16_t) mode|IFDIR; + ip->i_d.di_version = XFS_DINODE_VERSION_1; + ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; + ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; + + ip->i_d.di_nlink = 1; /* account for . 
*/ + + libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + /* + * now the ifork + */ + ip->i_df.if_flags = XFS_IFEXTENTS; + ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; + ip->i_df.if_u1.if_extents = NULL; + + mp->m_rootip = ip; + + /* + * initialize the directory + */ + dir_init(mp, tp, ip, ip); + + libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0); +} + +/* + * orphanage name == lost+found + */ +xfs_ino_t +mk_orphanage(xfs_mount_t *mp) +{ + xfs_ino_t ino; + xfs_trans_t *tp; + xfs_inode_t *ip; + xfs_inode_t *pip; + xfs_fsblock_t first; + int i; + int committed; + int error; + xfs_bmap_free_t flist; + const int mode = 0755; + const int uid = 0; + const int gid = 0; + int nres; + + tp = libxfs_trans_alloc(mp, 0); + XFS_BMAP_INIT(&flist, &first); + + nres = XFS_MKDIR_SPACE_RES(mp, strlen(ORPHANAGE)); + if (i = libxfs_trans_reserve(tp, nres, XFS_MKDIR_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT)) + res_failed(i); + + /* + * use iget/ijoin instead of trans_iget because the ialloc + * wrapper can commit the transaction and start a new one + */ + if (i = libxfs_iget(mp, NULL, mp->m_sb.sb_rootino, 0, &pip, 0)) + do_error("%d - couldn't iget root inode to make %s\n", + i, ORPHANAGE); + + error = libxfs_inode_alloc(&tp, pip, mode|IFDIR, + 1, mp->m_dev, &zerocr, &ip); + + if (error) { + do_error("%s inode allocation failed %d\n", + ORPHANAGE, error); + } + + ip->i_d.di_uid = uid; + ip->i_d.di_gid = gid; + ip->i_d.di_nlink++; /* account for . 
*/ + + /* + * now that we know the transaction will stay around, + * add the root inode to it + */ + libxfs_trans_ijoin(tp, pip, 0); + + /* + * create the actual entry + */ + if (error = dir_createname(mp, tp, pip, ORPHANAGE, + strlen(ORPHANAGE), ip->i_ino, &first, &flist, nres)) { + do_warn("can't make %s, createname error %d, will try later\n", + ORPHANAGE, error); + orphanage_entered = 0; + } else + orphanage_entered = 1; + + /* + * bump up the link count in the root directory to account + * for .. in the new directory + */ + pip->i_d.di_nlink++; + + libxfs_trans_log_inode(tp, pip, XFS_ILOG_CORE); + dir_init(mp, tp, ip, pip); + libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + error = libxfs_bmap_finish(&tp, &flist, first, &committed); + if (error) { + do_error("%s directory creation failed -- bmapf error %d\n", + ORPHANAGE, error); + } + + ino = ip->i_ino; + + libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0); + + /* need libxfs_iput here? - nathans TODO - possible memory leak? */ + + return(ino); +} + +/* + * move a file to the orphange. 
the orphanage is guaranteed + * at this point to only have file in it whose name == file inode # + */ +void +mv_orphanage(xfs_mount_t *mp, + xfs_ino_t dir_ino, /* orphange inode # */ + xfs_ino_t ino, /* inode # to be moved */ + int isa_dir) /* 1 if inode is a directory */ +{ + xfs_ino_t entry_ino_num; + xfs_inode_t *dir_ino_p; + xfs_inode_t *ino_p; + xfs_trans_t *tp; + xfs_fsblock_t first; + xfs_bmap_free_t flist; + int err; + int committed; + char fname[MAXPATHLEN + 1]; + int nres; + + sprintf(fname, "%llu", ino); + + if (err = libxfs_iget(mp, NULL, dir_ino, 0, &dir_ino_p, 0)) + do_error("%d - couldn't iget orphanage inode\n", err); + + tp = libxfs_trans_alloc(mp, 0); + + if (err = libxfs_iget(mp, NULL, ino, 0, &ino_p, 0)) + do_error("%d - couldn't iget disconnected inode\n", err); + + if (isa_dir) { + nres = XFS_DIRENTER_SPACE_RES(mp, strlen(fname)) + + XFS_DIRENTER_SPACE_RES(mp, 2); + if (err = dir_lookup(mp, tp, ino_p, "..", 2, + &entry_ino_num)) { + ASSERT(err == ENOENT); + + if (err = libxfs_trans_reserve(tp, nres, + XFS_RENAME_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_RENAME_LOG_COUNT)) + do_error( + "space reservation failed (%d), filesystem may be out of space\n", + err); + + libxfs_trans_ijoin(tp, dir_ino_p, 0); + libxfs_trans_ijoin(tp, ino_p, 0); + + XFS_BMAP_INIT(&flist, &first); + if (err = dir_createname(mp, tp, dir_ino_p, fname, + strlen(fname), ino, &first, + &flist, nres)) + do_error( + "name create failed in %s (%d), filesystem may be out of space\n", + ORPHANAGE, err); + + dir_ino_p->i_d.di_nlink++; + libxfs_trans_log_inode(tp, dir_ino_p, XFS_ILOG_CORE); + + if (err = dir_createname(mp, tp, ino_p, "..", 2, + dir_ino, &first, &flist, nres)) + do_error( + "creation of .. 
entry failed (%d), filesystem may be out of space\n", + err); + + ino_p->i_d.di_nlink++; + libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE); + + if (err = libxfs_bmap_finish(&tp, &flist, first, &committed)) + do_error( + "bmap finish failed (err - %d), filesystem may be out of space\n", + err); + + libxfs_trans_commit(tp, + XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0); + } else { + if (err = libxfs_trans_reserve(tp, nres, + XFS_RENAME_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_RENAME_LOG_COUNT)) + do_error( + "space reservation failed (%d), filesystem may be out of space\n", + err); + + libxfs_trans_ijoin(tp, dir_ino_p, 0); + libxfs_trans_ijoin(tp, ino_p, 0); + + XFS_BMAP_INIT(&flist, &first); + + if (err = dir_createname(mp, tp, dir_ino_p, fname, + strlen(fname), ino, &first, + &flist, nres)) + do_error( + "name create failed in %s (%d), filesystem may be out of space\n", + ORPHANAGE, err); + + dir_ino_p->i_d.di_nlink++; + libxfs_trans_log_inode(tp, dir_ino_p, XFS_ILOG_CORE); + + /* + * don't replace .. value if it already points + * to us. that'll pop a libxfs/kernel ASSERT. + */ + if (entry_ino_num != dir_ino) { + if (err = dir_replace(mp, tp, ino_p, "..", + 2, dir_ino, &first, + &flist, nres)) + do_error( + "name replace op failed (%d), filesystem may be out of space\n", + err); + } + + if (err = libxfs_bmap_finish(&tp, &flist, first, + &committed)) + do_error( + "bmap finish failed (%d), filesystem may be out of space\n", + err); + + libxfs_trans_commit(tp, + XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0); + } + } else { + /* + * use the remove log reservation as that's + * more accurate. 
we're only creating the + * links, we're not doing the inode allocation + * also accounted for in the create + */ + nres = XFS_DIRENTER_SPACE_RES(mp, strlen(fname)); + if (err = libxfs_trans_reserve(tp, nres, XFS_REMOVE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT)) + do_error( + "space reservation failed (%d), filesystem may be out of space\n", + err); + + libxfs_trans_ijoin(tp, dir_ino_p, 0); + libxfs_trans_ijoin(tp, ino_p, 0); + + XFS_BMAP_INIT(&flist, &first); + if (err = dir_createname(mp, tp, dir_ino_p, fname, + strlen(fname), ino, &first, &flist, nres)) + do_error( + "name create failed in %s (%d), filesystem may be out of space\n", + ORPHANAGE, err); + ASSERT(err == 0); + + ino_p->i_d.di_nlink = 1; + libxfs_trans_log_inode(tp, ino_p, XFS_ILOG_CORE); + + if (err = libxfs_bmap_finish(&tp, &flist, first, &committed)) + do_error( + "bmap finish failed (%d), filesystem may be out of space\n", + err); + + libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0); + } +} + +/* + * like get_first_dblock_fsbno only it uses the simulation code instead + * of raw I/O. + * + * Returns the fsbno of the first (leftmost) block in the directory leaf. + * sets *bno to the directory block # corresponding to the returned fsbno. + */ +xfs_dfsbno_t +map_first_dblock_fsbno(xfs_mount_t *mp, + xfs_ino_t ino, + xfs_inode_t *ip, + xfs_dablk_t *bno) +{ + xfs_fsblock_t fblock; + xfs_da_intnode_t *node; + xfs_buf_t *bp; + xfs_dablk_t da_bno; + xfs_dfsbno_t fsbno; + xfs_bmbt_irec_t map; + int nmap; + int i; + int error; + char *ftype; + + /* + * traverse down left-side of tree until we hit the + * left-most leaf block setting up the btree cursor along + * the way. 
+ */ + da_bno = 0; + *bno = 0; + i = -1; + node = NULL; + fblock = NULLFSBLOCK; + ftype = "dir"; + + nmap = 1; + error = libxfs_bmapi(NULL, ip, (xfs_fileoff_t) da_bno, 1, + XFS_BMAPI_METADATA, &fblock, 0, + &map, &nmap, NULL); + if (error || nmap != 1) { + if (!no_modify) + do_error( +"can't map block %d in %s inode %llu, xfs_bmapi returns %d, nmap = %d\n", + da_bno, ftype, ino, error, nmap); + else { + do_warn( +"can't map block %d in %s inode %llu, xfs_bmapi returns %d, nmap = %d\n", + da_bno, ftype, ino, error, nmap); + return(NULLDFSBNO); + } + } + + if ((fsbno = map.br_startblock) == HOLESTARTBLOCK) { + if (!no_modify) + do_error("block %d in %s ino %llu doesn't exist\n", + da_bno, ftype, ino); + else { + do_warn("block %d in %s ino %llu doesn't exist\n", + da_bno, ftype, ino); + return(NULLDFSBNO); + } + } + + if (ip->i_d.di_size <= XFS_LBSIZE(mp)) + return(fsbno); + + if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) + return(fsbno); + + do { + /* + * walk down left side of btree, release buffers as you + * go. if the root block is a leaf (single-level btree), + * just return it. 
+ * + */ + + bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno), + XFS_FSB_TO_BB(mp, 1), 0); + + if (!bp) { + do_warn( + "can't read block %u (fsbno %llu) for directory inode %llu\n", + da_bno, fsbno, ino); + return(NULLDFSBNO); + } + + node = (xfs_da_intnode_t *)XFS_BUF_PTR(bp); + + if (INT_GET(node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC) { + libxfs_putbuf(bp); + do_warn( +"bad dir/attr magic number in inode %llu, file bno = %u, fsbno = %llu\n", + ino, da_bno, fsbno); + return(NULLDFSBNO); + } + + if (i == -1) + i = INT_GET(node->hdr.level, ARCH_CONVERT); + + da_bno = INT_GET(node->btree[0].before, ARCH_CONVERT); + + libxfs_putbuf(bp); + bp = NULL; + + nmap = 1; + error = libxfs_bmapi(NULL, ip, (xfs_fileoff_t) da_bno, 1, + XFS_BMAPI_METADATA, &fblock, 0, + &map, &nmap, NULL); + if (error || nmap != 1) { + if (!no_modify) + do_error( + "can't map block %d in %s ino %llu, xfs_bmapi returns %d, nmap = %d\n", + da_bno, ftype, ino, error, nmap); + else { + do_warn( + "can't map block %d in %s ino %llu, xfs_bmapi returns %d, nmap = %d\n", + da_bno, ftype, ino, error, nmap); + return(NULLDFSBNO); + } + } + if ((fsbno = map.br_startblock) == HOLESTARTBLOCK) { + if (!no_modify) + do_error( + "block %d in %s inode %llu doesn't exist\n", + da_bno, ftype, ino); + else { + do_warn( + "block %d in %s inode %llu doesn't exist\n", + da_bno, ftype, ino); + return(NULLDFSBNO); + } + } + + i--; + } while(i > 0); + + *bno = da_bno; + return(fsbno); +} + +/* + * scan longform directory and prune first bad entry. returns 1 if + * it had to remove something, 0 if it made it all the way through + * the directory. prune_lf_dir_entry does all the necessary bmap calls. 
+ * + * hashval is an in/out -- starting hashvalue in, hashvalue of the + * deleted entry (if there was one) out + * + * this routine can NOT be called if running in no modify mode + */ +int +prune_lf_dir_entry(xfs_mount_t *mp, xfs_ino_t ino, xfs_inode_t *ip, + xfs_dahash_t *hashval) +{ + xfs_dfsbno_t fsbno; + int i; + int index; + int error; + int namelen; + xfs_bmap_free_t free_list; + xfs_fsblock_t first_block; + xfs_buf_t *bp; + xfs_dir_leaf_name_t *namest; + xfs_dir_leafblock_t *leaf; + xfs_dir_leaf_entry_t *entry; + xfs_trans_t *tp; + xfs_dablk_t da_bno; + xfs_fsblock_t fblock; + int committed; + int nmap; + xfs_bmbt_irec_t map; + char fname[MAXNAMELEN + 1]; + char *ftype; + int nres; + + /* + * ok, this is kind of a schizoid routine. we use our + * internal bmapi routines to walk the directory. when + * we find a bogus entry, we release the buffer so + * the simulation code doesn't deadlock and use the + * sim code to remove the entry. That will cause an + * extra bmap traversal to map the block but I think + * that's preferable to hacking the bogus removename + * function to be really different and then trying to + * maintain both versions as time goes on. + * + * first, grab the dinode and find the right leaf block. 
+ */ + + ftype = "dir"; + da_bno = 0; + bp = NULL; + namest = NULL; + fblock = NULLFSBLOCK; + + fsbno = map_first_dblock_fsbno(mp, ino, ip, &da_bno); + + /* + * now go foward along the leaves of the btree looking + * for an entry beginning with '/' + */ + do { + bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno), + XFS_FSB_TO_BB(mp, 1), 0); + + if (!bp) { + do_error( + "can't read directory inode %llu (leaf) block %u (fsbno %llu)\n", + ino, da_bno, fsbno); + /* NOTREACHED */ + } + + leaf = (xfs_dir_leafblock_t *)XFS_BUF_PTR(bp); + ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC); + entry = &leaf->entries[0]; + + for (index = -1, i = 0; + i < INT_GET(leaf->hdr.count, ARCH_CONVERT) && index == -1; + i++) { + namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)); + if (namest->name[0] != '/') + entry++; + else + index = i; + } + + /* + * if we got a bogus entry, exit loop with a pointer to + * the leaf block buffer. otherwise, keep trying blocks + */ + da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT); + + if (index == -1) { + if (bp != NULL) { + libxfs_putbuf(bp); + bp = NULL; + } + + /* + * map next leaf block unless we've run out + */ + if (da_bno != 0) { + nmap = 1; + error = libxfs_bmapi(NULL, ip, + (xfs_fileoff_t) da_bno, 1, + XFS_BMAPI_METADATA, &fblock, 0, + &map, &nmap, NULL); + if (error || nmap != 1) + do_error( +"can't map block %d in directory %llu, xfs_bmapi returns %d, nmap = %d\n", + da_bno, ino, error, nmap); + if ((fsbno = map.br_startblock) + == HOLESTARTBLOCK) { + do_error( + "%s ino %llu block %d doesn't exist\n", + ftype, ino, da_bno); + } + } + } + } while (da_bno != 0 && index == -1); + + /* + * if we hit the edge of the tree with no bad entries, we're done + * and the buffer was released. 
+ */ + if (da_bno == 0 && index == -1) + return(0); + + ASSERT(index >= 0); + ASSERT(entry == &leaf->entries[index]); + ASSERT(namest == XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT))); + + /* + * snag the info we need out of the directory then release all buffers + */ + bcopy(namest->name, fname, entry->namelen); + fname[entry->namelen] = '\0'; + *hashval = INT_GET(entry->hashval, ARCH_CONVERT); + namelen = entry->namelen; + + libxfs_putbuf(bp); + + /* + * ok, now the hard part, blow away the index'th entry in this block + * + * allocate a remove transaction for it. that's not quite true since + * we're only messing with one inode, not two but... + */ + + tp = libxfs_trans_alloc(mp, XFS_TRANS_REMOVE); + + nres = XFS_REMOVE_SPACE_RES(mp); + error = libxfs_trans_reserve(tp, nres, XFS_REMOVE_LOG_RES(mp), + 0, XFS_TRANS_PERM_LOG_RES, + XFS_REMOVE_LOG_COUNT); + if (error) + res_failed(error); + + libxfs_trans_ijoin(tp, ip, 0); + libxfs_trans_ihold(tp, ip); + + XFS_BMAP_INIT(&free_list, &first_block); + + error = dir_bogus_removename(mp, tp, ip, fname, + &first_block, &free_list, nres, *hashval, namelen); + + if (error) { + do_error( +"couldn't remove bogus entry \"%s\" in\n\tdirectory inode %llu, errno = %d\n", + fname, ino, error); + /* NOTREACHED */ + } + + error = libxfs_bmap_finish(&tp, &free_list, first_block, &committed); + + ASSERT(error == 0); + + libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_SYNC, 0); + + return(1); +} + +/* + * process a leaf block, also checks for .. entry + * and corrects it to match what we think .. 
should be + */ +void +lf_block_dir_entry_check(xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dir_leafblock_t *leaf, + int *dirty, + int *num_illegal, + int *need_dot, + dir_stack_t *stack, + ino_tree_node_t *current_irec, + int current_ino_offset) +{ + xfs_dir_leaf_entry_t *entry; + ino_tree_node_t *irec; + xfs_ino_t lino; + xfs_ino_t parent; + xfs_dir_leaf_name_t *namest; + int i; + int junkit; + int ino_offset; + int nbad; + char fname[MAXNAMELEN + 1]; + + entry = &leaf->entries[0]; + *dirty = 0; + nbad = 0; + + /* + * look at each entry. reference inode pointed to by each + * entry in the incore inode tree. + * if not a directory, set reached flag, increment link count + * if a directory and reached, mark entry as to be deleted. + * if a directory, check to see if recorded parent + * matches current inode #, + * if so, then set reached flag, increment link count + * of current and child dir inodes, push the child + * directory inode onto the directory stack. + * if current inode != parent, then mark entry to be deleted. + * + * return + */ + for (i = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); entry++, i++) { + /* + * snag inode #, update link counts, and make sure + * this isn't a loop if the child is a directory + */ + namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT)); + + /* + * skip bogus entries (leading '/'). they'll be deleted + * later + */ + if (namest->name[0] == '/') { + nbad++; + continue; + } + + junkit = 0; + + XFS_DIR_SF_GET_DIRINO_ARCH(&namest->inumber, &lino, ARCH_CONVERT); + bcopy(namest->name, fname, entry->namelen); + fname[entry->namelen] = '\0'; + + ASSERT(lino != NULLFSINO); + + /* + * skip the '..' entry since it's checked when the + * directory is reached by something else. if it never + * gets reached, it'll be moved to the orphanage and we'll + * take care of it then. + */ + if (entry->namelen == 2 && namest->name[0] == '.' 
&& + namest->name[1] == '.') { + continue; + } + ASSERT(no_modify || !verify_inum(mp, lino)); + + /* + * special case the . entry. we know there's only one + * '.' and only '.' points to itself because bogus entries + * got trashed in phase 3 if there were > 1. + * bump up link count for '.' but don't set reached + * until we're actually reached by another directory + * '..' is already accounted for or will be taken care + * of when directory is moved to orphanage. + */ + if (ino == lino) { + ASSERT(namest->name[0] == '.' && entry->namelen == 1); + add_inode_ref(current_irec, current_ino_offset); + *need_dot = 0; + continue; + } + + /* + * special case the "lost+found" entry if pointing + * to where we think lost+found should be. if that's + * the case, that's the one we created in phase 6. + * just skip it. no need to process it and it's .. + * link is already accounted for. + */ + + if (lino == orphanage_ino && strcmp(fname, ORPHANAGE) == 0) + continue; + + /* + * skip entries with bogus inumbers if we're in no modify mode + */ + if (no_modify && verify_inum(mp, lino)) + continue; + + /* + * ok, now handle the rest of the cases besides '.' and '..' + */ + irec = find_inode_rec(XFS_INO_TO_AGNO(mp, lino), + XFS_INO_TO_AGINO(mp, lino)); + + if (irec == NULL) { + nbad++; + do_warn( + "entry \"%s\" in dir inode %llu points to non-existent inode, ", + fname, ino); + + if (!no_modify) { + namest->name[0] = '/'; + *dirty = 1; + do_warn("marking entry to be junked\n"); + } else { + do_warn("would junk entry\n"); + } + + continue; + } + + ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum; + + /* + * if it's a free inode, blow out the entry. + * by now, any inode that we think is free + * really is free. 
+ */ + if (is_inode_free(irec, ino_offset)) { + /* + * don't complain if this entry points to the old + * and now-free lost+found inode + */ + if (verbose || no_modify || lino != old_orphanage_ino) + do_warn( + "entry \"%s\" in dir inode %llu points to free inode %llu", + fname, ino, lino); + nbad++; + + if (!no_modify) { + if (verbose || lino != old_orphanage_ino) + do_warn(", marking entry to be junked\n"); + + else + do_warn("\n"); + namest->name[0] = '/'; + *dirty = 1; + } else { + do_warn(", would junk entry\n"); + } + + continue; + } + + /* + * check easy case first, regular inode, just bump + * the link count and continue + */ + if (!inode_isadir(irec, ino_offset)) { + add_inode_reached(irec, ino_offset); + continue; + } + + parent = get_inode_parent(irec, ino_offset); + ASSERT(parent != 0); + + /* + * bump up the link counts in parent and child + * directory but if the link doesn't agree with + * the .. in the child, blow out the entry. + * if the directory has already been reached, + * blow away the entry also. + */ + if (is_inode_reached(irec, ino_offset)) { + junkit = 1; + do_warn( +"entry \"%s\" in dir %llu points to an already connected dir inode %llu,\n", + fname, ino, lino); + } else if (parent == ino) { + add_inode_reached(irec, ino_offset); + add_inode_ref(current_irec, current_ino_offset); + + if (!is_inode_refchecked(lino, irec, ino_offset)) + push_dir(stack, lino); + } else { + junkit = 1; + do_warn( +"entry \"%s\" in dir ino %llu not consistent with .. value (%llu) in ino %llu,\n", + fname, ino, parent, lino); + } + + if (junkit) { + junkit = 0; + nbad++; + + if (!no_modify) { + namest->name[0] = '/'; + *dirty = 1; + if (verbose || lino != old_orphanage_ino) + do_warn("\twill clear entry \"%s\"\n", + fname); + } else { + do_warn("\twould clear entry \"%s\"\n", fname); + } + } + } + + *num_illegal += nbad; +} + +/* + * succeeds or dies, inode never gets dirtied since all changes + * happen in file blocks. 
the inode size and other core info + * is already correct, it's just the leaf entries that get altered. + */ +void +longform_dir_entry_check(xfs_mount_t *mp, + xfs_ino_t ino, + xfs_inode_t *ip, + int *num_illegal, + int *need_dot, + dir_stack_t *stack, + ino_tree_node_t *irec, + int ino_offset) +{ + xfs_dir_leafblock_t *leaf; + xfs_buf_t *bp; + xfs_dfsbno_t fsbno; + xfs_fsblock_t fblock; + xfs_dablk_t da_bno; + int dirty; + int nmap; + int error; + int skipit; + xfs_bmbt_irec_t map; + char *ftype; + + da_bno = 0; + fblock = NULLFSBLOCK; + *need_dot = 1; + ftype = "dir"; + + fsbno = map_first_dblock_fsbno(mp, ino, ip, &da_bno); + + if (fsbno == NULLDFSBNO && no_modify) { + do_warn("cannot map block 0 of directory inode %llu\n", ino); + return; + } + + do { + ASSERT(fsbno != NULLDFSBNO); + skipit = 0; + + bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno), + XFS_FSB_TO_BB(mp, 1), 0); + + if (!bp) { + do_error( + "can't read block %u (fsbno %llu) for directory inode %llu\n", + da_bno, fsbno, ino); + /* NOTREACHED */ + } + + leaf = (xfs_dir_leafblock_t *)XFS_BUF_PTR(bp); + + da_bno = INT_GET(leaf->hdr.info.forw, ARCH_CONVERT); + + if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) { + if (!no_modify) { + do_error( + "bad magic # (0x%x) for dir ino %llu leaf block (bno %u fsbno %llu)\n", + INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), + ino, da_bno, fsbno); + /* NOTREACHED */ + } else { + /* + * this block's bad but maybe the + * forward pointer is good... 
+ */ + skipit = 1; + dirty = 0; + } + } + + if (!skipit) + lf_block_dir_entry_check(mp, ino, leaf, &dirty, + num_illegal, need_dot, stack, + irec, ino_offset); + + ASSERT(dirty == 0 || dirty && !no_modify); + + if (dirty && !no_modify) + libxfs_writebuf(bp, 0); + else + libxfs_putbuf(bp); + bp = NULL; + + if (da_bno != 0) { + nmap = 1; + error = libxfs_bmapi(NULL, ip, (xfs_fileoff_t)da_bno, 1, + XFS_BMAPI_METADATA, &fblock, 0, + &map, &nmap, NULL); + if (error || nmap != 1) { + if (!no_modify) + do_error( +"can't map leaf block %d in dir %llu, xfs_bmapi returns %d, nmap = %d\n", + da_bno, ino, error, nmap); + else { + do_warn( +"can't map leaf block %d in dir %llu, xfs_bmapi returns %d, nmap = %d\n", + da_bno, ino, error, nmap); + return; + } + } + if ((fsbno = map.br_startblock) == HOLESTARTBLOCK) { + if (!no_modify) + do_error( + "block %d in %s ino %llu doesn't exist\n", + da_bno, ftype, ino); + else { + do_warn( + "block %d in %s ino %llu doesn't exist\n", + da_bno, ftype, ino); + return; + } + } + } + } while (da_bno != 0); +} + +/* + * Kill a block in a version 2 inode. + * Makes its own transaction. 
+ */ +static void +dir2_kill_block( + xfs_mount_t *mp, + xfs_inode_t *ip, + xfs_dablk_t da_bno, + xfs_dabuf_t *bp) +{ + xfs_da_args_t args; + int committed; + int error; + xfs_fsblock_t firstblock; + xfs_bmap_free_t flist; + int nres; + xfs_trans_t *tp; + + tp = libxfs_trans_alloc(mp, 0); + nres = XFS_REMOVE_SPACE_RES(mp); + error = libxfs_trans_reserve(tp, nres, XFS_REMOVE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); + if (error) + res_failed(error); + libxfs_trans_ijoin(tp, ip, 0); + libxfs_trans_ihold(tp, ip); + libxfs_da_bjoin(tp, bp); + bzero(&args, sizeof(args)); + XFS_BMAP_INIT(&flist, &firstblock); + args.dp = ip; + args.trans = tp; + args.firstblock = &firstblock; + args.flist = &flist; + args.whichfork = XFS_DATA_FORK; + if (da_bno >= mp->m_dirleafblk && da_bno < mp->m_dirfreeblk) + error = libxfs_da_shrink_inode(&args, da_bno, bp); + else + error = libxfs_dir2_shrink_inode(&args, + XFS_DIR2_DA_TO_DB(mp, da_bno), bp); + if (error) + do_error("shrink_inode failed inode %llu block %u\n", + ip->i_ino, da_bno); + libxfs_bmap_finish(&tp, &flist, firstblock, &committed); + libxfs_trans_commit(tp, 0, 0); +} + +/* + * process a data block, also checks for .. entry + * and corrects it to match what we think .. 
should be + */ +static void +longform_dir2_entry_check_data( + xfs_mount_t *mp, + xfs_inode_t *ip, + int *num_illegal, + int *need_dot, + dir_stack_t *stack, + ino_tree_node_t *current_irec, + int current_ino_offset, + xfs_dabuf_t **bpp, + dir_hash_tab_t *hashtab, + freetab_t **freetabp, + xfs_dablk_t da_bno, + int isblock) +{ + xfs_dir2_dataptr_t addr; + xfs_dir2_leaf_entry_t *blp; + xfs_dabuf_t *bp; + xfs_dir2_block_tail_t *btp; + int committed; + xfs_dir2_data_t *d; + xfs_dir2_db_t db; + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_unused_t *dup; + char *endptr; + int error; + xfs_fsblock_t firstblock; + xfs_bmap_free_t flist; + char fname[MAXNAMELEN + 1]; + freetab_t *freetab; + int i; + int ino_offset; + ino_tree_node_t *irec; + int junkit; + int lastfree; + int len; + int nbad; + int needlog; + int needscan; + xfs_ino_t parent; + char *ptr; + xfs_trans_t *tp; + int wantmagic; + + bp = *bpp; + d = bp->data; + ptr = (char *)d->u; + nbad = 0; + needscan = needlog = 0; + freetab = *freetabp; + if (isblock) { + btp = XFS_DIR2_BLOCK_TAIL_P(mp, d); + blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + endptr = (char *)blp; + if (endptr > (char *)btp) + endptr = (char *)btp; + wantmagic = XFS_DIR2_BLOCK_MAGIC; + } else { + endptr = (char *)d + mp->m_dirblksize; + wantmagic = XFS_DIR2_DATA_MAGIC; + } + db = XFS_DIR2_DA_TO_DB(mp, da_bno); + if (freetab->naents <= db) { + struct freetab_ent e; + + *freetabp = freetab = realloc(freetab, FREETAB_SIZE(db + 1)); + if (!freetab) { + do_error( + "realloc failed in longform_dir2_entry_check_data (%u bytes)\n", + FREETAB_SIZE(db + 1)); + exit(1); + } + e.v = NULLDATAOFF; + e.s = 0; + for (i = freetab->naents; i < db; i++) + freetab->ents[i] = e; + freetab->naents = db + 1; + } + if (freetab->nents < db + 1) + freetab->nents = db + 1; + while (ptr < endptr) { + dup = (xfs_dir2_data_unused_t *)ptr; + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { + if (ptr + INT_GET(dup->length, ARCH_CONVERT) > endptr || 
INT_GET(dup->length, ARCH_CONVERT) == 0 || + (INT_GET(dup->length, ARCH_CONVERT) & (XFS_DIR2_DATA_ALIGN - 1))) + break; + if (INT_GET(*XFS_DIR2_DATA_UNUSED_TAG_P_ARCH(dup, ARCH_CONVERT), ARCH_CONVERT) != + (char *)dup - (char *)d) + break; + ptr += INT_GET(dup->length, ARCH_CONVERT); + if (ptr >= endptr) + break; + } + dep = (xfs_dir2_data_entry_t *)ptr; + if (ptr + XFS_DIR2_DATA_ENTSIZE(dep->namelen) > endptr) + break; + if (INT_GET(*XFS_DIR2_DATA_ENTRY_TAG_P(dep), ARCH_CONVERT) != (char *)dep - (char *)d) + break; + ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + } + if (ptr != endptr) { + do_warn("corrupt block %u in directory inode %llu: ", + da_bno, ip->i_ino); + if (!no_modify) { + do_warn("junking block\n"); + dir2_kill_block(mp, ip, da_bno, bp); + } else { + do_warn("would junk block\n"); + libxfs_da_brelse(NULL, bp); + } + freetab->ents[db].v = NULLDATAOFF; + *bpp = NULL; + return; + } + tp = libxfs_trans_alloc(mp, 0); + error = libxfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); + if (error) + res_failed(error); + libxfs_trans_ijoin(tp, ip, 0); + libxfs_trans_ihold(tp, ip); + libxfs_da_bjoin(tp, bp); + if (isblock) + libxfs_da_bhold(tp, bp); + XFS_BMAP_INIT(&flist, &firstblock); + if (INT_GET(d->hdr.magic, ARCH_CONVERT) != wantmagic) { + do_warn("bad directory block magic # %#x for directory inode " + "%llu block %d: ", + INT_GET(d->hdr.magic, ARCH_CONVERT), ip->i_ino, da_bno); + if (!no_modify) { + do_warn("fixing magic # to %#x\n", wantmagic); + INT_SET(d->hdr.magic, ARCH_CONVERT, wantmagic); + needlog = 1; + } else + do_warn("would fix magic # to %#x\n", wantmagic); + } + lastfree = 0; + ptr = (char *)d->u; + /* + * look at each entry. reference inode pointed to by each + * entry in the incore inode tree. + * if not a directory, set reached flag, increment link count + * if a directory and reached, mark entry as to be deleted. 
+ * if a directory, check to see if recorded parent + * matches current inode #, + * if so, then set reached flag, increment link count + * of current and child dir inodes, push the child + * directory inode onto the directory stack. + * if current inode != parent, then mark entry to be deleted. + */ + while (ptr < endptr) { + dup = (xfs_dir2_data_unused_t *)ptr; + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { + if (lastfree) { + do_warn("directory inode %llu block %u has " + "consecutive free entries: ", + ip->i_ino, da_bno); + if (!no_modify) { + do_warn("joining together\n"); + len = INT_GET(dup->length, ARCH_CONVERT); + libxfs_dir2_data_use_free(tp, bp, dup, + ptr - (char *)d, len, &needlog, + &needscan); + libxfs_dir2_data_make_free(tp, bp, + ptr - (char *)d, len, &needlog, + &needscan); + } else + do_warn("would join together\n"); + } + ptr += INT_GET(dup->length, ARCH_CONVERT); + lastfree = 1; + continue; + } + addr = XFS_DIR2_DB_OFF_TO_DATAPTR(mp, db, ptr - (char *)d); + dep = (xfs_dir2_data_entry_t *)ptr; + ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + lastfree = 0; + dir_hash_add(hashtab, + libxfs_da_hashname((char *)dep->name, dep->namelen), + addr, dep->name[0] == '/'); + /* + * skip bogus entries (leading '/'). they'll be deleted + * later + */ + if (dep->name[0] == '/') { + nbad++; + continue; + } + junkit = 0; + bcopy(dep->name, fname, dep->namelen); + fname[dep->namelen] = '\0'; + ASSERT(INT_GET(dep->inumber, ARCH_CONVERT) != NULLFSINO); + /* + * skip the '..' entry since it's checked when the + * directory is reached by something else. if it never + * gets reached, it'll be moved to the orphanage and we'll + * take care of it then. + */ + if (dep->namelen == 2 && dep->name[0] == '.' && + dep->name[1] == '.') + continue; + ASSERT(no_modify || !verify_inum(mp, INT_GET(dep->inumber, ARCH_CONVERT))); + /* + * special case the . entry. we know there's only one + * '.' and only '.' 
points to itself because bogus entries + * got trashed in phase 3 if there were > 1. + * bump up link count for '.' but don't set reached + * until we're actually reached by another directory + * '..' is already accounted for or will be taken care + * of when directory is moved to orphanage. + */ + if (ip->i_ino == INT_GET(dep->inumber, ARCH_CONVERT)) { + ASSERT(dep->name[0] == '.' && dep->namelen == 1); + add_inode_ref(current_irec, current_ino_offset); + *need_dot = 0; + continue; + } + /* + * special case the "lost+found" entry if pointing + * to where we think lost+found should be. if that's + * the case, that's the one we created in phase 6. + * just skip it. no need to process it and it's .. + * link is already accounted for. + */ + if (INT_GET(dep->inumber, ARCH_CONVERT) == orphanage_ino && + strcmp(fname, ORPHANAGE) == 0) + continue; + /* + * skip entries with bogus inumbers if we're in no modify mode + */ + if (no_modify && verify_inum(mp, INT_GET(dep->inumber, ARCH_CONVERT))) + continue; + /* + * ok, now handle the rest of the cases besides '.' and '..' + */ + irec = find_inode_rec(XFS_INO_TO_AGNO(mp, INT_GET(dep->inumber, ARCH_CONVERT)), + XFS_INO_TO_AGINO(mp, INT_GET(dep->inumber, ARCH_CONVERT))); + if (irec == NULL) { + nbad++; + do_warn("entry \"%s\" in directory inode %llu points " + "to non-existent inode, ", + fname, ip->i_ino); + if (!no_modify) { + dep->name[0] = '/'; + libxfs_dir2_data_log_entry(tp, bp, dep); + do_warn("marking entry to be junked\n"); + } else { + do_warn("would junk entry\n"); + } + continue; + } + ino_offset = + XFS_INO_TO_AGINO(mp, INT_GET(dep->inumber, ARCH_CONVERT)) - irec->ino_startnum; + /* + * if it's a free inode, blow out the entry. + * by now, any inode that we think is free + * really is free. 
+ */ + if (is_inode_free(irec, ino_offset)) { + /* + * don't complain if this entry points to the old + * and now-free lost+found inode + */ + if (verbose || no_modify || + INT_GET(dep->inumber, ARCH_CONVERT) != old_orphanage_ino) + do_warn("entry \"%s\" in directory inode %llu " + "points to free inode %llu", + fname, ip->i_ino, INT_GET(dep->inumber, ARCH_CONVERT)); + nbad++; + if (!no_modify) { + if (verbose || + INT_GET(dep->inumber, ARCH_CONVERT) != old_orphanage_ino) + do_warn(", marking entry to be " + "junked\n"); + else + do_warn("\n"); + dep->name[0] = '/'; + libxfs_dir2_data_log_entry(tp, bp, dep); + } else { + do_warn(", would junk entry\n"); + } + continue; + } + /* + * check easy case first, regular inode, just bump + * the link count and continue + */ + if (!inode_isadir(irec, ino_offset)) { + add_inode_reached(irec, ino_offset); + continue; + } + parent = get_inode_parent(irec, ino_offset); + ASSERT(parent != 0); + /* + * bump up the link counts in parent and child + * directory but if the link doesn't agree with + * the .. in the child, blow out the entry. + * if the directory has already been reached, + * blow away the entry also. + */ + if (is_inode_reached(irec, ino_offset)) { + junkit = 1; + do_warn("entry \"%s\" in dir %llu points to an already " + "connected directory inode %llu,\n", fname, + ip->i_ino, INT_GET(dep->inumber, ARCH_CONVERT)); + } else if (parent == ip->i_ino) { + add_inode_reached(irec, ino_offset); + add_inode_ref(current_irec, current_ino_offset); + if (!is_inode_refchecked(INT_GET(dep->inumber, ARCH_CONVERT), irec, + ino_offset)) + push_dir(stack, INT_GET(dep->inumber, ARCH_CONVERT)); + } else { + junkit = 1; + do_warn("entry \"%s\" in directory inode %llu not " + "consistent with .. 
value (%llu) in ino " + "%llu,\n", + fname, ip->i_ino, parent, INT_GET(dep->inumber, ARCH_CONVERT)); + } + if (junkit) { + junkit = 0; + nbad++; + if (!no_modify) { + dep->name[0] = '/'; + libxfs_dir2_data_log_entry(tp, bp, dep); + if (verbose || + INT_GET(dep->inumber, ARCH_CONVERT) != old_orphanage_ino) + do_warn("\twill clear entry \"%s\"\n", + fname); + } else { + do_warn("\twould clear entry \"%s\"\n", fname); + } + } + } + *num_illegal += nbad; + if (needscan) + libxfs_dir2_data_freescan(mp, d, &needlog, NULL); + if (needlog) + libxfs_dir2_data_log_header(tp, bp); + libxfs_bmap_finish(&tp, &flist, firstblock, &committed); + libxfs_trans_commit(tp, 0, 0); + freetab->ents[db].v = INT_GET(d->hdr.bestfree[0].length, ARCH_CONVERT); + freetab->ents[db].s = 0; +} + +/* + * Check contents of leaf-form block. + */ +int +longform_dir2_check_leaf( + xfs_mount_t *mp, + xfs_inode_t *ip, + dir_hash_tab_t *hashtab, + freetab_t *freetab) +{ + int badtail; + xfs_dir2_data_off_t *bestsp; + xfs_dabuf_t *bp; + xfs_dablk_t da_bno; + int i; + xfs_dir2_leaf_t *leaf; + xfs_dir2_leaf_tail_t *ltp; + int seeval; + + da_bno = mp->m_dirleafblk; + if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp, XFS_DATA_FORK)) { + do_error("can't read block %u for directory inode %llu\n", + da_bno, ip->i_ino); + /* NOTREACHED */ + } + leaf = bp->data; + ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf); + bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT); + if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR2_LEAF1_MAGIC || + INT_GET(leaf->hdr.info.forw, ARCH_CONVERT) || INT_GET(leaf->hdr.info.back, ARCH_CONVERT) || + INT_GET(leaf->hdr.count, ARCH_CONVERT) < INT_GET(leaf->hdr.stale, ARCH_CONVERT) || + INT_GET(leaf->hdr.count, ARCH_CONVERT) > XFS_DIR2_MAX_LEAF_ENTS(mp) || + (char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] > (char *)bestsp) { + do_warn("leaf block %u for directory inode %llu bad header\n", + da_bno, ip->i_ino); + libxfs_da_brelse(NULL, bp); + return 1; + } + seeval = 
dir_hash_see_all(hashtab, leaf->ents, INT_GET(leaf->hdr.count, ARCH_CONVERT), + INT_GET(leaf->hdr.stale, ARCH_CONVERT)); + if (dir_hash_check(hashtab, ip, seeval)) { + libxfs_da_brelse(NULL, bp); + return 1; + } + badtail = freetab->nents != INT_GET(ltp->bestcount, ARCH_CONVERT); + for (i = 0; !badtail && i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++) { + freetab->ents[i].s = 1; + badtail = freetab->ents[i].v != INT_GET(bestsp[i], ARCH_CONVERT); + } + if (badtail) { + do_warn("leaf block %u for directory inode %llu bad tail\n", + da_bno, ip->i_ino); + libxfs_da_brelse(NULL, bp); + return 1; + } + libxfs_da_brelse(NULL, bp); + return 0; +} + +/* + * Check contents of the node blocks (leaves) + * Looks for matching hash values for the data entries. + */ +int +longform_dir2_check_node( + xfs_mount_t *mp, + xfs_inode_t *ip, + dir_hash_tab_t *hashtab, + freetab_t *freetab) +{ + xfs_dabuf_t *bp; + xfs_dablk_t da_bno; + xfs_dir2_db_t fdb; + xfs_dir2_free_t *free; + int i; + xfs_dir2_leaf_t *leaf; + xfs_fileoff_t next_da_bno; + int seeval = 0; + int used; + + for (da_bno = mp->m_dirleafblk, next_da_bno = 0; + next_da_bno != NULLFILEOFF && da_bno < mp->m_dirfreeblk; + da_bno = (xfs_dablk_t)next_da_bno) { + next_da_bno = da_bno + mp->m_dirblkfsbs - 1; + if (libxfs_bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK)) + break; + if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp, + XFS_DATA_FORK)) { + do_error("can't read block %u for directory inode " + "%llu\n", + da_bno, ip->i_ino); + /* NOTREACHED */ + } + leaf = bp->data; + if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR2_LEAFN_MAGIC) { + if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) { + libxfs_da_brelse(NULL, bp); + continue; + } + do_warn("unknown magic number %#x for block %u in " + "directory inode %llu\n", + INT_GET(leaf->hdr.info.magic, ARCH_CONVERT), da_bno, ip->i_ino); + libxfs_da_brelse(NULL, bp); + return 1; + } + if (INT_GET(leaf->hdr.count, ARCH_CONVERT) < 
INT_GET(leaf->hdr.stale, ARCH_CONVERT) || + INT_GET(leaf->hdr.count, ARCH_CONVERT) > XFS_DIR2_MAX_LEAF_ENTS(mp)) { + do_warn("leaf block %u for directory inode %llu bad " + "header\n", + da_bno, ip->i_ino); + libxfs_da_brelse(NULL, bp); + return 1; + } + seeval = dir_hash_see_all(hashtab, leaf->ents, INT_GET(leaf->hdr.count, ARCH_CONVERT), + INT_GET(leaf->hdr.stale, ARCH_CONVERT)); + libxfs_da_brelse(NULL, bp); + if (seeval != DIR_HASH_CK_OK) + return 1; + } + if (dir_hash_check(hashtab, ip, seeval)) + return 1; + for (da_bno = mp->m_dirfreeblk, next_da_bno = 0; + next_da_bno != NULLFILEOFF; + da_bno = (xfs_dablk_t)next_da_bno) { + next_da_bno = da_bno + mp->m_dirblkfsbs - 1; + if (libxfs_bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK)) + break; + if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp, + XFS_DATA_FORK)) { + do_error("can't read block %u for directory inode " + "%llu\n", + da_bno, ip->i_ino); + /* NOTREACHED */ + } + free = bp->data; + fdb = XFS_DIR2_DA_TO_DB(mp, da_bno); + if (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC || + INT_GET(free->hdr.firstdb, ARCH_CONVERT) != + (fdb - XFS_DIR2_FREE_FIRSTDB(mp)) * + XFS_DIR2_MAX_FREE_BESTS(mp) || + INT_GET(free->hdr.nvalid, ARCH_CONVERT) < INT_GET(free->hdr.nused, ARCH_CONVERT)) { + do_warn("free block %u for directory inode %llu bad " + "header\n", + da_bno, ip->i_ino); + libxfs_da_brelse(NULL, bp); + return 1; + } + for (i = used = 0; i < INT_GET(free->hdr.nvalid, ARCH_CONVERT); i++) { + if (i + INT_GET(free->hdr.firstdb, ARCH_CONVERT) >= freetab->nents || + freetab->ents[i + INT_GET(free->hdr.firstdb, ARCH_CONVERT)].v != + INT_GET(free->bests[i], ARCH_CONVERT)) { + do_warn("free block %u entry %i for directory " + "ino %llu bad\n", + da_bno, i, ip->i_ino); + libxfs_da_brelse(NULL, bp); + return 1; + } + used += INT_GET(free->bests[i], ARCH_CONVERT) != NULLDATAOFF; + freetab->ents[i + INT_GET(free->hdr.firstdb, ARCH_CONVERT)].s = 1; + } + if (used != INT_GET(free->hdr.nused, 
ARCH_CONVERT)) { + do_warn("free block %u for directory inode %llu bad " + "nused\n", + da_bno, ip->i_ino); + libxfs_da_brelse(NULL, bp); + return 1; + } + libxfs_da_brelse(NULL, bp); + } + for (i = 0; i < freetab->nents; i++) { + if (freetab->ents[i].s == 0) { + do_warn("missing freetab entry %u for directory inode " + "%llu\n", + i, ip->i_ino); + return 1; + } + } + return 0; +} + +/* + * Rebuild a directory: set up. + * Turn it into a node-format directory with no contents in the + * upper area. Also has correct freespace blocks. + */ +void +longform_dir2_rebuild_setup( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_inode_t *ip, + freetab_t *freetab) +{ + xfs_da_args_t args; + int committed; + xfs_dir2_data_t *data; + xfs_dabuf_t *dbp; + int error; + xfs_dir2_db_t fbno; + xfs_dabuf_t *fbp; + xfs_fsblock_t firstblock; + xfs_bmap_free_t flist; + xfs_dir2_free_t *free; + int i; + int j; + xfs_dablk_t lblkno; + xfs_dabuf_t *lbp; + xfs_dir2_leaf_t *leaf; + int nres; + xfs_trans_t *tp; + + tp = libxfs_trans_alloc(mp, 0); + nres = XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK); + error = libxfs_trans_reserve(tp, + nres, XFS_CREATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, + XFS_CREATE_LOG_COUNT); + if (error) + res_failed(error); + libxfs_trans_ijoin(tp, ip, 0); + libxfs_trans_ihold(tp, ip); + XFS_BMAP_INIT(&flist, &firstblock); + if (libxfs_da_read_buf(tp, ip, mp->m_dirdatablk, -2, &dbp, + XFS_DATA_FORK)) { + do_error("can't read block %u for directory inode %llu\n", + mp->m_dirdatablk, ino); + /* NOTREACHED */ + } + if (dbp && (data = dbp->data)->hdr.magic == XFS_DIR2_BLOCK_MAGIC) { + xfs_dir2_block_t *block; + xfs_dir2_leaf_entry_t *blp; + xfs_dir2_block_tail_t *btp; + int needlog; + int needscan; + + INT_SET(data->hdr.magic, ARCH_CONVERT, XFS_DIR2_DATA_MAGIC); + block = (xfs_dir2_block_t *)data; + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + needlog = needscan = 0; + libxfs_dir2_data_make_free(tp, dbp, (char *)blp - (char 
*)block, + (char *)block + mp->m_dirblksize - (char *)blp, + &needlog, &needscan); + if (needscan) + libxfs_dir2_data_freescan(mp, data, &needlog, NULL); + libxfs_da_log_buf(tp, dbp, 0, mp->m_dirblksize - 1); + } + bzero(&args, sizeof(args)); + args.trans = tp; + args.dp = ip; + args.whichfork = XFS_DATA_FORK; + args.firstblock = &firstblock; + args.flist = &flist; + args.total = nres; + if ((error = libxfs_da_grow_inode(&args, &lblkno)) || + (error = libxfs_da_get_buf(tp, ip, lblkno, -1, &lbp, XFS_DATA_FORK))) { + do_error("can't add btree block to directory inode %llu\n", + ino); + /* NOTREACHED */ + } + leaf = lbp->data; + bzero(leaf, mp->m_dirblksize); + INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAFN_MAGIC); + libxfs_da_log_buf(tp, lbp, 0, mp->m_dirblksize - 1); + libxfs_bmap_finish(&tp, &flist, firstblock, &committed); + libxfs_trans_commit(tp, 0, 0); + + for (i = 0; i < freetab->nents; i += XFS_DIR2_MAX_FREE_BESTS(mp)) { + tp = libxfs_trans_alloc(mp, 0); + nres = XFS_DAENTER_SPACE_RES(mp, XFS_DATA_FORK); + error = libxfs_trans_reserve(tp, + nres, XFS_CREATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, + XFS_CREATE_LOG_COUNT); + if (error) + res_failed(error); + libxfs_trans_ijoin(tp, ip, 0); + libxfs_trans_ihold(tp, ip); + XFS_BMAP_INIT(&flist, &firstblock); + bzero(&args, sizeof(args)); + args.trans = tp; + args.dp = ip; + args.whichfork = XFS_DATA_FORK; + args.firstblock = &firstblock; + args.flist = &flist; + args.total = nres; + if ((error = libxfs_dir2_grow_inode(&args, XFS_DIR2_FREE_SPACE, + &fbno)) || + (error = libxfs_da_get_buf(tp, ip, XFS_DIR2_DB_TO_DA(mp, fbno), + -1, &fbp, XFS_DATA_FORK))) { + do_error("can't add free block to directory inode " + "%llu\n", + ino); + /* NOTREACHED */ + } + free = fbp->data; + bzero(free, mp->m_dirblksize); + INT_SET(free->hdr.magic, ARCH_CONVERT, XFS_DIR2_FREE_MAGIC); + INT_SET(free->hdr.firstdb, ARCH_CONVERT, i); + INT_SET(free->hdr.nvalid, ARCH_CONVERT, XFS_DIR2_MAX_FREE_BESTS(mp)); + if (i + 
INT_GET(free->hdr.nvalid, ARCH_CONVERT) > freetab->nents) + INT_SET(free->hdr.nvalid, ARCH_CONVERT, freetab->nents - i); + for (j = 0; j < INT_GET(free->hdr.nvalid, ARCH_CONVERT); j++) { + INT_SET(free->bests[j], ARCH_CONVERT, freetab->ents[i + j].v); + if (INT_GET(free->bests[j], ARCH_CONVERT) != NULLDATAOFF) + INT_MOD(free->hdr.nused, ARCH_CONVERT, +1); + } + libxfs_da_log_buf(tp, fbp, 0, mp->m_dirblksize - 1); + libxfs_bmap_finish(&tp, &flist, firstblock, &committed); + libxfs_trans_commit(tp, 0, 0); + } +} + +/* + * Rebuild the entries from a single data block. + */ +void +longform_dir2_rebuild_data( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_inode_t *ip, + xfs_dablk_t da_bno) +{ + xfs_dabuf_t *bp; + xfs_dir2_block_tail_t *btp; + int committed; + xfs_dir2_data_t *data; + xfs_dir2_db_t dbno; + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_unused_t *dup; + char *endptr; + int error; + xfs_dir2_free_t *fblock; + xfs_dabuf_t *fbp; + xfs_dir2_db_t fdb; + int fi; + xfs_fsblock_t firstblock; + xfs_bmap_free_t flist; + int needlog; + int needscan; + int nres; + char *ptr; + xfs_trans_t *tp; + + if (libxfs_da_read_buf(NULL, ip, da_bno, da_bno == 0 ? -2 : -1, &bp, + XFS_DATA_FORK)) { + do_error("can't read block %u for directory inode %llu\n", + da_bno, ino); + /* NOTREACHED */ + } + if (da_bno == 0 && bp == NULL) + /* + * The block was punched out. 
+ */ + return; + ASSERT(bp); + dbno = XFS_DIR2_DA_TO_DB(mp, da_bno); + fdb = XFS_DIR2_DB_TO_FDB(mp, dbno); + if (libxfs_da_read_buf(NULL, ip, XFS_DIR2_DB_TO_DA(mp, fdb), -1, &fbp, + XFS_DATA_FORK)) { + do_error("can't read block %u for directory inode %llu\n", + XFS_DIR2_DB_TO_DA(mp, fdb), ino); + /* NOTREACHED */ + } + data = malloc(mp->m_dirblksize); + if (!data) { + do_error( + "malloc failed in longform_dir2_rebuild_data (%u bytes)\n", + mp->m_dirblksize); + exit(1); + } + bcopy(bp->data, data, mp->m_dirblksize); + ptr = (char *)data->u; + if (INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_BLOCK_MAGIC) { + btp = XFS_DIR2_BLOCK_TAIL_P(mp, (xfs_dir2_block_t *)data); + endptr = (char *)XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + } else + endptr = (char *)data + mp->m_dirblksize; + fblock = fbp->data; + fi = XFS_DIR2_DB_TO_FDINDEX(mp, dbno); + tp = libxfs_trans_alloc(mp, 0); + error = libxfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); + if (error) + res_failed(error); + libxfs_trans_ijoin(tp, ip, 0); + libxfs_trans_ihold(tp, ip); + libxfs_da_bjoin(tp, bp); + libxfs_da_bhold(tp, bp); + libxfs_da_bjoin(tp, fbp); + libxfs_da_bhold(tp, fbp); + XFS_BMAP_INIT(&flist, &firstblock); + needlog = needscan = 0; + bzero(((xfs_dir2_data_t *)(bp->data))->hdr.bestfree, + sizeof(data->hdr.bestfree)); + libxfs_dir2_data_make_free(tp, bp, (xfs_dir2_data_aoff_t)sizeof(data->hdr), + mp->m_dirblksize - sizeof(data->hdr), &needlog, &needscan); + ASSERT(needscan == 0); + libxfs_dir2_data_log_header(tp, bp); + INT_SET(fblock->bests[fi], ARCH_CONVERT, + INT_GET(((xfs_dir2_data_t *)(bp->data))->hdr.bestfree[0].length, ARCH_CONVERT)); + libxfs_dir2_free_log_bests(tp, fbp, fi, fi); + libxfs_bmap_finish(&tp, &flist, firstblock, &committed); + libxfs_trans_commit(tp, 0, 0); + + while (ptr < endptr) { + dup = (xfs_dir2_data_unused_t *)ptr; + if (INT_GET(dup->freetag, ARCH_CONVERT) == XFS_DIR2_DATA_FREE_TAG) { + ptr += 
INT_GET(dup->length, ARCH_CONVERT); + continue; + } + dep = (xfs_dir2_data_entry_t *)ptr; + ptr += XFS_DIR2_DATA_ENTSIZE(dep->namelen); + if (dep->name[0] == '/') + continue; + tp = libxfs_trans_alloc(mp, 0); + nres = XFS_CREATE_SPACE_RES(mp, dep->namelen); + error = libxfs_trans_reserve(tp, nres, XFS_CREATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); + if (error) + res_failed(error); + libxfs_trans_ijoin(tp, ip, 0); + libxfs_trans_ihold(tp, ip); + libxfs_da_bjoin(tp, bp); + libxfs_da_bhold(tp, bp); + libxfs_da_bjoin(tp, fbp); + libxfs_da_bhold(tp, fbp); + XFS_BMAP_INIT(&flist, &firstblock); + error = dir_createname(mp, tp, ip, (char *)dep->name, + dep->namelen, INT_GET(dep->inumber, ARCH_CONVERT), + &firstblock, &flist, nres); + ASSERT(error == 0); + libxfs_bmap_finish(&tp, &flist, firstblock, &committed); + libxfs_trans_commit(tp, 0, 0); + } + libxfs_da_brelse(NULL, bp); + libxfs_da_brelse(NULL, fbp); + free(data); +} + +/* + * Finish the rebuild of a directory. + * Stuff / in and then remove it, this forces the directory to end + * up in the right format. 
+ */ +void +longform_dir2_rebuild_finish( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_inode_t *ip) +{ + int committed; + int error; + xfs_fsblock_t firstblock; + xfs_bmap_free_t flist; + int nres; + xfs_trans_t *tp; + + tp = libxfs_trans_alloc(mp, 0); + nres = XFS_CREATE_SPACE_RES(mp, 1); + error = libxfs_trans_reserve(tp, nres, XFS_CREATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); + if (error) + res_failed(error); + libxfs_trans_ijoin(tp, ip, 0); + libxfs_trans_ihold(tp, ip); + XFS_BMAP_INIT(&flist, &firstblock); + error = dir_createname(mp, tp, ip, "/", 1, ino, + &firstblock, &flist, nres); + ASSERT(error == 0); + libxfs_bmap_finish(&tp, &flist, firstblock, &committed); + libxfs_trans_commit(tp, 0, 0); + + /* could kill trailing empty data blocks here */ + + tp = libxfs_trans_alloc(mp, 0); + nres = XFS_REMOVE_SPACE_RES(mp); + error = libxfs_trans_reserve(tp, nres, XFS_REMOVE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); + if (error) + res_failed(error); + libxfs_trans_ijoin(tp, ip, 0); + libxfs_trans_ihold(tp, ip); + XFS_BMAP_INIT(&flist, &firstblock); + error = dir_removename(mp, tp, ip, "/", 1, ino, + &firstblock, &flist, nres); + ASSERT(error == 0); + libxfs_bmap_finish(&tp, &flist, firstblock, &committed); + libxfs_trans_commit(tp, 0, 0); +} + +/* + * Rebuild a directory. + * Remove all the non-data blocks. + * Re-initialize to (empty) node form. + * Loop over the data blocks reinserting each entry. + * Force the directory into the right format. + */ +void +longform_dir2_rebuild( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_inode_t *ip, + int *num_illegal, + freetab_t *freetab, + int isblock) +{ + xfs_dabuf_t *bp; + xfs_dablk_t da_bno; + xfs_fileoff_t next_da_bno; + + do_warn("rebuilding directory inode %llu\n", ino); + for (da_bno = mp->m_dirleafblk, next_da_bno = isblock ? 
NULLFILEOFF : 0; + next_da_bno != NULLFILEOFF; + da_bno = (xfs_dablk_t)next_da_bno) { + next_da_bno = da_bno + mp->m_dirblkfsbs - 1; + if (libxfs_bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK)) + break; + if (libxfs_da_get_buf(NULL, ip, da_bno, -1, &bp, XFS_DATA_FORK)) { + do_error("can't get block %u for directory inode " + "%llu\n", + da_bno, ino); + /* NOTREACHED */ + } + dir2_kill_block(mp, ip, da_bno, bp); + } + longform_dir2_rebuild_setup(mp, ino, ip, freetab); + for (da_bno = mp->m_dirdatablk, next_da_bno = 0; + da_bno < mp->m_dirleafblk && next_da_bno != NULLFILEOFF; + da_bno = (xfs_dablk_t)next_da_bno) { + next_da_bno = da_bno + mp->m_dirblkfsbs - 1; + if (libxfs_bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK)) + break; + longform_dir2_rebuild_data(mp, ino, ip, da_bno); + } + longform_dir2_rebuild_finish(mp, ino, ip); + *num_illegal = 0; +} + +/* + * succeeds or dies, inode never gets dirtied since all changes + * happen in file blocks. the inode size and other core info + * is already correct, it's just the leaf entries that get altered. 
+ * XXX above comment is wrong for v2 - need to see why it matters + */ +void +longform_dir2_entry_check(xfs_mount_t *mp, + xfs_ino_t ino, + xfs_inode_t *ip, + int *num_illegal, + int *need_dot, + dir_stack_t *stack, + ino_tree_node_t *irec, + int ino_offset) +{ + xfs_dir2_block_t *block; + xfs_dir2_leaf_entry_t *blp; + xfs_dabuf_t *bp; + xfs_dir2_block_tail_t *btp; + xfs_dablk_t da_bno; + freetab_t *freetab; + dir_hash_tab_t *hashtab; + int i; + int isblock; + int isleaf; + xfs_fileoff_t next_da_bno; + int seeval; + int fixit; + + *need_dot = 1; + freetab = malloc(FREETAB_SIZE(ip->i_d.di_size / mp->m_dirblksize)); + if (!freetab) { + do_error( + "malloc failed in longform_dir2_entry_check (%u bytes)\n", + FREETAB_SIZE(ip->i_d.di_size / mp->m_dirblksize)); + exit(1); + } + freetab->naents = ip->i_d.di_size / mp->m_dirblksize; + freetab->nents = 0; + for (i = 0; i < freetab->naents; i++) { + freetab->ents[i].v = NULLDATAOFF; + freetab->ents[i].s = 0; + } + libxfs_dir2_isblock(NULL, ip, &isblock); + libxfs_dir2_isleaf(NULL, ip, &isleaf); + hashtab = dir_hash_init(ip->i_d.di_size); + for (da_bno = 0, next_da_bno = 0; + next_da_bno != NULLFILEOFF && da_bno < mp->m_dirleafblk; + da_bno = (xfs_dablk_t)next_da_bno) { + next_da_bno = da_bno + mp->m_dirblkfsbs - 1; + if (libxfs_bmap_next_offset(NULL, ip, &next_da_bno, XFS_DATA_FORK)) + break; + if (libxfs_da_read_bufr(NULL, ip, da_bno, -1, &bp, + XFS_DATA_FORK)) { + do_error("can't read block %u for directory inode " + "%llu\n", + da_bno, ino); + /* NOTREACHED */ + } + longform_dir2_entry_check_data(mp, ip, num_illegal, need_dot, + stack, irec, ino_offset, &bp, hashtab, &freetab, da_bno, + isblock); + /* it releases the buffer unless isblock is set */ + } + fixit = (*num_illegal != 0) || dir2_is_badino(ino); + if (isblock) { + ASSERT(bp); + block = bp->data; + btp = XFS_DIR2_BLOCK_TAIL_P(mp, block); + blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT); + seeval = dir_hash_see_all(hashtab, blp, INT_GET(btp->count, 
ARCH_CONVERT), INT_GET(btp->stale, ARCH_CONVERT)); + if (dir_hash_check(hashtab, ip, seeval)) + fixit |= 1; + libxfs_da_brelse(NULL, bp); + } else if (isleaf) { + fixit |= longform_dir2_check_leaf(mp, ip, hashtab, freetab); + } else { + fixit |= longform_dir2_check_node(mp, ip, hashtab, freetab); + } + dir_hash_done(hashtab); + if (!no_modify && fixit) + longform_dir2_rebuild(mp, ino, ip, num_illegal, freetab, + isblock); + free(freetab); +} + +/* + * shortform directory processing routines -- entry verification and + * bad entry deletion (pruning). + */ +void +shortform_dir_entry_check(xfs_mount_t *mp, + xfs_ino_t ino, + xfs_inode_t *ip, + int *ino_dirty, + dir_stack_t *stack, + ino_tree_node_t *current_irec, + int current_ino_offset) +{ + xfs_ino_t lino; + xfs_ino_t parent; + xfs_dir_shortform_t *sf; + xfs_dir_sf_entry_t *sf_entry, *next_sfe, *tmp_sfe; + xfs_ifork_t *ifp; + ino_tree_node_t *irec; + int max_size; + int ino_offset; + int i; + int junkit; + int tmp_len; + int tmp_elen; + int bad_sfnamelen; + int namelen; + int bytes_deleted; + char fname[MAXNAMELEN + 1]; + + ifp = &ip->i_df; + sf = (xfs_dir_shortform_t *) ifp->if_u1.if_data; + *ino_dirty = 0; + bytes_deleted = 0; + + max_size = ifp->if_bytes; + ASSERT(ip->i_d.di_size <= ifp->if_bytes); + + /* + * no '.' entry in shortform dirs, just bump up ref count by 1 + * '..' was already (or will be) accounted for and checked when + * the directory is reached or will be taken care of when the + * directory is moved to orphanage. + */ + add_inode_ref(current_irec, current_ino_offset); + + /* + * now run through entries, stop at first bad entry, don't need + * to skip over '..' since that's encoded in its own field and + * no need to worry about '.' since it doesn't exist. 
+ */ + sf_entry = next_sfe = &sf->list[0]; + if (sf == NULL) { + junkit = 1; + do_warn("shortform dir inode %llu has null data entries \n", ino); + + } + else { + for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT) && max_size > + (__psint_t)next_sfe - (__psint_t)sf; + sf_entry = next_sfe, i++) { + junkit = 0; + bad_sfnamelen = 0; + tmp_sfe = NULL; + + XFS_DIR_SF_GET_DIRINO_ARCH(&sf_entry->inumber, &lino, ARCH_CONVERT); + + namelen = sf_entry->namelen; + + ASSERT(no_modify || namelen > 0); + + if (no_modify && namelen == 0) { + /* + * if we're really lucky, this is + * the last entry in which case we + * can use the dir size to set the + * namelen value. otherwise, forget + * it because we're not going to be + * able to find the next entry. + */ + bad_sfnamelen = 1; + + if (i == INT_GET(sf->hdr.count, ARCH_CONVERT) - 1) { + namelen = ip->i_d.di_size - + ((__psint_t) &sf_entry->name[0] - + (__psint_t) sf); + } else { + /* + * don't process the rest of the directory, + * break out of processing looop + */ + break; + } + } else if (no_modify && (__psint_t) sf_entry - (__psint_t) sf + + + XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry) + > ip->i_d.di_size) { + bad_sfnamelen = 1; + + if (i == INT_GET(sf->hdr.count, ARCH_CONVERT) - 1) { + namelen = ip->i_d.di_size - + ((__psint_t) &sf_entry->name[0] - + (__psint_t) sf); + } else { + /* + * don't process the rest of the directory, + * break out of processing looop + */ + break; + } + } + + bcopy(sf_entry->name, fname, sf_entry->namelen); + fname[sf_entry->namelen] = '\0'; + + ASSERT(no_modify || lino != NULLFSINO); + ASSERT(no_modify || !verify_inum(mp, lino)); + + /* + * special case the "lost+found" entry if it's pointing + * to where we think lost+found should be. if that's + * the case, that's the one we created in phase 6. + * just skip it. no need to process it and its .. + * link is already accounted for. Also skip entries + * with bogus inode numbers if we're in no modify mode. 
+ */ + + if (lino == orphanage_ino && strcmp(fname, ORPHANAGE) == 0 + || no_modify && verify_inum(mp, lino)) { + next_sfe = (xfs_dir_sf_entry_t *) + ((__psint_t) sf_entry + + XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)); + continue; + } + + irec = find_inode_rec(XFS_INO_TO_AGNO(mp, lino), + XFS_INO_TO_AGINO(mp, lino)); + + if (irec == NULL && no_modify) { + do_warn( +"entry \"%s\" in shortform dir %llu references non-existent ino %llu\n", + fname, ino, lino); + do_warn("would junk entry\n"); + continue; + } + + ASSERT(irec != NULL); + + ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum; + + /* + * if it's a free inode, blow out the entry. + * by now, any inode that we think is free + * really is free. + */ + if (is_inode_free(irec, ino_offset)) { + /* + * don't complain if this entry points to the old + * and now-free lost+found inode + */ + if (verbose || no_modify || lino != old_orphanage_ino) + do_warn( + "entry \"%s\" in shortform dir inode %llu points to free inode %llu\n", + fname, ino, lino); + + if (!no_modify) { + junkit = 1; + } else { + do_warn("would junk entry \"%s\"\n", + fname); + } + } else if (!inode_isadir(irec, ino_offset)) { + /* + * check easy case first, regular inode, just bump + * the link count and continue + */ + add_inode_reached(irec, ino_offset); + + next_sfe = (xfs_dir_sf_entry_t *) + ((__psint_t) sf_entry + + XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)); + continue; + } else { + parent = get_inode_parent(irec, ino_offset); + + /* + * bump up the link counts in parent and child. + * directory but if the link doesn't agree with + * the .. 
in the child, blow out the entry + */ + if (is_inode_reached(irec, ino_offset)) { + junkit = 1; + do_warn( + "entry \"%s\" in dir %llu references already connected dir ino %llu,\n", + fname, ino, lino); + } else if (parent == ino) { + add_inode_reached(irec, ino_offset); + add_inode_ref(current_irec, current_ino_offset); + + if (!is_inode_refchecked(lino, irec, + ino_offset)) + push_dir(stack, lino); + } else { + junkit = 1; + do_warn( +"entry \"%s\" in dir %llu not consistent with .. value (%llu) in dir ino %llu,\n", + fname, ino, parent, lino); + } + } + + if (junkit) { + if (!no_modify) { + tmp_elen = XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry); + tmp_sfe = (xfs_dir_sf_entry_t *) + ((__psint_t) sf_entry + tmp_elen); + tmp_len = max_size - ((__psint_t) tmp_sfe + - (__psint_t) sf); + max_size -= tmp_elen; + bytes_deleted += tmp_elen; + + memmove(sf_entry, tmp_sfe, tmp_len); + + INT_MOD(sf->hdr.count, ARCH_CONVERT, -1); + bzero((void *) ((__psint_t) sf_entry + tmp_len), + tmp_elen); + + /* + * set the tmp value to the current + * pointer so we'll process the entry + * we just moved up + */ + tmp_sfe = sf_entry; + + /* + * WARNING: drop the index i by one + * so it matches the decremented count for + * accurate comparisons in the loop test + */ + i--; + + *ino_dirty = 1; + + if (verbose || lino != old_orphanage_ino) + do_warn( + "junking entry \"%s\" in directory inode %llu\n", + fname, lino); + } else { + do_warn("would junk entry \"%s\"\n", fname); + } + } + + /* + * go onto next entry unless we've just junked an + * entry in which the current entry pointer points + * to an unprocessed entry. have to take into entries + * with bad namelen into account in no modify mode since we + * calculate size based on next_sfe. + */ + ASSERT(no_modify || bad_sfnamelen == 0); + + next_sfe = (tmp_sfe == NULL) + ? (xfs_dir_sf_entry_t *) ((__psint_t) sf_entry + + ((!bad_sfnamelen) + ? 
XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry) + : sizeof(xfs_dir_sf_entry_t) - 1 + + namelen)) + : tmp_sfe; + } + } + + /* + * sync up sizes if required + */ + if (*ino_dirty) { + ASSERT(bytes_deleted > 0); + ASSERT(!no_modify); + libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK); + ip->i_d.di_size -= bytes_deleted; + } + + if (ip->i_d.di_size != ip->i_df.if_bytes) { + ASSERT(ip->i_df.if_bytes == (xfs_fsize_t) + ((__psint_t) next_sfe - (__psint_t) sf)); + ip->i_d.di_size = (xfs_fsize_t) + ((__psint_t) next_sfe - (__psint_t) sf); + do_warn( + "setting size to %lld bytes to reflect junked entries\n", + ip->i_d.di_size); + *ino_dirty = 1; + } +} + +/* ARGSUSED */ +void +prune_sf_dir_entry(xfs_mount_t *mp, xfs_ino_t ino, xfs_inode_t *ip) +{ + /* REFERENCED */ + xfs_ino_t lino; + xfs_dir_shortform_t *sf; + xfs_dir_sf_entry_t *sf_entry, *next_sfe, *tmp_sfe; + xfs_ifork_t *ifp; + int max_size; + int i; + int tmp_len; + int tmp_elen; + int bytes_deleted; + char fname[MAXNAMELEN + 1]; + + ifp = &ip->i_df; + sf = (xfs_dir_shortform_t *) ifp->if_u1.if_data; + bytes_deleted = 0; + + max_size = ifp->if_bytes; + ASSERT(ip->i_d.di_size <= ifp->if_bytes); + + /* + * now run through entries and delete every bad entry + */ + sf_entry = next_sfe = &sf->list[0]; + + for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT) && max_size > + (__psint_t)next_sfe - (__psint_t)sf; + sf_entry = next_sfe, i++) { + tmp_sfe = NULL; + + XFS_DIR_SF_GET_DIRINO_ARCH(&sf_entry->inumber, &lino, ARCH_CONVERT); + + bcopy(sf_entry->name, fname, sf_entry->namelen); + fname[sf_entry->namelen] = '\0'; + + if (sf_entry->name[0] == '/') { + if (!no_modify) { + tmp_elen = XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry); + tmp_sfe = (xfs_dir_sf_entry_t *) + ((__psint_t) sf_entry + tmp_elen); + tmp_len = max_size - ((__psint_t) tmp_sfe + - (__psint_t) sf); + max_size -= tmp_elen; + bytes_deleted += tmp_elen; + + memmove(sf_entry, tmp_sfe, tmp_len); + + INT_MOD(sf->hdr.count, ARCH_CONVERT, -1); + bzero((void *) ((__psint_t) 
sf_entry + tmp_len), + tmp_elen); + + /* + * set the tmp value to the current + * pointer so we'll process the entry + * we just moved up + */ + tmp_sfe = sf_entry; + + /* + * WARNING: drop the index i by one + * so it matches the decremented count for + * accurate comparisons in the loop test + */ + i--; + } + } + next_sfe = (tmp_sfe == NULL) + ? (xfs_dir_sf_entry_t *) ((__psint_t) sf_entry + + XFS_DIR_SF_ENTSIZE_BYENTRY(sf_entry)) + : tmp_sfe; + } + + /* + * sync up sizes if required + */ + if (bytes_deleted > 0) { + libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK); + ip->i_d.di_size -= bytes_deleted; + } + + if (ip->i_d.di_size != ip->i_df.if_bytes) { + ASSERT(ip->i_df.if_bytes == (xfs_fsize_t) + ((__psint_t) next_sfe - (__psint_t) sf)); + ip->i_d.di_size = (xfs_fsize_t) + ((__psint_t) next_sfe - (__psint_t) sf); + do_warn( + "setting size to %lld bytes to reflect junked entries\n", + ip->i_d.di_size); + } +} + +/* + * shortform directory v2 processing routines -- entry verification and + * bad entry deletion (pruning). + */ +void +shortform_dir2_entry_check(xfs_mount_t *mp, + xfs_ino_t ino, + xfs_inode_t *ip, + int *ino_dirty, + dir_stack_t *stack, + ino_tree_node_t *current_irec, + int current_ino_offset) +{ + xfs_ino_t lino; + xfs_ino_t parent; + xfs_dir2_sf_t *sfp; + xfs_dir2_sf_entry_t *sfep, *next_sfep, *tmp_sfep; + xfs_ifork_t *ifp; + ino_tree_node_t *irec; + int max_size; + int ino_offset; + int i; + int junkit; + int tmp_len; + int tmp_elen; + int bad_sfnamelen; + int namelen; + int bytes_deleted; + char fname[MAXNAMELEN + 1]; + int i8; + + ifp = &ip->i_df; + sfp = (xfs_dir2_sf_t *) ifp->if_u1.if_data; + *ino_dirty = 0; + bytes_deleted = i8 = 0; + + max_size = ifp->if_bytes; + ASSERT(ip->i_d.di_size <= ifp->if_bytes); + + /* + * no '.' entry in shortform dirs, just bump up ref count by 1 + * '..' 
was already (or will be) accounted for and checked when + * the directory is reached or will be taken care of when the + * directory is moved to orphanage. + */ + add_inode_ref(current_irec, current_ino_offset); + + /* + * now run through entries, stop at first bad entry, don't need + * to skip over '..' since that's encoded in its own field and + * no need to worry about '.' since it doesn't exist. + */ + sfep = next_sfep = XFS_DIR2_SF_FIRSTENTRY(sfp); + + for (i = 0; i < INT_GET(sfp->hdr.count, ARCH_CONVERT) && max_size > + (__psint_t)next_sfep - (__psint_t)sfp; + sfep = next_sfep, i++) { + junkit = 0; + bad_sfnamelen = 0; + tmp_sfep = NULL; + + lino = XFS_DIR2_SF_GET_INUMBER_ARCH(sfp, XFS_DIR2_SF_INUMBERP(sfep), ARCH_CONVERT); + + namelen = sfep->namelen; + + ASSERT(no_modify || namelen > 0); + + if (no_modify && namelen == 0) { + /* + * if we're really lucky, this is + * the last entry in which case we + * can use the dir size to set the + * namelen value. otherwise, forget + * it because we're not going to be + * able to find the next entry. 
+ */ + bad_sfnamelen = 1; + + if (i == INT_GET(sfp->hdr.count, ARCH_CONVERT) - 1) { + namelen = ip->i_d.di_size - + ((__psint_t) &sfep->name[0] - + (__psint_t) sfp); + } else { + /* + * don't process the rest of the directory, + * break out of processing loop + */ + break; + } + } else if (no_modify && (__psint_t) sfep - (__psint_t) sfp + + + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep) + > ip->i_d.di_size) { + bad_sfnamelen = 1; + + if (i == INT_GET(sfp->hdr.count, ARCH_CONVERT) - 1) { + namelen = ip->i_d.di_size - + ((__psint_t) &sfep->name[0] - + (__psint_t) sfp); + } else { + /* + * don't process the rest of the directory, + * break out of processing loop + */ + break; + } + } + + bcopy(sfep->name, fname, sfep->namelen); + fname[sfep->namelen] = '\0'; + + ASSERT(no_modify || (lino != NULLFSINO && lino != 0)); + ASSERT(no_modify || !verify_inum(mp, lino)); + + /* + * special case the "lost+found" entry if it's pointing + * to where we think lost+found should be. if that's + * the case, that's the one we created in phase 6. + * just skip it. no need to process it and its .. + * link is already accounted for. Also skip entries + * with bogus inode numbers if we're in no modify mode. + */ + + if (lino == orphanage_ino && strcmp(fname, ORPHANAGE) == 0 + || no_modify && verify_inum(mp, lino)) { + next_sfep = (xfs_dir2_sf_entry_t *) + ((__psint_t) sfep + + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep)); + continue; + } + + irec = find_inode_rec(XFS_INO_TO_AGNO(mp, lino), + XFS_INO_TO_AGINO(mp, lino)); + + if (irec == NULL && no_modify) { + do_warn("entry \"%s\" in shortform directory %llu " + "references non-existent inode %llu\n", + fname, ino, lino); + do_warn("would junk entry\n"); + continue; + } + + ASSERT(irec != NULL); + + ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum; + + /* + * if it's a free inode, blow out the entry. + * by now, any inode that we think is free + * really is free. 
+ */ + if (is_inode_free(irec, ino_offset)) { + /* + * don't complain if this entry points to the old + * and now-free lost+found inode + */ + if (verbose || no_modify || lino != old_orphanage_ino) + do_warn("entry \"%s\" in shortform directory " + "inode %llu points to free inode " + "%llu\n", + fname, ino, lino); + + if (!no_modify) { + junkit = 1; + } else { + do_warn("would junk entry \"%s\"\n", + fname); + } + } else if (!inode_isadir(irec, ino_offset)) { + /* + * check easy case first, regular inode, just bump + * the link count and continue + */ + add_inode_reached(irec, ino_offset); + + next_sfep = (xfs_dir2_sf_entry_t *) + ((__psint_t) sfep + + XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep)); + continue; + } else { + parent = get_inode_parent(irec, ino_offset); + + /* + * bump up the link counts in parent and child. + * directory but if the link doesn't agree with + * the .. in the child, blow out the entry + */ + if (is_inode_reached(irec, ino_offset)) { + junkit = 1; + do_warn("entry \"%s\" in directory inode %llu " + "references already connected inode " + "%llu,\n", + fname, ino, lino); + } else if (parent == ino) { + add_inode_reached(irec, ino_offset); + add_inode_ref(current_irec, current_ino_offset); + + if (!is_inode_refchecked(lino, irec, + ino_offset)) + push_dir(stack, lino); + } else { + junkit = 1; + do_warn("entry \"%s\" in directory inode %llu " + "not consistent with .. 
value (%llu) " + "in inode %llu,\n", + fname, ino, parent, lino); + } + } + + if (junkit) { + if (!no_modify) { + tmp_elen = XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep); + tmp_sfep = (xfs_dir2_sf_entry_t *) + ((__psint_t) sfep + tmp_elen); + tmp_len = max_size - ((__psint_t) tmp_sfep + - (__psint_t) sfp); + max_size -= tmp_elen; + bytes_deleted += tmp_elen; + + memmove(sfep, tmp_sfep, tmp_len); + + INT_MOD(sfp->hdr.count, ARCH_CONVERT, -1); + bzero((void *) ((__psint_t) sfep + tmp_len), + tmp_elen); + + /* + * set the tmp value to the current + * pointer so we'll process the entry + * we just moved up + */ + tmp_sfep = sfep; + + /* + * WARNING: drop the index i by one + * so it matches the decremented count for + * accurate comparisons in the loop test + */ + i--; + + *ino_dirty = 1; + + if (verbose || lino != old_orphanage_ino) + do_warn("junking entry \"%s\" in " + "directory inode %llu\n", + fname, lino); + } else { + do_warn("would junk entry \"%s\"\n", fname); + } + } else if (lino > XFS_DIR2_MAX_SHORT_INUM) + i8++; + + /* + * go onto next entry unless we've just junked an + * entry in which the current entry pointer points + * to an unprocessed entry. have to take into entries + * with bad namelen into account in no modify mode since we + * calculate size based on next_sfep. + */ + ASSERT(no_modify || bad_sfnamelen == 0); + + next_sfep = (tmp_sfep == NULL) + ? (xfs_dir2_sf_entry_t *) ((__psint_t) sfep + + ((!bad_sfnamelen) + ? 
XFS_DIR2_SF_ENTSIZE_BYENTRY(sfp, sfep) + : XFS_DIR2_SF_ENTSIZE_BYNAME(sfp, namelen))) + : tmp_sfep; + } + + if (sfp->hdr.i8count != i8) { + if (no_modify) { + do_warn("would fix i8count in inode %llu\n", ino); + } else { + if (i8 == 0) { + tmp_sfep = next_sfep; + process_sf_dir2_fixi8(sfp, &tmp_sfep); + bytes_deleted += + (__psint_t)next_sfep - + (__psint_t)tmp_sfep; + next_sfep = tmp_sfep; + } else + sfp->hdr.i8count = i8; + *ino_dirty = 1; + do_warn("fixing i8count in inode %llu\n", ino); + } + } + + /* + * sync up sizes if required + */ + if (*ino_dirty) { + ASSERT(bytes_deleted > 0); + ASSERT(!no_modify); + libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK); + ip->i_d.di_size -= bytes_deleted; + } + + if (ip->i_d.di_size != ip->i_df.if_bytes) { + ASSERT(ip->i_df.if_bytes == (xfs_fsize_t) + ((__psint_t) next_sfep - (__psint_t) sfp)); + ip->i_d.di_size = (xfs_fsize_t) + ((__psint_t) next_sfep - (__psint_t) sfp); + do_warn("setting size to %lld bytes to reflect junked " + "entries\n", + ip->i_d.di_size); + *ino_dirty = 1; + } +} + +/* + * processes all directories reachable via the inodes on the stack + * returns 0 if things are good, 1 if there's a problem + */ +void +process_dirstack(xfs_mount_t *mp, dir_stack_t *stack) +{ + xfs_bmap_free_t flist; + xfs_fsblock_t first; + xfs_ino_t ino; + xfs_inode_t *ip; + xfs_trans_t *tp; + xfs_dahash_t hashval; + ino_tree_node_t *irec; + int ino_offset, need_dot, committed; + int dirty, num_illegal, error, nres; + + /* + * pull directory inode # off directory stack + * + * open up directory inode, check all entries, + * then call prune_dir_entries to remove all + * remaining illegal directory entries. 
+ */ + + while ((ino = pop_dir(stack)) != NULLFSINO) { + irec = find_inode_rec(XFS_INO_TO_AGNO(mp, ino), + XFS_INO_TO_AGINO(mp, ino)); + ASSERT(irec != NULL); + + ino_offset = XFS_INO_TO_AGINO(mp, ino) - irec->ino_startnum; + + ASSERT(!is_inode_refchecked(ino, irec, ino_offset)); + + if (error = libxfs_iget(mp, NULL, ino, 0, &ip, 0)) { + if (!no_modify) + do_error("couldn't map inode %llu, err = %d\n", + ino, error); + else { + do_warn("couldn't map inode %llu, err = %d\n", + ino, error); + /* + * see below for what we're doing if this + * is root. Why do we need to do this here? + * to ensure that the root doesn't show up + * as being disconnected in the no_modify case. + */ + if (mp->m_sb.sb_rootino == ino) { + add_inode_reached(irec, 0); + add_inode_ref(irec, 0); + } + } + + add_inode_refchecked(ino, irec, 0); + continue; + } + + need_dot = dirty = num_illegal = 0; + + if (mp->m_sb.sb_rootino == ino) { + /* + * mark root inode reached and bump up + * link count for root inode to account + * for '..' entry since the root inode is + * never reached by a parent. we know + * that root's '..' is always good -- + * guaranteed by phase 3 and/or below. + */ + add_inode_reached(irec, ino_offset); + /* + * account for link for the orphanage + * "lost+found". if we're running in + * modify mode and it already existed, + * we deleted it so it's '..' reference + * never got counted. so add it here if + * we're going to create lost+found. + * + * if we're running in no_modify mode, + * we never deleted lost+found and we're + * not going to create it so do nothing. + * + * either way, the counts will match when + * we look at the root inode's nlinks + * field and compare that to our incore + * count in phase 7. 
+ */ + if (!no_modify) + add_inode_ref(irec, ino_offset); + } + + add_inode_refchecked(ino, irec, ino_offset); + + /* + * look for bogus entries + */ + switch (ip->i_d.di_format) { + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + /* + * also check for missing '.' in longform dirs. + * missing .. entries are added if required when + * the directory is connected to lost+found. but + * we need to create '.' entries here. + */ + if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) + longform_dir2_entry_check(mp, ino, ip, + &num_illegal, &need_dot, + stack, irec, + ino_offset); + else + longform_dir_entry_check(mp, ino, ip, + &num_illegal, &need_dot, + stack, irec, + ino_offset); + break; + case XFS_DINODE_FMT_LOCAL: + tp = libxfs_trans_alloc(mp, 0); + /* + * using the remove reservation is overkill + * since at most we'll only need to log the + * inode but it's easier than wedging a + * new define in ourselves. + */ + nres = no_modify ? 0 : XFS_REMOVE_SPACE_RES(mp); + error = libxfs_trans_reserve(tp, nres, + XFS_REMOVE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_REMOVE_LOG_COUNT); + if (error) + res_failed(error); + + libxfs_trans_ijoin(tp, ip, 0); + libxfs_trans_ihold(tp, ip); + + if (XFS_SB_VERSION_HASDIRV2(&mp->m_sb)) + shortform_dir2_entry_check(mp, ino, ip, &dirty, + stack, irec, + ino_offset); + else + shortform_dir_entry_check(mp, ino, ip, &dirty, + stack, irec, + ino_offset); + + ASSERT(dirty == 0 || dirty && !no_modify); + if (dirty) { + libxfs_trans_log_inode(tp, ip, + XFS_ILOG_CORE | XFS_ILOG_DDATA); + libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES + |XFS_TRANS_SYNC, 0); + } else { + libxfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); + } + break; + default: + break; + } + + hashval = 0; + + if (!no_modify && !orphanage_entered && + ino == mp->m_sb.sb_rootino) { + do_warn("re-entering %s into root directory\n", + ORPHANAGE); + tp = libxfs_trans_alloc(mp, 0); + nres = XFS_MKDIR_SPACE_RES(mp, strlen(ORPHANAGE)); + error = libxfs_trans_reserve(tp, nres, 
+ XFS_MKDIR_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_MKDIR_LOG_COUNT); + if (error) + res_failed(error); + libxfs_trans_ijoin(tp, ip, 0); + libxfs_trans_ihold(tp, ip); + XFS_BMAP_INIT(&flist, &first); + if (error = dir_createname(mp, tp, ip, ORPHANAGE, + strlen(ORPHANAGE), + orphanage_ino, &first, &flist, + nres)) + do_error("can't make %s entry in root inode " + "%llu, createname error %d\n", + ORPHANAGE, ino, error); + libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + error = libxfs_bmap_finish(&tp, &flist, first, &committed); + ASSERT(error == 0); + libxfs_trans_commit(tp, + XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_SYNC, 0); + orphanage_entered = 1; + } + + /* + * if we have to create a .. for /, do it now *before* + * we delete the bogus entries, otherwise the directory + * could transform into a shortform dir which would + * probably cause the simulation to choke. Even + * if the illegal entries get shifted around, it's ok + * because the entries are structurally intact and in + * in hash-value order so the simulation won't get confused + * if it has to move them around. + */ + if (!no_modify && need_root_dotdot && + ino == mp->m_sb.sb_rootino) { + ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL); + + do_warn("recreating root directory .. 
entry\n"); + + tp = libxfs_trans_alloc(mp, 0); + ASSERT(tp != NULL); + + nres = XFS_MKDIR_SPACE_RES(mp, 2); + error = libxfs_trans_reserve(tp, nres, + XFS_MKDIR_LOG_RES(mp), + 0, + XFS_TRANS_PERM_LOG_RES, + XFS_MKDIR_LOG_COUNT); + + if (error) + res_failed(error); + + libxfs_trans_ijoin(tp, ip, 0); + libxfs_trans_ihold(tp, ip); + + XFS_BMAP_INIT(&flist, &first); + + if (error = dir_createname(mp, tp, ip, "..", 2, + ip->i_ino, &first, &flist, nres)) + do_error( +"can't make \"..\" entry in root inode %llu, createname error %d\n", + ino, error); + + libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + error = libxfs_bmap_finish(&tp, &flist, first, + &committed); + ASSERT(error == 0); + libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES + |XFS_TRANS_SYNC, 0); + + need_root_dotdot = 0; + } else if (need_root_dotdot && ino == mp->m_sb.sb_rootino) { + do_warn("would recreate root directory .. entry\n"); + } + + /* + * delete any illegal entries -- which should only exist + * if the directory is a longform directory. bogus + * shortform directory entries were deleted in phase 4. + */ + if (!no_modify && num_illegal > 0) { + ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_LOCAL); + ASSERT(!XFS_SB_VERSION_HASDIRV2(&mp->m_sb)); + + while (num_illegal > 0 && ip->i_d.di_format != + XFS_DINODE_FMT_LOCAL) { + prune_lf_dir_entry(mp, ino, ip, &hashval); + num_illegal--; + } + + /* + * handle case where we've deleted so many + * entries that the directory has changed from + * a longform to a shortform directory. have + * to allocate a transaction since we're working + * with the incore data fork. + */ + if (num_illegal > 0) { + ASSERT(ip->i_d.di_format == + XFS_DINODE_FMT_LOCAL); + tp = libxfs_trans_alloc(mp, 0); + /* + * using the remove reservation is overkill + * since at most we'll only need to log the + * inode but it's easier than wedging a + * new define in ourselves. 10 block fs + * space reservation is also overkill but + * what the heck... 
+ */ + nres = XFS_REMOVE_SPACE_RES(mp); + error = libxfs_trans_reserve(tp, nres, + XFS_REMOVE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_REMOVE_LOG_COUNT); + if (error) + res_failed(error); + + libxfs_trans_ijoin(tp, ip, 0); + libxfs_trans_ihold(tp, ip); + + prune_sf_dir_entry(mp, ino, ip); + + libxfs_trans_log_inode(tp, ip, + XFS_ILOG_CORE | XFS_ILOG_DDATA); + ASSERT(error == 0); + libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES + |XFS_TRANS_SYNC, 0); + } + } + + /* + * if we need to create the '.' entry, do so only if + * the directory is a longform dir. it it's been + * turned into a shortform dir, then the inode is ok + * since shortform dirs have no '.' entry and the inode + * has already been committed by prune_lf_dir_entry(). + */ + if (need_dot) { + /* + * bump up our link count but don't + * bump up the inode link count. chances + * are good that even though we lost '.' + * the inode link counts reflect '.' so + * leave the inode link count alone and if + * it turns out to be wrong, we'll catch + * that in phase 7. + */ + add_inode_ref(irec, ino_offset); + + if (no_modify) { + do_warn( + "would create missing \".\" entry in dir ino %llu\n", + ino); + } else if (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) { + /* + * need to create . entry in longform dir. 
+ */ + do_warn( + "creating missing \".\" entry in dir ino %llu\n", + ino); + + tp = libxfs_trans_alloc(mp, 0); + ASSERT(tp != NULL); + + nres = XFS_MKDIR_SPACE_RES(mp, 1); + error = libxfs_trans_reserve(tp, nres, + XFS_MKDIR_LOG_RES(mp), + 0, + XFS_TRANS_PERM_LOG_RES, + XFS_MKDIR_LOG_COUNT); + + if (error) + res_failed(error); + + libxfs_trans_ijoin(tp, ip, 0); + libxfs_trans_ihold(tp, ip); + + XFS_BMAP_INIT(&flist, &first); + + if (error = dir_createname(mp, tp, ip, ".", + 1, ip->i_ino, &first, &flist, + nres)) + do_error( + "can't make \".\" entry in dir ino %llu, createname error %d\n", + ino, error); + + libxfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + error = libxfs_bmap_finish(&tp, &flist, first, + &committed); + ASSERT(error == 0); + libxfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES + |XFS_TRANS_SYNC, 0); + } + } + + libxfs_iput(ip, 0); + } +} + +/* + * mark realtime bitmap and summary inodes as reached. + * quota inode will be marked here as well + */ +void +mark_standalone_inodes(xfs_mount_t *mp) +{ + ino_tree_node_t *irec; + int offset; + + irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rbmino), + XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino)); + + ASSERT(irec != NULL); + + offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rbmino) - + irec->ino_startnum; + + add_inode_reached(irec, offset); + + irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rsumino), + XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino)); + + offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rsumino) - + irec->ino_startnum; + + ASSERT(irec != NULL); + + add_inode_reached(irec, offset); + + if (fs_quotas) { + if (mp->m_sb.sb_uquotino + && mp->m_sb.sb_uquotino != NULLFSINO) { + irec = find_inode_rec(XFS_INO_TO_AGNO(mp, + mp->m_sb.sb_uquotino), + XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino)); + offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_uquotino) + - irec->ino_startnum; + add_inode_reached(irec, offset); + } + if (mp->m_sb.sb_pquotino + && mp->m_sb.sb_pquotino != NULLFSINO) { + irec = 
find_inode_rec(XFS_INO_TO_AGNO(mp, + mp->m_sb.sb_pquotino), + XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino)); + offset = XFS_INO_TO_AGINO(mp, mp->m_sb.sb_pquotino) + - irec->ino_startnum; + add_inode_reached(irec, offset); + } + } +} + +void +phase6(xfs_mount_t *mp) +{ + xfs_ino_t ino; + ino_tree_node_t *irec; + dir_stack_t stack; + int i; + int j; + + bzero(&zerocr, sizeof(cred_t)); + + do_log("Phase 6 - check inode connectivity...\n"); + + if (!no_modify) + teardown_bmap_finish(mp); + else + teardown_bmap(mp); + + incore_ext_teardown(mp); + + add_ino_backptrs(mp); + + /* + * verify existence of root directory - if we have to + * make one, it's ok for the incore data structs not to + * know about it since everything about it (and the other + * inodes in its chunk if a new chunk was created) are ok + */ + if (need_root_inode) { + if (!no_modify) { + do_warn("reinitializing root directory\n"); + mk_root_dir(mp); + need_root_inode = 0; + need_root_dotdot = 0; + } else { + do_warn("would reinitialize root directory\n"); + } + } + + if (need_rbmino) { + if (!no_modify) { + do_warn("reinitializing realtime bitmap inode\n"); + mk_rbmino(mp); + need_rbmino = 0; + } else { + do_warn("would reinitialize realtime bitmap inode\n"); + } + } + + if (need_rsumino) { + if (!no_modify) { + do_warn("reinitializing realtime summary inode\n"); + mk_rsumino(mp); + need_rsumino = 0; + } else { + do_warn("would reinitialize realtime summary inode\n"); + } + } + + if (!no_modify) { + do_log( + " - resetting contents of realtime bitmap and summary inodes\n"); + if (fill_rbmino(mp)) { + do_warn( + "Warning: realtime bitmap may be inconsistent\n"); + } + + if (fill_rsumino(mp)) { + do_warn( + "Warning: realtime bitmap may be inconsistent\n"); + } + } + + /* + * make orphanage (it's guaranteed to not exist now) + */ + if (!no_modify) { + do_log(" - ensuring existence of %s directory\n", + ORPHANAGE); + orphanage_ino = mk_orphanage(mp); + } + + dir_stack_init(&stack); + + 
mark_standalone_inodes(mp); + + /* + * push root dir on stack, then go + */ + if (!need_root_inode) { + do_log(" - traversing filesystem starting at / ... \n"); + + push_dir(&stack, mp->m_sb.sb_rootino); + process_dirstack(mp, &stack); + + do_log(" - traversal finished ... \n"); + } else { + ASSERT(no_modify != 0); + + do_log( +" - root inode lost, cannot make new one in no modify mode ... \n"); + do_log( +" - skipping filesystem traversal from / ... \n"); + } + + do_log(" - traversing all unattached subtrees ... \n"); + + irec = find_inode_rec(XFS_INO_TO_AGNO(mp, mp->m_sb.sb_rootino), + XFS_INO_TO_AGINO(mp, mp->m_sb.sb_rootino)); + + /* + * we always have a root inode, even if it's free... + * if the root is free, forget it, lost+found is already gone + */ + if (is_inode_free(irec, 0) || !inode_isadir(irec, 0)) { + need_root_inode = 1; + } + + /* + * then process all unreached inodes + * by walking incore inode tree + * + * get next unreached directory inode # from + * incore list + * push inode on dir stack + * call process_dirstack + */ + for (i = 0; i < glob_agcount; i++) { + irec = findfirst_inode_rec(i); + + if (irec == NULL) + continue; + + while (irec != NULL) { + for (j = 0; j < XFS_INODES_PER_CHUNK; j++) { + if (!is_inode_confirmed(irec, j)) + continue; + /* + * skip directories that have already been + * processed, even if they haven't been + * reached. If they are reachable, we'll + * pick them up when we process their parent. + */ + ino = XFS_AGINO_TO_INO(mp, i, + j + irec->ino_startnum); + if (inode_isadir(irec, j) && + !is_inode_refchecked(ino, + irec, j)) { + push_dir(&stack, ino); + process_dirstack(mp, &stack); + } + } + irec = next_ino_rec(irec); + } + } + + do_log(" - traversals finished ... \n"); + do_log(" - moving disconnected inodes to lost+found ... 
\n"); + + /* + * move all disconnected inodes to the orphanage + */ + for (i = 0; i < glob_agcount; i++) { + irec = findfirst_inode_rec(i); + + if (irec == NULL) + continue; + + while (irec != NULL) { + for (j = 0; j < XFS_INODES_PER_CHUNK; j++) { + ASSERT(is_inode_confirmed(irec, j)); + if (is_inode_free(irec, j)) + continue; + if (!is_inode_reached(irec, j)) { + ASSERT(inode_isadir(irec, j) || + num_inode_references(irec, j) + == 0); + ino = XFS_AGINO_TO_INO(mp, i, + j + irec->ino_startnum); + if (inode_isadir(irec, j)) + do_warn( + "disconnected dir inode %llu, ", + ino); + else + do_warn( + "disconnected inode %llu, ", + ino); + if (!no_modify) { + do_warn("moving to %s\n", + ORPHANAGE); + mv_orphanage(mp, orphanage_ino, + ino, + inode_isadir(irec, j)); + } else { + do_warn("would move to %s\n", + ORPHANAGE); + } + /* + * for read-only case, even though + * the inode isn't really reachable, + * set the flag (and bump our link + * count) anyway to fool phase 7 + */ + add_inode_reached(irec, j); + } + } + irec = next_ino_rec(irec); + } + } +} diff --git a/repair/phase7.c b/repair/phase7.c new file mode 100644 index 000000000..670afeebd --- /dev/null +++ b/repair/phase7.c @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. 
Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "avl.h" +#include "globals.h" +#include "agheader.h" +#include "incore.h" +#include "protos.h" +#include "err_protos.h" +#include "dinode.h" +#include "versions.h" + +/* dinoc is a pointer to the IN-CORE dinode core */ +void +set_nlinks(xfs_dinode_core_t *dinoc, + xfs_ino_t ino, + __uint32_t nrefs, + int *dirty) +{ + if (!no_modify) { + if (INT_GET(dinoc->di_nlink, ARCH_NOCONVERT) != nrefs) { + *dirty = 1; + do_warn("resetting inode %llu nlinks from %d to %d\n", + ino, INT_GET(dinoc->di_nlink, ARCH_NOCONVERT), nrefs); + + if (nrefs > XFS_MAXLINK_1) { + ASSERT(fs_inode_nlink); + do_warn( +"nlinks %d will overflow v1 ino, ino %llu will be converted to version 2\n", + nrefs, ino); + + } + INT_SET(dinoc->di_nlink, ARCH_NOCONVERT, nrefs); + } + } else { + if (INT_GET(dinoc->di_nlink, ARCH_NOCONVERT) != nrefs) + do_warn( + "would have reset inode %llu nlinks from %d to %d\n", + ino, INT_GET(dinoc->di_nlink, ARCH_NOCONVERT), nrefs); + } +} + +void +phase7(xfs_mount_t *mp) +{ + ino_tree_node_t *irec; + xfs_inode_t *ip; + xfs_trans_t *tp; + int i; + int j; + int error; + int dirty; + xfs_ino_t ino; + __uint32_t nrefs; + + if (!no_modify) + printf("Phase 7 - verify and correct link counts...\n"); + else + printf("Phase 7 - verify link counts...\n"); + + tp = libxfs_trans_alloc(mp, XFS_TRANS_REMOVE); + + error = libxfs_trans_reserve(tp, 
(no_modify ? 0 : 10), + XFS_REMOVE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, + XFS_REMOVE_LOG_COUNT); + + ASSERT(error == 0); + + /* + * for each ag, look at each inode 1 at a time using the + * sim code. if the number of links is bad, reset it, + * log the inode core, commit the transaction, and + * allocate a new transaction + */ + for (i = 0; i < glob_agcount; i++) { + irec = findfirst_inode_rec(i); + + while (irec != NULL) { + for (j = 0; j < XFS_INODES_PER_CHUNK; j++) { + ASSERT(is_inode_confirmed(irec, j)); + + if (is_inode_free(irec, j)) + continue; + + ASSERT(no_modify || is_inode_reached(irec, j)); + ASSERT(no_modify || + is_inode_referenced(irec, j)); + + nrefs = num_inode_references(irec, j); + + ino = XFS_AGINO_TO_INO(mp, i, + irec->ino_startnum + j); + + error = libxfs_trans_iget(mp, tp, ino, 0, &ip); + + if (error) { + if (!no_modify) + do_error( + "couldn't map inode %llu, err = %d\n", + ino, error); + else { + do_warn( + "couldn't map inode %llu, err = %d, can't compare link counts\n", + ino, error); + continue; + } + } + + dirty = 0; + + /* + * compare and set links for all inodes + * but the lost+found inode. we keep + * that correct as we go. + */ + if (ino != orphanage_ino) + set_nlinks(&ip->i_d, ino, nrefs, + &dirty); + + if (!dirty) { + libxfs_trans_iput(tp, ip, 0); + } else { + libxfs_trans_log_inode(tp, ip, + XFS_ILOG_CORE); + /* + * no need to do a bmap finish since + * we're not allocating anything + */ + ASSERT(error == 0); + error = libxfs_trans_commit(tp, + XFS_TRANS_RELEASE_LOG_RES| + XFS_TRANS_SYNC, NULL); + + ASSERT(error == 0); + + tp = libxfs_trans_alloc(mp, + XFS_TRANS_REMOVE); + + error = libxfs_trans_reserve(tp, + (no_modify ? 0 : 10), + XFS_REMOVE_LOG_RES(mp), + 0, XFS_TRANS_PERM_LOG_RES, + XFS_REMOVE_LOG_COUNT); + ASSERT(error == 0); + } + } + irec = next_ino_rec(irec); + } + } + + /* + * always have one unfinished transaction coming out + * of the loop. cancel it. 
+ */ + libxfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); +} diff --git a/repair/protos.h b/repair/protos.h new file mode 100644 index 000000000..4f3f8d506 --- /dev/null +++ b/repair/protos.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +void xfs_init(libxfs_init_t *args); +void io_init(void); + +int verify_sb(xfs_sb_t *sb, + int is_primary_sb); +int verify_set_primary_sb(xfs_sb_t *root_sb, + int sb_index, + int *sb_modified); +int get_sb(xfs_sb_t *sbp, + xfs_off_t off, + int size, + xfs_agnumber_t agno); +void write_primary_sb(xfs_sb_t *sbp, + int size); + +int find_secondary_sb(xfs_sb_t *sb); + +int check_growfs(xfs_off_t off, int bufnum, xfs_agnumber_t agnum); + +void get_sb_geometry(fs_geometry_t *geo, + xfs_sb_t *sbp); + +char *alloc_ag_buf(int size); + +void print_inode_list(xfs_agnumber_t i); +char * err_string(int err_code); + diff --git a/repair/rt.c b/repair/rt.c new file mode 100644 index 000000000..ac4e8c377 --- /dev/null +++ b/repair/rt.c @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "avl.h" +#include "globals.h" +#include "agheader.h" +#include "incore.h" +#include "dinode.h" +#include "protos.h" +#include "err_protos.h" +#include "rt.h" + +#define xfs_highbit64 libxfs_highbit64 /* for XFS_RTBLOCKLOG macro */ + +void +rtinit(xfs_mount_t *mp) +{ + if (mp->m_sb.sb_rblocks == 0) + return; + + /* + * realtime init -- blockmap initialization is + * handled by incore_init() + */ + /* + sumfile = calloc(mp->m_rsumsize, 1); + */ + if ((btmcompute = calloc(mp->m_sb.sb_rbmblocks * + mp->m_sb.sb_blocksize, 1)) == NULL) + do_error( + "couldn't allocate memory for incore realtime bitmap.\n"); + + if ((sumcompute = calloc(mp->m_rsumsize, 1)) == NULL) + do_error( + "couldn't allocate memory for incore realtime summary info.\n"); +} + +/* + * generate the real-time bitmap and summary info based on the + * incore realtime extent map. + */ +int +generate_rtinfo(xfs_mount_t *mp, + xfs_rtword_t *words, + xfs_suminfo_t *sumcompute) +{ + xfs_drtbno_t extno; + xfs_drtbno_t start_ext; + int bitsperblock; + int bmbno; + xfs_rtword_t freebit; + xfs_rtword_t bits; + int start_bmbno; + int i; + int offs; + int log; + int len; + int in_extent; + + ASSERT(mp->m_rbmip == NULL); + + bitsperblock = mp->m_sb.sb_blocksize * NBBY; + extno = start_ext = 0; + bmbno = in_extent = start_bmbno = 0; + + /* + * slower but simple, don't play around with trying to set + * things one word at a time, just set bit as required. 
+ * Have to * track start and end (size) of each range of + * free extents to set the summary info properly. + */ + while (extno < mp->m_sb.sb_rextents) { + freebit = 1; + *words = 0; + bits = 0; + for (i = 0; i < sizeof(xfs_rtword_t) * NBBY && + extno < mp->m_sb.sb_rextents; i++, extno++) { + if (get_rtbno_state(mp, extno) == XR_E_FREE) { + sb_frextents++; + bits |= freebit; + + if (in_extent == 0) { + start_ext = extno; + start_bmbno = bmbno; + in_extent = 1; + } + } else if (in_extent == 1) { + len = (int) (extno - start_ext); + log = XFS_RTBLOCKLOG(len); + offs = XFS_SUMOFFS(mp, log, start_bmbno); + sumcompute[offs]++; + in_extent = 0; + } + + freebit <<= 1; + } + *words = bits; + words++; + + if (extno % bitsperblock == 0) + bmbno++; + } + if (in_extent == 1) { + len = (int) (extno - start_ext); + log = XFS_RTBLOCKLOG(len); + offs = XFS_SUMOFFS(mp, log, start_bmbno); + sumcompute[offs]++; + } + + return(0); +} + +#if 0 +/* + * returns 1 if bad, 0 if good + */ +int +check_summary(xfs_mount_t *mp) +{ + xfs_drfsbno_t bno; + xfs_suminfo_t *csp; + xfs_suminfo_t *fsp; + int log; + int error = 0; + + error = 0; + csp = sumcompute; + fsp = sumfile; + for (log = 0; log < mp->m_rsumlevels; log++) { + for (bno = 0; + bno < mp->m_sb.sb_rbmblocks; + bno++, csp++, fsp++) { + if (*csp != *fsp) { + do_warn( + "rt summary mismatch, size %d block %llu, file: %d, computed: %d\n", + log, bno, *fsp, *csp); + error = 1; + } + } + } + + return(error); +} + +/* + * examine the real-time bitmap file and compute summary + * info off it. 
Should probably be changed to compute + * the summary information off the incore computed bitmap + * instead of the realtime bitmap file + */ +void +process_rtbitmap(xfs_mount_t *mp, + xfs_dinode_t *dino, + blkmap_t *blkmap) +{ + int error; + int bit; + int bitsperblock; + int bmbno; + int end_bmbno; + xfs_dfsbno_t bno; + xfs_buf_t *bp; + xfs_drtbno_t extno; + int i; + int len; + int log; + int offs; + int prevbit; + int start_bmbno; + int start_bit; + xfs_rtword_t *words; + + ASSERT(mp->m_rbmip == NULL); + + bitsperblock = mp->m_sb.sb_blocksize * NBBY; + prevbit = 0; + extno = 0; + error = 0; + + end_bmbno = howmany(INT_GET(dino->di_core.di_size, ARCH_CONVERT), mp->m_sb.sb_blocksize); + + for (bmbno = 0; bmbno < end_bmbno; bmbno++) { + bno = blkmap_get(blkmap, bmbno); + + if (bno == NULLDFSBNO) { + do_warn("can't find block %d for rtbitmap inode\n", + bmbno); + error = 1; + continue; + } + bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno), + XFS_FSB_TO_BB(mp, 1)); + if (!bp) { + do_warn("can't read block %d for rtbitmap inode\n", + bmbno); + error = 1; + continue; + } + words = (xfs_rtword_t *)bp->b_un.b_addr; + for (bit = 0; + bit < bitsperblock && extno < mp->m_sb.sb_rextents; + bit++, extno++) { + if (isset(words, bit)) { + set_rtbno_state(mp, extno, XR_E_FREE); + sb_frextents++; + if (prevbit == 0) { + start_bmbno = bmbno; + start_bit = bit; + prevbit = 1; + } + } else if (prevbit == 1) { + len = (bmbno - start_bmbno) * bitsperblock + + (bit - start_bit); + log = XFS_RTBLOCKLOG(len); + offs = XFS_SUMOFFS(mp, log, start_bmbno); + sumcompute[offs]++; + prevbit = 0; + } + } + libxfs_putbuf(bp); + if (extno == mp->m_sb.sb_rextents) + break; + } + if (prevbit == 1) { + len = (bmbno - start_bmbno) * bitsperblock + (bit - start_bit); + log = XFS_RTBLOCKLOG(len); + offs = XFS_SUMOFFS(mp, log, start_bmbno); + sumcompute[offs]++; + } +} + +/* + * copy the real-time summary file data into memory + */ +void +process_rtsummary(xfs_mount_t *mp, + xfs_dinode_t *dino, 
+ blkmap_t *blkmap) +{ + xfs_fsblock_t bno; + xfs_buf_t *bp; + char *bytes; + int sumbno; + + for (sumbno = 0; sumbno < blkmap->count; sumbno++) { + bno = blkmap_get(blkmap, sumbno); + if (bno == NULLDFSBNO) { + do_warn("block %d for rtsummary inode is missing\n", + sumbno); + error++; + continue; + } + bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno), + XFS_FSB_TO_BB(mp, 1)); + if (!bp) { + do_warn("can't read block %d for rtsummary inode\n", + sumbno); + error++; + continue; + } + bytes = bp->b_un.b_addr; + bcopy(bytes, (char *)sumfile + sumbno * mp->m_sb.sb_blocksize, + mp->m_sb.sb_blocksize); + libxfs_putbuf(bp); + } +} +#endif diff --git a/repair/rt.h b/repair/rt.h new file mode 100644 index 000000000..d29241d2a --- /dev/null +++ b/repair/rt.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +struct blkmap; + +void +rtinit(xfs_mount_t *mp); + +int +generate_rtinfo(xfs_mount_t *mp, + xfs_rtword_t *words, + xfs_suminfo_t *sumcompute); + +#if 0 + +int +check_summary(xfs_mount_t *mp); + +void +process_rtbitmap(xfs_mount_t *mp, + xfs_dinode_t *dino, + struct blkmap *blkmap); + +void +process_rtsummary(xfs_mount_t *mp, + struct blkmap *blkmap); +#endif diff --git a/repair/sb.c b/repair/sb.c new file mode 100644 index 000000000..5133f2063 --- /dev/null +++ b/repair/sb.c @@ -0,0 +1,824 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include +#include +#include "agheader.h" +#include "globals.h" +#include "protos.h" +#include "err_protos.h" + + +/* + * copy the fields of a superblock that are present in primary and + * secondaries -- preserve fields that are different in the primary. + */ +void +copy_sb(xfs_sb_t *source, xfs_sb_t *dest) +{ + xfs_ino_t rootino; + xfs_ino_t rbmino; + xfs_ino_t rsumino; + xfs_ino_t uquotino; + xfs_ino_t pquotino; + __uint16_t versionnum; + + rootino = dest->sb_rootino; + rbmino = dest->sb_rbmino; + rsumino = dest->sb_rsumino; + uquotino = dest->sb_uquotino; + pquotino = dest->sb_pquotino; + + versionnum = dest->sb_versionnum; + + *dest = *source; + + dest->sb_rootino = rootino; + dest->sb_rbmino = rbmino; + dest->sb_rsumino = rsumino; + dest->sb_uquotino = uquotino; + dest->sb_pquotino = pquotino; + + dest->sb_versionnum = versionnum; + + /* + * copy over version bits that are stamped into all + * secondaries and cannot be changed at run time in + * the primary superblock + */ + if (XFS_SB_VERSION_HASDALIGN(source)) + XFS_SB_VERSION_ADDDALIGN(dest); + if (XFS_SB_VERSION_HASEXTFLGBIT(source)) + XFS_SB_VERSION_ADDEXTFLGBIT(dest); + + /* + * these are all supposed to be zero or will get reset anyway + */ + dest->sb_icount = 0; + dest->sb_ifree = 0; + dest->sb_fdblocks = 0; + dest->sb_frextents = 0; + + bzero(source->sb_fname, 12); +} + +#define BSIZE (1024 * 1024) + +/* + * find a secondary superblock, copy it into the sb buffer + */ +int +find_secondary_sb(xfs_sb_t *rsb) +{ + xfs_off_t off; + xfs_sb_t *sb; + xfs_sb_t bufsb; + char *c_bufsb; + int done; + int i; + int dirty; + int retval; + int bsize; + + do_warn("\nattempting to find secondary superblock...\n"); + + sb = (xfs_sb_t *) 
memalign(MEM_ALIGN, BSIZE); + if (!sb) { + do_error( + "error finding secondary superblock -- failed to memalign buffer\n"); + exit(1); + } + + bzero(&bufsb, sizeof(xfs_sb_t)); + retval = 0; + dirty = 0; + bsize = 0; + + /* + * skip first sector since we know that's bad + */ + for (done = 0, off = XFS_AG_MIN_BYTES; !done ; off += bsize) { + /* + * read disk 1 MByte at a time. + */ + if (lseek64(fs_fd, off, SEEK_SET) != off) { + done = 1; + } + + if (!done && (bsize = read(fs_fd, sb, BSIZE)) == 0) { + done = 1; + } + + do_warn("."); + + /* + * check the buffer 512 bytes at a time since + * we don't know how big the sectors really are. + */ + for (i = 0; !done && i < bsize; i += BBSIZE) { + c_bufsb = (char *) sb + i; + libxfs_xlate_sb(c_bufsb, &bufsb, 1, ARCH_CONVERT, + XFS_SB_ALL_BITS); + + if (verify_sb(&bufsb, 0) != XR_OK) + continue; + + do_warn("found candidate secondary superblock...\n"); + + /* + * found one. now verify it by looking + * for other secondaries. + */ + bcopy(&bufsb, rsb, bufsb.sb_sectsize); + rsb->sb_inprogress = 0; + clear_sunit = 1; + + if (verify_set_primary_sb(rsb, 0, &dirty) == XR_OK) { + do_warn("verified secondary superblock...\n"); + done = 1; + retval = 1; + } else { + do_warn( + "unable to verify superblock, continuing...\n"); + } + } + } + + free(sb); + return(retval); +} + +/* + * calculate what inode alignment field ought to be + * based on internal superblock info + */ +int +calc_ino_align(xfs_sb_t *sb) +{ + xfs_extlen_t align; + + align = XFS_INODE_BIG_CLUSTER_SIZE >> sb->sb_blocklog; + + return(align); +} + +/* + * verify a superblock -- does not verify root inode # + * can only check that geometry info is internally + * consistent. because of growfs, that's no guarantee + * of correctness (e.g. 
geometry may have changed) + * + * fields verified or consistency checked: + * + * sb_magicnum + * + * sb_versionnum + * + * sb_inprogress + * + * sb_blocksize (as a group) + * sb_blocklog + * + * geometry info - sb_dblocks (as a group) + * sb_agcount + * sb_agblocks + * sb_agblklog + * + * inode info - sb_inodesize (x-checked with geo info) + * sb_inopblock + * + * sector size info - + * sb_sectsize + * sb_sectlog + * + * not checked here - + * sb_rootino + * sb_fname + * sb_fpack + * sb_logstart + * sb_uuid + * + * ALL real-time fields + * final 4 summary counters + */ + +int +verify_sb(xfs_sb_t *sb, int is_primary_sb) +{ + __uint32_t bsize; + xfs_extlen_t align; + int i; + + /* check magic number and version number */ + + if (sb->sb_magicnum != XFS_SB_MAGIC) + return(XR_BAD_MAGIC); + + if (!XFS_SB_GOOD_VERSION(sb)) + return(XR_BAD_VERSION); + + /* does sb think mkfs really finished ? */ + + if (is_primary_sb && sb->sb_inprogress == 1) + return(XR_BAD_INPROGRESS); + + /* check to make sure blocksize is legal 2^N, 9 <= N <= 16 */ + + if (sb->sb_blocksize == 0) + return(XR_BAD_BLOCKSIZE); + + bsize = 1; + + for (i = 0; bsize < sb->sb_blocksize && i < 32; i++) { + bsize <<= 1; + } + + if (i < XR_LOG2BSIZE_MIN || i > XR_LOG2BSIZE_MAX) + return(XR_BAD_BLOCKSIZE); + + /* check sb blocksize field against sb blocklog field */ + + if (i != sb->sb_blocklog) + return(XR_BAD_BLOCKLOG); + + /* sanity check ag count, size fields against data size field */ + + if (sb->sb_dblocks == 0 || + sb->sb_dblocks > sb->sb_agcount * sb->sb_agblocks || + sb->sb_dblocks < (sb->sb_agcount - 1) + * sb->sb_agblocks + XFS_MIN_AG_BLOCKS) + return(XR_BAD_FS_SIZE_DATA); + + if (sb->sb_agblklog != (__uint8_t)libxfs_log2_roundup(sb->sb_agblocks)) + return(XR_BAD_FS_SIZE_DATA); + + if (sb->sb_inodesize < XFS_DINODE_MIN_SIZE || + sb->sb_inodesize > XFS_DINODE_MAX_SIZE || + sb->sb_inopblock != howmany(sb->sb_blocksize,sb->sb_inodesize)) + return(XR_BAD_INO_SIZE_DATA); + + /* check sector size against 
log(sector size) field */ + + bsize = 1; + + for (i = 0; bsize < sb->sb_sectsize && i < 15; i++) { + bsize <<= 1; + } + + if (sb->sb_sectsize == 0 || i == 16 || + sb->sb_sectsize != (1 << i)) + return(XR_BAD_SECT_SIZE_DATA); + + /* + * real-time extent size is always set + */ + if (sb->sb_rextsize * sb->sb_blocksize > XFS_MAX_RTEXTSIZE) + return(XR_BAD_RT_GEO_DATA); + + if (sb->sb_rextsize * sb->sb_blocksize < XFS_MIN_RTEXTSIZE) + return(XR_BAD_RT_GEO_DATA); + + if (sb->sb_rblocks == 0) { + if (sb->sb_rextents != 0) + return(XR_BAD_RT_GEO_DATA); + + if (sb->sb_rbmblocks != 0) + return(XR_BAD_RT_GEO_DATA); + + if (sb->sb_rextslog != 0) + return(XR_BAD_RT_GEO_DATA); + + if (sb->sb_frextents != 0) + return(XR_BAD_RT_GEO_DATA); + } else { + /* + * if we have a real-time partition, sanity-check geometry + */ + if (sb->sb_rblocks / sb->sb_rextsize != sb->sb_rextents) + return(XR_BAD_RT_GEO_DATA); + + if (sb->sb_rextslog != + libxfs_highbit32((unsigned int)sb->sb_rextents)) + return(XR_BAD_RT_GEO_DATA); + + if (sb->sb_rbmblocks != (xfs_extlen_t) howmany(sb->sb_rextents, + NBBY * sb->sb_blocksize)) + return(XR_BAD_RT_GEO_DATA); + } + + /* + * verify correctness of inode alignment if it's there + */ + if (XFS_SB_VERSION_HASALIGN(sb)) { + align = calc_ino_align(sb); + + if (align != sb->sb_inoalignmt) + return(XR_BAD_INO_ALIGN); + } + + /* + * verify max. 
% of inodes (sb_imax_pct) + */ + if (sb->sb_imax_pct > 100) + return(XR_BAD_INO_MAX_PCT); + + /* + * verify stripe alignment fields if present + */ + if (XFS_SB_VERSION_HASDALIGN(sb)) { + if ((!sb->sb_unit && sb->sb_width) || + (sb->sb_unit && sb->sb_agblocks % sb->sb_unit)) + return(XR_BAD_SB_UNIT); + if ((sb->sb_unit && !sb->sb_width) || + (sb->sb_width && sb->sb_unit && sb->sb_width % sb->sb_unit)) + return(XR_BAD_SB_WIDTH); + } + + /* + * if shared bit is set, verify that the version number is sane + */ + if (XFS_SB_VERSION_HASSHARED(sb)) { + if (sb->sb_shared_vn > XFS_SB_MAX_SHARED_VN) + return(XR_BAD_SVN); + } + + /* + * mkfs's that stamped a feature bit besides the ones in the + * mask below could leave garbage in the secondary superblock + * sectors. Anything stamping the shared fs bit or better into + * the secondaries is ok and should generate clean secondary + * superblock sectors. + * + * check primary and clean secondary superblocks more strictly + */ + if (is_primary_sb || sb->sb_versionnum & XR_PART_SECSB_VNMASK) { + /* + * return errors if shared vn or alignment fields + * are set without their feature bits being set + */ + if (!pre_65_beta && sb->sb_versionnum & XR_PART_SECSB_VNMASK || + pre_65_beta && sb->sb_versionnum & XR_ALPHA_SECSB_VNMASK) { + /* + * shared version # and inode alignment fields + * should be valid + */ + if (sb->sb_shared_vn && !XFS_SB_VERSION_HASSHARED(sb)) + return(XR_BAD_SVN); + if (sb->sb_inoalignmt && !XFS_SB_VERSION_HASALIGN(sb)) + return(XR_BAD_INO_ALIGN); + } + if ((!pre_65_beta && + (sb->sb_versionnum & XR_GOOD_SECSB_VNMASK)) || + (pre_65_beta && + (sb->sb_versionnum & XFS_SB_VERSION_DALIGNBIT))) { + /* + * stripe alignment values should be valid + */ + if (sb->sb_unit && !XFS_SB_VERSION_HASDALIGN(sb)) + return(XR_BAD_SB_UNIT); + if (sb->sb_width && !XFS_SB_VERSION_HASDALIGN(sb)) + return(XR_BAD_SB_WIDTH); + } + +#if 0 + /* + * checks involving later superblock fields get added here... 
+ */ + if (sb->sb_versionnum & XR_GOOD_SECSB_VNMASK) { + } +#endif + } + + return(XR_OK); +} + +void +write_primary_sb(xfs_sb_t *sbp, int size) +{ + void *buf; + + if (no_modify) + return; + + if ((buf = calloc(size, 1)) == NULL) { + do_error("failed to malloc superblock buffer\n"); + return; + } + + if (lseek64(fs_fd, 0LL, SEEK_SET) != 0LL) { + free(buf); + do_error("couldn't seek to offset 0 in filesystem\n"); + } + + libxfs_xlate_sb(buf, sbp, -1, ARCH_CONVERT, XFS_SB_ALL_BITS); + + if (write(fs_fd, buf, size) != size) { + free(buf); + do_error("primary superblock write failed!\n"); + } + + free(buf); +} + +/* + * get a possible superblock -- don't check for internal consistency + */ +int +get_sb(xfs_sb_t *sbp, xfs_off_t off, int size, xfs_agnumber_t agno) +{ + int error, rval; + void *buf; + + if ((buf = calloc(size, 1)) == NULL) { + do_error( + "error reading superblock %u -- failed to malloc buffer\n", + agno, off); + exit(1); + } + + /* try and read it first */ + + if (lseek64(fs_fd, off, SEEK_SET) != off) { + do_warn( + "error reading superblock %u -- seek to offset %lld failed\n", + agno, off); + return(XR_EOF); + } + + if ((rval = read(fs_fd, buf, size)) != size) { + error = errno; + do_warn( +"superblock read failed, offset %lld, size %d, ag %u, rval %d\n", + off, size, rval, agno); + do_error("%s\n", strerror(error)); + } + libxfs_xlate_sb(buf, sbp, 1, ARCH_CONVERT, XFS_SB_ALL_BITS); + free(buf); + + return (verify_sb(sbp, 0)); +} + +#if 0 +int +check_growfs(xfs_off_t off, int bufnum, xfs_agnumber_t agnum) +{ + int rval; + + ASSERT(bufnum < NUM_SBS); + + /* try and read it first */ + + if (lseek64(fs_fd, off, SEEK_SET) != off) + return(XR_EOF); + + if ((rval = read(fs_fd, sb_bufs[bufnum], sbbuf_size)) != sbbuf_size) { + /* + * we didn't get a full block so the filesystem + * could not have been grown. return a non-XR_OK + * result code. 
+ */ + return(XR_EOF); + } + + return(get_sb(off, bufnum, agnum)); +} +#endif +/* returns element on list with highest reference count */ + +fs_geo_list_t * +get_best_geo(fs_geo_list_t *list) +{ + int cnt = 0; + fs_geo_list_t *current, *rval = NULL; + + current = list; + + while (current != NULL) { + if (current->refs > cnt) { + rval = current; + cnt = current->refs; + } + current = current->next; + } + + return(rval); +} + +/* adds geometry info to linked list. returns (sometimes new) head of list */ + +fs_geo_list_t * +add_geo(fs_geo_list_t *list, fs_geometry_t *geo_p, int index) +{ + fs_geo_list_t *current = list; + + while (current != NULL) { + if (memcmp(geo_p, ¤t->geo, sizeof(fs_geometry_t)) == 0) { + current->refs++; + return(list); + } + + current = current->next; + } + + if ((current = malloc(sizeof(fs_geo_list_t))) == NULL) { + do_error("couldn't malloc geometry structure\n"); + exit(1); + } + + current->geo = *geo_p; + current->refs = 1; + current->next = list; + current->index = index; + + return(current); +} + +void +free_geo(fs_geo_list_t *list) +{ + fs_geo_list_t *next; + fs_geo_list_t *current; + + current = list; + + for (current = list; current != NULL; current = next) { + next = current->next; + free(current); + } +} + +void +get_sb_geometry(fs_geometry_t *geo, xfs_sb_t *sbp) +{ + bzero(geo, sizeof(fs_geometry_t)); + + /* + * blindly set fields that we know are always good + */ + geo->sb_blocksize = sbp->sb_blocksize; + geo->sb_dblocks = sbp->sb_dblocks; + geo->sb_rblocks = sbp->sb_rblocks; + geo->sb_rextents = sbp->sb_rextents; + geo->sb_logstart = sbp->sb_logstart; + geo->sb_rextsize = sbp->sb_rextsize; + geo->sb_agblocks = sbp->sb_agblocks; + geo->sb_agcount = sbp->sb_agcount; + geo->sb_rbmblocks = sbp->sb_rbmblocks; + geo->sb_logblocks = sbp->sb_logblocks; + geo->sb_sectsize = sbp->sb_sectsize; + geo->sb_inodesize = sbp->sb_inodesize; + + if (XFS_SB_VERSION_HASALIGN(sbp)) + geo->sb_ialignbit = 1; + + if (XFS_SB_VERSION_HASSHARED(sbp) || + 
sbp->sb_versionnum & XR_PART_SECSB_VNMASK) + geo->sb_sharedbit = 1; + + if (XFS_SB_VERSION_HASDALIGN(sbp)) + geo->sb_salignbit = 1; + + if (XFS_SB_VERSION_HASEXTFLGBIT(sbp)) + geo->sb_extflgbit = 1; + + /* + * protect against pre-6.5 mkfs-generated garbaged + * fields in the secondary superblocks. pay attention + * to those fields if and only if their corresponding + * feature bits are set in the feature bits of the + * version number or we can deduce from the version bits + * that are set that our field was properly initialized + * because a field after the field we care about was + * properly initialized as well. + */ + + /* + * inode alignment field lives before the data alignment field + */ + if (!pre_65_beta && sbp->sb_versionnum & XR_PART_SECSB_VNMASK || + pre_65_beta && sbp->sb_versionnum & XR_ALPHA_SECSB_VNMASK) + geo->sb_inoalignmt = sbp->sb_inoalignmt; + + if (!pre_65_beta && sbp->sb_versionnum & XR_GOOD_SECSB_VNMASK || + pre_65_beta && XFS_SB_VERSION_HASDALIGN(sbp)) { + geo->sb_unit = sbp->sb_unit; + geo->sb_width = sbp->sb_width; + } + + /* + * shared vn always set if either ino or data alignment is on + * since that field lives between the quota and inode alignment + * fields + */ + if (sbp->sb_versionnum & XR_PART_SECSB_VNMASK) + geo->sb_shared_vn = sbp->sb_shared_vn; + + /* + * superblock fields located after sb_widthfields get set + * into the geometry structure only if we can determine + * from the features enabled in this superblock whether + * or not the sector was bzero'd at mkfs time. + */ + if (!pre_65_beta && sbp->sb_versionnum & XR_GOOD_SECSB_VNMASK || + pre_65_beta && sbp->sb_versionnum & XR_ALPHA_SECSB_VNMASK) { + geo->sb_fully_zeroed = 1; + } +} + +/* + * the way to verify that a primary sb is consistent with the + * filesystem is find the secondaries given the info in the + * primary and compare the geometries in the secondaries against + * the geometry indicated by the primary. 
+ * + * returns 1 if bad, 0 if ok + */ +int +verify_set_primary_sb(xfs_sb_t *rsb, + int sb_index, + int *sb_modified) +{ + xfs_off_t off; + fs_geometry_t geo; + xfs_sb_t *sb; + fs_geo_list_t *list; + fs_geo_list_t *current; + char *checked; + xfs_agnumber_t agno; + int num_sbs; + int skip; + int size; + int num_ok; + int retval; + int round; + + /* + * select the number of secondaries to try for + */ + num_sbs = MIN(NUM_SBS, rsb->sb_agcount); + skip = howmany(num_sbs, rsb->sb_agcount); + size = NUM_AGH_SECTS * rsb->sb_sectsize; + retval = 0; + list = NULL; + num_ok = 0; + *sb_modified = 0; + + sb = (xfs_sb_t *) alloc_ag_buf(size); + checked = calloc(rsb->sb_agcount, sizeof(char)); + if (!checked) { + do_error("calloc failed in verify_set_primary_sb\n"); + exit(1); + } + + /* + * put the primary sb geometry info onto the geometry list + */ + checked[sb_index] = 1; + get_sb_geometry(&geo, rsb); + list = add_geo(list, &geo, sb_index); + + /* + * grab N secondaries. check them off as we get them + * so we only process each one once + */ + for (round = 0; round < skip; round++) { + for (agno = round; agno < rsb->sb_agcount; agno += skip) { + if (checked[agno]) + continue; + + off = (xfs_off_t)agno * rsb->sb_agblocks << rsb->sb_blocklog; + + checked[agno] = 1; + + if (get_sb(sb, off, size, agno) == XR_EOF) { + retval = 1; + goto out; + } + + if (verify_sb(sb, 0) == XR_OK) { + /* + * save away geometry info. + * don't bother checking the sb + * against the agi/agf as the odds + * of the sb being corrupted in a way + * that it is internally consistent + * but not consistent with the rest + * of the filesystem is really really low. + */ + get_sb_geometry(&geo, sb); + list = add_geo(list, &geo, agno); + num_ok++; + } + } + } + + /* + * see if we have enough superblocks to bother with + */ + if (num_ok < num_sbs / 2) + return(XR_INSUFF_SEC_SB); + + current = get_best_geo(list); + + /* + * check that enough sbs agree that we're willing to + * go with this geometry. 
if not, print out the + * geometry and a message about the force option. + */ + switch (num_sbs) { + case 2: + /* + * all them have to be right. if not, report geometry + * and get out unless force option is in effect (-F) + */ + if (current->refs != 2) { + if (!force_geo) { + do_warn("Only two AGs detected and they do not match - cannot proceed.\n"); + exit(1); + } + } + break; + case 1: + /* + * just report the geometry info and get out. + * refuse to run further unless the force (-F) + * option is in effect. + */ + if (!force_geo) { + do_warn("Only one AG detected - cannot proceed.\n"); + exit(1); + } + default: + /* + * at least half of the probed superblocks have + * to agree. if they don't, this fs is probably + * too far gone anyway considering the fact that + * XFS normally doesn't alter the secondary superblocks. + */ + if (current->refs < num_sbs / 2) { + do_warn("Not enough matching superblocks - cannot proceed.\n"); + exit(1); + } + } + + /* + * set the geometry into primary superblock if necessary. + */ + + if (current->index != sb_index) { + *sb_modified = 1; + off = current->index * current->geo.sb_agblocks + * current->geo.sb_blocksize; + if (get_sb(sb, off, current->geo.sb_sectsize, + current->index) != XR_OK) + do_error("could not read superblock\n"); + + copy_sb(sb, rsb); + + /* + * turn off inprogress bit since this is the primary. + * also save away values that we need to ensure are + * consistent in the other secondaries. + */ + rsb->sb_inprogress = 0; + sb_inoalignmt = sb->sb_inoalignmt; + sb_unit = sb->sb_unit; + sb_width = sb->sb_width; + } + + free_geo(list); +out: + free(sb); + free(checked); + return(retval); +} diff --git a/repair/scan.c b/repair/scan.c new file mode 100644 index 000000000..e6228a230 --- /dev/null +++ b/repair/scan.c @@ -0,0 +1,1279 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include <libxfs.h> +#include "avl.h" +#include "globals.h" +#include "agheader.h" +#include "incore.h" +#include "protos.h" +#include "err_protos.h" +#include "dinode.h" +#include "scan.h" +#include "versions.h" +#include "bmap.h" + +extern int verify_set_agheader(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb, + xfs_agf_t *agf, xfs_agi_t *agi, xfs_agnumber_t i); + +static xfs_mount_t *mp = NULL; +static xfs_extlen_t bno_agffreeblks; +static xfs_extlen_t cnt_agffreeblks; +static xfs_extlen_t bno_agflongest; +static xfs_extlen_t cnt_agflongest; +static xfs_agino_t agicount; +static xfs_agino_t agifreecount; + +void +set_mp(xfs_mount_t *mpp) +{ + mp = mpp; +} + +void +scan_sbtree( + xfs_agblock_t root, + int nlevels, + xfs_agnumber_t agno, + int suspect, + void (*func)(xfs_btree_sblock_t *block, + int level, + xfs_agblock_t bno, + xfs_agnumber_t agno, + int suspect, + int isroot), + int isroot) +{ + xfs_buf_t *bp; + + bp = libxfs_readbuf(mp->m_dev, XFS_AGB_TO_DADDR(mp, agno, root), + XFS_FSB_TO_BB(mp, 1), 0); + if (!bp) { + do_error("can't read btree block %d/%d\n", agno, root); + return; + } + (*func)((xfs_btree_sblock_t *)XFS_BUF_PTR(bp), + nlevels - 1, root, agno, suspect, isroot); + libxfs_putbuf(bp); +} + +/* + * returns 1 on bad news (inode needs to be cleared), 0 on good + */ +int +scan_lbtree( + xfs_dfsbno_t root, + int nlevels, + int (*func)(xfs_btree_lblock_t *block, + int level, + int type, + int whichfork, + xfs_dfsbno_t bno, + xfs_ino_t ino, + xfs_drfsbno_t *tot, + __uint64_t *nex, + blkmap_t **blkmapp, + bmap_cursor_t *bm_cursor, + int isroot, + int check_dups, + int *dirty), + int type, + int whichfork, + xfs_ino_t ino, + xfs_drfsbno_t *tot, + __uint64_t *nex, + blkmap_t **blkmapp, + bmap_cursor_t 
*bm_cursor, + int isroot, + int check_dups) +{ + xfs_buf_t *bp; + int err; + int dirty = 0; + + bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, root), + XFS_FSB_TO_BB(mp, 1), 0); + if (!bp) { + do_error("can't read btree block %d/%d\n", + XFS_FSB_TO_AGNO(mp, root), + XFS_FSB_TO_AGBNO(mp, root)); + return(1); + } + err = (*func)((xfs_btree_lblock_t *)XFS_BUF_PTR(bp), nlevels - 1, + type, whichfork, root, ino, tot, nex, blkmapp, + bm_cursor, isroot, check_dups, &dirty); + + ASSERT(dirty == 0 || dirty && !no_modify); + + if (dirty && !no_modify) + libxfs_writebuf(bp, 0); + else + libxfs_putbuf(bp); + + return(err); +} + +int +scanfunc_bmap( + xfs_btree_lblock_t *ablock, + int level, + int type, + int whichfork, + xfs_dfsbno_t bno, + xfs_ino_t ino, + xfs_drfsbno_t *tot, + __uint64_t *nex, + blkmap_t **blkmapp, + bmap_cursor_t *bm_cursor, + int isroot, + int check_dups, + int *dirty) +{ + xfs_bmbt_block_t *block = (xfs_bmbt_block_t *)ablock; + int i; + int err; + xfs_bmbt_ptr_t *pp; + xfs_bmbt_key_t *pkey; + xfs_bmbt_rec_32_t *rp; + xfs_dfiloff_t first_key; + xfs_dfiloff_t last_key; + char *forkname; + + if (whichfork == XFS_DATA_FORK) + forkname = "data"; + else + forkname = "attr"; + + /* + * unlike the ag freeblock btrees, if anything looks wrong + * in an inode bmap tree, just bail. it's possible that + * we'll miss a case where the to-be-toasted inode and + * another inode are claiming the same block but that's + * highly unlikely. + */ + if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_BMAP_MAGIC) { + do_warn( + "bad magic # %#x in inode %llu (%s fork) bmbt block %llu\n", + INT_GET(block->bb_magic, ARCH_CONVERT), ino, forkname, bno); + return(1); + } + if (INT_GET(block->bb_level, ARCH_CONVERT) != level) { + do_warn( + "expected level %d got %d in inode %llu, (%s fork) bmbt block %llu\n", + level, INT_GET(block->bb_level, ARCH_CONVERT), ino, forkname, bno); + return(1); + } + + if (check_dups == 0) { + /* + * check sibling pointers. 
if bad we have a conflict + * between the sibling pointers and the child pointers + * in the parent block. blow out the inode if that happens + */ + if (bm_cursor->level[level].fsbno != NULLDFSBNO) { + /* + * this is not the first block on this level + * so the cursor for this level has recorded the + * values for this's block left-sibling. + */ + if (bno != bm_cursor->level[level].right_fsbno) { + do_warn( + "bad fwd (right) sibling pointer (saw %llu parent block says %llu)\n", + bm_cursor->level[level].right_fsbno, + bno); + do_warn( + "\tin inode %llu (%s fork) bmap btree block %llu\n", + ino, forkname, + bm_cursor->level[level].fsbno); + return(1); + } + if (INT_GET(block->bb_leftsib, ARCH_CONVERT) != + bm_cursor->level[level].fsbno) { + do_warn( + "bad back (left) sibling pointer (saw %llu parent block says %llu)\n", + INT_GET(block->bb_leftsib, ARCH_CONVERT), + bm_cursor->level[level].fsbno); + do_warn( + "\tin inode %llu (%s fork) bmap btree block %llu\n", + ino, forkname, bno); + return(1); + } + } else { + /* + * This is the first or only block on this level. 
+ * Check that the left sibling pointer is NULL + */ + if (INT_GET(block->bb_leftsib, ARCH_CONVERT) != + NULLDFSBNO) { + do_warn( + "bad back (left) sibling pointer (saw %llu should be NULL (0))\n", + INT_GET(block->bb_leftsib, ARCH_CONVERT)); + do_warn( + "\tin inode %llu (%s fork) bmap btree block %llu\n", + ino, forkname, bno); + return(1); + } + } + + /* + * update cursor block pointers to reflect this block + */ + bm_cursor->level[level].fsbno = bno; + bm_cursor->level[level].left_fsbno = INT_GET(block->bb_leftsib, ARCH_CONVERT); + bm_cursor->level[level].right_fsbno = INT_GET(block->bb_rightsib, ARCH_CONVERT); + + switch (get_fsbno_state(mp, bno)) { + case XR_E_UNKNOWN: + case XR_E_FREE1: + case XR_E_FREE: + set_fsbno_state(mp, bno, XR_E_INUSE); + break; + case XR_E_FS_MAP: + case XR_E_INUSE: + /* + * we'll try and continue searching here since + * the block looks like it's been claimed by file + * to store user data, a directory to store directory + * data, or the space allocation btrees but since + * we made it here, the block probably + * contains btree data. + */ + set_fsbno_state(mp, bno, XR_E_MULT); + do_warn( + "inode 0x%llx bmap block 0x%llx claimed, state is %d\n", + ino, (__uint64_t) bno, + get_fsbno_state(mp, bno)); + break; + case XR_E_MULT: + case XR_E_INUSE_FS: + set_fsbno_state(mp, bno, XR_E_MULT); + do_warn( + "inode 0x%llx bmap block 0x%llx claimed, state is %d\n", + ino, (__uint64_t) bno, + get_fsbno_state(mp, bno)); + /* + * if we made it to here, this is probably a bmap block + * that is being used by *another* file as a bmap block + * so the block will be valid. Both files should be + * trashed along with any other file that impinges on + * any blocks referenced by either file. 
So we + * continue searching down this btree to mark all + * blocks duplicate + */ + break; + case XR_E_BAD_STATE: + default: + do_warn( + "bad state %d, inode 0x%llx bmap block 0x%llx\n", + get_fsbno_state(mp, bno), + ino, (__uint64_t) bno); + break; + } + } else { + /* + * attribute fork for realtime files is in the regular + * filesystem + */ + if (type != XR_INO_RTDATA || whichfork != XFS_DATA_FORK) { + if (search_dup_extent(mp, XFS_FSB_TO_AGNO(mp, bno), + XFS_FSB_TO_AGBNO(mp, bno))) + return(1); + } else { + if (search_rt_dup_extent(mp, bno)) + return(1); + } + } + (*tot)++; + if (level == 0) { + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_bmap_dmxr[0] || + isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_bmap_dmnr[0]) { +do_warn("inode 0x%llx bad # of bmap records (%u, min - %u, max - %u)\n", + ino, INT_GET(block->bb_numrecs, ARCH_CONVERT), + mp->m_bmap_dmnr[0], mp->m_bmap_dmxr[0]); + return(1); + } + rp = (xfs_bmbt_rec_32_t *) + XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, + block, 1, mp->m_bmap_dmxr[0]); + *nex += INT_GET(block->bb_numrecs, ARCH_CONVERT); + /* + * XXX - if we were going to fix up the btree record, + * we'd do it right here. For now, if there's a problem, + * we'll bail out and presumably clear the inode. + */ + if (check_dups == 0) { + err = process_bmbt_reclist(mp, rp, INT_GET(block->bb_numrecs, ARCH_CONVERT), + type, ino, tot, blkmapp, + &first_key, &last_key, + whichfork); + if (err) + return(1); + /* + * check that key ordering is monotonically increasing. + * if the last_key value in the cursor is set to + * NULLDFILOFF, then we know this is the first block + * on the leaf level and we shouldn't check the + * last_key value. 
+ */ + if (first_key <= bm_cursor->level[level].last_key && + bm_cursor->level[level].last_key != + NULLDFILOFF) { + do_warn( +"out-of-order bmap key (file offset) in inode %llu, %s fork, fsbno %llu\n", + ino, forkname, bno); + return(1); + } + /* + * update cursor keys to reflect this block. + * don't have to check if last_key is > first_key + * since that gets checked by process_bmbt_reclist. + */ + bm_cursor->level[level].first_key = first_key; + bm_cursor->level[level].last_key = last_key; + + return(0); + } else + return(scan_bmbt_reclist(mp, rp, INT_GET(block->bb_numrecs, ARCH_CONVERT), + type, ino, tot, whichfork)); + } + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_bmap_dmxr[1] || + isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_bmap_dmnr[1]) { +do_warn("inode 0x%llx bad # of bmap records (%u, min - %u, max - %u)\n", + ino, INT_GET(block->bb_numrecs, ARCH_CONVERT), + mp->m_bmap_dmnr[1], mp->m_bmap_dmxr[1]); + return(1); + } + pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1, + mp->m_bmap_dmxr[1]); + pkey = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block, 1, + mp->m_bmap_dmxr[1]); + + last_key = NULLDFILOFF; + + for (i = 0, err = 0; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) { + /* + * XXX - if we were going to fix up the interior btree nodes, + * we'd do it right here. For now, if there's a problem, + * we'll bail out and presumably clear the inode. + */ + if (!verify_dfsbno(mp, INT_GET(pp[i], ARCH_CONVERT))) { + do_warn("bad bmap btree ptr 0x%llx in ino %llu\n", + INT_GET(pp[i], ARCH_CONVERT), ino); + return(1); + } + + err = scan_lbtree(INT_GET(pp[i], ARCH_CONVERT), level, scanfunc_bmap, type, whichfork, + ino, tot, nex, blkmapp, bm_cursor, 0, + check_dups); + if (err) + return(1); + + /* + * fix key (offset) mismatches between the first key + * in the child block (as recorded in the cursor) and the + * key in the interior node referencing the child block. 
+ * + * fixes cases where entries have been shifted between + * child blocks but the parent hasn't been updated. We + * don't have to worry about the key values in the cursor + * not being set since we only look at the key values of + * our child and those are guaranteed to be set by the + * call to scan_lbtree() above. + */ + if (check_dups == 0 && INT_GET(pkey[i].br_startoff, ARCH_CONVERT) != + bm_cursor->level[level-1].first_key) { + if (!no_modify) { + do_warn( + "correcting bt key (was %llu, now %llu) in inode %llu\n", + INT_GET(pkey[i].br_startoff, ARCH_CONVERT), + bm_cursor->level[level-1].first_key, + ino); + do_warn("\t\t%s fork, btree block %llu\n", + forkname, bno); + *dirty = 1; + INT_SET(pkey[i].br_startoff, ARCH_CONVERT, bm_cursor->level[level-1].first_key); + } else { + do_warn( +"bad btree key (is %llu, should be %llu) in inode %llu\n", + INT_GET(pkey[i].br_startoff, ARCH_CONVERT), + bm_cursor->level[level-1].first_key, + ino); + do_warn("\t\t%s fork, btree block %llu\n", + forkname, bno); + } + } + } + + /* + * Check that the last child block's forward sibling pointer + * is NULL. 
+ */ + if (check_dups == 0 && + bm_cursor->level[level - 1].right_fsbno != NULLDFSBNO) { + do_warn( + "bad fwd (right) sibling pointer (saw %llu should be NULLDFSBNO)\n", + bm_cursor->level[level - 1].right_fsbno); + do_warn( + "\tin inode %llu (%s fork) bmap btree block %llu\n", + ino, forkname, + bm_cursor->level[level].fsbno); + return(1); + } + + /* + * update cursor keys to reflect this block + */ + if (check_dups == 0) { + bm_cursor->level[level].first_key = + INT_GET(pkey[0].br_startoff, ARCH_CONVERT); + i = INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1; + bm_cursor->level[level].last_key = + INT_GET(pkey[i].br_startoff, ARCH_CONVERT); + } + + return(0); +} + +void +scanfunc_bno( + xfs_btree_sblock_t *ablock, + int level, + xfs_agblock_t bno, + xfs_agnumber_t agno, + int suspect, + int isroot + ) +{ + xfs_agblock_t b; + xfs_alloc_block_t *block = (xfs_alloc_block_t *)ablock; + int i; + xfs_alloc_ptr_t *pp; + xfs_alloc_rec_t *rp; + int hdr_errors = 0; + int numrecs; + int state; + + if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_ABTB_MAGIC) { + do_warn("bad magic # %#x in btbno block %d/%d\n", + INT_GET(block->bb_magic, ARCH_CONVERT), agno, bno); + hdr_errors++; + if (suspect) + return; + } + if (INT_GET(block->bb_level, ARCH_CONVERT) != level) { + do_warn("expected level %d got %d in btbno block %d/%d\n", + level, INT_GET(block->bb_level, ARCH_CONVERT), agno, bno); + hdr_errors++; + if (suspect) + return; + } + + /* + * check for btree blocks multiply claimed + */ + state = get_agbno_state(mp, agno, bno); + + switch (state) { + case XR_E_UNKNOWN: + set_agbno_state(mp, agno, bno, XR_E_FS_MAP); + break; + default: + set_agbno_state(mp, agno, bno, XR_E_MULT); + do_warn( +"bno freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n", + state, agno, bno, suspect); + return; + } + + if (level == 0) { + numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT); + + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[0]) { + numrecs = 
mp->m_alloc_mxr[0]; + hdr_errors++; + } + if (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[0]) { + numrecs = mp->m_alloc_mnr[0]; + hdr_errors++; + } + + if (hdr_errors) + suspect++; + + rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, + 1, mp->m_alloc_mxr[0]); + for (i = 0; i < numrecs; i++) { + if (INT_GET(rp[i].ar_blockcount, ARCH_CONVERT) == 0 || + INT_GET(rp[i].ar_startblock, ARCH_CONVERT) == 0 || + !verify_agbno(mp, agno, INT_GET(rp[i].ar_startblock, ARCH_CONVERT)) || + INT_GET(rp[i].ar_blockcount, ARCH_CONVERT) > MAXEXTLEN) + continue; + + bno_agffreeblks += INT_GET(rp[i].ar_blockcount, ARCH_CONVERT); + if (INT_GET(rp[i].ar_blockcount, ARCH_CONVERT) > bno_agflongest) + bno_agflongest = INT_GET(rp[i].ar_blockcount, ARCH_CONVERT); + for (b = INT_GET(rp[i].ar_startblock, ARCH_CONVERT); + b < INT_GET(rp[i].ar_startblock, ARCH_CONVERT) + INT_GET(rp[i].ar_blockcount, ARCH_CONVERT); + b++) { + if (get_agbno_state(mp, agno, b) + == XR_E_UNKNOWN) + set_agbno_state(mp, agno, b, + XR_E_FREE1); + else { +do_warn("block (%d,%d) multiply claimed by bno space tree, state - %d\n", + agno, b, get_agbno_state(mp, agno, b)); + } + } + } + return; + } + + /* + * interior record + */ + pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1, + mp->m_alloc_mxr[1]); + + numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT); + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[1]) { + numrecs = mp->m_alloc_mxr[1]; + hdr_errors++; + } + if (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[1]) { + numrecs = mp->m_alloc_mnr[1]; + hdr_errors++; + } + + /* + * don't pass bogus tree flag down further if this block + * looked ok. bail out if two levels in a row look bad. + */ + + if (suspect && !hdr_errors) + suspect = 0; + + if (hdr_errors) { + if (suspect) + return; + else suspect++; + } + + for (i = 0; i < numrecs; i++) { + /* + * XXX - put sibling detection right here. 
+ * we know our sibling chain is good. So as we go, + * we check the entry before and after each entry. + * If either of the entries references a different block, + * check the sibling pointer. If there's a sibling + * pointer mismatch, try and extract as much data + * as possible. + */ + if (INT_GET(pp[i], ARCH_CONVERT) != 0 && verify_agbno(mp, agno, INT_GET(pp[i], ARCH_CONVERT))) + scan_sbtree(INT_GET(pp[i], ARCH_CONVERT), level, agno, suspect, + scanfunc_bno, 0); + } +} + +void +scanfunc_cnt( + xfs_btree_sblock_t *ablock, + int level, + xfs_agblock_t bno, + xfs_agnumber_t agno, + int suspect, + int isroot + ) +{ + xfs_alloc_block_t *block; + xfs_alloc_ptr_t *pp; + xfs_alloc_rec_t *rp; + xfs_agblock_t b; + int i; + int hdr_errors; + int numrecs; + int state; + + block = (xfs_alloc_block_t *)ablock; + hdr_errors = 0; + + if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_ABTC_MAGIC) { + do_warn("bad magic # %#x in btcnt block %d/%d\n", + INT_GET(block->bb_magic, ARCH_CONVERT), agno, bno); + hdr_errors++; + if (suspect) + return; + } + if (INT_GET(block->bb_level, ARCH_CONVERT) != level) { + do_warn("expected level %d got %d in btcnt block %d/%d\n", + level, INT_GET(block->bb_level, ARCH_CONVERT), agno, bno); + hdr_errors++; + if (suspect) + return; + } + + /* + * check for btree blocks multiply claimed + */ + state = get_agbno_state(mp, agno, bno); + + switch (state) { + case XR_E_UNKNOWN: + set_agbno_state(mp, agno, bno, XR_E_FS_MAP); + break; + default: + set_agbno_state(mp, agno, bno, XR_E_MULT); + do_warn( +"bcnt freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n", + state, agno, bno, suspect); + return; + } + + if (level == 0) { + numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT); + + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[0]) { + numrecs = mp->m_alloc_mxr[0]; + hdr_errors++; + } + if (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[0]) { + numrecs = mp->m_alloc_mnr[0]; + hdr_errors++; + } 
+ + if (hdr_errors) + suspect++; + + rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, + 1, mp->m_alloc_mxr[0]); + for (i = 0; i < numrecs; i++) { + if (INT_GET(rp[i].ar_blockcount, ARCH_CONVERT) == 0 || + INT_GET(rp[i].ar_startblock, ARCH_CONVERT) == 0 || + !verify_agbno(mp, agno, INT_GET(rp[i].ar_startblock, ARCH_CONVERT)) || + INT_GET(rp[i].ar_blockcount, ARCH_CONVERT) > MAXEXTLEN) + continue; + + cnt_agffreeblks += INT_GET(rp[i].ar_blockcount, ARCH_CONVERT); + if (INT_GET(rp[i].ar_blockcount, ARCH_CONVERT) > cnt_agflongest) + cnt_agflongest = INT_GET(rp[i].ar_blockcount, ARCH_CONVERT); + for (b = INT_GET(rp[i].ar_startblock, ARCH_CONVERT); + b < INT_GET(rp[i].ar_startblock, ARCH_CONVERT) + INT_GET(rp[i].ar_blockcount, ARCH_CONVERT); + b++) { + state = get_agbno_state(mp, agno, b); + /* + * no warning messages -- we'll catch + * FREE1 blocks later + */ + switch (state) { + case XR_E_FREE1: + set_agbno_state(mp, agno, b, XR_E_FREE); + break; + case XR_E_UNKNOWN: + set_agbno_state(mp, agno, b, + XR_E_FREE1); + break; + default: + do_warn( + "block (%d,%d) already used, state %d\n", + agno, b, state); + break; + } + } + } + return; + } + + /* + * interior record + */ + pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_alloc, block, 1, + mp->m_alloc_mxr[1]); + + numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT); + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_alloc_mxr[1]) { + numrecs = mp->m_alloc_mxr[1]; + hdr_errors++; + } + if (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_alloc_mnr[1]) { + numrecs = mp->m_alloc_mnr[1]; + hdr_errors++; + } + + /* + * don't pass bogus tree flag down further if this block + * looked ok. bail out if two levels in a row look bad. 
+ */ + + if (suspect && !hdr_errors) + suspect = 0; + + if (hdr_errors) { + if (suspect) + return; + else suspect++; + } + + for (i = 0; i < numrecs; i++) + if (INT_GET(pp[i], ARCH_CONVERT) != 0 && verify_agbno(mp, agno, INT_GET(pp[i], ARCH_CONVERT))) + scan_sbtree(INT_GET(pp[i], ARCH_CONVERT), level, agno, + suspect, scanfunc_cnt, 0); +} + +/* + * this one walks the inode btrees sucking the info there into + * the incore avl tree. We try and rescue corrupted btree records + * to minimize our chances of losing inodes. Inode info from potentially + * corrupt sources could be bogus so rather than put the info straight + * into the tree, instead we put it on a list and try and verify the + * info in the next phase by examining what's on disk. At that point, + * we'll be able to figure out what's what and stick the corrected info + * into the tree. We do bail out at some point and give up on a subtree + * so as to avoid walking randomly all over the ag. + * + * Note that it's also ok if the free/inuse info is wrong, we can correct + * that when we examine the on-disk inode. The important thing is to + * get the start and alignment of the inode chunks right. Those chunks + * that we aren't sure about go into the uncertain list. 
+ */ +void +scanfunc_ino( + xfs_btree_sblock_t *ablock, + int level, + xfs_agblock_t bno, + xfs_agnumber_t agno, + int suspect, + int isroot + ) +{ + xfs_ino_t lino; + xfs_inobt_block_t *block; + int i; + xfs_agino_t ino; + xfs_agblock_t agbno; + int j; + int nfree; + int off; + int numrecs; + int state; + xfs_inobt_ptr_t *pp; + xfs_inobt_rec_t *rp; + ino_tree_node_t *ino_rec, *first_rec, *last_rec; + int hdr_errors; + + block = (xfs_inobt_block_t *)ablock; + hdr_errors = 0; + + if (INT_GET(block->bb_magic, ARCH_CONVERT) != XFS_IBT_MAGIC) { + do_warn("bad magic # %#x in inobt block %d/%d\n", + INT_GET(block->bb_magic, ARCH_CONVERT), agno, bno); + hdr_errors++; + bad_ino_btree = 1; + if (suspect) + return; + } + if (INT_GET(block->bb_level, ARCH_CONVERT) != level) { + do_warn("expected level %d got %d in inobt block %d/%d\n", + level, INT_GET(block->bb_level, ARCH_CONVERT), agno, bno); + hdr_errors++; + bad_ino_btree = 1; + if (suspect) + return; + } + + /* + * check for btree blocks multiply claimed, any unknown/free state + * is ok in the bitmap block. 
+ */ + state = get_agbno_state(mp, agno, bno); + + switch (state) { + case XR_E_UNKNOWN: + case XR_E_FREE1: + case XR_E_FREE: + set_agbno_state(mp, agno, bno, XR_E_FS_MAP); + break; + default: + set_agbno_state(mp, agno, bno, XR_E_MULT); + do_warn( +"inode btree block claimed (state %d), agno %d, bno %d, suspect %d\n", + state, agno, bno, suspect); + } + + numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT); + + /* + * leaf record in btree + */ + if (level == 0) { + /* check for trashed btree block */ + + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_inobt_mxr[0]) { + numrecs = mp->m_inobt_mxr[0]; + hdr_errors++; + } + if (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_inobt_mnr[0]) { + numrecs = mp->m_inobt_mnr[0]; + hdr_errors++; + } + + if (hdr_errors) { + bad_ino_btree = 1; + do_warn("dubious inode btree block header %d/%d\n", + agno, bno); + suspect++; + } + + rp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block, + 1, mp->m_inobt_mxr[0]); + + /* + * step through the records, each record points to + * a chunk of inodes. The start of inode chunks should + * be block-aligned. Each inode btree rec should point + * to the start of a block of inodes or the start of a group + * of INODES_PER_CHUNK (64) inodes. off is the offset into + * the block. skip processing of bogus records. + */ + for (i = 0; i < numrecs; i++) { + ino = INT_GET(rp[i].ir_startino, ARCH_CONVERT); + off = XFS_AGINO_TO_OFFSET(mp, ino); + agbno = XFS_AGINO_TO_AGBNO(mp, ino); + lino = XFS_AGINO_TO_INO(mp, agno, ino); + /* + * on multi-block block chunks, all chunks start + * at the beginning of the block. with multi-chunk + * blocks, all chunks must start on 64-inode boundaries + * since each block can hold N complete chunks. if + * fs has aligned inodes, all chunks must start + * at a fs_ino_alignment*N'th agbno. skip recs + * with badly aligned starting inodes. 
+ */ + if (ino == 0 || + (inodes_per_block <= XFS_INODES_PER_CHUNK && + off != 0) || + (inodes_per_block > XFS_INODES_PER_CHUNK && + off % XFS_INODES_PER_CHUNK != 0) || + (fs_aligned_inodes && + agbno % fs_ino_alignment != 0)) { + do_warn( + "badly aligned inode rec (starting inode = %llu)\n", + lino); + suspect++; + } + + /* + * verify numeric validity of inode chunk first + * before inserting into a tree. don't have to + * worry about the overflow case because the + * starting ino number of a chunk can only get + * within 255 inodes of max (NULLAGINO). if it + * gets closer, the agino number will be illegal + * as the agbno will be too large. + */ + if (verify_aginum(mp, agno, ino)) { + do_warn( +"bad starting inode # (%llu (0x%x 0x%x)) in ino rec, skipping rec\n", + lino, agno, ino); + suspect++; + continue; + } + + if (verify_aginum(mp, agno, + ino + XFS_INODES_PER_CHUNK - 1)) { + do_warn( +"bad ending inode # (%llu (0x%x 0x%x)) in ino rec, skipping rec\n", + lino + XFS_INODES_PER_CHUNK - 1, + agno, ino + XFS_INODES_PER_CHUNK - 1); + suspect++; + continue; + } + + /* + * set state of each block containing inodes + */ + if (off == 0 && !suspect) { + for (j = 0; + j < XFS_INODES_PER_CHUNK; + j += mp->m_sb.sb_inopblock) { + agbno = XFS_AGINO_TO_AGBNO(mp, ino + j); + state = get_agbno_state(mp, + agno, agbno); + + if (state == XR_E_UNKNOWN) { + set_agbno_state(mp, agno, + agbno, XR_E_INO); + } else if (state == XR_E_INUSE_FS && + agno == 0 && + ino + j >= first_prealloc_ino && + ino + j < last_prealloc_ino) { + set_agbno_state(mp, agno, + agbno, XR_E_INO); + } else { + do_warn( +"inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n", + agno, bno, + mp->m_sb.sb_inopblock); + suspect++; + /* + * XXX - maybe should mark + * block a duplicate + */ + continue; + } + } + } + /* + * ensure only one avl entry per chunk + */ + find_inode_rec_range(agno, ino, + ino + XFS_INODES_PER_CHUNK, + &first_rec, + &last_rec); + if (first_rec != NULL) { + /* + * 
this chunk overlaps with one (or more) + * already in the tree + */ + do_warn( +"inode rec for ino %llu (%d/%d) overlaps existing rec (start %d/%d)\n", + lino, agno, ino, + agno, first_rec->ino_startnum); + suspect++; + + /* + * if the 2 chunks start at the same place, + * then we don't have to put this one + * in the uncertain list. go to the next one. + */ + if (first_rec->ino_startnum == ino) + continue; + } + + agicount += XFS_INODES_PER_CHUNK; + agifreecount += INT_GET(rp[i].ir_freecount, ARCH_CONVERT); + nfree = 0; + + /* + * now mark all the inodes as existing and free or used. + * if the tree is suspect, put them into the uncertain + * inode tree. + */ + if (!suspect) { + if (XFS_INOBT_IS_FREE(&rp[i], 0, ARCH_CONVERT)) { + nfree++; + ino_rec = set_inode_free_alloc(agno, + ino); + } else { + ino_rec = set_inode_used_alloc(agno, + ino); + } + for (j = 1; j < XFS_INODES_PER_CHUNK; j++) { + if (XFS_INOBT_IS_FREE(&rp[i], j, ARCH_CONVERT)) { + nfree++; + set_inode_free(ino_rec, j); + } else { + set_inode_used(ino_rec, j); + } + } + } else { + for (j = 0; j < XFS_INODES_PER_CHUNK; j++) { + if (XFS_INOBT_IS_FREE(&rp[i], j, ARCH_CONVERT)) { + nfree++; + add_aginode_uncertain(agno, + ino + j, 1); + } else { + add_aginode_uncertain(agno, + ino + j, 0); + } + } + } + + if (nfree != INT_GET(rp[i].ir_freecount, ARCH_CONVERT)) { + do_warn( "ir_freecount/free mismatch, inode chunk \ +%d/%d, freecount %d nfree %d\n", + agno, ino, INT_GET(rp[i].ir_freecount, ARCH_CONVERT), nfree); + } + } + + if (suspect) + bad_ino_btree = 1; + + return; + } + + /* + * interior record, continue on + */ + if (INT_GET(block->bb_numrecs, ARCH_CONVERT) > mp->m_inobt_mxr[1]) { + numrecs = mp->m_inobt_mxr[1]; + hdr_errors++; + } + if (isroot == 0 && INT_GET(block->bb_numrecs, ARCH_CONVERT) < mp->m_inobt_mnr[1]) { + numrecs = mp->m_inobt_mnr[1]; + hdr_errors++; + } + + pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_inobt, block, 1, + mp->m_inobt_mxr[1]); + + /* + * don't pass bogus tree flag 
down further if this block + * looked ok. bail out if two levels in a row look bad. + */ + + if (suspect && !hdr_errors) + suspect = 0; + + if (hdr_errors) { + bad_ino_btree = 1; + if (suspect) + return; + else suspect++; + } + + for (i = 0; i < numrecs; i++) { + if (INT_GET(pp[i], ARCH_CONVERT) != 0 && verify_agbno(mp, agno, INT_GET(pp[i], ARCH_CONVERT))) + scan_sbtree(INT_GET(pp[i], ARCH_CONVERT), level, agno, suspect, + scanfunc_ino, 0); + } +} + +/* + * record the blocks named by this AG's free list via set_agbno_state. + * the AGFL block itself is marked XR_E_FS_MAP unless it shares a block + * with the sb, agf or agi. every agfl entry from agf_flfirst through + * agf_fllast (wrapping at XFS_AGFL_SIZE) that passes verify_agbno is + * marked XR_E_FREE; bad entries, and a walked count that disagrees with + * agf_flcount, are reported with do_warn. + */ +void +scan_freelist( + xfs_agf_t *agf) +{ + xfs_agfl_t *agfl; + xfs_buf_t *agflbuf; + xfs_agblock_t bno; + int count; + int i; + + if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) && + XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) && + XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp)) + set_agbno_state(mp, INT_GET(agf->agf_seqno, ARCH_CONVERT), + XFS_AGFL_BLOCK(mp), XR_E_FS_MAP); + if (INT_GET(agf->agf_flcount, ARCH_CONVERT) == 0) + return; + /* + * flfirst and fllast index agfl_bno[] directly in the walk below. + * reject out-of-range values from a corrupt agf so the walk cannot + * read past the array or loop forever waiting for an index it can + * never reach. + */ + if (INT_GET(agf->agf_flfirst, ARCH_CONVERT) >= XFS_AGFL_SIZE || + INT_GET(agf->agf_fllast, ARCH_CONVERT) >= XFS_AGFL_SIZE) { + do_warn("agf %d freelist blocks bad, skipping freelist scan\n", + INT_GET(agf->agf_seqno, ARCH_CONVERT)); + return; + } + agflbuf = libxfs_readbuf(mp->m_dev, + XFS_AG_DADDR(mp, INT_GET(agf->agf_seqno, ARCH_CONVERT), + XFS_AGFL_DADDR), 1, 0); + if (!agflbuf) { + do_abort("can't read agfl block for ag %d\n", + INT_GET(agf->agf_seqno, ARCH_CONVERT)); + return; + } + agfl = XFS_BUF_TO_AGFL(agflbuf); + i = INT_GET(agf->agf_flfirst, ARCH_CONVERT); + count = 0; + for (;;) { + bno = INT_GET(agfl->agfl_bno[i], ARCH_CONVERT); + if (verify_agbno(mp, INT_GET(agf->agf_seqno,ARCH_CONVERT), bno)) + set_agbno_state(mp, + INT_GET(agf->agf_seqno, ARCH_CONVERT), + bno, XR_E_FREE); + else + do_warn("bad agbno %u in agfl, agno %d\n", + bno, INT_GET(agf->agf_seqno, ARCH_CONVERT)); + count++; + if (i == INT_GET(agf->agf_fllast, ARCH_CONVERT)) + break; + if (++i == XFS_AGFL_SIZE) + i = 0; + } + if (count != INT_GET(agf->agf_flcount, ARCH_CONVERT)) { + do_warn("freeblk count %d != flcount %d in ag %d\n", count, + INT_GET(agf->agf_flcount, ARCH_CONVERT), + INT_GET(agf->agf_seqno, ARCH_CONVERT)); + } + libxfs_putbuf(agflbuf); +} + +/* + * scan one allocation group: read its superblock, agf and agi, have + * verify_set_agheader check them, then walk the free list and the + * bno, cnt and inode btrees. with no_modify set, an ag whose headers + * came back bad is skipped; otherwise header buffers flagged dirty are + * written back and the rest are simply released. + */ +void +scan_ag( + xfs_agnumber_t agno) +{ + xfs_agf_t *agf; + xfs_buf_t *agfbuf; + int agf_dirty; + xfs_agi_t *agi; + 
xfs_buf_t *agibuf; + int agi_dirty; + xfs_sb_t *sb; + xfs_buf_t *sbbuf; + int sb_dirty; + int status; + + cnt_agffreeblks = cnt_agflongest = 0; + bno_agffreeblks = bno_agflongest = 0; + + agi_dirty = agf_dirty = sb_dirty = 0; + + agicount = agifreecount = 0; + + sbbuf = libxfs_readbuf(mp->m_dev, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), + 1, 0); + if (!sbbuf) { + do_error("can't get root superblock for ag %d\n", agno); + return; + } + + sb = (xfs_sb_t *)calloc(BBSIZE, 1); + if (!sb) { + do_error("can't allocate memory for superblock\n"); + libxfs_putbuf(sbbuf); + return; + } + libxfs_xlate_sb(XFS_BUF_TO_SBP(sbbuf), sb, 1, ARCH_CONVERT, + XFS_SB_ALL_BITS); + + agfbuf = libxfs_readbuf(mp->m_dev, + XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR), 1, 0); + if (!agfbuf) { + do_error("can't read agf block for ag %d\n", agno); + libxfs_putbuf(sbbuf); + free(sb); + return; + } + agf = XFS_BUF_TO_AGF(agfbuf); + + agibuf = libxfs_readbuf(mp->m_dev, + XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR), 1, 0); + if (!agibuf) { + do_error("can't read agi block for ag %d\n", agno); + libxfs_putbuf(agfbuf); + libxfs_putbuf(sbbuf); + free(sb); + return; + } + agi = XFS_BUF_TO_AGI(agibuf); + + /* fix up bad ag headers */ + + status = verify_set_agheader(mp, sbbuf, sb, agf, agi, agno); + + if (status & XR_AG_SB_SEC) { + if (!no_modify) + sb_dirty = 1; + /* + * clear bad sector bit because we don't want + * to skip further processing. we just want to + * ensure that we write out the modified sb buffer. 
+ */ + status &= ~XR_AG_SB_SEC; + } + if (status & XR_AG_SB) { + if (!no_modify) + sb_dirty = 1; + else + do_warn("would "); + + do_warn("reset bad sb for ag %d\n", agno); + } + if (status & XR_AG_AGF) { + if (!no_modify) + agf_dirty = 1; + else + do_warn("would "); + + do_warn("reset bad agf for ag %d\n", agno); + } + if (status & XR_AG_AGI) { + if (!no_modify) + agi_dirty = 1; + else + do_warn("would "); + + do_warn("reset bad agi for ag %d\n", agno); + } + + if (status && no_modify) { + libxfs_putbuf(agibuf); + libxfs_putbuf(agfbuf); + libxfs_putbuf(sbbuf); + free(sb); + + do_warn("bad uncorrected agheader %d, skipping ag...\n", agno); + + return; + } + + scan_freelist(agf); + + if (INT_GET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT) != 0 && + verify_agbno(mp, agno, INT_GET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT))) + scan_sbtree(INT_GET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT), + INT_GET(agf->agf_levels[XFS_BTNUM_BNO], ARCH_CONVERT), + agno, 0, scanfunc_bno, 1); + else + do_warn("bad agbno %u for btbno root, agno %d\n", + INT_GET(agf->agf_roots[XFS_BTNUM_BNO], ARCH_CONVERT), agno); + + if (INT_GET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT) != 0 && + verify_agbno(mp, agno, INT_GET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT))) + scan_sbtree(INT_GET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT), + INT_GET(agf->agf_levels[XFS_BTNUM_CNT], ARCH_CONVERT), + agno, 0, scanfunc_cnt, 1); + else + do_warn("bad agbno %u for btbcnt root, agno %d\n", + INT_GET(agf->agf_roots[XFS_BTNUM_CNT], ARCH_CONVERT), agno); + + if (INT_GET(agi->agi_root, ARCH_CONVERT) != 0 && verify_agbno(mp, agno, INT_GET(agi->agi_root, ARCH_CONVERT))) + scan_sbtree(INT_GET(agi->agi_root, ARCH_CONVERT), INT_GET(agi->agi_level, ARCH_CONVERT), agno, 0, + scanfunc_ino, 1); + else + do_warn("bad agbno %u for inobt root, agno %d\n", + INT_GET(agi->agi_root, ARCH_CONVERT), agno); + + ASSERT(agi_dirty == 0 || agi_dirty && !no_modify); + + if (agi_dirty && !no_modify) + libxfs_writebuf(agibuf, 0); + 
else + libxfs_putbuf(agibuf); + + ASSERT(agf_dirty == 0 || agf_dirty && !no_modify); + + if (agf_dirty && !no_modify) + libxfs_writebuf(agfbuf, 0); + else + libxfs_putbuf(agfbuf); + + ASSERT(sb_dirty == 0 || sb_dirty && !no_modify); + + if (sb_dirty && !no_modify) { + libxfs_xlate_sb(XFS_BUF_PTR(sbbuf), sb, -1, ARCH_CONVERT, + XFS_SB_ALL_BITS); + libxfs_writebuf(sbbuf, 0); + } else + libxfs_putbuf(sbbuf); + free(sb); +} diff --git a/repair/scan.h b/repair/scan.h new file mode 100644 index 000000000..42e152647 --- /dev/null +++ b/repair/scan.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef _XR_SCAN_H +#define _XR_SCAN_H + +struct blkmap; + +void scan_sbtree( + xfs_agblock_t root, + int nlevels, + xfs_agnumber_t agno, + int suspect, + void (*func)(xfs_btree_sblock_t *block, + int level, + xfs_agblock_t bno, + xfs_agnumber_t agno, + int suspect, + int isroot), + int isroot); + +int scan_lbtree( + xfs_dfsbno_t root, + int nlevels, + int (*func)(xfs_btree_lblock_t *block, + int level, + int type, + int whichfork, + xfs_dfsbno_t bno, + xfs_ino_t ino, + xfs_drfsbno_t *tot, + __uint64_t *nex, + struct blkmap **blkmapp, + bmap_cursor_t *bm_cursor, + int isroot, + int check_dups, + int *dirty), + int type, + int whichfork, + xfs_ino_t ino, + xfs_drfsbno_t *tot, + __uint64_t *nex, + struct blkmap **blkmapp, + bmap_cursor_t *bm_cursor, + int isroot, + int check_dups); + +int scanfunc_bmap( + xfs_btree_lblock_t *ablock, + int level, + int type, + int whichfork, + xfs_dfsbno_t bno, + xfs_ino_t ino, + xfs_drfsbno_t *tot, + __uint64_t *nex, + struct blkmap **blkmapp, + bmap_cursor_t *bm_cursor, + int isroot, + int check_dups, + int *dirty); + +void scanfunc_bno( + xfs_btree_sblock_t *ablock, + int level, + xfs_agblock_t bno, + xfs_agnumber_t agno, + int suspect, + int isroot); + +void scanfunc_cnt( + xfs_btree_sblock_t *ablock, + int level, + xfs_agblock_t bno, + xfs_agnumber_t agno, + int suspect, + int isroot); + +void +scanfunc_ino( + xfs_btree_sblock_t *ablock, + int level, + xfs_agblock_t bno, + xfs_agnumber_t agno, + int suspect, + int isroot); + +#endif /* _XR_SCAN_H */ diff --git a/repair/versions.c b/repair/versions.c new file mode 100644 index 000000000..526be2221 --- /dev/null +++ b/repair/versions.c @@ -0,0 +1,307 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. 
All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include + +#define EXTERN +#include "versions.h" +#undef EXTERN +#include "err_protos.h" +#include "globals.h" + +/* + * bring the incore superblock (mp->m_sb) into line with the fs_* + * feature globals: add the attr, nlink and quota version bits when the + * matching global is set, mask stray bits out of sb_qflags, zero + * sb_qflags and drop the quota bit (recording it in lost_quotas) when + * fs_quotas is clear, and apply XFS_SB_VERSION_SUBALIGN to a version 4 + * superblock that still advertises alignment when fs_aligned_inodes + * is clear. + */ +void +update_sb_version(xfs_mount_t *mp) +{ + xfs_sb_t *sb; + __uint16_t vn; + + sb = &mp->m_sb; + + if (fs_attributes) { + if (!XFS_SB_VERSION_HASATTR(sb)) { + ASSERT(fs_attributes_allowed); + + XFS_SB_VERSION_ADDATTR(sb); + } + } + + if (fs_inode_nlink) { + if (!XFS_SB_VERSION_HASNLINK(sb)) { + ASSERT(fs_inode_nlink_allowed); + + XFS_SB_VERSION_ADDNLINK(sb); + } + } + + /* + * fix up the superblock version number and feature bits, + * turn off quota bits and flags if the filesystem doesn't + * have quotas. 
+ */ + if (fs_quotas) { + if (!XFS_SB_VERSION_HASQUOTA(sb)) { + ASSERT(fs_quotas_allowed); + + XFS_SB_VERSION_ADDQUOTA(sb); + } + + /* + * protect against stray bits in the quota flag field + */ + if (sb->sb_qflags & ~(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD| + XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT| + XFS_PQUOTA_ENFD|XFS_PQUOTA_CHKD)) { + /* + * update the incore superblock, if we're in + * no_modify mode, it'll never get flushed out + * so this is ok. + */ + do_warn("bogus quota flags 0x%x set in superblock", + sb->sb_qflags & ~(XFS_UQUOTA_ACCT| + XFS_UQUOTA_ENFD| + XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT| + XFS_PQUOTA_ENFD|XFS_PQUOTA_CHKD)); + + sb->sb_qflags &= (XFS_UQUOTA_ACCT| + XFS_UQUOTA_ENFD| + XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT| + XFS_PQUOTA_ENFD|XFS_PQUOTA_CHKD); + + if (!no_modify) + do_warn(", bogus flags will be cleared\n"); + else + do_warn(", bogus flags would be cleared\n"); + } + } else { + sb->sb_qflags = 0; + + if (XFS_SB_VERSION_HASQUOTA(sb)) { + lost_quotas = 1; + vn = sb->sb_versionnum; + vn &= ~XFS_SB_VERSION_QUOTABIT; + + if (!(vn & XFS_SB_VERSION_ALLFBITS)) + vn = XFS_SB_VERSION_TOOLD(vn); + + ASSERT(vn != 0); + sb->sb_versionnum = vn; + } + } + + if (!fs_aligned_inodes) { + if (XFS_SB_VERSION_HASALIGN(sb)) { + if (XFS_SB_VERSION_NUM(sb) == XFS_SB_VERSION_4) + XFS_SB_VERSION_SUBALIGN(sb); + } + } + + return; +} + +/* + * returns 0 if things are fine, 1 if we don't understand + * this superblock version. Sets superblock geometry-dependent + * global variables. 
+ */ +int +parse_sb_version(xfs_sb_t *sb) +{ + int issue_warning; + + /* + * start from a clean slate: every feature global below is + * recomputed from this superblock, fs_shared included, so a + * value left over from an earlier call cannot leak through. + */ + fs_attributes = 0; + fs_inode_nlink = 0; + fs_quotas = 0; + fs_aligned_inodes = 0; + fs_sb_feature_bits = 0; + fs_ino_alignment = 0; + fs_has_extflgbit = 0; + fs_shared = 0; + have_uquotino = 0; + have_pquotino = 0; + issue_warning = 0; + + /* + * ok, check to make sure that the sb isn't newer + * than we are + */ + if (XFS_SB_VERSION_HASEXTFLGBIT(sb)) { + fs_has_extflgbit = 1; + if (!fs_has_extflgbit_allowed) { + issue_warning = 1; + do_warn( + "This filesystem has uninitialized extent flags.\n"); + } + } + + if (XFS_SB_VERSION_HASSHARED(sb)) { + fs_shared = 1; + if (!fs_shared_allowed) { + issue_warning = 1; + do_warn("This filesystem is marked shared.\n"); + } + } + + if (issue_warning) { + do_warn( +"This filesystem uses 6.5 feature(s) not yet supported in this release.\n\ +Please run a 6.5 version of xfs_repair.\n"); + return(1); + } + + if (!XFS_SB_GOOD_VERSION(sb)) { + do_warn( + "WARNING: unknown superblock version %d\n", XFS_SB_VERSION_NUM(sb)); + do_warn( + "This filesystem contains features not understood by this program.\n"); + return(1); + } + + if (XFS_SB_VERSION_NUM(sb) == XFS_SB_VERSION_4) { + if (!fs_sb_feature_bits_allowed) { + do_warn( + "WARNING: you have disallowed superblock feature bits disallowed\n"); + do_warn( + "\tbut this superblock has feature bits. The superblock\n"); + + if (!no_modify) { + do_warn( + "\twill be downgraded. This may cause loss of filesystem meta-data\n"); + } else { + do_warn( + "\twould be downgraded. This might cause loss of filesystem\n"); + do_warn( + "\tmeta-data.\n"); + } + } else { + fs_sb_feature_bits = 1; + } + } + + if (XFS_SB_VERSION_HASATTR(sb)) { + if (!fs_attributes_allowed) { + do_warn( + "WARNING: you have disallowed attributes but this filesystem\n"); + if (!no_modify) { + do_warn( + "\thas attributes. The filesystem will be downgraded and\n"); + do_warn( + "\tall attributes will be removed.\n"); + } else { + do_warn( + "\thas attributes. 
The filesystem would be downgraded and\n"); + do_warn( + "\tall attributes would be removed.\n"); + } + } else { + fs_attributes = 1; + } + } + + if (XFS_SB_VERSION_HASNLINK(sb)) { + if (!fs_inode_nlink_allowed) { + do_warn( + "WARNING: you have disallowed version 2 inodes but this filesystem\n"); + if (!no_modify) { + do_warn( + "\thas version 2 inodes. The filesystem will be downgraded and\n"); + do_warn( + "\tall version 2 inodes will be converted to version 1 inodes.\n"); + do_warn( + "\tThis may cause some hard links to files to be destroyed\n"); + } else { + do_warn( + "\thas version 2 inodes. The filesystem would be downgraded and\n"); + do_warn( + "\tall version 2 inodes would be converted to version 1 inodes.\n"); + do_warn( + "\tThis might cause some hard links to files to be destroyed\n"); + } + } else { + fs_inode_nlink = 1; + } + } + + if (XFS_SB_VERSION_HASQUOTA(sb)) { + if (!fs_quotas_allowed) { + do_warn( + "WARNING: you have disallowed quotas but this filesystem\n"); + if (!no_modify) { + do_warn( + "\thas quotas. The filesystem will be downgraded and\n"); + do_warn( + "\tall quota information will be removed.\n"); + } else { + do_warn( + "\thas quotas. The filesystem would be downgraded and\n"); + do_warn( + "\tall quota information would be removed.\n"); + } + } else { + fs_quotas = 1; + + if (sb->sb_uquotino != 0 && + sb->sb_uquotino != NULLFSINO) + have_uquotino = 1; + + if (sb->sb_pquotino != 0 && + sb->sb_pquotino != NULLFSINO) + have_pquotino = 1; + } + } + + if (XFS_SB_VERSION_HASALIGN(sb)) { + if (fs_aligned_inodes_allowed) { + fs_aligned_inodes = 1; + fs_ino_alignment = sb->sb_inoalignmt; + } else { + do_warn( + "WARNING: you have disallowed aligned inodes but this filesystem\n"); + if (!no_modify) { + do_warn( + "\thas aligned inodes. The filesystem will be downgraded.\n"); + do_warn( +"\tThis will permanently degrade the performance of this filesystem.\n"); + } else { + do_warn( + "\thas aligned inodes. 
The filesystem would be downgraded.\n"); + do_warn( +"\tThis would permanently degrade the performance of this filesystem.\n"); + } + } + } + + /* + * calculate maximum file offset for this geometry + */ + fs_max_file_offset = 0x7fffffffffffffffLL >> sb->sb_blocklog; + + return(0); +} diff --git a/repair/versions.h b/repair/versions.h new file mode 100644 index 000000000..5f592be41 --- /dev/null +++ b/repair/versions.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#ifndef _XR_VERSIONS_H +#define _XR_VERSIONS_H + +#ifndef EXTERN +#define EXTERN extern +#endif /* EXTERN */ + +/* + * possible XFS filesystem features + * + * attributes (6.2) + * inode version 2 (32-bit link counts) (6.2) + * quotas (6.2+) + * aligned inodes (6.2+) + * + * bitmask fields happened after 6.2. + */ + +/* + * filesystem feature global vars, set to 1 if the feature + * is *allowed*, 0 otherwise. These can be set via command-line + * options + */ + +EXTERN int fs_attributes_allowed; +EXTERN int fs_inode_nlink_allowed; +EXTERN int fs_quotas_allowed; +EXTERN int fs_aligned_inodes_allowed; +EXTERN int fs_sb_feature_bits_allowed; +EXTERN int fs_has_extflgbit_allowed; +EXTERN int fs_shared_allowed; + +/* + * filesystem feature global vars, set to 1 if the feature + * is on, 0 otherwise + */ + +EXTERN int fs_attributes; +EXTERN int fs_inode_nlink; +EXTERN int fs_quotas; +EXTERN int fs_aligned_inodes; +EXTERN int fs_sb_feature_bits; +EXTERN int fs_has_extflgbit; +EXTERN int fs_shared; + +/* + * inode chunk alignment, fsblocks + */ + +EXTERN xfs_extlen_t fs_ino_alignment; + +/* + * modify superblock to reflect current state of global fs + * feature vars above + */ +void update_sb_version(xfs_mount_t *mp); + +/* + * parse current sb to set above feature vars + */ +int parse_sb_version(xfs_sb_t *sb); + +#endif /* _XR_VERSIONS_H */ diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c new file mode 100644 index 000000000..9f3203156 --- /dev/null +++ b/repair/xfs_repair.c @@ -0,0 +1,582 @@ +/* + * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+ * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include +#include "avl.h" +#include "avl64.h" +#include "globals.h" +#include "versions.h" +#include "agheader.h" +#include "protos.h" +#include "incore.h" +#include "err_protos.h" + +#define rounddown(x, y) (((x)/(y))*(y)) + +extern void phase1(xfs_mount_t *); +extern void phase2(xfs_mount_t *, libxfs_init_t *); +extern void phase3(xfs_mount_t *); +extern void phase4(xfs_mount_t *); +extern void phase5(xfs_mount_t *); +extern void phase6(xfs_mount_t *); +extern void phase7(xfs_mount_t *); +extern void incore_init(xfs_mount_t *); + +#define XR_MAX_SECT_SIZE (64 * 1024) + +/* + * option tables for getsubopt calls + */ + +/* + * -o (user-supplied override options) + */ + +char *o_opts[] = { +#define ASSUME_XFS 0 + "assume_xfs", +#define PRE_65_BETA 1 + "fs_is_pre_65_beta", + NULL +}; + +static void +usage(void) +{ + do_warn("Usage: %s [-nV] [-o subopt[=value]] [-l logdevice] devname\n", + progname); + exit(1); +} + +static char *err_message[] = { + "no error", + "bad magic number", + "bad blocksize field", + "bad blocksize log field", + "bad version number", + "filesystem mkfs-in-progress bit set", + "inconsistent filesystem geometry information", + "bad inode size or inconsistent with number of inodes/block", + "bad sector size", + "AGF geometry info conflicts with filesystem geometry", + "AGI geometry info conflicts with filesystem geometry", + "AG superblock geometry info conflicts with filesystem geometry", + "attempted to perform I/O beyond EOF", + "inconsistent filesystem geometry in realtime filesystem component", + "maximum indicated percentage of inodes > 100%", + "inconsistent inode alignment value", + "not enough secondary superblocks with matching geometry", + "bad stripe unit in 
superblock", + "bad stripe width in superblock", + "bad shared version number in superblock" +}; + +char * +err_string(int err_code) +{ + if (err_code < XR_OK || err_code >= XR_BAD_ERR_CODE) + do_abort("bad error code - %d\n", err_code); + + return(err_message[err_code]); +} + +static void +noval(char opt, char *tbl[], int idx) +{ + do_warn("-%c %s option cannot have a value\n", opt, tbl[idx]); + usage(); +} + +static void +respec(char opt, char *tbl[], int idx) +{ + do_warn("-%c ", opt); + if (tbl) + do_warn("%s ", tbl[idx]); + do_warn("option respecified\n"); + usage(); +} + +static void +unknown(char opt, char *s) +{ + do_warn("unknown option -%c %s\n", opt, s); + usage(); +} + +/* + * sets only the global argument flags and variables + */ +void +process_args(int argc, char **argv) +{ + char *p; + int c; + + log_spec = 0; + fs_is_dirty = 0; + verbose = 0; + no_modify = 0; + isa_file = 0; + dumpcore = 0; + full_backptrs = 0; + delete_attr_ok = 1; + force_geo = 0; + assume_xfs = 0; + clear_sunit = 0; + sb_inoalignmt = 0; + sb_unit = 0; + sb_width = 0; + fs_attributes_allowed = 1; + fs_inode_nlink_allowed = 1; + fs_quotas_allowed = 1; + fs_aligned_inodes_allowed = 1; + fs_sb_feature_bits_allowed = 1; + fs_has_extflgbit_allowed = 1; + pre_65_beta = 0; + fs_shared_allowed = 1; + + /* + * XXX have to add suboption processing here + * attributes, quotas, nlinks, aligned_inos, sb_fbits + */ + while ((c = getopt(argc, argv, "o:fnDvVl:")) != EOF) { + switch (c) { + case 'D': + dumpcore = 1; + break; + case 'o': + p = optarg; + while (*p != '\0') { + char *val; + + switch (getsubopt(&p, (constpp)o_opts, &val)) { + case ASSUME_XFS: + if (val) + noval('o', o_opts, ASSUME_XFS); + if (assume_xfs) + respec('o', o_opts, ASSUME_XFS); + assume_xfs = 1; + break; + case PRE_65_BETA: + if (val) + noval('o', o_opts, PRE_65_BETA); + if (pre_65_beta) + respec('o', o_opts, + PRE_65_BETA); + pre_65_beta = 1; + break; + default: + unknown('o', val); + break; + } + } + break; + case 'l': + 
log_name = optarg; + log_spec = 1; + break; + case 'f': + isa_file = 1; + break; + case 'n': + no_modify = 1; + break; + case 'v': + verbose = 1; + break; + case 'V': + printf("%s version %s\n", progname, VERSION); + break; + case '?': + usage(); + } + } + + if (argc - optind != 1) + usage(); + + if ((fs_name = argv[optind]) == NULL) + usage(); +} + +void +do_msg(int do_abort, char const *msg, va_list args) +{ + vfprintf(stderr, msg, args); + + if (do_abort) { + if (dumpcore) + abort(); + exit(1); + } +} + +void +do_error(char const *msg, ...) +{ + va_list args; + + fprintf(stderr, "\nfatal error -- "); + + va_start(args, msg); + do_msg(1, msg, args); +} + +/* + * like do_error, only the error is internal, no system + * error so no oserror processing + */ +void +do_abort(char const *msg, ...) +{ + va_list args; + + va_start(args, msg); + do_msg(1, msg, args); +} + +void +do_warn(char const *msg, ...) +{ + va_list args; + + fs_is_dirty = 1; + + va_start(args, msg); + do_msg(0, msg, args); + va_end(args); +} + +/* no formatting */ + +void +do_log(char const *msg, ...) +{ + va_list args; + + va_start(args, msg); + do_msg(0, msg, args); + va_end(args); +} + +void +calc_mkfs(xfs_mount_t *mp) +{ + xfs_agblock_t fino_bno; + int do_inoalign; + + do_inoalign = mp->m_sinoalign; + + /* + * pre-calculate geometry of ag 0. We know what it looks + * like because we know what mkfs does -- 3 btree roots, + * and some number of blocks to prefill the agfl. 
+ */ + bnobt_root = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize); + bcntbt_root = bnobt_root + 1; + inobt_root = bnobt_root + 2; + fino_bno = inobt_root + XFS_MIN_FREELIST_RAW(1, 1, mp) + 1; + + /* + * ditto the location of the first inode chunks in the fs ('/') + */ + if (XFS_SB_VERSION_HASDALIGN(&mp->m_sb) && do_inoalign) { + first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, roundup(fino_bno, + mp->m_sb.sb_unit), 0); + } else if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) && + mp->m_sb.sb_inoalignmt > 1) { + first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, + roundup(fino_bno, + mp->m_sb.sb_inoalignmt), + 0); + } else { + first_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno, 0); + } + + ASSERT(XFS_IALLOC_BLOCKS(mp) > 0); + + if (XFS_IALLOC_BLOCKS(mp) > 1) + last_prealloc_ino = first_prealloc_ino + XFS_INODES_PER_CHUNK; + else + last_prealloc_ino = XFS_OFFBNO_TO_AGINO(mp, fino_bno + 1, 0); + + /* + * now the first 3 inodes in the system + */ + if (mp->m_sb.sb_rootino != first_prealloc_ino) { + do_warn( + "sb root inode value %llu inconsistent with calculated value %llu\n", + mp->m_sb.sb_rootino, first_prealloc_ino); + + if (!no_modify) + do_warn( + "resetting superblock root inode pointer to %llu\n", + first_prealloc_ino); + else + do_warn( + "would reset superblock root inode pointer to %llu\n", + first_prealloc_ino); + + /* + * just set the value -- safe since the superblock + * doesn't get flushed out if no_modify is set + */ + mp->m_sb.sb_rootino = first_prealloc_ino; + } + + if (mp->m_sb.sb_rbmino != first_prealloc_ino + 1) { + do_warn( +"sb realtime bitmap inode %llu inconsistent with calculated value %llu\n", + mp->m_sb.sb_rbmino, first_prealloc_ino + 1); + + if (!no_modify) + do_warn( + "resetting superblock realtime bitmap ino pointer to %llu\n", + first_prealloc_ino + 1); + else + do_warn( + "would reset superblock realtime bitmap ino pointer to %llu\n", + first_prealloc_ino + 1); + + /* + * just set the value -- safe since the superblock + * doesn't get 
flushed out if no_modify is set + */ + mp->m_sb.sb_rbmino = first_prealloc_ino + 1; + } + + if (mp->m_sb.sb_rsumino != first_prealloc_ino + 2) { + do_warn( +"sb realtime summary inode %llu inconsistent with calculated value %llu\n", + mp->m_sb.sb_rsumino, first_prealloc_ino + 2); + + if (!no_modify) + do_warn( + "resetting superblock realtime summary ino pointer to %llu\n", + first_prealloc_ino + 2); + else + do_warn( + "would reset superblock realtime summary ino pointer to %llu\n", + first_prealloc_ino + 2); + + /* + * just set the value -- safe since the superblock + * doesn't get flushed out if no_modify is set + */ + mp->m_sb.sb_rsumino = first_prealloc_ino + 2; + } + +} + +int +main(int argc, char **argv) +{ + libxfs_init_t args; + xfs_mount_t *temp_mp; + xfs_mount_t *mp; + xfs_sb_t *sb; + xfs_buf_t *sbp; + xfs_mount_t xfs_m; + + progname = basename(argv[0]); + + temp_mp = &xfs_m; + setbuf(stdout, NULL); + + process_args(argc, argv); + xfs_init(&args); + + /* do phase1 to make sure we have a superblock */ + phase1(temp_mp); + + if (no_modify && primary_sb_modified) { + do_warn("primary superblock would have been modified.\n"); + do_warn("cannot proceed further in no_modify mode.\n"); + do_warn("exiting now.\n"); + exit(1); + } + + /* prepare the mount structure */ + sbp = libxfs_readbuf(args.ddev, XFS_SB_DADDR, 1, 0); + memset(&xfs_m, 0, sizeof(xfs_mount_t)); + sb = &xfs_m.m_sb; + libxfs_xlate_sb(XFS_BUF_PTR(sbp), sb, 1, ARCH_CONVERT, XFS_SB_ALL_BITS); + + mp = libxfs_mount(&xfs_m, sb, args.ddev, args.logdev, args.rtdev, 0); + + if (!mp) { + fprintf(stderr, "%s: cannot repair this filesystem. 
Sorry.\n", + progname); + exit(1); + } + libxfs_putbuf(sbp); + + /* + * set XFS-independent status vars from the mount/sb structure + */ + glob_agcount = mp->m_sb.sb_agcount; + + chunks_pblock = mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK; + max_symlink_blocks = howmany(MAXPATHLEN - 1, mp->m_sb.sb_blocksize); + inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; + + /* + * calculate what mkfs would do to this filesystem + */ + calc_mkfs(mp); + + /* + * check sb filesystem stats and initialize in-core data structures + */ + incore_init(mp); + + if (parse_sb_version(&mp->m_sb)) { + do_warn( + "Found unsupported filesystem features. Exiting now.\n"); + return(1); + } + + /* make sure the per-ag freespace maps are ok so we can mount the fs */ + + phase2(mp, &args); + + phase3(mp); + + phase4(mp); + + if (no_modify) + printf("No modify flag set, skipping phase 5\n"); + else + phase5(mp); + + if (!bad_ino_btree) { + phase6(mp); + + phase7(mp); + } else { + do_warn( + "Inode allocation btrees are too corrupted, skipping phases 6 and 7\n"); + } + + if (lost_quotas && !have_uquotino && !have_pquotino) { + if (!no_modify) { + do_warn( + "Warning: no quota inodes were found. Quotas disabled.\n"); + } else { + do_warn( + "Warning: no quota inodes were found. Quotas would be disabled.\n"); + } + } else if (lost_quotas) { + if (!no_modify) { + do_warn( + "Warning: quota inodes were cleared. Quotas disabled.\n"); + } else { + do_warn( +"Warning: quota inodes would be cleared. 
Quotas would be disabled.\n"); + } + } else { + if (lost_uquotino) { + if (!no_modify) { + do_warn( + "Warning: user quota information was cleared.\n"); + do_warn( +"User quotas can not be enforced until limit information is recreated.\n"); + } else { + do_warn( + "Warning: user quota information would be cleared.\n"); + do_warn( +"User quotas could not be enforced until limit information was recreated.\n"); + } + } + + if (lost_pquotino) { + if (!no_modify) { + do_warn( + "Warning: project quota information was cleared.\n"); + do_warn( +"Project quotas can not be enforced until limit information is recreated.\n"); + } else { + do_warn( + "Warning: project quota information would be cleared.\n"); + do_warn( +"Project quotas could not be enforced until limit information was recreated.\n"); + } + } + } + + if (no_modify) { + do_log( + "No modify flag set, skipping filesystem flush and exiting.\n"); + if (fs_is_dirty) + return(1); + + return(0); + } + + /* + * Clear the quota flags if they're on. + */ + sbp = libxfs_getsb(mp, 0); + if (!sbp) + do_error("couldn't get superblock\n"); + + sb = XFS_BUF_TO_SBP(sbp); + + if (sb->sb_qflags & (XFS_UQUOTA_CHKD|XFS_PQUOTA_CHKD)) { + do_warn( + "Note - quota info will be regenerated on next quota mount.\n"); + sb->sb_qflags &= ~(XFS_UQUOTA_CHKD|XFS_PQUOTA_CHKD); + } + + if (clear_sunit) { + do_warn( +"Note - stripe unit (%d) and width (%d) fields have been reset.\n" +"Please set with mount -o sunit=,swidth=\n", + sb->sb_unit, sb->sb_width); + sb->sb_unit = 0; + sb->sb_width = 0; + } + + libxfs_writebuf(sbp, 0); + + libxfs_umount(mp); + if (args.rtdev) + libxfs_device_close(args.rtdev); + if (args.logdev) + libxfs_device_close(args.logdev); + libxfs_device_close(args.ddev); + + do_log("done\n"); + + return(0); +} -- 2.39.5