2 * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
28 * For further information regarding this notice, see:
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
34 * contains definition information. implementation (code)
35 * is spread out in separate files.
39 * block allocation lists
41 typedef struct ba_rec
{
46 void record_allocation(ba_rec_t
*addr
, ba_rec_t
*list
);
47 void free_allocations(ba_rec_t
*list
);
/*
 * block bit map defs -- track state of each filesystem block.
 * ba_bmap is an array of bitstrings declared in the globals.h file.
 * the bitstrings are broken up into 64-bit chunks.  one bitstring per AG.
 *
 * BA_BMAP_SIZE(x) is x / 4 rounded up (howmany() comes from <sys/param.h>).
 */
#define BA_BMAP_SIZE(x)		(howmany(x, 4))
56 void set_bmap_rt(xfs_drfsbno_t numblocks
);
57 void set_bmap_log(xfs_mount_t
*mp
);
58 void set_bmap_fs(xfs_mount_t
*mp
);
59 void teardown_bmap(xfs_mount_t
*mp
);
61 void teardown_rt_bmap(xfs_mount_t
*mp
);
62 void teardown_ag_bmap(xfs_mount_t
*mp
, xfs_agnumber_t agno
);
63 void teardown_bmap_finish(xfs_mount_t
*mp
);
/* blocks are numbered from zero */

/* block records fit into __uint64_t's units */

#define XR_BB_UNIT	64			/* number of bits/unit */
#define XR_BB		4			/* bits per block record */
#define XR_BB_NUM	(XR_BB_UNIT/XR_BB)	/* number of records per unit */
#define XR_BB_MASK	0xF			/* block record mask */
/*
 * bitstring ops -- set/get block states, either in filesystem
 * bno's or in agbno's.  turns out that fsbno addressing is
 * more convenient when dealing with bmap extracted addresses
 * and agbno addressing is more convenient when dealing with
 * meta-data extracted addresses.  So the fsbno versions use
 * mtype (which can be one of the block map types above) to
 * set the correct block map while the agbno versions assume
 * you want to use the regular block map.
 *
 * NOTE(review): none of the accessors below takes an mtype argument;
 * the sentence above may be stale -- confirm.
 *
 * With XR_BMAP_TRACE or XR_BMAP_DBG defined the accessors are real
 * functions; otherwise they are macros operating directly on ba_bmap.
 */

#if defined(XR_BMAP_TRACE) || defined(XR_BMAP_DBG)
/*
 * implemented as functions for debugging purposes
 */
int get_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
		xfs_agblock_t ag_blockno);
void set_agbno_state(xfs_mount_t *mp, xfs_agnumber_t agno,
		xfs_agblock_t ag_blockno, int state);

int get_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno);
void set_fsbno_state(xfs_mount_t *mp, xfs_dfsbno_t blockno, int state);
#else
/*
 * implemented as macros for performance purposes
 *
 * Each block owns XR_BB bits inside a 64-bit unit: unit index is
 * ag_blockno / XR_BB_NUM, bit offset is (ag_blockno % XR_BB_NUM) * XR_BB.
 * mp is accepted for symmetry with the function versions but unused by
 * the agbno macros.
 */

/* extract one record; the mask discards the neighbouring records */
#define get_agbno_state(mp, agno, ag_blockno) \
	((int) (((*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM)) \
		>> (((ag_blockno)%XR_BB_NUM)*XR_BB)) \
		& XR_BB_MASK))

/*
 * clear the record's bits, then OR in the new state.  state is not
 * masked here, so callers must pass a value that fits in XR_BB bits.
 */
#define set_agbno_state(mp, agno, ag_blockno, state) \
	(*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) = \
	 ((*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM) & \
	  (~((__uint64_t) XR_BB_MASK << (((ag_blockno)%XR_BB_NUM)*XR_BB)))) | \
	 (((__uint64_t) (state)) << (((ag_blockno)%XR_BB_NUM)*XR_BB))))

/* fsbno variants split the block number into (agno, agbno) first */
#define get_fsbno_state(mp, blockno) \
	get_agbno_state(mp, XFS_FSB_TO_AGNO(mp, (blockno)), \
			XFS_FSB_TO_AGBNO(mp, (blockno)))
#define set_fsbno_state(mp, blockno, state) \
	set_agbno_state(mp, XFS_FSB_TO_AGNO(mp, (blockno)), \
			XFS_FSB_TO_AGBNO(mp, (blockno)), (state))

/* the whole 64-bit unit that holds the record for ag_blockno */
#define get_agbno_rec(mp, agno, ag_blockno) \
	(*(ba_bmap[(agno)] + (ag_blockno)/XR_BB_NUM))
#endif /* XR_BMAP_TRACE */
/*
 * these work in real-time extents (e.g. fsbno == rt extent number)
 *
 * Same record layout as the per-AG maps, but indexing the single
 * rt_ba_bmap bitstring.  mp is unused by these macros.
 */
#define get_rtbno_state(mp, fsbno) \
	((*(rt_ba_bmap + (fsbno)/XR_BB_NUM) >> \
		(((fsbno)%XR_BB_NUM)*XR_BB)) & XR_BB_MASK)

/* state is not masked; it must fit in XR_BB bits */
#define set_rtbno_state(mp, fsbno, state) \
	(*(rt_ba_bmap + (fsbno)/XR_BB_NUM) = \
	 ((*(rt_ba_bmap + (fsbno)/XR_BB_NUM) & \
	  (~((__uint64_t) XR_BB_MASK << (((fsbno)%XR_BB_NUM)*XR_BB)))) | \
	 (((__uint64_t) (state)) << (((fsbno)%XR_BB_NUM)*XR_BB))))
137 * extent tree definitions
138 * right now, there are 3 trees per AG, a bno tree, a bcnt tree
139 * and a tree for dup extents. If the code is modified in the
140 * future to use an extent tree instead of a bitmask for tracking
141 * fs blocks, then we could lose the dup extent tree if we labelled
142 * each extent with the inode that owned it.
/* holds one of the XR_E_* extent states, optionally OR'ed with XR_E_WRITTEN */
typedef unsigned char	extent_state_t;
147 typedef struct extent_tree_node
{
149 xfs_agblock_t ex_startblock
; /* starting block (agbno) */
150 xfs_extlen_t ex_blockcount
; /* number of blocks in extent */
151 extent_state_t ex_state
; /* see state flags below */
153 struct extent_tree_node
*next
; /* for bcnt extent lists */
155 xfs_ino_t ex_inode
; /* owner, NULL if free or */
156 /* multiply allocated */
158 } extent_tree_node_t
;
160 typedef struct rt_extent_tree_node
{
162 xfs_drtbno_t rt_startblock
; /* starting realtime block */
163 xfs_extlen_t rt_blockcount
; /* number of blocks in extent */
164 extent_state_t rt_state
; /* see state flags below */
167 xfs_ino_t ex_inode
; /* owner, NULL if free or */
168 /* multiply allocated */
170 } rt_extent_tree_node_t
;
/* extent states, prefix with XR_ to avoid conflict with buffer cache defines */

#define XR_E_UNKNOWN	0	/* unknown state */
#define XR_E_FREE1	1	/* free block (marked by one fs space tree) */
#define XR_E_FREE	2	/* free block (marked by both fs space trees) */
#define XR_E_INUSE	3	/* extent used by file/dir data or metadata */
#define XR_E_INUSE_FS	4	/* extent used by fs ag header or log */
#define XR_E_MULT	5	/* extent is multiply referenced */
#define XR_E_INO	6	/* extent used by inodes (inode blocks) */
#define XR_E_FS_MAP	7	/* extent used by fs space/inode maps */
#define XR_E_BAD_STATE	8	/* first value that is not a valid state */

/* separate state bit, OR'ed into high (4th) bit of ex_state field */

#define XR_E_WRITTEN	0x8	/* extent has been written out, can't reclaim */

/*
 * good_state: true when the state, ignoring the written bit, lies in
 * [XR_E_UNKNOWN, XR_E_BAD_STATE).  Note that XR_E_BAD_STATE and
 * XR_E_WRITTEN share the value 8, so a raw 8 reads as UNKNOWN + written.
 * The argument is evaluated twice; do not pass expressions with side
 * effects.
 */
#define good_state(state) \
	(((state) & ~XR_E_WRITTEN) >= XR_E_UNKNOWN && \
	 ((state) & ~XR_E_WRITTEN) < XR_E_BAD_STATE)

/* non-zero when the written bit is set */
#define written(state)		((state) & XR_E_WRITTEN)

/* OR the written bit into an lvalue, leaving the base state intact */
#define set_written(state)	((state) |= XR_E_WRITTEN)
193 * bno extent tree functions
196 add_bno_extent(xfs_agnumber_t agno
, xfs_agblock_t startblock
,
197 xfs_extlen_t blockcount
);
200 findfirst_bno_extent(xfs_agnumber_t agno
);
203 find_bno_extent(xfs_agnumber_t agno
, xfs_agblock_t agbno
);
206 findfirst_bno_extent(xfs_agnumber_t agno
);
/* follow the avl_nextino link of an extent node; takes an extent node pointer */
#define findnext_bno_extent(extent_ptr)	\
	((extent_tree_node_t *) ((extent_ptr)->avl_node.avl_nextino))
212 get_bno_extent(xfs_agnumber_t agno
, extent_tree_node_t
*ext
);
215 * bcnt tree functions
218 add_bcnt_extent(xfs_agnumber_t agno
, xfs_agblock_t startblock
,
219 xfs_extlen_t blockcount
);
222 findfirst_bcnt_extent(xfs_agnumber_t agno
);
225 find_bcnt_extent(xfs_agnumber_t agno
, xfs_agblock_t agbno
);
228 findbiggest_bcnt_extent(xfs_agnumber_t agno
);
231 findnext_bcnt_extent(xfs_agnumber_t agno
, extent_tree_node_t
*ext
);
234 get_bcnt_extent(xfs_agnumber_t agno
, xfs_agblock_t startblock
,
235 xfs_extlen_t blockcount
);
238 * duplicate extent tree functions
240 void add_dup_extent(xfs_agnumber_t agno
,
241 xfs_agblock_t startblock
,
242 xfs_extlen_t blockcount
);
244 int search_dup_extent(xfs_mount_t
*mp
,
246 xfs_agblock_t agbno
);
248 void add_rt_dup_extent(xfs_drtbno_t startblock
,
249 xfs_extlen_t blockcount
);
251 int search_rt_dup_extent(xfs_mount_t
*mp
,
255 * extent/tree recyling and deletion routines
259 * return an extent node to the extent node free list
261 void release_extent_tree_node(extent_tree_node_t
*node
);
264 * recycle all the nodes in the per-AG tree
266 void release_dup_extent_tree(xfs_agnumber_t agno
);
267 void release_agbno_extent_tree(xfs_agnumber_t agno
);
268 void release_agbcnt_extent_tree(xfs_agnumber_t agno
);
271 * realtime duplicate extent tree - this one actually frees the memory
273 void free_rt_dup_extent_tree(xfs_mount_t
*mp
);
276 * per-AG extent trees shutdown routine -- all (bno, bcnt and dup)
277 * at once. this one actually frees the memory instead of just recyling
280 void incore_ext_teardown(xfs_mount_t
*mp
);
/* XR_INO_* type codes */
#define XR_INO_UNKNOWN		0	/* unknown */
#define XR_INO_DIR		1	/* directory */
#define XR_INO_RTDATA		2	/* realtime file */
#define XR_INO_RTBITMAP		3	/* realtime bitmap inode */
#define XR_INO_RTSUM		4	/* realtime summary inode */
#define XR_INO_DATA		5	/* regular file */
#define XR_INO_SYMLINK		6	/* symlink */
#define XR_INO_CHRDEV		7	/* character device */
#define XR_INO_BLKDEV		8	/* block device */
#define XR_INO_SOCK		9	/* socket */
#define XR_INO_FIFO		10	/* fifo */
#define XR_INO_MOUNTPOINT	11	/* mountpoint */
301 /* inode allocation tree */
304 * Inodes in the inode allocation trees are allocated in chunks.
305 * Those groups can be easily duplicated in our trees.
306 * Disconnected inodes are harder. We can do one of two
307 * things in that case: if we know the inode allocation btrees
308 * are good, then we can disallow directory references to unknown
309 * inode chunks. If the inode allocation trees have been trashed or
310 * we feel like being aggressive, then as we hit unknown inodes,
311 * we can search on the disk for all contiguous inodes and see if
312 * they fit into chunks. Before putting them into the inode tree,
313 * we can scan each inode starting at the earliest inode to see which
314 * ones are good. This protects us from the pathological case of
315 * inodes appearing in user-data. We still may have to mark the
316 * inodes as "possibly fake" so that if a file claims the blocks,
317 * we decide to believe the inodes, especially if they're not
/* NOTE(review): presumably the growth increment for parent lists -- confirm */
#define PLIST_CHUNK_SIZE	4
323 typedef xfs_ino_t parent_entry_t
;
325 typedef struct parent_list
{
327 parent_entry_t
*pentries
;
333 typedef struct backptrs
{
334 __uint64_t ino_reached
; /* bit == 1 if reached */
335 __uint64_t ino_processed
; /* reference checked bit mask */
336 __uint32_t nlinks
[XFS_INODES_PER_CHUNK
];
337 parent_list_t
*parents
;
340 typedef struct ino_tree_node
{
342 xfs_agino_t ino_startnum
; /* starting inode # */
343 xfs_inofree_t ir_free
; /* inode free bit mask */
344 __uint64_t ino_confirmed
; /* confirmed bitmask */
345 __uint64_t ino_isa_dir
; /* bit == 1 if a directory */
347 backptrs_t
*backptrs
;
348 parent_list_t
*plist
;
/* number of bits in a __uint64_t (NBBY, bits per byte, comes from <sys/param.h>) */
#define INOS_PER_IREC	(sizeof(__uint64_t) * NBBY)
353 void add_ino_backptrs(xfs_mount_t
*mp
);
356 * return an inode record to the free inode record pool
358 void free_inode_rec(xfs_agnumber_t agno
, ino_tree_node_t
*ino_rec
);
361 * get pulls the inode record from the good inode tree
363 void get_inode_rec(xfs_agnumber_t agno
, ino_tree_node_t
*ino_rec
);
365 ino_tree_node_t
*findfirst_inode_rec(xfs_agnumber_t agno
);
366 ino_tree_node_t
*find_inode_rec(xfs_agnumber_t agno
, xfs_agino_t ino
);
367 void find_inode_rec_range(xfs_agnumber_t agno
,
368 xfs_agino_t start_ino
, xfs_agino_t end_ino
,
369 ino_tree_node_t
**first
, ino_tree_node_t
**last
);
372 * set inode states -- setting an inode to used or free also
373 * automatically marks it as "existing". Note -- all the inode
374 * add/set/get routines assume a valid inode number.
376 ino_tree_node_t
*set_inode_used_alloc(xfs_agnumber_t agno
, xfs_agino_t ino
);
377 ino_tree_node_t
*set_inode_free_alloc(xfs_agnumber_t agno
, xfs_agino_t ino
);
379 void print_inode_list(xfs_agnumber_t agno
);
380 void print_uncertain_inode_list(xfs_agnumber_t agno
);
383 * separate trees for uncertain inodes (they may not exist).
385 ino_tree_node_t
*findfirst_uncertain_inode_rec(xfs_agnumber_t agno
);
386 ino_tree_node_t
*find_uncertain_inode_rec(xfs_agnumber_t agno
,
388 void add_inode_uncertain(xfs_mount_t
*mp
,
389 xfs_ino_t ino
, int free
);
390 void add_aginode_uncertain(xfs_agnumber_t agno
,
391 xfs_agino_t agino
, int free
);
392 void get_uncertain_inode_rec(xfs_agnumber_t agno
,
393 ino_tree_node_t
*ino_rec
);
394 void clear_uncertain_ino_cache(xfs_agnumber_t agno
);
/*
 * return next in-order inode tree node.  takes an "ino_tree_node_t *"
 */
#define next_ino_rec(ino_node_ptr)	\
	((ino_tree_node_t *) ((ino_node_ptr)->avl_node.avl_nextino))
/*
 * return the next linked inode (forward avl tree link)-- meant to be used
 * by linked list routines (uncertain inode routines/records)
 */
#define next_link_rec(ino_node_ptr)	\
	((ino_tree_node_t *) ((ino_node_ptr)->avl_node.avl_forw))
/*
 * Bit manipulations for processed field
 *
 * rp points at a record whose ino_un.backptrs->ino_processed is a
 * 64-bit mask; i selects the bit.
 */
#define XFS_INOPROC_MASK(i)	((__uint64_t)1 << (i))
/*
 * n consecutive bits starting at bit i.  The 1 is widened before the
 * shift so n may exceed the width of int; n must still be below 64.
 */
#define XFS_INOPROC_MASKN(i,n)	((((__uint64_t)1 << (n)) - 1) << (i))

/* 1 if bit i is set, 0 otherwise */
#define XFS_INOPROC_IS_PROC(rp, i) \
	((((rp)->ino_un.backptrs->ino_processed & XFS_INOPROC_MASK((i))) == 0LL) \
		? 0 : 1)
#define XFS_INOPROC_SET_PROC(rp, i) \
	((rp)->ino_un.backptrs->ino_processed |= XFS_INOPROC_MASK((i)))
#define XFS_INOPROC_CLR_PROC(rp, i) \
	((rp)->ino_un.backptrs->ino_processed &= ~XFS_INOPROC_MASK((i)))
/*
 * Bit manipulations for the ino_confirmed field
 *
 * rp points at a record with a 64-bit ino_confirmed mask; i selects
 * the bit.
 */
#define XFS_INOCF_MASK(i)	((__uint64_t)1 << (i))
/*
 * n consecutive bits starting at bit i.  The 1 is widened before the
 * shift so n may exceed the width of int; n must still be below 64.
 */
#define XFS_INOCF_MASKN(i,n)	((((__uint64_t)1 << (n)) - 1) << (i))

/* 1 if bit i is set, 0 otherwise */
#define XFS_INOCF_IS_CF(rp, i) \
	((((rp)->ino_confirmed & XFS_INOCF_MASK((i))) == 0LL) \
		? 0 : 1)
#define XFS_INOCF_SET_CF(rp, i) \
	((rp)->ino_confirmed |= XFS_INOCF_MASK((i)))
#define XFS_INOCF_CLR_CF(rp, i) \
	((rp)->ino_confirmed &= ~XFS_INOCF_MASK((i)))
/*
 * Bit manipulations for backptrs->ino_reached (bit == 1 if reached)
 */
#define XFS_INO_RCHD_MASK(i)	((__uint64_t)1 << (i))

/* 1 if bit i is set, 0 otherwise */
#define XFS_INO_RCHD_IS_RCHD(rp, i) \
	((((rp)->ino_un.backptrs->ino_reached & XFS_INO_RCHD_MASK((i))) == 0LL) \
		? 0 : 1)
#define XFS_INO_RCHD_SET_RCHD(rp, i) \
	((rp)->ino_un.backptrs->ino_reached |= XFS_INO_RCHD_MASK((i)))
#define XFS_INO_RCHD_CLR_RCHD(rp, i) \
	((rp)->ino_un.backptrs->ino_reached &= ~XFS_INO_RCHD_MASK((i)))
/*
 * set/clear/test is inode a directory inode
 *
 * ino_isa_dir is a 64-bit mask (bit == 1 if a directory); ino_offset
 * selects the bit.
 */
#define XFS_INO_ISADIR_MASK(i)	((__uint64_t)1 << (i))

/* 1 if the bit is set, 0 otherwise */
#define inode_isadir(ino_rec, ino_offset) \
	((((ino_rec)->ino_isa_dir & XFS_INO_ISADIR_MASK((ino_offset))) == 0LL) \
		? 0 : 1)
#define set_inode_isadir(ino_rec, ino_offset) \
	((ino_rec)->ino_isa_dir |= XFS_INO_ISADIR_MASK((ino_offset)))
#define clear_inode_isadir(ino_rec, ino_offset) \
	((ino_rec)->ino_isa_dir &= ~XFS_INO_ISADIR_MASK((ino_offset)))
/*
 * set/clear/test is inode known to be valid (although perhaps corrupt)
 *
 * Thin wrappers over the XFS_INOCF_* bit macros.
 */
#define clear_inode_confirmed(ino_rec, ino_offset) \
	XFS_INOCF_CLR_CF((ino_rec), (ino_offset))

#define set_inode_confirmed(ino_rec, ino_offset) \
	XFS_INOCF_SET_CF((ino_rec), (ino_offset))

#define is_inode_confirmed(ino_rec, ino_offset) \
	XFS_INOCF_IS_CF((ino_rec), (ino_offset))
/*
 * set/clear/test is inode free or used
 *
 * Marking an inode free or used also sets its confirmed bit.  Each
 * expansion is a single parenthesised expression, so the macros stay
 * intact inside larger expressions.
 */
#define set_inode_free(ino_rec, ino_offset) \
	(XFS_INOCF_SET_CF((ino_rec), (ino_offset)), \
	 XFS_INOBT_SET_FREE((ino_rec), (ino_offset), (ARCH_NOCONVERT)))

#define set_inode_used(ino_rec, ino_offset) \
	(XFS_INOCF_SET_CF((ino_rec), (ino_offset)), \
	 XFS_INOBT_CLR_FREE((ino_rec), (ino_offset), (ARCH_NOCONVERT)))

#define is_inode_used(ino_rec, ino_offset) \
	(!XFS_INOBT_IS_FREE((ino_rec), (ino_offset), (ARCH_NOCONVERT)))

#define is_inode_free(ino_rec, ino_offset) \
	(XFS_INOBT_IS_FREE((ino_rec), (ino_offset), (ARCH_NOCONVERT)))
494 * add_inode_reached() is set on inode I only if I has been reached
495 * by an inode P claiming to be the parent and if I is a directory,
496 * the .. link in the I says that P is I's parent.
498 * add_inode_ref() is called every time a link to an inode is
499 * detected and drop_inode_ref() is called every time a link to
500 * an inode that we've counted is removed.
503 void add_inode_reached(ino_tree_node_t
*ino_rec
, int ino_offset
);
504 void add_inode_ref(ino_tree_node_t
*ino_rec
, int ino_offset
);
505 void drop_inode_ref(ino_tree_node_t
*ino_rec
, int ino_offset
);
506 int is_inode_reached(ino_tree_node_t
*ino_rec
, int ino_offset
);
507 int is_inode_referenced(ino_tree_node_t
*ino_rec
, int ino_offset
);
508 __uint32_t
num_inode_references(ino_tree_node_t
*ino_rec
, int ino_offset
);
/*
 * has an inode been processed for phase 6 (reference count checking)?
 * add_inode_refchecked() is set on an inode when it gets traversed
 * during the reference count phase (6).  It's set so that if the inode
 * is a directory, it's traversed (and its links counted) only once.
 *
 * Without XR_INO_REF_DEBUG these are macros over the XFS_INOPROC_* bit
 * operations and the ino argument is ignored; with it they are real
 * functions.
 */
#ifndef XR_INO_REF_DEBUG
#define add_inode_refchecked(ino, ino_rec, ino_offset) \
	XFS_INOPROC_SET_PROC((ino_rec), (ino_offset))
#define is_inode_refchecked(ino, ino_rec, ino_offset) \
	(XFS_INOPROC_IS_PROC((ino_rec), (ino_offset)) == 0LL ? 0 : 1)
#else
void add_inode_refchecked(xfs_ino_t ino,
		ino_tree_node_t *ino_rec, int ino_offset);
int is_inode_refchecked(xfs_ino_t ino,
		ino_tree_node_t *ino_rec, int ino_offset);
#endif /* XR_INO_REF_DEBUG */
529 * set/get inode number of parent -- works for directory inodes only
531 void set_inode_parent(ino_tree_node_t
*irec
, int ino_offset
,
534 void clear_inode_parent(ino_tree_node_t
*irec
, int offset
);
536 xfs_ino_t
get_inode_parent(ino_tree_node_t
*irec
, int ino_offset
);
/*
 * bmap cursor for tracking and fixing bmap btrees.  All xfs btrees number
 * the levels with 0 being the leaf and every level up being 1 greater.
 */

#define XR_MAX_BMLEVELS		10	/* XXX - rcc need to verify number */
545 typedef struct bm_level_state
{
547 xfs_dfsbno_t left_fsbno
;
548 xfs_dfsbno_t right_fsbno
;
549 __uint64_t first_key
;
553 __uint64_t prev_last_key;
555 xfs_bmbt_block_t *block;
559 typedef struct bm_cursor
{
563 bm_level_state_t level
[XR_MAX_BMLEVELS
];
566 void init_bm_cursor(bmap_cursor_t
*cursor
, int num_level
);