2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
28 * For further information regarding this notice, see:
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
38 * Routines to implement leaf blocks of attributes as Btrees of hashed names.
41 /*========================================================================
42 * Routines used for growing the Btree.
43 *========================================================================*/
46 * Create the initial contents of a leaf attribute list
47 * or a leaf in a node attribute list.
50 xfs_attr_leaf_create(xfs_da_args_t
*args
, xfs_dablk_t blkno
, xfs_dabuf_t
**bpp
)
52 xfs_attr_leafblock_t
*leaf
;
53 xfs_attr_leaf_hdr_t
*hdr
;
60 error
= xfs_da_get_buf(args
->trans
, args
->dp
, blkno
, -1, &bp
,
66 bzero((char *)leaf
, XFS_LBSIZE(dp
->i_mount
));
68 INT_SET(hdr
->info
.magic
, ARCH_CONVERT
, XFS_ATTR_LEAF_MAGIC
);
69 INT_SET(hdr
->firstused
, ARCH_CONVERT
, XFS_LBSIZE(dp
->i_mount
));
70 if (INT_ISZERO(hdr
->firstused
, ARCH_CONVERT
)) {
71 INT_SET(hdr
->firstused
, ARCH_CONVERT
,
72 XFS_LBSIZE(dp
->i_mount
) - XFS_ATTR_LEAF_NAME_ALIGN
);
75 INT_SET(hdr
->freemap
[0].base
, ARCH_CONVERT
,
76 sizeof(xfs_attr_leaf_hdr_t
));
77 INT_SET(hdr
->freemap
[0].size
, ARCH_CONVERT
,
78 INT_GET(hdr
->firstused
, ARCH_CONVERT
)
79 - INT_GET(hdr
->freemap
[0].base
,
82 xfs_da_log_buf(args
->trans
, bp
, 0, XFS_LBSIZE(dp
->i_mount
) - 1);
89 * Split the leaf node, rebalance, then add the new entry.
92 xfs_attr_leaf_split(xfs_da_state_t
*state
, xfs_da_state_blk_t
*oldblk
,
93 xfs_da_state_blk_t
*newblk
)
99 * Allocate space for a new leaf node.
101 ASSERT(oldblk
->magic
== XFS_ATTR_LEAF_MAGIC
);
102 error
= xfs_da_grow_inode(state
->args
, &blkno
);
105 error
= xfs_attr_leaf_create(state
->args
, blkno
, &newblk
->bp
);
108 newblk
->blkno
= blkno
;
109 newblk
->magic
= XFS_ATTR_LEAF_MAGIC
;
112 * Rebalance the entries across the two leaves.
113 * NOTE: rebalance() currently depends on the 2nd block being empty.
115 xfs_attr_leaf_rebalance(state
, oldblk
, newblk
);
116 error
= xfs_da_blk_link(state
, oldblk
, newblk
);
121 * Save info on "old" attribute for "atomic rename" ops, leaf_add()
122 * modifies the index/blkno/rmtblk/rmtblkcnt fields to show the
123 * "new" attrs info. Will need the "old" info to remove it later.
125 * Insert the "new" entry in the correct block.
128 error
= xfs_attr_leaf_add(oldblk
->bp
, state
->args
);
130 error
= xfs_attr_leaf_add(newblk
->bp
, state
->args
);
133 * Update last hashval in each block since we added the name.
135 oldblk
->hashval
= xfs_attr_leaf_lasthash(oldblk
->bp
, NULL
);
136 newblk
->hashval
= xfs_attr_leaf_lasthash(newblk
->bp
, NULL
);
141 * Add a name to the leaf attribute list structure.
144 xfs_attr_leaf_add(xfs_dabuf_t
*bp
, xfs_da_args_t
*args
)
146 xfs_attr_leafblock_t
*leaf
;
147 xfs_attr_leaf_hdr_t
*hdr
;
148 xfs_attr_leaf_map_t
*map
;
149 int tablesize
, entsize
, sum
, tmp
, i
;
152 ASSERT(INT_GET(leaf
->hdr
.info
.magic
, ARCH_CONVERT
)
153 == XFS_ATTR_LEAF_MAGIC
);
154 ASSERT((args
->index
>= 0)
155 && (args
->index
<= INT_GET(leaf
->hdr
.count
, ARCH_CONVERT
)));
157 entsize
= xfs_attr_leaf_newentsize(args
,
158 args
->trans
->t_mountp
->m_sb
.sb_blocksize
, NULL
);
161 * Search through freemap for first-fit on new name length.
162 * (may need to figure in size of entry struct too)
164 tablesize
= (INT_GET(hdr
->count
, ARCH_CONVERT
) + 1)
165 * sizeof(xfs_attr_leaf_entry_t
)
166 + sizeof(xfs_attr_leaf_hdr_t
);
167 map
= &hdr
->freemap
[XFS_ATTR_LEAF_MAPSIZE
-1];
168 for (sum
= 0, i
= XFS_ATTR_LEAF_MAPSIZE
-1; i
>= 0; map
--, i
--) {
169 if (tablesize
> INT_GET(hdr
->firstused
, ARCH_CONVERT
)) {
170 sum
+= INT_GET(map
->size
, ARCH_CONVERT
);
173 if (INT_ISZERO(map
->size
, ARCH_CONVERT
))
174 continue; /* no space in this map */
176 if (INT_GET(map
->base
, ARCH_CONVERT
)
177 < INT_GET(hdr
->firstused
, ARCH_CONVERT
))
178 tmp
+= sizeof(xfs_attr_leaf_entry_t
);
179 if (INT_GET(map
->size
, ARCH_CONVERT
) >= tmp
) {
180 tmp
= xfs_attr_leaf_add_work(bp
, args
, i
);
183 sum
+= INT_GET(map
->size
, ARCH_CONVERT
);
187 * If there are no holes in the address space of the block,
188 * and we don't have enough freespace, then compaction will do us
189 * no good and we should just give up.
191 if (!hdr
->holes
&& (sum
< entsize
))
192 return(XFS_ERROR(ENOSPC
));
195 * Compact the entries to coalesce free space.
196 * This may change the hdr->count via dropping INCOMPLETE entries.
198 xfs_attr_leaf_compact(args
->trans
, bp
);
201 * After compaction, the block is guaranteed to have only one
202 * free region, in freemap[0]. If it is not big enough, give up.
204 if (INT_GET(hdr
->freemap
[0].size
, ARCH_CONVERT
)
205 < (entsize
+ sizeof(xfs_attr_leaf_entry_t
)))
206 return(XFS_ERROR(ENOSPC
));
208 return(xfs_attr_leaf_add_work(bp
, args
, 0));
212 * Add a name to a leaf attribute list structure.
215 xfs_attr_leaf_add_work(xfs_dabuf_t
*bp
, xfs_da_args_t
*args
, int mapindex
)
217 xfs_attr_leafblock_t
*leaf
;
218 xfs_attr_leaf_hdr_t
*hdr
;
219 xfs_attr_leaf_entry_t
*entry
;
220 xfs_attr_leaf_name_local_t
*name_loc
;
221 xfs_attr_leaf_name_remote_t
*name_rmt
;
222 xfs_attr_leaf_map_t
*map
;
227 ASSERT(INT_GET(leaf
->hdr
.info
.magic
, ARCH_CONVERT
)
228 == XFS_ATTR_LEAF_MAGIC
);
230 ASSERT((mapindex
>= 0) && (mapindex
< XFS_ATTR_LEAF_MAPSIZE
));
231 ASSERT((args
->index
>= 0)
232 && (args
->index
<= INT_GET(hdr
->count
, ARCH_CONVERT
)));
235 * Force open some space in the entry array and fill it in.
237 entry
= &leaf
->entries
[args
->index
];
238 if (args
->index
< INT_GET(hdr
->count
, ARCH_CONVERT
)) {
239 tmp
= INT_GET(hdr
->count
, ARCH_CONVERT
) - args
->index
;
240 tmp
*= sizeof(xfs_attr_leaf_entry_t
);
241 ovbcopy((char *)entry
, (char *)(entry
+1), tmp
);
242 xfs_da_log_buf(args
->trans
, bp
,
243 XFS_DA_LOGRANGE(leaf
, entry
, tmp
+ sizeof(*entry
)));
245 INT_MOD(hdr
->count
, ARCH_CONVERT
, 1);
248 * Allocate space for the new string (at the end of the run).
250 map
= &hdr
->freemap
[mapindex
];
251 mp
= args
->trans
->t_mountp
;
252 ASSERT(INT_GET(map
->base
, ARCH_CONVERT
) < XFS_LBSIZE(mp
));
253 ASSERT((INT_GET(map
->base
, ARCH_CONVERT
) & 0x3) == 0);
254 ASSERT(INT_GET(map
->size
, ARCH_CONVERT
)
255 >= xfs_attr_leaf_newentsize(args
,
256 mp
->m_sb
.sb_blocksize
, NULL
));
257 ASSERT(INT_GET(map
->size
, ARCH_CONVERT
) < XFS_LBSIZE(mp
));
258 ASSERT((INT_GET(map
->size
, ARCH_CONVERT
) & 0x3) == 0);
259 INT_MOD(map
->size
, ARCH_CONVERT
,
260 -xfs_attr_leaf_newentsize(args
, mp
->m_sb
.sb_blocksize
, &tmp
));
261 INT_SET(entry
->nameidx
, ARCH_CONVERT
,
262 INT_GET(map
->base
, ARCH_CONVERT
)
263 + INT_GET(map
->size
, ARCH_CONVERT
));
264 INT_SET(entry
->hashval
, ARCH_CONVERT
, args
->hashval
);
265 entry
->flags
= tmp
? XFS_ATTR_LOCAL
: 0;
266 entry
->flags
|= (args
->flags
& ATTR_ROOT
) ? XFS_ATTR_ROOT
: 0;
268 entry
->flags
|= XFS_ATTR_INCOMPLETE
;
269 if ((args
->blkno2
== args
->blkno
) &&
270 (args
->index2
<= args
->index
)) {
274 xfs_da_log_buf(args
->trans
, bp
,
275 XFS_DA_LOGRANGE(leaf
, entry
, sizeof(*entry
)));
276 ASSERT((args
->index
== 0) || (INT_GET(entry
->hashval
, ARCH_CONVERT
)
277 >= INT_GET((entry
-1)->hashval
,
279 ASSERT((args
->index
== INT_GET(hdr
->count
, ARCH_CONVERT
)-1) ||
280 (INT_GET(entry
->hashval
, ARCH_CONVERT
)
281 <= (INT_GET((entry
+1)->hashval
, ARCH_CONVERT
))));
284 * Copy the attribute name and value into the new space.
286 * For "remote" attribute values, simply note that we need to
287 * allocate space for the "remote" value. We can't actually
288 * allocate the extents in this transaction, and we can't decide
289 * which blocks they should be as we might allocate more blocks
290 * as part of this transaction (a split operation for example).
292 if (entry
->flags
& XFS_ATTR_LOCAL
) {
293 name_loc
= XFS_ATTR_LEAF_NAME_LOCAL(leaf
, args
->index
);
294 name_loc
->namelen
= args
->namelen
;
295 INT_SET(name_loc
->valuelen
, ARCH_CONVERT
, args
->valuelen
);
296 bcopy(args
->name
, (char *)name_loc
->nameval
, args
->namelen
);
297 bcopy(args
->value
, (char *)&name_loc
->nameval
[args
->namelen
],
298 INT_GET(name_loc
->valuelen
, ARCH_CONVERT
));
300 name_rmt
= XFS_ATTR_LEAF_NAME_REMOTE(leaf
, args
->index
);
301 name_rmt
->namelen
= args
->namelen
;
302 bcopy(args
->name
, (char *)name_rmt
->name
, args
->namelen
);
303 entry
->flags
|= XFS_ATTR_INCOMPLETE
;
305 INT_ZERO(name_rmt
->valuelen
, ARCH_CONVERT
);
306 INT_ZERO(name_rmt
->valueblk
, ARCH_CONVERT
);
308 args
->rmtblkcnt
= XFS_B_TO_FSB(mp
, args
->valuelen
);
310 xfs_da_log_buf(args
->trans
, bp
,
311 XFS_DA_LOGRANGE(leaf
, XFS_ATTR_LEAF_NAME(leaf
, args
->index
),
312 xfs_attr_leaf_entsize(leaf
, args
->index
)));
315 * Update the control info for this leaf node
317 if (INT_GET(entry
->nameidx
, ARCH_CONVERT
)
318 < INT_GET(hdr
->firstused
, ARCH_CONVERT
)) {
319 /* both on-disk, don't endian-flip twice */
320 hdr
->firstused
= entry
->nameidx
;
322 ASSERT(INT_GET(hdr
->firstused
, ARCH_CONVERT
)
323 >= ((INT_GET(hdr
->count
, ARCH_CONVERT
)
324 * sizeof(*entry
))+sizeof(*hdr
)));
325 tmp
= (INT_GET(hdr
->count
, ARCH_CONVERT
)-1)
326 * sizeof(xfs_attr_leaf_entry_t
)
327 + sizeof(xfs_attr_leaf_hdr_t
);
328 map
= &hdr
->freemap
[0];
329 for (i
= 0; i
< XFS_ATTR_LEAF_MAPSIZE
; map
++, i
++) {
330 if (INT_GET(map
->base
, ARCH_CONVERT
) == tmp
) {
331 INT_MOD(map
->base
, ARCH_CONVERT
,
332 sizeof(xfs_attr_leaf_entry_t
));
333 INT_MOD(map
->size
, ARCH_CONVERT
,
334 -sizeof(xfs_attr_leaf_entry_t
));
337 INT_MOD(hdr
->usedbytes
, ARCH_CONVERT
,
338 xfs_attr_leaf_entsize(leaf
, args
->index
));
339 xfs_da_log_buf(args
->trans
, bp
,
340 XFS_DA_LOGRANGE(leaf
, hdr
, sizeof(*hdr
)));
345 * Garbage collect a leaf attribute list block by copying it to a new buffer.
348 xfs_attr_leaf_compact(xfs_trans_t
*trans
, xfs_dabuf_t
*bp
)
350 xfs_attr_leafblock_t
*leaf_s
, *leaf_d
;
351 xfs_attr_leaf_hdr_t
*hdr_s
, *hdr_d
;
355 mp
= trans
->t_mountp
;
356 tmpbuffer
= kmem_alloc(XFS_LBSIZE(mp
), KM_SLEEP
);
357 ASSERT(tmpbuffer
!= NULL
);
358 bcopy(bp
->data
, tmpbuffer
, XFS_LBSIZE(mp
));
359 bzero(bp
->data
, XFS_LBSIZE(mp
));
362 * Copy basic information
364 leaf_s
= (xfs_attr_leafblock_t
*)tmpbuffer
;
366 hdr_s
= &leaf_s
->hdr
;
367 hdr_d
= &leaf_d
->hdr
;
368 hdr_d
->info
= hdr_s
->info
; /* struct copy */
369 INT_SET(hdr_d
->firstused
, ARCH_CONVERT
, XFS_LBSIZE(mp
));
370 /* handle truncation gracefully */
371 if (INT_ISZERO(hdr_d
->firstused
, ARCH_CONVERT
)) {
372 INT_SET(hdr_d
->firstused
, ARCH_CONVERT
,
373 XFS_LBSIZE(mp
) - XFS_ATTR_LEAF_NAME_ALIGN
);
375 INT_ZERO(hdr_d
->usedbytes
, ARCH_CONVERT
);
376 INT_ZERO(hdr_d
->count
, ARCH_CONVERT
);
378 INT_SET(hdr_d
->freemap
[0].base
, ARCH_CONVERT
,
379 sizeof(xfs_attr_leaf_hdr_t
));
380 INT_SET(hdr_d
->freemap
[0].size
, ARCH_CONVERT
,
381 INT_GET(hdr_d
->firstused
, ARCH_CONVERT
)
382 - INT_GET(hdr_d
->freemap
[0].base
, ARCH_CONVERT
));
385 * Copy all entry's in the same (sorted) order,
386 * but allocate name/value pairs packed and in sequence.
388 xfs_attr_leaf_moveents(leaf_s
, 0, leaf_d
, 0,
389 (int)INT_GET(hdr_s
->count
, ARCH_CONVERT
), mp
);
391 xfs_da_log_buf(trans
, bp
, 0, XFS_LBSIZE(mp
) - 1);
393 kmem_free(tmpbuffer
, XFS_LBSIZE(mp
));
397 * Redistribute the attribute list entries between two leaf nodes,
398 * taking into account the size of the new entry.
400 * NOTE: if new block is empty, then it will get the upper half of the
401 * old block. At present, all (one) callers pass in an empty second block.
403 * This code adjusts the args->index/blkno and args->index2/blkno2 fields
404 * to match what it is doing in splitting the attribute leaf block. Those
405 * values are used in "atomic rename" operations on attributes. Note that
406 * the "new" and "old" values can end up in different blocks.
409 xfs_attr_leaf_rebalance(xfs_da_state_t
*state
, xfs_da_state_blk_t
*blk1
,
410 xfs_da_state_blk_t
*blk2
)
413 xfs_da_state_blk_t
*tmp_blk
;
414 xfs_attr_leafblock_t
*leaf1
, *leaf2
;
415 xfs_attr_leaf_hdr_t
*hdr1
, *hdr2
;
416 int count
, totallen
, max
, space
, swap
;
419 * Set up environment.
421 ASSERT(blk1
->magic
== XFS_ATTR_LEAF_MAGIC
);
422 ASSERT(blk2
->magic
== XFS_ATTR_LEAF_MAGIC
);
423 leaf1
= blk1
->bp
->data
;
424 leaf2
= blk2
->bp
->data
;
425 ASSERT(INT_GET(leaf1
->hdr
.info
.magic
, ARCH_CONVERT
)
426 == XFS_ATTR_LEAF_MAGIC
);
427 ASSERT(INT_GET(leaf2
->hdr
.info
.magic
, ARCH_CONVERT
)
428 == XFS_ATTR_LEAF_MAGIC
);
432 * Check ordering of blocks, reverse if it makes things simpler.
434 * NOTE: Given that all (current) callers pass in an empty
435 * second block, this code should never set "swap".
438 if (xfs_attr_leaf_order(blk1
->bp
, blk2
->bp
)) {
442 leaf1
= blk1
->bp
->data
;
443 leaf2
= blk2
->bp
->data
;
450 * Examine entries until we reduce the absolute difference in
451 * byte usage between the two blocks to a minimum. Then get
452 * the direction to copy and the number of elements to move.
454 * "inleaf" is true if the new entry should be inserted into blk1.
455 * If "swap" is also true, then reverse the sense of "inleaf".
457 state
->inleaf
= xfs_attr_leaf_figure_balance(state
, blk1
, blk2
,
460 state
->inleaf
= !state
->inleaf
;
463 * Move any entries required from leaf to leaf:
465 if (count
< INT_GET(hdr1
->count
, ARCH_CONVERT
)) {
467 * Figure the total bytes to be added to the destination leaf.
469 /* number entries being moved */
470 count
= INT_GET(hdr1
->count
, ARCH_CONVERT
) - count
;
471 space
= INT_GET(hdr1
->usedbytes
, ARCH_CONVERT
) - totallen
;
472 space
+= count
* sizeof(xfs_attr_leaf_entry_t
);
475 * leaf2 is the destination, compact it if it looks tight.
477 max
= INT_GET(hdr2
->firstused
, ARCH_CONVERT
)
478 - sizeof(xfs_attr_leaf_hdr_t
);
479 max
-= INT_GET(hdr2
->count
, ARCH_CONVERT
)
480 * sizeof(xfs_attr_leaf_entry_t
);
482 xfs_attr_leaf_compact(args
->trans
, blk2
->bp
);
486 * Move high entries from leaf1 to low end of leaf2.
488 xfs_attr_leaf_moveents(leaf1
,
489 INT_GET(hdr1
->count
, ARCH_CONVERT
)-count
,
490 leaf2
, 0, count
, state
->mp
);
492 xfs_da_log_buf(args
->trans
, blk1
->bp
, 0, state
->blocksize
-1);
493 xfs_da_log_buf(args
->trans
, blk2
->bp
, 0, state
->blocksize
-1);
494 } else if (count
> INT_GET(hdr1
->count
, ARCH_CONVERT
)) {
496 * I assert that since all callers pass in an empty
497 * second buffer, this code should never execute.
501 * Figure the total bytes to be added to the destination leaf.
503 /* number entries being moved */
504 count
-= INT_GET(hdr1
->count
, ARCH_CONVERT
);
505 space
= totallen
- INT_GET(hdr1
->usedbytes
, ARCH_CONVERT
);
506 space
+= count
* sizeof(xfs_attr_leaf_entry_t
);
509 * leaf1 is the destination, compact it if it looks tight.
511 max
= INT_GET(hdr1
->firstused
, ARCH_CONVERT
)
512 - sizeof(xfs_attr_leaf_hdr_t
);
513 max
-= INT_GET(hdr1
->count
, ARCH_CONVERT
)
514 * sizeof(xfs_attr_leaf_entry_t
);
516 xfs_attr_leaf_compact(args
->trans
, blk1
->bp
);
520 * Move low entries from leaf2 to high end of leaf1.
522 xfs_attr_leaf_moveents(leaf2
, 0, leaf1
,
523 (int)INT_GET(hdr1
->count
, ARCH_CONVERT
), count
,
526 xfs_da_log_buf(args
->trans
, blk1
->bp
, 0, state
->blocksize
-1);
527 xfs_da_log_buf(args
->trans
, blk2
->bp
, 0, state
->blocksize
-1);
531 * Copy out last hashval in each block for B-tree code.
534 INT_GET(leaf1
->entries
[INT_GET(leaf1
->hdr
.count
,
535 ARCH_CONVERT
)-1].hashval
, ARCH_CONVERT
);
537 INT_GET(leaf2
->entries
[INT_GET(leaf2
->hdr
.count
,
538 ARCH_CONVERT
)-1].hashval
, ARCH_CONVERT
);
541 * Adjust the expected index for insertion.
542 * NOTE: this code depends on the (current) situation that the
543 * second block was originally empty.
545 * If the insertion point moved to the 2nd block, we must adjust
546 * the index. We must also track the entry just following the
547 * new entry for use in an "atomic rename" operation, that entry
548 * is always the "old" entry and the "new" entry is what we are
549 * inserting. The index/blkno fields refer to the "old" entry,
550 * while the index2/blkno2 fields refer to the "new" entry.
552 if (blk1
->index
> INT_GET(leaf1
->hdr
.count
, ARCH_CONVERT
)) {
553 ASSERT(state
->inleaf
== 0);
554 blk2
->index
= blk1
->index
555 - INT_GET(leaf1
->hdr
.count
, ARCH_CONVERT
);
556 args
->index
= args
->index2
= blk2
->index
;
557 args
->blkno
= args
->blkno2
= blk2
->blkno
;
558 } else if (blk1
->index
== INT_GET(leaf1
->hdr
.count
, ARCH_CONVERT
)) {
560 args
->index
= blk1
->index
;
561 args
->blkno
= blk1
->blkno
;
563 args
->blkno2
= blk2
->blkno
;
565 blk2
->index
= blk1
->index
566 - INT_GET(leaf1
->hdr
.count
, ARCH_CONVERT
);
567 args
->index
= args
->index2
= blk2
->index
;
568 args
->blkno
= args
->blkno2
= blk2
->blkno
;
571 ASSERT(state
->inleaf
== 1);
572 args
->index
= args
->index2
= blk1
->index
;
573 args
->blkno
= args
->blkno2
= blk1
->blkno
;
578 * Examine entries until we reduce the absolute difference in
579 * byte usage between the two blocks to a minimum.
580 * GROT: Is this really necessary? With other than a 512 byte blocksize,
581 * GROT: there will always be enough room in either block for a new entry.
582 * GROT: Do a double-split for this case?
585 xfs_attr_leaf_figure_balance(xfs_da_state_t
*state
,
586 xfs_da_state_blk_t
*blk1
,
587 xfs_da_state_blk_t
*blk2
,
588 int *countarg
, int *usedbytesarg
)
590 xfs_attr_leafblock_t
*leaf1
, *leaf2
;
591 xfs_attr_leaf_hdr_t
*hdr1
, *hdr2
;
592 xfs_attr_leaf_entry_t
*entry
;
593 int count
, max
, index
, totallen
, half
;
594 int lastdelta
, foundit
, tmp
;
597 * Set up environment.
599 leaf1
= blk1
->bp
->data
;
600 leaf2
= blk2
->bp
->data
;
607 * Examine entries until we reduce the absolute difference in
608 * byte usage between the two blocks to a minimum.
610 max
= INT_GET(hdr1
->count
, ARCH_CONVERT
)
611 + INT_GET(hdr2
->count
, ARCH_CONVERT
);
612 half
= (max
+1) * sizeof(*entry
);
613 half
+= INT_GET(hdr1
->usedbytes
, ARCH_CONVERT
)
614 + INT_GET(hdr2
->usedbytes
, ARCH_CONVERT
)
615 + xfs_attr_leaf_newentsize(state
->args
,
616 state
->blocksize
, NULL
);
618 lastdelta
= state
->blocksize
;
619 entry
= &leaf1
->entries
[0];
620 for (count
= index
= 0; count
< max
; entry
++, index
++, count
++) {
622 #define XFS_ATTR_ABS(A) (((A) < 0) ? -(A) : (A))
624 * The new entry is in the first block, account for it.
626 if (count
== blk1
->index
) {
627 tmp
= totallen
+ sizeof(*entry
) +
628 xfs_attr_leaf_newentsize(state
->args
,
631 if (XFS_ATTR_ABS(half
- tmp
) > lastdelta
)
633 lastdelta
= XFS_ATTR_ABS(half
- tmp
);
639 * Wrap around into the second block if necessary.
641 if (count
== INT_GET(hdr1
->count
, ARCH_CONVERT
)) {
643 entry
= &leaf1
->entries
[0];
648 * Figure out if next leaf entry would be too much.
650 tmp
= totallen
+ sizeof(*entry
) + xfs_attr_leaf_entsize(leaf1
,
652 if (XFS_ATTR_ABS(half
- tmp
) > lastdelta
)
654 lastdelta
= XFS_ATTR_ABS(half
- tmp
);
660 * Calculate the number of usedbytes that will end up in lower block.
661 * If new entry not in lower block, fix up the count.
663 totallen
-= count
* sizeof(*entry
);
665 totallen
-= sizeof(*entry
) +
666 xfs_attr_leaf_newentsize(state
->args
,
672 *usedbytesarg
= totallen
;
676 /*========================================================================
677 * Routines used for shrinking the Btree.
678 *========================================================================*/
681 * Check a leaf block and its neighbors to see if the block should be
682 * collapsed into one or the other neighbor. Always keep the block
683 * with the smaller block number.
684 * If the current block is over 50% full, don't try to join it, return 0.
685 * If the block is empty, fill in the state structure and return 2.
686 * If it can be collapsed, fill in the state structure and return 1.
687 * If nothing can be done, return 0.
689 * GROT: allow for INCOMPLETE entries in calculation.
692 xfs_attr_leaf_toosmall(xfs_da_state_t
*state
, int *action
)
694 xfs_attr_leafblock_t
*leaf
;
695 xfs_da_state_blk_t
*blk
;
696 xfs_da_blkinfo_t
*info
;
697 int count
, bytes
, forward
, error
, retval
, i
;
702 * Check for the degenerate case of the block being over 50% full.
703 * If so, it's not worth even looking to see if we might be able
704 * to coalesce with a sibling.
706 blk
= &state
->path
.blk
[ state
->path
.active
-1 ];
707 info
= blk
->bp
->data
;
708 ASSERT(INT_GET(info
->magic
, ARCH_CONVERT
) == XFS_ATTR_LEAF_MAGIC
);
709 leaf
= (xfs_attr_leafblock_t
*)info
;
710 count
= INT_GET(leaf
->hdr
.count
, ARCH_CONVERT
);
711 bytes
= sizeof(xfs_attr_leaf_hdr_t
) +
712 count
* sizeof(xfs_attr_leaf_entry_t
) +
713 INT_GET(leaf
->hdr
.usedbytes
, ARCH_CONVERT
);
714 if (bytes
> (state
->blocksize
>> 1)) {
715 *action
= 0; /* blk over 50%, dont try to join */
720 * Check for the degenerate case of the block being empty.
721 * If the block is empty, we'll simply delete it, no need to
722 * coalesce it with a sibling block. We choose (aribtrarily)
723 * to merge with the forward block unless it is NULL.
727 * Make altpath point to the block we want to keep and
728 * path point to the block we want to drop (this one).
730 forward
= (!INT_ISZERO(info
->forw
, ARCH_CONVERT
));
731 bcopy(&state
->path
, &state
->altpath
, sizeof(state
->path
));
732 error
= xfs_da_path_shift(state
, &state
->altpath
, forward
,
745 * Examine each sibling block to see if we can coalesce with
746 * at least 25% free space to spare. We need to figure out
747 * whether to merge with the forward or the backward block.
748 * We prefer coalescing with the lower numbered sibling so as
749 * to shrink an attribute list over time.
751 /* start with smaller blk num */
752 forward
= (INT_GET(info
->forw
, ARCH_CONVERT
)
753 < INT_GET(info
->back
, ARCH_CONVERT
));
754 for (i
= 0; i
< 2; forward
= !forward
, i
++) {
756 blkno
= INT_GET(info
->forw
, ARCH_CONVERT
);
758 blkno
= INT_GET(info
->back
, ARCH_CONVERT
);
761 error
= xfs_da_read_buf(state
->args
->trans
, state
->args
->dp
,
762 blkno
, -1, &bp
, XFS_ATTR_FORK
);
767 leaf
= (xfs_attr_leafblock_t
*)info
;
768 count
= INT_GET(leaf
->hdr
.count
, ARCH_CONVERT
);
769 bytes
= state
->blocksize
- (state
->blocksize
>>2);
770 bytes
-= INT_GET(leaf
->hdr
.usedbytes
, ARCH_CONVERT
);
772 ASSERT(INT_GET(leaf
->hdr
.info
.magic
, ARCH_CONVERT
)
773 == XFS_ATTR_LEAF_MAGIC
);
774 count
+= INT_GET(leaf
->hdr
.count
, ARCH_CONVERT
);
775 bytes
-= INT_GET(leaf
->hdr
.usedbytes
, ARCH_CONVERT
);
776 bytes
-= count
* sizeof(xfs_attr_leaf_entry_t
);
777 bytes
-= sizeof(xfs_attr_leaf_hdr_t
);
778 xfs_da_brelse(state
->args
->trans
, bp
);
780 break; /* fits with at least 25% to spare */
788 * Make altpath point to the block we want to keep (the lower
789 * numbered block) and path point to the block we want to drop.
791 bcopy(&state
->path
, &state
->altpath
, sizeof(state
->path
));
792 if (blkno
< blk
->blkno
) {
793 error
= xfs_da_path_shift(state
, &state
->altpath
, forward
,
796 error
= xfs_da_path_shift(state
, &state
->path
, forward
,
810 * Move all the attribute list entries from drop_leaf into save_leaf.
813 xfs_attr_leaf_unbalance(xfs_da_state_t
*state
, xfs_da_state_blk_t
*drop_blk
,
814 xfs_da_state_blk_t
*save_blk
)
816 xfs_attr_leafblock_t
*drop_leaf
, *save_leaf
, *tmp_leaf
;
817 xfs_attr_leaf_hdr_t
*drop_hdr
, *save_hdr
, *tmp_hdr
;
822 * Set up environment.
825 ASSERT(drop_blk
->magic
== XFS_ATTR_LEAF_MAGIC
);
826 ASSERT(save_blk
->magic
== XFS_ATTR_LEAF_MAGIC
);
827 drop_leaf
= drop_blk
->bp
->data
;
828 save_leaf
= save_blk
->bp
->data
;
829 ASSERT(INT_GET(drop_leaf
->hdr
.info
.magic
, ARCH_CONVERT
)
830 == XFS_ATTR_LEAF_MAGIC
);
831 ASSERT(INT_GET(save_leaf
->hdr
.info
.magic
, ARCH_CONVERT
)
832 == XFS_ATTR_LEAF_MAGIC
);
833 drop_hdr
= &drop_leaf
->hdr
;
834 save_hdr
= &save_leaf
->hdr
;
837 * Save last hashval from dying block for later Btree fixup.
840 INT_GET(drop_leaf
->entries
[INT_GET(drop_leaf
->hdr
.count
,
841 ARCH_CONVERT
)-1].hashval
,
845 * Check if we need a temp buffer, or can we do it in place.
846 * Note that we don't check "leaf" for holes because we will
847 * always be dropping it, toosmall() decided that for us already.
849 if (save_hdr
->holes
== 0) {
851 * dest leaf has no holes, so we add there. May need
852 * to make some room in the entry array.
854 if (xfs_attr_leaf_order(save_blk
->bp
, drop_blk
->bp
)) {
855 xfs_attr_leaf_moveents(drop_leaf
, 0, save_leaf
, 0,
856 (int)INT_GET(drop_hdr
->count
, ARCH_CONVERT
), mp
);
858 xfs_attr_leaf_moveents(drop_leaf
, 0, save_leaf
,
859 INT_GET(save_hdr
->count
, ARCH_CONVERT
),
860 (int)INT_GET(drop_hdr
->count
, ARCH_CONVERT
),
865 * Destination has holes, so we make a temporary copy
866 * of the leaf and add them both to that.
868 tmpbuffer
= kmem_alloc(state
->blocksize
, KM_SLEEP
);
869 ASSERT(tmpbuffer
!= NULL
);
870 bzero(tmpbuffer
, state
->blocksize
);
871 tmp_leaf
= (xfs_attr_leafblock_t
*)tmpbuffer
;
872 tmp_hdr
= &tmp_leaf
->hdr
;
873 tmp_hdr
->info
= save_hdr
->info
; /* struct copy */
874 INT_ZERO(tmp_hdr
->count
, ARCH_CONVERT
);
875 INT_SET(tmp_hdr
->firstused
, ARCH_CONVERT
, state
->blocksize
);
876 if (INT_ISZERO(tmp_hdr
->firstused
, ARCH_CONVERT
)) {
877 INT_SET(tmp_hdr
->firstused
, ARCH_CONVERT
,
878 state
->blocksize
- XFS_ATTR_LEAF_NAME_ALIGN
);
880 INT_ZERO(tmp_hdr
->usedbytes
, ARCH_CONVERT
);
881 if (xfs_attr_leaf_order(save_blk
->bp
, drop_blk
->bp
)) {
882 xfs_attr_leaf_moveents(drop_leaf
, 0, tmp_leaf
, 0,
883 (int)INT_GET(drop_hdr
->count
, ARCH_CONVERT
),
885 xfs_attr_leaf_moveents(save_leaf
, 0, tmp_leaf
,
886 INT_GET(tmp_leaf
->hdr
.count
, ARCH_CONVERT
),
887 (int)INT_GET(save_hdr
->count
, ARCH_CONVERT
),
890 xfs_attr_leaf_moveents(save_leaf
, 0, tmp_leaf
, 0,
891 (int)INT_GET(save_hdr
->count
, ARCH_CONVERT
),
893 xfs_attr_leaf_moveents(drop_leaf
, 0, tmp_leaf
,
894 INT_GET(tmp_leaf
->hdr
.count
, ARCH_CONVERT
),
895 (int)INT_GET(drop_hdr
->count
, ARCH_CONVERT
),
898 bcopy((char *)tmp_leaf
, (char *)save_leaf
, state
->blocksize
);
899 kmem_free(tmpbuffer
, state
->blocksize
);
902 xfs_da_log_buf(state
->args
->trans
, save_blk
->bp
, 0,
903 state
->blocksize
- 1);
906 * Copy out last hashval in each block for B-tree code.
909 INT_GET(save_leaf
->entries
[INT_GET(save_leaf
->hdr
.count
,
910 ARCH_CONVERT
)-1].hashval
,
915 /*========================================================================
917 *========================================================================*/
920 * Move the indicated entries from one leaf to another.
921 * NOTE: this routine modifies both source and destination leaves.
925 xfs_attr_leaf_moveents(xfs_attr_leafblock_t
*leaf_s
, int start_s
,
926 xfs_attr_leafblock_t
*leaf_d
, int start_d
,
927 int count
, xfs_mount_t
*mp
)
929 xfs_attr_leaf_hdr_t
*hdr_s
, *hdr_d
;
930 xfs_attr_leaf_entry_t
*entry_s
, *entry_d
;
934 * Check for nothing to do.
940 * Set up environment.
942 ASSERT(INT_GET(leaf_s
->hdr
.info
.magic
, ARCH_CONVERT
)
943 == XFS_ATTR_LEAF_MAGIC
);
944 ASSERT(INT_GET(leaf_d
->hdr
.info
.magic
, ARCH_CONVERT
)
945 == XFS_ATTR_LEAF_MAGIC
);
946 hdr_s
= &leaf_s
->hdr
;
947 hdr_d
= &leaf_d
->hdr
;
948 ASSERT((INT_GET(hdr_s
->count
, ARCH_CONVERT
) > 0)
949 && (INT_GET(hdr_s
->count
, ARCH_CONVERT
)
950 < (XFS_LBSIZE(mp
)/8)));
951 ASSERT(INT_GET(hdr_s
->firstused
, ARCH_CONVERT
) >=
952 ((INT_GET(hdr_s
->count
, ARCH_CONVERT
)
953 * sizeof(*entry_s
))+sizeof(*hdr_s
)));
954 ASSERT(INT_GET(hdr_d
->count
, ARCH_CONVERT
) < (XFS_LBSIZE(mp
)/8));
955 ASSERT(INT_GET(hdr_d
->firstused
, ARCH_CONVERT
) >=
956 ((INT_GET(hdr_d
->count
, ARCH_CONVERT
)
957 * sizeof(*entry_d
))+sizeof(*hdr_d
)));
959 ASSERT(start_s
< INT_GET(hdr_s
->count
, ARCH_CONVERT
));
960 ASSERT(start_d
<= INT_GET(hdr_d
->count
, ARCH_CONVERT
));
961 ASSERT(count
<= INT_GET(hdr_s
->count
, ARCH_CONVERT
));
964 * Move the entries in the destination leaf up to make a hole?
966 if (start_d
< INT_GET(hdr_d
->count
, ARCH_CONVERT
)) {
967 tmp
= INT_GET(hdr_d
->count
, ARCH_CONVERT
) - start_d
;
968 tmp
*= sizeof(xfs_attr_leaf_entry_t
);
969 entry_s
= &leaf_d
->entries
[start_d
];
970 entry_d
= &leaf_d
->entries
[start_d
+ count
];
971 ovbcopy((char *)entry_s
, (char *)entry_d
, tmp
);
975 * Copy all entry's in the same (sorted) order,
976 * but allocate attribute info packed and in sequence.
978 entry_s
= &leaf_s
->entries
[start_s
];
979 entry_d
= &leaf_d
->entries
[start_d
];
981 for (i
= 0; i
< count
; entry_s
++, entry_d
++, desti
++, i
++) {
982 ASSERT(INT_GET(entry_s
->nameidx
, ARCH_CONVERT
)
983 >= INT_GET(hdr_s
->firstused
, ARCH_CONVERT
));
984 tmp
= xfs_attr_leaf_entsize(leaf_s
, start_s
+ i
);
987 * Code to drop INCOMPLETE entries. Difficult to use as we
988 * may also need to change the insertion index. Code turned
989 * off for 6.2, should be revisited later.
991 if (entry_s
->flags
& XFS_ATTR_INCOMPLETE
) { /* skip partials? */
992 bzero(XFS_ATTR_LEAF_NAME(leaf_s
, start_s
+ i
), tmp
);
993 INT_MOD(hdr_s
->usedbytes
, ARCH_CONVERT
, -tmp
);
994 INT_MOD(hdr_s
->count
, ARCH_CONVERT
, -1);
995 entry_d
--; /* to compensate for ++ in loop hdr */
997 if ((start_s
+ i
) < offset
)
998 result
++; /* insertion index adjustment */
1001 INT_MOD(hdr_d
->firstused
, ARCH_CONVERT
, -tmp
);
1002 /* both on-disk, don't endian flip twice */
1003 entry_d
->hashval
= entry_s
->hashval
;
1004 /* both on-disk, don't endian flip twice */
1005 entry_d
->nameidx
= hdr_d
->firstused
;
1006 entry_d
->flags
= entry_s
->flags
;
1007 ASSERT(INT_GET(entry_d
->nameidx
, ARCH_CONVERT
) + tmp
1009 ovbcopy(XFS_ATTR_LEAF_NAME(leaf_s
, start_s
+ i
),
1010 XFS_ATTR_LEAF_NAME(leaf_d
, desti
), tmp
);
1011 ASSERT(INT_GET(entry_s
->nameidx
, ARCH_CONVERT
) + tmp
1013 bzero(XFS_ATTR_LEAF_NAME(leaf_s
, start_s
+ i
), tmp
);
1014 INT_MOD(hdr_s
->usedbytes
, ARCH_CONVERT
, -tmp
);
1015 INT_MOD(hdr_d
->usedbytes
, ARCH_CONVERT
, tmp
);
1016 INT_MOD(hdr_s
->count
, ARCH_CONVERT
, -1);
1017 INT_MOD(hdr_d
->count
, ARCH_CONVERT
, 1);
1018 tmp
= INT_GET(hdr_d
->count
, ARCH_CONVERT
)
1019 * sizeof(xfs_attr_leaf_entry_t
)
1020 + sizeof(xfs_attr_leaf_hdr_t
);
1021 ASSERT(INT_GET(hdr_d
->firstused
, ARCH_CONVERT
) >= tmp
);
1028 * Zero out the entries we just copied.
1030 if (start_s
== INT_GET(hdr_s
->count
, ARCH_CONVERT
)) {
1031 tmp
= count
* sizeof(xfs_attr_leaf_entry_t
);
1032 entry_s
= &leaf_s
->entries
[start_s
];
1033 ASSERT(((char *)entry_s
+ tmp
) <=
1034 ((char *)leaf_s
+ XFS_LBSIZE(mp
)));
1035 bzero((char *)entry_s
, tmp
);
1038 * Move the remaining entries down to fill the hole,
1039 * then zero the entries at the top.
1041 tmp
= INT_GET(hdr_s
->count
, ARCH_CONVERT
) - count
;
1042 tmp
*= sizeof(xfs_attr_leaf_entry_t
);
1043 entry_s
= &leaf_s
->entries
[start_s
+ count
];
1044 entry_d
= &leaf_s
->entries
[start_s
];
1045 ovbcopy((char *)entry_s
, (char *)entry_d
, tmp
);
1047 tmp
= count
* sizeof(xfs_attr_leaf_entry_t
);
1048 entry_s
= &leaf_s
->entries
[INT_GET(hdr_s
->count
,
1050 ASSERT(((char *)entry_s
+ tmp
) <=
1051 ((char *)leaf_s
+ XFS_LBSIZE(mp
)));
1052 bzero((char *)entry_s
, tmp
);
1056 * Fill in the freemap information
1058 INT_SET(hdr_d
->freemap
[0].base
, ARCH_CONVERT
,
1059 sizeof(xfs_attr_leaf_hdr_t
));
1060 INT_MOD(hdr_d
->freemap
[0].base
, ARCH_CONVERT
,
1061 INT_GET(hdr_d
->count
, ARCH_CONVERT
)
1062 * sizeof(xfs_attr_leaf_entry_t
));
1063 INT_SET(hdr_d
->freemap
[0].size
, ARCH_CONVERT
,
1064 INT_GET(hdr_d
->firstused
, ARCH_CONVERT
)
1065 - INT_GET(hdr_d
->freemap
[0].base
, ARCH_CONVERT
));
1066 INT_ZERO(hdr_d
->freemap
[1].base
, ARCH_CONVERT
);
1067 INT_ZERO(hdr_d
->freemap
[2].base
, ARCH_CONVERT
);
1068 INT_ZERO(hdr_d
->freemap
[1].size
, ARCH_CONVERT
);
1069 INT_ZERO(hdr_d
->freemap
[2].size
, ARCH_CONVERT
);
1070 hdr_s
->holes
= 1; /* leaf may not be compact */
1074 * Compare two leaf blocks "order".
1075 * Return 0 unless leaf2 should go before leaf1.
1078 xfs_attr_leaf_order(xfs_dabuf_t
*leaf1_bp
, xfs_dabuf_t
*leaf2_bp
)
1080 xfs_attr_leafblock_t
*leaf1
, *leaf2
;
1082 leaf1
= leaf1_bp
->data
;
1083 leaf2
= leaf2_bp
->data
;
1084 ASSERT((INT_GET(leaf1
->hdr
.info
.magic
, ARCH_CONVERT
)
1085 == XFS_ATTR_LEAF_MAGIC
) &&
1086 (INT_GET(leaf2
->hdr
.info
.magic
, ARCH_CONVERT
)
1087 == XFS_ATTR_LEAF_MAGIC
));
1088 if ( (INT_GET(leaf1
->hdr
.count
, ARCH_CONVERT
) > 0)
1089 && (INT_GET(leaf2
->hdr
.count
, ARCH_CONVERT
) > 0)
1090 && ( (INT_GET(leaf2
->entries
[ 0 ].hashval
, ARCH_CONVERT
) <
1091 INT_GET(leaf1
->entries
[ 0 ].hashval
, ARCH_CONVERT
))
1092 || (INT_GET(leaf2
->entries
[INT_GET(leaf2
->hdr
.count
,
1093 ARCH_CONVERT
)-1].hashval
, ARCH_CONVERT
) <
1094 INT_GET(leaf1
->entries
[INT_GET(leaf1
->hdr
.count
,
1095 ARCH_CONVERT
)-1].hashval
, ARCH_CONVERT
))) ) {
1102 * Pick up the last hashvalue from a leaf block.
1105 xfs_attr_leaf_lasthash(xfs_dabuf_t
*bp
, int *count
)
1107 xfs_attr_leafblock_t
*leaf
;
1110 ASSERT(INT_GET(leaf
->hdr
.info
.magic
, ARCH_CONVERT
)
1111 == XFS_ATTR_LEAF_MAGIC
);
1113 *count
= INT_GET(leaf
->hdr
.count
, ARCH_CONVERT
);
1114 if (INT_ISZERO(leaf
->hdr
.count
, ARCH_CONVERT
))
1116 return(INT_GET(leaf
->entries
[INT_GET(leaf
->hdr
.count
,
1117 ARCH_CONVERT
)-1].hashval
, ARCH_CONVERT
));
1121 * Calculate the number of bytes used to store the indicated attribute
1122 * (whether local or remote only calculate bytes in this block).
1125 xfs_attr_leaf_entsize(xfs_attr_leafblock_t
*leaf
, int index
)
1127 xfs_attr_leaf_name_local_t
*name_loc
;
1128 xfs_attr_leaf_name_remote_t
*name_rmt
;
1131 ASSERT(INT_GET(leaf
->hdr
.info
.magic
, ARCH_CONVERT
)
1132 == XFS_ATTR_LEAF_MAGIC
);
1133 if (leaf
->entries
[index
].flags
& XFS_ATTR_LOCAL
) {
1134 name_loc
= XFS_ATTR_LEAF_NAME_LOCAL(leaf
, index
);
1135 size
= XFS_ATTR_LEAF_ENTSIZE_LOCAL(name_loc
->namelen
,
1136 INT_GET(name_loc
->valuelen
,
1139 name_rmt
= XFS_ATTR_LEAF_NAME_REMOTE(leaf
, index
);
1140 size
= XFS_ATTR_LEAF_ENTSIZE_REMOTE(name_rmt
->namelen
);
1146 * Calculate the number of bytes that would be required to store the new
1147 * attribute (whether local or remote only calculate bytes in this block).
1148 * This routine decides as a side effect whether the attribute will be
1149 * a "local" or a "remote" attribute.
1152 xfs_attr_leaf_newentsize(xfs_da_args_t
*args
, int blocksize
, int *local
)
1156 size
= XFS_ATTR_LEAF_ENTSIZE_LOCAL(args
->namelen
, args
->valuelen
);
1157 if (size
< XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(blocksize
)) {
1162 size
= XFS_ATTR_LEAF_ENTSIZE_REMOTE(args
->namelen
);