]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - libxfs/xfs_da_btree.c
xfs: don't cast string literals
[thirdparty/xfsprogs-dev.git] / libxfs / xfs_da_btree.c
CommitLineData
2bd0ea18 1/*
da23017d 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
88b32f06 3 * Copyright (c) 2013 Red Hat, Inc.
da23017d 4 * All Rights Reserved.
5000d01d 5 *
da23017d
NS
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
2bd0ea18 8 * published by the Free Software Foundation.
5000d01d 9 *
da23017d
NS
10 * This program is distributed in the hope that it would be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
5000d01d 14 *
da23017d
NS
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2bd0ea18 18 */
9c799827 19#include "libxfs_priv.h"
b626fb59
DC
20#include "xfs_fs.h"
21#include "xfs_shared.h"
22#include "xfs_format.h"
23#include "xfs_log_format.h"
24#include "xfs_trans_resv.h"
25#include "xfs_bit.h"
26#include "xfs_mount.h"
27#include "xfs_da_format.h"
28#include "xfs_da_btree.h"
29#include "xfs_dir2.h"
30#include "xfs_dir2_priv.h"
31#include "xfs_inode.h"
32#include "xfs_trans.h"
33#include "xfs_alloc.h"
34#include "xfs_bmap.h"
35#include "xfs_attr_leaf.h"
36#include "xfs_trace.h"
37#include "xfs_cksum.h"
2bd0ea18
NS
38
39/*
40 * xfs_da_btree.c
41 *
42 * Routines to implement directories as Btrees of hashed names.
43 */
44
5e656dbb
BN
45/*========================================================================
46 * Function prototypes for the kernel.
47 *========================================================================*/
48
49/*
50 * Routines used for growing the Btree.
51 */
88b32f06 52STATIC int xfs_da3_root_split(xfs_da_state_t *state,
5e656dbb
BN
53 xfs_da_state_blk_t *existing_root,
54 xfs_da_state_blk_t *new_child);
88b32f06 55STATIC int xfs_da3_node_split(xfs_da_state_t *state,
5e656dbb
BN
56 xfs_da_state_blk_t *existing_blk,
57 xfs_da_state_blk_t *split_blk,
58 xfs_da_state_blk_t *blk_to_add,
59 int treelevel,
60 int *result);
88b32f06 61STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state,
5e656dbb
BN
62 xfs_da_state_blk_t *node_blk_1,
63 xfs_da_state_blk_t *node_blk_2);
88b32f06 64STATIC void xfs_da3_node_add(xfs_da_state_t *state,
5e656dbb
BN
65 xfs_da_state_blk_t *old_node_blk,
66 xfs_da_state_blk_t *new_node_blk);
67
68/*
69 * Routines used for shrinking the Btree.
70 */
88b32f06 71STATIC int xfs_da3_root_join(xfs_da_state_t *state,
5e656dbb 72 xfs_da_state_blk_t *root_blk);
88b32f06
DC
73STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval);
74STATIC void xfs_da3_node_remove(xfs_da_state_t *state,
5e656dbb 75 xfs_da_state_blk_t *drop_blk);
88b32f06 76STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state,
5e656dbb
BN
77 xfs_da_state_blk_t *src_node_blk,
78 xfs_da_state_blk_t *dst_node_blk);
79
80/*
81 * Utility routines.
82 */
88b32f06 83STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state,
5e656dbb
BN
84 xfs_da_state_blk_t *drop_blk,
85 xfs_da_state_blk_t *save_blk);
5e656dbb 86
88b32f06
DC
87
88kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
89
90/*
91 * Allocate a dir-state structure.
92 * We don't put them on the stack since they're large.
93 */
94xfs_da_state_t *
95xfs_da_state_alloc(void)
96{
97 return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
98}
99
100/*
101 * Kill the altpath contents of a da-state structure.
102 */
103STATIC void
104xfs_da_state_kill_altpath(xfs_da_state_t *state)
105{
106 int i;
107
108 for (i = 0; i < state->altpath.active; i++)
109 state->altpath.blk[i].bp = NULL;
110 state->altpath.active = 0;
111}
112
113/*
114 * Free a da-state structure.
115 */
116void
117xfs_da_state_free(xfs_da_state_t *state)
118{
119 xfs_da_state_kill_altpath(state);
120#ifdef DEBUG
121 memset((char *)state, 0, sizeof(*state));
122#endif /* DEBUG */
123 kmem_zone_free(xfs_da_state_zone, state);
124}
125
88b32f06
DC
126static bool
127xfs_da3_node_verify(
a2ceac1f
DC
128 struct xfs_buf *bp)
129{
130 struct xfs_mount *mp = bp->b_target->bt_mount;
88b32f06
DC
131 struct xfs_da_intnode *hdr = bp->b_addr;
132 struct xfs_da3_icnode_hdr ichdr;
ff105f75
DC
133 const struct xfs_dir_ops *ops;
134
135 ops = xfs_dir_get_ops(mp, NULL);
88b32f06 136
ff105f75 137 ops->node_hdr_from_disk(&ichdr, hdr);
88b32f06
DC
138
139 if (xfs_sb_version_hascrc(&mp->m_sb)) {
140 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
141
142 if (ichdr.magic != XFS_DA3_NODE_MAGIC)
143 return false;
144
9c4e12fb 145 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid))
88b32f06
DC
146 return false;
147 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
148 return false;
149 } else {
150 if (ichdr.magic != XFS_DA_NODE_MAGIC)
151 return false;
a2ceac1f 152 }
88b32f06
DC
153 if (ichdr.level == 0)
154 return false;
155 if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
156 return false;
157 if (ichdr.count == 0)
158 return false;
159
160 /*
161 * we don't know if the node is for and attribute or directory tree,
162 * so only fail if the count is outside both bounds
163 */
ff105f75
DC
164 if (ichdr.count > mp->m_dir_geo->node_ents &&
165 ichdr.count > mp->m_attr_geo->node_ents)
88b32f06
DC
166 return false;
167
168 /* XXX: hash order check? */
a2ceac1f 169
88b32f06 170 return true;
a2ceac1f
DC
171}
172
173static void
88b32f06 174xfs_da3_node_write_verify(
a2ceac1f
DC
175 struct xfs_buf *bp)
176{
88b32f06
DC
177 struct xfs_mount *mp = bp->b_target->bt_mount;
178 struct xfs_buf_log_item *bip = bp->b_fspriv;
179 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
180
181 if (!xfs_da3_node_verify(bp)) {
12b53197 182 xfs_buf_ioerror(bp, -EFSCORRUPTED);
45922933 183 xfs_verifier_error(bp);
88b32f06
DC
184 return;
185 }
186
187 if (!xfs_sb_version_hascrc(&mp->m_sb))
188 return;
189
190 if (bip)
191 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
192
43b5aeed 193 xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
a2ceac1f
DC
194}
195
196/*
197 * leaf/node format detection on trees is sketchy, so a node read can be done on
198 * leaf level blocks when detection identifies the tree as a node format tree
199 * incorrectly. In this case, we need to swap the verifier to match the correct
200 * format of the block being read.
201 */
202static void
88b32f06 203xfs_da3_node_read_verify(
a2ceac1f
DC
204 struct xfs_buf *bp)
205{
a2ceac1f
DC
206 struct xfs_da_blkinfo *info = bp->b_addr;
207
208 switch (be16_to_cpu(info->magic)) {
88b32f06 209 case XFS_DA3_NODE_MAGIC:
45922933 210 if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
12b53197 211 xfs_buf_ioerror(bp, -EFSBADCRC);
88b32f06 212 break;
45922933 213 }
88b32f06 214 /* fall through */
a2ceac1f 215 case XFS_DA_NODE_MAGIC:
45922933 216 if (!xfs_da3_node_verify(bp)) {
12b53197 217 xfs_buf_ioerror(bp, -EFSCORRUPTED);
88b32f06 218 break;
45922933 219 }
88b32f06 220 return;
a2ceac1f 221 case XFS_ATTR_LEAF_MAGIC:
a24374f4
DC
222 case XFS_ATTR3_LEAF_MAGIC:
223 bp->b_ops = &xfs_attr3_leaf_buf_ops;
a2ceac1f
DC
224 bp->b_ops->verify_read(bp);
225 return;
226 case XFS_DIR2_LEAFN_MAGIC:
65b80c98
DC
227 case XFS_DIR3_LEAFN_MAGIC:
228 bp->b_ops = &xfs_dir3_leafn_buf_ops;
a2ceac1f
DC
229 bp->b_ops->verify_read(bp);
230 return;
231 default:
a2ceac1f
DC
232 break;
233 }
88b32f06
DC
234
235 /* corrupt block */
45922933 236 xfs_verifier_error(bp);
a2ceac1f
DC
237}
238
88b32f06
DC
239const struct xfs_buf_ops xfs_da3_node_buf_ops = {
240 .verify_read = xfs_da3_node_read_verify,
241 .verify_write = xfs_da3_node_write_verify,
a2ceac1f
DC
242};
243
a2ceac1f 244int
88b32f06 245xfs_da3_node_read(
a2ceac1f
DC
246 struct xfs_trans *tp,
247 struct xfs_inode *dp,
248 xfs_dablk_t bno,
249 xfs_daddr_t mappedbno,
250 struct xfs_buf **bpp,
251 int which_fork)
252{
8b4dc4a9
DC
253 int err;
254
255 err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
88b32f06 256 which_fork, &xfs_da3_node_buf_ops);
8b4dc4a9
DC
257 if (!err && tp) {
258 struct xfs_da_blkinfo *info = (*bpp)->b_addr;
259 int type;
260
261 switch (be16_to_cpu(info->magic)) {
8b4dc4a9 262 case XFS_DA_NODE_MAGIC:
f383ee8c 263 case XFS_DA3_NODE_MAGIC:
bdc16ee5 264 type = XFS_BLFT_DA_NODE_BUF;
8b4dc4a9
DC
265 break;
266 case XFS_ATTR_LEAF_MAGIC:
267 case XFS_ATTR3_LEAF_MAGIC:
bdc16ee5 268 type = XFS_BLFT_ATTR_LEAF_BUF;
8b4dc4a9
DC
269 break;
270 case XFS_DIR2_LEAFN_MAGIC:
271 case XFS_DIR3_LEAFN_MAGIC:
bdc16ee5 272 type = XFS_BLFT_DIR_LEAFN_BUF;
8b4dc4a9
DC
273 break;
274 default:
275 type = 0;
276 ASSERT(0);
277 break;
278 }
279 xfs_trans_buf_set_type(tp, *bpp, type);
280 }
281 return err;
a2ceac1f
DC
282}
283
2bd0ea18
NS
284/*========================================================================
285 * Routines used for growing the Btree.
286 *========================================================================*/
287
288/*
289 * Create the initial contents of an intermediate node.
290 */
291int
88b32f06
DC
292xfs_da3_node_create(
293 struct xfs_da_args *args,
294 xfs_dablk_t blkno,
295 int level,
296 struct xfs_buf **bpp,
297 int whichfork)
2bd0ea18 298{
88b32f06
DC
299 struct xfs_da_intnode *node;
300 struct xfs_trans *tp = args->trans;
301 struct xfs_mount *mp = tp->t_mountp;
302 struct xfs_da3_icnode_hdr ichdr = {0};
303 struct xfs_buf *bp;
304 int error;
ff105f75 305 struct xfs_inode *dp = args->dp;
2bd0ea18 306
a2ceac1f 307 trace_xfs_da_node_create(args);
88b32f06 308 ASSERT(level <= XFS_DA_NODE_MAXDEPTH);
a2ceac1f 309
ff105f75 310 error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork);
2bd0ea18 311 if (error)
af43ca9f 312 return error;
8b4dc4a9 313 bp->b_ops = &xfs_da3_node_buf_ops;
bdc16ee5 314 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
a2ceac1f 315 node = bp->b_addr;
2bd0ea18 316
88b32f06
DC
317 if (xfs_sb_version_hascrc(&mp->m_sb)) {
318 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
319
320 ichdr.magic = XFS_DA3_NODE_MAGIC;
321 hdr3->info.blkno = cpu_to_be64(bp->b_bn);
322 hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
9c4e12fb 323 uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid);
88b32f06
DC
324 } else {
325 ichdr.magic = XFS_DA_NODE_MAGIC;
326 }
327 ichdr.level = level;
328
ff105f75 329 dp->d_ops->node_hdr_to_disk(node, &ichdr);
a2ceac1f 330 xfs_trans_log_buf(tp, bp,
ff105f75 331 XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
2bd0ea18
NS
332
333 *bpp = bp;
af43ca9f 334 return 0;
2bd0ea18
NS
335}
336
337/*
338 * Split a leaf node, rebalance, then possibly split
339 * intermediate nodes, rebalance, etc.
340 */
341int /* error */
88b32f06
DC
342xfs_da3_split(
343 struct xfs_da_state *state)
2bd0ea18 344{
88b32f06
DC
345 struct xfs_da_state_blk *oldblk;
346 struct xfs_da_state_blk *newblk;
347 struct xfs_da_state_blk *addblk;
348 struct xfs_da_intnode *node;
349 struct xfs_buf *bp;
350 int max;
6bddecbc 351 int action = 0;
88b32f06
DC
352 int error;
353 int i;
2bd0ea18 354
a2ceac1f
DC
355 trace_xfs_da_split(state->args);
356
2bd0ea18
NS
357 /*
358 * Walk back up the tree splitting/inserting/adjusting as necessary.
359 * If we need to insert and there isn't room, split the node, then
360 * decide which fragment to insert the new block from below into.
361 * Note that we may split the root this way, but we need more fixup.
362 */
363 max = state->path.active - 1;
364 ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
365 ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
5e656dbb 366 state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
2bd0ea18
NS
367
368 addblk = &state->path.blk[max]; /* initial dummy value */
369 for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
370 oldblk = &state->path.blk[i];
371 newblk = &state->altpath.blk[i];
372
373 /*
374 * If a leaf node then
375 * Allocate a new leaf node, then rebalance across them.
376 * else if an intermediate node then
377 * We split on the last layer, must we split the node?
378 */
379 switch (oldblk->magic) {
380 case XFS_ATTR_LEAF_MAGIC:
a24374f4 381 error = xfs_attr3_leaf_split(state, oldblk, newblk);
12b53197 382 if ((error != 0) && (error != -ENOSPC)) {
af43ca9f 383 return error; /* GROT: attr is inconsistent */
2bd0ea18
NS
384 }
385 if (!error) {
386 addblk = newblk;
387 break;
388 }
389 /*
390 * Entry wouldn't fit, split the leaf again.
391 */
392 state->extravalid = 1;
393 if (state->inleaf) {
394 state->extraafter = 0; /* before newblk */
a2ceac1f 395 trace_xfs_attr_leaf_split_before(state->args);
a24374f4 396 error = xfs_attr3_leaf_split(state, oldblk,
2bd0ea18
NS
397 &state->extrablk);
398 } else {
399 state->extraafter = 1; /* after newblk */
a2ceac1f 400 trace_xfs_attr_leaf_split_after(state->args);
a24374f4 401 error = xfs_attr3_leaf_split(state, newblk,
2bd0ea18
NS
402 &state->extrablk);
403 }
404 if (error)
af43ca9f 405 return error; /* GROT: attr inconsistent */
2bd0ea18
NS
406 addblk = newblk;
407 break;
2bd0ea18 408 case XFS_DIR2_LEAFN_MAGIC:
2bd0ea18
NS
409 error = xfs_dir2_leafn_split(state, oldblk, newblk);
410 if (error)
411 return error;
412 addblk = newblk;
413 break;
414 case XFS_DA_NODE_MAGIC:
88b32f06 415 error = xfs_da3_node_split(state, oldblk, newblk, addblk,
2bd0ea18 416 max - i, &action);
2bd0ea18
NS
417 addblk->bp = NULL;
418 if (error)
af43ca9f 419 return error; /* GROT: dir is inconsistent */
2bd0ea18
NS
420 /*
421 * Record the newly split block for the next time thru?
422 */
423 if (action)
424 addblk = newblk;
425 else
426 addblk = NULL;
427 break;
428 }
429
430 /*
431 * Update the btree to show the new hashval for this child.
432 */
88b32f06 433 xfs_da3_fixhashpath(state, &state->path);
2bd0ea18
NS
434 }
435 if (!addblk)
af43ca9f 436 return 0;
2bd0ea18
NS
437
438 /*
439 * Split the root node.
440 */
441 ASSERT(state->path.active == 0);
442 oldblk = &state->path.blk[0];
88b32f06 443 error = xfs_da3_root_split(state, oldblk, addblk);
2bd0ea18 444 if (error) {
2bd0ea18 445 addblk->bp = NULL;
af43ca9f 446 return error; /* GROT: dir is inconsistent */
2bd0ea18
NS
447 }
448
449 /*
450 * Update pointers to the node which used to be block 0 and
451 * just got bumped because of the addition of a new root node.
452 * There might be three blocks involved if a double split occurred,
453 * and the original block 0 could be at any position in the list.
88b32f06 454 *
7b02556d
DC
455 * Note: the magic numbers and sibling pointers are in the same
456 * physical place for both v2 and v3 headers (by design). Hence it
457 * doesn't matter which version of the xfs_da_intnode structure we use
458 * here as the result will be the same using either structure.
2bd0ea18 459 */
a2ceac1f 460 node = oldblk->bp->b_addr;
46eca962 461 if (node->hdr.info.forw) {
5e656dbb 462 if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
2bd0ea18
NS
463 bp = addblk->bp;
464 } else {
465 ASSERT(state->extravalid);
466 bp = state->extrablk.bp;
467 }
a2ceac1f 468 node = bp->b_addr;
5e656dbb 469 node->hdr.info.back = cpu_to_be32(oldblk->blkno);
a2ceac1f 470 xfs_trans_log_buf(state->args->trans, bp,
2bd0ea18
NS
471 XFS_DA_LOGRANGE(node, &node->hdr.info,
472 sizeof(node->hdr.info)));
473 }
a2ceac1f 474 node = oldblk->bp->b_addr;
5e656dbb
BN
475 if (node->hdr.info.back) {
476 if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) {
2bd0ea18
NS
477 bp = addblk->bp;
478 } else {
479 ASSERT(state->extravalid);
480 bp = state->extrablk.bp;
481 }
a2ceac1f 482 node = bp->b_addr;
5e656dbb 483 node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
a2ceac1f 484 xfs_trans_log_buf(state->args->trans, bp,
2bd0ea18
NS
485 XFS_DA_LOGRANGE(node, &node->hdr.info,
486 sizeof(node->hdr.info)));
487 }
2bd0ea18 488 addblk->bp = NULL;
af43ca9f 489 return 0;
2bd0ea18
NS
490}
491
492/*
493 * Split the root. We have to create a new root and point to the two
494 * parts (the split old root) that we just created. Copy block zero to
495 * the EOF, extending the inode in process.
496 */
497STATIC int /* error */
88b32f06
DC
498xfs_da3_root_split(
499 struct xfs_da_state *state,
500 struct xfs_da_state_blk *blk1,
501 struct xfs_da_state_blk *blk2)
2bd0ea18 502{
88b32f06
DC
503 struct xfs_da_intnode *node;
504 struct xfs_da_intnode *oldroot;
505 struct xfs_da_node_entry *btree;
506 struct xfs_da3_icnode_hdr nodehdr;
507 struct xfs_da_args *args;
508 struct xfs_buf *bp;
509 struct xfs_inode *dp;
510 struct xfs_trans *tp;
88b32f06
DC
511 struct xfs_dir2_leaf *leaf;
512 xfs_dablk_t blkno;
513 int level;
514 int error;
515 int size;
2bd0ea18 516
a2ceac1f
DC
517 trace_xfs_da_root_split(state->args);
518
2bd0ea18
NS
519 /*
520 * Copy the existing (incorrect) block from the root node position
521 * to a free space somewhere.
522 */
523 args = state->args;
2bd0ea18
NS
524 error = xfs_da_grow_inode(args, &blkno);
525 if (error)
88b32f06
DC
526 return error;
527
2bd0ea18
NS
528 dp = args->dp;
529 tp = args->trans;
2bd0ea18
NS
530 error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
531 if (error)
88b32f06 532 return error;
a2ceac1f
DC
533 node = bp->b_addr;
534 oldroot = blk1->bp->b_addr;
88b32f06
DC
535 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
536 oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
19ebedcf 537 struct xfs_da3_icnode_hdr icnodehdr;
88b32f06 538
19ebedcf 539 dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot);
ff105f75 540 btree = dp->d_ops->node_tree_p(oldroot);
19ebedcf
DC
541 size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
542 level = icnodehdr.level;
8b4dc4a9
DC
543
544 /*
545 * we are about to copy oldroot to bp, so set up the type
546 * of bp while we know exactly what it will be.
547 */
bdc16ee5 548 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
2bd0ea18 549 } else {
65b80c98
DC
550 struct xfs_dir3_icleaf_hdr leafhdr;
551 struct xfs_dir2_leaf_entry *ents;
552
2bd0ea18 553 leaf = (xfs_dir2_leaf_t *)oldroot;
ff105f75
DC
554 dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
555 ents = dp->d_ops->leaf_ents_p(leaf);
65b80c98
DC
556
557 ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
558 leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
559 size = (int)((char *)&ents[leafhdr.count] - (char *)leaf);
88b32f06 560 level = 0;
8b4dc4a9
DC
561
562 /*
563 * we are about to copy oldroot to bp, so set up the type
564 * of bp while we know exactly what it will be.
565 */
bdc16ee5 566 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
2bd0ea18 567 }
88b32f06
DC
568
569 /*
570 * we can copy most of the information in the node from one block to
571 * another, but for CRC enabled headers we have to make sure that the
572 * block specific identifiers are kept intact. We update the buffer
573 * directly for this.
574 */
32181a02 575 memcpy(node, oldroot, size);
88b32f06
DC
576 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
577 oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
578 struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node;
579
580 node3->hdr.info.blkno = cpu_to_be64(bp->b_bn);
581 }
a2ceac1f
DC
582 xfs_trans_log_buf(tp, bp, 0, size - 1);
583
584 bp->b_ops = blk1->bp->b_ops;
e26915ee 585 xfs_trans_buf_copy_type(bp, blk1->bp);
2bd0ea18
NS
586 blk1->bp = bp;
587 blk1->blkno = blkno;
588
589 /*
590 * Set up the new root node.
591 */
88b32f06 592 error = xfs_da3_node_create(args,
ff105f75 593 (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0,
88b32f06 594 level + 1, &bp, args->whichfork);
2bd0ea18 595 if (error)
88b32f06
DC
596 return error;
597
a2ceac1f 598 node = bp->b_addr;
ff105f75
DC
599 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
600 btree = dp->d_ops->node_tree_p(node);
88b32f06
DC
601 btree[0].hashval = cpu_to_be32(blk1->hashval);
602 btree[0].before = cpu_to_be32(blk1->blkno);
603 btree[1].hashval = cpu_to_be32(blk2->hashval);
604 btree[1].before = cpu_to_be32(blk2->blkno);
605 nodehdr.count = 2;
ff105f75 606 dp->d_ops->node_hdr_to_disk(node, &nodehdr);
6bef826c
NS
607
608#ifdef DEBUG
65b80c98
DC
609 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
610 oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
ff105f75
DC
611 ASSERT(blk1->blkno >= args->geo->leafblk &&
612 blk1->blkno < args->geo->freeblk);
613 ASSERT(blk2->blkno >= args->geo->leafblk &&
614 blk2->blkno < args->geo->freeblk);
2bd0ea18 615 }
6bef826c
NS
616#endif
617
2bd0ea18 618 /* Header is already logged by xfs_da_node_create */
a2ceac1f 619 xfs_trans_log_buf(tp, bp,
88b32f06 620 XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2));
2bd0ea18 621
88b32f06 622 return 0;
2bd0ea18
NS
623}
624
625/*
626 * Split the node, rebalance, then add the new entry.
627 */
628STATIC int /* error */
88b32f06
DC
629xfs_da3_node_split(
630 struct xfs_da_state *state,
631 struct xfs_da_state_blk *oldblk,
632 struct xfs_da_state_blk *newblk,
633 struct xfs_da_state_blk *addblk,
634 int treelevel,
635 int *result)
2bd0ea18 636{
88b32f06
DC
637 struct xfs_da_intnode *node;
638 struct xfs_da3_icnode_hdr nodehdr;
639 xfs_dablk_t blkno;
640 int newcount;
641 int error;
642 int useextra;
ff105f75 643 struct xfs_inode *dp = state->args->dp;
2bd0ea18 644
a2ceac1f
DC
645 trace_xfs_da_node_split(state->args);
646
647 node = oldblk->bp->b_addr;
ff105f75 648 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
2bd0ea18
NS
649
650 /*
5e656dbb 651 * With V2 dirs the extra block is data or freespace.
2bd0ea18 652 */
5e656dbb 653 useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
2bd0ea18
NS
654 newcount = 1 + useextra;
655 /*
656 * Do we have to split the node?
657 */
ff105f75 658 if (nodehdr.count + newcount > state->args->geo->node_ents) {
2bd0ea18
NS
659 /*
660 * Allocate a new node, add to the doubly linked chain of
661 * nodes, then move some of our excess entries into it.
662 */
663 error = xfs_da_grow_inode(state->args, &blkno);
664 if (error)
af43ca9f 665 return error; /* GROT: dir is inconsistent */
5000d01d 666
88b32f06 667 error = xfs_da3_node_create(state->args, blkno, treelevel,
2bd0ea18
NS
668 &newblk->bp, state->args->whichfork);
669 if (error)
af43ca9f 670 return error; /* GROT: dir is inconsistent */
2bd0ea18
NS
671 newblk->blkno = blkno;
672 newblk->magic = XFS_DA_NODE_MAGIC;
88b32f06
DC
673 xfs_da3_node_rebalance(state, oldblk, newblk);
674 error = xfs_da3_blk_link(state, oldblk, newblk);
2bd0ea18 675 if (error)
af43ca9f 676 return error;
2bd0ea18
NS
677 *result = 1;
678 } else {
679 *result = 0;
680 }
681
682 /*
683 * Insert the new entry(s) into the correct block
684 * (updating last hashval in the process).
685 *
88b32f06 686 * xfs_da3_node_add() inserts BEFORE the given index,
2bd0ea18
NS
687 * and as a result of using node_lookup_int() we always
688 * point to a valid entry (not after one), but a split
689 * operation always results in a new block whose hashvals
690 * FOLLOW the current block.
691 *
692 * If we had double-split op below us, then add the extra block too.
693 */
a2ceac1f 694 node = oldblk->bp->b_addr;
ff105f75 695 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
88b32f06 696 if (oldblk->index <= nodehdr.count) {
2bd0ea18 697 oldblk->index++;
88b32f06 698 xfs_da3_node_add(state, oldblk, addblk);
2bd0ea18
NS
699 if (useextra) {
700 if (state->extraafter)
701 oldblk->index++;
88b32f06 702 xfs_da3_node_add(state, oldblk, &state->extrablk);
2bd0ea18
NS
703 state->extravalid = 0;
704 }
705 } else {
706 newblk->index++;
88b32f06 707 xfs_da3_node_add(state, newblk, addblk);
2bd0ea18
NS
708 if (useextra) {
709 if (state->extraafter)
710 newblk->index++;
88b32f06 711 xfs_da3_node_add(state, newblk, &state->extrablk);
2bd0ea18
NS
712 state->extravalid = 0;
713 }
714 }
715
af43ca9f 716 return 0;
2bd0ea18
NS
717}
718
719/*
720 * Balance the btree elements between two intermediate nodes,
721 * usually one full and one empty.
722 *
723 * NOTE: if blk2 is empty, then it will get the upper half of blk1.
724 */
725STATIC void
88b32f06
DC
726xfs_da3_node_rebalance(
727 struct xfs_da_state *state,
728 struct xfs_da_state_blk *blk1,
729 struct xfs_da_state_blk *blk2)
2bd0ea18 730{
88b32f06
DC
731 struct xfs_da_intnode *node1;
732 struct xfs_da_intnode *node2;
733 struct xfs_da_intnode *tmpnode;
734 struct xfs_da_node_entry *btree1;
735 struct xfs_da_node_entry *btree2;
736 struct xfs_da_node_entry *btree_s;
737 struct xfs_da_node_entry *btree_d;
738 struct xfs_da3_icnode_hdr nodehdr1;
739 struct xfs_da3_icnode_hdr nodehdr2;
740 struct xfs_trans *tp;
741 int count;
742 int tmp;
743 int swap = 0;
ff105f75 744 struct xfs_inode *dp = state->args->dp;
2bd0ea18 745
a2ceac1f
DC
746 trace_xfs_da_node_rebalance(state->args);
747
748 node1 = blk1->bp->b_addr;
749 node2 = blk2->bp->b_addr;
ff105f75
DC
750 dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
751 dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
752 btree1 = dp->d_ops->node_tree_p(node1);
753 btree2 = dp->d_ops->node_tree_p(node2);
88b32f06 754
2bd0ea18
NS
755 /*
756 * Figure out how many entries need to move, and in which direction.
757 * Swap the nodes around if that makes it simpler.
758 */
88b32f06
DC
759 if (nodehdr1.count > 0 && nodehdr2.count > 0 &&
760 ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
761 (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
762 be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
2bd0ea18
NS
763 tmpnode = node1;
764 node1 = node2;
765 node2 = tmpnode;
ff105f75
DC
766 dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
767 dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
768 btree1 = dp->d_ops->node_tree_p(node1);
769 btree2 = dp->d_ops->node_tree_p(node2);
88b32f06 770 swap = 1;
2bd0ea18 771 }
88b32f06
DC
772
773 count = (nodehdr1.count - nodehdr2.count) / 2;
2bd0ea18
NS
774 if (count == 0)
775 return;
776 tp = state->args->trans;
777 /*
778 * Two cases: high-to-low and low-to-high.
779 */
780 if (count > 0) {
781 /*
782 * Move elements in node2 up to make a hole.
783 */
88b32f06
DC
784 tmp = nodehdr2.count;
785 if (tmp > 0) {
2bd0ea18 786 tmp *= (uint)sizeof(xfs_da_node_entry_t);
88b32f06
DC
787 btree_s = &btree2[0];
788 btree_d = &btree2[count];
32181a02 789 memmove(btree_d, btree_s, tmp);
2bd0ea18
NS
790 }
791
792 /*
793 * Move the req'd B-tree elements from high in node1 to
794 * low in node2.
795 */
88b32f06 796 nodehdr2.count += count;
2bd0ea18 797 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
7b02556d 798 btree_s = &btree1[nodehdr1.count - count];
88b32f06 799 btree_d = &btree2[0];
32181a02 800 memcpy(btree_d, btree_s, tmp);
88b32f06 801 nodehdr1.count -= count;
2bd0ea18
NS
802 } else {
803 /*
804 * Move the req'd B-tree elements from low in node2 to
805 * high in node1.
806 */
807 count = -count;
808 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
88b32f06
DC
809 btree_s = &btree2[0];
810 btree_d = &btree1[nodehdr1.count];
32181a02 811 memcpy(btree_d, btree_s, tmp);
88b32f06
DC
812 nodehdr1.count += count;
813
a2ceac1f 814 xfs_trans_log_buf(tp, blk1->bp,
2bd0ea18
NS
815 XFS_DA_LOGRANGE(node1, btree_d, tmp));
816
817 /*
818 * Move elements in node2 down to fill the hole.
819 */
88b32f06 820 tmp = nodehdr2.count - count;
2bd0ea18 821 tmp *= (uint)sizeof(xfs_da_node_entry_t);
88b32f06
DC
822 btree_s = &btree2[count];
823 btree_d = &btree2[0];
32181a02 824 memmove(btree_d, btree_s, tmp);
88b32f06 825 nodehdr2.count -= count;
2bd0ea18
NS
826 }
827
828 /*
829 * Log header of node 1 and all current bits of node 2.
830 */
ff105f75 831 dp->d_ops->node_hdr_to_disk(node1, &nodehdr1);
a2ceac1f 832 xfs_trans_log_buf(tp, blk1->bp,
ff105f75 833 XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size));
88b32f06 834
ff105f75 835 dp->d_ops->node_hdr_to_disk(node2, &nodehdr2);
a2ceac1f 836 xfs_trans_log_buf(tp, blk2->bp,
2bd0ea18 837 XFS_DA_LOGRANGE(node2, &node2->hdr,
ff105f75 838 dp->d_ops->node_hdr_size +
88b32f06 839 (sizeof(btree2[0]) * nodehdr2.count)));
2bd0ea18
NS
840
841 /*
842 * Record the last hashval from each block for upward propagation.
843 * (note: don't use the swapped node pointers)
844 */
88b32f06
DC
845 if (swap) {
846 node1 = blk1->bp->b_addr;
847 node2 = blk2->bp->b_addr;
ff105f75
DC
848 dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
849 dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
850 btree1 = dp->d_ops->node_tree_p(node1);
851 btree2 = dp->d_ops->node_tree_p(node2);
88b32f06
DC
852 }
853 blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
854 blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);
2bd0ea18
NS
855
856 /*
857 * Adjust the expected index for insertion.
858 */
88b32f06
DC
859 if (blk1->index >= nodehdr1.count) {
860 blk2->index = blk1->index - nodehdr1.count;
861 blk1->index = nodehdr1.count + 1; /* make it invalid */
2bd0ea18
NS
862 }
863}
864
865/*
866 * Add a new entry to an intermediate node.
867 */
868STATIC void
88b32f06
DC
869xfs_da3_node_add(
870 struct xfs_da_state *state,
871 struct xfs_da_state_blk *oldblk,
872 struct xfs_da_state_blk *newblk)
2bd0ea18 873{
88b32f06
DC
874 struct xfs_da_intnode *node;
875 struct xfs_da3_icnode_hdr nodehdr;
876 struct xfs_da_node_entry *btree;
877 int tmp;
ff105f75 878 struct xfs_inode *dp = state->args->dp;
2bd0ea18 879
a2ceac1f
DC
880 trace_xfs_da_node_add(state->args);
881
882 node = oldblk->bp->b_addr;
ff105f75
DC
883 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
884 btree = dp->d_ops->node_tree_p(node);
88b32f06
DC
885
886 ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
2bd0ea18 887 ASSERT(newblk->blkno != 0);
5e656dbb 888 if (state->args->whichfork == XFS_DATA_FORK)
ff105f75
DC
889 ASSERT(newblk->blkno >= state->args->geo->leafblk &&
890 newblk->blkno < state->args->geo->freeblk);
2bd0ea18
NS
891
892 /*
893 * We may need to make some room before we insert the new node.
894 */
895 tmp = 0;
88b32f06
DC
896 if (oldblk->index < nodehdr.count) {
897 tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree);
898 memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp);
2bd0ea18 899 }
88b32f06
DC
900 btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval);
901 btree[oldblk->index].before = cpu_to_be32(newblk->blkno);
a2ceac1f 902 xfs_trans_log_buf(state->args->trans, oldblk->bp,
88b32f06
DC
903 XFS_DA_LOGRANGE(node, &btree[oldblk->index],
904 tmp + sizeof(*btree)));
905
906 nodehdr.count += 1;
ff105f75 907 dp->d_ops->node_hdr_to_disk(node, &nodehdr);
a2ceac1f 908 xfs_trans_log_buf(state->args->trans, oldblk->bp,
ff105f75 909 XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
2bd0ea18
NS
910
911 /*
912 * Copy the last hash value from the oldblk to propagate upwards.
913 */
88b32f06 914 oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
2bd0ea18
NS
915}
916
917/*========================================================================
918 * Routines used for shrinking the Btree.
919 *========================================================================*/
920
921/*
922 * Deallocate an empty leaf node, remove it from its parent,
923 * possibly deallocating that block, etc...
924 */
925int
88b32f06
DC
926xfs_da3_join(
927 struct xfs_da_state *state)
2bd0ea18 928{
88b32f06
DC
929 struct xfs_da_state_blk *drop_blk;
930 struct xfs_da_state_blk *save_blk;
931 int action = 0;
932 int error;
2bd0ea18 933
a2ceac1f
DC
934 trace_xfs_da_join(state->args);
935
2bd0ea18
NS
936 drop_blk = &state->path.blk[ state->path.active-1 ];
937 save_blk = &state->altpath.blk[ state->path.active-1 ];
938 ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
939 ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
5e656dbb 940 drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
2bd0ea18
NS
941
942 /*
943 * Walk back up the tree joining/deallocating as necessary.
944 * When we stop dropping blocks, break out.
945 */
946 for ( ; state->path.active >= 2; drop_blk--, save_blk--,
947 state->path.active--) {
948 /*
949 * See if we can combine the block with a neighbor.
950 * (action == 0) => no options, just leave
951 * (action == 1) => coalesce, then unlink
952 * (action == 2) => block empty, unlink it
953 */
954 switch (drop_blk->magic) {
955 case XFS_ATTR_LEAF_MAGIC:
a24374f4 956 error = xfs_attr3_leaf_toosmall(state, &action);
2bd0ea18 957 if (error)
af43ca9f 958 return error;
2bd0ea18 959 if (action == 0)
af43ca9f 960 return 0;
a24374f4 961 xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
2bd0ea18 962 break;
2bd0ea18 963 case XFS_DIR2_LEAFN_MAGIC:
2bd0ea18
NS
964 error = xfs_dir2_leafn_toosmall(state, &action);
965 if (error)
966 return error;
967 if (action == 0)
968 return 0;
969 xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
970 break;
971 case XFS_DA_NODE_MAGIC:
972 /*
973 * Remove the offending node, fixup hashvals,
974 * check for a toosmall neighbor.
975 */
88b32f06
DC
976 xfs_da3_node_remove(state, drop_blk);
977 xfs_da3_fixhashpath(state, &state->path);
978 error = xfs_da3_node_toosmall(state, &action);
2bd0ea18 979 if (error)
af43ca9f 980 return error;
2bd0ea18
NS
981 if (action == 0)
982 return 0;
88b32f06 983 xfs_da3_node_unbalance(state, drop_blk, save_blk);
2bd0ea18
NS
984 break;
985 }
88b32f06
DC
986 xfs_da3_fixhashpath(state, &state->altpath);
987 error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
2bd0ea18
NS
988 xfs_da_state_kill_altpath(state);
989 if (error)
af43ca9f 990 return error;
2bd0ea18
NS
991 error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
992 drop_blk->bp);
993 drop_blk->bp = NULL;
994 if (error)
af43ca9f 995 return error;
2bd0ea18
NS
996 }
997 /*
998 * We joined all the way to the top. If it turns out that
999 * we only have one entry in the root, make the child block
1000 * the new root.
1001 */
88b32f06
DC
1002 xfs_da3_node_remove(state, drop_blk);
1003 xfs_da3_fixhashpath(state, &state->path);
1004 error = xfs_da3_root_join(state, &state->path.blk[0]);
af43ca9f 1005 return error;
2bd0ea18
NS
1006}
1007
#ifdef DEBUG
/*
 * Debug-only sanity check: a block about to become the sole child of the
 * root must carry a magic number appropriate for its tree level and must
 * have no siblings.
 */
static void
xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
{
	__be16	magic = blkinfo->magic;

	/* An only child cannot be linked to any sibling blocks. */
	ASSERT(!blkinfo->forw);
	ASSERT(!blkinfo->back);

	if (level != 1) {
		/* Interior levels hold (v4 or v5) intermediate nodes. */
		ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
		       magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
		return;
	}

	/* Level 1 children are directory or attribute leaves. */
	ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
	       magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
	       magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
	       magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
}
#else	/* !DEBUG */
#define	xfs_da_blkinfo_onlychild_validate(blkinfo, level)
#endif	/* !DEBUG */
1029
/*
 * We have only one entry in the root.  Copy the only remaining child of
 * the old root to block 0 as the new root node.
 *
 * Returns 0 on success, or the error from reading the child block or
 * shrinking the inode.  If the root still has more than one child this
 * is a no-op.
 */
STATIC int
xfs_da3_root_join(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*root_blk)
{
	struct xfs_da_intnode	*oldroot;
	struct xfs_da_args	*args;
	xfs_dablk_t		child;
	struct xfs_buf		*bp;
	struct xfs_da3_icnode_hdr oldroothdr;
	struct xfs_da_node_entry *btree;
	int			error;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_root_join(state->args);

	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);

	args = state->args;
	oldroot = root_blk->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot);
	/* The root can never have siblings. */
	ASSERT(oldroothdr.forw == 0);
	ASSERT(oldroothdr.back == 0);

	/*
	 * If the root has more than one child, then don't do anything.
	 */
	if (oldroothdr.count > 1)
		return 0;

	/*
	 * Read in the (only) child block, then copy those bytes into
	 * the root block's buffer and free the original child block.
	 */
	btree = dp->d_ops->node_tree_p(oldroot);
	child = be32_to_cpu(btree[0].before);
	ASSERT(child != 0);
	error = xfs_da3_node_read(args->trans, dp, child, -1, &bp,
					     args->whichfork);
	if (error)
		return error;
	xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);

	/*
	 * This could be copying a leaf back into the root block in the case of
	 * there only being a single leaf block left in the tree. Hence we have
	 * to update the b_ops pointer as well to match the buffer type change
	 * that could occur. For dir3 blocks we also need to update the block
	 * number in the buffer header.
	 */
	memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize);
	root_blk->bp->b_ops = bp->b_ops;
	xfs_trans_buf_copy_type(root_blk->bp, bp);
	if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
		/* CRC-enabled blocks self-describe their location. */
		struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
		da3->blkno = cpu_to_be64(root_blk->bp->b_bn);
	}
	xfs_trans_log_buf(args->trans, root_blk->bp, 0,
			  args->geo->blksize - 1);
	/* The child block is handed to xfs_da_shrink_inode for disposal. */
	error = xfs_da_shrink_inode(args, child, bp);
	return error;
}
1096
/*
 * Check a node block and its neighbors to see if the block should be
 * collapsed into one or the other neighbor.  Always keep the block
 * with the smaller block number.
 * If the current block is over 50% full, don't try to join it, return 0.
 * If the block is empty, fill in the state structure and return 2.
 * If it can be collapsed, fill in the state structure and return 1.
 * If nothing can be done, return 0.
 *
 * The "action" result is returned through *action; the function's own
 * return value is 0 or a negative errno from reading/shifting.
 */
STATIC int
xfs_da3_node_toosmall(
	struct xfs_da_state	*state,
	int			*action)
{
	struct xfs_da_intnode	*node;
	struct xfs_da_state_blk	*blk;
	struct xfs_da_blkinfo	*info;
	xfs_dablk_t		blkno;
	struct xfs_buf		*bp;
	struct xfs_da3_icnode_hdr nodehdr;
	int			count;
	int			forward;
	int			error;
	int			retval;
	int			i;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_toosmall(state->args);

	/*
	 * Check for the degenerate case of the block being over 50% full.
	 * If so, it's not worth even looking to see if we might be able
	 * to coalesce with a sibling.
	 */
	blk = &state->path.blk[ state->path.active-1 ];
	info = blk->bp->b_addr;
	node = (xfs_da_intnode_t *)info;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);
	if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
		*action = 0;	/* blk over 50%, don't try to join */
		return 0;	/* blk over 50%, don't try to join */
	}

	/*
	 * Check for the degenerate case of the block being empty.
	 * If the block is empty, we'll simply delete it, no need to
	 * coalesce it with a sibling block.  We choose (arbitrarily)
	 * to merge with the forward block unless it is NULL.
	 */
	if (nodehdr.count == 0) {
		/*
		 * Make altpath point to the block we want to keep and
		 * path point to the block we want to drop (this one).
		 */
		forward = (info->forw != 0);
		memcpy(&state->altpath, &state->path, sizeof(state->path));
		error = xfs_da3_path_shift(state, &state->altpath, forward,
						 0, &retval);
		if (error)
			return error;
		if (retval) {
			*action = 0;
		} else {
			*action = 2;
		}
		return 0;
	}

	/*
	 * Examine each sibling block to see if we can coalesce with
	 * at least 25% free space to spare.  We need to figure out
	 * whether to merge with the forward or the backward block.
	 * We prefer coalescing with the lower numbered sibling so as
	 * to shrink a directory over time.
	 */
	/* "count" = 75% of capacity minus our own entries: room to spare. */
	count  = state->args->geo->node_ents;
	count -= state->args->geo->node_ents >> 2;
	count -= nodehdr.count;

	/* start with smaller blk num */
	forward = nodehdr.forw < nodehdr.back;
	for (i = 0; i < 2; forward = !forward, i++) {
		struct xfs_da3_icnode_hdr thdr;
		if (forward)
			blkno = nodehdr.forw;
		else
			blkno = nodehdr.back;
		if (blkno == 0)
			continue;
		error = xfs_da3_node_read(state->args->trans, dp,
					blkno, -1, &bp, state->args->whichfork);
		if (error)
			return error;

		node = bp->b_addr;
		dp->d_ops->node_hdr_from_disk(&thdr, node);
		/* Only needed the header; release the sibling buffer. */
		xfs_trans_brelse(state->args->trans, bp);

		if (count - thdr.count >= 0)
			break;	/* fits with at least 25% to spare */
	}
	if (i >= 2) {
		/* Neither sibling had enough free space. */
		*action = 0;
		return 0;
	}

	/*
	 * Make altpath point to the block we want to keep (the lower
	 * numbered block) and path point to the block we want to drop.
	 */
	memcpy(&state->altpath, &state->path, sizeof(state->path));
	if (blkno < blk->blkno) {
		error = xfs_da3_path_shift(state, &state->altpath, forward,
						 0, &retval);
	} else {
		error = xfs_da3_path_shift(state, &state->path, forward,
						 0, &retval);
	}
	if (error)
		return error;
	if (retval) {
		*action = 0;
		return 0;
	}
	*action = 1;
	return 0;
}
1224
1225/*
1226 * Pick up the last hashvalue from an intermediate node.
1227 */
1228STATIC uint
1229xfs_da3_node_lasthash(
ff105f75 1230 struct xfs_inode *dp,
88b32f06
DC
1231 struct xfs_buf *bp,
1232 int *count)
1233{
1234 struct xfs_da_intnode *node;
1235 struct xfs_da_node_entry *btree;
1236 struct xfs_da3_icnode_hdr nodehdr;
1237
1238 node = bp->b_addr;
ff105f75 1239 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
88b32f06
DC
1240 if (count)
1241 *count = nodehdr.count;
1242 if (!nodehdr.count)
1243 return 0;
ff105f75 1244 btree = dp->d_ops->node_tree_p(node);
88b32f06 1245 return be32_to_cpu(btree[nodehdr.count - 1].hashval);
2bd0ea18
NS
1246}
1247
2bd0ea18
NS
/*
 * Walk back up the tree adjusting hash values as necessary,
 * when we stop making changes, return.
 *
 * The bottom block of the path supplies the new "last hashval"; each
 * parent entry pointing at the child is updated (and logged) until a
 * level is found whose entry already matches.
 */
void
xfs_da3_fixhashpath(
	struct xfs_da_state	*state,
	struct xfs_da_state_path *path)
{
	struct xfs_da_state_blk	*blk;
	struct xfs_da_intnode	*node;
	struct xfs_da_node_entry *btree;
	xfs_dahash_t		lasthash=0;
	int			level;
	int			count;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_fixhashpath(state->args);

	/* Fetch the last hashval of the bottom-most block in the path. */
	level = path->active-1;
	blk = &path->blk[ level ];
	switch (blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
		lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DA_NODE_MAGIC:
		lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count);
		if (count == 0)
			return;
		break;
	}
	/* Propagate upwards, stopping as soon as nothing changes. */
	for (blk--, level--; level >= 0; blk--, level--) {
		struct xfs_da3_icnode_hdr nodehdr;

		node = blk->bp->b_addr;
		dp->d_ops->node_hdr_from_disk(&nodehdr, node);
		btree = dp->d_ops->node_tree_p(node);
		if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
			break;
		blk->hashval = lasthash;
		btree[blk->index].hashval = cpu_to_be32(lasthash);
		xfs_trans_log_buf(state->args->trans, blk->bp,
				  XFS_DA_LOGRANGE(node, &btree[blk->index],
						  sizeof(*btree)));

		/* This node's own last hashval feeds the next level up. */
		lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval);
	}
}
1303
2bd0ea18
NS
/*
 * Remove an entry from an intermediate node.
 *
 * Shifts the remaining entries down over the removed slot, zeroes the
 * now-unused tail entry, decrements the count, and records the block's
 * new last hashval in drop_blk for upward propagation.  All changes are
 * logged to the transaction.
 */
STATIC void
xfs_da3_node_remove(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*drop_blk)
{
	struct xfs_da_intnode	*node;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_node_entry *btree;
	int			index;
	int			tmp;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_remove(state->args);

	node = drop_blk->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);
	ASSERT(drop_blk->index < nodehdr.count);
	ASSERT(drop_blk->index >= 0);

	/*
	 * Copy over the offending entry, or just zero it out.
	 */
	index = drop_blk->index;
	btree = dp->d_ops->node_tree_p(node);
	if (index < nodehdr.count - 1) {
		tmp  = nodehdr.count - index - 1;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		memmove(&btree[index], &btree[index + 1], tmp);
		xfs_trans_log_buf(state->args->trans, drop_blk->bp,
		    XFS_DA_LOGRANGE(node, &btree[index], tmp));
		/* After the shift, the stale copy lives in the last slot. */
		index = nodehdr.count - 1;
	}
	memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
	xfs_trans_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
	nodehdr.count -= 1;
	dp->d_ops->node_hdr_to_disk(node, &nodehdr);
	xfs_trans_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));

	/*
	 * Copy the last hash value from the block to propagate upwards.
	 */
	drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval);
}
1352
/*
 * Unbalance the elements between two intermediate nodes,
 * move all Btree elements from one node into another.
 *
 * All entries from drop_blk are merged into save_blk, inserted either
 * before or after save_blk's existing entries depending on hashval
 * order; save_blk's hashval is refreshed for upward propagation.
 */
STATIC void
xfs_da3_node_unbalance(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*drop_blk,
	struct xfs_da_state_blk	*save_blk)
{
	struct xfs_da_intnode	*drop_node;
	struct xfs_da_intnode	*save_node;
	struct xfs_da_node_entry *drop_btree;
	struct xfs_da_node_entry *save_btree;
	struct xfs_da3_icnode_hdr drop_hdr;
	struct xfs_da3_icnode_hdr save_hdr;
	struct xfs_trans	*tp;
	int			sindex;
	int			tmp;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_unbalance(state->args);

	drop_node = drop_blk->bp->b_addr;
	save_node = save_blk->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node);
	dp->d_ops->node_hdr_from_disk(&save_hdr, save_node);
	drop_btree = dp->d_ops->node_tree_p(drop_node);
	save_btree = dp->d_ops->node_tree_p(save_node);
	tp = state->args->trans;

	/*
	 * If the dying block has lower hashvals, then move all the
	 * elements in the remaining block up to make a hole.
	 */
	if ((be32_to_cpu(drop_btree[0].hashval) <
			be32_to_cpu(save_btree[0].hashval)) ||
	    (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) <
			be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) {
		/* XXX: check this - is memmove dst correct? */
		tmp = save_hdr.count * sizeof(xfs_da_node_entry_t);
		memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp);

		/* Incoming entries land at the front of save_blk. */
		sindex = 0;
		xfs_trans_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, &save_btree[0],
				(save_hdr.count + drop_hdr.count) *
						sizeof(xfs_da_node_entry_t)));
	} else {
		/* Incoming entries are appended after the existing ones. */
		sindex = save_hdr.count;
		xfs_trans_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, &save_btree[sindex],
				drop_hdr.count * sizeof(xfs_da_node_entry_t)));
	}

	/*
	 * Move all the B-tree elements from drop_blk to save_blk.
	 */
	tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t);
	memcpy(&save_btree[sindex], &drop_btree[0], tmp);
	save_hdr.count += drop_hdr.count;

	dp->d_ops->node_hdr_to_disk(save_node, &save_hdr);
	xfs_trans_log_buf(tp, save_blk->bp,
		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
				dp->d_ops->node_hdr_size));

	/*
	 * Save the last hashval in the remaining block for upward propagation.
	 */
	save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval);
}
1425
2bd0ea18
NS
1426/*========================================================================
1427 * Routines used for finding things in the Btree.
1428 *========================================================================*/
1429
/*
 * Walk down the Btree looking for a particular filename, filling
 * in the state structure as we go.
 *
 * We will set the state structure to point to each of the elements
 * in each of the nodes where either the hashval is or should be.
 *
 * We support duplicate hashval's so for each entry in the current
 * node that could contain the desired hashval, descend.  This is a
 * pruned depth-first tree search.
 *
 * The lookup result (e.g. -ENOENT/-ENOATTR/-EEXIST from the leaf
 * lookup) is returned through *result; the function's own return value
 * is 0 or a negative errno for I/O / corruption failures.
 */
int							/* error */
xfs_da3_node_lookup_int(
	struct xfs_da_state	*state,
	int			*result)
{
	struct xfs_da_state_blk	*blk;
	struct xfs_da_blkinfo	*curr;
	struct xfs_da_intnode	*node;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_args	*args;
	xfs_dablk_t		blkno;
	xfs_dahash_t		hashval;
	xfs_dahash_t		btreehashval;
	int			probe;
	int			span;
	int			max;
	int			error;
	int			retval;
	struct xfs_inode	*dp = state->args->dp;

	args = state->args;

	/*
	 * Descend thru the B-tree searching each level for the right
	 * node to use, until the right hashval is found.
	 */
	blkno = (args->whichfork == XFS_DATA_FORK)? args->geo->leafblk : 0;
	for (blk = &state->path.blk[0], state->path.active = 1;
			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
			 blk++, state->path.active++) {
		/*
		 * Read the next node down in the tree.
		 */
		blk->blkno = blkno;
		error = xfs_da3_node_read(args->trans, args->dp, blkno,
					-1, &blk->bp, args->whichfork);
		if (error) {
			blk->blkno = 0;
			state->path.active--;
			return error;
		}
		curr = blk->bp->b_addr;
		blk->magic = be16_to_cpu(curr->magic);

		/*
		 * Flatten crc/non-crc leaf magics to a single type and
		 * stop descending once a leaf is reached.
		 */
		if (blk->magic == XFS_ATTR_LEAF_MAGIC ||
		    blk->magic == XFS_ATTR3_LEAF_MAGIC) {
			blk->magic = XFS_ATTR_LEAF_MAGIC;
			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
			break;
		}

		if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
		    blk->magic == XFS_DIR3_LEAFN_MAGIC) {
			blk->magic = XFS_DIR2_LEAFN_MAGIC;
			blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
							       blk->bp, NULL);
			break;
		}

		blk->magic = XFS_DA_NODE_MAGIC;


		/*
		 * Search an intermediate node for a match.
		 */
		node = blk->bp->b_addr;
		dp->d_ops->node_hdr_from_disk(&nodehdr, node);
		btree = dp->d_ops->node_tree_p(node);

		max = nodehdr.count;
		blk->hashval = be32_to_cpu(btree[max - 1].hashval);

		/*
		 * Binary search.  (note: small blocks will skip loop)
		 */
		probe = span = max / 2;
		hashval = args->hashval;
		while (span > 4) {
			span /= 2;
			btreehashval = be32_to_cpu(btree[probe].hashval);
			if (btreehashval < hashval)
				probe += span;
			else if (btreehashval > hashval)
				probe -= span;
			else
				break;
		}
		ASSERT((probe >= 0) && (probe < max));
		ASSERT((span <= 4) ||
			(be32_to_cpu(btree[probe].hashval) == hashval));

		/*
		 * Since we may have duplicate hashval's, find the first
		 * matching hashval in the node.
		 */
		while (probe > 0 &&
		       be32_to_cpu(btree[probe].hashval) >= hashval) {
			probe--;
		}
		while (probe < max &&
		       be32_to_cpu(btree[probe].hashval) < hashval) {
			probe++;
		}

		/*
		 * Pick the right block to descend on.
		 */
		if (probe == max) {
			/* hashval beyond all entries: follow the last one */
			blk->index = max - 1;
			blkno = be32_to_cpu(btree[max - 1].before);
		} else {
			blk->index = probe;
			blkno = be32_to_cpu(btree[probe].before);
		}
	}

	/*
	 * A leaf block that ends in the hashval that we are interested in
	 * (final hashval == search hashval) means that the next block may
	 * contain more entries with the same hashval, shift upward to the
	 * next leaf and keep searching.
	 */
	for (;;) {
		if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
			retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
							&blk->index, state);
		} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
			retval = xfs_attr3_leaf_lookup_int(blk->bp, args);
			blk->index = args->index;
			args->blkno = blk->blkno;
		} else {
			ASSERT(0);
			return -EFSCORRUPTED;
		}
		if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
		    (blk->hashval == args->hashval)) {
			error = xfs_da3_path_shift(state, &state->path, 1, 1,
							 &retval);
			if (error)
				return error;
			if (retval == 0) {
				continue;
			} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
				/* path_shift() gives ENOENT */
				retval = -ENOATTR;
			}
		}
		break;
	}
	*result = retval;
	return 0;
}
1594
2bd0ea18
NS
1595/*========================================================================
1596 * Utility routines.
1597 *========================================================================*/
1598
88b32f06
DC
1599/*
1600 * Compare two intermediate nodes for "order".
1601 */
1602STATIC int
1603xfs_da3_node_order(
ff105f75 1604 struct xfs_inode *dp,
88b32f06
DC
1605 struct xfs_buf *node1_bp,
1606 struct xfs_buf *node2_bp)
1607{
1608 struct xfs_da_intnode *node1;
1609 struct xfs_da_intnode *node2;
1610 struct xfs_da_node_entry *btree1;
1611 struct xfs_da_node_entry *btree2;
1612 struct xfs_da3_icnode_hdr node1hdr;
1613 struct xfs_da3_icnode_hdr node2hdr;
1614
1615 node1 = node1_bp->b_addr;
1616 node2 = node2_bp->b_addr;
ff105f75
DC
1617 dp->d_ops->node_hdr_from_disk(&node1hdr, node1);
1618 dp->d_ops->node_hdr_from_disk(&node2hdr, node2);
1619 btree1 = dp->d_ops->node_tree_p(node1);
1620 btree2 = dp->d_ops->node_tree_p(node2);
88b32f06
DC
1621
1622 if (node1hdr.count > 0 && node2hdr.count > 0 &&
1623 ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
1624 (be32_to_cpu(btree2[node2hdr.count - 1].hashval) <
1625 be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) {
1626 return 1;
1627 }
1628 return 0;
1629}
1630
2bd0ea18
NS
/*
 * Link a new block into a doubly linked list of blocks (of whatever type).
 *
 * The new block is spliced before or after old_blk depending on hashval
 * order; the affected sibling (if any) is read and its link updated, and
 * all touched buffers are logged.  Returns 0 or a negative errno.
 */
int							/* error */
xfs_da3_blk_link(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*old_blk,
	struct xfs_da_state_blk	*new_blk)
{
	struct xfs_da_blkinfo	*old_info;
	struct xfs_da_blkinfo	*new_info;
	struct xfs_da_blkinfo	*tmp_info;
	struct xfs_da_args	*args;
	struct xfs_buf		*bp;
	int			before = 0;
	int			error;
	struct xfs_inode	*dp = state->args->dp;

	/*
	 * Set up environment.
	 */
	args = state->args;
	ASSERT(args != NULL);
	old_info = old_blk->bp->b_addr;
	new_info = new_blk->bp->b_addr;
	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
	       old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);

	/* Decide on hashval ordering with the type-specific comparator. */
	switch (old_blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp);
		break;
	case XFS_DA_NODE_MAGIC:
		before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp);
		break;
	}

	/*
	 * Link blocks in appropriate order.
	 */
	if (before) {
		/*
		 * Link new block in before existing block.
		 */
		trace_xfs_da_link_before(args);
		new_info->forw = cpu_to_be32(old_blk->blkno);
		new_info->back = old_info->back;
		if (old_info->back) {
			/* Fix up the previous sibling's forward pointer. */
			error = xfs_da3_node_read(args->trans, dp,
						be32_to_cpu(old_info->back),
						-1, &bp, args->whichfork);
			if (error)
				return error;
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == old_info->magic);
			ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
			tmp_info->forw = cpu_to_be32(new_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
		}
		old_info->back = cpu_to_be32(new_blk->blkno);
	} else {
		/*
		 * Link new block in after existing block.
		 */
		trace_xfs_da_link_after(args);
		new_info->forw = old_info->forw;
		new_info->back = cpu_to_be32(old_blk->blkno);
		if (old_info->forw) {
			/* Fix up the next sibling's back pointer. */
			error = xfs_da3_node_read(args->trans, dp,
						be32_to_cpu(old_info->forw),
						-1, &bp, args->whichfork);
			if (error)
				return error;
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == old_info->magic);
			ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
			tmp_info->back = cpu_to_be32(new_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
		}
		old_info->forw = cpu_to_be32(new_blk->blkno);
	}

	xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
	xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
	return 0;
}
1723
2bd0ea18
NS
/*
 * Unlink a block from a doubly linked list of blocks.
 *
 * drop_blk is removed from the sibling chain; save_blk (its neighbor)
 * and, if present, the sibling on drop_blk's far side have their links
 * repaired and logged.  Returns 0 or a negative errno.
 */
STATIC int						/* error */
xfs_da3_blk_unlink(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*drop_blk,
	struct xfs_da_state_blk	*save_blk)
{
	struct xfs_da_blkinfo	*drop_info;
	struct xfs_da_blkinfo	*save_info;
	struct xfs_da_blkinfo	*tmp_info;
	struct xfs_da_args	*args;
	struct xfs_buf		*bp;
	int			error;

	/*
	 * Set up environment.
	 */
	args = state->args;
	ASSERT(args != NULL);
	save_info = save_blk->bp->b_addr;
	drop_info = drop_blk->bp->b_addr;
	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
	       save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
	ASSERT(save_blk->magic == drop_blk->magic);
	/* The two blocks must be adjacent in the sibling chain. */
	ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
	       (be32_to_cpu(save_info->back) == drop_blk->blkno));
	ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
	       (be32_to_cpu(drop_info->back) == save_blk->blkno));

	/*
	 * Unlink the leaf block from the doubly linked chain of leaves.
	 */
	if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
		trace_xfs_da_unlink_back(args);
		save_info->back = drop_info->back;
		if (drop_info->back) {
			/* Point the far sibling forward at save_blk. */
			error = xfs_da3_node_read(args->trans, args->dp,
						be32_to_cpu(drop_info->back),
						-1, &bp, args->whichfork);
			if (error)
				return error;
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == save_info->magic);
			ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
			tmp_info->forw = cpu_to_be32(save_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0,
						    sizeof(*tmp_info) - 1);
		}
	} else {
		trace_xfs_da_unlink_forward(args);
		save_info->forw = drop_info->forw;
		if (drop_info->forw) {
			/* Point the far sibling back at save_blk. */
			error = xfs_da3_node_read(args->trans, args->dp,
						be32_to_cpu(drop_info->forw),
						-1, &bp, args->whichfork);
			if (error)
				return error;
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == save_info->magic);
			ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
			tmp_info->back = cpu_to_be32(save_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0,
						    sizeof(*tmp_info) - 1);
		}
	}

	xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
	return 0;
}
1798
1799/*
1800 * Move a path "forward" or "!forward" one block at the current level.
1801 *
1802 * This routine will adjust a "path" to point to the next block
1803 * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
1804 * Btree, including updating pointers to the intermediate nodes between
1805 * the new bottom and the root.
1806 */
1807int /* error */
88b32f06
DC
1808xfs_da3_path_shift(
1809 struct xfs_da_state *state,
1810 struct xfs_da_state_path *path,
1811 int forward,
1812 int release,
1813 int *result)
2bd0ea18 1814{
88b32f06
DC
1815 struct xfs_da_state_blk *blk;
1816 struct xfs_da_blkinfo *info;
1817 struct xfs_da_intnode *node;
1818 struct xfs_da_args *args;
1819 struct xfs_da_node_entry *btree;
1820 struct xfs_da3_icnode_hdr nodehdr;
1821 xfs_dablk_t blkno = 0;
1822 int level;
1823 int error;
ff105f75 1824 struct xfs_inode *dp = state->args->dp;
2bd0ea18 1825
a2ceac1f
DC
1826 trace_xfs_da_path_shift(state->args);
1827
2bd0ea18
NS
1828 /*
1829 * Roll up the Btree looking for the first block where our
1830 * current index is not at the edge of the block. Note that
1831 * we skip the bottom layer because we want the sibling block.
1832 */
1833 args = state->args;
1834 ASSERT(args != NULL);
1835 ASSERT(path != NULL);
1836 ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1837 level = (path->active-1) - 1; /* skip bottom layer in path */
1838 for (blk = &path->blk[level]; level >= 0; blk--, level--) {
a2ceac1f 1839 node = blk->bp->b_addr;
ff105f75
DC
1840 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
1841 btree = dp->d_ops->node_tree_p(node);
88b32f06
DC
1842
1843 if (forward && (blk->index < nodehdr.count - 1)) {
2bd0ea18 1844 blk->index++;
88b32f06 1845 blkno = be32_to_cpu(btree[blk->index].before);
2bd0ea18
NS
1846 break;
1847 } else if (!forward && (blk->index > 0)) {
1848 blk->index--;
88b32f06 1849 blkno = be32_to_cpu(btree[blk->index].before);
2bd0ea18
NS
1850 break;
1851 }
1852 }
1853 if (level < 0) {
12b53197 1854 *result = -ENOENT; /* we're out of our tree */
5e656dbb 1855 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
af43ca9f 1856 return 0;
2bd0ea18
NS
1857 }
1858
1859 /*
1860 * Roll down the edge of the subtree until we reach the
1861 * same depth we were at originally.
1862 */
1863 for (blk++, level++; level < path->active; blk++, level++) {
1864 /*
1865 * Release the old block.
1866 * (if it's dirty, trans won't actually let go)
1867 */
1868 if (release)
a2ceac1f 1869 xfs_trans_brelse(args->trans, blk->bp);
2bd0ea18
NS
1870
1871 /*
1872 * Read the next child block.
1873 */
1874 blk->blkno = blkno;
ff105f75 1875 error = xfs_da3_node_read(args->trans, dp, blkno, -1,
a2ceac1f 1876 &blk->bp, args->whichfork);
2bd0ea18 1877 if (error)
af43ca9f 1878 return error;
a2ceac1f
DC
1879 info = blk->bp->b_addr;
1880 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
88b32f06 1881 info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
a2ceac1f 1882 info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
65b80c98 1883 info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
a24374f4
DC
1884 info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
1885 info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
88b32f06
DC
1886
1887
1888 /*
1889 * Note: we flatten the magic number to a single type so we
1890 * don't have to compare against crc/non-crc types elsewhere.
1891 */
1892 switch (be16_to_cpu(info->magic)) {
1893 case XFS_DA_NODE_MAGIC:
1894 case XFS_DA3_NODE_MAGIC:
1895 blk->magic = XFS_DA_NODE_MAGIC;
2bd0ea18 1896 node = (xfs_da_intnode_t *)info;
ff105f75
DC
1897 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
1898 btree = dp->d_ops->node_tree_p(node);
88b32f06 1899 blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
2bd0ea18
NS
1900 if (forward)
1901 blk->index = 0;
1902 else
88b32f06
DC
1903 blk->index = nodehdr.count - 1;
1904 blkno = be32_to_cpu(btree[blk->index].before);
1905 break;
1906 case XFS_ATTR_LEAF_MAGIC:
a24374f4 1907 case XFS_ATTR3_LEAF_MAGIC:
88b32f06 1908 blk->magic = XFS_ATTR_LEAF_MAGIC;
2bd0ea18
NS
1909 ASSERT(level == path->active-1);
1910 blk->index = 0;
ff105f75 1911 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
88b32f06
DC
1912 break;
1913 case XFS_DIR2_LEAFN_MAGIC:
1914 case XFS_DIR3_LEAFN_MAGIC:
1915 blk->magic = XFS_DIR2_LEAFN_MAGIC;
1916 ASSERT(level == path->active-1);
1917 blk->index = 0;
ff105f75
DC
1918 blk->hashval = xfs_dir2_leafn_lasthash(args->dp,
1919 blk->bp, NULL);
88b32f06
DC
1920 break;
1921 default:
1922 ASSERT(0);
1923 break;
2bd0ea18
NS
1924 }
1925 }
1926 *result = 0;
88b32f06 1927 return 0;
2bd0ea18
NS
1928}
1929
1930
/*========================================================================
 * Utility routines.
 *========================================================================*/
1934
1935/*
1936 * Implement a simple hash on a character string.
1937 * Rotate the hash value by 7 bits, then XOR each character in.
1938 * This is implemented with some source-level loop unrolling.
1939 */
1940xfs_dahash_t
56b2de80 1941xfs_da_hashname(const __uint8_t *name, int namelen)
2bd0ea18
NS
1942{
1943 xfs_dahash_t hash;
1944
2bd0ea18
NS
1945 /*
1946 * Do four characters at a time as long as we can.
1947 */
f9c48a87 1948 for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
2bd0ea18 1949 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
f9c48a87
NS
1950 (name[3] << 0) ^ rol32(hash, 7 * 4);
1951
2bd0ea18
NS
1952 /*
1953 * Now do the rest of the characters.
1954 */
1955 switch (namelen) {
1956 case 3:
1957 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
f9c48a87 1958 rol32(hash, 7 * 3);
2bd0ea18 1959 case 2:
f9c48a87 1960 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
2bd0ea18 1961 case 1:
f9c48a87
NS
1962 return (name[0] << 0) ^ rol32(hash, 7 * 1);
1963 default: /* case 0: */
2bd0ea18
NS
1964 return hash;
1965 }
2bd0ea18
NS
1966}
1967
51ca7008 1968enum xfs_dacmp
5e656dbb
BN
1969xfs_da_compname(
1970 struct xfs_da_args *args,
56b2de80
DC
1971 const unsigned char *name,
1972 int len)
51ca7008 1973{
5e656dbb 1974 return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
51ca7008
BN
1975 XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
1976}
1977
5e656dbb
BN
1978static xfs_dahash_t
1979xfs_default_hashname(
1980 struct xfs_name *name)
1981{
1982 return xfs_da_hashname(name->name, name->len);
1983}
1984
51ca7008 1985const struct xfs_nameops xfs_default_nameops = {
5e656dbb 1986 .hashname = xfs_default_hashname,
51ca7008
BN
1987 .compname = xfs_da_compname
1988};
1989
2bd0ea18 1990int
a2ceac1f
DC
1991xfs_da_grow_inode_int(
1992 struct xfs_da_args *args,
1993 xfs_fileoff_t *bno,
1994 int count)
2bd0ea18 1995{
a2ceac1f
DC
1996 struct xfs_trans *tp = args->trans;
1997 struct xfs_inode *dp = args->dp;
1998 int w = args->whichfork;
5a35bf2c 1999 xfs_rfsblock_t nblks = dp->i_d.di_nblocks;
a2ceac1f
DC
2000 struct xfs_bmbt_irec map, *mapp;
2001 int nmap, error, got, i, mapi;
56b2de80 2002
2bd0ea18
NS
2003 /*
2004 * Find a spot in the file space to put the new block.
2005 */
a2ceac1f
DC
2006 error = xfs_bmap_first_unused(tp, dp, count, bno, w);
2007 if (error)
2bd0ea18 2008 return error;
a2ceac1f 2009
2bd0ea18
NS
2010 /*
2011 * Try mapping it in one filesystem block.
2012 */
2013 nmap = 1;
2014 ASSERT(args->firstblock != NULL);
a2ceac1f
DC
2015 error = xfs_bmapi_write(tp, dp, *bno, count,
2016 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
2bd0ea18 2017 args->firstblock, args->total, &map, &nmap,
a2ceac1f
DC
2018 args->flist);
2019 if (error)
2bd0ea18 2020 return error;
a2ceac1f 2021
2bd0ea18
NS
2022 ASSERT(nmap <= 1);
2023 if (nmap == 1) {
2024 mapp = &map;
2025 mapi = 1;
a2ceac1f
DC
2026 } else if (nmap == 0 && count > 1) {
2027 xfs_fileoff_t b;
2028 int c;
2029
2030 /*
2031 * If we didn't get it and the block might work if fragmented,
2032 * try without the CONTIG flag. Loop until we get it all.
2033 */
2bd0ea18 2034 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
a2ceac1f 2035 for (b = *bno, mapi = 0; b < *bno + count; ) {
2bd0ea18 2036 nmap = MIN(XFS_BMAP_MAX_NMAP, count);
a2ceac1f
DC
2037 c = (int)(*bno + count - b);
2038 error = xfs_bmapi_write(tp, dp, b, c,
2039 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
2bd0ea18 2040 args->firstblock, args->total,
a2ceac1f
DC
2041 &mapp[mapi], &nmap, args->flist);
2042 if (error)
2043 goto out_free_map;
2bd0ea18
NS
2044 if (nmap < 1)
2045 break;
2046 mapi += nmap;
2047 b = mapp[mapi - 1].br_startoff +
2048 mapp[mapi - 1].br_blockcount;
2049 }
2050 } else {
2bd0ea18
NS
2051 mapi = 0;
2052 mapp = NULL;
2053 }
a2ceac1f 2054
2bd0ea18
NS
2055 /*
2056 * Count the blocks we got, make sure it matches the total.
2057 */
2058 for (i = 0, got = 0; i < mapi; i++)
2059 got += mapp[i].br_blockcount;
a2ceac1f 2060 if (got != count || mapp[0].br_startoff != *bno ||
2bd0ea18 2061 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
a2ceac1f 2062 *bno + count) {
12b53197 2063 error = -ENOSPC;
a2ceac1f 2064 goto out_free_map;
2bd0ea18 2065 }
a2ceac1f 2066
56b2de80
DC
2067 /* account for newly allocated blocks in reserved blocks total */
2068 args->total -= dp->i_d.di_nblocks - nblks;
a2ceac1f
DC
2069
2070out_free_map:
2071 if (mapp != &map)
2072 kmem_free(mapp);
2073 return error;
2074}
2075
2076/*
2077 * Add a block to the btree ahead of the file.
2078 * Return the new block number to the caller.
2079 */
2080int
2081xfs_da_grow_inode(
2082 struct xfs_da_args *args,
2083 xfs_dablk_t *new_blkno)
2084{
2085 xfs_fileoff_t bno;
a2ceac1f
DC
2086 int error;
2087
2088 trace_xfs_da_grow_inode(args);
2089
ff105f75
DC
2090 bno = args->geo->leafblk;
2091 error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount);
a2ceac1f
DC
2092 if (!error)
2093 *new_blkno = (xfs_dablk_t)bno;
2094 return error;
2bd0ea18
NS
2095}
2096
2bd0ea18 2097/*
dfc130f3 2098 * Ick. We need to always be able to remove a btree block, even
2bd0ea18
NS
2099 * if there's no space reservation because the filesystem is full.
2100 * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
2101 * It swaps the target block with the last block in the file. The
2102 * last block in the file can always be removed since it can't cause
2103 * a bmap btree split to do that.
2104 */
2105STATIC int
88b32f06
DC
2106xfs_da3_swap_lastblock(
2107 struct xfs_da_args *args,
2108 xfs_dablk_t *dead_blknop,
2109 struct xfs_buf **dead_bufp)
2bd0ea18 2110{
88b32f06
DC
2111 struct xfs_da_blkinfo *dead_info;
2112 struct xfs_da_blkinfo *sib_info;
2113 struct xfs_da_intnode *par_node;
2114 struct xfs_da_intnode *dead_node;
2115 struct xfs_dir2_leaf *dead_leaf2;
2116 struct xfs_da_node_entry *btree;
2117 struct xfs_da3_icnode_hdr par_hdr;
ff105f75 2118 struct xfs_inode *dp;
88b32f06
DC
2119 struct xfs_trans *tp;
2120 struct xfs_mount *mp;
2121 struct xfs_buf *dead_buf;
2122 struct xfs_buf *last_buf;
2123 struct xfs_buf *sib_buf;
2124 struct xfs_buf *par_buf;
2125 xfs_dahash_t dead_hash;
2126 xfs_fileoff_t lastoff;
2127 xfs_dablk_t dead_blkno;
2128 xfs_dablk_t last_blkno;
2129 xfs_dablk_t sib_blkno;
2130 xfs_dablk_t par_blkno;
2131 int error;
2132 int w;
2133 int entno;
2134 int level;
2135 int dead_level;
2bd0ea18 2136
a2ceac1f
DC
2137 trace_xfs_da_swap_lastblock(args);
2138
2bd0ea18
NS
2139 dead_buf = *dead_bufp;
2140 dead_blkno = *dead_blknop;
2141 tp = args->trans;
ff105f75 2142 dp = args->dp;
2bd0ea18
NS
2143 w = args->whichfork;
2144 ASSERT(w == XFS_DATA_FORK);
ff105f75
DC
2145 mp = dp->i_mount;
2146 lastoff = args->geo->freeblk;
2147 error = xfs_bmap_last_before(tp, dp, &lastoff, w);
2bd0ea18
NS
2148 if (error)
2149 return error;
4ca431fc
NS
2150 if (unlikely(lastoff == 0)) {
2151 XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
2152 mp);
12b53197 2153 return -EFSCORRUPTED;
4ca431fc 2154 }
2bd0ea18
NS
2155 /*
2156 * Read the last block in the btree space.
2157 */
ff105f75
DC
2158 last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount;
2159 error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w);
a2ceac1f 2160 if (error)
2bd0ea18
NS
2161 return error;
2162 /*
2163 * Copy the last block into the dead buffer and log it.
2164 */
ff105f75
DC
2165 memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
2166 xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
a2ceac1f 2167 dead_info = dead_buf->b_addr;
2bd0ea18
NS
2168 /*
2169 * Get values from the moved block.
2170 */
65b80c98
DC
2171 if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
2172 dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
2173 struct xfs_dir3_icleaf_hdr leafhdr;
2174 struct xfs_dir2_leaf_entry *ents;
2175
2bd0ea18 2176 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
ff105f75
DC
2177 dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2);
2178 ents = dp->d_ops->leaf_ents_p(dead_leaf2);
2bd0ea18 2179 dead_level = 0;
65b80c98 2180 dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
2bd0ea18 2181 } else {
88b32f06
DC
2182 struct xfs_da3_icnode_hdr deadhdr;
2183
2bd0ea18 2184 dead_node = (xfs_da_intnode_t *)dead_info;
ff105f75
DC
2185 dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node);
2186 btree = dp->d_ops->node_tree_p(dead_node);
88b32f06
DC
2187 dead_level = deadhdr.level;
2188 dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);
2bd0ea18
NS
2189 }
2190 sib_buf = par_buf = NULL;
2191 /*
2192 * If the moved block has a left sibling, fix up the pointers.
2193 */
5e656dbb 2194 if ((sib_blkno = be32_to_cpu(dead_info->back))) {
ff105f75 2195 error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);
a2ceac1f 2196 if (error)
2bd0ea18 2197 goto done;
a2ceac1f 2198 sib_info = sib_buf->b_addr;
4ca431fc 2199 if (unlikely(
5e656dbb
BN
2200 be32_to_cpu(sib_info->forw) != last_blkno ||
2201 sib_info->magic != dead_info->magic)) {
4ca431fc
NS
2202 XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
2203 XFS_ERRLEVEL_LOW, mp);
12b53197 2204 error = -EFSCORRUPTED;
2bd0ea18
NS
2205 goto done;
2206 }
5e656dbb 2207 sib_info->forw = cpu_to_be32(dead_blkno);
a2ceac1f 2208 xfs_trans_log_buf(tp, sib_buf,
2bd0ea18
NS
2209 XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
2210 sizeof(sib_info->forw)));
2bd0ea18
NS
2211 sib_buf = NULL;
2212 }
2213 /*
2214 * If the moved block has a right sibling, fix up the pointers.
2215 */
5e656dbb 2216 if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
ff105f75 2217 error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);
a2ceac1f 2218 if (error)
2bd0ea18 2219 goto done;
a2ceac1f 2220 sib_info = sib_buf->b_addr;
4ca431fc 2221 if (unlikely(
5e656dbb
BN
2222 be32_to_cpu(sib_info->back) != last_blkno ||
2223 sib_info->magic != dead_info->magic)) {
4ca431fc
NS
2224 XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
2225 XFS_ERRLEVEL_LOW, mp);
12b53197 2226 error = -EFSCORRUPTED;
2bd0ea18
NS
2227 goto done;
2228 }
5e656dbb 2229 sib_info->back = cpu_to_be32(dead_blkno);
a2ceac1f 2230 xfs_trans_log_buf(tp, sib_buf,
2bd0ea18
NS
2231 XFS_DA_LOGRANGE(sib_info, &sib_info->back,
2232 sizeof(sib_info->back)));
2bd0ea18
NS
2233 sib_buf = NULL;
2234 }
ff105f75 2235 par_blkno = args->geo->leafblk;
2bd0ea18
NS
2236 level = -1;
2237 /*
2238 * Walk down the tree looking for the parent of the moved block.
2239 */
2240 for (;;) {
ff105f75 2241 error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
a2ceac1f 2242 if (error)
2bd0ea18 2243 goto done;
a2ceac1f 2244 par_node = par_buf->b_addr;
ff105f75 2245 dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);
88b32f06 2246 if (level >= 0 && level != par_hdr.level + 1) {
4ca431fc
NS
2247 XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
2248 XFS_ERRLEVEL_LOW, mp);
12b53197 2249 error = -EFSCORRUPTED;
2bd0ea18
NS
2250 goto done;
2251 }
88b32f06 2252 level = par_hdr.level;
ff105f75 2253 btree = dp->d_ops->node_tree_p(par_node);
2bd0ea18 2254 for (entno = 0;
88b32f06
DC
2255 entno < par_hdr.count &&
2256 be32_to_cpu(btree[entno].hashval) < dead_hash;
2bd0ea18
NS
2257 entno++)
2258 continue;
88b32f06 2259 if (entno == par_hdr.count) {
4ca431fc
NS
2260 XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
2261 XFS_ERRLEVEL_LOW, mp);
12b53197 2262 error = -EFSCORRUPTED;
2bd0ea18
NS
2263 goto done;
2264 }
88b32f06 2265 par_blkno = be32_to_cpu(btree[entno].before);
2bd0ea18
NS
2266 if (level == dead_level + 1)
2267 break;
a2ceac1f 2268 xfs_trans_brelse(tp, par_buf);
2bd0ea18
NS
2269 par_buf = NULL;
2270 }
2271 /*
2272 * We're in the right parent block.
2273 * Look for the right entry.
2274 */
2275 for (;;) {
2276 for (;
88b32f06
DC
2277 entno < par_hdr.count &&
2278 be32_to_cpu(btree[entno].before) != last_blkno;
2bd0ea18
NS
2279 entno++)
2280 continue;
88b32f06 2281 if (entno < par_hdr.count)
2bd0ea18 2282 break;
88b32f06 2283 par_blkno = par_hdr.forw;
a2ceac1f 2284 xfs_trans_brelse(tp, par_buf);
2bd0ea18 2285 par_buf = NULL;
4ca431fc
NS
2286 if (unlikely(par_blkno == 0)) {
2287 XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
2288 XFS_ERRLEVEL_LOW, mp);
12b53197 2289 error = -EFSCORRUPTED;
2bd0ea18
NS
2290 goto done;
2291 }
ff105f75 2292 error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
a2ceac1f 2293 if (error)
2bd0ea18 2294 goto done;
a2ceac1f 2295 par_node = par_buf->b_addr;
ff105f75 2296 dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);
88b32f06 2297 if (par_hdr.level != level) {
4ca431fc
NS
2298 XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
2299 XFS_ERRLEVEL_LOW, mp);
12b53197 2300 error = -EFSCORRUPTED;
2bd0ea18
NS
2301 goto done;
2302 }
ff105f75 2303 btree = dp->d_ops->node_tree_p(par_node);
2bd0ea18
NS
2304 entno = 0;
2305 }
2306 /*
2307 * Update the parent entry pointing to the moved block.
2308 */
88b32f06 2309 btree[entno].before = cpu_to_be32(dead_blkno);
a2ceac1f 2310 xfs_trans_log_buf(tp, par_buf,
88b32f06
DC
2311 XFS_DA_LOGRANGE(par_node, &btree[entno].before,
2312 sizeof(btree[entno].before)));
2bd0ea18
NS
2313 *dead_blknop = last_blkno;
2314 *dead_bufp = last_buf;
2315 return 0;
2316done:
2317 if (par_buf)
a2ceac1f 2318 xfs_trans_brelse(tp, par_buf);
2bd0ea18 2319 if (sib_buf)
a2ceac1f
DC
2320 xfs_trans_brelse(tp, sib_buf);
2321 xfs_trans_brelse(tp, last_buf);
2bd0ea18
NS
2322 return error;
2323}
2324
2325/*
2326 * Remove a btree block from a directory or attribute.
2327 */
2328int
a2ceac1f
DC
2329xfs_da_shrink_inode(
2330 xfs_da_args_t *args,
2331 xfs_dablk_t dead_blkno,
2332 struct xfs_buf *dead_buf)
2bd0ea18
NS
2333{
2334 xfs_inode_t *dp;
2335 int done, error, w, count;
2bd0ea18 2336 xfs_trans_t *tp;
2bd0ea18 2337
a2ceac1f
DC
2338 trace_xfs_da_shrink_inode(args);
2339
2bd0ea18
NS
2340 dp = args->dp;
2341 w = args->whichfork;
2342 tp = args->trans;
ff105f75 2343 count = args->geo->fsbcount;
2bd0ea18
NS
2344 for (;;) {
2345 /*
2346 * Remove extents. If we get ENOSPC for a dir we have to move
2347 * the last block to the place we want to kill.
2348 */
88b32f06
DC
2349 error = xfs_bunmapi(tp, dp, dead_blkno, count,
2350 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
2351 0, args->firstblock, args->flist, &done);
12b53197 2352 if (error == -ENOSPC) {
2bd0ea18 2353 if (w != XFS_DATA_FORK)
5e656dbb 2354 break;
88b32f06
DC
2355 error = xfs_da3_swap_lastblock(args, &dead_blkno,
2356 &dead_buf);
2357 if (error)
5e656dbb
BN
2358 break;
2359 } else {
2bd0ea18 2360 break;
2bd0ea18
NS
2361 }
2362 }
a2ceac1f 2363 xfs_trans_binval(tp, dead_buf);
2bd0ea18
NS
2364 return error;
2365}
2366
2367/*
2368 * See if the mapping(s) for this btree block are valid, i.e.
2369 * don't contain holes, are logically contiguous, and cover the whole range.
2370 */
2371STATIC int
2372xfs_da_map_covers_blocks(
2373 int nmap,
dfc130f3 2374 xfs_bmbt_irec_t *mapp,
2bd0ea18
NS
2375 xfs_dablk_t bno,
2376 int count)
2377{
2378 int i;
2379 xfs_fileoff_t off;
2380
2381 for (i = 0, off = bno; i < nmap; i++) {
2382 if (mapp[i].br_startblock == HOLESTARTBLOCK ||
2383 mapp[i].br_startblock == DELAYSTARTBLOCK) {
2bd0ea18
NS
2384 return 0;
2385 }
2386 if (off != mapp[i].br_startoff) {
2bd0ea18
NS
2387 return 0;
2388 }
2389 off += mapp[i].br_blockcount;
2390 }
2391 return off == bno + count;
2392}
2393
2394/*
a2ceac1f
DC
2395 * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map.
2396 *
2397 * For the single map case, it is assumed that the caller has provided a pointer
2398 * to a valid xfs_buf_map. For the multiple map case, this function will
2399 * allocate the xfs_buf_map to hold all the maps and replace the caller's single
2400 * map pointer with the allocated map.
2bd0ea18 2401 */
a2ceac1f
DC
2402static int
2403xfs_buf_map_from_irec(
2404 struct xfs_mount *mp,
2405 struct xfs_buf_map **mapp,
6bddecbc 2406 int *nmaps,
a2ceac1f 2407 struct xfs_bmbt_irec *irecs,
6bddecbc 2408 int nirecs)
2bd0ea18 2409{
a2ceac1f
DC
2410 struct xfs_buf_map *map;
2411 int i;
2412
2413 ASSERT(*nmaps == 1);
2414 ASSERT(nirecs >= 1);
2415
2416 if (nirecs > 1) {
7b02556d
DC
2417 map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map),
2418 KM_SLEEP | KM_NOFS);
a2ceac1f 2419 if (!map)
12b53197 2420 return -ENOMEM;
a2ceac1f
DC
2421 *mapp = map;
2422 }
2423
2424 *nmaps = nirecs;
2425 map = *mapp;
2426 for (i = 0; i < *nmaps; i++) {
2427 ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK &&
2428 irecs[i].br_startblock != HOLESTARTBLOCK);
2429 map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
2430 map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
2431 }
2432 return 0;
2433}
2434
2435/*
2436 * Map the block we are given ready for reading. There are three possible return
2437 * values:
2438 * -1 - will be returned if we land in a hole and mappedbno == -2 so the
2439 * caller knows not to execute a subsequent read.
2440 * 0 - if we mapped the block successfully
2441 * >0 - positive error number if there was an error.
2442 */
2443static int
2444xfs_dabuf_map(
a2ceac1f
DC
2445 struct xfs_inode *dp,
2446 xfs_dablk_t bno,
2447 xfs_daddr_t mappedbno,
2448 int whichfork,
2449 struct xfs_buf_map **map,
2450 int *nmaps)
2451{
2452 struct xfs_mount *mp = dp->i_mount;
2453 int nfsb;
2454 int error = 0;
2455 struct xfs_bmbt_irec irec;
2456 struct xfs_bmbt_irec *irecs = &irec;
2457 int nirecs;
2458
2459 ASSERT(map && *map);
2460 ASSERT(*nmaps == 1);
2bd0ea18 2461
ff105f75
DC
2462 if (whichfork == XFS_DATA_FORK)
2463 nfsb = mp->m_dir_geo->fsbcount;
2464 else
2465 nfsb = mp->m_attr_geo->fsbcount;
a2ceac1f 2466
2bd0ea18
NS
2467 /*
2468 * Caller doesn't have a mapping. -2 means don't complain
2469 * if we land in a hole.
2470 */
2471 if (mappedbno == -1 || mappedbno == -2) {
2472 /*
2473 * Optimize the one-block case.
2474 */
a2ceac1f 2475 if (nfsb != 1)
7b02556d
DC
2476 irecs = kmem_zalloc(sizeof(irec) * nfsb,
2477 KM_SLEEP | KM_NOFS);
2bd0ea18 2478
a2ceac1f
DC
2479 nirecs = nfsb;
2480 error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs,
2481 &nirecs, xfs_bmapi_aflag(whichfork));
2482 if (error)
2483 goto out;
2bd0ea18 2484 } else {
a2ceac1f
DC
2485 irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
2486 irecs->br_startoff = (xfs_fileoff_t)bno;
2487 irecs->br_blockcount = nfsb;
2488 irecs->br_state = 0;
2489 nirecs = 1;
2bd0ea18 2490 }
a2ceac1f
DC
2491
2492 if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) {
12b53197
DC
2493 error = mappedbno == -2 ? -1 : -EFSCORRUPTED;
2494 if (unlikely(error == -EFSCORRUPTED)) {
4ca431fc 2495 if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
a2ceac1f
DC
2496 int i;
2497 xfs_alert(mp, "%s: bno %lld dir: inode %lld",
2498 __func__, (long long)bno,
4ca431fc 2499 (long long)dp->i_ino);
a2ceac1f
DC
2500 for (i = 0; i < *nmaps; i++) {
2501 xfs_alert(mp,
2502"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
4ca431fc 2503 i,
a2ceac1f
DC
2504 (long long)irecs[i].br_startoff,
2505 (long long)irecs[i].br_startblock,
2506 (long long)irecs[i].br_blockcount,
2507 irecs[i].br_state);
4ca431fc
NS
2508 }
2509 }
2510 XFS_ERROR_REPORT("xfs_da_do_buf(1)",
2511 XFS_ERRLEVEL_LOW, mp);
2512 }
a2ceac1f 2513 goto out;
2bd0ea18 2514 }
a2ceac1f
DC
2515 error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs);
2516out:
2517 if (irecs != &irec)
2518 kmem_free(irecs);
2519 return error;
2520}
2521
2522/*
2523 * Get a buffer for the dir/attr block.
2524 */
2525int
2526xfs_da_get_buf(
2527 struct xfs_trans *trans,
2528 struct xfs_inode *dp,
2529 xfs_dablk_t bno,
2530 xfs_daddr_t mappedbno,
2531 struct xfs_buf **bpp,
2532 int whichfork)
2533{
2534 struct xfs_buf *bp;
2535 struct xfs_buf_map map;
2536 struct xfs_buf_map *mapp;
2537 int nmap;
2538 int error;
2539
2540 *bpp = NULL;
2541 mapp = &map;
2542 nmap = 1;
ff105f75 2543 error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
a2ceac1f
DC
2544 &mapp, &nmap);
2545 if (error) {
2546 /* mapping a hole is not an error, but we don't continue */
2547 if (error == -1)
2bd0ea18 2548 error = 0;
a2ceac1f 2549 goto out_free;
2bd0ea18 2550 }
a2ceac1f
DC
2551
2552 bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp,
2553 mapp, nmap, 0);
12b53197 2554 error = bp ? bp->b_error : -EIO;
a2ceac1f 2555 if (error) {
5a35bf2c
DC
2556 if (bp)
2557 xfs_trans_brelse(trans, bp);
a2ceac1f
DC
2558 goto out_free;
2559 }
2560
2561 *bpp = bp;
2562
2563out_free:
2564 if (mapp != &map)
2565 kmem_free(mapp);
2566
2567 return error;
2568}
2569
2570/*
2571 * Get a buffer for the dir/attr block, fill in the contents.
2572 */
2573int
2574xfs_da_read_buf(
2575 struct xfs_trans *trans,
2576 struct xfs_inode *dp,
2577 xfs_dablk_t bno,
2578 xfs_daddr_t mappedbno,
2579 struct xfs_buf **bpp,
2580 int whichfork,
2581 const struct xfs_buf_ops *ops)
2582{
2583 struct xfs_buf *bp;
2584 struct xfs_buf_map map;
2585 struct xfs_buf_map *mapp;
2586 int nmap;
2587 int error;
2588
2589 *bpp = NULL;
2590 mapp = &map;
2591 nmap = 1;
ff105f75 2592 error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
a2ceac1f
DC
2593 &mapp, &nmap);
2594 if (error) {
2595 /* mapping a hole is not an error, but we don't continue */
2596 if (error == -1)
2597 error = 0;
2598 goto out_free;
2599 }
2600
2601 error = xfs_trans_read_buf_map(dp->i_mount, trans,
2602 dp->i_mount->m_ddev_targp,
2603 mapp, nmap, 0, &bp, ops);
2604 if (error)
2605 goto out_free;
2606
2607 if (whichfork == XFS_ATTR_FORK)
2608 xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
2bd0ea18 2609 else
a2ceac1f 2610 xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
a2ceac1f
DC
2611 *bpp = bp;
2612out_free:
2bd0ea18 2613 if (mapp != &map)
5e656dbb 2614 kmem_free(mapp);
2bd0ea18 2615
a2ceac1f 2616 return error;
2bd0ea18
NS
2617}
2618
2619/*
5e656dbb 2620 * Readahead the dir/attr block.
2bd0ea18 2621 */
5e656dbb
BN
2622xfs_daddr_t
2623xfs_da_reada_buf(
a2ceac1f
DC
2624 struct xfs_inode *dp,
2625 xfs_dablk_t bno,
2626 xfs_daddr_t mappedbno,
2627 int whichfork,
2628 const struct xfs_buf_ops *ops)
2bd0ea18 2629{
a2ceac1f
DC
2630 struct xfs_buf_map map;
2631 struct xfs_buf_map *mapp;
2632 int nmap;
2633 int error;
2bd0ea18 2634
a2ceac1f
DC
2635 mapp = &map;
2636 nmap = 1;
ff105f75 2637 error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
a2ceac1f
DC
2638 &mapp, &nmap);
2639 if (error) {
2640 /* mapping a hole is not an error, but we don't continue */
2641 if (error == -1)
2642 error = 0;
2643 goto out_free;
2644 }
2645
2646 mappedbno = mapp[0].bm_bn;
2647 xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops);
2648
2649out_free:
2650 if (mapp != &map)
2651 kmem_free(mapp);
2652
2653 if (error)
5e656dbb 2654 return -1;
a2ceac1f 2655 return mappedbno;
2bd0ea18 2656}