]> git.ipfire.org Git - people/ms/linux.git/blame - fs/xfs/xfs_fsops.c
xfs: convert growfs AG header init to use buffer lists
[people/ms/linux.git] / fs / xfs / xfs_fsops.c
CommitLineData
1da177e4 1/*
7b718769
NS
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
1da177e4 4 *
7b718769
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
1da177e4
LT
7 * published by the Free Software Foundation.
8 *
7b718769
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
1da177e4 13 *
7b718769
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1da177e4 17 */
1da177e4 18#include "xfs.h"
a844f451 19#include "xfs_fs.h"
70a9883c 20#include "xfs_shared.h"
239880ef 21#include "xfs_format.h"
a4fbe6ab 22#include "xfs_log_format.h"
239880ef 23#include "xfs_trans_resv.h"
1da177e4 24#include "xfs_sb.h"
1da177e4 25#include "xfs_mount.h"
3ab78df2 26#include "xfs_defer.h"
8f66193c
DC
27#include "xfs_da_format.h"
28#include "xfs_da_btree.h"
a844f451 29#include "xfs_inode.h"
239880ef 30#include "xfs_trans.h"
a844f451 31#include "xfs_inode_item.h"
1da177e4 32#include "xfs_error.h"
a4fbe6ab
DC
33#include "xfs_btree.h"
34#include "xfs_alloc_btree.h"
1da177e4 35#include "xfs_alloc.h"
e70d829f 36#include "xfs_rmap_btree.h"
1da177e4
LT
37#include "xfs_ialloc.h"
38#include "xfs_fsops.h"
39#include "xfs_itable.h"
1da177e4
LT
40#include "xfs_trans_space.h"
41#include "xfs_rtalloc.h"
0b1b213f 42#include "xfs_trace.h"
239880ef 43#include "xfs_log.h"
a4fbe6ab 44#include "xfs_filestream.h"
340785cc 45#include "xfs_rmap.h"
84d69619 46#include "xfs_ag_resv.h"
1da177e4
LT
47
48/*
49 * File system operations
50 */
51
fd23683c
DC
52static struct xfs_buf *
53xfs_growfs_get_hdr_buf(
54 struct xfs_mount *mp,
55 xfs_daddr_t blkno,
56 size_t numblks,
1813dd64
DC
57 int flags,
58 const struct xfs_buf_ops *ops)
fd23683c
DC
59{
60 struct xfs_buf *bp;
61
62 bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags);
63 if (!bp)
64 return NULL;
65
66 xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
67 bp->b_bn = blkno;
68 bp->b_maps[0].bm_bn = blkno;
1813dd64 69 bp->b_ops = ops;
fd23683c
DC
70
71 return bp;
72}
73
cce77bcf
DC
/*
 * Write new AG headers to disk. Non-transactional, but written
 * synchronously so they are completed prior to the growfs transaction
 * being logged.
 *
 * Formats the AGF, AGFL and AGI headers plus the root blocks of each
 * per-AG btree (BNO, CNT, and - feature dependent - RMAP, INO, FINO,
 * REFC) for AG @agno of size @agsize, queueing each buffer onto
 * @buffer_list for later delwri submission by the caller.  The number
 * of free blocks recorded in the new AG is added to *@nfree.
 *
 * Returns 0 on success or -ENOMEM if a header buffer could not be
 * allocated; on error, buffers already queued remain on @buffer_list
 * for the caller to cancel.
 */
static int
xfs_grow_ag_headers(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_extlen_t		agsize,
	xfs_rfsblock_t		*nfree,
	struct list_head	*buffer_list)
{
	struct xfs_agf		*agf;
	struct xfs_agi		*agi;
	struct xfs_agfl		*agfl;
	__be32			*agfl_bno;
	xfs_alloc_rec_t		*arec;
	struct xfs_buf		*bp;
	int			bucket;
	xfs_extlen_t		tmpsize;
	int			error = 0;

	/*
	 * AG freespace header block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0,
			&xfs_agf_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	agf = XFS_BUF_TO_AGF(bp);
	agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
	agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
	agf->agf_seqno = cpu_to_be32(agno);
	agf->agf_length = cpu_to_be32(agsize);
	/* both free space btrees start as single-level trees */
	agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp));
	agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
	agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
	agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
		agf->agf_roots[XFS_BTNUM_RMAPi] =
					cpu_to_be32(XFS_RMAP_BLOCK(mp));
		agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
		agf->agf_rmap_blocks = cpu_to_be32(1);
	}

	/* the new AG starts with an empty free list */
	agf->agf_flfirst = cpu_to_be32(1);
	agf->agf_fllast = 0;
	agf->agf_flcount = 0;
	/* everything past the statically pre-allocated header area is free */
	tmpsize = agsize - mp->m_ag_prealloc_blocks;
	agf->agf_freeblks = cpu_to_be32(tmpsize);
	agf->agf_longest = cpu_to_be32(tmpsize);
	if (xfs_sb_version_hascrc(&mp->m_sb))
		uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
		agf->agf_refcount_root = cpu_to_be32(
				xfs_refc_block(mp));
		agf->agf_refcount_level = cpu_to_be32(1);
		agf->agf_refcount_blocks = cpu_to_be32(1);
	}
	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/*
	 * AG freelist header block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0,
			&xfs_agfl_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	agfl = XFS_BUF_TO_AGFL(bp);
	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
		agfl->agfl_seqno = cpu_to_be32(agno);
		uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
	}

	/* no free list blocks yet - mark every slot empty */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
	for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++)
		agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);

	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/*
	 * AG inode header block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0,
			&xfs_agi_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	agi = XFS_BUF_TO_AGI(bp);
	agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
	agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
	agi->agi_seqno = cpu_to_be32(agno);
	agi->agi_length = cpu_to_be32(agsize);
	agi->agi_count = 0;		/* no inodes allocated yet */
	agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp));
	agi->agi_level = cpu_to_be32(1);
	agi->agi_freecount = 0;
	agi->agi_newino = cpu_to_be32(NULLAGINO);
	agi->agi_dirino = cpu_to_be32(NULLAGINO);
	if (xfs_sb_version_hascrc(&mp->m_sb))
		uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
		agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
		agi->agi_free_level = cpu_to_be32(1);
	}
	/* all unlinked-inode hash buckets start empty */
	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
		agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);

	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/*
	 * BNO btree root block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_allocbt_buf_ops);

	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	/* single record covering all space after the pre-allocated area */
	xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, agno, 0);

	arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
	arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
	arec->ar_blockcount = cpu_to_be32(
		agsize - be32_to_cpu(arec->ar_startblock));

	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/*
	 * CNT btree root block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_allocbt_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, agno, 0);

	arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
	arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
	arec->ar_blockcount = cpu_to_be32(
		agsize - be32_to_cpu(arec->ar_startblock));
	/*
	 * BNO and CNT trees index the same free extent; the free block
	 * count is only accumulated once, here.
	 */
	*nfree += be32_to_cpu(arec->ar_blockcount);

	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/* RMAP btree root block */
	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
		struct xfs_rmap_rec	*rrec;
		struct xfs_btree_block	*block;

		bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, XFS_RMAP_BLOCK(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_rmapbt_buf_ops);
		if (!bp) {
			error = -ENOMEM;
			goto out_error;
		}

		xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 0,
					agno, 0);
		block = XFS_BUF_TO_BLOCK(bp);


		/*
		 * mark the AG header regions as static metadata The BNO
		 * btree block is the first block after the headers, so
		 * it's location defines the size of region the static
		 * metadata consumes.
		 *
		 * Note: unlike mkfs, we never have to account for log
		 * space when growing the data regions
		 */
		rrec = XFS_RMAP_REC_ADDR(block, 1);
		rrec->rm_startblock = 0;
		rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);

		/* account freespace btree root blocks */
		rrec = XFS_RMAP_REC_ADDR(block, 2);
		rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
		rrec->rm_blockcount = cpu_to_be32(2);
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);

		/* account inode btree root blocks */
		rrec = XFS_RMAP_REC_ADDR(block, 3);
		rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
		rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
						XFS_IBT_BLOCK(mp));
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);

		/* account for rmap btree root */
		rrec = XFS_RMAP_REC_ADDR(block, 4);
		rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
		rrec->rm_blockcount = cpu_to_be32(1);
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);

		/* account for refc btree root */
		if (xfs_sb_version_hasreflink(&mp->m_sb)) {
			rrec = XFS_RMAP_REC_ADDR(block, 5);
			rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp));
			rrec->rm_blockcount = cpu_to_be32(1);
			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC);
			rrec->rm_offset = 0;
			be16_add_cpu(&block->bb_numrecs, 1);
		}

		xfs_buf_delwri_queue(bp, buffer_list);
		xfs_buf_relse(bp);
	}

	/*
	 * INO btree root block
	 */
	bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_inobt_buf_ops);
	if (!bp) {
		error = -ENOMEM;
		goto out_error;
	}

	xfs_btree_init_block(mp, bp, XFS_BTNUM_INO , 0, 0, agno, 0);
	xfs_buf_delwri_queue(bp, buffer_list);
	xfs_buf_relse(bp);

	/*
	 * FINO btree root block
	 */
	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
		bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_inobt_buf_ops);
		if (!bp) {
			error = -ENOMEM;
			goto out_error;
		}

		xfs_btree_init_block(mp, bp, XFS_BTNUM_FINO, 0, 0, agno, 0);
		xfs_buf_delwri_queue(bp, buffer_list);
		xfs_buf_relse(bp);
	}

	/*
	 * refcount btree root block
	 */
	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
		bp = xfs_growfs_get_hdr_buf(mp,
			XFS_AGB_TO_DADDR(mp, agno, xfs_refc_block(mp)),
			BTOBB(mp->m_sb.sb_blocksize), 0,
			&xfs_refcountbt_buf_ops);
		if (!bp) {
			error = -ENOMEM;
			goto out_error;
		}

		xfs_btree_init_block(mp, bp, XFS_BTNUM_REFC, 0, 0, agno, 0);
		xfs_buf_delwri_queue(bp, buffer_list);
		xfs_buf_relse(bp);
	}

out_error:
	return error;
}
378
1da177e4
LT
/*
 * Grow the data section of the filesystem to in->newblocks blocks and/or
 * change the inode maximum percentage to in->imaxpct.
 *
 * New AG headers are written via a delayed-write buffer list before the
 * growfs transaction commits; the transaction then atomically updates
 * the superblock counters.  Secondary superblocks are updated last on a
 * best-effort basis (errors there are reported but do not undo the grow).
 *
 * Returns 0 on success or a negative errno.  If only secondary
 * superblock updates failed, the grow is already live and the first
 * such error is returned.
 */
static int
xfs_growfs_data_private(
	xfs_mount_t		*mp,		/* mount point for filesystem */
	xfs_growfs_data_t	*in)		/* growfs data input struct */
{
	xfs_agf_t		*agf;
	xfs_agi_t		*agi;
	xfs_agnumber_t		agno;
	xfs_extlen_t		agsize;
	xfs_buf_t		*bp;
	int			dpct;		/* change in imax percentage */
	int			error, saved_error = 0;
	xfs_agnumber_t		nagcount;	/* new AG count */
	xfs_agnumber_t		nagimax = 0;
	xfs_rfsblock_t		nb, nb_mod;
	xfs_rfsblock_t		new;
	xfs_rfsblock_t		nfree;		/* free blocks added */
	xfs_agnumber_t		oagcount;	/* old AG count */
	int			pct;
	xfs_trans_t		*tp;
	LIST_HEAD		(buffer_list);

	nb = in->newblocks;
	pct = in->imaxpct;
	/* shrinking is not supported; percentage must be sane */
	if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100)
		return -EINVAL;
	if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
		return error;
	dpct = pct - mp->m_sb.sb_imax_pct;
	/* probe the last sector of the grown device to prove it exists */
	error = xfs_buf_read_uncached(mp->m_ddev_targp,
				XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
				XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
	if (error)
		return error;
	xfs_buf_relse(bp);

	new = nb;	/* use new as a temporary here */
	nb_mod = do_div(new, mp->m_sb.sb_agblocks);
	nagcount = new + (nb_mod != 0);
	/* a runt last AG below the minimum size is dropped entirely */
	if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) {
		nagcount--;
		nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks;
		if (nb < mp->m_sb.sb_dblocks)
			return -EINVAL;
	}
	new = nb - mp->m_sb.sb_dblocks;		/* blocks being added */
	oagcount = mp->m_sb.sb_agcount;

	/* allocate the new per-ag structures */
	if (nagcount > oagcount) {
		error = xfs_initialize_perag(mp, nagcount, &nagimax);
		if (error)
			return error;
	}

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
			XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;

	/*
	 * Write new AG headers to disk. Non-transactional, but need to be
	 * written and completed prior to the growfs transaction being logged.
	 * To do this, we use a delayed write buffer list and wait for
	 * submission and IO completion of the list as a whole. This allows the
	 * IO subsystem to merge all the AG headers in a single AG into a single
	 * IO and hide most of the latency of the IO from us.
	 *
	 * This also means that if we get an error whilst building the buffer
	 * list to write, we can cancel the entire list without having written
	 * anything.
	 */
	nfree = 0;
	for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {

		if (agno == nagcount - 1)
			agsize = nb -
				(agno * (xfs_rfsblock_t)mp->m_sb.sb_agblocks);
		else
			agsize = mp->m_sb.sb_agblocks;

		error = xfs_grow_ag_headers(mp, agno, agsize, &nfree,
					    &buffer_list);
		if (error) {
			xfs_buf_delwri_cancel(&buffer_list);
			goto error0;
		}
	}
	error = xfs_buf_delwri_submit(&buffer_list);
	if (error)
		goto error0;

	xfs_trans_agblocks_delta(tp, nfree);

	/*
	 * There are new blocks in the old last a.g.
	 */
	if (new) {
		struct xfs_owner_info	oinfo;

		/*
		 * Change the agi length.
		 */
		error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
		if (error) {
			goto error0;
		}
		ASSERT(bp);
		agi = XFS_BUF_TO_AGI(bp);
		be32_add_cpu(&agi->agi_length, new);
		ASSERT(nagcount == oagcount ||
		       be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks);
		xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH);
		/*
		 * Change agf length.
		 */
		error = xfs_alloc_read_agf(mp, tp, agno, 0, &bp);
		if (error) {
			goto error0;
		}
		ASSERT(bp);
		agf = XFS_BUF_TO_AGF(bp);
		be32_add_cpu(&agf->agf_length, new);
		ASSERT(be32_to_cpu(agf->agf_length) ==
		       be32_to_cpu(agi->agi_length));

		xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);

		/*
		 * Free the new space.
		 *
		 * XFS_RMAP_OWN_NULL is used here to tell the rmap btree that
		 * this doesn't actually exist in the rmap btree.
		 */
		xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
		error = xfs_rmap_free(tp, bp, agno,
				be32_to_cpu(agf->agf_length) - new,
				new, &oinfo);
		if (error)
			goto error0;
		error = xfs_free_extent(tp,
				XFS_AGB_TO_FSB(mp, agno,
					be32_to_cpu(agf->agf_length) - new),
				new, &oinfo, XFS_AG_RESV_NONE);
		if (error)
			goto error0;
	}

	/*
	 * Update changed superblock fields transactionally. These are not
	 * seen by the rest of the world until the transaction commit applies
	 * them atomically to the superblock.
	 */
	if (nagcount > oagcount)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount);
	if (nb > mp->m_sb.sb_dblocks)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_DBLOCKS,
				 nb - mp->m_sb.sb_dblocks);
	if (nfree)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, nfree);
	if (dpct)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
	xfs_trans_set_sync(tp);
	error = xfs_trans_commit(tp);
	if (error)
		return error;

	/* New allocation groups fully initialized, so update mount struct */
	if (nagimax)
		mp->m_maxagi = nagimax;
	if (mp->m_sb.sb_imax_pct) {
		uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
		do_div(icount, 100);
		mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
	} else
		mp->m_maxicount = 0;
	xfs_set_low_space_thresholds(mp);
	mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);

	/*
	 * If we expanded the last AG, free the per-AG reservation
	 * so we can reinitialize it with the new size.
	 */
	if (new) {
		struct xfs_perag	*pag;

		pag = xfs_perag_get(mp, agno);
		error = xfs_ag_resv_free(pag);
		xfs_perag_put(pag);
		if (error)
			goto out;
	}

	/* Reserve AG metadata blocks. */
	error = xfs_fs_reserve_ag_blocks(mp);
	if (error && error != -ENOSPC)
		goto out;

	/* update secondary superblocks. */
	for (agno = 1; agno < nagcount; agno++) {
		error = 0;
		/*
		 * new secondary superblocks need to be zeroed, not read from
		 * disk as the contents of the new area we are growing into is
		 * completely unknown.
		 */
		if (agno < oagcount) {
			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
				  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
				  XFS_FSS_TO_BB(mp, 1), 0, &bp,
				  &xfs_sb_buf_ops);
		} else {
			bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
				  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
				  XFS_FSS_TO_BB(mp, 1), 0);
			if (bp) {
				bp->b_ops = &xfs_sb_buf_ops;
				xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
			} else
				error = -ENOMEM;
		}

		/*
		 * If we get an error reading or writing alternate superblocks,
		 * continue.  xfs_repair chooses the "best" superblock based
		 * on most matches; if we break early, we'll leave more
		 * superblocks un-updated than updated, and xfs_repair may
		 * pick them over the properly-updated primary.
		 */
		if (error) {
			xfs_warn(mp,
		"error %d reading secondary superblock for ag %d",
				error, agno);
			saved_error = error;
			continue;
		}
		xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);

		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
		if (error) {
			xfs_warn(mp,
		"write error %d updating secondary superblock for ag %d",
				error, agno);
			saved_error = error;
			continue;
		}
	}

 out:
	return saved_error ? saved_error : error;

 error0:
	xfs_trans_cancel(tp);
	return error;
}
635
636static int
637xfs_growfs_log_private(
638 xfs_mount_t *mp, /* mount point for filesystem */
639 xfs_growfs_log_t *in) /* growfs log input struct */
640{
641 xfs_extlen_t nb;
642
643 nb = in->newblocks;
644 if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES))
2451337d 645 return -EINVAL;
1da177e4
LT
646 if (nb == mp->m_sb.sb_logblocks &&
647 in->isint == (mp->m_sb.sb_logstart != 0))
2451337d 648 return -EINVAL;
1da177e4
LT
649 /*
650 * Moving the log is hard, need new interfaces to sync
651 * the log first, hold off all activity while moving it.
652 * Can have shorter or longer log in the same space,
653 * or transform internal to external log or vice versa.
654 */
2451337d 655 return -ENOSYS;
1da177e4
LT
656}
657
658/*
659 * protected versions of growfs function acquire and release locks on the mount
660 * point - exported through ioctls: XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG,
661 * XFS_IOC_FSGROWFSRT
662 */
663
664
665int
666xfs_growfs_data(
667 xfs_mount_t *mp,
668 xfs_growfs_data_t *in)
669{
670 int error;
743bb465 671
672 if (!capable(CAP_SYS_ADMIN))
2451337d 673 return -EPERM;
cc92e7ac 674 if (!mutex_trylock(&mp->m_growlock))
2451337d 675 return -EWOULDBLOCK;
1da177e4 676 error = xfs_growfs_data_private(mp, in);
52785112
CH
677 /*
678 * Increment the generation unconditionally, the error could be from
679 * updating the secondary superblocks, in which case the new size
680 * is live already.
681 */
682 mp->m_generation++;
cc92e7ac 683 mutex_unlock(&mp->m_growlock);
1da177e4
LT
684 return error;
685}
686
687int
688xfs_growfs_log(
689 xfs_mount_t *mp,
690 xfs_growfs_log_t *in)
691{
692 int error;
743bb465 693
694 if (!capable(CAP_SYS_ADMIN))
2451337d 695 return -EPERM;
cc92e7ac 696 if (!mutex_trylock(&mp->m_growlock))
2451337d 697 return -EWOULDBLOCK;
1da177e4 698 error = xfs_growfs_log_private(mp, in);
cc92e7ac 699 mutex_unlock(&mp->m_growlock);
1da177e4
LT
700 return error;
701}
702
703/*
704 * exported through ioctl XFS_IOC_FSCOUNTS
705 */
706
707int
708xfs_fs_counts(
709 xfs_mount_t *mp,
710 xfs_fsop_counts_t *cnt)
711{
501ab323 712 cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
e88b64ea 713 cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
0d485ada 714 cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
52548852 715 mp->m_alloc_set_aside;
501ab323 716
3685c2a1 717 spin_lock(&mp->m_sb_lock);
1da177e4 718 cnt->freertx = mp->m_sb.sb_frextents;
3685c2a1 719 spin_unlock(&mp->m_sb_lock);
1da177e4
LT
720 return 0;
721}
722
723/*
724 * exported through ioctl XFS_IOC_SET_RESBLKS & XFS_IOC_GET_RESBLKS
725 *
726 * xfs_reserve_blocks is called to set m_resblks
727 * in the in-core mount table. The number of unused reserved blocks
c41564b5 728 * is kept in m_resblks_avail.
1da177e4
LT
729 *
730 * Reserve the requested number of blocks if available. Otherwise return
731 * as many as possible to satisfy the request. The actual number
732 * reserved are returned in outval
733 *
734 * A null inval pointer indicates that only the current reserved blocks
735 * available should be returned no settings are changed.
736 */
737
int
xfs_reserve_blocks(
	xfs_mount_t		*mp,
	uint64_t		*inval,		/* requested reserve size, or NULL to query */
	xfs_fsop_resblks_t	*outval)	/* out: resulting reserve counters */
{
	int64_t			lcounter, delta;
	int64_t			fdblks_delta = 0;
	uint64_t		request;
	int64_t			free;
	int			error = 0;

	/* If inval is null, report current values and return */
	if (inval == (uint64_t *)NULL) {
		if (!outval)
			return -EINVAL;
		outval->resblks = mp->m_resblks;
		outval->resblks_avail = mp->m_resblks_avail;
		return 0;
	}

	request = *inval;

	/*
	 * With per-cpu counters, this becomes an interesting problem. we need
	 * to work out if we are freeing or allocation blocks first, then we can
	 * do the modification as necessary.
	 *
	 * We do this under the m_sb_lock so that if we are near ENOSPC, we will
	 * hold out any changes while we work out what to do. This means that
	 * the amount of free space can change while we do this, so we need to
	 * retry if we end up trying to reserve more space than is available.
	 */
	spin_lock(&mp->m_sb_lock);

	/*
	 * If our previous reservation was larger than the current value,
	 * then move any unused blocks back to the free pool. Modify the resblks
	 * counters directly since we shouldn't have any problems unreserving
	 * space.
	 */
	if (mp->m_resblks > request) {
		lcounter = mp->m_resblks_avail - request;
		if (lcounter  > 0) {		/* release unused blocks */
			fdblks_delta = lcounter;
			mp->m_resblks_avail -= lcounter;
		}
		mp->m_resblks = request;
		if (fdblks_delta) {
			/*
			 * m_sb_lock is dropped around the counter update
			 * because xfs_mod_fdblocks must not be called with
			 * it held.
			 */
			spin_unlock(&mp->m_sb_lock);
			error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
			spin_lock(&mp->m_sb_lock);
		}

		goto out;
	}

	/*
	 * If the request is larger than the current reservation, reserve the
	 * blocks before we update the reserve counters. Sample m_fdblocks and
	 * perform a partial reservation if the request exceeds free space.
	 */
	error = -ENOSPC;
	do {
		free = percpu_counter_sum(&mp->m_fdblocks) -
						mp->m_alloc_set_aside;
		if (!free)
			break;

		delta = request - mp->m_resblks;
		lcounter = free - delta;
		if (lcounter < 0)
			/* We can't satisfy the request, just get what we can */
			fdblks_delta = free;
		else
			fdblks_delta = delta;

		/*
		 * We'll either succeed in getting space from the free block
		 * count or we'll get an ENOSPC. If we get a ENOSPC, it means
		 * things changed while we were calculating fdblks_delta and so
		 * we should try again to see if there is anything left to
		 * reserve.
		 *
		 * Don't set the reserved flag here - we don't want to reserve
		 * the extra reserve blocks from the reserve.....
		 */
		spin_unlock(&mp->m_sb_lock);
		error = xfs_mod_fdblocks(mp, -fdblks_delta, 0);
		spin_lock(&mp->m_sb_lock);
	} while (error == -ENOSPC);

	/*
	 * Update the reserve counters if blocks have been successfully
	 * allocated.
	 */
	if (!error && fdblks_delta) {
		mp->m_resblks += fdblks_delta;
		mp->m_resblks_avail += fdblks_delta;
	}

out:
	/* report the final counters whether or not the reservation worked */
	if (outval) {
		outval->resblks = mp->m_resblks;
		outval->resblks_avail = mp->m_resblks_avail;
	}

	spin_unlock(&mp->m_sb_lock);
	return error;
}
848
1da177e4
LT
849int
850xfs_fs_goingdown(
851 xfs_mount_t *mp,
c8ce540d 852 uint32_t inflags)
1da177e4
LT
853{
854 switch (inflags) {
855 case XFS_FSOP_GOING_FLAGS_DEFAULT: {
b267ce99 856 struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
1da177e4 857
f33c6797 858 if (sb && !IS_ERR(sb)) {
7d04a335 859 xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
1da177e4
LT
860 thaw_bdev(sb->s_bdev, sb);
861 }
189f4bf2 862
1da177e4
LT
863 break;
864 }
865 case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
7d04a335 866 xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
1da177e4
LT
867 break;
868 case XFS_FSOP_GOING_FLAGS_NOLOGFLUSH:
7d04a335
NS
869 xfs_force_shutdown(mp,
870 SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR);
1da177e4
LT
871 break;
872 default:
2451337d 873 return -EINVAL;
1da177e4
LT
874 }
875
876 return 0;
877}
2af51f3a
DC
878
/*
 * Force a shutdown of the filesystem instantly while keeping the filesystem
 * consistent. We don't do an unmount here; just shutdown the shop, make sure
 * that absolutely nothing persistent happens to this filesystem after this
 * point.
 */
void
xfs_do_force_shutdown(
	xfs_mount_t	*mp,
	int		flags,		/* SHUTDOWN_* reason flags */
	char		*fname,		/* caller's source file, for the log */
	int		lnnum)		/* caller's source line, for the log */
{
	int		logerror;

	logerror = flags & SHUTDOWN_LOG_IO_ERROR;

	/* record who asked for the shutdown, unless it's a forced unmount */
	if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
		xfs_notice(mp,
"%s(0x%x) called from line %d of file %s. Return address = "PTR_FMT,
			__func__, flags, lnnum, fname, __return_address);
	}
	/*
	 * No need to duplicate efforts.
	 */
	if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
		return;

	/*
	 * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
	 * queue up anybody new on the log reservations, and wakes up
	 * everybody who's sleeping on log reservations to tell them
	 * the bad news.
	 */
	if (xfs_log_force_umount(mp, logerror))
		return;

	/* log an alert describing why the filesystem is going down */
	if (flags & SHUTDOWN_CORRUPT_INCORE) {
		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
"Corruption of in-memory data detected. Shutting down filesystem");
		if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
			xfs_stack_trace();
	} else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
		if (logerror) {
			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
"Log I/O Error Detected. Shutting down filesystem");
		} else if (flags & SHUTDOWN_DEVICE_REQ) {
			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
"All device paths lost. Shutting down filesystem");
		} else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
"I/O Error Detected. Shutting down filesystem");
		}
	}
	if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
		xfs_alert(mp,
"Please umount the filesystem and rectify the problem(s)");
	}
}
84d69619
DW
938
939/*
940 * Reserve free space for per-AG metadata.
941 */
942int
943xfs_fs_reserve_ag_blocks(
944 struct xfs_mount *mp)
945{
946 xfs_agnumber_t agno;
947 struct xfs_perag *pag;
948 int error = 0;
949 int err2;
950
951 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
952 pag = xfs_perag_get(mp, agno);
953 err2 = xfs_ag_resv_init(pag);
954 xfs_perag_put(pag);
955 if (err2 && !error)
956 error = err2;
957 }
958
959 if (error && error != -ENOSPC) {
960 xfs_warn(mp,
961 "Error %d reserving per-AG metadata reserve pool.", error);
962 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
963 }
964
965 return error;
966}
967
968/*
969 * Free space reserved for per-AG metadata.
970 */
971int
972xfs_fs_unreserve_ag_blocks(
973 struct xfs_mount *mp)
974{
975 xfs_agnumber_t agno;
976 struct xfs_perag *pag;
977 int error = 0;
978 int err2;
979
980 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
981 pag = xfs_perag_get(mp, agno);
982 err2 = xfs_ag_resv_free(pag);
983 xfs_perag_put(pag);
984 if (err2 && !error)
985 error = err2;
986 }
987
988 if (error)
989 xfs_warn(mp,
990 "Error %d freeing per-AG metadata reserve pool.", error);
991
992 return error;
993}