2 * mdadm - manage Linux "md" devices aka RAID arrays.
4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neilb@suse.de>
25 #define HAVE_STDINT_H 1
29 * All handling for the 0.90.0 version superblock is in
32 * - finding, loading, and writing the superblock.
33 * - initialising a new superblock
34 * - printing the superblock for --examine
35 * - printing part of the superblock for --detail
39 static unsigned long calc_sb0_csum(mdp_super_t
*super
)
41 unsigned long csum
= super
->sb_csum
;
42 unsigned long newcsum
;
44 newcsum
= calc_csum(super
, MD_SB_BYTES
);
45 super
->sb_csum
= csum
;
49 static void super0_swap_endian(struct mdp_superblock_s
*sb
)
51 /* as super0 superblocks are host-endian, it is sometimes
52 * useful to be able to swap the endianness
53 * as (almost) everything is u32's we byte-swap every 4byte
55 * We then also have to swap the events_hi and events_lo
57 char *sbc
= (char *)sb
;
61 for (i
=0; i
< MD_SB_BYTES
; i
+=4) {
70 sb
->events_hi
= sb
->events_lo
;
73 t32
= sb
->cp_events_hi
;
74 sb
->cp_events_hi
= sb
->cp_events_lo
;
75 sb
->cp_events_lo
= t32
;
79 static void examine_super0(struct supertype
*st
, char *homehost
)
81 mdp_super_t
*sb
= st
->sb
;
87 printf(" Magic : %08x\n", sb
->md_magic
);
88 printf(" Version : %d.%02d.%02d\n",
89 sb
->major_version
, sb
->minor_version
, sb
->patch_version
);
90 if (sb
->minor_version
>= 90) {
91 printf(" UUID : %08x:%08x:%08x:%08x", sb
->set_uuid0
,
92 sb
->set_uuid1
, sb
->set_uuid2
, sb
->set_uuid3
);
97 hash
= sha1_buffer(homehost
, strlen(homehost
), buf
);
98 if (memcmp(&sb
->set_uuid2
, hash
, 8) == 0)
99 printf(" (local to host %s)", homehost
);
103 printf(" UUID : %08x\n", sb
->set_uuid0
);
105 if (sb
->not_persistent
)
106 printf(" Eedk : not persistent\n");
109 printf(" Creation Time : %.24s\n", ctime(&atime
));
110 c
= map_num(pers
, sb
->level
);
111 printf(" Raid Level : %s\n", c
?c
:"-unknown-");
112 if ((int)sb
->level
> 0) {
113 int ddsks
= 0, ddsks_denom
= 1;
114 printf(" Used Dev Size : %d%s\n", sb
->size
,
115 human_size((long long)sb
->size
<<10));
122 ddsks
= sb
->raid_disks
- 1;
125 ddsks
= sb
->raid_disks
- 2;
128 ddsks
= sb
->raid_disks
;
130 (sb
->layout
& 255) * ((sb
->layout
>> 8) & 255);
133 long long asize
= sb
->size
;
134 asize
= (asize
<< 10) * ddsks
/ ddsks_denom
;
135 printf(" Array Size : %llu%s\n",
136 asize
>> 10, human_size(asize
));
139 printf(" Raid Devices : %d\n", sb
->raid_disks
);
140 printf(" Total Devices : %d\n", sb
->nr_disks
);
141 printf("Preferred Minor : %d\n", sb
->md_minor
);
143 if (sb
->minor_version
> 90 && (sb
->reshape_position
+ 1) != 0) {
144 printf(" Reshape pos'n : %llu%s\n",
145 (unsigned long long)sb
->reshape_position
/ 2,
146 human_size((long long)sb
->reshape_position
<< 9));
147 if (sb
->delta_disks
) {
148 printf(" Delta Devices : %d", sb
->delta_disks
);
149 printf(" (%d->%d)\n", sb
->raid_disks
-sb
->delta_disks
,
151 if (((int)sb
->delta_disks
) < 0)
152 delta_extra
= - sb
->delta_disks
;
154 if (sb
->new_level
!= sb
->level
) {
155 c
= map_num(pers
, sb
->new_level
);
156 printf(" New Level : %s\n", c
?c
:"-unknown-");
158 if (sb
->new_layout
!= sb
->layout
) {
159 if (sb
->level
== 5) {
160 c
= map_num(r5layout
, sb
->new_layout
);
161 printf(" New Layout : %s\n",
164 if (sb
->level
== 6) {
165 c
= map_num(r6layout
, sb
->new_layout
);
166 printf(" New Layout : %s\n",
169 if (sb
->level
== 10) {
170 printf(" New Layout : near=%d, %s=%d\n",
172 (sb
->new_layout
&0x10000)?"offset":"far",
173 (sb
->new_layout
>>8)&255);
176 if (sb
->new_chunk
!= sb
->chunk_size
)
177 printf(" New Chunksize : %d\n", sb
->new_chunk
);
181 printf(" Update Time : %.24s\n", ctime(&atime
));
182 printf(" State : %s\n",
183 (sb
->state
&(1 << MD_SB_CLEAN
)) ? "clean":"active");
184 if (sb
->state
& (1 << MD_SB_BITMAP_PRESENT
))
185 printf("Internal Bitmap : present\n");
186 printf(" Active Devices : %d\n", sb
->active_disks
);
187 printf("Working Devices : %d\n", sb
->working_disks
);
188 printf(" Failed Devices : %d\n", sb
->failed_disks
);
189 printf(" Spare Devices : %d\n", sb
->spare_disks
);
190 if (calc_sb0_csum(sb
) == sb
->sb_csum
)
191 printf(" Checksum : %x - correct\n", sb
->sb_csum
);
193 printf(" Checksum : %x - expected %lx\n",
194 sb
->sb_csum
, calc_sb0_csum(sb
));
195 printf(" Events : %llu\n",
196 ((unsigned long long)sb
->events_hi
<< 32) + sb
->events_lo
);
198 if (sb
->level
== 5) {
199 c
= map_num(r5layout
, sb
->layout
);
200 printf(" Layout : %s\n", c
?c
:"-unknown-");
202 if (sb
->level
== 6) {
203 c
= map_num(r6layout
, sb
->layout
);
204 printf(" Layout : %s\n", c
?c
:"-unknown-");
206 if (sb
->level
== 10) {
208 print_r10_layout(sb
->layout
);
217 printf(" Chunk Size : %dK\n", sb
->chunk_size
/ 1024);
220 printf(" Rounding : %dK\n", sb
->chunk_size
/ 1024);
226 printf(" Number Major Minor RaidDevice State\n");
228 d
< (signed int)(sb
->raid_disks
+ delta_extra
+ sb
->spare_disks
);
234 if (d
>=0) dp
= &sb
->disks
[d
];
235 else dp
= &sb
->this_disk
;
236 snprintf(nb
, sizeof(nb
), "%4d", d
);
237 printf("%4s %5d %5d %5d %5d ", d
< 0 ? "this" : nb
,
238 dp
->number
, dp
->major
, dp
->minor
, dp
->raid_disk
);
239 wonly
= dp
->state
& (1 << MD_DISK_WRITEMOSTLY
);
240 failfast
= dp
->state
& (1<<MD_DISK_FAILFAST
);
241 dp
->state
&= ~(wonly
| failfast
);
242 if (dp
->state
& (1 << MD_DISK_FAULTY
))
244 if (dp
->state
& (1 << MD_DISK_ACTIVE
))
246 if (dp
->state
& (1 << MD_DISK_SYNC
))
248 if (dp
->state
& (1 << MD_DISK_REMOVED
))
251 printf(" write-mostly");
256 if ((dv
= map_dev(dp
->major
, dp
->minor
, 0)))
264 static void brief_examine_super0(struct supertype
*st
, int verbose
)
266 mdp_super_t
*sb
= st
->sb
;
267 char *c
=map_num(pers
, sb
->level
);
270 sprintf(devname
, "/dev/md%d", sb
->md_minor
);
273 printf("ARRAY %s level=%s num-devices=%d",
275 c
?c
:"-unknown-", sb
->raid_disks
);
277 printf("ARRAY %s", devname
);
279 if (sb
->minor_version
>= 90)
280 printf(" UUID=%08x:%08x:%08x:%08x", sb
->set_uuid0
, sb
->set_uuid1
,
281 sb
->set_uuid2
, sb
->set_uuid3
);
283 printf(" UUID=%08x", sb
->set_uuid0
);
287 static void export_examine_super0(struct supertype
*st
)
289 mdp_super_t
*sb
= st
->sb
;
291 printf("MD_LEVEL=%s\n", map_num(pers
, sb
->level
));
292 printf("MD_DEVICES=%d\n", sb
->raid_disks
);
293 if (sb
->minor_version
>= 90)
294 printf("MD_UUID=%08x:%08x:%08x:%08x\n",
295 sb
->set_uuid0
, sb
->set_uuid1
,
296 sb
->set_uuid2
, sb
->set_uuid3
);
298 printf("MD_UUID=%08x\n", sb
->set_uuid0
);
299 printf("MD_UPDATE_TIME=%llu\n",
300 __le64_to_cpu(sb
->ctime
) & 0xFFFFFFFFFFULL
);
301 printf("MD_EVENTS=%llu\n",
302 ((unsigned long long)sb
->events_hi
<< 32)
306 static int copy_metadata0(struct supertype
*st
, int from
, int to
)
308 /* Read 64K from the appropriate offset of 'from'
309 * and if it looks a little like a 0.90 superblock,
310 * write it to the same offset of 'to'
313 unsigned long long dsize
, offset
;
314 const int bufsize
= 64*1024;
317 if (posix_memalign(&buf
, 4096, bufsize
) != 0)
320 if (!get_dev_size(from
, NULL
, &dsize
))
323 if (dsize
< MD_RESERVED_SECTORS
*512)
326 offset
= MD_NEW_SIZE_SECTORS(dsize
>>9);
330 if (lseek64(from
, offset
, 0) < 0LL)
332 if (read(from
, buf
, bufsize
) != bufsize
)
335 if (lseek64(to
, offset
, 0) < 0LL)
338 if (super
->md_magic
!= MD_SB_MAGIC
||
339 super
->major_version
!= 0 ||
340 calc_sb0_csum(super
) != super
->sb_csum
)
342 if (write(to
, buf
, bufsize
) != bufsize
)
351 static void detail_super0(struct supertype
*st
, char *homehost
, char *subarray
)
353 mdp_super_t
*sb
= st
->sb
;
355 if (sb
->minor_version
>= 90)
356 printf("%08x:%08x:%08x:%08x", sb
->set_uuid0
, sb
->set_uuid1
,
357 sb
->set_uuid2
, sb
->set_uuid3
);
359 printf("%08x", sb
->set_uuid0
);
362 void *hash
= sha1_buffer(homehost
,
365 if (memcmp(&sb
->set_uuid2
, hash
, 8)==0)
366 printf(" (local to host %s)", homehost
);
368 printf("\n Events : %d.%d\n\n", sb
->events_hi
, sb
->events_lo
);
371 static void brief_detail_super0(struct supertype
*st
, char *subarray
)
373 mdp_super_t
*sb
= st
->sb
;
375 if (sb
->minor_version
>= 90)
376 printf("%08x:%08x:%08x:%08x", sb
->set_uuid0
, sb
->set_uuid1
,
377 sb
->set_uuid2
, sb
->set_uuid3
);
379 printf("%08x", sb
->set_uuid0
);
382 static int match_home0(struct supertype
*st
, char *homehost
)
384 mdp_super_t
*sb
= st
->sb
;
390 hash
= sha1_buffer(homehost
,
394 return (memcmp(&sb
->set_uuid2
, hash
, 8)==0);
397 static void uuid_from_super0(struct supertype
*st
, int uuid
[4])
399 mdp_super_t
*super
= st
->sb
;
400 uuid
[0] = super
->set_uuid0
;
401 if (super
->minor_version
>= 90) {
402 uuid
[1] = super
->set_uuid1
;
403 uuid
[2] = super
->set_uuid2
;
404 uuid
[3] = super
->set_uuid3
;
412 static void getinfo_super0(struct supertype
*st
, struct mdinfo
*info
, char *map
)
414 mdp_super_t
*sb
= st
->sb
;
417 int map_disks
= info
->array
.raid_disks
;
419 memset(info
, 0, sizeof(*info
));
420 info
->array
.major_version
= sb
->major_version
;
421 info
->array
.minor_version
= sb
->minor_version
;
422 info
->array
.patch_version
= sb
->patch_version
;
423 info
->array
.raid_disks
= sb
->raid_disks
;
424 info
->array
.level
= sb
->level
;
425 info
->array
.layout
= sb
->layout
;
426 info
->array
.md_minor
= sb
->md_minor
;
427 info
->array
.ctime
= sb
->ctime
;
428 info
->array
.utime
= sb
->utime
;
429 info
->array
.chunk_size
= sb
->chunk_size
;
430 info
->array
.state
= sb
->state
;
431 info
->component_size
= sb
->size
;
432 info
->component_size
*= 2;
434 if (sb
->state
& (1<<MD_SB_BITMAP_PRESENT
))
435 info
->bitmap_offset
= 8;
437 info
->disk
.state
= sb
->this_disk
.state
;
438 info
->disk
.major
= sb
->this_disk
.major
;
439 info
->disk
.minor
= sb
->this_disk
.minor
;
440 info
->disk
.raid_disk
= sb
->this_disk
.raid_disk
;
441 info
->disk
.number
= sb
->this_disk
.number
;
443 info
->events
= md_event(sb
);
444 info
->data_offset
= 0;
446 sprintf(info
->text_version
, "0.%d", sb
->minor_version
);
447 info
->safe_mode_delay
= 200;
449 uuid_from_super0(st
, info
->uuid
);
451 info
->recovery_start
= MaxSector
;
452 if (sb
->minor_version
> 90 && (sb
->reshape_position
+1) != 0) {
453 info
->reshape_active
= 1;
454 info
->reshape_progress
= sb
->reshape_position
;
455 info
->new_level
= sb
->new_level
;
456 info
->delta_disks
= sb
->delta_disks
;
457 info
->new_layout
= sb
->new_layout
;
458 info
->new_chunk
= sb
->new_chunk
;
459 if (info
->delta_disks
< 0)
460 info
->array
.raid_disks
-= info
->delta_disks
;
462 info
->reshape_active
= 0;
464 info
->recovery_blocked
= info
->reshape_active
;
466 sprintf(info
->name
, "%d", sb
->md_minor
);
467 /* work_disks is calculated rather than read directly */
468 for (i
=0; i
< MD_SB_DISKS
; i
++)
469 if ((sb
->disks
[i
].state
& (1<<MD_DISK_SYNC
)) &&
470 (sb
->disks
[i
].raid_disk
< (unsigned)info
->array
.raid_disks
) &&
471 (sb
->disks
[i
].state
& (1<<MD_DISK_ACTIVE
)) &&
472 !(sb
->disks
[i
].state
& (1<<MD_DISK_FAULTY
))) {
474 if (map
&& i
< map_disks
)
476 } else if (map
&& i
< map_disks
)
478 info
->array
.working_disks
= working
;
481 static struct mdinfo
*container_content0(struct supertype
*st
, char *subarray
)
488 info
= xmalloc(sizeof(*info
));
489 getinfo_super0(st
, info
, NULL
);
493 static int update_super0(struct supertype
*st
, struct mdinfo
*info
,
495 char *devname
, int verbose
,
496 int uuid_set
, char *homehost
)
498 /* NOTE: for 'assemble' and 'force' we need to return non-zero
499 * if any change was made. For others, the return value is
504 mdp_super_t
*sb
= st
->sb
;
506 if (strcmp(update
, "homehost") == 0 &&
508 /* note that 'homehost' is special as it is really
513 info
->uuid
[0] = sb
->set_uuid0
;
514 info
->uuid
[1] = sb
->set_uuid1
;
517 if (strcmp(update
, "sparc2.2")==0 ) {
518 /* 2.2 sparc put the events in the wrong place
519 * So we copy the tail of the superblock
520 * up 4 bytes before continuing
522 __u32
*sb32
= (__u32
*)sb
;
524 memmove(sb32
+MD_SB_GENERIC_CONSTANT_WORDS
+7,
525 sb32
+MD_SB_GENERIC_CONSTANT_WORDS
+7+1,
526 (MD_SB_WORDS
- (MD_SB_GENERIC_CONSTANT_WORDS
+7+1))*4);
528 pr_err("adjusting superblock of %s for 2.2/sparc compatibility.\n",
530 } else if (strcmp(update
, "super-minor") ==0) {
531 sb
->md_minor
= info
->array
.md_minor
;
533 pr_err("updating superblock of %s with minor number %d\n",
534 devname
, info
->array
.md_minor
);
535 } else if (strcmp(update
, "summaries") == 0) {
537 /* set nr_disks, active_disks, working_disks,
538 * failed_disks, spare_disks based on disks[]
539 * array in superblock.
540 * Also make sure extra slots aren't 'failed'
542 sb
->nr_disks
= sb
->active_disks
=
543 sb
->working_disks
= sb
->failed_disks
=
545 for (i
=0; i
< MD_SB_DISKS
; i
++)
546 if (sb
->disks
[i
].major
||
547 sb
->disks
[i
].minor
) {
548 int state
= sb
->disks
[i
].state
;
549 if (state
& (1<<MD_DISK_REMOVED
))
552 if (state
& (1<<MD_DISK_ACTIVE
))
554 if (state
& (1<<MD_DISK_FAULTY
))
560 } else if (i
>= sb
->raid_disks
&& sb
->disks
[i
].number
== 0)
561 sb
->disks
[i
].state
= 0;
562 } else if (strcmp(update
, "force-one")==0) {
563 /* Not enough devices for a working array, so
564 * bring this one up-to-date.
566 __u32 ehi
= sb
->events_hi
, elo
= sb
->events_lo
;
567 sb
->events_hi
= (info
->events
>>32) & 0xFFFFFFFF;
568 sb
->events_lo
= (info
->events
) & 0xFFFFFFFF;
569 if (sb
->events_hi
!= ehi
||
570 sb
->events_lo
!= elo
)
572 } else if (strcmp(update
, "force-array")==0) {
573 /* degraded array and 'force' requested, so
574 * maybe need to mark it 'clean'
576 if ((sb
->level
== 5 || sb
->level
== 4 || sb
->level
== 6) &&
577 (sb
->state
& (1 << MD_SB_CLEAN
)) == 0) {
578 /* need to force clean */
579 sb
->state
|= (1 << MD_SB_CLEAN
);
582 } else if (strcmp(update
, "assemble")==0) {
583 int d
= info
->disk
.number
;
584 int wonly
= sb
->disks
[d
].state
& (1<<MD_DISK_WRITEMOSTLY
);
585 int failfast
= sb
->disks
[d
].state
& (1<<MD_DISK_FAILFAST
);
586 int mask
= (1<<MD_DISK_WRITEMOSTLY
)|(1<<MD_DISK_FAILFAST
);
588 if (sb
->minor_version
>= 91)
589 /* During reshape we don't insist on everything
590 * being marked 'sync'
592 add
= (1<<MD_DISK_SYNC
);
593 if (((sb
->disks
[d
].state
& ~mask
) | add
) !=
594 (unsigned)info
->disk
.state
) {
595 sb
->disks
[d
].state
= info
->disk
.state
| wonly
|failfast
;
598 if (info
->reshape_active
&&
599 sb
->minor_version
> 90 && (sb
->reshape_position
+1) != 0 &&
600 info
->delta_disks
>= 0 &&
601 info
->reshape_progress
< sb
->reshape_position
) {
602 sb
->reshape_position
= info
->reshape_progress
;
605 if (info
->reshape_active
&&
606 sb
->minor_version
> 90 && (sb
->reshape_position
+1) != 0 &&
607 info
->delta_disks
< 0 &&
608 info
->reshape_progress
> sb
->reshape_position
) {
609 sb
->reshape_position
= info
->reshape_progress
;
612 } else if (strcmp(update
, "linear-grow-new") == 0) {
613 memset(&sb
->disks
[info
->disk
.number
], 0, sizeof(sb
->disks
[0]));
614 sb
->disks
[info
->disk
.number
].number
= info
->disk
.number
;
615 sb
->disks
[info
->disk
.number
].major
= info
->disk
.major
;
616 sb
->disks
[info
->disk
.number
].minor
= info
->disk
.minor
;
617 sb
->disks
[info
->disk
.number
].raid_disk
= info
->disk
.raid_disk
;
618 sb
->disks
[info
->disk
.number
].state
= info
->disk
.state
;
619 sb
->this_disk
= sb
->disks
[info
->disk
.number
];
620 } else if (strcmp(update
, "linear-grow-update") == 0) {
621 sb
->raid_disks
= info
->array
.raid_disks
;
622 sb
->nr_disks
= info
->array
.nr_disks
;
623 sb
->active_disks
= info
->array
.active_disks
;
624 sb
->working_disks
= info
->array
.working_disks
;
625 memset(&sb
->disks
[info
->disk
.number
], 0, sizeof(sb
->disks
[0]));
626 sb
->disks
[info
->disk
.number
].number
= info
->disk
.number
;
627 sb
->disks
[info
->disk
.number
].major
= info
->disk
.major
;
628 sb
->disks
[info
->disk
.number
].minor
= info
->disk
.minor
;
629 sb
->disks
[info
->disk
.number
].raid_disk
= info
->disk
.raid_disk
;
630 sb
->disks
[info
->disk
.number
].state
= info
->disk
.state
;
631 } else if (strcmp(update
, "resync") == 0) {
632 /* make sure resync happens */
633 sb
->state
&= ~(1<<MD_SB_CLEAN
);
635 } else if (strcmp(update
, "uuid") == 0) {
636 if (!uuid_set
&& homehost
) {
638 char *hash
= sha1_buffer(homehost
,
641 memcpy(info
->uuid
+2, hash
, 8);
643 sb
->set_uuid0
= info
->uuid
[0];
644 sb
->set_uuid1
= info
->uuid
[1];
645 sb
->set_uuid2
= info
->uuid
[2];
646 sb
->set_uuid3
= info
->uuid
[3];
647 if (sb
->state
& (1<<MD_SB_BITMAP_PRESENT
)) {
648 struct bitmap_super_s
*bm
;
649 bm
= (struct bitmap_super_s
*)(sb
+1);
650 uuid_from_super0(st
, uuid
);
651 memcpy(bm
->uuid
, uuid
, 16);
653 } else if (strcmp(update
, "metadata") == 0) {
654 /* Create some v1.0 metadata to match ours but make the
655 * ctime bigger. Also update info->array.*_version.
656 * We need to arrange that store_super writes out
658 * Not permitted for unclean array, or array with
661 if (info
->bitmap_offset
) {
662 pr_err("Cannot update metadata when bitmap is present\n");
664 } else if (info
->array
.state
!= 1) {
665 pr_err("Cannot update metadata on unclean array\n");
668 info
->array
.major_version
= 1;
669 info
->array
.minor_version
= 0;
670 uuid_from_super0(st
, info
->uuid
);
671 st
->other
= super1_make_v0(st
, info
, st
->sb
);
673 } else if (strcmp(update
, "revert-reshape") == 0) {
675 if (sb
->minor_version
<= 90)
676 pr_err("No active reshape to revert on %s\n",
678 else if (sb
->delta_disks
== 0)
679 pr_err("%s: Can only revert reshape which changes number of devices\n",
683 int parity
= sb
->level
== 6 ? 2 : 1;
686 if (sb
->level
>= 4 && sb
->level
<= 6 &&
687 sb
->reshape_position
% (
689 (sb
->raid_disks
- sb
->delta_disks
- parity
))) {
690 pr_err("Reshape position is not suitably aligned.\n");
691 pr_err("Try normal assembly and stop again\n");
694 sb
->raid_disks
-= sb
->delta_disks
;
695 sb
->delta_disks
= -sb
->delta_disks
;
697 tmp
= sb
->new_layout
;
698 sb
->new_layout
= sb
->layout
;
702 sb
->new_chunk
= sb
->chunk_size
;
703 sb
->chunk_size
= tmp
;
705 } else if (strcmp(update
, "no-bitmap") == 0) {
706 sb
->state
&= ~(1<<MD_SB_BITMAP_PRESENT
);
707 } else if (strcmp(update
, "_reshape_progress")==0)
708 sb
->reshape_position
= info
->reshape_progress
;
709 else if (strcmp(update
, "writemostly")==0)
710 sb
->state
|= (1<<MD_DISK_WRITEMOSTLY
);
711 else if (strcmp(update
, "readwrite")==0)
712 sb
->state
&= ~(1<<MD_DISK_WRITEMOSTLY
);
716 sb
->sb_csum
= calc_sb0_csum(sb
);
721 * For version-0 superblock, the homehost is 'stored' in the uuid.
722 * 8 bytes for a hash of the host leaving 8 bytes of random material.
723 * We use the first 8 bytes (64bits) of the sha1 of the host name
725 static int init_super0(struct supertype
*st
, mdu_array_info_t
*info
,
726 struct shape
*s
, char *ignored_name
,
727 char *homehost
, int *uuid
,
728 unsigned long long data_offset
)
733 if (data_offset
!= INVALID_SECTORS
) {
734 pr_err("data-offset not support for 0.90\n");
738 if (posix_memalign((void**)&sb
, 4096,
739 MD_SB_BYTES
+ ROUND_UP(sizeof(bitmap_super_t
), 4096)) != 0) {
740 pr_err("could not allocate superblock\n");
743 memset(sb
, 0, MD_SB_BYTES
+ sizeof(bitmap_super_t
));
747 /* zeroing the superblock */
751 spares
= info
->working_disks
- info
->active_disks
;
752 if (info
->raid_disks
+ spares
> MD_SB_DISKS
) {
753 pr_err("too many devices requested: %d+%d > %d\n",
754 info
->raid_disks
, spares
, MD_SB_DISKS
);
758 sb
->md_magic
= MD_SB_MAGIC
;
759 sb
->major_version
= 0;
760 sb
->minor_version
= 90;
761 sb
->patch_version
= 0;
762 sb
->gvalid_words
= 0; /* ignored */
764 sb
->level
= info
->level
;
766 if (s
->size
!= (unsigned long long)sb
->size
)
768 sb
->nr_disks
= info
->nr_disks
;
769 sb
->raid_disks
= info
->raid_disks
;
770 sb
->md_minor
= info
->md_minor
;
771 sb
->not_persistent
= 0;
773 sb
->set_uuid0
= uuid
[0];
774 sb
->set_uuid1
= uuid
[1];
775 sb
->set_uuid2
= uuid
[2];
776 sb
->set_uuid3
= uuid
[3];
779 random_uuid((__u8
*)r
);
780 sb
->set_uuid0
= r
[0];
781 sb
->set_uuid1
= r
[1];
782 sb
->set_uuid2
= r
[2];
783 sb
->set_uuid3
= r
[3];
785 if (homehost
&& !uuid
) {
787 char *hash
= sha1_buffer(homehost
,
790 memcpy(&sb
->set_uuid2
, hash
, 8);
793 sb
->utime
= sb
->ctime
;
794 sb
->state
= info
->state
;
795 sb
->active_disks
= info
->active_disks
;
796 sb
->working_disks
= info
->working_disks
;
797 sb
->failed_disks
= info
->failed_disks
;
798 sb
->spare_disks
= info
->spare_disks
;
802 sb
->layout
= info
->layout
;
803 sb
->chunk_size
= info
->chunk_size
;
811 mdu_disk_info_t disk
;
812 struct devinfo
*next
;
815 /* Add a device to the superblock being created */
816 static int add_to_super0(struct supertype
*st
, mdu_disk_info_t
*dinfo
,
817 int fd
, char *devname
, unsigned long long data_offset
)
819 mdp_super_t
*sb
= st
->sb
;
820 mdp_disk_t
*dk
= &sb
->disks
[dinfo
->number
];
821 struct devinfo
*di
, **dip
;
823 dk
->number
= dinfo
->number
;
824 dk
->major
= dinfo
->major
;
825 dk
->minor
= dinfo
->minor
;
826 dk
->raid_disk
= dinfo
->raid_disk
;
827 dk
->state
= dinfo
->state
& ((1<<MD_DISK_ACTIVE
) |
830 sb
->this_disk
= sb
->disks
[dinfo
->number
];
831 sb
->sb_csum
= calc_sb0_csum(sb
);
833 dip
= (struct devinfo
**)&st
->info
;
836 di
= xmalloc(sizeof(struct devinfo
));
838 di
->devname
= devname
;
846 static int store_super0(struct supertype
*st
, int fd
)
848 unsigned long long dsize
;
849 unsigned long long offset
;
850 mdp_super_t
*super
= st
->sb
;
852 if (!get_dev_size(fd
, NULL
, &dsize
))
855 if (dsize
< MD_RESERVED_SECTORS
*512)
859 /* Writing out v1.0 metadata for --update=metadata */
862 offset
= dsize
/512 - 8*2;
865 if (lseek64(fd
, offset
, 0)< 0LL)
867 else if (write(fd
, st
->other
, 1024) != 1024)
876 offset
= MD_NEW_SIZE_SECTORS(dsize
>>9);
880 if (lseek64(fd
, offset
, 0)< 0LL)
883 if (write(fd
, super
, sizeof(*super
)) != sizeof(*super
))
886 if (super
->state
& (1<<MD_SB_BITMAP_PRESENT
)) {
887 struct bitmap_super_s
* bm
= (struct bitmap_super_s
*)(super
+1);
888 if (__le32_to_cpu(bm
->magic
) == BITMAP_MAGIC
)
889 if (write(fd
, bm
, ROUND_UP(sizeof(*bm
),4096)) !=
890 ROUND_UP(sizeof(*bm
),4096))
898 static int write_init_super0(struct supertype
*st
)
900 mdp_super_t
*sb
= st
->sb
;
904 for (di
= st
->info
; di
&& ! rv
; di
= di
->next
) {
906 if (di
->disk
.state
& (1 << MD_DISK_FAULTY
))
910 while (Kill(di
->devname
, NULL
, 0, -1, 1) == 0)
913 sb
->disks
[di
->disk
.number
].state
&= ~(1<<MD_DISK_FAULTY
);
915 sb
->this_disk
= sb
->disks
[di
->disk
.number
];
916 sb
->sb_csum
= calc_sb0_csum(sb
);
917 rv
= store_super0(st
, di
->fd
);
919 if (rv
== 0 && (sb
->state
& (1<<MD_SB_BITMAP_PRESENT
)))
920 rv
= st
->ss
->write_bitmap(st
, di
->fd
, NoUpdate
);
923 pr_err("failed to write superblock to %s\n",
929 static int compare_super0(struct supertype
*st
, struct supertype
*tst
)
933 * 0 same, or first was empty, and second was copied
934 * 1 second had wrong number
938 mdp_super_t
*first
= st
->sb
;
939 mdp_super_t
*second
= tst
->sb
;
940 int uuid1
[4], uuid2
[4];
942 if (second
->md_magic
!= MD_SB_MAGIC
)
945 if (posix_memalign((void**)&first
, 4096,
947 ROUND_UP(sizeof(struct bitmap_super_s
), 4096)) != 0) {
948 pr_err("could not allocate superblock\n");
951 memcpy(first
, second
, MD_SB_BYTES
+ sizeof(struct bitmap_super_s
));
956 uuid_from_super0(st
, uuid1
);
957 uuid_from_super0(tst
, uuid2
);
958 if (!same_uuid(uuid1
, uuid2
, 0))
960 if (first
->major_version
!= second
->major_version
||
961 first
->minor_version
!= second
->minor_version
||
962 first
->patch_version
!= second
->patch_version
||
963 first
->gvalid_words
!= second
->gvalid_words
||
964 first
->ctime
!= second
->ctime
||
965 first
->level
!= second
->level
||
966 first
->size
!= second
->size
||
967 first
->raid_disks
!= second
->raid_disks
)
973 static void free_super0(struct supertype
*st
);
975 static int load_super0(struct supertype
*st
, int fd
, char *devname
)
977 /* try to read in the superblock
980 * 1 on cannot get superblock
981 * 2 on superblock meaningless
983 unsigned long long dsize
;
984 unsigned long long offset
;
987 struct bitmap_super_s
*bsb
;
991 if (!get_dev_size(fd
, devname
, &dsize
))
994 if (dsize
< MD_RESERVED_SECTORS
*512) {
996 pr_err("%s is too small for md: size is %llu sectors.\n",
1000 st
->devsize
= dsize
;
1002 offset
= MD_NEW_SIZE_SECTORS(dsize
>>9);
1006 if (lseek64(fd
, offset
, 0)< 0LL) {
1008 pr_err("Cannot seek to superblock on %s: %s\n",
1009 devname
, strerror(errno
));
1013 if (posix_memalign((void**)&super
, 4096,
1015 ROUND_UP(sizeof(bitmap_super_t
), 4096)) != 0) {
1016 pr_err("could not allocate superblock\n");
1020 if (read(fd
, super
, sizeof(*super
)) != MD_SB_BYTES
) {
1022 pr_err("Cannot read superblock on %s\n",
1028 if (st
->ss
&& st
->minor_version
== 9)
1029 super0_swap_endian(super
);
1031 if (super
->md_magic
!= MD_SB_MAGIC
) {
1033 pr_err("No super block found on %s (Expected magic %08x, got %08x)\n",
1034 devname
, MD_SB_MAGIC
, super
->md_magic
);
1039 if (super
->major_version
!= 0) {
1041 pr_err("Cannot interpret superblock on %s - version is %d\n",
1042 devname
, super
->major_version
);
1048 if (st
->ss
== NULL
) {
1050 st
->minor_version
= super
->minor_version
;
1051 st
->max_devs
= MD_SB_DISKS
;
1055 /* Now check on the bitmap superblock */
1056 if ((super
->state
& (1<<MD_SB_BITMAP_PRESENT
)) == 0)
1058 /* Read the bitmap superblock and make sure it looks
1059 * valid. If it doesn't clear the bit. An --assemble --force
1060 * should get that written out.
1062 if (read(fd
, super
+1, ROUND_UP(sizeof(struct bitmap_super_s
),4096)) !=
1063 ROUND_UP(sizeof(struct bitmap_super_s
), 4096))
1066 uuid_from_super0(st
, uuid
);
1067 bsb
= (struct bitmap_super_s
*)(super
+1);
1068 if (__le32_to_cpu(bsb
->magic
) != BITMAP_MAGIC
||
1069 memcmp(bsb
->uuid
, uuid
, 16) != 0)
1074 super
->state
&= ~(1<<MD_SB_BITMAP_PRESENT
);
1079 static struct supertype
*match_metadata_desc0(char *arg
)
1081 struct supertype
*st
= xcalloc(1, sizeof(*st
));
1083 st
->container_devnm
[0] = 0;
1086 st
->minor_version
= 90;
1087 st
->max_devs
= MD_SB_DISKS
;
1089 /* we sometimes get 00.90 */
1090 while (arg
[0] == '0' && arg
[1] == '0')
1092 if (strcmp(arg
, "0") == 0 ||
1093 #ifdef DEFAULT_OLD_METADATA /* ifndef in super1.c */
1094 strcmp(arg
, "default") == 0 ||
1095 #endif /* DEFAULT_OLD_METADATA */
1096 strcmp(arg
, "0.90") == 0 ||
1097 strcmp(arg
, "") == 0 /* no metadata - i.e. non_persistent */
1101 st
->minor_version
= 91; /* reshape in progress */
1102 if (strcmp(arg
, "0.91") == 0) /* For dup_super support */
1105 st
->minor_version
= 9; /* flag for 'byte-swapped' */
1106 if (strcmp(arg
, "0.swap")==0 ||
1107 strcmp(arg
, "0.9") == 0) /* For dup_super support */
1114 static __u64
avail_size0(struct supertype
*st
, __u64 devsize
,
1115 unsigned long long data_offset
)
1117 if (data_offset
!= 0 && data_offset
!= INVALID_SECTORS
)
1119 if (devsize
< MD_RESERVED_SECTORS
)
1121 return MD_NEW_SIZE_SECTORS(devsize
);
1124 static int add_internal_bitmap0(struct supertype
*st
, int *chunkp
,
1125 int delay
, int write_behind
,
1126 unsigned long long size
, int may_change
,
1130 * The bitmap comes immediately after the superblock and must be 60K in size
1131 * at most. The default size is between 30K and 60K
1133 * size is in sectors, chunk is in bytes !!!
1135 unsigned long long bits
;
1136 unsigned long long max_bits
= (60*1024 - sizeof(bitmap_super_t
))*8;
1137 unsigned long long min_chunk
;
1138 int chunk
= *chunkp
;
1139 mdp_super_t
*sb
= st
->sb
;
1140 bitmap_super_t
*bms
= (bitmap_super_t
*)(((char*)sb
) + MD_SB_BYTES
);
1143 min_chunk
= 4096; /* sub-page chunks don't work yet.. */
1144 bits
= (size
* 512) / min_chunk
+ 1;
1145 while (bits
> max_bits
) {
1149 if (chunk
== UnSet
) {
1150 /* A chunk size less than a few Megabytes gives poor
1151 * performance without increasing resync noticeably
1154 if (chunk
< 64*1024*1024)
1155 chunk
= 64*1024*1024;
1156 } else if ((unsigned long long)chunk
< min_chunk
)
1157 return -EINVAL
; /* chunk size too small */
1159 sb
->state
|= (1<<MD_SB_BITMAP_PRESENT
);
1161 memset(bms
, 0, sizeof(*bms
));
1162 bms
->magic
= __cpu_to_le32(BITMAP_MAGIC
);
1163 bms
->version
= __cpu_to_le32(major
);
1164 uuid_from_super0(st
, uuid
);
1165 memcpy(bms
->uuid
, uuid
, 16);
1166 bms
->chunksize
= __cpu_to_le32(chunk
);
1167 bms
->daemon_sleep
= __cpu_to_le32(delay
);
1168 bms
->sync_size
= __cpu_to_le64(size
);
1169 bms
->write_behind
= __cpu_to_le32(write_behind
);
1174 static int locate_bitmap0(struct supertype
*st
, int fd
, int node_num
)
1176 unsigned long long dsize
;
1177 unsigned long long offset
;
1179 if (!get_dev_size(fd
, NULL
, &dsize
))
1182 if (dsize
< MD_RESERVED_SECTORS
*512)
1185 offset
= MD_NEW_SIZE_SECTORS(dsize
>>9);
1189 offset
+= MD_SB_BYTES
;
1191 lseek64(fd
, offset
, 0);
1195 static int write_bitmap0(struct supertype
*st
, int fd
, enum bitmap_update update
)
1197 unsigned long long dsize
;
1198 unsigned long long offset
;
1199 mdp_super_t
*sb
= st
->sb
;
1206 if (!get_dev_size(fd
, NULL
, &dsize
))
1209 if (dsize
< MD_RESERVED_SECTORS
*512)
1212 offset
= MD_NEW_SIZE_SECTORS(dsize
>>9);
1216 if (lseek64(fd
, offset
+ 4096, 0)< 0LL)
1219 if (posix_memalign(&buf
, 4096, 4096))
1222 memset(buf
, 0xff, 4096);
1223 memcpy(buf
, ((char*)sb
)+MD_SB_BYTES
, sizeof(bitmap_super_t
));
1225 while (towrite
> 0) {
1229 n
= write(fd
, buf
, n
);
1234 memset(buf
, 0xff, 4096);
1244 static void free_super0(struct supertype
*st
)
1249 struct devinfo
*di
= st
->info
;
1250 st
->info
= di
->next
;
1258 static int validate_geometry0(struct supertype
*st
, int level
,
1259 int layout
, int raiddisks
,
1260 int *chunk
, unsigned long long size
,
1261 unsigned long long data_offset
,
1262 char *subdev
, unsigned long long *freesize
,
1263 int consistency_policy
, int verbose
)
1265 unsigned long long ldsize
;
1267 unsigned int tbmax
= 4;
1269 /* prior to linux 3.1, a bug limits usable device size to 2TB.
1270 * It was introduced in 2.6.29, but we won't worry about that detail
1272 if (get_linux_version() < 3001000)
1275 if (level
== LEVEL_CONTAINER
) {
1277 pr_err("0.90 metadata does not support containers\n");
1280 if (raiddisks
> MD_SB_DISKS
) {
1282 pr_err("0.90 metadata supports at most %d devices per array\n",
1286 if (size
>= tbmax
* 2ULL*1024*1024*1024) {
1288 pr_err("0.90 metadata supports at most %d terabytes per device\n", tbmax
);
1291 if (*chunk
== UnSet
)
1292 *chunk
= DEFAULT_CHUNK
;
1294 if (level
== 0 && layout
!= UnSet
) {
1296 pr_err("0.90 metadata does not support layouts for RAID0\n");
1303 fd
= open(subdev
, O_RDONLY
|O_EXCL
, 0);
1306 pr_err("super0.90 cannot open %s: %s\n",
1307 subdev
, strerror(errno
));
1311 if (!get_dev_size(fd
, subdev
, &ldsize
)) {
1317 if (ldsize
< MD_RESERVED_SECTORS
* 512)
1319 *freesize
= MD_NEW_SIZE_SECTORS(ldsize
>> 9);
1323 struct superswitch super0
= {
1324 .examine_super
= examine_super0
,
1325 .brief_examine_super
= brief_examine_super0
,
1326 .export_examine_super
= export_examine_super0
,
1327 .detail_super
= detail_super0
,
1328 .brief_detail_super
= brief_detail_super0
,
1329 .write_init_super
= write_init_super0
,
1330 .validate_geometry
= validate_geometry0
,
1331 .add_to_super
= add_to_super0
,
1332 .copy_metadata
= copy_metadata0
,
1333 .match_home
= match_home0
,
1334 .uuid_from_super
= uuid_from_super0
,
1335 .getinfo_super
= getinfo_super0
,
1336 .container_content
= container_content0
,
1337 .update_super
= update_super0
,
1338 .init_super
= init_super0
,
1339 .store_super
= store_super0
,
1340 .compare_super
= compare_super0
,
1341 .load_super
= load_super0
,
1342 .match_metadata_desc
= match_metadata_desc0
,
1343 .avail_size
= avail_size0
,
1344 .add_internal_bitmap
= add_internal_bitmap0
,
1345 .locate_bitmap
= locate_bitmap0
,
1346 .write_bitmap
= write_bitmap0
,
1347 .free_super
= free_super0
,