2 * mdadm - manage Linux "md" devices aka RAID arrays.
4 * Copyright (C) 2001-2004 Neil Brown <neilb@cse.unsw.edu.au>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neilb@cse.unsw.edu.au>
24 * School of Computer Science and Engineering
25 * The University of New South Wales
31 #include <asm/byteorder.h>
34 * All handling for the 0.90.0 version superblock is in
37 * - finding, loading, and writing the superblock.
38 * - initialising a new superblock
39 * - printing the superblock for --examine
40 * - printing part of the superblock for --detail
45 static unsigned long calc_sb0_csum(mdp_super_t
*super
)
47 unsigned long csum
= super
->sb_csum
;
48 unsigned long newcsum
;
50 newcsum
= calc_csum(super
, MD_SB_BYTES
);
51 super
->sb_csum
= csum
;
56 static void examine_super0(void *sbv
)
58 mdp_super_t
*sb
= sbv
;
63 printf(" Magic : %08x\n", sb
->md_magic
);
64 printf(" Version : %02d.%02d.%02d\n", sb
->major_version
, sb
->minor_version
,
66 if (sb
->minor_version
>= 90)
67 printf(" UUID : %08x:%08x:%08x:%08x\n", sb
->set_uuid0
, sb
->set_uuid1
,
68 sb
->set_uuid2
, sb
->set_uuid3
);
70 printf(" UUID : %08x\n", sb
->set_uuid0
);
73 printf(" Creation Time : %.24s\n", ctime(&atime
));
74 c
=map_num(pers
, sb
->level
);
75 printf(" Raid Level : %s\n", c
?c
:"-unknown-");
76 if ((int)sb
->level
>= 0)
77 printf(" Device Size : %d%s\n", sb
->size
, human_size((long long)sb
->size
<<10));
78 printf(" Raid Devices : %d\n", sb
->raid_disks
);
79 printf(" Total Devices : %d\n", sb
->nr_disks
);
80 printf("Preferred Minor : %d\n", sb
->md_minor
);
83 printf(" Update Time : %.24s\n", ctime(&atime
));
84 printf(" State : %s\n",
85 (sb
->state
&(1<<MD_SB_CLEAN
))?"clean":"active");
86 if (sb
->state
& (1<<MD_SB_BITMAP_PRESENT
))
87 printf("Internal Bitmap : present\n");
88 printf(" Active Devices : %d\n", sb
->active_disks
);
89 printf("Working Devices : %d\n", sb
->working_disks
);
90 printf(" Failed Devices : %d\n", sb
->failed_disks
);
91 printf(" Spare Devices : %d\n", sb
->spare_disks
);
92 if (calc_sb0_csum(sb
) == sb
->sb_csum
)
93 printf(" Checksum : %x - correct\n", sb
->sb_csum
);
95 printf(" Checksum : %x - expected %lx\n", sb
->sb_csum
, calc_sb0_csum(sb
));
96 printf(" Events : %d.%d\n", sb
->events_hi
, sb
->events_lo
);
99 c
= map_num(r5layout
, sb
->layout
);
100 printf(" Layout : %s\n", c
?c
:"-unknown-");
106 printf(" Chunk Size : %dK\n", sb
->chunk_size
/1024);
109 printf(" Rounding : %dK\n", sb
->chunk_size
/1024);
114 printf(" Number Major Minor RaidDevice State\n");
115 for (d
= -1; d
<(signed int)(sb
->raid_disks
+sb
->spare_disks
); d
++) {
119 if (d
>=0) dp
= &sb
->disks
[d
];
120 else dp
= &sb
->this_disk
;
121 snprintf(nb
, sizeof(nb
), "%4d", d
);
122 printf("%4s %5d %5d %5d %5d ", d
< 0 ? "this" : nb
,
123 dp
->number
, dp
->major
, dp
->minor
, dp
->raid_disk
);
124 if (dp
->state
& (1<<MD_DISK_FAULTY
)) printf(" faulty");
125 if (dp
->state
& (1<<MD_DISK_ACTIVE
)) printf(" active");
126 if (dp
->state
& (1<<MD_DISK_SYNC
)) printf(" sync");
127 if (dp
->state
& (1<<MD_DISK_REMOVED
)) printf(" removed");
128 if (dp
->state
== 0) printf(" spare");
129 if ((dv
=map_dev(dp
->major
, dp
->minor
)))
132 if (d
== -1) printf("\n");
136 static void brief_examine_super0(void *sbv
)
138 mdp_super_t
*sb
= sbv
;
139 char *c
=map_num(pers
, sb
->level
);
141 printf("ARRAY %s level=%s num-devices=%d UUID=",
142 get_md_name(sb
->md_minor
),
143 c
?c
:"-unknown-", sb
->raid_disks
);
144 if (sb
->minor_version
>= 90)
145 printf("%08x:%08x:%08x:%08x", sb
->set_uuid0
, sb
->set_uuid1
,
146 sb
->set_uuid2
, sb
->set_uuid3
);
148 printf("%08x", sb
->set_uuid0
);
152 static void detail_super0(void *sbv
)
154 mdp_super_t
*sb
= sbv
;
156 if (sb
->minor_version
>= 90)
157 printf("%08x:%08x:%08x:%08x", sb
->set_uuid0
, sb
->set_uuid1
,
158 sb
->set_uuid2
, sb
->set_uuid3
);
160 printf("%08x", sb
->set_uuid0
);
161 printf("\n Events : %d.%d\n\n", sb
->events_hi
, sb
->events_lo
);
164 static void brief_detail_super0(void *sbv
)
166 mdp_super_t
*sb
= sbv
;
168 if (sb
->minor_version
>= 90)
169 printf("%08x:%08x:%08x:%08x", sb
->set_uuid0
, sb
->set_uuid1
,
170 sb
->set_uuid2
, sb
->set_uuid3
);
172 printf("%08x", sb
->set_uuid0
);
175 static void uuid_from_super0(int uuid
[4], void * sbv
)
177 mdp_super_t
*super
= sbv
;
178 uuid
[0] = super
->set_uuid0
;
179 if (super
->minor_version
>= 90) {
180 uuid
[1] = super
->set_uuid1
;
181 uuid
[2] = super
->set_uuid2
;
182 uuid
[3] = super
->set_uuid3
;
190 static void getinfo_super0(struct mdinfo
*info
, void *sbv
)
192 mdp_super_t
*sb
= sbv
;
196 info
->array
.major_version
= sb
->major_version
;
197 info
->array
.minor_version
= sb
->minor_version
;
198 info
->array
.patch_version
= sb
->patch_version
;
199 info
->array
.raid_disks
= sb
->raid_disks
;
200 info
->array
.level
= sb
->level
;
201 info
->array
.md_minor
= sb
->md_minor
;
202 info
->array
.ctime
= sb
->ctime
;
204 info
->disk
.state
= sb
->this_disk
.state
;
205 info
->disk
.major
= sb
->this_disk
.major
;
206 info
->disk
.minor
= sb
->this_disk
.minor
;
207 info
->disk
.raid_disk
= sb
->this_disk
.raid_disk
;
209 info
->events
= md_event(sb
);
211 uuid_from_super0(info
->uuid
, sbv
);
213 /* work_disks is calculated rather than read directly */
214 for (i
=0; i
< MD_SB_DISKS
; i
++)
215 if ((sb
->disks
[i
].state
& (1<<MD_DISK_SYNC
)) &&
216 (sb
->disks
[i
].state
& (1<<MD_DISK_ACTIVE
)) &&
217 !(sb
->disks
[i
].state
& (1<<MD_DISK_FAULTY
)))
219 info
->array
.working_disks
= working
;
223 static int update_super0(struct mdinfo
*info
, void *sbv
, char *update
, char *devname
, int verbose
)
226 mdp_super_t
*sb
= sbv
;
227 if (strcmp(update
, "sparc2.2")==0 ) {
228 /* 2.2 sparc put the events in the wrong place
229 * So we copy the tail of the superblock
230 * up 4 bytes before continuing
232 __u32
*sb32
= (__u32
*)sb
;
233 memcpy(sb32
+MD_SB_GENERIC_CONSTANT_WORDS
+7,
234 sb32
+MD_SB_GENERIC_CONSTANT_WORDS
+7+1,
235 (MD_SB_WORDS
- (MD_SB_GENERIC_CONSTANT_WORDS
+7+1))*4);
236 fprintf (stderr
, Name
": adjusting superblock of %s for 2.2/sparc compatability.\n",
239 if (strcmp(update
, "super-minor") ==0) {
240 sb
->md_minor
= info
->array
.md_minor
;
242 fprintf(stderr
, Name
": updating superblock of %s with minor number %d\n",
243 devname
, info
->array
.md_minor
);
245 if (strcmp(update
, "summaries") == 0) {
247 /* set nr_disks, active_disks, working_disks,
248 * failed_disks, spare_disks based on disks[]
249 * array in superblock.
250 * Also make sure extra slots aren't 'failed'
252 sb
->nr_disks
= sb
->active_disks
=
253 sb
->working_disks
= sb
->failed_disks
=
255 for (i
=0; i
< MD_SB_DISKS
; i
++)
256 if (sb
->disks
[i
].major
||
257 sb
->disks
[i
].minor
) {
258 int state
= sb
->disks
[i
].state
;
259 if (state
& (1<<MD_DISK_REMOVED
))
262 if (state
& (1<<MD_DISK_ACTIVE
))
264 if (state
& (1<<MD_DISK_FAULTY
))
270 } else if (i
>= sb
->raid_disks
&& sb
->disks
[i
].number
== 0)
271 sb
->disks
[i
].state
= 0;
273 if (strcmp(update
, "force")==0) {
274 sb
->events_hi
= (info
->events
>>32) & 0xFFFFFFFF;
275 sb
->events_lo
= (info
->events
) & 0xFFFFFFFF;
276 if (sb
->level
== 5 || sb
->level
== 4 || sb
->level
== 6)
277 /* need to force clean */
278 sb
->state
|= (1 << MD_SB_CLEAN
);
280 if (strcmp(update
, "assemble")==0) {
281 int d
= info
->disk
.number
;
282 if (sb
->disks
[d
].state
!= info
->disk
.state
) {
283 sb
->disks
[d
].state
= info
->disk
.state
;
287 if (strcmp(update
, "newdev") == 0) {
288 int d
= info
->disk
.number
;
289 memset(&sb
->disks
[d
], 0, sizeof(sb
->disks
[d
]));
290 sb
->disks
[d
].number
= d
;
291 sb
->disks
[d
].major
= info
->disk
.major
;
292 sb
->disks
[d
].minor
= info
->disk
.minor
;
293 sb
->disks
[d
].raid_disk
= info
->disk
.raid_disk
;
294 sb
->disks
[d
].state
= info
->disk
.state
;
295 sb
->this_disk
= sb
->disks
[d
];
297 if (strcmp(update
, "grow") == 0) {
298 sb
->raid_disks
= info
->array
.raid_disks
;
299 sb
->nr_disks
= info
->array
.nr_disks
;
300 sb
->active_disks
= info
->array
.active_disks
;
301 sb
->working_disks
= info
->array
.working_disks
;
302 memset(&sb
->disks
[info
->disk
.number
], 0, sizeof(sb
->disks
[0]));
303 sb
->disks
[info
->disk
.number
].number
= info
->disk
.number
;
304 sb
->disks
[info
->disk
.number
].major
= info
->disk
.major
;
305 sb
->disks
[info
->disk
.number
].minor
= info
->disk
.minor
;
306 sb
->disks
[info
->disk
.number
].raid_disk
= info
->disk
.raid_disk
;
307 sb
->disks
[info
->disk
.number
].state
= info
->disk
.state
;
308 if (sb
->this_disk
.number
== info
->disk
.number
)
309 sb
->this_disk
= sb
->disks
[info
->disk
.number
];
311 if (strcmp(update
, "resync") == 0) {
312 /* make sure resync happens */
313 sb
->state
&= ~(1<<MD_SB_CLEAN
);
317 sb
->sb_csum
= calc_sb0_csum(sb
);
321 static __u64
event_super0(void *sbv
)
323 mdp_super_t
*sb
= sbv
;
329 static int init_super0(struct supertype
*st
, void **sbp
, mdu_array_info_t
*info
)
331 mdp_super_t
*sb
= malloc(MD_SB_BYTES
+ sizeof(bitmap_super_t
));
334 memset(sb
, 0, MD_SB_BYTES
+ sizeof(bitmap_super_t
));
336 if (info
->major_version
== -1) {
337 /* zeroing the superblock */
341 spares
= info
->working_disks
- info
->active_disks
;
342 if (info
->raid_disks
+ spares
> MD_SB_DISKS
) {
343 fprintf(stderr
, Name
": too many devices requested: %d+%d > %d\n",
344 info
->raid_disks
, spares
, MD_SB_DISKS
);
348 rfd
= open("/dev/urandom", O_RDONLY
);
349 sb
->md_magic
= MD_SB_MAGIC
;
350 sb
->major_version
= 0;
351 sb
->minor_version
= 90;
352 sb
->patch_version
= 0;
353 sb
->gvalid_words
= 0; /* ignored */
354 if (rfd
< 0 || read(rfd
, &sb
->set_uuid0
, 4) != 4)
355 sb
->set_uuid0
= random();
357 sb
->level
= info
->level
;
358 sb
->size
= info
->size
;
359 sb
->nr_disks
= info
->nr_disks
;
360 sb
->raid_disks
= info
->raid_disks
;
361 sb
->md_minor
= info
->md_minor
;
362 sb
->not_persistent
= 0;
363 if (rfd
< 0 || read(rfd
, &sb
->set_uuid1
, 12) != 12) {
364 sb
->set_uuid1
= random();
365 sb
->set_uuid2
= random();
366 sb
->set_uuid3
= random();
371 sb
->utime
= sb
->ctime
;
372 sb
->state
= info
->state
;
373 sb
->active_disks
= info
->active_disks
;
374 sb
->working_disks
= info
->working_disks
;
375 sb
->failed_disks
= info
->failed_disks
;
376 sb
->spare_disks
= info
->spare_disks
;
380 sb
->layout
= info
->layout
;
381 sb
->chunk_size
= info
->chunk_size
;
387 /* Add a device to the superblock being created */
388 static void add_to_super0(void *sbv
, mdu_disk_info_t
*dinfo
)
390 mdp_super_t
*sb
= sbv
;
391 mdp_disk_t
*dk
= &sb
->disks
[dinfo
->number
];
393 dk
->number
= dinfo
->number
;
394 dk
->major
= dinfo
->major
;
395 dk
->minor
= dinfo
->minor
;
396 dk
->raid_disk
= dinfo
->raid_disk
;
397 dk
->state
= dinfo
->state
;
400 static int store_super0(struct supertype
*st
, int fd
, void *sbv
)
403 unsigned long long dsize
;
404 unsigned long long offset
;
405 mdp_super_t
*super
= sbv
;
408 if (ioctl(fd
, BLKGETSIZE64
, &dsize
) != 0)
411 if (ioctl(fd
, BLKGETSIZE
, &size
))
414 dsize
= ((unsigned long long)size
)<<9;
417 if (dsize
< MD_RESERVED_SECTORS
*2)
420 offset
= MD_NEW_SIZE_SECTORS(dsize
>>9);
424 if (lseek64(fd
, offset
, 0)< 0LL)
427 if (write(fd
, super
, sizeof(*super
)) != sizeof(*super
))
434 static int write_init_super0(struct supertype
*st
, void *sbv
, mdu_disk_info_t
*dinfo
, char *devname
)
436 mdp_super_t
*sb
= sbv
;
437 int fd
= open(devname
, O_RDWR
, O_EXCL
);
441 fprintf(stderr
, Name
": Failed to open %s to write superblock\n", devname
);
445 sb
->disks
[dinfo
->number
].state
&= ~(1<<MD_DISK_FAULTY
);
446 sb
->disks
[dinfo
->number
].state
|= (1<<MD_DISK_SYNC
);
448 sb
->this_disk
= sb
->disks
[dinfo
->number
];
449 sb
->sb_csum
= calc_sb0_csum(sb
);
450 rv
= store_super0(st
, fd
, sb
);
452 if (sb
->state
& (1<<MD_SB_BITMAP_PRESENT
)) {
455 write(fd
, ((char*)sb
)+MD_SB_BYTES
, sizeof(bitmap_super_t
));
456 towrite
= 64*1024 - MD_SB_BYTES
- sizeof(bitmap_super_t
);
457 memset(buf
, 0xff, sizeof(buf
));
458 while (towrite
> 0) {
462 n
= write(fd
, buf
, n
);
474 fprintf(stderr
, Name
": failed to write superblock to %s\n", devname
);
478 static int compare_super0(void **firstp
, void *secondv
)
482 * 0 same, or first was empty, and second was copied
483 * 1 second had wrong number
487 mdp_super_t
*first
= *firstp
;
488 mdp_super_t
*second
= secondv
;
490 int uuid1
[4], uuid2
[4];
491 if (second
->md_magic
!= MD_SB_MAGIC
)
494 first
= malloc(MD_SB_BYTES
);
495 memcpy(first
, second
, MD_SB_BYTES
);
500 uuid_from_super0(uuid1
, first
);
501 uuid_from_super0(uuid2
, second
);
502 if (!same_uuid(uuid1
, uuid2
))
504 if (first
->major_version
!= second
->major_version
||
505 first
->minor_version
!= second
->minor_version
||
506 first
->patch_version
!= second
->patch_version
||
507 first
->gvalid_words
!= second
->gvalid_words
||
508 first
->ctime
!= second
->ctime
||
509 first
->level
!= second
->level
||
510 first
->size
!= second
->size
||
511 first
->raid_disks
!= second
->raid_disks
)
518 static int load_super0(struct supertype
*st
, int fd
, void **sbp
, char *devname
)
520 /* try to read in the superblock
523 * 1 on cannot get superblock
524 * 2 on superblock meaningless
527 unsigned long long dsize
;
528 unsigned long long offset
;
532 if (ioctl(fd
, BLKGETSIZE64
, &dsize
) != 0)
535 if (ioctl(fd
, BLKGETSIZE
, &size
)) {
537 fprintf(stderr
, Name
": cannot find device size for %s: %s\n",
538 devname
, strerror(errno
));
544 if (dsize
< MD_RESERVED_SECTORS
*2) {
546 fprintf(stderr
, Name
": %s is too small for md: size is %ld sectors.\n",
551 offset
= MD_NEW_SIZE_SECTORS(dsize
>>9);
555 ioctl(fd
, BLKFLSBUF
, 0); /* make sure we read current data */
557 if (lseek64(fd
, offset
, 0)< 0LL) {
559 fprintf(stderr
, Name
": Cannot seek to superblock on %s: %s\n",
560 devname
, strerror(errno
));
564 super
= malloc(MD_SB_BYTES
+ sizeof(bitmap_super_t
));
566 if (read(fd
, super
, sizeof(*super
)) != MD_SB_BYTES
) {
568 fprintf(stderr
, Name
": Cannot read superblock on %s\n",
574 if (super
->md_magic
!= MD_SB_MAGIC
) {
576 fprintf(stderr
, Name
": No super block found on %s (Expected magic %08x, got %08x)\n",
577 devname
, MD_SB_MAGIC
, super
->md_magic
);
582 if (super
->major_version
!= 0) {
584 fprintf(stderr
, Name
": Cannot interpret superblock on %s - version is %d\n",
585 devname
, super
->major_version
);
590 if (st
->ss
== NULL
) {
592 st
->minor_version
= 90;
593 st
->max_devs
= MD_SB_DISKS
;
599 static struct supertype
*match_metadata_desc0(char *arg
)
601 struct supertype
*st
= malloc(sizeof(*st
));
605 st
->minor_version
= 90;
606 st
->max_devs
= MD_SB_DISKS
;
607 if (strcmp(arg
, "0") == 0 ||
608 strcmp(arg
, "0.90") == 0 ||
609 strcmp(arg
, "default") == 0
617 static __u64
avail_size0(__u64 devsize
)
619 if (devsize
< MD_RESERVED_SECTORS
*2)
621 return MD_NEW_SIZE_SECTORS(devsize
);
624 static int add_internal_bitmap0(void *sbv
, int chunk
, int delay
, unsigned long long size
)
627 * The bitmap comes immediately after the superblock and must be 60K in size
628 * at most. The default size is between 30K and 60K
630 * size is in K, chunk is in bytes !!!
633 unsigned long long bits
= size
;
634 unsigned long long max_bits
= 60*1024*8;
635 unsigned long long min_chunk
;
636 mdp_super_t
*sb
= sbv
;
637 bitmap_super_t
*bms
= (bitmap_super_t
*)(((char*)sb
) + MD_SB_BYTES
);
641 while (bits
> max_bits
) {
647 else if (chunk
< min_chunk
)
648 return 0; /* chunk size too small */
650 sb
->state
|= (1<<MD_SB_BITMAP_PRESENT
);
652 memset(bms
, sizeof(*bms
), 0);
653 bms
->magic
= __le32_to_cpu(BITMAP_MAGIC
);
654 bms
->version
= __le32_to_cpu(BITMAP_MAJOR
);
655 uuid_from_super0((int*)bms
->uuid
, sb
);
656 bms
->chunksize
= __le32_to_cpu(chunk
);
657 bms
->daemon_sleep
= __le32_to_cpu(delay
);
658 bms
->sync_size
= __le64_to_cpu(size
);
666 void locate_bitmap0(struct supertype
*st
, int fd
)
668 unsigned long long dsize
;
670 unsigned long long offset
;
672 if (ioctl(fd
, BLKGETSIZE64
, &dsize
) != 0)
675 if (ioctl(fd
, BLKGETSIZE
, &size
))
678 dsize
= ((unsigned long long)size
)<<9;
681 if (dsize
< MD_RESERVED_SECTORS
*2)
684 offset
= MD_NEW_SIZE_SECTORS(dsize
>>9);
688 offset
+= MD_SB_BYTES
;
690 lseek64(fd
, offset
, 0);
693 int write_bitmap0(struct supertype
*st
, int fd
, void *sbv
)
696 unsigned long long dsize
;
697 unsigned long long offset
;
698 mdp_super_t
*sb
= sbv
;
706 if (ioctl(fd
, BLKGETSIZE64
, &dsize
) != 0)
709 if (ioctl(fd
, BLKGETSIZE
, &size
))
712 dsize
= ((unsigned long long)size
)<<9;
715 if (dsize
< MD_RESERVED_SECTORS
*2)
718 offset
= MD_NEW_SIZE_SECTORS(dsize
>>9);
722 if (lseek64(fd
, offset
+ 4096, 0)< 0LL)
726 write(fd
, ((char*)sb
)+MD_SB_BYTES
, sizeof(bitmap_super_t
));
727 towrite
= 64*1024 - MD_SB_BYTES
- sizeof(bitmap_super_t
);
728 memset(buf
, 0xff, sizeof(buf
));
729 while (towrite
> 0) {
733 n
= write(fd
, buf
, n
);
746 struct superswitch super0
= {
748 .examine_super
= examine_super0
,
749 .brief_examine_super
= brief_examine_super0
,
750 .detail_super
= detail_super0
,
751 .brief_detail_super
= brief_detail_super0
,
753 .uuid_from_super
= uuid_from_super0
,
754 .getinfo_super
= getinfo_super0
,
755 .update_super
= update_super0
,
756 .event_super
= event_super0
,
757 .init_super
= init_super0
,
758 .add_to_super
= add_to_super0
,
759 .store_super
= store_super0
,
760 .write_init_super
= write_init_super0
,
761 .compare_super
= compare_super0
,
762 .load_super
= load_super0
,
763 .match_metadata_desc
= match_metadata_desc0
,
764 .avail_size
= avail_size0
,
765 .add_internal_bitmap
= add_internal_bitmap0
,
766 .locate_bitmap
= locate_bitmap0
,
767 .write_bitmap
= write_bitmap0
,