2 * mdadm - manage Linux "md" devices aka RAID arrays.
4 * Copyright (C) 2001-2004 Neil Brown <neilb@cse.unsw.edu.au>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neilb@cse.unsw.edu.au>
24 * School of Computer Science and Engineering
25 * The University of New South Wales
31 #include <asm/byteorder.h>
34 * All handling for the 0.90.0 version superblock is in
37 * - finding, loading, and writing the superblock.
38 * - initialising a new superblock
39 * - printing the superblock for --examine
40 * - printing part of the superblock for --detail
45 static unsigned long calc_sb0_csum(mdp_super_t
*super
)
47 unsigned long csum
= super
->sb_csum
;
48 unsigned long newcsum
;
50 newcsum
= calc_csum(super
, MD_SB_BYTES
);
51 super
->sb_csum
= csum
;
55 static void examine_super0(void *sbv
)
57 mdp_super_t
*sb
= sbv
;
62 printf(" Magic : %08x\n", sb
->md_magic
);
63 printf(" Version : %02d.%02d.%02d\n", sb
->major_version
, sb
->minor_version
,
65 if (sb
->minor_version
>= 90)
66 printf(" UUID : %08x:%08x:%08x:%08x\n", sb
->set_uuid0
, sb
->set_uuid1
,
67 sb
->set_uuid2
, sb
->set_uuid3
);
69 printf(" UUID : %08x\n", sb
->set_uuid0
);
72 printf(" Creation Time : %.24s\n", ctime(&atime
));
73 c
=map_num(pers
, sb
->level
);
74 printf(" Raid Level : %s\n", c
?c
:"-unknown-");
75 if ((int)sb
->level
>= 0)
76 printf(" Device Size : %d%s\n", sb
->size
, human_size((long long)sb
->size
<<10));
77 printf(" Raid Devices : %d\n", sb
->raid_disks
);
78 printf(" Total Devices : %d\n", sb
->nr_disks
);
79 printf("Preferred Minor : %d\n", sb
->md_minor
);
82 printf(" Update Time : %.24s\n", ctime(&atime
));
83 printf(" State : %s\n",
84 (sb
->state
&(1<<MD_SB_CLEAN
))?"clean":"active");
85 if (sb
->state
& (1<<MD_SB_BITMAP_PRESENT
))
86 printf("Internal Bitmap : present\n");
87 printf(" Active Devices : %d\n", sb
->active_disks
);
88 printf("Working Devices : %d\n", sb
->working_disks
);
89 printf(" Failed Devices : %d\n", sb
->failed_disks
);
90 printf(" Spare Devices : %d\n", sb
->spare_disks
);
91 if (calc_sb0_csum(sb
) == sb
->sb_csum
)
92 printf(" Checksum : %x - correct\n", sb
->sb_csum
);
94 printf(" Checksum : %x - expected %lx\n", sb
->sb_csum
, calc_sb0_csum(sb
));
95 printf(" Events : %d.%d\n", sb
->events_hi
, sb
->events_lo
);
98 c
= map_num(r5layout
, sb
->layout
);
99 printf(" Layout : %s\n", c
?c
:"-unknown-");
105 printf(" Chunk Size : %dK\n", sb
->chunk_size
/1024);
108 printf(" Rounding : %dK\n", sb
->chunk_size
/1024);
113 printf(" Number Major Minor RaidDevice State\n");
114 for (d
= -1; d
<(signed int)(sb
->raid_disks
+sb
->spare_disks
); d
++) {
118 if (d
>=0) dp
= &sb
->disks
[d
];
119 else dp
= &sb
->this_disk
;
120 sprintf(nb
, "%4d", d
);
121 printf("%4s %5d %5d %5d %5d ", d
< 0 ? "this" : nb
,
122 dp
->number
, dp
->major
, dp
->minor
, dp
->raid_disk
);
123 if (dp
->state
& (1<<MD_DISK_FAULTY
)) printf(" faulty");
124 if (dp
->state
& (1<<MD_DISK_ACTIVE
)) printf(" active");
125 if (dp
->state
& (1<<MD_DISK_SYNC
)) printf(" sync");
126 if (dp
->state
& (1<<MD_DISK_REMOVED
)) printf(" removed");
127 if (dp
->state
== 0) printf(" spare");
128 if ((dv
=map_dev(dp
->major
, dp
->minor
)))
131 if (d
== -1) printf("\n");
135 static void brief_examine_super0(void *sbv
)
137 mdp_super_t
*sb
= sbv
;
138 char *c
=map_num(pers
, sb
->level
);
140 printf("ARRAY %s level=%s num-devices=%d UUID=",
141 get_md_name(sb
->md_minor
),
142 c
?c
:"-unknown-", sb
->raid_disks
);
143 if (sb
->minor_version
>= 90)
144 printf("%08x:%08x:%08x:%08x", sb
->set_uuid0
, sb
->set_uuid1
,
145 sb
->set_uuid2
, sb
->set_uuid3
);
147 printf("%08x", sb
->set_uuid0
);
151 static void detail_super0(void *sbv
)
153 mdp_super_t
*sb
= sbv
;
155 if (sb
->minor_version
>= 90)
156 printf("%08x:%08x:%08x:%08x", sb
->set_uuid0
, sb
->set_uuid1
,
157 sb
->set_uuid2
, sb
->set_uuid3
);
159 printf("%08x", sb
->set_uuid0
);
160 printf("\n Events : %d.%d\n\n", sb
->events_hi
, sb
->events_lo
);
163 static void brief_detail_super0(void *sbv
)
165 mdp_super_t
*sb
= sbv
;
167 if (sb
->minor_version
>= 90)
168 printf("%08x:%08x:%08x:%08x", sb
->set_uuid0
, sb
->set_uuid1
,
169 sb
->set_uuid2
, sb
->set_uuid3
);
171 printf("%08x", sb
->set_uuid0
);
174 static void uuid_from_super0(int uuid
[4], void * sbv
)
176 mdp_super_t
*super
= sbv
;
177 uuid
[0] = super
->set_uuid0
;
178 if (super
->minor_version
>= 90) {
179 uuid
[1] = super
->set_uuid1
;
180 uuid
[2] = super
->set_uuid2
;
181 uuid
[3] = super
->set_uuid3
;
189 static void getinfo_super0(struct mdinfo
*info
, void *sbv
)
191 mdp_super_t
*sb
= sbv
;
195 info
->array
.major_version
= sb
->major_version
;
196 info
->array
.minor_version
= sb
->minor_version
;
197 info
->array
.patch_version
= sb
->patch_version
;
198 info
->array
.raid_disks
= sb
->raid_disks
;
199 info
->array
.level
= sb
->level
;
200 info
->array
.md_minor
= sb
->md_minor
;
201 info
->array
.ctime
= sb
->ctime
;
203 info
->disk
.state
= sb
->this_disk
.state
;
204 info
->disk
.major
= sb
->this_disk
.major
;
205 info
->disk
.minor
= sb
->this_disk
.minor
;
206 info
->disk
.raid_disk
= sb
->this_disk
.raid_disk
;
208 info
->events
= md_event(sb
);
210 uuid_from_super0(info
->uuid
, sbv
);
212 /* work_disks is calculated rather than read directly */
213 for (i
=0; i
< MD_SB_DISKS
; i
++)
214 if ((sb
->disks
[i
].state
& (1<<MD_DISK_SYNC
)) &&
215 (sb
->disks
[i
].state
& (1<<MD_DISK_ACTIVE
)) &&
216 !(sb
->disks
[i
].state
& (1<<MD_DISK_FAULTY
)))
218 info
->array
.working_disks
= working
;
222 static int update_super0(struct mdinfo
*info
, void *sbv
, char *update
, char *devname
, int verbose
)
225 mdp_super_t
*sb
= sbv
;
226 if (strcmp(update
, "sparc2.2")==0 ) {
227 /* 2.2 sparc put the events in the wrong place
228 * So we copy the tail of the superblock
229 * up 4 bytes before continuing
231 __u32
*sb32
= (__u32
*)sb
;
232 memcpy(sb32
+MD_SB_GENERIC_CONSTANT_WORDS
+7,
233 sb32
+MD_SB_GENERIC_CONSTANT_WORDS
+7+1,
234 (MD_SB_WORDS
- (MD_SB_GENERIC_CONSTANT_WORDS
+7+1))*4);
235 fprintf (stderr
, Name
": adjusting superblock of %s for 2.2/sparc compatability.\n",
238 if (strcmp(update
, "super-minor") ==0) {
239 sb
->md_minor
= info
->array
.md_minor
;
241 fprintf(stderr
, Name
": updating superblock of %s with minor number %d\n",
242 devname
, info
->array
.md_minor
);
244 if (strcmp(update
, "summaries") == 0) {
246 /* set nr_disks, active_disks, working_disks,
247 * failed_disks, spare_disks based on disks[]
248 * array in superblock.
249 * Also make sure extra slots aren't 'failed'
251 sb
->nr_disks
= sb
->active_disks
=
252 sb
->working_disks
= sb
->failed_disks
=
254 for (i
=0; i
< MD_SB_DISKS
; i
++)
255 if (sb
->disks
[i
].major
||
256 sb
->disks
[i
].minor
) {
257 int state
= sb
->disks
[i
].state
;
258 if (state
& (1<<MD_DISK_REMOVED
))
261 if (state
& (1<<MD_DISK_ACTIVE
))
263 if (state
& (1<<MD_DISK_FAULTY
))
269 } else if (i
>= sb
->raid_disks
&& sb
->disks
[i
].number
== 0)
270 sb
->disks
[i
].state
= 0;
272 if (strcmp(update
, "force")==0) {
273 sb
->events_hi
= (info
->events
>>32) & 0xFFFFFFFF;
274 sb
->events_lo
= (info
->events
) & 0xFFFFFFFF;
275 if (sb
->level
== 5 || sb
->level
== 4 || sb
->level
== 6)
276 /* need to force clean */
277 sb
->state
|= (1 << MD_SB_CLEAN
);
279 if (strcmp(update
, "assemble")==0) {
280 int d
= info
->disk
.number
;
281 if (sb
->disks
[d
].state
!= info
->disk
.state
) {
282 sb
->disks
[d
].state
= info
->disk
.state
;
286 if (strcmp(update
, "newdev") == 0) {
287 int d
= info
->disk
.number
;
288 memset(&sb
->disks
[d
], 0, sizeof(sb
->disks
[d
]));
289 sb
->disks
[d
].number
= d
;
290 sb
->disks
[d
].major
= info
->disk
.major
;
291 sb
->disks
[d
].minor
= info
->disk
.minor
;
292 sb
->disks
[d
].raid_disk
= info
->disk
.raid_disk
;
293 sb
->disks
[d
].state
= info
->disk
.state
;
294 sb
->this_disk
= sb
->disks
[d
];
296 if (strcmp(update
, "grow") == 0) {
297 sb
->raid_disks
= info
->array
.raid_disks
;
298 sb
->nr_disks
= info
->array
.nr_disks
;
299 sb
->active_disks
= info
->array
.active_disks
;
300 sb
->working_disks
= info
->array
.working_disks
;
301 memset(&sb
->disks
[info
->disk
.number
], 0, sizeof(sb
->disks
[0]));
302 sb
->disks
[info
->disk
.number
].number
= info
->disk
.number
;
303 sb
->disks
[info
->disk
.number
].major
= info
->disk
.major
;
304 sb
->disks
[info
->disk
.number
].minor
= info
->disk
.minor
;
305 sb
->disks
[info
->disk
.number
].raid_disk
= info
->disk
.raid_disk
;
306 sb
->disks
[info
->disk
.number
].state
= info
->disk
.state
;
307 if (sb
->this_disk
.number
== info
->disk
.number
)
308 sb
->this_disk
= sb
->disks
[info
->disk
.number
];
310 if (strcmp(update
, "resync") == 0) {
311 /* make sure resync happens */
312 sb
->state
&= ~(1<<MD_SB_CLEAN
);
316 sb
->sb_csum
= calc_sb0_csum(sb
);
320 static __u64
event_super0(void *sbv
)
322 mdp_super_t
*sb
= sbv
;
328 static int init_super0(void **sbp
, mdu_array_info_t
*info
)
330 mdp_super_t
*sb
= malloc(MD_SB_BYTES
+ sizeof(bitmap_super_t
));
333 memset(sb
, 0, MD_SB_BYTES
+ sizeof(bitmap_super_t
));
335 if (info
->major_version
== -1) {
336 /* zeroing the superblock */
340 spares
= info
->working_disks
- info
->active_disks
;
341 if (info
->raid_disks
+ spares
> MD_SB_DISKS
) {
342 fprintf(stderr
, Name
": too many devices requested: %d+%d > %d\n",
343 info
->raid_disks
, spares
, MD_SB_DISKS
);
347 rfd
= open("/dev/urandom", O_RDONLY
);
348 sb
->md_magic
= MD_SB_MAGIC
;
349 sb
->major_version
= 0;
350 sb
->minor_version
= 90;
351 sb
->patch_version
= 0;
352 sb
->gvalid_words
= 0; /* ignored */
353 if (rfd
< 0 || read(rfd
, &sb
->set_uuid0
, 4) != 4)
354 sb
->set_uuid0
= random();
356 sb
->level
= info
->level
;
357 sb
->size
= info
->size
;
358 sb
->nr_disks
= info
->nr_disks
;
359 sb
->raid_disks
= info
->raid_disks
;
360 sb
->md_minor
= info
->md_minor
;
361 sb
->not_persistent
= 0;
362 if (rfd
< 0 || read(rfd
, &sb
->set_uuid1
, 12) != 12) {
363 sb
->set_uuid1
= random();
364 sb
->set_uuid2
= random();
365 sb
->set_uuid3
= random();
370 sb
->utime
= sb
->ctime
;
371 sb
->state
= info
->state
;
372 sb
->active_disks
= info
->active_disks
;
373 sb
->working_disks
= info
->working_disks
;
374 sb
->failed_disks
= info
->failed_disks
;
375 sb
->spare_disks
= info
->spare_disks
;
379 sb
->layout
= info
->layout
;
380 sb
->chunk_size
= info
->chunk_size
;
386 /* Add a device to the superblock being created */
387 static void add_to_super0(void *sbv
, mdu_disk_info_t
*dinfo
)
389 mdp_super_t
*sb
= sbv
;
390 mdp_disk_t
*dk
= &sb
->disks
[dinfo
->number
];
392 dk
->number
= dinfo
->number
;
393 dk
->major
= dinfo
->major
;
394 dk
->minor
= dinfo
->minor
;
395 dk
->raid_disk
= dinfo
->raid_disk
;
396 dk
->state
= dinfo
->state
;
399 static int store_super0(int fd
, void *sbv
)
402 unsigned long long dsize
;
403 unsigned long long offset
;
404 mdp_super_t
*super
= sbv
;
407 if (ioctl(fd
, BLKGETSIZE64
, &dsize
) != 0)
410 if (ioctl(fd
, BLKGETSIZE
, &size
))
413 dsize
= ((unsigned long long)size
)<<9;
416 if (dsize
< MD_RESERVED_SECTORS
*2)
419 offset
= MD_NEW_SIZE_SECTORS(dsize
>>9);
423 if (lseek64(fd
, offset
, 0)< 0LL)
426 if (write(fd
, super
, sizeof(*super
)) != sizeof(*super
))
433 static int write_init_super0(struct supertype
*st
, void *sbv
, mdu_disk_info_t
*dinfo
, char *devname
)
435 mdp_super_t
*sb
= sbv
;
436 int fd
= open(devname
, O_RDWR
, O_EXCL
);
440 fprintf(stderr
, Name
": Failed to open %s to write superblock\n", devname
);
444 sb
->disks
[dinfo
->number
].state
&= ~(1<<MD_DISK_FAULTY
);
445 sb
->disks
[dinfo
->number
].state
|= (1<<MD_DISK_SYNC
);
447 sb
->this_disk
= sb
->disks
[dinfo
->number
];
448 sb
->sb_csum
= calc_sb0_csum(sb
);
449 rv
= store_super0(fd
, sb
);
451 if (sb
->state
& (1<<MD_SB_BITMAP_PRESENT
)) {
454 write(fd
, ((char*)sb
)+MD_SB_BYTES
, sizeof(bitmap_super_t
));
455 towrite
= 64*1024 - MD_SB_BYTES
- sizeof(bitmap_super_t
);
456 memset(buf
, 0xff, sizeof(buf
));
457 while (towrite
> 0) {
461 n
= write(fd
, buf
, n
);
473 fprintf(stderr
, Name
": failed to write superblock to %s\n", devname
);
477 static int compare_super0(void **firstp
, void *secondv
)
481 * 0 same, or first was empty, and second was copied
482 * 1 second had wrong number
486 mdp_super_t
*first
= *firstp
;
487 mdp_super_t
*second
= secondv
;
489 int uuid1
[4], uuid2
[4];
490 if (second
->md_magic
!= MD_SB_MAGIC
)
493 first
= malloc(MD_SB_BYTES
);
494 memcpy(first
, second
, MD_SB_BYTES
);
499 uuid_from_super0(uuid1
, first
);
500 uuid_from_super0(uuid2
, second
);
501 if (!same_uuid(uuid1
, uuid2
))
503 if (first
->major_version
!= second
->major_version
||
504 first
->minor_version
!= second
->minor_version
||
505 first
->patch_version
!= second
->patch_version
||
506 first
->gvalid_words
!= second
->gvalid_words
||
507 first
->ctime
!= second
->ctime
||
508 first
->level
!= second
->level
||
509 first
->size
!= second
->size
||
510 first
->raid_disks
!= second
->raid_disks
)
517 static int load_super0(struct supertype
*st
, int fd
, void **sbp
, char *devname
)
519 /* try to read in the superblock
522 * 1 on cannot get superblock
523 * 2 on superblock meaningless
526 unsigned long long dsize
;
527 unsigned long long offset
;
531 if (ioctl(fd
, BLKGETSIZE64
, &dsize
) != 0)
534 if (ioctl(fd
, BLKGETSIZE
, &size
)) {
536 fprintf(stderr
, Name
": cannot find device size for %s: %s\n",
537 devname
, strerror(errno
));
543 if (dsize
< MD_RESERVED_SECTORS
*2) {
545 fprintf(stderr
, Name
": %s is too small for md: size is %ld sectors.\n",
550 offset
= MD_NEW_SIZE_SECTORS(dsize
>>9);
554 ioctl(fd
, BLKFLSBUF
, 0); /* make sure we read current data */
556 if (lseek64(fd
, offset
, 0)< 0LL) {
558 fprintf(stderr
, Name
": Cannot seek to superblock on %s: %s\n",
559 devname
, strerror(errno
));
563 super
= malloc(MD_SB_BYTES
+ sizeof(bitmap_super_t
));
565 if (read(fd
, super
, sizeof(*super
)) != MD_SB_BYTES
) {
567 fprintf(stderr
, Name
": Cannot read superblock on %s\n",
573 if (super
->md_magic
!= MD_SB_MAGIC
) {
575 fprintf(stderr
, Name
": No super block found on %s (Expected magic %08x, got %08x)\n",
576 devname
, MD_SB_MAGIC
, super
->md_magic
);
581 if (super
->major_version
!= 0) {
583 fprintf(stderr
, Name
": Cannot interpret superblock on %s - version is %d\n",
584 devname
, super
->major_version
);
589 if (st
->ss
== NULL
) {
591 st
->minor_version
= 90;
597 static struct supertype
*match_metadata_desc0(char *arg
)
599 struct supertype
*st
= malloc(sizeof(*st
));
603 st
->minor_version
= 90;
604 if (strcmp(arg
, "0") == 0 ||
605 strcmp(arg
, "0.90") == 0 ||
606 strcmp(arg
, "default") == 0
614 static __u64
avail_size0(__u64 devsize
)
616 if (devsize
< MD_RESERVED_SECTORS
*2)
618 return MD_NEW_SIZE_SECTORS(devsize
);
621 static int add_internal_bitmap0(void *sbv
, int chunk
, int delay
, unsigned long long size
)
624 * The bitmap comes immediately after the superblock and must be 60K in size
625 * at most. The default size is between 30K and 60K
627 * size is in K, chunk is in bytes !!!
630 unsigned long long bits
= size
;
631 unsigned long long max_bits
= 60*1024*8;
632 unsigned long long min_chunk
;
633 mdp_super_t
*sb
= sbv
;
634 bitmap_super_t
*bms
= (bitmap_super_t
*)(((char*)sb
) + MD_SB_BYTES
);
638 while (bits
> max_bits
) {
644 else if (chunk
< min_chunk
)
645 return 0; /* chunk size too small */
647 sb
->state
|= (1<<MD_SB_BITMAP_PRESENT
);
649 memset(bms
, sizeof(*bms
), 0);
650 bms
->magic
= __le32_to_cpu(BITMAP_MAGIC
);
651 bms
->version
= __le32_to_cpu(BITMAP_MAJOR
);
652 uuid_from_super0((int*)bms
->uuid
, sb
);
653 bms
->chunksize
= __le32_to_cpu(chunk
);
654 bms
->daemon_sleep
= __le32_to_cpu(delay
);
655 bms
->sync_size
= __le64_to_cpu(size
);
663 void locate_bitmap0(struct supertype
*st
, int fd
)
665 unsigned long long dsize
;
667 unsigned long long offset
;
669 if (ioctl(fd
, BLKGETSIZE64
, &dsize
) != 0)
672 if (ioctl(fd
, BLKGETSIZE
, &size
))
675 dsize
= ((unsigned long long)size
)<<9;
678 if (dsize
< MD_RESERVED_SECTORS
*2)
681 offset
= MD_NEW_SIZE_SECTORS(dsize
>>9);
685 offset
+= MD_SB_BYTES
;
687 lseek64(fd
, offset
, 0);
690 int write_bitmap0(struct supertype
*st
, int fd
, void *sbv
)
693 unsigned long long dsize
;
694 unsigned long long offset
;
695 mdp_super_t
*sb
= sbv
;
703 if (ioctl(fd
, BLKGETSIZE64
, &dsize
) != 0)
706 if (ioctl(fd
, BLKGETSIZE
, &size
))
709 dsize
= ((unsigned long long)size
)<<9;
712 if (dsize
< MD_RESERVED_SECTORS
*2)
715 offset
= MD_NEW_SIZE_SECTORS(dsize
>>9);
719 if (lseek64(fd
, offset
+ 4096, 0)< 0LL)
723 write(fd
, ((char*)sb
)+MD_SB_BYTES
, sizeof(bitmap_super_t
));
724 towrite
= 64*1024 - MD_SB_BYTES
- sizeof(bitmap_super_t
);
725 memset(buf
, 0xff, sizeof(buf
));
726 while (towrite
> 0) {
730 n
= write(fd
, buf
, n
);
743 struct superswitch super0
= {
744 .examine_super
= examine_super0
,
745 .brief_examine_super
= brief_examine_super0
,
746 .detail_super
= detail_super0
,
747 .brief_detail_super
= brief_detail_super0
,
748 .uuid_from_super
= uuid_from_super0
,
749 .getinfo_super
= getinfo_super0
,
750 .update_super
= update_super0
,
751 .event_super
= event_super0
,
752 .init_super
= init_super0
,
753 .add_to_super
= add_to_super0
,
754 .store_super
= store_super0
,
755 .write_init_super
= write_init_super0
,
756 .compare_super
= compare_super0
,
757 .load_super
= load_super0
,
758 .match_metadata_desc
= match_metadata_desc0
,
759 .avail_size
= avail_size0
,
760 .add_internal_bitmap
= add_internal_bitmap0
,
761 .locate_bitmap
= locate_bitmap0
,
762 .write_bitmap
= write_bitmap0
,