]> git.ipfire.org Git - thirdparty/mdadm.git/blame - util.c
Fix open_container
[thirdparty/mdadm.git] / util.c
CommitLineData
64c4757e 1/*
9a9dab36 2 * mdadm - manage Linux "md" devices aka RAID arrays.
64c4757e 3 *
e736b623 4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
64c4757e
NB
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
e736b623 22 * Email: <neilb@suse.de>
64c4757e
NB
23 */
24
9a9dab36 25#include "mdadm.h"
64c4757e 26#include "md_p.h"
edd8d13c 27#include <sys/socket.h>
64c4757e 28#include <sys/utsname.h>
9fe32043 29#include <sys/wait.h>
edd8d13c 30#include <sys/un.h>
98c6faba 31#include <ctype.h>
a322f70c 32#include <dirent.h>
a931db9e 33#include <signal.h>
0a816ef9 34
a0963a86
JS
35int __offroot;
36
0a816ef9
NB
/*
 * The definitions below are copied from linux/blkpg.h: they are not
 * available from any other header and it isn't safe to #include
 * linux/ * headers directly.
 */

#define BLKPG	_IO(0x12,105)

/* Argument block passed to the BLKPG ioctl */
struct blkpg_ioctl_arg {
	int op;		/* one of the BLKPG_*_PARTITION subfunctions below */
	int flags;
	int datalen;	/* size in bytes of the object 'data' points to */
	void *data;
};

/* Subfunction codes stored in blkpg_ioctl_arg.op */
#define BLKPG_ADD_PARTITION	1
#define BLKPG_DEL_PARTITION	2

/* Sizes of the name fields below.  Unused at present. */
#define BLKPG_DEVNAMELTH	64
#define BLKPG_VOLNAMELTH	64

/* Payload used with BLKPG_ADD_PARTITION and BLKPG_DEL_PARTITION */
struct blkpg_partition {
	long long start;		/* starting offset in bytes */
	long long length;		/* length in bytes */
	int pno;			/* partition number */
	char devname[BLKPG_DEVNAMELTH];	/* partition name, like sda5 or c0d1p2,
					   to be used in kernel messages */
	char volname[BLKPG_VOLNAMELTH];	/* volume label */
};
64c4757e 69
0f22b998 70#include "part.h"
056b331e
N
71
/* Compile-time check that also yields a value: evaluates to 0 (of type
   size_t) when 'e' is false and breaks the build (negative bit-field
   width) when 'e' is true.  Being a plain expression it can be used
   e.g. in a structure initializer, or wherever else comma expressions
   aren't permitted. */
#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))

/* Statement form: force a compilation error if condition is true */
#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
80
64c4757e
NB
/*
 * Parse a 128 bit uuid into 4 integers.
 * The format is 32 hex nibbles, optionally separated by any of the
 * characters ':', '.', ' ' or '-'.
 * Returns 1 if exactly 32 hex digits were found, otherwise 0.
 * On failure uuid[] holds whatever was accumulated so far and must not
 * be relied upon.
 */
int parse_uuid(char *str, int uuid[4])
{
	int hit = 0; /* number of Hex digIT */
	int i;
	char c;

	for (i = 0; i < 4; i++)
		uuid[i] = 0;

	while ((c = *str++)) {
		unsigned int n;

		if (c >= '0' && c <= '9')
			n = c - '0';
		else if (c >= 'a' && c <= 'f')
			n = 10 + c - 'a';
		else if (c >= 'A' && c <= 'F')
			n = 10 + c - 'A';
		else if (strchr(":. -", c))
			continue;
		else
			return 0;

		if (hit < 32) {
			/* Shift in unsigned arithmetic: left-shifting a
			 * signed int into (or past) its sign bit is
			 * undefined behaviour.
			 */
			unsigned int word = (unsigned int)uuid[hit/8];

			uuid[hit/8] = (int)((word << 4) + n);
		}
		hit++;
	}
	if (hit == 32)
		return 1;
	return 0;
}
116
117
118/*
119 * Get the md version number.
120 * We use the RAID_VERSION ioctl if it is supported
121 * If not, but we have a block device with major '9', we assume
122 * 0.36.0
123 *
124 * Return version number as 24 but number - assume version parts
125 * always < 255
126 */
127
128int md_get_version(int fd)
129{
130 struct stat stb;
131 mdu_version_t vers;
132
133 if (fstat(fd, &stb)<0)
134 return -1;
135 if ((S_IFMT&stb.st_mode) != S_IFBLK)
136 return -1;
137
138 if (ioctl(fd, RAID_VERSION, &vers) == 0)
682c7051 139 return (vers.major*10000) + (vers.minor*100) + vers.patchlevel;
5787fa49
NB
140 if (errno == EACCES)
141 return -1;
0df46c2a 142 if (major(stb.st_rdev) == MD_MAJOR)
682c7051 143 return (3600);
64c4757e
NB
144 return -1;
145}
146
64c4757e
NB
/*
 * Return the running kernel's version, taken from uname() "release",
 * encoded as major*1000000 + minor*1000 + patch.
 * Components missing from the release string count as 0.
 * Returns -1 if uname() fails.
 */
int get_linux_version()
{
	struct utsname name;
	int part[3] = { 0, 0, 0 };
	char *cp;
	int i;

	if (uname(&name) < 0)
		return -1;

	cp = name.release;
	part[0] = strtoul(cp, &cp, 10);
	/* each further component must be introduced by a '.' */
	for (i = 1; i < 3 && *cp == '.'; i++)
		part[i] = strtoul(cp+1, &cp, 10);

	return (part[0]*1000000) + (part[1]*1000) + part[2];
}
164
71204a50 165#ifndef MDASSEMBLE
bc77ed53
DW
166int mdadm_version(char *version)
167{
168 int a, b, c;
169 char *cp;
170
171 if (!version)
172 version = Version;
173
174 cp = strchr(version, '-');
175 if (!cp || *(cp+1) != ' ' || *(cp+2) != 'v')
176 return -1;
177 cp += 3;
178 a = strtoul(cp, &cp, 10);
179 if (*cp != '.')
180 return -1;
181 b = strtoul(cp+1, &cp, 10);
182 if (*cp == '.')
183 c = strtoul(cp+1, &cp, 10);
184 else
185 c = 0;
186 if (*cp != ' ' && *cp != '-')
187 return -1;
188 return (a*1000000)+(b*1000)+c;
189}
190
84e11361
N
long long parse_size(char *size)
{
	/* Parse 'size', which should be a number optionally followed
	 * by 'K', 'M', or 'G'.  Without a suffix, K is assumed.
	 * The value returned is in sectors (half-K).
	 * -1 is returned if anything is left over after the number and
	 * suffix.  A suffix is only consumed for a positive number.
	 */
	char *c;
	long long s = strtoll(size, &c, 10);

	if (s > 0) {
		if (*c == 'M') {
			c++;
			s *= 1024 * 2;
		} else if (*c == 'G') {
			c++;
			s *= 1024 * 1024 * 2;
		} else {
			/* explicit 'K', or no recognised suffix at all */
			if (*c == 'K')
				c++;
			s *= 2;
		}
	}
	return *c ? -1 : s;
}
222
4a06e2c2
N
int parse_layout_10(char *layout)
{
	/* Parse the layout string for raid10:
	 * 'f', 'o' or 'n' followed by a number of copies (1..200).
	 * Returns the encoded layout, or -1 if the string is malformed.
	 */
	char kind = layout[0];
	char *end;
	int copies;

	if (kind != 'n' && kind != 'f' && kind != 'o')
		return -1;

	copies = strtoul(layout+1, &end, 10);
	if (copies < 1 || copies > 200 || *end)
		return -1;

	switch (kind) {
	case 'n':
		/* near copies live in the low byte, with one far copy */
		return 256 + copies;
	case 'o':
		/* as 'f' below, with the 0x10000 offset flag set */
		return 0x10000 + (copies<<8) + 1;
	default:
		/* 'f': the count goes in the second byte, one near copy */
		return 1 + (copies<<8);
	}
}
242
243int parse_layout_faulty(char *layout)
244{
245 /* Parse the layout string for 'faulty' */
246 int ln = strcspn(layout, "0123456789");
247 char *m = strdup(layout);
248 int mode;
249 m[ln] = 0;
250 mode = map_name(faultylayout, m);
251 if (mode == UnSet)
252 return -1;
253
254 return mode | (atoi(layout+ln)<< ModeShift);
255}
eb3929a4 256#endif
4a06e2c2 257
0430ed48
NB
void remove_partitions(int fd)
{
	/* Remove partitions from this block device by issuing
	 * BLKPG_DEL_PARTITION for partition numbers 0 to 15.
	 * Errors are ignored.
	 * This is used for components added to an array.
	 */
#ifdef BLKPG_DEL_PARTITION
	struct blkpg_partition part;
	struct blkpg_ioctl_arg req;
	int pno;

	memset(&part, 0, sizeof(part));
	req.op = BLKPG_DEL_PARTITION;
	req.flags = 0;
	req.datalen = sizeof(part);
	req.data = (void*)&part;

	for (pno = 0; pno < 16; pno++) {
		part.pno = pno;
		ioctl(fd, BLKPG, &req);
	}
#endif
}
276
691c6ee1
N
277int test_partition(int fd)
278{
279 /* Check if fd is a whole-disk or a partition.
280 * BLKPG will return EINVAL on a partition, and BLKPG_DEL_PARTITION
281 * will return ENXIO on an invalid partition number.
282 */
283 struct blkpg_ioctl_arg a;
284 struct blkpg_partition p;
285 a.op = BLKPG_DEL_PARTITION;
286 a.data = (void*)&p;
287 a.datalen = sizeof(p);
288 a.flags = 0;
289 memset(a.data, 0, a.datalen);
290 p.pno = 1<<30;
291 if (ioctl(fd, BLKPG, &a) == 0)
292 /* Very unlikely, but not a partition */
293 return 0;
294 if (errno == ENXIO)
295 /* not a partition */
296 return 0;
297
298 return 1;
299}
300
bfd76b93
CA
/*
 * As test_partition(), but for a device identified by number.
 * Returns -1 if the device cannot be opened, otherwise the result
 * of test_partition().
 */
int test_partition_from_id(dev_t id)
{
	/* two "%d" fields plus ':' and NUL can need up to 24 bytes */
	char buf[32];
	int fd, rv;

	snprintf(buf, sizeof(buf), "%d:%d", (int)major(id), (int)minor(id));
	fd = dev_open(buf, O_RDONLY);
	if (fd < 0)
		return -1;
	rv = test_partition(fd);
	close(fd);
	return rv;
}
691c6ee1 314
de5a472e 315int enough(int level, int raid_disks, int layout, int clean, char *avail)
64c4757e 316{
265e0f17 317 int copies, first;
de5a472e
N
318 int i;
319 int avail_disks = 0;
320
321 for (i = 0; i < raid_disks; i++)
322 avail_disks += !!avail[i];
323
64c4757e 324 switch (level) {
265e0f17
NB
325 case 10:
326 /* This is the tricky one - we need to check
327 * which actual disks are present.
328 */
702b557b 329 copies = (layout&255)* ((layout>>8) & 255);
265e0f17
NB
330 first=0;
331 do {
332 /* there must be one of the 'copies' form 'first' */
333 int n = copies;
334 int cnt=0;
2117ad1d 335 int this = first;
265e0f17 336 while (n--) {
2117ad1d 337 if (avail[this])
265e0f17 338 cnt++;
2117ad1d 339 this = (this+1) % raid_disks;
265e0f17
NB
340 }
341 if (cnt == 0)
342 return 0;
2117ad1d 343 first = (first+(layout&255)) % raid_disks;
265e0f17
NB
344 } while (first != 0);
345 return 1;
e5329c37 346
df0d4ea0 347 case LEVEL_MULTIPATH:
e0d19036 348 return avail_disks>= 1;
df0d4ea0 349 case LEVEL_LINEAR:
64c4757e
NB
350 case 0:
351 return avail_disks == raid_disks;
352 case 1:
353 return avail_disks >= 1;
354 case 4:
355 case 5:
583315d9
NB
356 if (clean)
357 return avail_disks >= raid_disks-1;
358 else
359 return avail_disks >= raid_disks;
98c6faba 360 case 6:
583315d9
NB
361 if (clean)
362 return avail_disks >= raid_disks-2;
363 else
364 return avail_disks >= raid_disks;
64c4757e
NB
365 default:
366 return 0;
367 }
368}
369
8453e704
N
370int enough_fd(int fd)
371{
372 struct mdu_array_info_s array;
373 struct mdu_disk_info_s disk;
374 int avail_disks = 0;
aabe020d 375 int i, rv;
8453e704
N
376 char *avail;
377
378 if (ioctl(fd, GET_ARRAY_INFO, &array) != 0 ||
379 array.raid_disks <= 0)
380 return 0;
381 avail = calloc(array.raid_disks, 1);
480f3566 382 for (i=0; i < MAX_DISKS && array.nr_disks > 0; i++) {
8453e704
N
383 disk.number = i;
384 if (ioctl(fd, GET_DISK_INFO, &disk) != 0)
385 continue;
9e6d9291
N
386 if (disk.major == 0 && disk.minor == 0)
387 continue;
388 array.nr_disks--;
389
8453e704
N
390 if (! (disk.state & (1<<MD_DISK_SYNC)))
391 continue;
392 if (disk.raid_disk < 0 || disk.raid_disk >= array.raid_disks)
393 continue;
394 avail_disks++;
395 avail[disk.raid_disk] = 1;
396 }
397 /* This is used on an active array, so assume it is clean */
aabe020d 398 rv = enough(array.level, array.raid_disks, array.layout,
de5a472e 399 1, avail);
aabe020d
JS
400 free(avail);
401 return rv;
8453e704
N
402}
403
404
22e263f6
AC
/* A uuid with all four words zero */
const int uuid_zero[4] = { 0 };
406
/*
 * Compare two uuids; returns 1 if they match and 0 otherwise.
 * With 'swapuuid' set, each 32-bit word of 'b' is compared with its
 * bytes in the reverse order: parsed uuids are host-endian while the
 * uuids from some superblocks are big-endian, so where those differ
 * we need to swap.
 */
int same_uuid(int a[4], int b[4], int swapuuid)
{
	unsigned char *ac = (unsigned char *)a;
	unsigned char *bc = (unsigned char *)b;
	int word, byte;

	if (!swapuuid) {
		for (word = 0; word < 4; word++)
			if (a[word] != b[word])
				return 0;
		return 1;
	}

	for (word = 0; word < 16; word += 4)
		for (byte = 0; byte < 4; byte++)
			if (ac[word+byte] != bc[word+3-byte])
				return 0;
	return 1;
}
350f29f9
NB
/*
 * Copy the 16 byte uuid 'b' to 'a'.
 * With 'swapuuid' set, the bytes of each 32-bit word are reversed on
 * the way: parsed uuids are host-endian while the uuids from some
 * superblocks are big-endian, so where those differ we need to swap.
 */
void copy_uuid(void *a, int b[4], int swapuuid)
{
	unsigned char *dst = (unsigned char *)a;
	unsigned char *src = (unsigned char *)b;
	int word, byte;

	if (!swapuuid) {
		memcpy(a, b, 16);
		return;
	}

	for (word = 0; word < 16; word += 4)
		for (byte = 0; byte < 4; byte++)
			dst[word+byte] = src[word+3-byte];
}
64c4757e 453
/*
 * Format a uuid into 'buf' as "UUID-" followed by four groups of
 * eight lower-case hex digits, the groups separated by 'sep'.
 * 'swap' is passed on to copy_uuid().
 * 'buf' must have room for 41 bytes (5 + 32 digits + 3 separators +
 * the terminating NUL).  Returns 'buf'.
 */
char *__fname_from_uuid(int id[4], int swap, char *buf, char sep)
{
	static const char hex[] = "0123456789abcdef";
	char uuid[16];
	char *c;
	int word, byte;

	strcpy(buf, "UUID-");
	c = buf + strlen(buf);
	copy_uuid(uuid, id, swap);
	for (word = 0; word < 4; word++) {
		if (word)
			*c++ = sep;
		/* the bytes of each word are emitted last-to-first */
		for (byte = 3; byte >= 0; byte--) {
			unsigned char v = (unsigned char) uuid[byte + 4*word];

			*c++ = hex[v >> 4];
			*c++ = hex[v & 15];
		}
	}
	*c = '\0';
	return buf;
}
473
474char *fname_from_uuid(struct supertype *st, struct mdinfo *info, char *buf, char sep)
475{
86983cce
N
476 // dirty hack to work around an issue with super1 superblocks...
477 // super1 superblocks need swapuuid set in order for assembly to
478 // work, but can't have it set if we want this printout to match
479 // all the other uuid printouts in super1.c, so we force swapuuid
480 // to 1 to make our printout match the rest of super1
481 return __fname_from_uuid(info->uuid, (st->ss == &super1) ? 1 : st->ss->swapuuid, buf, sep);
d7288ddc
N
482}
483
435d4ebb 484#ifndef MDASSEMBLE
682c7051
NB
485int check_ext2(int fd, char *name)
486{
487 /*
488 * Check for an ext2fs file system.
489 * Superblock is always 1K at 1K offset
490 *
491 * s_magic is le16 at 56 == 0xEF53
492 * report mtime - le32 at 44
493 * blocks - le32 at 4
494 * logblksize - le32 at 24
495 */
496 unsigned char sb[1024];
497 time_t mtime;
498 int size, bsize;
499 if (lseek(fd, 1024,0)!= 1024)
500 return 0;
501 if (read(fd, sb, 1024)!= 1024)
502 return 0;
503 if (sb[56] != 0x53 || sb[57] != 0xef)
504 return 0;
505
506 mtime = sb[44]|(sb[45]|(sb[46]|sb[47]<<8)<<8)<<8;
507 bsize = sb[24]|(sb[25]|(sb[26]|sb[27]<<8)<<8)<<8;
508 size = sb[4]|(sb[5]|(sb[6]|sb[7]<<8)<<8)<<8;
509 fprintf(stderr, Name ": %s appears to contain an ext2fs file system\n",
510 name);
511 fprintf(stderr," size=%dK mtime=%s",
512 size*(1<<bsize), ctime(&mtime));
513 return 1;
514}
515
516int check_reiser(int fd, char *name)
517{
518 /*
519 * superblock is at 64K
520 * size is 1024;
521 * Magic string "ReIsErFs" or "ReIsEr2Fs" at 52
522 *
523 */
524 unsigned char sb[1024];
881990a2 525 unsigned long size;
682c7051
NB
526 if (lseek(fd, 64*1024, 0) != 64*1024)
527 return 0;
528 if (read(fd, sb, 1024) != 1024)
529 return 0;
a46f4061
NB
530 if (strncmp((char*)sb+52, "ReIsErFs",8)!=0 &&
531 strncmp((char*)sb+52, "ReIsEr2Fs",9)!=0)
682c7051
NB
532 return 0;
533 fprintf(stderr, Name ": %s appears to contain a reiserfs file system\n",name);
534 size = sb[0]|(sb[1]|(sb[2]|sb[3]<<8)<<8)<<8;
881990a2 535 fprintf(stderr, " size = %luK\n", size*4);
aba69144 536
682c7051
NB
537 return 1;
538}
539
540int check_raid(int fd, char *name)
541{
4b1ac34b 542 struct mdinfo info;
682c7051 543 time_t crtime;
d078d77c 544 char *level;
82d9eba6 545 struct supertype *st = guess_super(fd);
f9ce90ba 546
82d9eba6 547 if (!st) return 0;
4a96d9ff 548 st->ignore_hw_compat = 1;
3da92f27 549 st->ss->load_super(st, fd, name);
82d9eba6
NB
550 /* Looks like a raid array .. */
551 fprintf(stderr, Name ": %s appears to be part of a raid array:\n",
552 name);
a5d85af7 553 st->ss->getinfo_super(st, &info, NULL);
3da92f27 554 st->ss->free_super(st);
82d9eba6 555 crtime = info.array.ctime;
d078d77c
NB
556 level = map_num(pers, info.array.level);
557 if (!level) level = "-unknown-";
558 fprintf(stderr, " level=%s devices=%d ctime=%s",
559 level, info.array.raid_disks, ctime(&crtime));
82d9eba6 560 return 1;
682c7051
NB
561}
562
682c7051
NB
563int ask(char *mesg)
564{
565 char *add = "";
566 int i;
567 for (i=0; i<5; i++) {
568 char buf[100];
569 fprintf(stderr, "%s%s", mesg, add);
570 fflush(stderr);
571 if (fgets(buf, 100, stdin)==NULL)
572 return 0;
573 if (buf[0]=='y' || buf[0]=='Y')
574 return 1;
575 if (buf[0]=='n' || buf[0]=='N')
576 return 0;
577 add = "(y/n) ";
578 }
579 fprintf(stderr, Name ": assuming 'no'\n");
580 return 0;
581}
435d4ebb 582#endif /* MDASSEMBLE */
682c7051 583
int is_standard(char *dev, int *nump)
{
	/* tests if dev is a "standard" md dev name.
	 * i.e if the last component is "/dNN", "/md_dNN" or "/mdNN",
	 * or is "/NN" inside a directory called "md",
	 * where NN is a string of digits
	 * Returns 1 if a partitionable standard,
	 * -1 if non-partitionable,
	 * 0 if not a standard name.
	 * On a non-zero return the number is stored in *nump, if nump
	 * is not NULL.
	 */
	char *d = strrchr(dev, '/');
	int type = 0;
	int num;

	if (!d)
		return 0;
	if (strncmp(d, "/d",2)==0)
		d += 2, type=1; /* /dev/md/dN{pM} */
	else if (strncmp(d, "/md_d", 5)==0)
		d += 5, type=1; /* /dev/md_dN{pM} */
	else if (strncmp(d, "/md", 3)==0)
		d += 3, type=-1; /* /dev/mdN */
	else if (d-dev > 3 && strncmp(d-2, "md/", 3)==0)
		d += 1, type=-1; /* /dev/md/N */
	else
		return 0;
	if (!*d)
		return 0;
	num = atoi(d);
	/* isdigit() must be given an unsigned char value: a plain
	 * 'char' with the top bit set is undefined behaviour.
	 */
	while (isdigit((unsigned char)*d))
		d++;
	if (*d)
		return 0;
	if (nump) *nump = num;

	return type;
}
619
/*
 * Checksum the first 'bytes' bytes of 'super': the buffer is summed
 * as 32-bit words (bytes/4 of them) into a 64-bit total, and the
 * overflow above 32 bits is then added back into the low 32 bits.
 */
unsigned long calc_csum(void *super, int bytes)
{
	unsigned int *word = (unsigned int*) super;
	unsigned long long sum = 0;
	unsigned int csum;
	int left = bytes/4;

	while (left-- > 0)
		sum += *word++;
	csum = (sum & 0xffffffff) + (sum >> 32);
#ifdef __alpha__
/* The in-kernel checksum calculation is always 16bit on
 * the alpha, though it is 32 bit on i386...
 * I wonder what it is elsewhere... (it uses an API in
 * a way that it shouldn't).
 */
	csum = (csum & 0xffff) + (csum >> 16);
	csum = (csum & 0xffff) + (csum >> 16);
#endif
	return csum;
}
cd29a5c8 641
435d4ebb 642#ifndef MDASSEMBLE
/*
 * Describe 'bytes' in both binary and decimal units, e.g.
 * " (10.00 MiB 10.49 MB)".  Sizes below 5000KiB give an empty string.
 * The result lives in a static buffer which is overwritten by the
 * next call.
 */
char *human_size(long long bytes)
{
	static char buf[30];
	long cbin, cdec;

	/* We convert bytes to either centi-M{ega,ibi}bytes or
	 * centi-G{iga,ibi}bytes, with appropriate rounding,
	 * and then print 1/100th of those as a decimal.
	 * We allow upto 2048Megabytes before converting to
	 * gigabytes, as that shows more precision and isn't
	 * too large a number.
	 * Terabytes are not yet handled.
	 */

	if (bytes < 5000*1024) {
		buf[0] = 0;
		return buf;
	}

	if (bytes < 2*1024LL*1024LL*1024LL) {
		cbin = (bytes / ( (1LL<<20) / 200LL ) +1) /2;
		cdec = (bytes / ( 1000000LL / 200LL ) +1) /2;
		snprintf(buf, sizeof(buf), " (%ld.%02ld MiB %ld.%02ld MB)",
			 cbin/100, cbin % 100,
			 cdec/100, cdec % 100);
	} else {
		cbin = (bytes / ( (1LL<<30) / 200LL ) +1) /2;
		cdec = (bytes / (1000000000LL/200LL ) +1) /2;
		snprintf(buf, sizeof(buf), " (%ld.%02ld GiB %ld.%02ld GB)",
			 cbin/100, cbin % 100,
			 cdec/100, cdec % 100);
	}
	return buf;
}
e0d19036
NB
673
/*
 * Describe 'bytes' briefly in KiB, MiB or GiB with two decimals,
 * e.g. "10.00MiB".  KiB is used below 5000KiB and MiB below 2GiB.
 * The result lives in a static buffer which is overwritten by the
 * next call.
 */
char *human_size_brief(long long bytes)
{
	static char buf[30];
	const char *unit;
	long long centi;

	/* Round to hundredths of the unit first and only then split
	 * into whole and fractional parts.  Rounding the fraction on
	 * its own can produce 100, printing e.g. "0.100KiB" where
	 * "1.00KiB" is meant.
	 */
	if (bytes < 5000*1024) {
		centi = (bytes*100 + 512) / 1024;
		unit = "KiB";
	} else if (bytes < 2*1024LL*1024LL*1024LL) {
		centi = (bytes*100 + (1LL<<19)) >> 20;
		unit = "MiB";
	} else {
		/* drop 10 bits first so that scaling by 100 cannot
		 * overflow for any positive 'bytes'
		 */
		centi = ((bytes>>10)*100 + (1LL<<19)) >> 20;
		unit = "GiB";
	}
	snprintf(buf, sizeof(buf), "%ld.%02ld%s",
		 (long)(centi/100), (long)(centi%100), unit);
	return buf;
}
e4965ef8
N
694
/*
 * Print a description of a raid10 'layout' on stdout: the near count
 * (low byte) and the far count (second byte), each only when it is
 * not 1.  The far count is labelled "offset" when bit 0x10000 is set.
 * If both counts are 1, "NO REDUNDANCY" is printed.
 */
void print_r10_layout(int layout)
{
	int near_copies = layout & 255;
	int far_copies = (layout >> 8) & 255;
	int use_offset = (layout&0x10000);

	if (near_copies != 1)
		printf("%s near=%d", "", near_copies);
	if (far_copies != 1)
		/* a comma is only wanted if "near" was printed first */
		printf("%s %s=%d", near_copies != 1 ? "," : "",
		       use_offset ? "offset" : "far", far_copies);
	if (near_copies * far_copies == 1)
		printf("NO REDUNDANCY");
}
435d4ebb 711#endif
e0d19036 712
5f8097be
NB
/*
 * Compute the size of an array from the size of one device.
 * For level 1 this is simply 'devsize'.  Otherwise 'devsize' is first
 * masked with the complement of (chunksize>>9)-1 and then multiplied
 * by the number of data disks.  The result is in the same units as
 * 'devsize'.
 */
unsigned long long calc_array_size(int level, int raid_disks, int layout,
				   int chunksize, unsigned long long devsize)
{
	unsigned long long chunk_mask;

	if (level == 1)
		return devsize;

	chunk_mask = (unsigned long long)((chunksize>>9)-1);
	devsize &= ~chunk_mask;
	return get_data_disks(level, layout, raid_disks) * devsize;
}
721
/*
 * Return the number of devices' worth of data held by an array of
 * the given level, layout and number of raid disks.
 * Levels not listed here give 0, as does a raid10 layout whose near
 * or far count is 0.
 */
int get_data_disks(int level, int layout, int raid_disks)
{
	int data_disks = 0;
	int near_copies, far_copies;

	switch (level) {
	case 0: data_disks = raid_disks; break;
	case 1: data_disks = 1; break;
	case 4:
	case 5: data_disks = raid_disks - 1; break;
	case 6: data_disks = raid_disks - 2; break;
	case 10:
		near_copies = layout & 255;
		far_copies = (layout>>8) & 255;
		/* a layout with a zero count is invalid; dividing by it
		 * would crash, so report no data disks instead
		 */
		if (near_copies && far_copies)
			data_disks = raid_disks / near_copies / far_copies;
		break;
	}

	return data_disks;
}
737
0e600426 738#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
e0d19036
NB
739char *get_md_name(int dev)
740{
741 /* find /dev/md%d or /dev/md/%d or make a device /dev/.tmp.md%d */
98c6faba 742 /* if dev < 0, want /dev/md/d%d or find mdp in /proc/devices ... */
e0d19036
NB
743 static char devname[50];
744 struct stat stb;
98c6faba 745 dev_t rdev;
dd0781e5 746 char *dn;
98c6faba
NB
747
748 if (dev < 0) {
dd0781e5
NB
749 int mdp = get_mdp_major();
750 if (mdp < 0) return NULL;
0df46c2a 751 rdev = makedev(mdp, (-1-dev)<<6);
8f23b0b3 752 snprintf(devname, sizeof(devname), "/dev/md/d%d", -1-dev);
98c6faba
NB
753 if (stat(devname, &stb) == 0
754 && (S_IFMT&stb.st_mode) == S_IFBLK
755 && (stb.st_rdev == rdev))
756 return devname;
757 } else {
0df46c2a 758 rdev = makedev(MD_MAJOR, dev);
8f23b0b3 759 snprintf(devname, sizeof(devname), "/dev/md%d", dev);
98c6faba
NB
760 if (stat(devname, &stb) == 0
761 && (S_IFMT&stb.st_mode) == S_IFBLK
762 && (stb.st_rdev == rdev))
763 return devname;
764
8f23b0b3 765 snprintf(devname, sizeof(devname), "/dev/md/%d", dev);
98c6faba
NB
766 if (stat(devname, &stb) == 0
767 && (S_IFMT&stb.st_mode) == S_IFBLK
768 && (stb.st_rdev == rdev))
769 return devname;
770 }
16c6fa80 771 dn = map_dev(major(rdev), minor(rdev), 0);
dd0781e5
NB
772 if (dn)
773 return dn;
8f23b0b3 774 snprintf(devname, sizeof(devname), "/dev/.tmp.md%d", dev);
e0d19036 775 if (mknod(devname, S_IFBLK | 0600, rdev) == -1)
dd0781e5
NB
776 if (errno != EEXIST)
777 return NULL;
e0d19036
NB
778
779 if (stat(devname, &stb) == 0
780 && (S_IFMT&stb.st_mode) == S_IFBLK
781 && (stb.st_rdev == rdev))
782 return devname;
783 unlink(devname);
784 return NULL;
785}
786
/*
 * Remove 'name' if it starts with "/dev/.tmp.md"; any other name is
 * left alone.
 */
void put_md_name(char *name)
{
	static const char tmp_prefix[] = "/dev/.tmp.md";

	if (strncmp(name, tmp_prefix, sizeof(tmp_prefix)-1) == 0)
		unlink(name);
}
ea24acd0 792
ea24acd0
NB
793int find_free_devnum(int use_partitions)
794{
795 int devnum;
796 for (devnum = 127; devnum != 128;
a56fb7ec 797 devnum = devnum ? devnum-1 : (1<<20)-1) {
ea24acd0
NB
798 char *dn;
799 int _devnum;
80841df2 800 char nbuf[50];
ea24acd0
NB
801
802 _devnum = use_partitions ? (-1-devnum) : devnum;
803 if (mddev_busy(_devnum))
804 continue;
80841df2
N
805 sprintf(nbuf, "%s%d", use_partitions?"mdp":"md", devnum);
806 if (!conf_name_is_free(nbuf))
807 continue;
ea24acd0
NB
808 /* make sure it is new to /dev too, at least as a
809 * non-standard */
810 dn = map_dev(dev2major(_devnum), dev2minor(_devnum), 0);
811 if (dn && ! is_standard(dn, NULL))
812 continue;
813 break;
814 }
815 if (devnum == 128)
816 return NoMdDev;
817 return use_partitions ? (-1-devnum) : devnum;
818}
435d4ebb 819#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
f9ce90ba 820
8b0dabea
NB
821int dev_open(char *dev, int flags)
822{
823 /* like 'open', but if 'dev' matches %d:%d, create a temp
824 * block device and open that
825 */
826 char *e;
827 int fd = -1;
828 char devname[32];
e81cdd9f 829 int major;
8b0dabea 830 int minor;
e81cdd9f
NB
831
832 if (!dev) return -1;
6df6a774 833 flags |= O_DIRECT;
e81cdd9f
NB
834
835 major = strtoul(dev, &e, 0);
8b0dabea
NB
836 if (e > dev && *e == ':' && e[1] &&
837 (minor = strtoul(e+1, &e, 0)) >= 0 &&
838 *e == 0) {
6df6a774
N
839 char *path = map_dev(major, minor, 0);
840 if (path)
841 fd = open(path, flags);
842 if (fd < 0) {
843 snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d:%d",
844 (int)getpid(), major, minor);
845 if (mknod(devname, S_IFBLK|0600, makedev(major, minor))==0) {
846 fd = open(devname, flags);
847 unlink(devname);
848 }
849 }
850 if (fd < 0) {
851 snprintf(devname, sizeof(devname), "/tmp/.tmp.md.%d:%d:%d",
852 (int)getpid(), major, minor);
853 if (mknod(devname, S_IFBLK|0600, makedev(major, minor))==0) {
854 fd = open(devname, flags);
855 unlink(devname);
856 }
8b0dabea
NB
857 }
858 } else
6df6a774 859 fd = open(dev, flags);
8b0dabea
NB
860 return fd;
861}
f9ce90ba 862
/*
 * Open md device number 'devnum' with the given open(2) flags, by
 * handing its "major:minor" to dev_open().
 * Returns the file descriptor, or a negative value on failure.
 */
int open_dev_flags(int devnum, int flags)
{
	/* two "%d" fields plus ':' and NUL can need up to 24 bytes */
	char buf[32];

	snprintf(buf, sizeof(buf), "%d:%d", dev2major(devnum), dev2minor(devnum));
	return dev_open(buf, flags);
}
870
/* Open md device number 'devnum' read-only; see open_dev_flags() */
int open_dev(int devnum)
{
	const int flags = O_RDONLY;

	return open_dev_flags(devnum, flags);
}
875
a931db9e
NB
/*
 * Open md device number 'devnum' with O_EXCL.
 * Read-write access is tried first, dropping to read-only if that is
 * refused with EACCES.  While the open fails with EBUSY it is retried
 * every 200ms, for up to 25 attempts in all.
 * Returns the file descriptor, or a negative value on failure.
 */
int open_dev_excl(int devnum)
{
	/* two "%d" fields plus ':' and NUL can need up to 24 bytes */
	char buf[32];
	int i;
	int flags = O_RDWR;

	snprintf(buf, sizeof(buf), "%d:%d", dev2major(devnum), dev2minor(devnum));
	for (i=0 ; i<25 ; i++) {
		int fd = dev_open(buf, flags|O_EXCL);
		if (fd >= 0)
			return fd;
		if (errno == EACCES && flags == O_RDWR) {
			flags = O_RDONLY;
			continue;
		}
		if (errno != EBUSY)
			return fd;
		usleep(200000);
	}
	return -1;
}
897
9008ed1c
N
/*
 * Return non-zero if the names 'one' and 'two' both refer to block
 * devices with the same device number.  Returns 0 if either cannot
 * be stat()ed or is not a block device.
 */
int same_dev(char *one, char *two)
{
	struct stat first, second;

	if (stat(one, &first) != 0 || stat(two, &second) != 0)
		return 0;
	if (!S_ISBLK(first.st_mode) || !S_ISBLK(second.st_mode))
		return 0;
	return first.st_rdev == second.st_rdev;
}
911
/*
 * Wait for the name 'dev' to appear as a block device with the same
 * device number as the device open on 'fd'.
 * The name is polled every 200ms, 25 times at most; a timeout is
 * reported through dprintf().  Nothing is done if 'fd' is not a
 * block device.
 */
void wait_for(char *dev, int fd)
{
	struct stat want, found;
	int tries;

	if (fstat(fd, &want) != 0 || !S_ISBLK(want.st_mode))
		return;

	for (tries = 0; tries < 25; tries++) {
		if (stat(dev, &found) == 0 &&
		    S_ISBLK(found.st_mode) &&
		    found.st_rdev == want.st_rdev)
			return;
		usleep(200000);
	}
	/* only reached when every attempt failed */
	dprintf("%s: timeout waiting for %s\n", __func__, dev);
}
932
0f22b998
N
933struct superswitch *superlist[] =
934{
935 &super0, &super1,
936 &super_ddf, &super_imsm,
0592faeb 937 &mbr, &gpt,
0f22b998 938 NULL };
f9ce90ba 939
ea24acd0 940#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
f7dd881f 941
4725bc31 942struct supertype *super_by_fd(int fd, char **subarrayp)
f9ce90ba 943{
1686dc25
NB
944 mdu_array_info_t array;
945 int vers;
946 int minor;
947 struct supertype *st = NULL;
7e0f6979 948 struct mdinfo *sra;
142cb9e1 949 char *verstr;
1686dc25
NB
950 char version[20];
951 int i;
f7e7067b 952 char *subarray = NULL;
5f7e44b2 953 int container = NoMdDev;
1686dc25
NB
954
955 sra = sysfs_read(fd, 0, GET_VERSION);
956
957 if (sra) {
7e0f6979
NB
958 vers = sra->array.major_version;
959 minor = sra->array.minor_version;
142cb9e1 960 verstr = sra->text_version;
1686dc25
NB
961 } else {
962 if (ioctl(fd, GET_ARRAY_INFO, &array))
963 array.major_version = array.minor_version = 0;
964 vers = array.major_version;
965 minor = array.minor_version;
142cb9e1 966 verstr = "";
6fbba4c9 967 }
82d9eba6 968
1686dc25
NB
969 if (vers != -1) {
970 sprintf(version, "%d.%d", vers, minor);
971 verstr = version;
6fbba4c9 972 }
3c558363 973 if (minor == -2 && is_subarray(verstr)) {
f7e7067b 974 char *dev = verstr+1;
5f7e44b2 975
f7e7067b 976 subarray = strchr(dev, '/');
92d49ecf 977 if (subarray) {
f7e7067b 978 *subarray++ = '\0';
92d49ecf
AK
979 subarray = strdup(subarray);
980 }
5f7e44b2 981 container = devname2devnum(dev);
f7e7067b
NB
982 if (sra)
983 sysfs_free(sra);
5f7e44b2 984 sra = sysfs_read(-1, container, GET_VERSION);
603f24a0
N
985 if (sra && sra->text_version[0])
986 verstr = sra->text_version;
987 else
988 verstr = "-no-metadata-";
f7e7067b
NB
989 }
990
991 for (i = 0; st == NULL && superlist[i] ; i++)
992 st = superlist[i]->match_metadata_desc(verstr);
1686dc25
NB
993
994 if (sra)
995 sysfs_free(sra);
f7e7067b 996 if (st) {
3b0896f8 997 st->sb = NULL;
1f49fb3a
N
998 if (subarrayp)
999 *subarrayp = subarray;
5f7e44b2
DW
1000 st->container_dev = container;
1001 st->devnum = fd2devnum(fd);
4725bc31
N
1002 } else
1003 free(subarray);
5f7e44b2 1004
82d9eba6 1005 return st;
f9ce90ba 1006}
ea24acd0
NB
1007#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
1008
/*
 * Get the size of the device numbered 'id', as get_dev_size() does.
 * Returns 1 with the size stored in '*size' on success, 0 if the
 * device cannot be opened or its size cannot be determined.
 */
int dev_size_from_id(dev_t id, unsigned long long *size)
{
	/* two "%d" fields plus ':' and NUL can need up to 24 bytes */
	char buf[32];
	int fd;
	int ok;

	snprintf(buf, sizeof(buf), "%d:%d", (int)major(id), (int)minor(id));
	fd = dev_open(buf, O_RDONLY);
	if (fd < 0)
		return 0;
	ok = get_dev_size(fd, NULL, size) ? 1 : 0;
	close(fd);
	return ok;
}
f9ce90ba 1025
159c3a1a 1026struct supertype *dup_super(struct supertype *orig)
3da92f27 1027{
159c3a1a 1028 struct supertype *st;
1686dc25 1029
d2ca6449
NB
1030 if (!orig)
1031 return orig;
159c3a1a 1032 st = malloc(sizeof(*st));
3da92f27
NB
1033 if (!st)
1034 return st;
ef609477 1035 memset(st, 0, sizeof(*st));
159c3a1a
NB
1036 st->ss = orig->ss;
1037 st->max_devs = orig->max_devs;
1038 st->minor_version = orig->minor_version;
1039 st->sb = NULL;
1040 st->info = NULL;
1041 return st;
3da92f27
NB
1042}
1043
54887ad8 1044struct supertype *guess_super_type(int fd, enum guess_types guess_type)
f9ce90ba
NB
1045{
1046 /* try each load_super to find the best match,
1047 * and return the best superswitch
1048 */
82d9eba6
NB
1049 struct superswitch *ss;
1050 struct supertype *st;
f21e18ca 1051 time_t besttime = 0;
570c0542 1052 int bestsuper = -1;
f9ce90ba
NB
1053 int i;
1054
82d9eba6 1055 st = malloc(sizeof(*st));
d1d599ea
N
1056 memset(st, 0, sizeof(*st));
1057 st->container_dev = NoMdDev;
1058
f9ce90ba
NB
1059 for (i=0 ; superlist[i]; i++) {
1060 int rv;
1061 ss = superlist[i];
54887ad8
N
1062 if (guess_type == guess_array && ss->add_to_super == NULL)
1063 continue;
1064 if (guess_type == guess_partitions && ss->add_to_super != NULL)
1065 continue;
ef609477 1066 memset(st, 0, sizeof(*st));
df3346e6 1067 st->ignore_hw_compat = 1;
3da92f27 1068 rv = ss->load_super(st, fd, NULL);
570c0542
NB
1069 if (rv == 0) {
1070 struct mdinfo info;
a5d85af7 1071 st->ss->getinfo_super(st, &info, NULL);
570c0542
NB
1072 if (bestsuper == -1 ||
1073 besttime < info.array.ctime) {
1074 bestsuper = i;
1075 besttime = info.array.ctime;
570c0542 1076 }
3da92f27 1077 ss->free_super(st);
570c0542
NB
1078 }
1079 }
1080 if (bestsuper != -1) {
1081 int rv;
ef609477 1082 memset(st, 0, sizeof(*st));
df3346e6 1083 st->ignore_hw_compat = 1;
3da92f27 1084 rv = superlist[bestsuper]->load_super(st, fd, NULL);
f9ce90ba 1085 if (rv == 0) {
5e747af2 1086 superlist[bestsuper]->free_super(st);
df3346e6 1087 st->ignore_hw_compat = 0;
82d9eba6 1088 return st;
f9ce90ba
NB
1089 }
1090 }
570c0542 1091 free(st);
f9ce90ba
NB
1092 return NULL;
1093}
fe6729fa 1094
beae1dfe
NB
1095/* Return size of device in bytes */
1096int get_dev_size(int fd, char *dname, unsigned long long *sizep)
1097{
1098 unsigned long long ldsize;
c2c9bb6f
NB
1099 struct stat st;
1100
1101 if (fstat(fd, &st) != -1 && S_ISREG(st.st_mode))
1102 ldsize = (unsigned long long)st.st_size;
1103 else
beae1dfe
NB
1104#ifdef BLKGETSIZE64
1105 if (ioctl(fd, BLKGETSIZE64, &ldsize) != 0)
1106#endif
1107 {
1108 unsigned long dsize;
1109 if (ioctl(fd, BLKGETSIZE, &dsize) == 0) {
1110 ldsize = dsize;
1111 ldsize <<= 9;
1112 } else {
1113 if (dname)
1114 fprintf(stderr, Name ": Cannot get size of %s: %s\b",
1115 dname, strerror(errno));
1116 return 0;
1117 }
1118 }
1119 *sizep = ldsize;
1120 return 1;
1121}
8fac0577 1122
3a371610
N
1123/* Return true if this can only be a container, not a member device.
1124 * i.e. is and md device and size is zero
1125 */
1126int must_be_container(int fd)
1127{
1128 unsigned long long size;
1129 if (md_get_version(fd) < 0)
1130 return 0;
1131 if (get_dev_size(fd, NULL, &size) == 0)
1132 return 1;
1133 if (size == 0)
1134 return 1;
1135 return 0;
1136}
034b203a
TM
1137
1138/* Sets endofpart parameter to the last block used by the last GPT partition on the device.
1139 * Returns: 1 if successful
1140 * -1 for unknown partition type
1141 * 0 for other errors
1142 */
1143static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart)
1144{
056b331e 1145 struct GPT gpt;
034b203a 1146 unsigned char empty_gpt_entry[16]= {0};
db7fdfe4
JS
1147 struct GPT_part_entry *part;
1148 char buf[512];
034b203a
TM
1149 unsigned long long curr_part_end;
1150 unsigned all_partitions, entry_size;
f21e18ca 1151 unsigned part_nr;
034b203a
TM
1152
1153 *endofpart = 0;
1154
056b331e 1155 BUILD_BUG_ON(sizeof(gpt) != 512);
73e658d8 1156 /* skip protective MBR */
034b203a 1157 lseek(fd, 512, SEEK_SET);
73e658d8 1158 /* read GPT header */
056b331e 1159 if (read(fd, &gpt, 512) != 512)
034b203a
TM
1160 return 0;
1161
1162 /* get the number of partition entries and the entry size */
056b331e
N
1163 all_partitions = __le32_to_cpu(gpt.part_cnt);
1164 entry_size = __le32_to_cpu(gpt.part_size);
034b203a
TM
1165
1166 /* Check GPT signature*/
056b331e 1167 if (gpt.magic != GPT_SIGNATURE_MAGIC)
034b203a
TM
1168 return -1;
1169
1170 /* sanity checks */
1171 if (all_partitions > 1024 ||
db7fdfe4 1172 entry_size > sizeof(buf))
034b203a
TM
1173 return -1;
1174
db7fdfe4
JS
1175 part = (struct GPT_part_entry *)buf;
1176
034b203a 1177 for (part_nr=0; part_nr < all_partitions; part_nr++) {
73e658d8 1178 /* read partition entry */
db7fdfe4 1179 if (read(fd, buf, entry_size) != (ssize_t)entry_size)
73e658d8
LB
1180 return 0;
1181
034b203a 1182 /* is this valid partition? */
db7fdfe4 1183 if (memcmp(part->type_guid, empty_gpt_entry, 16) != 0) {
034b203a 1184 /* check the last lba for the current partition */
db7fdfe4 1185 curr_part_end = __le64_to_cpu(part->ending_lba);
034b203a
TM
1186 if (curr_part_end > *endofpart)
1187 *endofpart = curr_part_end;
1188 }
1189
034b203a
TM
1190 }
1191 return 1;
1192}
1193
1194/* Sets endofpart parameter to the last block used by the last partition on the device.
1195 * Returns: 1 if successful
1196 * -1 for unknown partition type
1197 * 0 for other errors
1198 */
1199static int get_last_partition_end(int fd, unsigned long long *endofpart)
1200{
056b331e 1201 struct MBR boot_sect;
034b203a
TM
1202 struct MBR_part_record *part;
1203 unsigned long long curr_part_end;
f21e18ca 1204 unsigned part_nr;
034b203a
TM
1205 int retval = 0;
1206
1207 *endofpart = 0;
1208
056b331e 1209 BUILD_BUG_ON(sizeof(boot_sect) != 512);
034b203a
TM
1210 /* read MBR */
1211 lseek(fd, 0, 0);
056b331e 1212 if (read(fd, &boot_sect, 512) != 512)
034b203a
TM
1213 goto abort;
1214
1215 /* check MBP signature */
056b331e 1216 if (boot_sect.magic == MBR_SIGNATURE_MAGIC) {
034b203a
TM
1217 retval = 1;
1218 /* found the correct signature */
056b331e 1219 part = boot_sect.parts;
034b203a
TM
1220
1221 for (part_nr=0; part_nr < MBR_PARTITIONS; part_nr++) {
1222 /* check for GPT type */
1223 if (part->part_type == MBR_GPT_PARTITION_TYPE) {
1224 retval = get_gpt_last_partition_end(fd, endofpart);
1225 break;
1226 }
1227 /* check the last used lba for the current partition */
1228 curr_part_end = __le32_to_cpu(part->first_sect_lba) +
1229 __le32_to_cpu(part->blocks_num);
1230 if (curr_part_end > *endofpart)
1231 *endofpart = curr_part_end;
1232
1233 part++;
1234 }
1235 } else {
1236 /* Unknown partition table */
1237 retval = -1;
1238 }
1239 abort:
1240 return retval;
1241}
1242
53ed6ac3
KW
1243int check_partitions(int fd, char *dname, unsigned long long freesize,
1244 unsigned long long size)
034b203a
TM
1245{
1246 /*
1247 * Check where the last partition ends
1248 */
1249 unsigned long long endofpart;
1250 int ret;
1251
1252 if ((ret = get_last_partition_end(fd, &endofpart)) > 0) {
1253 /* There appears to be a partition table here */
1254 if (freesize == 0) {
1255 /* partitions will not be visible in new device */
1256 fprintf(stderr,
1257 Name ": partition table exists on %s but will be lost or\n"
1258 " meaningless after creating array\n",
1259 dname);
1260 return 1;
1261 } else if (endofpart > freesize) {
1262 /* last partition overlaps metadata */
1263 fprintf(stderr,
1264 Name ": metadata will over-write last partition on %s.\n",
1265 dname);
1266 return 1;
53ed6ac3
KW
1267 } else if (size && endofpart > size) {
1268 /* partitions will be truncated in new device */
1269 fprintf(stderr,
1270 Name ": array size is too small to cover all partitions on %s.\n",
1271 dname);
1272 return 1;
034b203a
TM
1273 }
1274 }
1275 return 0;
1276}
1277
8382f19b
NB
1278void get_one_disk(int mdfd, mdu_array_info_t *ainf, mdu_disk_info_t *disk)
1279{
1280 int d;
9e6d9291 1281
8382f19b 1282 ioctl(mdfd, GET_ARRAY_INFO, ainf);
480f3566 1283 for (d = 0 ; d < MAX_DISKS ; d++) {
9e6d9291
N
1284 if (ioctl(mdfd, GET_DISK_INFO, disk) == 0 &&
1285 (disk->major || disk->minor))
8382f19b 1286 return;
9e6d9291 1287 }
8382f19b 1288}
63152c1b 1289
a322f70c
DW
/*
 * open_container - find the container that holds a block device.
 * @fd: an open block device
 *
 * Walks /sys/dev/block/<major>:<minor>/holders.  A holder qualifies when
 * its md/metadata_version starts with "external" and the character after
 * the "external:" prefix is not '/' (that form marks a volume rather
 * than a container).  The holder's "dev" attribute is then parsed and
 * opened with dev_open().
 *
 * Returns an open fd on the first matching holder, or -1 if there is
 * none or an error occurs.
 */
int open_container(int fd)
{
	char path[256];
	char *e;
	size_t room;
	DIR *dir;
	struct dirent *de;
	int dfd, n, len;
	char buf[200];
	int cmajor, cminor;
	struct stat st;

	if (fstat(fd, &st) != 0)
		return -1;
	len = snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/holders",
		       (int)major(st.st_rdev), (int)minor(st.st_rdev));
	if (len < 0 || (size_t)len >= sizeof(path))
		return -1;
	/* 'e' marks where per-holder suffixes get appended */
	e = path + len;
	room = sizeof(path) - (size_t)len;

	dir = opendir(path);
	if (!dir)
		return -1;
	while ((de = readdir(dir))) {
		if (de->d_ino == 0)
			continue;
		if (de->d_name[0] == '.')
			continue;
		/* Need to make sure it is a container and not a volume.
		 * d_name is not under our control, so never let it run
		 * past the end of path[].
		 */
		len = snprintf(e, room, "/%s/md/metadata_version", de->d_name);
		if (len < 0 || (size_t)len >= room)
			continue;
		dfd = open(path, O_RDONLY);
		if (dfd < 0)
			continue;
		n = read(dfd, buf, sizeof(buf));
		close(dfd);
		if (n <= 0 || (unsigned)n >= sizeof(buf))
			continue;
		buf[n] = 0;
		if (strncmp(buf, "external", 8) != 0 ||
		    n < 10 ||
		    buf[9] == '/')
			continue;
		len = snprintf(e, room, "/%s/dev", de->d_name);
		if (len < 0 || (size_t)len >= room)
			continue;
		dfd = open(path, O_RDONLY);
		if (dfd < 0)
			continue;
		n = read(dfd, buf, sizeof(buf));
		close(dfd);
		if (n <= 0 || (unsigned)n >= sizeof(buf))
			continue;
		buf[n] = 0;
		if (sscanf(buf, "%d:%d", &cmajor, &cminor) != 2)
			continue;
		/* normalise to a bare "major:minor" string for dev_open() */
		snprintf(buf, sizeof(buf), "%d:%d", cmajor, cminor);
		dfd = dev_open(buf, O_RDONLY);
		if (dfd >= 0) {
			closedir(dir);
			return dfd;
		}
	}
	closedir(dir);
	return -1;
}
1353
33414a01
DW
1354struct superswitch *version_to_superswitch(char *vers)
1355{
1356 int i;
1357
1358 for (i = 0; superlist[i]; i++) {
1359 struct superswitch *ss = superlist[i];
1360
1361 if (strcmp(vers, ss->name) == 0)
1362 return ss;
1363 }
1364
1365 return NULL;
1366}
1367
1368int is_container_member(struct mdstat_ent *mdstat, char *container)
1369{
1370 if (mdstat->metadata_version == NULL ||
1371 strncmp(mdstat->metadata_version, "external:", 9) != 0 ||
1372 !is_subarray(mdstat->metadata_version+9) ||
1373 strncmp(mdstat->metadata_version+10, container, strlen(container)) != 0 ||
1374 mdstat->metadata_version[10+strlen(container)] != '/')
1375 return 0;
1376
1377 return 1;
1378}
1379
1380int is_subarray_active(char *subarray, char *container)
1381{
1382 struct mdstat_ent *mdstat = mdstat_read(0, 0);
1383 struct mdstat_ent *ent;
1384
e5408a32
DW
1385 for (ent = mdstat; ent; ent = ent->next)
1386 if (is_container_member(ent, container))
e5e5d7ce 1387 if (strcmp(to_subarray(ent, container), subarray) == 0)
33414a01 1388 break;
33414a01
DW
1389
1390 free_mdstat(mdstat);
1391
1392 return ent != NULL;
1393}
1394
1395/* open_subarray - opens a subarray in a container
1396 * @dev: container device name
feab51f8 1397 * @st: empty supertype
33414a01
DW
1398 * @quiet: block reporting errors flag
1399 *
1400 * On success returns an fd to a container and fills in *st
1401 */
feab51f8 1402int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet)
33414a01
DW
1403{
1404 struct mdinfo *mdi;
a951a4f7 1405 struct mdinfo *info;
33414a01
DW
1406 int fd, err = 1;
1407
1408 fd = open(dev, O_RDWR|O_EXCL);
1409 if (fd < 0) {
1410 if (!quiet)
1411 fprintf(stderr, Name ": Couldn't open %s, aborting\n",
1412 dev);
b990032d 1413 return -1;
33414a01
DW
1414 }
1415
1416 st->devnum = fd2devnum(fd);
1417 if (st->devnum == NoMdDev) {
1418 if (!quiet)
1419 fprintf(stderr,
1420 Name ": Failed to determine device number for %s\n",
1421 dev);
1422 goto close_fd;
1423 }
1424
1425 mdi = sysfs_read(fd, st->devnum, GET_VERSION|GET_LEVEL);
1426 if (!mdi) {
1427 if (!quiet)
1428 fprintf(stderr, Name ": Failed to read sysfs for %s\n",
1429 dev);
1430 goto close_fd;
1431 }
1432
1433 if (mdi->array.level != UnSet) {
1434 if (!quiet)
1435 fprintf(stderr, Name ": %s is not a container\n", dev);
1436 goto free_sysfs;
1437 }
1438
1439 st->ss = version_to_superswitch(mdi->text_version);
1440 if (!st->ss) {
1441 if (!quiet)
1442 fprintf(stderr,
1443 Name ": Operation not supported for %s metadata\n",
1444 mdi->text_version);
1445 goto free_sysfs;
1446 }
1447
1448 st->devname = devnum2devname(st->devnum);
1449 if (!st->devname) {
1450 if (!quiet)
1451 fprintf(stderr, Name ": Failed to allocate device name\n");
1452 goto free_sysfs;
1453 }
1454
db20d413 1455 if (!st->ss->load_container) {
33414a01 1456 if (!quiet)
db20d413 1457 fprintf(stderr, Name ": %s is not a container\n", dev);
33414a01
DW
1458 goto free_name;
1459 }
1460
db20d413 1461 if (st->ss->load_container(st, fd, NULL)) {
33414a01 1462 if (!quiet)
db20d413
N
1463 fprintf(stderr, Name ": Failed to load metadata for %s\n",
1464 dev);
1465 goto free_name;
33414a01
DW
1466 }
1467
a951a4f7
N
1468 info = st->ss->container_content(st, subarray);
1469 if (!info) {
1470 if (!quiet)
1471 fprintf(stderr, Name ": Failed to find subarray-%s in %s\n",
1472 subarray, dev);
1473 goto free_super;
1474 }
1475 free(info);
1476
33414a01
DW
1477 err = 0;
1478
1479 free_super:
1480 if (err)
1481 st->ss->free_super(st);
1482 free_name:
1483 if (err)
1484 free(st->devname);
1485 free_sysfs:
1486 sysfs_free(mdi);
1487 close_fd:
1488 if (err)
1489 close(fd);
1490
1491 if (err)
1492 return -1;
1493 else
1494 return fd;
1495}
1496
7801ac20
N
1497int add_disk(int mdfd, struct supertype *st,
1498 struct mdinfo *sra, struct mdinfo *info)
1499{
1500 /* Add a device to an array, in one of 2 ways. */
1501 int rv;
1502#ifndef MDASSEMBLE
1503 if (st->ss->external) {
d23534e4
DW
1504 if (info->disk.state & (1<<MD_DISK_SYNC))
1505 info->recovery_start = MaxSector;
1506 else
1507 info->recovery_start = 0;
2904b26f 1508 rv = sysfs_add_disk(sra, info, 0);
7801ac20
N
1509 if (! rv) {
1510 struct mdinfo *sd2;
f35f2525
N
1511 for (sd2 = sra->devs; sd2; sd2=sd2->next)
1512 if (sd2 == info)
1513 break;
1514 if (sd2 == NULL) {
1515 sd2 = malloc(sizeof(*sd2));
1516 *sd2 = *info;
1517 sd2->next = sra->devs;
1518 sra->devs = sd2;
1519 }
7801ac20
N
1520 }
1521 } else
1522#endif
1523 rv = ioctl(mdfd, ADD_NEW_DISK, &info->disk);
1524 return rv;
1525}
1526
de6ae750
N
1527int remove_disk(int mdfd, struct supertype *st,
1528 struct mdinfo *sra, struct mdinfo *info)
1529{
1530 int rv;
1531 /* Remove the disk given by 'info' from the array */
1532#ifndef MDASSEMBLE
1533 if (st->ss->external)
1534 rv = sysfs_set_str(sra, info, "slot", "none");
1535 else
1536#endif
1537 rv = ioctl(mdfd, HOT_REMOVE_DISK, makedev(info->disk.major,
1538 info->disk.minor));
1539 return rv;
1540}
1541
f35f2525
N
1542int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
1543{
1544 /* Initialise kernel's knowledge of array.
1545 * This varies between externally managed arrays
1546 * and older kernels
1547 */
1548 int vers = md_get_version(mdfd);
1549 int rv;
1550
1551#ifndef MDASSEMBLE
1552 if (st->ss->external)
1553 rv = sysfs_set_array(info, vers);
1554 else
1555#endif
1556 if ((vers % 100) >= 1) { /* can use different versions */
1557 mdu_array_info_t inf;
1558 memset(&inf, 0, sizeof(inf));
1559 inf.major_version = info->array.major_version;
1560 inf.minor_version = info->array.minor_version;
1561 rv = ioctl(mdfd, SET_ARRAY_INFO, &inf);
1562 } else
1563 rv = ioctl(mdfd, SET_ARRAY_INFO, NULL);
1564 return rv;
1565}
1566
1e5c6983
DW
1567unsigned long long min_recovery_start(struct mdinfo *array)
1568{
1569 /* find the minimum recovery_start in an array for metadata
1570 * formats that only record per-array recovery progress instead
1571 * of per-device
1572 */
1573 unsigned long long recovery_start = MaxSector;
1574 struct mdinfo *d;
1575
1576 for (d = array->devs; d; d = d->next)
1577 recovery_start = min(recovery_start, d->recovery_start);
1578
1579 return recovery_start;
1580}
1581
24f6f99b 1582int mdmon_pid(int devnum)
a931db9e
NB
1583{
1584 char path[100];
1585 char pid[10];
1586 int fd;
1587 int n;
10013317
PHC
1588 char *devname = devnum2devname(devnum);
1589
753cf905 1590 sprintf(path, "%s/%s.pid", MDMON_DIR, devname);
10013317
PHC
1591 free(devname);
1592
24f6f99b 1593 fd = open(path, O_RDONLY | O_NOATIME, 0);
a931db9e
NB
1594
1595 if (fd < 0)
cf556303 1596 return -1;
a931db9e
NB
1597 n = read(fd, pid, 9);
1598 close(fd);
1599 if (n <= 0)
cf556303 1600 return -1;
24f6f99b 1601 return atoi(pid);
a931db9e
NB
1602}
1603
/*
 * mdmon_running - report whether the pid recorded for 'devnum' is alive.
 *
 * Returns 1 when mdmon_pid() yields a positive pid and kill(pid, 0)
 * succeeds, 0 otherwise.
 */
int mdmon_running(int devnum)
{
	int monitor = mdmon_pid(devnum);

	/* signal 0 only probes the process, nothing is delivered */
	return monitor > 0 && kill(monitor, 0) == 0;
}
1613
8850ee3e
N
1614int start_mdmon(int devnum)
1615{
1913c325 1616 int i, skipped;
44d2e365 1617 int len;
9fe32043
N
1618 pid_t pid;
1619 int status;
44d2e365
N
1620 char pathbuf[1024];
1621 char *paths[4] = {
1622 pathbuf,
1623 "/sbin/mdmon",
1624 "mdmon",
1625 NULL
1626 };
8850ee3e 1627
40ebbb9c 1628 if (check_env("MDADM_NO_MDMON"))
8850ee3e
N
1629 return 0;
1630
9cf014ec 1631 len = readlink("/proc/self/exe", pathbuf, sizeof(pathbuf)-1);
44d2e365
N
1632 if (len > 0) {
1633 char *sl;
1634 pathbuf[len] = 0;
1635 sl = strrchr(pathbuf, '/');
1636 if (sl)
1637 sl++;
1638 else
1639 sl = pathbuf;
1640 strcpy(sl, "mdmon");
1641 } else
1642 pathbuf[0] = '\0';
1643
8850ee3e
N
1644 switch(fork()) {
1645 case 0:
1646 /* FIXME yuk. CLOSE_EXEC?? */
1913c325
N
1647 skipped = 0;
1648 for (i=3; skipped < 20; i++)
1649 if (close(i) < 0)
1650 skipped++;
1651 else
1652 skipped = 0;
1653
44d2e365 1654 for (i=0; paths[i]; i++)
a0963a86
JS
1655 if (paths[i][0]) {
1656 if (__offroot) {
1657 execl(paths[i], "mdmon", "--offroot",
1658 devnum2devname(devnum),
1659 NULL);
1660 } else {
1661 execl(paths[i], "mdmon",
1662 devnum2devname(devnum),
1663 NULL);
1664 }
1665 }
8850ee3e
N
1666 exit(1);
1667 case -1: fprintf(stderr, Name ": cannot run mdmon. "
1668 "Array remains readonly\n");
1669 return -1;
9fe32043
N
1670 default: /* parent - good */
1671 pid = wait(&status);
1672 if (pid < 0 || status != 0)
1673 return -1;
8850ee3e
N
1674 }
1675 return 0;
1676}
1677
/*
 * check_env - test whether an environment switch is turned on.
 * @name: environment variable to look up
 *
 * Returns 1 only when the variable is set and atoi() of its value is
 * exactly 1; returns 0 when it is unset or holds anything else.
 */
int check_env(char *name)
{
	char *setting = getenv(name);

	if (setting == NULL)
		return 0;

	return atoi(setting) == 1;
}
1687
148acb7b
DW
1688__u32 random32(void)
1689{
1690 __u32 rv;
1691 int rfd = open("/dev/urandom", O_RDONLY);
1692 if (rfd < 0 || read(rfd, &rv, 4) != 4)
1693 rv = random();
1694 if (rfd >= 0)
1695 close(rfd);
1696 return rv;
1697}
1698
0e600426 1699#ifndef MDASSEMBLE
edd8d13c
NB
1700int flush_metadata_updates(struct supertype *st)
1701{
1702 int sfd;
1703 if (!st->updates) {
1704 st->update_tail = NULL;
1705 return -1;
1706 }
1707
1708 sfd = connect_monitor(devnum2devname(st->container_dev));
1709 if (sfd < 0)
1710 return -1;
1711
1712 while (st->updates) {
1713 struct metadata_update *mu = st->updates;
1714 st->updates = mu->next;
1715
1716 send_message(sfd, mu, 0);
1717 wait_reply(sfd, 0);
1718 free(mu->buf);
1719 free(mu);
1720 }
1721 ack(sfd, 0);
1722 wait_reply(sfd, 0);
1723 close(sfd);
1724 st->update_tail = NULL;
1725 return 0;
1726}
1727
1728void append_metadata_update(struct supertype *st, void *buf, int len)
1729{
1730
1731 struct metadata_update *mu = malloc(sizeof(*mu));
1732
1733 mu->buf = buf;
1734 mu->len = len;
1735 mu->space = NULL;
cb23f1f4 1736 mu->space_list = NULL;
edd8d13c
NB
1737 mu->next = NULL;
1738 *st->update_tail = mu;
1739 st->update_tail = &mu->next;
1740}
0e600426 1741#endif /* MDASSEMBLE */
a931db9e 1742
fe6729fa
NB
#ifdef __TINYC__
/* tinyc doesn't optimize this check in ioctl.h out ...
 * so a definition of the symbol is provided here, only for tinyc builds
 * (presumably so the leftover reference resolves at link time - TODO
 * confirm).
 */
unsigned int __invalid_size_argument_for_IOC = 0;
#endif
1747
6d11ec6f
AK
1748int experimental(void)
1749{
1750 if (check_env("MDADM_EXPERIMENTAL"))
1751 return 1;
1752 else {
65c83a80
LD
1753 fprintf(stderr, Name ": To use this feature MDADM_EXPERIMENTAL"
1754 " environment variable has to be defined.\n");
6d11ec6f
AK
1755 return 0;
1756 }
1757}
1758
326727d9
AC
1759/* Pick all spares matching given criteria from a container
1760 * if min_size == 0 do not check size
1761 * if domlist == NULL do not check domains
1762 * if spare_group given add it to domains of each spare
1763 * metadata allows to test domains using metadata of destination array */
1764struct mdinfo *container_choose_spares(struct supertype *st,
1765 unsigned long long min_size,
1766 struct domainlist *domlist,
1767 char *spare_group,
1768 const char *metadata, int get_one)
1769{
1770 struct mdinfo *d, **dp, *disks = NULL;
1771
1772 /* get list of all disks in container */
1773 if (st->ss->getinfo_super_disks)
1774 disks = st->ss->getinfo_super_disks(st);
1775
1776 if (!disks)
1777 return disks;
1778 /* find spare devices on the list */
1779 dp = &disks->devs;
1780 disks->array.spare_disks = 0;
1781 while (*dp) {
1782 int found = 0;
1783 d = *dp;
1784 if (d->disk.state == 0) {
1785 /* check if size is acceptable */
1786 unsigned long long dev_size;
1787 dev_t dev = makedev(d->disk.major,d->disk.minor);
1788
1789 if (!min_size ||
1790 (dev_size_from_id(dev, &dev_size) &&
1791 dev_size >= min_size))
1792 found = 1;
1793 /* check if domain matches */
1794 if (found && domlist) {
1795 struct dev_policy *pol = devnum_policy(dev);
1796 if (spare_group)
1797 pol_add(&pol, pol_domain,
1798 spare_group, NULL);
e5508b36 1799 if (domain_test(domlist, pol, metadata) != 1)
326727d9
AC
1800 found = 0;
1801 dev_policy_free(pol);
1802 }
1803 }
1804 if (found) {
1805 dp = &d->next;
1806 disks->array.spare_disks++;
1807 if (get_one) {
1808 sysfs_free(*dp);
1809 d->next = NULL;
1810 }
1811 } else {
1812 *dp = d->next;
1813 d->next = NULL;
1814 sysfs_free(d);
1815 }
1816 }
1817 return disks;
1818}