Having single function to read mdmon pid file.
[thirdparty/mdadm.git] / util.c
CommitLineData
64c4757e 1/*
9a9dab36 2 * mdadm - manage Linux "md" devices aka RAID arrays.
64c4757e 3 *
e736b623 4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
64c4757e
NB
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
e736b623 22 * Email: <neilb@suse.de>
64c4757e
NB
23 */
24
9a9dab36 25#include "mdadm.h"
64c4757e 26#include "md_p.h"
edd8d13c 27#include <sys/socket.h>
64c4757e 28#include <sys/utsname.h>
9fe32043 29#include <sys/wait.h>
edd8d13c 30#include <sys/un.h>
98c6faba 31#include <ctype.h>
a322f70c 32#include <dirent.h>
a931db9e 33#include <signal.h>
0a816ef9
NB
34
35/*
36 * following taken from linux/blkpg.h because they aren't
37 * anywhere else and it isn't safe to #include linux/ * stuff.
38 */
39
#define BLKPG _IO(0x12,105)

/* Argument block handed to the BLKPG ioctl */
struct blkpg_ioctl_arg {
	int op;		/* one of the sub-functions defined below */
	int flags;
	int datalen;
	void *data;
};

/* Sub-functions selected through the 'op' field */
#define BLKPG_ADD_PARTITION 1
#define BLKPG_DEL_PARTITION 2

/* Sizes of the name fields.  Unused at present. */
#define BLKPG_DEVNAMELTH 64
#define BLKPG_VOLNAMELTH 64

/* Payload used with BLKPG_ADD_PARTITION and BLKPG_DEL_PARTITION */
struct blkpg_partition {
	long long start;		/* starting offset in bytes */
	long long length;		/* length in bytes */
	int pno;			/* partition number */
	char devname[BLKPG_DEVNAMELTH];	/* partition name, like sda5 or c0d1p2,
					   to be used in kernel messages */
	char volname[BLKPG_VOLNAMELTH];	/* volume label */
};
64c4757e 67
034b203a
TM
/* Partition table structures, so we can check the metadata position
 * against the end of the last partition.
 * Only MBR and GPT partition tables are handled.
 */
72struct MBR_part_record {
73 __u8 bootable;
74 __u8 first_head;
75 __u8 first_sector;
76 __u8 first_cyl;
77 __u8 part_type;
78 __u8 last_head;
79 __u8 last_sector;
80 __u8 last_cyl;
81 __u32 first_sect_lba;
82 __u32 blocks_num;
83};
84
85struct GPT_part_entry {
86 unsigned char type_guid[16];
87 unsigned char partition_guid[16];
88 unsigned char starting_lba[8];
89 unsigned char ending_lba[8];
90 unsigned char attr_bits[8];
91 unsigned char name[72];
92};
93
94/* MBR/GPT magic numbers */
95#define MBR_SIGNATURE_MAGIC __cpu_to_le16(0xAA55)
96#define GPT_SIGNATURE_MAGIC __cpu_to_le64(0x5452415020494645ULL)
97
98#define MBR_SIGNATURE_OFFSET 510
99#define MBR_PARTITION_TABLE_OFFSET 446
100#define MBR_PARTITIONS 4
101#define MBR_GPT_PARTITION_TYPE 0xEE
102#define GPT_ALL_PARTITIONS_OFFSET 80
103#define GPT_ENTRY_SIZE_OFFSET 84
104
64c4757e
NB
/*
 * Parse a 128 bit uuid into 4 integers.
 * The format is 32 hex nibbles with optional ':', '.', ' ' or '-' separators.
 * If not exactly 32 hex digits are found, return 0
 * else return 1
 */
int parse_uuid(char *str, int uuid[4])
{
	/* Each group of 8 hex digits fills one element of uuid[], most
	 * significant nibble first.  The value is accumulated in an
	 * unsigned word: left-shifting a signed int whose top nibble is
	 * already set is undefined behaviour.
	 * Returns 1 when exactly 32 hex digits were seen, 0 otherwise.
	 */
	unsigned int word[4] = { 0, 0, 0, 0 };
	int hit = 0; /* number of hex digits seen so far */
	int i;
	char c;

	for (i = 0; i < 4; i++)
		uuid[i] = 0;

	while ((c = *str++)) {
		unsigned int n;

		if (c >= '0' && c <= '9')
			n = c - '0';
		else if (c >= 'a' && c <= 'f')
			n = 10 + c - 'a';
		else if (c >= 'A' && c <= 'F')
			n = 10 + c - 'A';
		else if (strchr(":. -", c))
			continue;	/* separators are skipped */
		else
			return 0;

		if (hit < 32) {
			word[hit / 8] = (word[hit / 8] << 4) | n;
			/* keep uuid[] up to date so an early failure
			 * leaves the digits parsed so far in place */
			uuid[hit / 8] = (int)word[hit / 8];
		}
		hit++;
	}
	return hit == 32;
}
140
141
/*
 * Get the md version number.
 * We use the RAID_VERSION ioctl if it is supported.
 * If not, but we have a block device with major '9', we assume
 * 0.36.0
 *
 * Return the version number as major*10000 + minor*100 + patchlevel
 * (so 0.36.0 is 3600), or -1 if it cannot be determined.
 */
151
152int md_get_version(int fd)
153{
154 struct stat stb;
155 mdu_version_t vers;
156
157 if (fstat(fd, &stb)<0)
158 return -1;
159 if ((S_IFMT&stb.st_mode) != S_IFBLK)
160 return -1;
161
162 if (ioctl(fd, RAID_VERSION, &vers) == 0)
682c7051 163 return (vers.major*10000) + (vers.minor*100) + vers.patchlevel;
5787fa49
NB
164 if (errno == EACCES)
165 return -1;
0df46c2a 166 if (major(stb.st_rdev) == MD_MAJOR)
682c7051 167 return (3600);
64c4757e
NB
168 return -1;
169}
170
64c4757e
NB
int get_linux_version()
{
	/* Encode the "a.b.c" prefix of the uname release string as
	 * a*1000000 + b*1000 + c.  Returns -1 if uname() fails or the
	 * first two numbers are not each followed by a '.'.
	 */
	struct utsname name;
	int part[2];
	char *cp;
	int idx;

	if (uname(&name) < 0)
		return -1;

	cp = name.release;
	for (idx = 0; idx < 2; idx++) {
		part[idx] = strtoul(cp, &cp, 10);
		if (*cp != '.')
			return -1;
		cp++;
	}
	return (part[0] * 1000000) + (part[1] * 1000) + (int)strtoul(cp, NULL, 10);
}
188
eb3929a4 189#ifndef MDASSEMBLE
84e11361
N
long long parse_size(char *size)
{
	/* 'size' is a number optionally followed by 'K', 'M' or 'G';
	 * with no suffix, K is assumed.
	 * The result is in sectors (half-K).  Trailing text after the
	 * number and optional suffix makes the result 0.
	 */
	char *end;
	long long sectors = strtoll(size, &end, 10);

	if (sectors > 0) {
		if (*end == 'G') {
			end++;
			sectors *= 1024 * 1024 * 2;
		} else if (*end == 'M') {
			end++;
			sectors *= 1024 * 2;
		} else {
			/* explicit 'K', or no recognised suffix */
			if (*end == 'K')
				end++;
			sectors *= 2;
		}
	}
	if (*end)
		sectors = 0;
	return sectors;
}
220
4a06e2c2
N
int parse_layout_10(char *layout)
{
	/* A raid10 layout string is 'n', 'f' or 'o' followed by a copy
	 * count in the range 1..200 and nothing else.
	 * Returns the encoded layout, or -1 if the string is malformed.
	 */
	char kind = layout[0];
	char *end;
	int copies;

	if (kind != 'n' && kind != 'f' && kind != 'o')
		return -1;

	copies = strtoul(layout + 1, &end, 10);
	if (copies < 1 || copies > 200 || *end)
		return -1;

	switch (kind) {
	case 'n':
		return 256 + copies;
	case 'o':
		return 0x10000 + (copies << 8) + 1;
	default:	/* 'f' */
		return 1 + (copies << 8);
	}
}
240
241int parse_layout_faulty(char *layout)
242{
243 /* Parse the layout string for 'faulty' */
244 int ln = strcspn(layout, "0123456789");
245 char *m = strdup(layout);
246 int mode;
247 m[ln] = 0;
248 mode = map_name(faultylayout, m);
249 if (mode == UnSet)
250 return -1;
251
252 return mode | (atoi(layout+ln)<< ModeShift);
253}
eb3929a4 254#endif
4a06e2c2 255
0430ed48
NB
void remove_partitions(int fd)
{
	/* Ask the kernel to drop partitions 0..15 of this block device.
	 * This is used for components added to an array.
	 * The ioctl results are deliberately ignored.
	 */
#ifdef BLKPG_DEL_PARTITION
	struct blkpg_partition part;
	struct blkpg_ioctl_arg arg;
	int pno;

	memset(&part, 0, sizeof(part));
	arg.op = BLKPG_DEL_PARTITION;
	arg.flags = 0;
	arg.datalen = sizeof(part);
	arg.data = (void *)&part;

	for (pno = 0; pno < 16; pno++) {
		part.pno = pno;
		ioctl(fd, BLKPG, &arg);
	}
#endif
}
274
583315d9 275int enough(int level, int raid_disks, int layout, int clean,
265e0f17 276 char *avail, int avail_disks)
64c4757e 277{
265e0f17 278 int copies, first;
64c4757e 279 switch (level) {
265e0f17
NB
280 case 10:
281 /* This is the tricky one - we need to check
282 * which actual disks are present.
283 */
702b557b 284 copies = (layout&255)* ((layout>>8) & 255);
265e0f17
NB
285 first=0;
286 do {
287 /* there must be one of the 'copies' form 'first' */
288 int n = copies;
289 int cnt=0;
290 while (n--) {
291 if (avail[first])
292 cnt++;
293 first = (first+1) % raid_disks;
294 }
295 if (cnt == 0)
296 return 0;
297
298 } while (first != 0);
299 return 1;
e5329c37 300
df0d4ea0 301 case LEVEL_MULTIPATH:
e0d19036 302 return avail_disks>= 1;
df0d4ea0 303 case LEVEL_LINEAR:
64c4757e
NB
304 case 0:
305 return avail_disks == raid_disks;
306 case 1:
307 return avail_disks >= 1;
308 case 4:
309 case 5:
583315d9
NB
310 if (clean)
311 return avail_disks >= raid_disks-1;
312 else
313 return avail_disks >= raid_disks;
98c6faba 314 case 6:
583315d9
NB
315 if (clean)
316 return avail_disks >= raid_disks-2;
317 else
318 return avail_disks >= raid_disks;
64c4757e
NB
319 default:
320 return 0;
321 }
322}
323
const int uuid_match_any[4] = { ~0, ~0, ~0, ~0 };

int same_uuid(int a[4], int b[4], int swapuuid)
{
	/* Returns 1 if the uuids match, 0 otherwise.  A uuid equal to
	 * uuid_match_any matches everything.
	 * With 'swapuuid' set, each 4-byte word of one uuid is compared
	 * against the byte-reversed word of the other.
	 */
	const unsigned char *abytes = (const unsigned char *)a;
	const unsigned char *bbytes = (const unsigned char *)b;
	int word, byte;

	if (memcmp(a, uuid_match_any, sizeof(int[4])) == 0 ||
	    memcmp(b, uuid_match_any, sizeof(int[4])) == 0)
		return 1;

	if (!swapuuid) {
		for (word = 0; word < 4; word++)
			if (a[word] != b[word])
				return 0;
		return 1;
	}

	for (word = 0; word < 16; word += 4)
		for (byte = 0; byte < 4; byte++)
			if (abytes[word + byte] != bbytes[word + 3 - byte])
				return 0;
	return 1;
}
350f29f9
NB
void copy_uuid(void *a, int b[4], int swapuuid)
{
	/* Copy the 16 bytes of 'b' to 'a'.  With 'swapuuid' set, the
	 * bytes within each 4-byte word are reversed on the way.
	 */
	unsigned char *dst = (unsigned char *)a;
	unsigned char *src = (unsigned char *)b;
	int i;

	if (!swapuuid) {
		memcpy(a, b, 16);
		return;
	}
	for (i = 0; i < 16; i++)
		dst[i] = src[(i & ~3) + 3 - (i & 3)];
}
64c4757e 375
char *__fname_from_uuid(int id[4], int swap, char *buf, char sep)
{
	/* Write "UUID-" followed by four groups of 8 hex digits, the
	 * groups separated by 'sep', into 'buf' and return 'buf'.
	 * Within each group the bytes are printed from index 3 down to 0.
	 */
	char uuid[16];
	char *out = buf;
	int word, byte;

	out += sprintf(out, "UUID-");
	copy_uuid(uuid, id, swap);
	for (word = 0; word < 4; word++) {
		if (word > 0)
			*out++ = sep;
		for (byte = 3; byte >= 0; byte--)
			out += sprintf(out, "%02x",
				       (unsigned char) uuid[4*word + byte]);
	}
	return buf;
}
395
396char *fname_from_uuid(struct supertype *st, struct mdinfo *info, char *buf, char sep)
397{
398 return __fname_from_uuid(info->uuid, st->ss->swapuuid, buf, sep);
d7288ddc
N
399}
400
435d4ebb 401#ifndef MDASSEMBLE
682c7051
NB
402int check_ext2(int fd, char *name)
403{
404 /*
405 * Check for an ext2fs file system.
406 * Superblock is always 1K at 1K offset
407 *
408 * s_magic is le16 at 56 == 0xEF53
409 * report mtime - le32 at 44
410 * blocks - le32 at 4
411 * logblksize - le32 at 24
412 */
413 unsigned char sb[1024];
414 time_t mtime;
415 int size, bsize;
416 if (lseek(fd, 1024,0)!= 1024)
417 return 0;
418 if (read(fd, sb, 1024)!= 1024)
419 return 0;
420 if (sb[56] != 0x53 || sb[57] != 0xef)
421 return 0;
422
423 mtime = sb[44]|(sb[45]|(sb[46]|sb[47]<<8)<<8)<<8;
424 bsize = sb[24]|(sb[25]|(sb[26]|sb[27]<<8)<<8)<<8;
425 size = sb[4]|(sb[5]|(sb[6]|sb[7]<<8)<<8)<<8;
426 fprintf(stderr, Name ": %s appears to contain an ext2fs file system\n",
427 name);
428 fprintf(stderr," size=%dK mtime=%s",
429 size*(1<<bsize), ctime(&mtime));
430 return 1;
431}
432
433int check_reiser(int fd, char *name)
434{
435 /*
436 * superblock is at 64K
437 * size is 1024;
438 * Magic string "ReIsErFs" or "ReIsEr2Fs" at 52
439 *
440 */
441 unsigned char sb[1024];
881990a2 442 unsigned long size;
682c7051
NB
443 if (lseek(fd, 64*1024, 0) != 64*1024)
444 return 0;
445 if (read(fd, sb, 1024) != 1024)
446 return 0;
a46f4061
NB
447 if (strncmp((char*)sb+52, "ReIsErFs",8)!=0 &&
448 strncmp((char*)sb+52, "ReIsEr2Fs",9)!=0)
682c7051
NB
449 return 0;
450 fprintf(stderr, Name ": %s appears to contain a reiserfs file system\n",name);
451 size = sb[0]|(sb[1]|(sb[2]|sb[3]<<8)<<8)<<8;
881990a2 452 fprintf(stderr, " size = %luK\n", size*4);
aba69144 453
682c7051
NB
454 return 1;
455}
456
457int check_raid(int fd, char *name)
458{
4b1ac34b 459 struct mdinfo info;
682c7051 460 time_t crtime;
d078d77c 461 char *level;
82d9eba6 462 struct supertype *st = guess_super(fd);
f9ce90ba 463
82d9eba6 464 if (!st) return 0;
3da92f27 465 st->ss->load_super(st, fd, name);
82d9eba6
NB
466 /* Looks like a raid array .. */
467 fprintf(stderr, Name ": %s appears to be part of a raid array:\n",
468 name);
3da92f27
NB
469 st->ss->getinfo_super(st, &info);
470 st->ss->free_super(st);
82d9eba6 471 crtime = info.array.ctime;
d078d77c
NB
472 level = map_num(pers, info.array.level);
473 if (!level) level = "-unknown-";
474 fprintf(stderr, " level=%s devices=%d ctime=%s",
475 level, info.array.raid_disks, ctime(&crtime));
82d9eba6 476 return 1;
682c7051
NB
477}
478
682c7051
NB
479int ask(char *mesg)
480{
481 char *add = "";
482 int i;
483 for (i=0; i<5; i++) {
484 char buf[100];
485 fprintf(stderr, "%s%s", mesg, add);
486 fflush(stderr);
487 if (fgets(buf, 100, stdin)==NULL)
488 return 0;
489 if (buf[0]=='y' || buf[0]=='Y')
490 return 1;
491 if (buf[0]=='n' || buf[0]=='N')
492 return 0;
493 add = "(y/n) ";
494 }
495 fprintf(stderr, Name ": assuming 'no'\n");
496 return 0;
497}
435d4ebb 498#endif /* MDASSEMBLE */
682c7051
NB
499
500char *map_num(mapping_t *map, int num)
501{
502 while (map->name) {
503 if (map->num == num)
504 return map->name;
505 map++;
506 }
507 return NULL;
508}
509
510int map_name(mapping_t *map, char *name)
511{
512 while (map->name) {
513 if (strcmp(map->name, name)==0)
514 return map->num;
515 map++;
516 }
98c6faba 517 return UnSet;
682c7051 518}
82b27616 519
e5329c37 520
int is_standard(char *dev, int *nump)
{
	/* tests if dev is a "standard" md dev name.
	 * i.e if the last component is "/dNN" or "/mdNN",
	 * where NN is a string of digits
	 * Returns 1 if a partitionable standard,
	 *   -1 if non-partitionable,
	 *   0 if not a standard name.
	 * On a non-zero return, *nump (if nump is not NULL) is set to NN.
	 */
	char *d = strrchr(dev, '/');
	int type;
	int num;

	if (!d)
		return 0;
	if (strncmp(d, "/d", 2) == 0) {
		d += 2;
		type = 1;	/* /dev/md/dN{pM} */
	} else if (strncmp(d, "/md_d", 5) == 0) {
		d += 5;
		type = 1;	/* /dev/md_dN{pM} */
	} else if (strncmp(d, "/md", 3) == 0) {
		d += 3;
		type = -1;	/* /dev/mdN */
	} else if (d - dev > 3 && strncmp(d - 2, "md/", 3) == 0) {
		d += 1;
		type = -1;	/* /dev/md/N */
	} else
		return 0;

	if (!*d)
		return 0;
	num = atoi(d);
	/* isdigit() needs an unsigned char value; a plain char with the
	 * top bit set would be passed as a negative int */
	while (isdigit((unsigned char)*d))
		d++;
	if (*d)
		return 0;
	if (nump)
		*nump = num;

	return type;
}
556
557
82b27616
NB
/*
 * Convert a major/minor pair for a block device into a name in /dev, if possible.
 * On the first call, walk /dev collecting names.
 * Put them in a simple linked list for now.
 */
/* One block device name found under /dev */
struct devmap {
	int major, minor;
	char *name;
	struct devmap *next;
};

/* Head of the list of known names, and whether it is currently valid */
struct devmap *devlist = NULL;
int devlist_ready = 0;
82b27616
NB
570int add_dev(const char *name, const struct stat *stb, int flag, struct FTW *s)
571{
bed256c2 572 struct stat st;
bf68e9d9 573
bed256c2 574 if (S_ISLNK(stb->st_mode)) {
bf68e9d9
DW
575 if (stat(name, &st) != 0)
576 return 0;
bed256c2 577 stb = &st;
82b27616 578 }
bed256c2
NB
579
580 if ((stb->st_mode&S_IFMT)== S_IFBLK) {
581 char *n = strdup(name);
582 struct devmap *dm = malloc(sizeof(*dm));
583 if (strncmp(n, "/dev/./", 7)==0)
584 strcpy(n+4, name+6);
585 if (dm) {
586 dm->major = major(stb->st_rdev);
587 dm->minor = minor(stb->st_rdev);
588 dm->name = n;
589 dm->next = devlist;
590 devlist = dm;
591 }
592 }
593 return 0;
82b27616
NB
594}
595
45e878bb
NB
#ifndef HAVE_NFTW
#ifdef HAVE_FTW
/* ftw() callbacks take no 'struct FTW' argument: adapt to add_dev() */
int add_dev_1(const char *name, const struct stat *stb, int flag)
{
	return add_dev(name, stb, flag, NULL);
}

/* Stand-in for nftw() built on ftw(): 'han' and 'flags' are ignored
 * and add_dev() is always the callback that gets used.
 */
int nftw(const char *path,
	 int (*han)(const char *name, const struct stat *stb, int flag, struct FTW *s),
	 int nopenfd, int flags)
{
	return ftw(path, add_dev_1, nopenfd);
}
#else
/* Neither nftw() nor ftw() is available: visit nothing, return 0 */
int nftw(const char *path,
	 int (*han)(const char *name, const struct stat *stb, int flag, struct FTW *s),
	 int nopenfd, int flags)
{
	return 0;
}
#endif /* HAVE_FTW */
#endif /* HAVE_NFTW */
613
dd0781e5
NB
614/*
615 * Find a block device with the right major/minor number.
b79713f8 616 * If we find multiple names, choose the shortest.
70ef16db 617 * If we find a name in /dev/md/, we prefer that.
b79713f8 618 * This applies only to names for MD devices.
dd0781e5 619 */
16c6fa80 620char *map_dev(int major, int minor, int create)
82b27616 621{
dd0781e5 622 struct devmap *p;
70ef16db 623 char *regular = NULL, *preferred=NULL;
e7bb5d23 624 int did_check = 0;
eed35d66 625
e81cdd9f 626 if (major == 0 && minor == 0)
eed35d66 627 return NULL;
e81cdd9f 628
e7bb5d23 629 retry:
dd0781e5 630 if (!devlist_ready) {
0a416ec3
NB
631 char *dev = "/dev";
632 struct stat stb;
eed35d66
NB
633 while(devlist) {
634 struct devmap *d = devlist;
635 devlist = d->next;
636 free(d->name);
637 free(d);
638 }
0a416ec3
NB
639 if (lstat(dev, &stb)==0 &&
640 S_ISLNK(stb.st_mode))
641 dev = "/dev/.";
642 nftw(dev, add_dev, 10, FTW_PHYS);
dd0781e5 643 devlist_ready=1;
e7bb5d23 644 did_check = 1;
dd0781e5 645 }
82b27616 646
dd0781e5
NB
647 for (p=devlist; p; p=p->next)
648 if (p->major == major &&
649 p->minor == minor) {
70ef16db
N
650 if (strncmp(p->name, "/dev/md/",8) == 0) {
651 if (preferred == NULL ||
652 strlen(p->name) < strlen(preferred))
653 preferred = p->name;
b79713f8 654 } else {
70ef16db
N
655 if (regular == NULL ||
656 strlen(p->name) < strlen(regular))
657 regular = p->name;
b79713f8 658 }
dd0781e5 659 }
70ef16db 660 if (!regular && !preferred && !did_check) {
e7bb5d23
NB
661 devlist_ready = 0;
662 goto retry;
663 }
70ef16db 664 if (create && !regular && !preferred) {
16c6fa80 665 static char buf[30];
382245c3 666 snprintf(buf, sizeof(buf), "%d:%d", major, minor);
70ef16db 667 regular = buf;
16c6fa80
NB
668 }
669
70ef16db 670 return preferred ? preferred : regular;
82b27616
NB
671}
672
unsigned long calc_csum(void *super, int bytes)
{
	/* Sum the first bytes/4 unsigned ints of 'super' into 64 bits,
	 * then add the high 32 bits of the sum onto the low 32 bits.
	 */
	unsigned int *word = (unsigned int *)super;
	unsigned long long sum = 0;
	unsigned int csum;
	int remaining = bytes / 4;

	while (remaining-- > 0)
		sum += *word++;
	csum = (sum & 0xffffffff) + (sum >> 32);
#ifdef __alpha__
/* The in-kernel checksum calculation is always 16bit on
 * the alpha, though it is 32 bit on i386...
 * I wonder what it is elsewhere... (it uses an API in
 * a way that it shouldn't).
 */
	csum = (csum & 0xffff) + (csum >> 16);
	csum = (csum & 0xffff) + (csum >> 16);
#endif
	return csum;
}
cd29a5c8 694
435d4ebb 695#ifndef MDASSEMBLE
char *human_size(long long bytes)
{
	static char buf[30];
	const char *fmt;
	long long bin_unit, dec_unit;
	long bin, dec;

	/* We convert bytes to either centi-M{ega,ibi}bytes or
	 * centi-G{iga,ibi}bytes, with appropriate rounding,
	 * and then print 1/100th of those as a decimal.
	 * We allow up to 2048 Megabytes before converting to
	 * gigabytes, as that shows more precision and isn't
	 * too large a number.
	 * Terabytes are not yet handled.
	 * Sizes below 5000K produce an empty string.
	 */
	if (bytes < 5000*1024) {
		buf[0] = 0;
		return buf;
	}

	if (bytes < 2*1024LL*1024LL*1024LL) {
		bin_unit = 1LL << 20;
		dec_unit = 1000000LL;
		fmt = " (%ld.%02ld MiB %ld.%02ld MB)";
	} else {
		bin_unit = 1LL << 30;
		dec_unit = 1000000000LL;
		fmt = " (%ld.%02ld GiB %ld.%02ld GB)";
	}

	/* divide into 1/200ths of a unit, then round to 1/100ths */
	bin = (bytes / (bin_unit / 200LL) + 1) / 2;
	dec = (bytes / (dec_unit / 200LL) + 1) / 2;
	snprintf(buf, sizeof(buf), fmt,
		 bin / 100, bin % 100,
		 dec / 100, dec % 100);
	return buf;
}
e0d19036
NB
726
char *human_size_brief(long long bytes)
{
	/* Format 'bytes' with two decimals as KiB (below 5000K), MiB
	 * (below 2GiB) or GiB, in a static buffer.
	 * The value is rounded to hundredths of the unit as a whole and
	 * only then split, so the fraction can never reach 100 (rounding
	 * the fraction separately gave results like "1.100KiB").
	 * 'bytes' is expected to be non-negative.
	 */
	static char buf[30];
	long long centi;

	if (bytes < 5000*1024) {
		centi = (bytes * 100 + 512) / 1024;
		snprintf(buf, sizeof(buf), "%ld.%02ldKiB",
			 (long)(centi / 100), (long)(centi % 100));
	} else if (bytes < 2*1024LL*1024LL*1024LL) {
		centi = (bytes * 100 + 0x80000) / 0x100000;
		snprintf(buf, sizeof(buf), "%ld.%02ldMiB",
			 (long)(centi / 100), (long)(centi % 100));
	} else {
		/* work in KiB so the multiplication cannot overflow */
		centi = ((bytes >> 10) * 100 + 0x80000) / 0x100000;
		snprintf(buf, sizeof(buf), "%ld.%02ldGiB",
			 (long)(centi / 100), (long)(centi % 100));
	}
	return buf;
}
e4965ef8
N
747
void print_r10_layout(int layout)
{
	/* Print the near and far/offset copy counts encoded in a raid10
	 * layout, leaving out any that equal 1; if both are 1 print
	 * "NO REDUNDANCY" instead.
	 */
	int near_copies = layout & 255;
	int far_copies = (layout >> 8) & 255;
	int use_offset = layout & 0x10000;
	char *sep = "";

	if (near_copies != 1) {
		printf("%s near=%d", sep, near_copies);
		sep = ",";
	}
	if (far_copies != 1)
		printf("%s %s=%d", sep, use_offset ? "offset" : "far", far_copies);
	if (near_copies * far_copies == 1)
		printf("NO REDUNDANCY");
}
435d4ebb 764#endif
e0d19036 765
5f8097be
NB
unsigned long long calc_array_size(int level, int raid_disks, int layout,
				   int chunksize, unsigned long long devsize)
{
	/* Returns the number of data-carrying devices for the level
	 * multiplied by the per-device size, the latter rounded down to
	 * a whole number of chunks ('chunksize' is in bytes, 'devsize'
	 * is taken to be in 512-byte sectors - the mask is chunksize>>9).
	 * Levels not listed below give 0.
	 */
	int data_disks = 0;
	int chunk_sectors = chunksize >> 9;
	int near, far;

	/* RAID1 has no chunks: every mirror holds the whole array.
	 * (Rounding with a chunksize of 0 used to produce a size of 0.) */
	if (level == 1)
		return devsize;

	switch (level) {
	case 0:
		data_disks = raid_disks;
		break;
	case 4:
	case 5:
		data_disks = raid_disks - 1;
		break;
	case 6:
		data_disks = raid_disks - 2;
		break;
	case 10:
		near = layout & 255;
		far = (layout >> 8) & 255;
		/* a zero copy count would divide by zero; treat as empty */
		if (near && far)
			data_disks = raid_disks / near / far;
		break;
	default:
		break;
	}
	/* only round when the chunk is at least one sector; a smaller
	 * chunksize would otherwise mask the size down to 0 */
	if (chunk_sectors > 0)
		devsize &= ~(unsigned long long)(chunk_sectors - 1);
	return data_disks * devsize;
}
782
int get_mdp_major(void)
{
	/* Returns the number listed before "mdp" in /proc/devices, once
	 * a "Block devices:" heading has been seen, or -1 if there is
	 * none.  A result found once is remembered for later calls.
	 */
	static int mdp_major = -1;
	FILE *fl;
	char *w;
	int prev_was_block = 0;
	int in_block_section = 0;
	int last_num = -1;

	if (mdp_major != -1)
		return mdp_major;
	fl = fopen("/proc/devices", "r");
	if (!fl)
		return -1;

	for (w = conf_word(fl, 1); w; w = conf_word(fl, 1)) {
		if (prev_was_block && strcmp(w, "devices:") == 0)
			in_block_section = 1;
		prev_was_block = (strcmp(w, "Block") == 0);
		/* remember the most recent number: it precedes the name */
		if (isdigit(w[0]))
			last_num = atoi(w);
		if (in_block_section && strcmp(w, "mdp") == 0)
			mdp_major = last_num;
		free(w);
	}
	fclose(fl);
	return mdp_major;
}
810
0e600426 811#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
e0d19036
NB
812char *get_md_name(int dev)
813{
814 /* find /dev/md%d or /dev/md/%d or make a device /dev/.tmp.md%d */
98c6faba 815 /* if dev < 0, want /dev/md/d%d or find mdp in /proc/devices ... */
e0d19036
NB
816 static char devname[50];
817 struct stat stb;
98c6faba 818 dev_t rdev;
dd0781e5 819 char *dn;
98c6faba
NB
820
821 if (dev < 0) {
dd0781e5
NB
822 int mdp = get_mdp_major();
823 if (mdp < 0) return NULL;
0df46c2a 824 rdev = makedev(mdp, (-1-dev)<<6);
8f23b0b3 825 snprintf(devname, sizeof(devname), "/dev/md/d%d", -1-dev);
98c6faba
NB
826 if (stat(devname, &stb) == 0
827 && (S_IFMT&stb.st_mode) == S_IFBLK
828 && (stb.st_rdev == rdev))
829 return devname;
830 } else {
0df46c2a 831 rdev = makedev(MD_MAJOR, dev);
8f23b0b3 832 snprintf(devname, sizeof(devname), "/dev/md%d", dev);
98c6faba
NB
833 if (stat(devname, &stb) == 0
834 && (S_IFMT&stb.st_mode) == S_IFBLK
835 && (stb.st_rdev == rdev))
836 return devname;
837
8f23b0b3 838 snprintf(devname, sizeof(devname), "/dev/md/%d", dev);
98c6faba
NB
839 if (stat(devname, &stb) == 0
840 && (S_IFMT&stb.st_mode) == S_IFBLK
841 && (stb.st_rdev == rdev))
842 return devname;
843 }
16c6fa80 844 dn = map_dev(major(rdev), minor(rdev), 0);
dd0781e5
NB
845 if (dn)
846 return dn;
8f23b0b3 847 snprintf(devname, sizeof(devname), "/dev/.tmp.md%d", dev);
e0d19036 848 if (mknod(devname, S_IFBLK | 0600, rdev) == -1)
dd0781e5
NB
849 if (errno != EEXIST)
850 return NULL;
e0d19036
NB
851
852 if (stat(devname, &stb) == 0
853 && (S_IFMT&stb.st_mode) == S_IFBLK
854 && (stb.st_rdev == rdev))
855 return devname;
856 unlink(devname);
857 return NULL;
858}
859
void put_md_name(char *name)
{
	/* Unlink 'name' if it starts with "/dev/.tmp.md"; any other
	 * name is left alone.
	 */
	static const char tmp_prefix[] = "/dev/.tmp.md";

	if (strncmp(name, tmp_prefix, sizeof(tmp_prefix) - 1) != 0)
		return;
	unlink(name);
}
ea24acd0 865
ea24acd0
NB
866int find_free_devnum(int use_partitions)
867{
868 int devnum;
869 for (devnum = 127; devnum != 128;
a56fb7ec 870 devnum = devnum ? devnum-1 : (1<<20)-1) {
ea24acd0
NB
871 char *dn;
872 int _devnum;
873
874 _devnum = use_partitions ? (-1-devnum) : devnum;
875 if (mddev_busy(_devnum))
876 continue;
877 /* make sure it is new to /dev too, at least as a
878 * non-standard */
879 dn = map_dev(dev2major(_devnum), dev2minor(_devnum), 0);
880 if (dn && ! is_standard(dn, NULL))
881 continue;
882 break;
883 }
884 if (devnum == 128)
885 return NoMdDev;
886 return use_partitions ? (-1-devnum) : devnum;
887}
435d4ebb 888#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
f9ce90ba 889
8b0dabea
NB
890int dev_open(char *dev, int flags)
891{
892 /* like 'open', but if 'dev' matches %d:%d, create a temp
893 * block device and open that
894 */
895 char *e;
896 int fd = -1;
897 char devname[32];
e81cdd9f 898 int major;
8b0dabea 899 int minor;
e81cdd9f
NB
900
901 if (!dev) return -1;
902
903 major = strtoul(dev, &e, 0);
8b0dabea
NB
904 if (e > dev && *e == ':' && e[1] &&
905 (minor = strtoul(e+1, &e, 0)) >= 0 &&
906 *e == 0) {
8c210183
NB
907 snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d:%d",
908 (int)getpid(), major, minor);
8b0dabea 909 if (mknod(devname, S_IFBLK|0600, makedev(major, minor))==0) {
6416d527 910 fd = open(devname, flags|O_DIRECT);
8b0dabea
NB
911 unlink(devname);
912 }
913 } else
6416d527 914 fd = open(dev, flags|O_DIRECT);
8b0dabea
NB
915 return fd;
916}
f9ce90ba 917
e8a70c89
N
int open_dev(int devnum)
{
	/* Open the md device numbered 'devnum' read/write through its
	 * "major:minor" name.  Returns what dev_open() returns.
	 */
	char buf[32];	/* holds two full-width ints and the ':' */

	snprintf(buf, sizeof(buf), "%d:%d",
		 dev2major(devnum), dev2minor(devnum));
	return dev_open(buf, O_RDWR);
}
925
a931db9e
NB
int open_dev_excl(int devnum)
{
	/* As open_dev(), but with O_EXCL.  While the open fails with
	 * EBUSY it is retried, up to 25 times 0.2s apart; any other
	 * result is returned at once.  Returns -1 if it stays busy.
	 */
	char buf[32];	/* holds two full-width ints and the ':' */
	int i;

	snprintf(buf, sizeof(buf), "%d:%d",
		 dev2major(devnum), dev2minor(devnum));
	for (i = 0; i < 25; i++) {
		int fd = dev_open(buf, O_RDWR|O_EXCL);

		if (fd >= 0 || errno != EBUSY)
			return fd;
		usleep(200000);
	}
	return -1;
}
942
9008ed1c
N
943int same_dev(char *one, char *two)
944{
945 struct stat st1, st2;
946 if (stat(one, &st1) != 0)
947 return 0;
948 if (stat(two, &st2) != 0)
949 return 0;
950 if ((st1.st_mode & S_IFMT) != S_IFBLK)
951 return 0;
952 if ((st2.st_mode & S_IFMT) != S_IFBLK)
953 return 0;
954 return st1.st_rdev == st2.st_rdev;
955}
956
a7c6e3fb 957void wait_for(char *dev, int fd)
a714580e
N
958{
959 int i;
a7c6e3fb
N
960 struct stat stb_want;
961
962 if (fstat(fd, &stb_want) != 0 ||
963 (stb_want.st_mode & S_IFMT) != S_IFBLK)
964 return;
a714580e
N
965
966 for (i=0 ; i<25 ; i++) {
967 struct stat stb;
a7c6e3fb
N
968 if (stat(dev, &stb) == 0 &&
969 (stb.st_mode & S_IFMT) == S_IFBLK &&
970 (stb.st_rdev == stb_want.st_rdev))
a714580e
N
971 return;
972 usleep(200000);
973 }
436305c6
DW
974 if (i == 25)
975 dprintf("%s: timeout waiting for %s\n", __func__, dev);
a714580e
N
976}
977
cdddbdbc 978struct superswitch *superlist[] = { &super0, &super1, &super_ddf, &super_imsm, NULL };
f9ce90ba 979
ea24acd0 980#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
f7dd881f 981
1686dc25 982struct supertype *super_by_fd(int fd)
f9ce90ba 983{
1686dc25
NB
984 mdu_array_info_t array;
985 int vers;
986 int minor;
987 struct supertype *st = NULL;
7e0f6979 988 struct mdinfo *sra;
142cb9e1 989 char *verstr;
1686dc25
NB
990 char version[20];
991 int i;
f7e7067b 992 char *subarray = NULL;
1686dc25
NB
993
994 sra = sysfs_read(fd, 0, GET_VERSION);
995
996 if (sra) {
7e0f6979
NB
997 vers = sra->array.major_version;
998 minor = sra->array.minor_version;
142cb9e1 999 verstr = sra->text_version;
1686dc25
NB
1000 } else {
1001 if (ioctl(fd, GET_ARRAY_INFO, &array))
1002 array.major_version = array.minor_version = 0;
1003 vers = array.major_version;
1004 minor = array.minor_version;
142cb9e1 1005 verstr = "";
6fbba4c9 1006 }
82d9eba6 1007
1686dc25
NB
1008 if (vers != -1) {
1009 sprintf(version, "%d.%d", vers, minor);
1010 verstr = version;
6fbba4c9 1011 }
3c558363 1012 if (minor == -2 && is_subarray(verstr)) {
f7e7067b
NB
1013 char *dev = verstr+1;
1014 subarray = strchr(dev, '/');
1015 int devnum;
1016 if (subarray)
1017 *subarray++ = '\0';
77472ff8 1018 devnum = devname2devnum(dev);
f7e7067b
NB
1019 subarray = strdup(subarray);
1020 if (sra)
1021 sysfs_free(sra);
1022 sra = sysfs_read(-1, devnum, GET_VERSION);
603f24a0
N
1023 if (sra && sra->text_version[0])
1024 verstr = sra->text_version;
1025 else
1026 verstr = "-no-metadata-";
f7e7067b
NB
1027 }
1028
1029 for (i = 0; st == NULL && superlist[i] ; i++)
1030 st = superlist[i]->match_metadata_desc(verstr);
1686dc25
NB
1031
1032 if (sra)
1033 sysfs_free(sra);
f7e7067b 1034 if (st) {
3b0896f8 1035 st->sb = NULL;
f7e7067b
NB
1036 if (subarray) {
1037 strncpy(st->subarray, subarray, 32);
1038 st->subarray[31] = 0;
1039 free(subarray);
1040 } else
1041 st->subarray[0] = 0;
1042 }
82d9eba6 1043 return st;
f9ce90ba 1044}
ea24acd0
NB
1045#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
1046
f9ce90ba 1047
159c3a1a 1048struct supertype *dup_super(struct supertype *orig)
3da92f27 1049{
159c3a1a 1050 struct supertype *st;
1686dc25 1051
d2ca6449
NB
1052 if (!orig)
1053 return orig;
159c3a1a 1054 st = malloc(sizeof(*st));
3da92f27
NB
1055 if (!st)
1056 return st;
ef609477 1057 memset(st, 0, sizeof(*st));
159c3a1a
NB
1058 st->ss = orig->ss;
1059 st->max_devs = orig->max_devs;
1060 st->minor_version = orig->minor_version;
f7e7067b 1061 strcpy(st->subarray, orig->subarray);
159c3a1a
NB
1062 st->sb = NULL;
1063 st->info = NULL;
1064 return st;
3da92f27
NB
1065}
1066
82d9eba6 1067struct supertype *guess_super(int fd)
f9ce90ba
NB
1068{
1069 /* try each load_super to find the best match,
1070 * and return the best superswitch
1071 */
82d9eba6
NB
1072 struct superswitch *ss;
1073 struct supertype *st;
570c0542
NB
1074 unsigned long besttime = 0;
1075 int bestsuper = -1;
f9ce90ba
NB
1076 int i;
1077
82d9eba6 1078 st = malloc(sizeof(*st));
f9ce90ba
NB
1079 for (i=0 ; superlist[i]; i++) {
1080 int rv;
1081 ss = superlist[i];
ef609477 1082 memset(st, 0, sizeof(*st));
3da92f27 1083 rv = ss->load_super(st, fd, NULL);
570c0542
NB
1084 if (rv == 0) {
1085 struct mdinfo info;
3da92f27 1086 st->ss->getinfo_super(st, &info);
570c0542
NB
1087 if (bestsuper == -1 ||
1088 besttime < info.array.ctime) {
1089 bestsuper = i;
1090 besttime = info.array.ctime;
570c0542 1091 }
3da92f27 1092 ss->free_super(st);
570c0542
NB
1093 }
1094 }
1095 if (bestsuper != -1) {
1096 int rv;
ef609477 1097 memset(st, 0, sizeof(*st));
3da92f27 1098 rv = superlist[bestsuper]->load_super(st, fd, NULL);
f9ce90ba 1099 if (rv == 0) {
5e747af2 1100 superlist[bestsuper]->free_super(st);
82d9eba6 1101 return st;
f9ce90ba
NB
1102 }
1103 }
570c0542 1104 free(st);
f9ce90ba
NB
1105 return NULL;
1106}
fe6729fa 1107
beae1dfe
NB
1108/* Return size of device in bytes */
1109int get_dev_size(int fd, char *dname, unsigned long long *sizep)
1110{
1111 unsigned long long ldsize;
c2c9bb6f
NB
1112 struct stat st;
1113
1114 if (fstat(fd, &st) != -1 && S_ISREG(st.st_mode))
1115 ldsize = (unsigned long long)st.st_size;
1116 else
beae1dfe
NB
1117#ifdef BLKGETSIZE64
1118 if (ioctl(fd, BLKGETSIZE64, &ldsize) != 0)
1119#endif
1120 {
1121 unsigned long dsize;
1122 if (ioctl(fd, BLKGETSIZE, &dsize) == 0) {
1123 ldsize = dsize;
1124 ldsize <<= 9;
1125 } else {
1126 if (dname)
1127 fprintf(stderr, Name ": Cannot get size of %s: %s\b",
1128 dname, strerror(errno));
1129 return 0;
1130 }
1131 }
1132 *sizep = ldsize;
1133 return 1;
1134}
8fac0577 1135
034b203a
TM
1136
1137/* Sets endofpart parameter to the last block used by the last GPT partition on the device.
1138 * Returns: 1 if successful
1139 * -1 for unknown partition type
1140 * 0 for other errors
1141 */
1142static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart)
1143{
1144 unsigned char buf[512];
1145 unsigned char empty_gpt_entry[16]= {0};
1146 struct GPT_part_entry *part;
1147 unsigned long long curr_part_end;
1148 unsigned all_partitions, entry_size;
1149 int part_nr;
1150
1151 *endofpart = 0;
1152
1153 /* read GPT header */
1154 lseek(fd, 512, SEEK_SET);
1155 if (read(fd, buf, 512) != 512)
1156 return 0;
1157
1158 /* get the number of partition entries and the entry size */
1159 all_partitions = __le32_to_cpu(buf[GPT_ALL_PARTITIONS_OFFSET]);
1160 entry_size = __le32_to_cpu(buf[GPT_ENTRY_SIZE_OFFSET]);
1161
1162 /* Check GPT signature*/
1163 if (*((__u64*)buf) != GPT_SIGNATURE_MAGIC)
1164 return -1;
1165
1166 /* sanity checks */
1167 if (all_partitions > 1024 ||
1168 entry_size > 512)
1169 return -1;
1170
1171 /* read first GPT partition entries */
1172 if (read(fd, buf, 512) != 512)
1173 return 0;
1174
1175 part = (struct GPT_part_entry*)buf;
1176
1177 for (part_nr=0; part_nr < all_partitions; part_nr++) {
1178 /* is this valid partition? */
1179 if (memcmp(part->type_guid, empty_gpt_entry, 16) != 0) {
1180 /* check the last lba for the current partition */
1181 curr_part_end = __le64_to_cpu(*(__u64*)part->ending_lba);
1182 if (curr_part_end > *endofpart)
1183 *endofpart = curr_part_end;
1184 }
1185
1186 part = (struct GPT_part_entry*)((unsigned char*)part + entry_size);
1187
1188 if ((unsigned char *)part >= buf + 512) {
1189 if (read(fd, buf, 512) != 512)
1190 return 0;
1191 part = (struct GPT_part_entry*)buf;
1192 }
1193 }
1194 return 1;
1195}
1196
1197/* Sets endofpart parameter to the last block used by the last partition on the device.
1198 * Returns: 1 if successful
1199 * -1 for unknown partition type
1200 * 0 for other errors
1201 */
1202static int get_last_partition_end(int fd, unsigned long long *endofpart)
1203{
1204 unsigned char boot_sect[512];
1205 struct MBR_part_record *part;
1206 unsigned long long curr_part_end;
1207 int part_nr;
1208 int retval = 0;
1209
1210 *endofpart = 0;
1211
1212 /* read MBR */
1213 lseek(fd, 0, 0);
1214 if (read(fd, boot_sect, 512) != 512)
1215 goto abort;
1216
1217 /* check MBP signature */
1218 if (*((__u16*)(boot_sect + MBR_SIGNATURE_OFFSET))
1219 == MBR_SIGNATURE_MAGIC) {
1220 retval = 1;
1221 /* found the correct signature */
1222 part = (struct MBR_part_record*)
1223 (boot_sect + MBR_PARTITION_TABLE_OFFSET);
1224
1225 for (part_nr=0; part_nr < MBR_PARTITIONS; part_nr++) {
1226 /* check for GPT type */
1227 if (part->part_type == MBR_GPT_PARTITION_TYPE) {
1228 retval = get_gpt_last_partition_end(fd, endofpart);
1229 break;
1230 }
1231 /* check the last used lba for the current partition */
1232 curr_part_end = __le32_to_cpu(part->first_sect_lba) +
1233 __le32_to_cpu(part->blocks_num);
1234 if (curr_part_end > *endofpart)
1235 *endofpart = curr_part_end;
1236
1237 part++;
1238 }
1239 } else {
1240 /* Unknown partition table */
1241 retval = -1;
1242 }
1243 abort:
1244 return retval;
1245}
1246
1247int check_partitions(int fd, char *dname, unsigned long long freesize)
1248{
1249 /*
1250 * Check where the last partition ends
1251 */
1252 unsigned long long endofpart;
1253 int ret;
1254
1255 if ((ret = get_last_partition_end(fd, &endofpart)) > 0) {
1256 /* There appears to be a partition table here */
1257 if (freesize == 0) {
1258 /* partitions will not be visible in new device */
1259 fprintf(stderr,
1260 Name ": partition table exists on %s but will be lost or\n"
1261 " meaningless after creating array\n",
1262 dname);
1263 return 1;
1264 } else if (endofpart > freesize) {
1265 /* last partition overlaps metadata */
1266 fprintf(stderr,
1267 Name ": metadata will over-write last partition on %s.\n",
1268 dname);
1269 return 1;
1270 }
1271 }
1272 return 0;
1273}
1274
8382f19b
NB
1275void get_one_disk(int mdfd, mdu_array_info_t *ainf, mdu_disk_info_t *disk)
1276{
1277 int d;
1278 ioctl(mdfd, GET_ARRAY_INFO, ainf);
1279 for (d = 0 ; d < ainf->raid_disks + ainf->nr_disks ; d++)
1280 if (ioctl(mdfd, GET_DISK_INFO, disk) == 0)
1281 return;
1282}
63152c1b 1283
a322f70c
DW
/*
 * 'fd' is a block device.  Find out if it is in use by a container,
 * and return an open fd on that container.
 *
 * Each entry of /sys/dev/block/<major>:<minor>/holders is tried in turn:
 * its "dev" file is parsed as "major:minor" and handed to dev_open().
 *
 * Returns the first descriptor dev_open() yields, or -1 if fd cannot be
 * examined, has no holders directory, or no holder could be opened.
 */
int open_container(int fd)
{
	char path[256];
	char *e;
	size_t room;
	DIR *dir;
	struct dirent *de;
	int dfd, n;
	char buf[200];
	int holder_major, holder_minor;
	struct stat st;

	if (fstat(fd, &st) != 0)
		return -1;
	snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/holders",
		 (int)major(st.st_rdev), (int)minor(st.st_rdev));
	e = path + strlen(path);
	/* space left in path[] for the per-holder suffix */
	room = sizeof(path) - (size_t)(e - path);

	dir = opendir(path);
	if (!dir)
		return -1;
	while ((de = readdir(dir))) {
		int len;

		if (de->d_ino == 0)
			continue;
		if (de->d_name[0] == '.')
			continue;
		len = snprintf(e, room, "/%s/dev", de->d_name);
		if (len < 0 || (size_t)len >= room)
			/* name does not fit in path[]; skip this holder */
			continue;
		dfd = open(path, O_RDONLY);
		if (dfd < 0)
			continue;
		n = read(dfd, buf, sizeof(buf));
		close(dfd);
		if (n <= 0 || (size_t)n >= sizeof(buf))
			continue;
		buf[n] = 0;
		if (sscanf(buf, "%d:%d", &holder_major, &holder_minor) != 2)
			continue;
		/* re-format to drop whatever followed the numbers */
		sprintf(buf, "%d:%d", holder_major, holder_minor);
		dfd = dev_open(buf, O_RDONLY);
		if (dfd >= 0) {
			closedir(dir);
			return dfd;
		}
	}
	closedir(dir);
	return -1;
}
1333
7801ac20
N
1334int add_disk(int mdfd, struct supertype *st,
1335 struct mdinfo *sra, struct mdinfo *info)
1336{
1337 /* Add a device to an array, in one of 2 ways. */
1338 int rv;
1339#ifndef MDASSEMBLE
1340 if (st->ss->external) {
d23534e4
DW
1341 if (info->disk.state & (1<<MD_DISK_SYNC))
1342 info->recovery_start = MaxSector;
1343 else
1344 info->recovery_start = 0;
2904b26f 1345 rv = sysfs_add_disk(sra, info, 0);
7801ac20
N
1346 if (! rv) {
1347 struct mdinfo *sd2;
f35f2525
N
1348 for (sd2 = sra->devs; sd2; sd2=sd2->next)
1349 if (sd2 == info)
1350 break;
1351 if (sd2 == NULL) {
1352 sd2 = malloc(sizeof(*sd2));
1353 *sd2 = *info;
1354 sd2->next = sra->devs;
1355 sra->devs = sd2;
1356 }
7801ac20
N
1357 }
1358 } else
1359#endif
1360 rv = ioctl(mdfd, ADD_NEW_DISK, &info->disk);
1361 return rv;
1362}
1363
f35f2525
N
1364int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
1365{
1366 /* Initialise kernel's knowledge of array.
1367 * This varies between externally managed arrays
1368 * and older kernels
1369 */
1370 int vers = md_get_version(mdfd);
1371 int rv;
1372
1373#ifndef MDASSEMBLE
1374 if (st->ss->external)
1375 rv = sysfs_set_array(info, vers);
1376 else
1377#endif
1378 if ((vers % 100) >= 1) { /* can use different versions */
1379 mdu_array_info_t inf;
1380 memset(&inf, 0, sizeof(inf));
1381 inf.major_version = info->array.major_version;
1382 inf.minor_version = info->array.minor_version;
1383 rv = ioctl(mdfd, SET_ARRAY_INFO, &inf);
1384 } else
1385 rv = ioctl(mdfd, SET_ARRAY_INFO, NULL);
1386 return rv;
1387}
1388
1e5c6983
DW
1389unsigned long long min_recovery_start(struct mdinfo *array)
1390{
1391 /* find the minimum recovery_start in an array for metadata
1392 * formats that only record per-array recovery progress instead
1393 * of per-device
1394 */
1395 unsigned long long recovery_start = MaxSector;
1396 struct mdinfo *d;
1397
1398 for (d = array->devs; d; d = d->next)
1399 recovery_start = min(recovery_start, d->recovery_start);
1400
1401 return recovery_start;
1402}
1403
2f6079dc
NB
/*
 * Build the device name for an md device number: "md<N>" for N >= 0,
 * "md_d<M>" with M = -1 - num for negative numbers.
 * The result comes from strdup(); the caller owns it.
 */
char *devnum2devname(int num)
{
	const int negative = num < 0;
	char text[100];

	sprintf(text, "%s%d",
		negative ? "md_d" : "md",
		negative ? -1 - num : num);
	return strdup(text);
}
1413
77472ff8
NB
/*
 * Inverse of the md device naming: the digits after "md_d" give
 * -1 - <digits>, otherwise the digits starting at name+2 are returned
 * as they are.  No validation of 'name' is performed.
 */
int devname2devnum(char *name)
{
	if (strncmp(name, "md_d", 4) != 0)
		return strtoul(name + 2, NULL, 10);

	return -1 - strtoul(name + 4, NULL, 10);
}
1424
c94709e8 1425int stat2devnum(struct stat *st)
2f6079dc 1426{
d7ab966b
N
1427 char path[30];
1428 char link[200];
1429 char *cp;
1430 int n;
1431
c94709e8
DW
1432 if ((S_IFMT & st->st_mode) == S_IFBLK) {
1433 if (major(st->st_rdev) == MD_MAJOR)
1434 return minor(st->st_rdev);
d7ab966b
N
1435 else if (major(st->st_rdev) == get_mdp_major())
1436 return -1- (minor(st->st_rdev)>>MdpMinorShift);
1437
1438 /* must be an extended-minor partition. Look at the
1439 * /sys/dev/block/%d:%d link which must look like
1440 * ../../block/mdXXX/mdXXXpYY
1441 */
1442 sprintf(path, "/sys/dev/block/%d:%d", major(st->st_rdev),
1443 minor(st->st_rdev));
1444 n = readlink(path, link, sizeof(link)-1);
1445 if (n <= 0)
1446 return NoMdDev;
1447 link[n] = 0;
1448 cp = strrchr(link, '/');
1449 if (cp) *cp = 0;
1450 cp = strchr(link, '/');
1451 if (cp && strncmp(cp, "/md", 3) == 0)
1452 return devname2devnum(cp+1);
2f6079dc 1453 }
d7ab966b 1454 return NoMdDev;
c94709e8
DW
1455
1456}
1457
1458int fd2devnum(int fd)
1459{
1460 struct stat stb;
1461 if (fstat(fd, &stb) == 0)
1462 return stat2devnum(&stb);
d7ab966b 1463 return NoMdDev;
2f6079dc
NB
1464}
1465
/*
 * Read the pid recorded in /var/run/mdadm/<devname>.pid, where <devname>
 * is the name devnum2devname() gives for 'devnum'.
 *
 * Returns the number parsed from the file, or 0 if the name cannot be
 * built or the file cannot be opened or read.
 */
int mdmon_pid(int devnum)
{
	char path[100];
	char pid[10];
	char *devname;
	int fd;
	int n;

	devname = devnum2devname(devnum);
	if (devname == NULL)
		return 0;
	snprintf(path, sizeof(path), "/var/run/mdadm/%s.pid", devname);
	/* the name was allocated for us; it is only needed for the path */
	free(devname);

	fd = open(path, O_RDONLY | O_NOATIME, 0);
	if (fd < 0)
		return 0;
	n = read(fd, pid, sizeof(pid) - 1);
	close(fd);
	if (n <= 0)
		return 0;
	/* read() does not terminate the buffer; atoi() needs a string */
	pid[n] = 0;
	return atoi(pid);
}
1483
/*
 * Report whether the process recorded by mdmon_pid() for 'devnum'
 * can be signalled: 1 if kill(pid, 0) succeeds, 0 otherwise or when
 * no positive pid is recorded.
 */
int mdmon_running(int devnum)
{
	int recorded = mdmon_pid(devnum);

	if (recorded > 0 && kill(recorded, 0) == 0)
		return 1;
	return 0;
}
1493
8850ee3e
N
1494int start_mdmon(int devnum)
1495{
1496 int i;
44d2e365 1497 int len;
9fe32043
N
1498 pid_t pid;
1499 int status;
44d2e365
N
1500 char pathbuf[1024];
1501 char *paths[4] = {
1502 pathbuf,
1503 "/sbin/mdmon",
1504 "mdmon",
1505 NULL
1506 };
8850ee3e 1507
40ebbb9c 1508 if (check_env("MDADM_NO_MDMON"))
8850ee3e
N
1509 return 0;
1510
44d2e365
N
1511 len = readlink("/proc/self/exe", pathbuf, sizeof(pathbuf));
1512 if (len > 0) {
1513 char *sl;
1514 pathbuf[len] = 0;
1515 sl = strrchr(pathbuf, '/');
1516 if (sl)
1517 sl++;
1518 else
1519 sl = pathbuf;
1520 strcpy(sl, "mdmon");
1521 } else
1522 pathbuf[0] = '\0';
1523
8850ee3e
N
1524 switch(fork()) {
1525 case 0:
1526 /* FIXME yuk. CLOSE_EXEC?? */
1527 for (i=3; i < 100; i++)
1528 close(i);
44d2e365
N
1529 for (i=0; paths[i]; i++)
1530 if (paths[i][0])
1531 execl(paths[i], "mdmon",
e8a70c89
N
1532 devnum2devname(devnum),
1533 NULL);
8850ee3e
N
1534 exit(1);
1535 case -1: fprintf(stderr, Name ": cannot run mdmon. "
1536 "Array remains readonly\n");
1537 return -1;
9fe32043
N
1538 default: /* parent - good */
1539 pid = wait(&status);
1540 if (pid < 0 || status != 0)
1541 return -1;
8850ee3e
N
1542 }
1543 return 0;
1544}
1545
/*
 * Test a boolean environment switch: returns 1 only when the variable
 * 'name' is set and atoi() of its value is exactly 1, otherwise 0.
 */
int check_env(char *name)
{
	const char *setting = getenv(name);

	return setting != NULL && atoi(setting) == 1;
}
1555
148acb7b
DW
/*
 * Return a 32-bit random value: four bytes read from /dev/urandom,
 * or the result of random() if the device cannot be opened or does
 * not deliver four bytes.
 */
__u32 random32(void)
{
	__u32 value;
	int fd = open("/dev/urandom", O_RDONLY);

	if (fd < 0)
		return random();

	if (read(fd, &value, 4) != 4)
		value = random();
	close(fd);
	return value;
}
1566
0e600426 1567#ifndef MDASSEMBLE
edd8d13c
NB
1568int flush_metadata_updates(struct supertype *st)
1569{
1570 int sfd;
1571 if (!st->updates) {
1572 st->update_tail = NULL;
1573 return -1;
1574 }
1575
1576 sfd = connect_monitor(devnum2devname(st->container_dev));
1577 if (sfd < 0)
1578 return -1;
1579
1580 while (st->updates) {
1581 struct metadata_update *mu = st->updates;
1582 st->updates = mu->next;
1583
1584 send_message(sfd, mu, 0);
1585 wait_reply(sfd, 0);
1586 free(mu->buf);
1587 free(mu);
1588 }
1589 ack(sfd, 0);
1590 wait_reply(sfd, 0);
1591 close(sfd);
1592 st->update_tail = NULL;
1593 return 0;
1594}
1595
1596void append_metadata_update(struct supertype *st, void *buf, int len)
1597{
1598
1599 struct metadata_update *mu = malloc(sizeof(*mu));
1600
1601 mu->buf = buf;
1602 mu->len = len;
1603 mu->space = NULL;
1604 mu->next = NULL;
1605 *st->update_tail = mu;
1606 st->update_tail = &mu->next;
1607}
0e600426 1608#endif /* MDASSEMBLE */
a931db9e 1609
fe6729fa
NB
#ifdef __TINYC__
/* tinyc doesn't optimize this check in ioctl.h out ...
 * so, when building with TinyCC only, provide a definition of the
 * variable (presumably the one that check refers to -- confirm
 * against the ioctl.h in use).
 */
unsigned int __invalid_size_argument_for_IOC = 0;
#endif
1614