]> git.ipfire.org Git - thirdparty/mdadm.git/blob - Incremental.c
Release mdadm-2.6.9
[thirdparty/mdadm.git] / Incremental.c
1 /*
2 * Incremental.c - support --incremental. Part of:
3 * mdadm - manage Linux "md" devices aka RAID arrays.
4 *
5 * Copyright (C) 2006 Neil Brown <neilb@suse.de>
6 *
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 * Author: Neil Brown
23 * Email: <neilb@suse.de>
24 * Paper: Neil Brown
25 * Novell Inc
26 * GPO Box Q1283
27 * QVB Post Office, NSW 1230
28 * Australia
29 */
30
31 #include "mdadm.h"
32
33 static int count_active(struct supertype *st, int mdfd, char **availp,
34 struct mdinfo *info);
35 static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
36 int number, __u64 events, int verbose,
37 char *array_name);
38
39 int Incremental(char *devname, int verbose, int runstop,
40 struct supertype *st, char *homehost, int autof)
41 {
42 /* Add this device to an array, creating the array if necessary
43 * and starting the array if sensible or - if runstop>0 - if possible.
44 *
45 * This has several steps:
46 *
47 * 1/ Check if device is permitted by mdadm.conf, reject if not.
48 * 2/ Find metadata, reject if none appropriate (check
49 * version/name from args)
50 * 3/ Check if there is a match in mdadm.conf
51 * 3a/ if not, check for homehost match. If no match, reject.
52 * 4/ Determine device number.
53 * - If in mdadm.conf with std name, use that
54 * - UUID in /var/run/mdadm.map use that
55 * - If name is suggestive, use that. unless in use with different uuid.
56 * - Choose a free, high number.
57 * - Use a partitioned device unless strong suggestion not to.
58 * e.g. auto=md
59 * 5/ Find out if array already exists
60 * 5a/ if it does not
61 * - choose a name, from mdadm.conf or 'name' field in array.
62 * - create the array
63 * - add the device
64 * 5b/ if it does
65 * - check one drive in array to make sure metadata is a reasonably
66 * close match. Reject if not (e.g. different type)
67 * - add the device
68 * 6/ Make sure /var/run/mdadm.map contains this array.
69 * 7/ Is there enough devices to possibly start the array?
70 * 7a/ if not, finish with success.
71 * 7b/ if yes,
72 * - read all metadata and arrange devices like -A does
73 * - if number of OK devices match expected, or -R and there are enough,
74 * start the array (auto-readonly).
75 */
76 struct stat stb;
77 struct mdinfo info, info2;
78 struct mddev_ident_s *array_list, *match;
79 char chosen_name[1024];
80 int rv;
81 int devnum;
82 struct map_ent *mp, *map = NULL;
83 int dfd, mdfd;
84 char *avail;
85 int active_disks;
86 struct createinfo *ci = conf_get_create_info();
87 char *name;
88
89
90 /* 1/ Check if devices is permitted by mdadm.conf */
91
92 if (!conf_test_dev(devname)) {
93 if (verbose >= 0)
94 fprintf(stderr, Name
95 ": %s not permitted by mdadm.conf.\n",
96 devname);
97 return 1;
98 }
99
100 /* 2/ Find metadata, reject if none appropriate (check
101 * version/name from args) */
102
103 dfd = dev_open(devname, O_RDONLY|O_EXCL);
104 if (dfd < 0) {
105 if (verbose >= 0)
106 fprintf(stderr, Name ": cannot open %s: %s.\n",
107 devname, strerror(errno));
108 return 1;
109 }
110 if (fstat(dfd, &stb) < 0) {
111 if (verbose >= 0)
112 fprintf(stderr, Name ": fstat failed for %s: %s.\n",
113 devname, strerror(errno));
114 close(dfd);
115 return 1;
116 }
117 if ((stb.st_mode & S_IFMT) != S_IFBLK) {
118 if (verbose >= 0)
119 fprintf(stderr, Name ": %s is not a block device.\n",
120 devname);
121 close(dfd);
122 return 1;
123 }
124
125 if (st == NULL && (st = guess_super(dfd)) == NULL) {
126 if (verbose >= 0)
127 fprintf(stderr, Name
128 ": no recognisable superblock on %s.\n",
129 devname);
130 close(dfd);
131 return 1;
132 }
133 if (st->ss->load_super(st, dfd, NULL)) {
134 if (verbose >= 0)
135 fprintf(stderr, Name ": no RAID superblock on %s.\n",
136 devname);
137 close(dfd);
138 return 1;
139 }
140 st->ss->getinfo_super(st, &info);
141 close (dfd);
142
143 /* 3/ Check if there is a match in mdadm.conf */
144
145 array_list = conf_get_ident(NULL);
146 match = NULL;
147 for (; array_list; array_list = array_list->next) {
148 if (array_list->uuid_set &&
149 same_uuid(array_list->uuid, info.uuid, st->ss->swapuuid)
150 == 0) {
151 if (verbose >= 2)
152 fprintf(stderr, Name
153 ": UUID differs from %s.\n",
154 array_list->devname);
155 continue;
156 }
157 if (array_list->name[0] &&
158 strcasecmp(array_list->name, info.name) != 0) {
159 if (verbose >= 2)
160 fprintf(stderr, Name
161 ": Name differs from %s.\n",
162 array_list->devname);
163 continue;
164 }
165 if (array_list->devices &&
166 !match_oneof(array_list->devices, devname)) {
167 if (verbose >= 2)
168 fprintf(stderr, Name
169 ": Not a listed device for %s.\n",
170 array_list->devname);
171 continue;
172 }
173 if (array_list->super_minor != UnSet &&
174 array_list->super_minor != info.array.md_minor) {
175 if (verbose >= 2)
176 fprintf(stderr, Name
177 ": Different super-minor to %s.\n",
178 array_list->devname);
179 continue;
180 }
181 if (!array_list->uuid_set &&
182 !array_list->name[0] &&
183 !array_list->devices &&
184 array_list->super_minor == UnSet) {
185 if (verbose >= 2)
186 fprintf(stderr, Name
187 ": %s doesn't have any identifying information.\n",
188 array_list->devname);
189 continue;
190 }
191 /* FIXME, should I check raid_disks and level too?? */
192
193 if (match) {
194 if (verbose >= 0)
195 fprintf(stderr, Name
196 ": we match both %s and %s - cannot decide which to use.\n",
197 match->devname, array_list->devname);
198 return 2;
199 }
200 match = array_list;
201 }
202
203 /* 3a/ if not, check for homehost match. If no match, continue
204 * but don't trust the 'name' in the array. Thus a 'random' minor
205 * number will be assigned, and the device name will be based
206 * on that. */
207 name = info.name;
208 if (!match) {
209 if (homehost == NULL ||
210 st->ss->match_home(st, homehost) == 0) {
211 if (verbose >= 0)
212 fprintf(stderr, Name
213 ": not found in mdadm.conf and not identified by homehost.\n");
214 name = NULL;
215 }
216 }
217 /* 4/ Determine device number. */
218 /* - If in mdadm.conf with std name, get number from name. */
219 /* - UUID in /var/run/mdadm.map get number from mapping */
220 /* - If name is suggestive, use that. unless in use with */
221 /* different uuid. */
222 /* - Choose a free, high number. */
223 /* - Use a partitioned device unless strong suggestion not to. */
224 /* e.g. auto=md */
225
226 /* There are three possible sources for 'autof': command line,
227 * ARRAY line in mdadm.conf, or CREATE line in mdadm.conf.
228 * ARRAY takes precedence, then command line, then
229 * CREATE.
230 */
231 if (match && match->autof)
232 autof = match->autof;
233 if (autof == 0)
234 autof = ci->autof;
235
236 if (match && (rv = is_standard(match->devname, &devnum))) {
237 devnum = (rv > 0) ? (-1-devnum) : devnum;
238 } else if ((mp = map_by_uuid(&map, info.uuid)) != NULL)
239 devnum = mp->devnum;
240 else {
241 /* Have to guess a bit. */
242 int use_partitions = 1;
243 char *np, *ep;
244 if ((autof&7) == 3 || (autof&7) == 5)
245 use_partitions = 0;
246 np = name ? strchr(name, ':') : ":NONAME";
247 if (np)
248 np++;
249 else
250 np = name;
251 devnum = strtoul(np, &ep, 10);
252 if (ep > np && *ep == 0) {
253 /* This is a number. Let check that it is unused. */
254 if (mddev_busy(use_partitions ? (-1-devnum) : devnum))
255 devnum = -1;
256 } else
257 devnum = -1;
258
259 if (devnum < 0) {
260 /* Haven't found anything yet, choose something free */
261 devnum = find_free_devnum(use_partitions);
262
263 if (devnum == NoMdDev) {
264 fprintf(stderr, Name
265 ": No spare md devices!!\n");
266 return 2;
267 }
268 } else
269 devnum = use_partitions ? (-1-devnum) : devnum;
270 }
271 mdfd = open_mddev_devnum(match ? match->devname : NULL,
272 devnum,
273 name,
274 chosen_name, autof >> 3);
275 if (mdfd < 0) {
276 fprintf(stderr, Name ": failed to open %s: %s.\n",
277 chosen_name, strerror(errno));
278 return 2;
279 }
280 /* 5/ Find out if array already exists */
281 if (! mddev_busy(devnum)) {
282 /* 5a/ if it does not */
283 /* - choose a name, from mdadm.conf or 'name' field in array. */
284 /* - create the array */
285 /* - add the device */
286 mdu_array_info_t ainf;
287 mdu_disk_info_t disk;
288 char md[20];
289 struct mdinfo *sra;
290
291 memset(&ainf, 0, sizeof(ainf));
292 ainf.major_version = st->ss->major;
293 ainf.minor_version = st->minor_version;
294 if (ioctl(mdfd, SET_ARRAY_INFO, &ainf) != 0) {
295 fprintf(stderr, Name
296 ": SET_ARRAY_INFO failed for %s: %s\b",
297 chosen_name, strerror(errno));
298 close(mdfd);
299 return 2;
300 }
301 sprintf(md, "%d.%d\n", st->ss->major, st->minor_version);
302 sra = sysfs_read(mdfd, devnum, GET_VERSION);
303 sysfs_set_str(sra, NULL, "metadata_version", md);
304 memset(&disk, 0, sizeof(disk));
305 disk.major = major(stb.st_rdev);
306 disk.minor = minor(stb.st_rdev);
307 sysfs_free(sra);
308 if (ioctl(mdfd, ADD_NEW_DISK, &disk) != 0) {
309 fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
310 devname, chosen_name, strerror(errno));
311 ioctl(mdfd, STOP_ARRAY, 0);
312 close(mdfd);
313 return 2;
314 }
315 sra = sysfs_read(mdfd, devnum, GET_DEVS);
316 if (!sra || !sra->devs || sra->devs->disk.raid_disk >= 0) {
317 /* It really should be 'none' - must be old buggy
318 * kernel, and mdadm -I may not be able to complete.
319 * So reject it.
320 */
321 ioctl(mdfd, STOP_ARRAY, NULL);
322 fprintf(stderr, Name
323 ": You have an old buggy kernel which cannot support\n"
324 " --incremental reliably. Aborting.\n");
325 close(mdfd);
326 sysfs_free(sra);
327 return 2;
328 }
329 } else {
330 /* 5b/ if it does */
331 /* - check one drive in array to make sure metadata is a reasonably */
332 /* close match. Reject if not (e.g. different type) */
333 /* - add the device */
334 char dn[20];
335 int dfd2;
336 mdu_disk_info_t disk;
337 int err;
338 struct mdinfo *sra;
339 struct supertype *st2;
340 sra = sysfs_read(mdfd, devnum, (GET_VERSION | GET_DEVS |
341 GET_STATE));
342
343 if (sra->array.major_version != st->ss->major ||
344 sra->array.minor_version != st->minor_version) {
345 if (verbose >= 0)
346 fprintf(stderr, Name
347 ": %s has different metadata to chosen array %s %d.%d %d.%d.\n",
348 devname, chosen_name,
349 sra->array.major_version,
350 sra->array.minor_version,
351 st->ss->major, st->minor_version);
352 close(mdfd);
353 return 1;
354 }
355 sprintf(dn, "%d:%d", sra->devs->disk.major,
356 sra->devs->disk.minor);
357 dfd2 = dev_open(dn, O_RDONLY);
358 st2 = dup_super(st);
359 if (st2->ss->load_super(st2, dfd2, NULL)) {
360 fprintf(stderr, Name
361 ": Strange error loading metadata for %s.\n",
362 chosen_name);
363 close(mdfd);
364 close(dfd2);
365 return 2;
366 }
367 close(dfd2);
368 st2->ss->getinfo_super(st2, &info2);
369 st2->ss->free_super(st2);
370 if (info.array.level != info2.array.level ||
371 memcmp(info.uuid, info2.uuid, 16) != 0 ||
372 info.array.raid_disks != info2.array.raid_disks) {
373 fprintf(stderr, Name
374 ": unexpected difference between %s and %s.\n",
375 chosen_name, devname);
376 close(mdfd);
377 return 2;
378 }
379 memset(&disk, 0, sizeof(disk));
380 disk.major = major(stb.st_rdev);
381 disk.minor = minor(stb.st_rdev);
382 err = ioctl(mdfd, ADD_NEW_DISK, &disk);
383 if (err < 0 && errno == EBUSY) {
384 /* could be another device present with the same
385 * disk.number. Find and reject any such
386 */
387 find_reject(mdfd, st, sra, info.disk.number,
388 info.events, verbose, chosen_name);
389 err = ioctl(mdfd, ADD_NEW_DISK, &disk);
390 }
391 if (err < 0) {
392 fprintf(stderr, Name ": failed to add %s to %s: %s.\n",
393 devname, chosen_name, strerror(errno));
394 close(mdfd);
395 return 2;
396 }
397 }
398 /* 6/ Make sure /var/run/mdadm.map contains this array. */
399 map_update(&map, devnum,
400 info.array.major_version,
401 info.array.minor_version,
402 info.uuid, chosen_name);
403
404 /* 7/ Is there enough devices to possibly start the array? */
405 /* 7a/ if not, finish with success. */
406 avail = NULL;
407 active_disks = count_active(st, mdfd, &avail, &info);
408 if (enough(info.array.level, info.array.raid_disks,
409 info.array.layout, info.array.state & 1,
410 avail, active_disks) == 0) {
411 free(avail);
412 if (verbose >= 0)
413 fprintf(stderr, Name
414 ": %s attached to %s, not enough to start (%d).\n",
415 devname, chosen_name, active_disks);
416 close(mdfd);
417 return 0;
418 }
419 free(avail);
420
421 /* 7b/ if yes, */
422 /* - if number of OK devices match expected, or -R and there */
423 /* are enough, */
424 /* + add any bitmap file */
425 /* + start the array (auto-readonly). */
426 {
427 mdu_array_info_t ainf;
428
429 if (ioctl(mdfd, GET_ARRAY_INFO, &ainf) == 0) {
430 if (verbose >= 0)
431 fprintf(stderr, Name
432 ": %s attached to %s which is already active.\n",
433 devname, chosen_name);
434 close (mdfd);
435 return 0;
436 }
437 }
438 if (runstop > 0 || active_disks >= info.array.working_disks) {
439 struct mdinfo *sra;
440 /* Let's try to start it */
441 if (match && match->bitmap_file) {
442 int bmfd = open(match->bitmap_file, O_RDWR);
443 if (bmfd < 0) {
444 fprintf(stderr, Name
445 ": Could not open bitmap file %s.\n",
446 match->bitmap_file);
447 close(mdfd);
448 return 1;
449 }
450 if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) {
451 close(bmfd);
452 fprintf(stderr, Name
453 ": Failed to set bitmapfile for %s.\n",
454 chosen_name);
455 close(mdfd);
456 return 1;
457 }
458 close(bmfd);
459 }
460 sra = sysfs_read(mdfd, devnum, 0);
461 if ((sra == NULL || active_disks >= info.array.working_disks)
462 && name != NULL)
463 rv = ioctl(mdfd, RUN_ARRAY, NULL);
464 else
465 rv = sysfs_set_str(sra, NULL,
466 "array_state", "read-auto");
467 if (rv == 0) {
468 if (verbose >= 0)
469 fprintf(stderr, Name
470 ": %s attached to %s, which has been started.\n",
471 devname, chosen_name);
472 rv = 0;
473 } else {
474 fprintf(stderr, Name
475 ": %s attached to %s, but failed to start: %s.\n",
476 devname, chosen_name, strerror(errno));
477 rv = 1;
478 }
479 } else {
480 if (verbose >= 0)
481 fprintf(stderr, Name
482 ": %s attached to %s, not enough to start safely.\n",
483 devname, chosen_name);
484 rv = 0;
485 }
486 close(mdfd);
487 return rv;
488 }
489
490 static void find_reject(int mdfd, struct supertype *st, struct mdinfo *sra,
491 int number, __u64 events, int verbose,
492 char *array_name)
493 {
494 /* Find a device attached to this array with a disk.number of number
495 * and events less than the passed events, and remove the device.
496 */
497 struct mdinfo *d;
498 mdu_array_info_t ra;
499
500 if (ioctl(mdfd, GET_ARRAY_INFO, &ra) == 0)
501 return; /* not safe to remove from active arrays
502 * without thinking more */
503
504 for (d = sra->devs; d ; d = d->next) {
505 char dn[10];
506 int dfd;
507 struct mdinfo info;
508 sprintf(dn, "%d:%d", d->disk.major, d->disk.minor);
509 dfd = dev_open(dn, O_RDONLY);
510 if (dfd < 0)
511 continue;
512 if (st->ss->load_super(st, dfd, NULL)) {
513 close(dfd);
514 continue;
515 }
516 st->ss->getinfo_super(st, &info);
517 st->ss->free_super(st);
518 close(dfd);
519
520 if (info.disk.number != number ||
521 info.events >= events)
522 continue;
523
524 if (d->disk.raid_disk > -1)
525 sysfs_set_str(sra, d, "slot", "none");
526 if (sysfs_set_str(sra, d, "state", "remove") == 0)
527 if (verbose >= 0)
528 fprintf(stderr, Name
529 ": removing old device %s from %s\n",
530 d->sys_name+4, array_name);
531 }
532 }
533
534 static int count_active(struct supertype *st, int mdfd, char **availp,
535 struct mdinfo *bestinfo)
536 {
537 /* count how many devices in sra think they are active */
538 struct mdinfo *d;
539 int cnt = 0, cnt1 = 0;
540 __u64 max_events = 0;
541 struct mdinfo *sra = sysfs_read(mdfd, -1, GET_DEVS | GET_STATE);
542 char *avail = NULL;
543
544 for (d = sra->devs ; d ; d = d->next) {
545 char dn[30];
546 int dfd;
547 int ok;
548 struct mdinfo info;
549
550 sprintf(dn, "%d:%d", d->disk.major, d->disk.minor);
551 dfd = dev_open(dn, O_RDONLY);
552 if (dfd < 0)
553 continue;
554 ok = st->ss->load_super(st, dfd, NULL);
555 close(dfd);
556 if (ok != 0)
557 continue;
558 st->ss->getinfo_super(st, &info);
559 if (!avail) {
560 avail = malloc(info.array.raid_disks);
561 if (!avail) {
562 fprintf(stderr, Name ": out of memory.\n");
563 exit(1);
564 }
565 memset(avail, 0, info.array.raid_disks);
566 *availp = avail;
567 }
568
569 if (info.disk.state & (1<<MD_DISK_SYNC))
570 {
571 if (cnt == 0) {
572 cnt++;
573 max_events = info.events;
574 avail[info.disk.raid_disk] = 2;
575 st->ss->getinfo_super(st, bestinfo);
576 } else if (info.events == max_events) {
577 cnt++;
578 avail[info.disk.raid_disk] = 2;
579 } else if (info.events == max_events-1) {
580 cnt1++;
581 avail[info.disk.raid_disk] = 1;
582 } else if (info.events < max_events - 1)
583 ;
584 else if (info.events == max_events+1) {
585 int i;
586 cnt1 = cnt;
587 cnt = 1;
588 max_events = info.events;
589 for (i=0; i<info.array.raid_disks; i++)
590 if (avail[i])
591 avail[i]--;
592 avail[info.disk.raid_disk] = 2;
593 st->ss->getinfo_super(st, bestinfo);
594 } else { /* info.events much bigger */
595 cnt = 1; cnt1 = 0;
596 memset(avail, 0, info.disk.raid_disk);
597 max_events = info.events;
598 st->ss->getinfo_super(st, bestinfo);
599 }
600 }
601 st->ss->free_super(st);
602 }
603 return cnt + cnt1;
604 }
605
606 int IncrementalScan(int verbose)
607 {
608 /* look at every device listed in the 'map' file.
609 * If one is found that is not running then:
610 * look in mdadm.conf for bitmap file.
611 * if one exists, but array has none, add it.
612 * try to start array in auto-readonly mode
613 */
614 struct map_ent *mapl = NULL;
615 struct map_ent *me;
616 mddev_ident_t devs, mddev;
617 int rv = 0;
618
619 map_read(&mapl);
620 devs = conf_get_ident(NULL);
621
622 for (me = mapl ; me ; me = me->next) {
623 char path[1024];
624 mdu_array_info_t array;
625 mdu_bitmap_file_t bmf;
626 struct mdinfo *sra;
627 int mdfd = open_mddev_devnum(me->path, me->devnum,
628 NULL, path, 0);
629 if (mdfd < 0)
630 continue;
631 if (ioctl(mdfd, GET_ARRAY_INFO, &array) == 0 ||
632 errno != ENODEV) {
633 close(mdfd);
634 continue;
635 }
636 /* Ok, we can try this one. Maybe it needs a bitmap */
637 for (mddev = devs ; mddev ; mddev = mddev->next)
638 if (strcmp(mddev->devname, me->path) == 0)
639 break;
640 if (mddev && mddev->bitmap_file) {
641 /*
642 * Note: early kernels will wrongly fail this, so it
643 * is a hint only
644 */
645 int added = -1;
646 if (ioctl(mdfd, GET_ARRAY_INFO, &bmf) < 0) {
647 int bmfd = open(mddev->bitmap_file, O_RDWR);
648 if (bmfd >= 0) {
649 added = ioctl(mdfd, SET_BITMAP_FILE,
650 bmfd);
651 close(bmfd);
652 }
653 }
654 if (verbose >= 0) {
655 if (added == 0)
656 fprintf(stderr, Name
657 ": Added bitmap %s to %s\n",
658 mddev->bitmap_file, me->path);
659 else if (errno != EEXIST)
660 fprintf(stderr, Name
661 ": Failed to add bitmap to %s: %s\n",
662 me->path, strerror(errno));
663 }
664 }
665 sra = sysfs_read(mdfd, 0, 0);
666 if (sra) {
667 if (sysfs_set_str(sra, NULL,
668 "array_state", "read-auto") == 0) {
669 if (verbose >= 0)
670 fprintf(stderr, Name
671 ": started array %s\n",
672 me->path);
673 } else {
674 fprintf(stderr, Name
675 ": failed to start array %s: %s\n",
676 me->path, strerror(errno));
677 rv = 1;
678 }
679 }
680 }
681 return rv;
682 }