2 * mdadm - manage Linux "md" devices aka RAID arrays.
4 * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Email: <neilb@suse.de>
31 * Policy module for mdadm.
32 * A policy statement about a device lists a set of values for each
33 * of a set of names. Each value can have a metadata type as context.
36 * action - the actions that can be taken on hot-plug
37 * domain - the domain(s) that the device is part of
39 * Policy information is extracted from various sources, but
40 * particularly from a set of policy rules in mdadm.conf
43 static void pol_new(struct dev_policy
**pol
, char *name
, const char *val
,
46 struct dev_policy
*n
= xmalloc(sizeof(*n
));
47 const char *real_metadata
= NULL
;
53 /* We need to normalise the metadata name */
55 for (i
= 0; superlist
[i
] ; i
++)
56 if (strcmp(metadata
, superlist
[i
]->name
) == 0) {
57 real_metadata
= superlist
[i
]->name
;
61 if (strcmp(metadata
, "1") == 0 ||
62 strcmp(metadata
, "1.0") == 0 ||
63 strcmp(metadata
, "1.1") == 0 ||
64 strcmp(metadata
, "1.2") == 0)
65 real_metadata
= super1
.name
;
68 static const char *prev
= NULL
;
69 if (prev
!= metadata
) {
70 pr_err("metadata=%s unrecognised - ignoring rule\n",
74 real_metadata
= "unknown";
78 n
->metadata
= real_metadata
;
83 static int pol_lesseq(struct dev_policy
*a
, struct dev_policy
*b
)
87 if (a
->name
< b
->name
)
89 if (a
->name
> b
->name
)
92 cmp
= strcmp(a
->value
, b
->value
);
98 return (a
->metadata
<= b
->metadata
);
101 static void pol_sort(struct dev_policy
**pol
)
103 /* sort policy list in *pol by name/value/metadata
107 struct dev_policy
*pl
[2];
112 struct dev_policy
**plp
[2], *p
[2];
114 struct dev_policy nul
= { NULL
, NULL
, NULL
, NULL
};
115 struct dev_policy
*prev
= &nul
;
118 /* p[] are the two lists that we are merging.
119 * plp[] are the ends of the two lists we create
121 * 'curr' is which of plp[] that we are currently
123 * 'next' is which of p[] we will take the next
125 * 'prev' is the last value, which was placed in
133 /* take least of p[0] and p[1]
134 * if it is larger than prev, add to
135 * plp[curr], else swap curr then add
137 while (p
[0] || p
[1]) {
138 if (p
[next
] == NULL
||
139 (p
[1-next
] != NULL
&&
140 !(pol_lesseq(prev
, p
[1-next
])
141 ^pol_lesseq(prev
, p
[next
])
142 ^pol_lesseq(p
[next
], p
[1-next
])))
146 if (!pol_lesseq(prev
, p
[next
]))
149 *plp
[curr
] = prev
= p
[next
];
150 plp
[curr
] = &p
[next
]->next
;
151 p
[next
] = p
[next
]->next
;
155 } while (pl
[0] && pl
[1]);
162 static void pol_dedup(struct dev_policy
*pol
)
164 /* This is a sorted list - remove duplicates. */
165 while (pol
&& pol
->next
) {
166 if (pol_lesseq(pol
->next
, pol
)) {
167 struct dev_policy
*tmp
= pol
->next
;
168 pol
->next
= tmp
->next
;
176 * pol_find finds the first entry in the policy
177 * list to match name.
178 * If it returns non-NULL there is at least one
179 * value, but how many can only be found by
180 * iterating through the list.
182 struct dev_policy
*pol_find(struct dev_policy
*pol
, char *name
)
184 while (pol
&& pol
->name
< name
)
187 if (!pol
|| pol
->name
!= name
)
192 static char **disk_paths(struct mdinfo
*disk
)
197 char symlink
[PATH_MAX
] = "/dev/disk/by-path/";
202 paths
= xmalloc(sizeof(*paths
) * (cnt
+1));
204 by_path
= opendir(symlink
);
206 prefix_len
= strlen(symlink
);
207 while ((ent
= readdir(by_path
)) != NULL
) {
208 if (ent
->d_type
!= DT_LNK
)
210 strncpy(symlink
+ prefix_len
,
212 sizeof(symlink
) - prefix_len
);
213 if (stat(symlink
, &stb
) < 0)
215 if ((stb
.st_mode
& S_IFMT
) != S_IFBLK
)
217 if (stb
.st_rdev
!= makedev(disk
->disk
.major
, disk
->disk
.minor
))
219 paths
[cnt
++] = xstrdup(ent
->d_name
);
220 paths
= xrealloc(paths
, sizeof(*paths
) * (cnt
+1));
228 char type_part
[] = "part";
229 char type_disk
[] = "disk";
230 static char *disk_type(struct mdinfo
*disk
)
234 sprintf(buf
, "/sys/dev/block/%d:%d/partition",
235 disk
->disk
.major
, disk
->disk
.minor
);
236 if (stat(buf
, &stb
) == 0)
242 static int path_has_part(char *path
, char **part
)
244 /* check if path ends with "-partNN" and
245 * if it does, place a pointer to "-partNN"
252 while (l
> 1 && isdigit(path
[l
-1]))
254 if (l
< 5 || strncmp(path
+l
-5, "-part", 5) != 0)
260 static int pol_match(struct rule
*rule
, char **paths
, char *type
, char **part
)
262 /* Check if this rule matches on any path and type.
263 * If 'part' is not NULL, then 'path' must end in -partN, which
264 * we ignore for matching, and return in *part on success.
266 int pathok
= 0; /* 0 == no path, 1 == match, -1 == no match yet */
269 for (; rule
; rule
= rule
->next
) {
270 if (rule
->name
== rule_path
) {
277 for (i
= 0; paths
[i
]; i
++) {
279 if (!path_has_part(paths
[i
], &p
))
284 if (fnmatch(rule
->value
, paths
[i
], 0) == 0)
290 if (rule
->name
== rule_type
) {
293 if (type
&& strcmp(rule
->value
, type
) == 0)
297 return pathok
>= 0 && typeok
>= 0;
300 static void pol_merge(struct dev_policy
**pol
, struct rule
*rule
)
302 /* copy any name assignments from rule into pol */
304 char *metadata
= NULL
;
305 for (r
= rule
; r
; r
= r
->next
)
306 if (r
->name
== pol_metadata
)
309 for (r
= rule
; r
; r
= r
->next
)
310 if (r
->name
== pol_act
||
311 r
->name
== pol_domain
||
313 pol_new(pol
, r
->name
, r
->value
, metadata
);
316 static void pol_merge_part(struct dev_policy
**pol
, struct rule
*rule
, char *part
)
318 /* copy any name assignments from rule into pol, appending
319 * -part to any domain. The string with -part appended is
320 * stored with the rule so it has a lifetime to match
324 char *metadata
= NULL
;
325 for (r
= rule
; r
; r
= r
->next
)
326 if (r
->name
== pol_metadata
)
329 for (r
= rule
; r
; r
= r
->next
) {
330 if (r
->name
== pol_act
)
331 pol_new(pol
, r
->name
, r
->value
, metadata
);
332 else if (r
->name
== pol_domain
) {
337 len
= strlen(r
->value
);
338 for (dom
= dl_next(r
->dups
); dom
!= r
->dups
;
340 if (strcmp(dom
+len
+1, part
)== 0)
342 if (dom
== r
->dups
) {
343 char *newdom
= dl_strndup(
344 r
->value
, len
+ 1 + strlen(part
));
345 strcat(strcat(newdom
, "-"), part
);
346 dl_add(r
->dups
, newdom
);
349 pol_new(pol
, r
->name
, dom
, metadata
);
354 static struct pol_rule
*config_rules
= NULL
;
355 static struct pol_rule
**config_rules_end
= NULL
;
356 static int config_rules_has_path
= 0;
359 * Most policy comes from a set of policy rules that are
360 * read from the config file.
361 * path_policy() gathers policy information for the
362 * disk described by the given 'path' and 'type'.
364 struct dev_policy
*path_policy(char **paths
, char *type
)
366 struct pol_rule
*rules
;
367 struct dev_policy
*pol
= NULL
;
369 rules
= config_rules
;
373 if (rules
->type
== rule_policy
)
374 if (pol_match(rules
->rule
, paths
, type
, NULL
))
375 pol_merge(&pol
, rules
->rule
);
376 if (rules
->type
== rule_part
&& strcmp(type
, type_part
) == 0)
377 if (pol_match(rules
->rule
, paths
, type_disk
, &part
))
378 pol_merge_part(&pol
, rules
->rule
, part
);
388 * drive_test_and_add_policies() - get policies for drive and add them to pols.
390 * @pols: pointer to pointer of first list entry, cannot be NULL, may point to NULL.
391 * @fd: device descriptor.
392 * @verbose: verbose flag.
394 * If supertype doesn't support this functionality return success. Use metadata handler to get
397 mdadm_status_t
drive_test_and_add_policies(struct supertype
*st
, dev_policy_t
**pols
, int fd
,
400 if (!st
->ss
->test_and_add_drive_policies
)
401 return MDADM_STATUS_SUCCESS
;
403 if (st
->ss
->test_and_add_drive_policies(pols
, fd
, verbose
) == MDADM_STATUS_SUCCESS
) {
404 /* After successful call list cannot be empty */
406 return MDADM_STATUS_SUCCESS
;
409 return MDADM_STATUS_ERROR
;
413 * sysfs_test_and_add_drive_policies() - get policies for mddev and add them to pols.
415 * @pols: pointer to pointer of first list entry, cannot be NULL, may point to NULL.
416 * @mdi: mdinfo describes the MD array, must have been read with the GET_DEVS option.
417 * @verbose: verbose flag.
419 * If supertype doesn't support this functionality return success. To get policies, all disks
420 * connected to mddev are analyzed.
422 mdadm_status_t
sysfs_test_and_add_drive_policies(struct supertype
*st
, dev_policy_t
**pols
,
423 struct mdinfo
*mdi
, const int verbose
)
427 if (!st
->ss
->test_and_add_drive_policies
)
428 return MDADM_STATUS_SUCCESS
;
430 for (sd
= mdi
->devs
; sd
; sd
= sd
->next
) {
431 char *devpath
= map_dev(sd
->disk
.major
, sd
->disk
.minor
, 0);
432 int fd
= dev_open(devpath
, O_RDONLY
);
435 if (!is_fd_valid(fd
)) {
436 pr_err("Cannot open fd for %s\n", devpath
);
437 return MDADM_STATUS_ERROR
;
440 rv
= drive_test_and_add_policies(st
, pols
, fd
, verbose
);
444 return MDADM_STATUS_ERROR
;
447 return MDADM_STATUS_SUCCESS
;
451 * mddev_test_and_add_drive_policies() - get policies for mddev and add them to pols.
453 * @pols: pointer to pointer of first list entry, cannot be NULL, may point to NULL.
454 * @array_fd: MD device descriptor.
455 * @verbose: verbose flag.
457 * If supertype doesn't support this functionality return success. Use fd to extract disks.
459 mdadm_status_t
mddev_test_and_add_drive_policies(struct supertype
*st
, dev_policy_t
**pols
,
460 int array_fd
, const int verbose
)
465 if (!st
->ss
->test_and_add_drive_policies
)
466 return MDADM_STATUS_SUCCESS
;
468 sra
= sysfs_read(array_fd
, NULL
, GET_DEVS
);
470 pr_err("Cannot load sysfs for %s\n", fd2devnm(array_fd
));
471 return MDADM_STATUS_ERROR
;
474 ret
= sysfs_test_and_add_drive_policies(st
, pols
, sra
, verbose
);
480 void pol_add(struct dev_policy
**pol
,
481 char *name
, char *val
,
484 pol_new(pol
, name
, val
, metadata
);
489 static void free_paths(char **paths
)
496 for (i
= 0; paths
[i
]; i
++)
502 * disk_policy() gathers policy information for the
503 * disk described in the given mdinfo (disk.{major,minor}).
505 struct dev_policy
*disk_policy(struct mdinfo
*disk
)
508 char *type
= disk_type(disk
);
509 struct dev_policy
*pol
= NULL
;
511 if (config_rules_has_path
)
512 paths
= disk_paths(disk
);
514 pol
= path_policy(paths
, type
);
520 struct dev_policy
*devid_policy(int dev
)
523 disk
.disk
.major
= major(dev
);
524 disk
.disk
.minor
= minor(dev
);
525 return disk_policy(&disk
);
529 * process policy rules read from config file.
532 char rule_path
[] = "path";
533 char rule_type
[] = "type";
535 char rule_policy
[] = "policy";
536 char rule_part
[] = "part-policy";
538 char pol_metadata
[] = "metadata";
539 char pol_act
[] = "action";
540 char pol_domain
[] = "domain";
541 char pol_auto
[] = "auto";
543 static int try_rule(char *w
, char *name
, struct rule
**rp
)
546 int len
= strlen(name
);
547 if (strncmp(w
, name
, len
) != 0 ||
550 r
= xmalloc(sizeof(*r
));
553 r
->value
= xstrdup(w
+len
+1);
559 void policyline(char *line
, char *type
)
564 if (config_rules_end
== NULL
)
565 config_rules_end
= &config_rules
;
567 pr
= xmalloc(sizeof(*pr
));
570 for (w
= dl_next(line
); w
!= line
; w
= dl_next(w
)) {
571 if (try_rule(w
, rule_path
, &pr
->rule
))
572 config_rules_has_path
= 1;
573 else if (! try_rule(w
, rule_type
, &pr
->rule
) &&
574 ! try_rule(w
, pol_metadata
, &pr
->rule
) &&
575 ! try_rule(w
, pol_act
, &pr
->rule
) &&
576 ! try_rule(w
, pol_domain
, &pr
->rule
) &&
577 ! try_rule(w
, pol_auto
, &pr
->rule
))
578 pr_err("policy rule %s unrecognised and ignored\n",
581 pr
->next
= config_rules
;
585 void policy_add(char *type
, ...)
591 pr
= xmalloc(sizeof(*pr
));
596 while ((name
= va_arg(ap
, char*)) != NULL
) {
599 val
= va_arg(ap
, char*);
600 r
= xmalloc(sizeof(*r
));
603 r
->value
= xstrdup(val
);
607 pr
->next
= config_rules
;
612 void policy_free(void)
614 while (config_rules
) {
615 struct pol_rule
*pr
= config_rules
;
618 config_rules
= config_rules
->next
;
620 for (r
= pr
->rule
; r
; ) {
621 struct rule
*next
= r
->next
;
630 config_rules_end
= NULL
;
631 config_rules_has_path
= 0;
634 void dev_policy_free(struct dev_policy
*p
)
636 struct dev_policy
*t
;
644 static enum policy_action
map_act(const char *act
)
646 if (strcmp(act
, "include") == 0)
648 if (strcmp(act
, "re-add") == 0)
650 if (strcmp(act
, "spare") == 0)
652 if (strcmp(act
, "spare-same-slot") == 0)
653 return act_spare_same_slot
;
654 if (strcmp(act
, "force-spare") == 0)
655 return act_force_spare
;
659 static enum policy_action
policy_action(struct dev_policy
*plist
, const char *metadata
)
661 enum policy_action rv
= act_default
;
662 struct dev_policy
*p
;
664 plist
= pol_find(plist
, pol_act
);
665 pol_for_each(p
, plist
, metadata
) {
666 enum policy_action a
= map_act(p
->value
);
673 int policy_action_allows(struct dev_policy
*plist
, const char *metadata
, enum policy_action want
)
675 enum policy_action act
= policy_action(plist
, metadata
);
679 return (act
>= want
);
682 int disk_action_allows(struct mdinfo
*disk
, const char *metadata
, enum policy_action want
)
684 struct dev_policy
*pol
= disk_policy(disk
);
685 int rv
= policy_action_allows(pol
, metadata
, want
);
687 dev_policy_free(pol
);
692 * Any device can have a list of domains asserted by different policy
694 * An array also has a list of domains comprising all the domains of
695 * all the devices in an array.
696 * Where an array has a spare-group, that becomes an additional domain for
697 * every device in the array and thus for the array.
699 * We keep the list of domains in a sorted linked list
700 * As dev policies are already sorted, this is fairly easy to manage.
703 static struct domainlist
**domain_merge_one(struct domainlist
**domp
,
706 /* merge a domain name into a sorted list and return the
707 * location of the insertion or match
709 struct domainlist
*dom
= *domp
;
711 while (dom
&& strcmp(dom
->dom
, domain
) < 0) {
715 if (dom
== NULL
|| strcmp(dom
->dom
, domain
) != 0) {
716 dom
= xmalloc(sizeof(*dom
));
725 void dump_policy(struct dev_policy
*policy
)
728 dprintf("policy: %p name: %s value: %s metadata: %s\n",
733 policy
= policy
->next
;
738 void domain_merge(struct domainlist
**domp
, struct dev_policy
*pollist
,
739 const char *metadata
)
741 /* Add to 'domp' all the domains in pol that apply to 'metadata'
742 * which are not already in domp
744 struct dev_policy
*pol
;
745 pollist
= pol_find(pollist
, pol_domain
);
746 pol_for_each(pol
, pollist
, metadata
)
747 domain_merge_one(domp
, pol
->value
);
750 int domain_test(struct domainlist
*dom
, struct dev_policy
*pol
,
751 const char *metadata
)
753 /* Check that all domains in pol (for metadata) are also in
754 * dom. Both lists are sorted.
755 * If pol has no domains, we don't really know about this device
756 * so we allow caller to choose:
758 * 0: has domains, not all match
759 * 1: has domains, all match
762 int has_one_domain
= 1;
763 struct dev_policy
*p
;
765 pol
= pol_find(pol
, pol_domain
);
766 pol_for_each(p
, pol
, metadata
) {
768 while (dom
&& strcmp(dom
->dom
, p
->value
) < 0)
770 if (!dom
|| strcmp(dom
->dom
, p
->value
) != 0)
772 if (has_one_domain
&& metadata
&& strcmp(metadata
, "imsm") == 0)
779 void domainlist_add_dev(struct domainlist
**dom
, int devid
, const char *metadata
)
781 struct dev_policy
*pol
= devid_policy(devid
);
782 domain_merge(dom
, pol
, metadata
);
783 dev_policy_free(pol
);
786 struct domainlist
*domain_from_array(struct mdinfo
*mdi
, const char *metadata
)
788 struct domainlist
*domlist
= NULL
;
792 for (mdi
= mdi
->devs
; mdi
; mdi
= mdi
->next
)
793 domainlist_add_dev(&domlist
, makedev(mdi
->disk
.major
,
800 void domain_add(struct domainlist
**domp
, char *domain
)
802 domain_merge_one(domp
, domain
);
805 void domain_free(struct domainlist
*dl
)
808 struct domainlist
*head
= dl
;
816 * Some policy decisions are guided by knowledge of which
817 * array previously owned the device at a given physical location (path).
818 * When removing a device from an array we might record the array against
819 * the path, and when finding a new device, we might look for which
820 * array previously used that path.
822 * The 'array' is described by a map_ent, and the path by the disk in an
823 * mdinfo, or a string.
826 void policy_save_path(char *id_path
, struct map_ent
*array
)
831 if (mkdir(FAILED_SLOTS_DIR
, S_IRWXU
) < 0 && errno
!= EEXIST
) {
832 pr_err("can't create file to save path to old disk: %s\n", strerror(errno
));
836 snprintf(path
, PATH_MAX
, FAILED_SLOTS_DIR
"/%s", id_path
);
837 f
= fopen(path
, "w");
839 pr_err("can't create file to save path to old disk: %s\n",
844 if (fprintf(f
, "%20s %08x:%08x:%08x:%08x\n",
846 array
->uuid
[0], array
->uuid
[1],
847 array
->uuid
[2], array
->uuid
[3]) <= 0)
848 pr_err("Failed to write to <id_path> cookie\n");
853 int policy_check_path(struct mdinfo
*disk
, struct map_ent
*array
)
857 char **id_paths
= disk_paths(disk
);
861 for (i
= 0; id_paths
[i
]; i
++) {
862 snprintf(path
, PATH_MAX
, FAILED_SLOTS_DIR
"/%s", id_paths
[i
]);
863 f
= fopen(path
, "r");
867 rv
= fscanf(f
, " %20s %x:%x:%x:%x\n",
876 free_paths(id_paths
);
880 /* invocation of udev rule file */
881 char udev_template_start
[] =
882 "# do not edit this file, it is automatically generated by mdadm\n"
885 /* find rule named rule_type and return its value */
886 char *find_rule(struct rule
*rule
, char *rule_type
)
889 if (rule
->name
== rule_type
)
897 #define UDEV_RULE_FORMAT \
898 "ACTION==\"add\", SUBSYSTEM==\"block\", " \
899 "ENV{DEVTYPE}==\"%s\", ENV{ID_PATH}==\"%s\", " \
900 "RUN+=\"" BINDIR "/mdadm --incremental $env{DEVNAME}\"\n"
902 #define UDEV_RULE_FORMAT_NOTYPE \
903 "ACTION==\"add\", SUBSYSTEM==\"block\", " \
904 "ENV{ID_PATH}==\"%s\", " \
905 "RUN+=\"" BINDIR "/mdadm --incremental $env{DEVNAME}\"\n"
907 /* Write rule in the rule file. Use format from UDEV_RULE_FORMAT */
908 int write_rule(struct rule
*rule
, int fd
, int force_part
)
911 char *pth
= find_rule(rule
, rule_path
);
912 char *typ
= find_rule(rule
, rule_type
);
919 snprintf(line
, sizeof(line
) - 1, UDEV_RULE_FORMAT
, typ
, pth
);
921 snprintf(line
, sizeof(line
) - 1, UDEV_RULE_FORMAT_NOTYPE
, pth
);
922 return write(fd
, line
, strlen(line
)) == (int)strlen(line
);
925 /* Generate a single entry in the udev rule file based on a POLICY line found in the config
926 * file. Take only those with paths, only the first occurrence if paths are equal,
927 * and only if the action supports handling of spares (>=act_spare_same_slot)
929 int generate_entries(int fd
)
931 struct pol_rule
*loop
, *dup
;
932 char *loop_value
, *dup_value
;
935 for (loop
= config_rules
; loop
; loop
= loop
->next
) {
936 if (loop
->type
!= rule_policy
&& loop
->type
!= rule_part
)
940 /* only policies with paths and with actions supporting
941 * bare disks are considered */
942 loop_value
= find_rule(loop
->rule
, pol_act
);
943 if (!loop_value
|| map_act(loop_value
) < act_spare_same_slot
)
945 loop_value
= find_rule(loop
->rule
, rule_path
);
948 for (dup
= config_rules
; dup
!= loop
; dup
= dup
->next
) {
949 if (dup
->type
!= rule_policy
&& loop
->type
!= rule_part
)
951 dup_value
= find_rule(dup
->rule
, pol_act
);
952 if (!dup_value
|| map_act(dup_value
) < act_spare_same_slot
)
954 dup_value
= find_rule(dup
->rule
, rule_path
);
957 if (strcmp(loop_value
, dup_value
) == 0) {
963 /* not a dup or first occurrence */
965 if (!write_rule(loop
->rule
, fd
, loop
->type
== rule_part
) )
971 /* Write_rules routine creates dynamic udev rules used to handle
972 * hot-plug events for bare devices (and making them spares)
974 int Write_rules(char *rule_name
)
977 char udev_rule_file
[PATH_MAX
];
980 strncpy(udev_rule_file
, rule_name
, sizeof(udev_rule_file
) - 6);
981 udev_rule_file
[sizeof(udev_rule_file
) - 6] = '\0';
982 strcat(udev_rule_file
, ".temp");
983 fd
= creat(udev_rule_file
,
984 S_IRUSR
| S_IWUSR
| S_IRGRP
| S_IROTH
);
990 /* write static invocation */
991 if (write(fd
, udev_template_start
, sizeof(udev_template_start
) - 1) !=
992 (int)sizeof(udev_template_start
) - 1)
995 /* iterate, if none created or error occurred, remove file */
996 if (generate_entries(fd
) < 0)
1002 rename(udev_rule_file
, rule_name
);
1008 unlink(udev_rule_file
);