X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=util.c;h=38750b248892e5eef8dcc1e38e42521ba5a64553;hp=8315200b4f3bd19a7b2a8fbca636a846c7d31c27;hb=ac597b1c2134b4342df3c957fa054c34e522bee9;hpb=d492df0307f1750210450ca7454ea9362e86b733 diff --git a/util.c b/util.c index 8315200b..38750b24 100644 --- a/util.c +++ b/util.c @@ -65,55 +65,7 @@ struct blkpg_partition { char volname[BLKPG_VOLNAMELTH]; /* volume label */ }; -/* partition table structures so we can check metadata position - * against the end of the last partition. - * Only handle MBR ant GPT partition tables. - */ -struct MBR_part_record { - __u8 bootable; - __u8 first_head; - __u8 first_sector; - __u8 first_cyl; - __u8 part_type; - __u8 last_head; - __u8 last_sector; - __u8 last_cyl; - __u32 first_sect_lba; - __u32 blocks_num; -}; - -struct MBR { - __u8 pad[446]; - struct MBR_part_record parts[4]; - __u16 magic; -} __attribute__((packed)); - -struct GPT_part_entry { - unsigned char type_guid[16]; - unsigned char partition_guid[16]; - __u64 starting_lba; - __u64 ending_lba; - unsigned char attr_bits[8]; - unsigned char name[72]; -} __attribute__((packed)); - -struct GPT { - __u64 magic; - __u32 revision; - __u32 header_size; - __u32 crc; - __u32 pad1; - __u64 current_lba; - __u64 backup_lba; - __u64 first_lba; - __u64 last_lba; - __u8 guid[16]; - __u64 part_start; - __u32 part_cnt; - __u32 part_size; - __u32 part_crc; - __u8 pad2[420]; -} __attribute__((packed)); +#include "part.h" /* Force a compilation error if condition is true */ #define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition)) @@ -124,14 +76,6 @@ struct GPT { aren't permitted). */ #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) - -/* MBR/GPT magic numbers */ -#define MBR_SIGNATURE_MAGIC __cpu_to_le16(0xAA55) -#define GPT_SIGNATURE_MAGIC __cpu_to_le64(0x5452415020494645ULL) - -#define MBR_PARTITIONS 4 -#define MBR_GPT_PARTITION_TYPE 0xEE - /* * Parse a 128 bit uuid in 4 integers * format is 32 hexx nibbles with options :. separator @@ -216,6 +160,31 @@ int get_linux_version() return (a*1000000)+(b*1000)+c; } +int mdadm_version(char *version) +{ + int a, b, c; + char *cp; + + if (!version) + version = Version; + + cp = strchr(version, '-'); + if (!cp || *(cp+1) != ' ' || *(cp+2) != 'v') + return -1; + cp += 3; + a = strtoul(cp, &cp, 10); + if (*cp != '.') + return -1; + b = strtoul(cp+1, &cp, 10); + if (*cp == '.') + c = strtoul(cp+1, &cp, 10); + else + c = 0; + if (*cp != ' ' && *cp != '-') + return -1; + return (a*1000000)+(b*1000)+c; +} + #ifndef MDASSEMBLE long long parse_size(char *size) { @@ -326,6 +295,19 @@ int test_partition(int fd) return 1; } +int test_partition_from_id(dev_t id) +{ + char buf[20]; + int fd, rv; + + sprintf(buf, "%d:%d", major(id), minor(id)); + fd = dev_open(buf, O_RDONLY); + if (fd < 0) + return -1; + rv = test_partition(fd); + close(fd); + return rv; +} int enough(int level, int raid_disks, int layout, int clean, char *avail, int avail_disks) @@ -376,13 +358,40 @@ int enough(int level, int raid_disks, int layout, int clean, } } -const int uuid_match_any[4] = { ~0, ~0, ~0, ~0 }; -int same_uuid(int a[4], int b[4], int swapuuid) +int enough_fd(int fd) { - if (memcmp(a, uuid_match_any, sizeof(int[4])) == 0 || - memcmp(b, uuid_match_any, sizeof(int[4])) == 0) - return 1; + struct mdu_array_info_s array; + struct mdu_disk_info_s disk; + int avail_disks = 0; + int i; + char *avail; + + if (ioctl(fd, GET_ARRAY_INFO, &array) != 0 || + array.raid_disks <= 0) + return 0; + avail = calloc(array.raid_disks, 1); + for (i=0; i= array.raid_disks) + continue; + avail_disks++; + avail[disk.raid_disk] = 1; + } + /* This is used on an active array, so assume it is clean */ + return enough(array.level, array.raid_disks, array.layout, + 1, + avail, avail_disks); +} + + +const int uuid_zero[4] = { 0, 0, 0, 0 }; +int same_uuid(int a[4], int b[4], int swapuuid) +{ if (swapuuid) { /* parse uuids are hostendian. * uuid's from some superblocks are big-ending @@ -526,7 +535,7 @@ int check_raid(int fd, char *name) /* Looks like a raid array .. */ fprintf(stderr, Name ": %s appears to be part of a raid array:\n", name); - st->ss->getinfo_super(st, &info); + st->ss->getinfo_super(st, &info, NULL); st->ss->free_super(st); crtime = info.array.ctime; level = map_num(pers, info.array.level); @@ -959,19 +968,33 @@ int dev_open(char *dev, int flags) int minor; if (!dev) return -1; + flags |= O_DIRECT; major = strtoul(dev, &e, 0); if (e > dev && *e == ':' && e[1] && (minor = strtoul(e+1, &e, 0)) >= 0 && *e == 0) { - snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d:%d", - (int)getpid(), major, minor); - if (mknod(devname, S_IFBLK|0600, makedev(major, minor))==0) { - fd = open(devname, flags|O_DIRECT); - unlink(devname); + char *path = map_dev(major, minor, 0); + if (path) + fd = open(path, flags); + if (fd < 0) { + snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d:%d", + (int)getpid(), major, minor); + if (mknod(devname, S_IFBLK|0600, makedev(major, minor))==0) { + fd = open(devname, flags); + unlink(devname); + } + } + if (fd < 0) { + snprintf(devname, sizeof(devname), "/tmp/.tmp.md.%d:%d:%d", + (int)getpid(), major, minor); + if (mknod(devname, S_IFBLK|0600, makedev(major, minor))==0) { + fd = open(devname, flags); + unlink(devname); + } } } else - fd = open(dev, flags|O_DIRECT); + fd = open(dev, flags); return fd; } @@ -1035,11 +1058,16 @@ void wait_for(char *dev, int fd) dprintf("%s: timeout waiting for %s\n", __func__, dev); } -struct superswitch *superlist[] = { &super0, &super1, &super_ddf, &super_imsm, NULL }; +struct superswitch *superlist[] = +{ + &super0, &super1, + &super_ddf, &super_imsm, + &mbr, &gpt, + NULL }; #if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) -struct supertype *super_by_fd(int fd) +struct supertype *super_by_fd(int fd, char **subarrayp) { mdu_array_info_t array; int vers; @@ -1050,6 +1078,7 @@ struct supertype *super_by_fd(int fd) char version[20]; int i; char *subarray = NULL; + int container = NoMdDev; sra = sysfs_read(fd, 0, GET_VERSION); @@ -1071,15 +1100,15 @@ struct supertype *super_by_fd(int fd) } if (minor == -2 && is_subarray(verstr)) { char *dev = verstr+1; + subarray = strchr(dev, '/'); - int devnum; if (subarray) *subarray++ = '\0'; - devnum = devname2devnum(dev); subarray = strdup(subarray); + container = devname2devnum(dev); if (sra) sysfs_free(sra); - sra = sysfs_read(-1, devnum, GET_VERSION); + sra = sysfs_read(-1, container, GET_VERSION); if (sra && sra->text_version[0]) verstr = sra->text_version; else @@ -1093,17 +1122,33 @@ struct supertype *super_by_fd(int fd) sysfs_free(sra); if (st) { st->sb = NULL; - if (subarray) { - strncpy(st->subarray, subarray, 32); - st->subarray[31] = 0; - free(subarray); - } else - st->subarray[0] = 0; - } + if (subarrayp) + *subarrayp = subarray; + st->container_dev = container; + st->devnum = fd2devnum(fd); + } else + free(subarray); + return st; } #endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */ +int dev_size_from_id(dev_t id, unsigned long long *size) +{ + char buf[20]; + int fd; + + sprintf(buf, "%d:%d", major(id), minor(id)); + fd = dev_open(buf, O_RDONLY); + if (fd < 0) + return 0; + if (get_dev_size(fd, NULL, size)) { + close(fd); + return 1; + } + close(fd); + return 0; +} struct supertype *dup_super(struct supertype *orig) { @@ -1118,32 +1163,38 @@ struct supertype *dup_super(struct supertype *orig) st->ss = orig->ss; st->max_devs = orig->max_devs; st->minor_version = orig->minor_version; - strcpy(st->subarray, orig->subarray); st->sb = NULL; st->info = NULL; return st; } -struct supertype *guess_super(int fd) +struct supertype *guess_super_type(int fd, enum guess_types guess_type) { /* try each load_super to find the best match, * and return the best superswitch */ struct superswitch *ss; struct supertype *st; - unsigned long besttime = 0; + time_t besttime = 0; int bestsuper = -1; int i; st = malloc(sizeof(*st)); + memset(st, 0, sizeof(*st)); + st->container_dev = NoMdDev; + for (i=0 ; superlist[i]; i++) { int rv; ss = superlist[i]; + if (guess_type == guess_array && ss->add_to_super == NULL) + continue; + if (guess_type == guess_partitions && ss->add_to_super != NULL) + continue; memset(st, 0, sizeof(*st)); rv = ss->load_super(st, fd, NULL); if (rv == 0) { struct mdinfo info; - st->ss->getinfo_super(st, &info); + st->ss->getinfo_super(st, &info, NULL); if (bestsuper == -1 || besttime < info.array.ctime) { bestsuper = i; @@ -1193,6 +1244,20 @@ int get_dev_size(int fd, char *dname, unsigned long long *sizep) return 1; } +/* Return true if this can only be a container, not a member device. + * i.e. is and md device and size is zero + */ +int must_be_container(int fd) +{ + unsigned long long size; + if (md_get_version(fd) < 0) + return 0; + if (get_dev_size(fd, NULL, &size) == 0) + return 1; + if (size == 0) + return 1; + return 0; +} /* Sets endofpart parameter to the last block used by the last GPT partition on the device. * Returns: 1 if successful @@ -1207,7 +1272,7 @@ static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart) struct GPT_part_entry *part; unsigned long long curr_part_end; unsigned all_partitions, entry_size; - int part_nr; + unsigned part_nr; *endofpart = 0; @@ -1266,7 +1331,7 @@ static int get_last_partition_end(int fd, unsigned long long *endofpart) struct MBR boot_sect; struct MBR_part_record *part; unsigned long long curr_part_end; - int part_nr; + unsigned part_nr; int retval = 0; *endofpart = 0; @@ -1376,7 +1441,7 @@ int open_container(int fd) continue; n = read(dfd, buf, sizeof(buf)); close(dfd); - if (n <= 0 || n >= sizeof(buf)) + if (n <= 0 || (unsigned)n >= sizeof(buf)) continue; buf[n] = 0; if (sscanf(buf, "%d:%d", &major, &minor) != 2) @@ -1392,6 +1457,155 @@ int open_container(int fd) return -1; } +struct superswitch *version_to_superswitch(char *vers) +{ + int i; + + for (i = 0; superlist[i]; i++) { + struct superswitch *ss = superlist[i]; + + if (strcmp(vers, ss->name) == 0) + return ss; + } + + return NULL; +} + +int is_container_member(struct mdstat_ent *mdstat, char *container) +{ + if (mdstat->metadata_version == NULL || + strncmp(mdstat->metadata_version, "external:", 9) != 0 || + !is_subarray(mdstat->metadata_version+9) || + strncmp(mdstat->metadata_version+10, container, strlen(container)) != 0 || + mdstat->metadata_version[10+strlen(container)] != '/') + return 0; + + return 1; +} + +int is_subarray_active(char *subarray, char *container) +{ + struct mdstat_ent *mdstat = mdstat_read(0, 0); + struct mdstat_ent *ent; + + for (ent = mdstat; ent; ent = ent->next) + if (is_container_member(ent, container)) + if (!subarray || + strcmp(to_subarray(ent, container), subarray) == 0) + break; + + free_mdstat(mdstat); + + return ent != NULL; +} + +int is_container_active(char *container) +{ + return is_subarray_active(NULL, container); +} + +/* open_subarray - opens a subarray in a container + * @dev: container device name + * @st: empty supertype + * @quiet: block reporting errors flag + * + * On success returns an fd to a container and fills in *st + */ +int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet) +{ + struct mdinfo *mdi; + struct mdinfo *info; + int fd, err = 1; + + fd = open(dev, O_RDWR|O_EXCL); + if (fd < 0) { + if (!quiet) + fprintf(stderr, Name ": Couldn't open %s, aborting\n", + dev); + return 2; + } + + st->devnum = fd2devnum(fd); + if (st->devnum == NoMdDev) { + if (!quiet) + fprintf(stderr, + Name ": Failed to determine device number for %s\n", + dev); + goto close_fd; + } + + mdi = sysfs_read(fd, st->devnum, GET_VERSION|GET_LEVEL); + if (!mdi) { + if (!quiet) + fprintf(stderr, Name ": Failed to read sysfs for %s\n", + dev); + goto close_fd; + } + + if (mdi->array.level != UnSet) { + if (!quiet) + fprintf(stderr, Name ": %s is not a container\n", dev); + goto free_sysfs; + } + + st->ss = version_to_superswitch(mdi->text_version); + if (!st->ss) { + if (!quiet) + fprintf(stderr, + Name ": Operation not supported for %s metadata\n", + mdi->text_version); + goto free_sysfs; + } + + st->devname = devnum2devname(st->devnum); + if (!st->devname) { + if (!quiet) + fprintf(stderr, Name ": Failed to allocate device name\n"); + goto free_sysfs; + } + + if (!st->ss->load_container) { + if (!quiet) + fprintf(stderr, Name ": %s is not a container\n", dev); + goto free_name; + } + + if (st->ss->load_container(st, fd, NULL)) { + if (!quiet) + fprintf(stderr, Name ": Failed to load metadata for %s\n", + dev); + goto free_name; + } + + info = st->ss->container_content(st, subarray); + if (!info) { + if (!quiet) + fprintf(stderr, Name ": Failed to find subarray-%s in %s\n", + subarray, dev); + goto free_super; + } + free(info); + + err = 0; + + free_super: + if (err) + st->ss->free_super(st); + free_name: + if (err) + free(st->devname); + free_sysfs: + sysfs_free(mdi); + close_fd: + if (err) + close(fd); + + if (err) + return -1; + else + return fd; +} + int add_disk(int mdfd, struct supertype *st, struct mdinfo *sra, struct mdinfo *info) { @@ -1422,6 +1636,21 @@ int add_disk(int mdfd, struct supertype *st, return rv; } +int remove_disk(int mdfd, struct supertype *st, + struct mdinfo *sra, struct mdinfo *info) +{ + int rv; + /* Remove the disk given by 'info' from the array */ +#ifndef MDASSEMBLE + if (st->ss->external) + rv = sysfs_set_str(sra, info, "slot", "none"); + else +#endif + rv = ioctl(mdfd, HOT_REMOVE_DISK, makedev(info->disk.major, + info->disk.minor)); + return rv; +} + int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info) { /* Initialise kernel's knowledge of array. @@ -1462,13 +1691,18 @@ unsigned long long min_recovery_start(struct mdinfo *array) return recovery_start; } -char *devnum2devname(int num) +void fmt_devname(char *name, int num) { - char name[100]; if (num >= 0) sprintf(name, "md%d", num); else sprintf(name, "md_d%d", -1-num); +} + +char *devnum2devname(int num) +{ + char name[100]; + fmt_devname(name,num); return strdup(name); } @@ -1493,7 +1727,7 @@ int stat2devnum(struct stat *st) if ((S_IFMT & st->st_mode) == S_IFBLK) { if (major(st->st_rdev) == MD_MAJOR) return minor(st->st_rdev); - else if (major(st->st_rdev) == get_mdp_major()) + else if (major(st->st_rdev) == (unsigned)get_mdp_major()) return -1- (minor(st->st_rdev)>>MdpMinorShift); /* must be an extended-minor partition. Look at the @@ -1508,7 +1742,7 @@ int stat2devnum(struct stat *st) link[n] = 0; cp = strrchr(link, '/'); if (cp) *cp = 0; - cp = strchr(link, '/'); + cp = strrchr(link, '/'); if (cp && strncmp(cp, "/md", 3) == 0) return devname2devnum(cp+1); } @@ -1524,8 +1758,6 @@ int fd2devnum(int fd) return NoMdDev; } -char *pid_dir = VAR_RUN; - int mdmon_pid(int devnum) { char path[100]; @@ -1534,7 +1766,7 @@ int mdmon_pid(int devnum) int n; char *devname = devnum2devname(devnum); - sprintf(path, "%s/%s.pid", pid_dir, devname); + sprintf(path, "%s/%s.pid", MDMON_DIR, devname); free(devname); fd = open(path, O_RDONLY | O_NOATIME, 0); @@ -1668,6 +1900,7 @@ void append_metadata_update(struct supertype *st, void *buf, int len) mu->buf = buf; mu->len = len; mu->space = NULL; + mu->space_list = NULL; mu->next = NULL; *st->update_tail = mu; st->update_tail = &mu->next; @@ -1679,3 +1912,73 @@ void append_metadata_update(struct supertype *st, void *buf, int len) unsigned int __invalid_size_argument_for_IOC = 0; #endif +int experimental(void) +{ + if (check_env("MDADM_EXPERIMENTAL")) + return 1; + else { + fprintf(stderr, Name ": To use this feature MDADM_EXPERIMENTAL enviroment variable has to defined.\n"); + return 0; + } +} + +/* Pick all spares matching given criteria from a container + * if min_size == 0 do not check size + * if domlist == NULL do not check domains + * if spare_group given add it to domains of each spare + * metadata allows to test domains using metadata of destination array */ +struct mdinfo *container_choose_spares(struct supertype *st, + unsigned long long min_size, + struct domainlist *domlist, + char *spare_group, + const char *metadata, int get_one) +{ + struct mdinfo *d, **dp, *disks = NULL; + + /* get list of all disks in container */ + if (st->ss->getinfo_super_disks) + disks = st->ss->getinfo_super_disks(st); + + if (!disks) + return disks; + /* find spare devices on the list */ + dp = &disks->devs; + disks->array.spare_disks = 0; + while (*dp) { + int found = 0; + d = *dp; + if (d->disk.state == 0) { + /* check if size is acceptable */ + unsigned long long dev_size; + dev_t dev = makedev(d->disk.major,d->disk.minor); + + if (!min_size || + (dev_size_from_id(dev, &dev_size) && + dev_size >= min_size)) + found = 1; + /* check if domain matches */ + if (found && domlist) { + struct dev_policy *pol = devnum_policy(dev); + if (spare_group) + pol_add(&pol, pol_domain, + spare_group, NULL); + if (!domain_test(domlist, pol, metadata)) + found = 0; + dev_policy_free(pol); + } + } + if (found) { + dp = &d->next; + disks->array.spare_disks++; + if (get_one) { + sysfs_free(*dp); + d->next = NULL; + } + } else { + *dp = d->next; + d->next = NULL; + sysfs_free(d); + } + } + return disks; +}