X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=util.c;h=e5b0c1f5697bbabd84bc40354438a3f4eb3b1776;hp=f15ba43d44953974979b7f09c4ce9fa3c6a47fa2;hb=24f6f99b3630b1a89aaa57930c5c9de8a3df9ded;hpb=ce744c97bce1b34147be5e278d2b246743d89536 diff --git a/util.c b/util.c index f15ba43d..e5b0c1f5 100644 --- a/util.c +++ b/util.c @@ -1,7 +1,7 @@ /* * mdadm - manage Linux "md" devices aka RAID arrays. * - * Copyright (C) 2001-2006 Neil Brown + * Copyright (C) 2001-2009 Neil Brown * * * This program is free software; you can redistribute it and/or modify @@ -19,12 +19,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Author: Neil Brown - * Email: - * Paper: Neil Brown - * School of Computer Science and Engineering - * The University of New South Wales - * Sydney, 2052 - * Australia + * Email: */ #include "mdadm.h" @@ -70,6 +65,43 @@ struct blkpg_partition { char volname[BLKPG_VOLNAMELTH]; /* volume label */ }; +/* partition table structures so we can check metadata position + * against the end of the last partition. + * Only handle MBR and GPT partition tables. 
+ */ +struct MBR_part_record { + __u8 bootable; + __u8 first_head; + __u8 first_sector; + __u8 first_cyl; + __u8 part_type; + __u8 last_head; + __u8 last_sector; + __u8 last_cyl; + __u32 first_sect_lba; + __u32 blocks_num; +}; + +struct GPT_part_entry { + unsigned char type_guid[16]; + unsigned char partition_guid[16]; + unsigned char starting_lba[8]; + unsigned char ending_lba[8]; + unsigned char attr_bits[8]; + unsigned char name[72]; +}; + +/* MBR/GPT magic numbers */ +#define MBR_SIGNATURE_MAGIC __cpu_to_le16(0xAA55) +#define GPT_SIGNATURE_MAGIC __cpu_to_le64(0x5452415020494645ULL) + +#define MBR_SIGNATURE_OFFSET 510 +#define MBR_PARTITION_TABLE_OFFSET 446 +#define MBR_PARTITIONS 4 +#define MBR_GPT_PARTITION_TYPE 0xEE +#define GPT_ALL_PARTITIONS_OFFSET 80 +#define GPT_ENTRY_SIZE_OFFSET 84 + /* * Parse a 128 bit uuid in 4 integers * format is 32 hex nibbles with optional :. separator @@ -154,6 +186,73 @@ int get_linux_version() return (a*1000000)+(b*1000)+c; } +#ifndef MDASSEMBLE +long long parse_size(char *size) +{ + /* parse 'size' which should be a number optionally + * followed by 'K', 'M', or 'G'. + * Without a suffix, K is assumed. 
+ * Number returned is in sectors (half-K) + */ + char *c; + long long s = strtoll(size, &c, 10); + if (s > 0) { + switch (*c) { + case 'K': + c++; + default: + s *= 2; + break; + case 'M': + c++; + s *= 1024 * 2; + break; + case 'G': + c++; + s *= 1024 * 1024 * 2; + break; + } + } + if (*c) + s = 0; + return s; +} + +int parse_layout_10(char *layout) +{ + int copies, rv; + char *cp; + /* Parse the layout string for raid10 */ + /* 'f', 'o' or 'n' followed by a number <= raid_disks */ + if ((layout[0] != 'n' && layout[0] != 'f' && layout[0] != 'o') || + (copies = strtoul(layout+1, &cp, 10)) < 1 || + copies > 200 || + *cp) + return -1; + if (layout[0] == 'n') + rv = 256 + copies; + else if (layout[0] == 'o') + rv = 0x10000 + (copies<<8) + 1; + else + rv = 1 + (copies<<8); + return rv; +} + +int parse_layout_faulty(char *layout) +{ + /* Parse the layout string for 'faulty' */ + int ln = strcspn(layout, "0123456789"); + char *m = strdup(layout); + int mode; + m[ln] = 0; + mode = map_name(faultylayout, m); + if (mode == UnSet) + return -1; + + return mode | (atoi(layout+ln)<< ModeShift); +} +#endif + void remove_partitions(int fd) { /* remove partitions from this block device. 
@@ -199,9 +298,9 @@ int enough(int level, int raid_disks, int layout, int clean, } while (first != 0); return 1; - case -4: + case LEVEL_MULTIPATH: return avail_disks>= 1; - case -1: + case LEVEL_LINEAR: case 0: return avail_disks == raid_disks; case 1: @@ -274,17 +373,15 @@ void copy_uuid(void *a, int b[4], int swapuuid) memcpy(a, b, 16); } -char *fname_from_uuid(struct supertype *st, struct mdinfo *info, char *buf, char sep) +char *__fname_from_uuid(int id[4], int swap, char *buf, char sep) { int i, j; - int id; char uuid[16]; char *c = buf; strcpy(c, "UUID-"); c += strlen(c); - copy_uuid(uuid, info->uuid, st->ss->swapuuid); + copy_uuid(uuid, id, swap); for (i = 0; i < 4; i++) { - id = uuid[i]; if (i) *c++ = sep; for (j = 3; j >= 0; j--) { @@ -293,6 +390,12 @@ char *fname_from_uuid(struct supertype *st, struct mdinfo *info, char *buf, char } } return buf; + +} + +char *fname_from_uuid(struct supertype *st, struct mdinfo *info, char *buf, char sep) +{ + return __fname_from_uuid(info->uuid, st->ss->swapuuid, buf, sep); } #ifndef MDASSEMBLE @@ -467,8 +570,10 @@ int devlist_ready = 0; int add_dev(const char *name, const struct stat *stb, int flag, struct FTW *s) { struct stat st; + if (S_ISLNK(stb->st_mode)) { - stat(name, &st); + if (stat(name, &st) != 0) + return 0; stb = &st; } @@ -509,14 +614,13 @@ int nftw(const char *path, int (*han)(const char *name, const struct stat *stb, /* * Find a block device with the right major/minor number. * If we find multiple names, choose the shortest. - * If we find a non-standard name, it is probably there - * deliberately so prefer it over a standard name. + * If we find a name in /dev/md/, we prefer that. * This applies only to names for MD devices. 
*/ char *map_dev(int major, int minor, int create) { struct devmap *p; - char *std = NULL, *nonstd=NULL; + char *regular = NULL, *preferred=NULL; int did_check = 0; if (major == 0 && minor == 0) @@ -543,27 +647,27 @@ char *map_dev(int major, int minor, int create) for (p=devlist; p; p=p->next) if (p->major == major && p->minor == minor) { - if (is_standard(p->name, NULL)) { - if (std == NULL || - strlen(p->name) < strlen(std)) - std = p->name; + if (strncmp(p->name, "/dev/md/",8) == 0) { + if (preferred == NULL || + strlen(p->name) < strlen(preferred)) + preferred = p->name; } else { - if (nonstd == NULL || - strlen(p->name) < strlen(nonstd)) - nonstd = p->name; + if (regular == NULL || + strlen(p->name) < strlen(regular)) + regular = p->name; } } - if (!std && !nonstd && !did_check) { + if (!regular && !preferred && !did_check) { devlist_ready = 0; goto retry; } - if (create && !std && !nonstd) { + if (create && !regular && !preferred) { static char buf[30]; snprintf(buf, sizeof(buf), "%d:%d", major, minor); - nonstd = buf; + regular = buf; } - return nonstd ? nonstd : std; + return preferred ? preferred : regular; } unsigned long calc_csum(void *super, int bytes) @@ -763,7 +867,7 @@ int find_free_devnum(int use_partitions) { int devnum; for (devnum = 127; devnum != 128; - devnum = devnum ? devnum-1 : (1<<22)-1) { + devnum = devnum ? 
devnum-1 : (1<<20)-1) { char *dn; int _devnum; @@ -811,6 +915,14 @@ int dev_open(char *dev, int flags) return fd; } +int open_dev(int devnum) +{ + char buf[20]; + + sprintf(buf, "%d:%d", dev2major(devnum), dev2minor(devnum)); + return dev_open(buf, O_RDWR); +} + int open_dev_excl(int devnum) { char buf[20]; @@ -828,6 +940,41 @@ int open_dev_excl(int devnum) return -1; } +int same_dev(char *one, char *two) +{ + struct stat st1, st2; + if (stat(one, &st1) != 0) + return 0; + if (stat(two, &st2) != 0) + return 0; + if ((st1.st_mode & S_IFMT) != S_IFBLK) + return 0; + if ((st2.st_mode & S_IFMT) != S_IFBLK) + return 0; + return st1.st_rdev == st2.st_rdev; +} + +void wait_for(char *dev, int fd) +{ + int i; + struct stat stb_want; + + if (fstat(fd, &stb_want) != 0 || + (stb_want.st_mode & S_IFMT) != S_IFBLK) + return; + + for (i=0 ; i<25 ; i++) { + struct stat stb; + if (stat(dev, &stb) == 0 && + (stb.st_mode & S_IFMT) == S_IFBLK && + (stb.st_rdev == stb_want.st_rdev)) + return; + usleep(200000); + } + if (i == 25) + dprintf("%s: timeout waiting for %s\n", __func__, dev); +} + struct superswitch *superlist[] = { &super0, &super1, &super_ddf, &super_imsm, NULL }; #if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) @@ -873,7 +1020,10 @@ struct supertype *super_by_fd(int fd) if (sra) sysfs_free(sra); sra = sysfs_read(-1, devnum, GET_VERSION); - verstr = sra->text_version ? : "-no-metadata-"; + if (sra && sra->text_version[0]) + verstr = sra->text_version; + else + verstr = "-no-metadata-"; } for (i = 0; st == NULL && superlist[i] ; i++) @@ -983,6 +1133,145 @@ int get_dev_size(int fd, char *dname, unsigned long long *sizep) return 1; } + +/* Sets endofpart parameter to the last block used by the last GPT partition on the device. 
+ * Returns: 1 if successful + * -1 for unknown partition type + * 0 for other errors + */ +static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart) +{ + unsigned char buf[512]; + unsigned char empty_gpt_entry[16]= {0}; + struct GPT_part_entry *part; + unsigned long long curr_part_end; + unsigned all_partitions, entry_size; + int part_nr; + + *endofpart = 0; + + /* read GPT header */ + lseek(fd, 512, SEEK_SET); + if (read(fd, buf, 512) != 512) + return 0; + + /* get the number of partition entries and the entry size */ + all_partitions = __le32_to_cpu(buf[GPT_ALL_PARTITIONS_OFFSET]); + entry_size = __le32_to_cpu(buf[GPT_ENTRY_SIZE_OFFSET]); + + /* Check GPT signature*/ + if (*((__u64*)buf) != GPT_SIGNATURE_MAGIC) + return -1; + + /* sanity checks */ + if (all_partitions > 1024 || + entry_size > 512) + return -1; + + /* read first GPT partition entries */ + if (read(fd, buf, 512) != 512) + return 0; + + part = (struct GPT_part_entry*)buf; + + for (part_nr=0; part_nr < all_partitions; part_nr++) { + /* is this valid partition? */ + if (memcmp(part->type_guid, empty_gpt_entry, 16) != 0) { + /* check the last lba for the current partition */ + curr_part_end = __le64_to_cpu(*(__u64*)part->ending_lba); + if (curr_part_end > *endofpart) + *endofpart = curr_part_end; + } + + part = (struct GPT_part_entry*)((unsigned char*)part + entry_size); + + if ((unsigned char *)part >= buf + 512) { + if (read(fd, buf, 512) != 512) + return 0; + part = (struct GPT_part_entry*)buf; + } + } + return 1; +} + +/* Sets endofpart parameter to the last block used by the last partition on the device. 
+ * Returns: 1 if successful + * -1 for unknown partition type + * 0 for other errors + */ +static int get_last_partition_end(int fd, unsigned long long *endofpart) +{ + unsigned char boot_sect[512]; + struct MBR_part_record *part; + unsigned long long curr_part_end; + int part_nr; + int retval = 0; + + *endofpart = 0; + + /* read MBR */ + lseek(fd, 0, 0); + if (read(fd, boot_sect, 512) != 512) + goto abort; + + /* check MBR signature */ + if (*((__u16*)(boot_sect + MBR_SIGNATURE_OFFSET)) + == MBR_SIGNATURE_MAGIC) { + retval = 1; + /* found the correct signature */ + part = (struct MBR_part_record*) + (boot_sect + MBR_PARTITION_TABLE_OFFSET); + + for (part_nr=0; part_nr < MBR_PARTITIONS; part_nr++) { + /* check for GPT type */ + if (part->part_type == MBR_GPT_PARTITION_TYPE) { + retval = get_gpt_last_partition_end(fd, endofpart); + break; + } + /* check the last used lba for the current partition */ + curr_part_end = __le32_to_cpu(part->first_sect_lba) + + __le32_to_cpu(part->blocks_num); + if (curr_part_end > *endofpart) + *endofpart = curr_part_end; + + part++; + } + } else { + /* Unknown partition table */ + retval = -1; + } + abort: + return retval; +} + +int check_partitions(int fd, char *dname, unsigned long long freesize) +{ + /* + * Check where the last partition ends + */ + unsigned long long endofpart; + int ret; + + if ((ret = get_last_partition_end(fd, &endofpart)) > 0) { + /* There appears to be a partition table here */ + if (freesize == 0) { + /* partitions will not be visible in new device */ + fprintf(stderr, + Name ": partition table exists on %s but will be lost or\n" + " meaningless after creating array\n", + dname); + return 1; + } else if (endofpart > freesize) { + /* last partition overlaps metadata */ + fprintf(stderr, + Name ": metadata will over-write last partition on %s.\n", + dname); + return 1; + } + } + return 0; +} + void get_one_disk(int mdfd, mdu_array_info_t *ainf, mdu_disk_info_t *disk) { int d; @@ -1049,7 +1338,11 @@ int 
add_disk(int mdfd, struct supertype *st, int rv; #ifndef MDASSEMBLE if (st->ss->external) { - rv = sysfs_add_disk(sra, info); + if (info->disk.state & (1<<MD_DISK_SYNC)) + info->recovery_start = MaxSector; + else + info->recovery_start = 0; + rv = sysfs_add_disk(sra, info, 0); if (! rv) { struct mdinfo *sd2; for (sd2 = sra->devs; sd2; sd2=sd2->next) @@ -1093,10 +1386,25 @@ int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info) return rv; } +unsigned long long min_recovery_start(struct mdinfo *array) +{ + /* find the minimum recovery_start in an array for metadata + * formats that only record per-array recovery progress instead + * of per-device + */ + unsigned long long recovery_start = MaxSector; + struct mdinfo *d; + + for (d = array->devs; d; d = d->next) + recovery_start = min(recovery_start, d->recovery_start); + + return recovery_start; +} + char *devnum2devname(int num) { char name[100]; - if (num > 0) + if (num >= 0) sprintf(name, "md%d", num); else sprintf(name, "md_d%d", -1-num); @@ -1155,14 +1463,14 @@ int fd2devnum(int fd) return NoMdDev; } -int mdmon_running(int devnum) +int mdmon_pid(int devnum) { char path[100]; char pid[10]; int fd; int n; sprintf(path, "/var/run/mdadm/%s.pid", devnum2devname(devnum)); - fd = open(path, O_RDONLY, 0); + fd = open(path, O_RDONLY | O_NOATIME, 0); if (fd < 0) return 0; @@ -1170,27 +1478,15 @@ int mdmon_running(int devnum) close(fd); if (n <= 0) return 0; - if (kill(atoi(pid), 0) == 0) - return 1; - return 0; + return atoi(pid); } -int signal_mdmon(int devnum) +int mdmon_running(int devnum) { - char path[100]; - char pid[10]; - int fd; - int n; - sprintf(path, "/var/run/mdadm/%s.pid", devnum2devname(devnum)); - fd = open(path, O_RDONLY, 0); - - if (fd < 0) + int pid = mdmon_pid(devnum); + if (pid <= 0) return 0; - n = read(fd, pid, 9); - close(fd); - if (n <= 0) - return 0; - if (kill(atoi(pid), SIGUSR1) == 0) + if (kill(pid, 0) == 0) return 1; return 0; } @@ -1233,9 +1529,8 @@ int start_mdmon(int devnum) for (i=0; paths[i]; 
i++) if (paths[i][0]) execl(paths[i], "mdmon", - map_dev(dev2major(devnum), - dev2minor(devnum), - 1), NULL); + devnum2devname(devnum), + NULL); exit(1); case -1: fprintf(stderr, Name ": cannot run mdmon. " "Array remains readonly\n"); @@ -1258,6 +1553,17 @@ int check_env(char *name) return 0; } +__u32 random32(void) +{ + __u32 rv; + int rfd = open("/dev/urandom", O_RDONLY); + if (rfd < 0 || read(rfd, &rv, 4) != 4) + rv = random(); + if (rfd >= 0) + close(rfd); + return rv; +} + #ifndef MDASSEMBLE int flush_metadata_updates(struct supertype *st) {