X-Git-Url: http://git.ipfire.org/?p=thirdparty%2Fmdadm.git;a=blobdiff_plain;f=util.c;h=725877d105bb137658d30d598476a3102074f870;hp=4032fa9b1778aff6895b95e43b21c7449915c7c7;hb=5b13d2e1fb8abecddd4e28e67facac5d7ef2cef3;hpb=7d55dca2cce88de443864b1e843dd0faf9142bd7 diff --git a/util.c b/util.c index 4032fa9b..725877d1 100644 --- a/util.c +++ b/util.c @@ -31,17 +31,11 @@ #include #include #include +#include #include #include #include #include -#include -#ifdef NO_COROSYNC - typedef uint64_t cmap_handle_t; - #define CS_OK 1 -#else - #include -#endif /* @@ -88,6 +82,144 @@ struct blkpg_partition { aren't permitted). */ #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) +static int is_dlm_hooks_ready = 0; + +int dlm_funs_ready(void) +{ + return is_dlm_hooks_ready ? 1 : 0; +} + +#ifndef MDASSEMBLE +static struct dlm_hooks *dlm_hooks = NULL; +struct dlm_lock_resource *dlm_lock_res = NULL; +static int ast_called = 0; + +struct dlm_lock_resource { + dlm_lshandle_t *ls; + struct dlm_lksb lksb; +}; + +/* Using poll(2) to wait for and dispatch ASTs */ +static int poll_for_ast(dlm_lshandle_t ls) +{ + struct pollfd pfd; + + pfd.fd = dlm_hooks->ls_get_fd(ls); + pfd.events = POLLIN; + + while (!ast_called) + { + if (poll(&pfd, 1, 0) < 0) + { + perror("poll"); + return -1; + } + dlm_hooks->dispatch(dlm_hooks->ls_get_fd(ls)); + } + ast_called = 0; + + return 0; +} + +static void dlm_ast(void *arg) +{ + ast_called = 1; +} + +static char *cluster_name = NULL; +/* Create the lockspace, take bitmapXXX locks on all the bitmaps. 
*/ +int cluster_get_dlmlock(int *lockid) +{ + int ret = -1; + char str[64]; + int flags = LKF_NOQUEUE; + + ret = get_cluster_name(&cluster_name); + if (ret) { + pr_err("The md can't get cluster name\n"); + return -1; + } + + dlm_lock_res = xmalloc(sizeof(struct dlm_lock_resource)); + dlm_lock_res->ls = dlm_hooks->create_lockspace(cluster_name, O_RDWR); + if (!dlm_lock_res->ls) { + pr_err("%s failed to create lockspace\n", cluster_name); + return -ENOMEM; + } + + snprintf(str, 64, "bitmap%s", cluster_name); + ret = dlm_hooks->ls_lock(dlm_lock_res->ls, LKM_PWMODE, &dlm_lock_res->lksb, + flags, str, strlen(str), 0, dlm_ast, + dlm_lock_res, NULL, NULL); + if (ret) { + pr_err("error %d when get PW mode on lock %s\n", errno, str); + dlm_hooks->release_lockspace(cluster_name, dlm_lock_res->ls, 1); + return ret; + } + + /* Wait for it to complete */ + poll_for_ast(dlm_lock_res->ls); + *lockid = dlm_lock_res->lksb.sb_lkid; + + return dlm_lock_res->lksb.sb_status; +} + +int cluster_release_dlmlock(int lockid) +{ + int ret = -1; + + if (!cluster_name) + return -1; + + ret = dlm_hooks->ls_unlock(dlm_lock_res->ls, lockid, 0, + &dlm_lock_res->lksb, dlm_lock_res); + if (ret) { + pr_err("error %d happened when unlock\n", errno); + /* XXX make sure the lock is unlocked eventually */ + goto out; + } + + /* Wait for it to complete */ + poll_for_ast(dlm_lock_res->ls); + + errno = dlm_lock_res->lksb.sb_status; + if (errno != EUNLOCK) { + pr_err("error %d happened in ast when unlock lockspace\n", errno); + /* XXX make sure the lockspace is unlocked eventually */ + goto out; + } + + ret = dlm_hooks->release_lockspace(cluster_name, dlm_lock_res->ls, 1); + if (ret) { + pr_err("error %d happened when release lockspace\n", errno); + /* XXX make sure the lockspace is released eventually */ + goto out; + } + free(dlm_lock_res); + +out: + return ret; +} +#else +int cluster_get_dlmlock(int *lockid) +{ + return -1; +} +int cluster_release_dlmlock(int lockid) +{ + return -1; +} +#endif + +/* + * 
Get array info from the kernel. Longer term we want to deprecate the + * ioctl and get it from sysfs. + */ +int md_get_array_info(int fd, struct mdu_array_info_s *array) +{ + return ioctl(fd, GET_ARRAY_INFO, array); +} + /* * Parse a 128 bit uuid in 4 integers * format is 32 hexx nibbles with options :. separator @@ -416,8 +548,7 @@ int enough_fd(int fd) int i, rv; char *avail; - if (ioctl(fd, GET_ARRAY_INFO, &array) != 0 || - array.raid_disks <= 0) + if (md_get_array_info(fd, &array) != 0 || array.raid_disks <= 0) return 0; avail = xcalloc(array.raid_disks, 1); for (i = 0; i < MAX_DISKS && array.nr_disks > 0; i++) { @@ -587,17 +718,22 @@ int check_raid(int fd, char *name) if (!st) return 0; - st->ss->load_super(st, fd, name); - /* Looks like a raid array .. */ - pr_err("%s appears to be part of a raid array:\n", - name); - st->ss->getinfo_super(st, &info, NULL); - st->ss->free_super(st); - crtime = info.array.ctime; - level = map_num(pers, info.array.level); - if (!level) level = "-unknown-"; - cont_err("level=%s devices=%d ctime=%s", - level, info.array.raid_disks, ctime(&crtime)); + if (st->ss->add_to_super != NULL) { + st->ss->load_super(st, fd, name); + /* Looks like a raid array .. 
*/ + pr_err("%s appears to be part of a raid array:\n", name); + st->ss->getinfo_super(st, &info, NULL); + st->ss->free_super(st); + crtime = info.array.ctime; + level = map_num(pers, info.array.level); + if (!level) + level = "-unknown-"; + cont_err("level=%s devices=%d ctime=%s", + level, info.array.raid_disks, ctime(&crtime)); + } else { + /* Looks like GPT or MBR */ + pr_err("partition table exists on %s\n", name); + } return 1; } @@ -683,7 +819,7 @@ unsigned long calc_csum(void *super, int bytes) #ifndef MDASSEMBLE char *human_size(long long bytes) { - static char buf[30]; + static char buf[47]; /* We convert bytes to either centi-M{ega,ibi}bytes or * centi-G{igi,ibi}bytes, with appropriate rounding, @@ -700,14 +836,12 @@ char *human_size(long long bytes) long cMiB = (bytes * 200LL / (1LL<<20) + 1) / 2; long cMB = (bytes / ( 1000000LL / 200LL ) +1) /2; snprintf(buf, sizeof(buf), " (%ld.%02ld MiB %ld.%02ld MB)", - cMiB/100 , cMiB % 100, - cMB/100, cMB % 100); + cMiB/100, cMiB % 100, cMB/100, cMB % 100); } else { long cGiB = (bytes * 200LL / (1LL<<30) +1) / 2; long cGB = (bytes / (1000000000LL/200LL ) +1) /2; snprintf(buf, sizeof(buf), " (%ld.%02ld GiB %ld.%02ld GB)", - cGiB/100 , cGiB % 100, - cGB/100, cGB % 100); + cGiB/100, cGiB % 100, cGB/100, cGB % 100); } return buf; } @@ -734,22 +868,22 @@ char *human_size_brief(long long bytes, int prefix) if (bytes < 2*1024LL*1024LL*1024LL) { long cMiB = (bytes * 200LL / (1LL<<20) +1) /2; snprintf(buf, sizeof(buf), "%ld.%02ldMiB", - cMiB/100 , cMiB % 100); + cMiB/100, cMiB % 100); } else { long cGiB = (bytes * 200LL / (1LL<<30) +1) /2; snprintf(buf, sizeof(buf), "%ld.%02ldGiB", - cGiB/100 , cGiB % 100); + cGiB/100, cGiB % 100); } } else if (prefix == JEDEC) { if (bytes < 2*1024LL*1024LL*1024LL) { long cMB = (bytes / ( 1000000LL / 200LL ) +1) /2; snprintf(buf, sizeof(buf), "%ld.%02ldMB", - cMB/100, cMB % 100); + cMB/100, cMB % 100); } else { long cGB = (bytes / (1000000000LL/200LL ) +1) /2; snprintf(buf, sizeof(buf), 
"%ld.%02ldGB", - cGB/100 , cGB % 100); + cGB/100, cGB % 100); } } else @@ -805,7 +939,7 @@ int get_data_disks(int level, int layout, int raid_disks) return data_disks; } -int devnm2devid(char *devnm) +dev_t devnm2devid(char *devnm) { /* First look in /sys/block/$DEVNM/dev for %d:%d * If that fails, try parsing out a number @@ -916,7 +1050,8 @@ int dev_open(char *dev, int flags) int major; int minor; - if (!dev) return -1; + if (!dev) + return -1; flags |= O_DIRECT; if (get_maj_min(dev, &major, &minor)) { @@ -942,7 +1077,7 @@ int dev_open(char *dev, int flags) int open_dev_flags(char *devnm, int flags) { - int devid; + dev_t devid; char buf[20]; devid = devnm2devid(devnm); @@ -960,11 +1095,11 @@ int open_dev_excl(char *devnm) char buf[20]; int i; int flags = O_RDWR; - int devid = devnm2devid(devnm); + dev_t devid = devnm2devid(devnm); long delay = 1000; sprintf(buf, "%d:%d", major(devid), minor(devid)); - for (i = 0 ; i < 25 ; i++) { + for (i = 0; i < 25; i++) { int fd = dev_open(buf, flags|O_EXCL); if (fd >= 0) return fd; @@ -1005,7 +1140,7 @@ void wait_for(char *dev, int fd) (stb_want.st_mode & S_IFMT) != S_IFBLK) return; - for (i = 0 ; i < 25 ; i++) { + for (i = 0; i < 25; i++) { struct stat stb; if (stat(dev, &stb) == 0 && (stb.st_mode & S_IFMT) == S_IFBLK && @@ -1048,7 +1183,7 @@ struct supertype *super_by_fd(int fd, char **subarrayp) minor = sra->array.minor_version; verstr = sra->text_version; } else { - if (ioctl(fd, GET_ARRAY_INFO, &array)) + if (md_get_array_info(fd, &array)) array.major_version = array.minor_version = 0; vers = array.major_version; minor = array.minor_version; @@ -1068,8 +1203,7 @@ struct supertype *super_by_fd(int fd, char **subarrayp) subarray = xstrdup(subarray); } strcpy(container, dev); - if (sra) - sysfs_free(sra); + sysfs_free(sra); sra = sysfs_read(-1, container, GET_VERSION); if (sra && sra->text_version[0]) verstr = sra->text_version; @@ -1077,11 +1211,10 @@ struct supertype *super_by_fd(int fd, char **subarrayp) verstr = 
"-no-metadata-"; } - for (i = 0; st == NULL && superlist[i] ; i++) + for (i = 0; st == NULL && superlist[i]; i++) st = superlist[i]->match_metadata_desc(verstr); - if (sra) - sysfs_free(sra); + sysfs_free(sra); if (st) { st->sb = NULL; if (subarrayp) @@ -1136,14 +1269,14 @@ struct supertype *guess_super_type(int fd, enum guess_types guess_type) */ struct superswitch *ss; struct supertype *st; - time_t besttime = 0; + unsigned int besttime = 0; int bestsuper = -1; int i; st = xcalloc(1, sizeof(*st)); st->container_devnm[0] = 0; - for (i = 0 ; superlist[i]; i++) { + for (i = 0; superlist[i]; i++) { int rv; ss = superlist[i]; if (guess_type == guess_array && ss->add_to_super == NULL) @@ -1197,7 +1330,7 @@ int get_dev_size(int fd, char *dname, unsigned long long *sizep) ldsize <<= 9; } else { if (dname) - pr_err("Cannot get size of %s: %s\b", + pr_err("Cannot get size of %s: %s\n", dname, strerror(errno)); return 0; } @@ -1206,6 +1339,22 @@ int get_dev_size(int fd, char *dname, unsigned long long *sizep) return 1; } +/* Return sector size of device in bytes */ +int get_dev_sector_size(int fd, char *dname, unsigned int *sectsizep) +{ + unsigned int sectsize; + + if (ioctl(fd, BLKSSZGET, &sectsize) != 0) { + if (dname) + pr_err("Cannot get sector size of %s: %s\n", + dname, strerror(errno)); + return 0; + } + + *sectsizep = sectsize; + return 1; +} + /* Return true if this can only be a container, not a member device. * i.e. 
is and md device and size is zero */ @@ -1235,12 +1384,15 @@ static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart) unsigned long long curr_part_end; unsigned all_partitions, entry_size; unsigned part_nr; + unsigned int sector_size = 0; *endofpart = 0; BUILD_BUG_ON(sizeof(gpt) != 512); /* skip protective MBR */ - lseek(fd, 512, SEEK_SET); + if (!get_dev_sector_size(fd, NULL, &sector_size)) + return 0; + lseek(fd, sector_size, SEEK_SET); /* read GPT header */ if (read(fd, &gpt, 512) != 512) return 0; @@ -1260,6 +1412,8 @@ static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart) part = (struct GPT_part_entry *)buf; + /* set offset to third block (GPT entries) */ + lseek(fd, sector_size*2, SEEK_SET); for (part_nr = 0; part_nr < all_partitions; part_nr++) { /* read partition entry */ if (read(fd, buf, entry_size) != (ssize_t)entry_size) @@ -1285,9 +1439,9 @@ static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart) static int get_last_partition_end(int fd, unsigned long long *endofpart) { struct MBR boot_sect; - struct MBR_part_record *part; unsigned long long curr_part_end; unsigned part_nr; + unsigned int sector_size; int retval = 0; *endofpart = 0; @@ -1302,26 +1456,34 @@ static int get_last_partition_end(int fd, unsigned long long *endofpart) if (boot_sect.magic == MBR_SIGNATURE_MAGIC) { retval = 1; /* found the correct signature */ - part = boot_sect.parts; for (part_nr = 0; part_nr < MBR_PARTITIONS; part_nr++) { + /* + * Have to make every access through boot_sect rather + * than using a pointer to the partition table (or an + * entry), since the entries are not properly aligned. 
+ */ + /* check for GPT type */ - if (part->part_type == MBR_GPT_PARTITION_TYPE) { + if (boot_sect.parts[part_nr].part_type == + MBR_GPT_PARTITION_TYPE) { retval = get_gpt_last_partition_end(fd, endofpart); break; } /* check the last used lba for the current partition */ - curr_part_end = __le32_to_cpu(part->first_sect_lba) + - __le32_to_cpu(part->blocks_num); + curr_part_end = + __le32_to_cpu(boot_sect.parts[part_nr].first_sect_lba) + + __le32_to_cpu(boot_sect.parts[part_nr].blocks_num); if (curr_part_end > *endofpart) *endofpart = curr_part_end; - - part++; } } else { /* Unknown partition table */ retval = -1; } + /* calculate number of 512-byte blocks */ + if (get_dev_sector_size(fd, NULL, &sector_size)) + *endofpart *= (sector_size / 512); abort: return retval; } @@ -1333,9 +1495,8 @@ int check_partitions(int fd, char *dname, unsigned long long freesize, * Check where the last partition ends */ unsigned long long endofpart; - int ret; - if ((ret = get_last_partition_end(fd, &endofpart)) > 0) { + if (get_last_partition_end(fd, &endofpart) > 0) { /* There appears to be a partition table here */ if (freesize == 0) { /* partitions will not be visible in new device */ @@ -1640,6 +1801,36 @@ int remove_disk(int mdfd, struct supertype *st, return rv; } +int hot_remove_disk(int mdfd, unsigned long dev, int force) +{ + int cnt = force ? 500 : 5; + int ret; + + /* HOT_REMOVE_DISK can fail with EBUSY if there are + * outstanding IO requests to the device. + * In this case, it can be helpful to wait a little while, + * up to 5 seconds if 'force' is set, or 50 msec if not. + */ + while ((ret = ioctl(mdfd, HOT_REMOVE_DISK, dev)) == -1 && + errno == EBUSY && + cnt-- > 0) + usleep(10000); + + return ret; +} + +int sys_hot_remove_disk(int statefd, int force) +{ + int cnt = force ? 500 : 5; + int ret; + + while ((ret = write(statefd, "remove", 6)) == -1 && + errno == EBUSY && + cnt-- > 0) + usleep(10000); + return ret == 6 ? 
0 : -1; +} + int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info) { /* Initialise kernel's knowledge of array. @@ -1814,6 +2005,27 @@ __u32 random32(void) return rv; } +void random_uuid(__u8 *buf) +{ + int fd, i, len; + __u32 r[4]; + + fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) + goto use_random; + len = read(fd, buf, 16); + close(fd); + if (len != 16) + goto use_random; + + return; + +use_random: + for (i = 0; i < 4; i++) + r[i] = random(); + memcpy(buf, r, 16); +} + #ifndef MDASSEMBLE int flush_metadata_updates(struct supertype *st) { @@ -1995,41 +2207,42 @@ void reopen_mddev(int mdfd) if (fd >= 0 && fd != mdfd) dup2(fd, mdfd); } -#ifndef MDASSEMBLE -int get_cluster_name(char **cluster_name) -{ - void *lib_handle = NULL; - int rv = -1; - cmap_handle_t handle; - static int (*initialize)(cmap_handle_t *handle); - static int (*get_string)(cmap_handle_t handle, - const char *string, - char **name); - static int (*finalize)(cmap_handle_t handle); +#ifndef MDASSEMBLE +static struct cmap_hooks *cmap_hooks = NULL; +static int is_cmap_hooks_ready = 0; +void set_cmap_hooks(void) +{ + cmap_hooks = xmalloc(sizeof(struct cmap_hooks)); + cmap_hooks->cmap_handle = dlopen("libcmap.so.4", RTLD_NOW | RTLD_LOCAL); + if (!cmap_hooks->cmap_handle) + return; - lib_handle = dlopen("libcmap.so.4", RTLD_NOW | RTLD_LOCAL); - if (!lib_handle) - return rv; + cmap_hooks->initialize = dlsym(cmap_hooks->cmap_handle, "cmap_initialize"); + cmap_hooks->get_string = dlsym(cmap_hooks->cmap_handle, "cmap_get_string"); + cmap_hooks->finalize = dlsym(cmap_hooks->cmap_handle, "cmap_finalize"); - initialize = dlsym(lib_handle, "cmap_initialize"); - if (!initialize) - goto out; + if (!cmap_hooks->initialize || !cmap_hooks->get_string || + !cmap_hooks->finalize) + dlclose(cmap_hooks->cmap_handle); + else + is_cmap_hooks_ready = 1; +} - get_string = dlsym(lib_handle, "cmap_get_string"); - if (!get_string) - goto out; +int get_cluster_name(char **cluster_name) +{ + int rv = -1; + 
cmap_handle_t handle; - finalize = dlsym(lib_handle, "cmap_finalize"); - if (!finalize) - goto out; + if (!is_cmap_hooks_ready) + return rv; - rv = initialize(&handle); + rv = cmap_hooks->initialize(&handle); if (rv != CS_OK) goto out; - rv = get_string(handle, "totem.cluster_name", cluster_name); + rv = cmap_hooks->get_string(handle, "totem.cluster_name", cluster_name); if (rv != CS_OK) { free(*cluster_name); rv = -1; @@ -2038,9 +2251,36 @@ int get_cluster_name(char **cluster_name) rv = 0; name_err: - finalize(handle); + cmap_hooks->finalize(handle); out: - dlclose(lib_handle); return rv; } + +void set_dlm_hooks(void) +{ + dlm_hooks = xmalloc(sizeof(struct dlm_hooks)); + dlm_hooks->dlm_handle = dlopen("libdlm_lt.so.3", RTLD_NOW | RTLD_LOCAL); + if (!dlm_hooks->dlm_handle) + return; + + dlm_hooks->create_lockspace = dlsym(dlm_hooks->dlm_handle, "dlm_create_lockspace"); + dlm_hooks->release_lockspace = dlsym(dlm_hooks->dlm_handle, "dlm_release_lockspace"); + dlm_hooks->ls_lock = dlsym(dlm_hooks->dlm_handle, "dlm_ls_lock"); + dlm_hooks->ls_unlock = dlsym(dlm_hooks->dlm_handle, "dlm_ls_unlock"); + dlm_hooks->ls_get_fd = dlsym(dlm_hooks->dlm_handle, "dlm_ls_get_fd"); + dlm_hooks->dispatch = dlsym(dlm_hooks->dlm_handle, "dlm_dispatch"); + + if (!dlm_hooks->create_lockspace || !dlm_hooks->ls_lock || + !dlm_hooks->ls_unlock || !dlm_hooks->release_lockspace || + !dlm_hooks->ls_get_fd || !dlm_hooks->dispatch) + dlclose(dlm_hooks->dlm_handle); + else + is_dlm_hooks_ready = 1; +} + +void set_hooks(void) +{ + set_dlm_hooks(); + set_cmap_hooks(); +} #endif