]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - util.c
Incremental: Remove redundant call for GET_ARRAY_INFO
[thirdparty/mdadm.git] / util.c
diff --git a/util.c b/util.c
index 4032fa9b1778aff6895b95e43b21c7449915c7c7..725877d105bb137658d30d598476a3102074f870 100644 (file)
--- a/util.c
+++ b/util.c
 #include       <sys/resource.h>
 #include       <sys/vfs.h>
 #include       <linux/magic.h>
+#include       <poll.h>
 #include       <ctype.h>
 #include       <dirent.h>
 #include       <signal.h>
 #include       <dlfcn.h>
-#include       <stdint.h>
-#ifdef NO_COROSYNC
- typedef uint64_t cmap_handle_t;
- #define CS_OK 1
-#else
- #include      <corosync/cmap.h>
-#endif
 
 
 /*
@@ -88,6 +82,144 @@ struct blkpg_partition {
    aren't permitted). */
 #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
 
+static int is_dlm_hooks_ready = 0;
+
+int dlm_funs_ready(void)
+{
+       return is_dlm_hooks_ready ? 1 : 0;
+}
+
+#ifndef MDASSEMBLE
+static struct dlm_hooks *dlm_hooks = NULL;
+struct dlm_lock_resource *dlm_lock_res = NULL;
+static int ast_called = 0;
+
+struct dlm_lock_resource {
+       dlm_lshandle_t *ls;
+       struct dlm_lksb lksb;
+};
+
+/* Using poll(2) to wait for and dispatch ASTs */
+static int poll_for_ast(dlm_lshandle_t ls)
+{
+       struct pollfd pfd;
+
+       pfd.fd = dlm_hooks->ls_get_fd(ls);
+       pfd.events = POLLIN;
+
+       while (!ast_called)
+       {
+               if (poll(&pfd, 1, 0) < 0)
+               {
+                       perror("poll");
+                       return -1;
+               }
+               dlm_hooks->dispatch(dlm_hooks->ls_get_fd(ls));
+       }
+       ast_called = 0;
+
+       return 0;
+}
+
+static void dlm_ast(void *arg)
+{
+       ast_called = 1;
+}
+
+static char *cluster_name = NULL;
+/* Create the lockspace, take bitmapXXX locks on all the bitmaps. */
+int cluster_get_dlmlock(int *lockid)
+{
+       int ret = -1;
+       char str[64];
+       int flags = LKF_NOQUEUE;
+
+       ret = get_cluster_name(&cluster_name);
+       if (ret) {
+               pr_err("The md can't get cluster name\n");
+               return -1;
+       }
+
+       dlm_lock_res = xmalloc(sizeof(struct dlm_lock_resource));
+       dlm_lock_res->ls = dlm_hooks->create_lockspace(cluster_name, O_RDWR);
+       if (!dlm_lock_res->ls) {
+               pr_err("%s failed to create lockspace\n", cluster_name);
+               return -ENOMEM;
+       }
+
+       snprintf(str, 64, "bitmap%s", cluster_name);
+       ret = dlm_hooks->ls_lock(dlm_lock_res->ls, LKM_PWMODE, &dlm_lock_res->lksb,
+                         flags, str, strlen(str), 0, dlm_ast,
+                         dlm_lock_res, NULL, NULL);
+       if (ret) {
+               pr_err("error %d when get PW mode on lock %s\n", errno, str);
+               dlm_hooks->release_lockspace(cluster_name, dlm_lock_res->ls, 1);
+               return ret;
+       }
+
+       /* Wait for it to complete */
+       poll_for_ast(dlm_lock_res->ls);
+       *lockid = dlm_lock_res->lksb.sb_lkid;
+
+       return dlm_lock_res->lksb.sb_status;
+}
+
+int cluster_release_dlmlock(int lockid)
+{
+       int ret = -1;
+
+       if (!cluster_name)
+               return -1;
+
+       ret = dlm_hooks->ls_unlock(dlm_lock_res->ls, lockid, 0,
+                                    &dlm_lock_res->lksb, dlm_lock_res);
+       if (ret) {
+               pr_err("error %d happened when unlock\n", errno);
+               /* XXX make sure the lock is unlocked eventually */
+                goto out;
+       }
+
+       /* Wait for it to complete */
+       poll_for_ast(dlm_lock_res->ls);
+
+       errno = dlm_lock_res->lksb.sb_status;
+       if (errno != EUNLOCK) {
+               pr_err("error %d happened in ast when unlock lockspace\n", errno);
+               /* XXX make sure the lockspace is unlocked eventually */
+                goto out;
+       }
+
+       ret = dlm_hooks->release_lockspace(cluster_name, dlm_lock_res->ls, 1);
+       if (ret) {
+               pr_err("error %d happened when release lockspace\n", errno);
+               /* XXX make sure the lockspace is released eventually */
+                goto out;
+       }
+       free(dlm_lock_res);
+
+out:
+       return ret;
+}
+#else
+int cluster_get_dlmlock(int *lockid)
+{
+       return -1;
+}
+int cluster_release_dlmlock(int lockid)
+{
+       return -1;
+}
+#endif
+
+/*
+ * Get array info from the kernel. Longer term we want to deprecate the
+ * ioctl and get it from sysfs.
+ */
+int md_get_array_info(int fd, struct mdu_array_info_s *array)
+{
+       return ioctl(fd, GET_ARRAY_INFO, array);
+}
+
 /*
  * Parse a 128 bit uuid in 4 integers
  * format is 32 hexx nibbles with options :.<space> separator
@@ -416,8 +548,7 @@ int enough_fd(int fd)
        int i, rv;
        char *avail;
 
-       if (ioctl(fd, GET_ARRAY_INFO, &array) != 0 ||
-           array.raid_disks <= 0)
+       if (md_get_array_info(fd, &array) != 0 || array.raid_disks <= 0)
                return 0;
        avail = xcalloc(array.raid_disks, 1);
        for (i = 0; i < MAX_DISKS && array.nr_disks > 0; i++) {
@@ -587,17 +718,22 @@ int check_raid(int fd, char *name)
 
        if (!st)
                return 0;
-       st->ss->load_super(st, fd, name);
-       /* Looks like a raid array .. */
-       pr_err("%s appears to be part of a raid array:\n",
-               name);
-       st->ss->getinfo_super(st, &info, NULL);
-       st->ss->free_super(st);
-       crtime = info.array.ctime;
-       level = map_num(pers, info.array.level);
-       if (!level) level = "-unknown-";
-       cont_err("level=%s devices=%d ctime=%s",
-                level, info.array.raid_disks, ctime(&crtime));
+       if (st->ss->add_to_super != NULL) {
+               st->ss->load_super(st, fd, name);
+               /* Looks like a raid array .. */
+               pr_err("%s appears to be part of a raid array:\n", name);
+               st->ss->getinfo_super(st, &info, NULL);
+               st->ss->free_super(st);
+               crtime = info.array.ctime;
+               level = map_num(pers, info.array.level);
+               if (!level)
+                       level = "-unknown-";
+               cont_err("level=%s devices=%d ctime=%s",
+                       level, info.array.raid_disks, ctime(&crtime));
+       } else {
+               /* Looks like GPT or MBR */
+               pr_err("partition table exists on %s\n", name);
+       }
        return 1;
 }
 
@@ -683,7 +819,7 @@ unsigned long calc_csum(void *super, int bytes)
 #ifndef MDASSEMBLE
 char *human_size(long long bytes)
 {
-       static char buf[30];
+       static char buf[47];
 
        /* We convert bytes to either centi-M{ega,ibi}bytes or
         * centi-G{igi,ibi}bytes, with appropriate rounding,
@@ -700,14 +836,12 @@ char *human_size(long long bytes)
                long cMiB = (bytes * 200LL / (1LL<<20) + 1) / 2;
                long cMB  = (bytes / ( 1000000LL / 200LL ) +1) /2;
                snprintf(buf, sizeof(buf), " (%ld.%02ld MiB %ld.%02ld MB)",
-                       cMiB/100 , cMiB % 100,
-                       cMB/100, cMB % 100);
+                       cMiB/100, cMiB % 100, cMB/100, cMB % 100);
        } else {
                long cGiB = (bytes * 200LL / (1LL<<30) +1) / 2;
                long cGB  = (bytes / (1000000000LL/200LL ) +1) /2;
                snprintf(buf, sizeof(buf), " (%ld.%02ld GiB %ld.%02ld GB)",
-                       cGiB/100 , cGiB % 100,
-                       cGB/100, cGB % 100);
+                       cGiB/100, cGiB % 100, cGB/100, cGB % 100);
        }
        return buf;
 }
@@ -734,22 +868,22 @@ char *human_size_brief(long long bytes, int prefix)
                if (bytes < 2*1024LL*1024LL*1024LL) {
                        long cMiB = (bytes * 200LL / (1LL<<20) +1) /2;
                        snprintf(buf, sizeof(buf), "%ld.%02ldMiB",
-                               cMiB/100 , cMiB % 100);
+                                cMiB/100, cMiB % 100);
                } else {
                        long cGiB = (bytes * 200LL / (1LL<<30) +1) /2;
                        snprintf(buf, sizeof(buf), "%ld.%02ldGiB",
-                                       cGiB/100 , cGiB % 100);
+                                cGiB/100, cGiB % 100);
                }
        }
        else if (prefix == JEDEC) {
                if (bytes < 2*1024LL*1024LL*1024LL) {
                        long cMB  = (bytes / ( 1000000LL / 200LL ) +1) /2;
                        snprintf(buf, sizeof(buf), "%ld.%02ldMB",
-                                       cMB/100, cMB % 100);
+                                cMB/100, cMB % 100);
                } else {
                        long cGB  = (bytes / (1000000000LL/200LL ) +1) /2;
                        snprintf(buf, sizeof(buf), "%ld.%02ldGB",
-                                       cGB/100 , cGB % 100);
+                                cGB/100, cGB % 100);
                }
        }
        else
@@ -805,7 +939,7 @@ int get_data_disks(int level, int layout, int raid_disks)
        return data_disks;
 }
 
-int devnm2devid(char *devnm)
+dev_t devnm2devid(char *devnm)
 {
        /* First look in /sys/block/$DEVNM/dev for %d:%d
         * If that fails, try parsing out a number
@@ -916,7 +1050,8 @@ int dev_open(char *dev, int flags)
        int major;
        int minor;
 
-       if (!dev) return -1;
+       if (!dev)
+               return -1;
        flags |= O_DIRECT;
 
        if (get_maj_min(dev, &major, &minor)) {
@@ -942,7 +1077,7 @@ int dev_open(char *dev, int flags)
 
 int open_dev_flags(char *devnm, int flags)
 {
-       int devid;
+       dev_t devid;
        char buf[20];
 
        devid = devnm2devid(devnm);
@@ -960,11 +1095,11 @@ int open_dev_excl(char *devnm)
        char buf[20];
        int i;
        int flags = O_RDWR;
-       int devid = devnm2devid(devnm);
+       dev_t devid = devnm2devid(devnm);
        long delay = 1000;
 
        sprintf(buf, "%d:%d", major(devid), minor(devid));
-       for (i = 0 ; i < 25 ; i++) {
+       for (i = 0; i < 25; i++) {
                int fd = dev_open(buf, flags|O_EXCL);
                if (fd >= 0)
                        return fd;
@@ -1005,7 +1140,7 @@ void wait_for(char *dev, int fd)
            (stb_want.st_mode & S_IFMT) != S_IFBLK)
                return;
 
-       for (i = 0 ; i < 25 ; i++) {
+       for (i = 0; i < 25; i++) {
                struct stat stb;
                if (stat(dev, &stb) == 0 &&
                    (stb.st_mode & S_IFMT) == S_IFBLK &&
@@ -1048,7 +1183,7 @@ struct supertype *super_by_fd(int fd, char **subarrayp)
                minor = sra->array.minor_version;
                verstr = sra->text_version;
        } else {
-               if (ioctl(fd, GET_ARRAY_INFO, &array))
+               if (md_get_array_info(fd, &array))
                        array.major_version = array.minor_version = 0;
                vers = array.major_version;
                minor = array.minor_version;
@@ -1068,8 +1203,7 @@ struct supertype *super_by_fd(int fd, char **subarrayp)
                        subarray = xstrdup(subarray);
                }
                strcpy(container, dev);
-               if (sra)
-                       sysfs_free(sra);
+               sysfs_free(sra);
                sra = sysfs_read(-1, container, GET_VERSION);
                if (sra && sra->text_version[0])
                        verstr = sra->text_version;
@@ -1077,11 +1211,10 @@ struct supertype *super_by_fd(int fd, char **subarrayp)
                        verstr = "-no-metadata-";
        }
 
-       for (i = 0; st == NULL && superlist[i] ; i++)
+       for (i = 0; st == NULL && superlist[i]; i++)
                st = superlist[i]->match_metadata_desc(verstr);
 
-       if (sra)
-               sysfs_free(sra);
+       sysfs_free(sra);
        if (st) {
                st->sb = NULL;
                if (subarrayp)
@@ -1136,14 +1269,14 @@ struct supertype *guess_super_type(int fd, enum guess_types guess_type)
         */
        struct superswitch  *ss;
        struct supertype *st;
-       time_t besttime = 0;
+       unsigned int besttime = 0;
        int bestsuper = -1;
        int i;
 
        st = xcalloc(1, sizeof(*st));
        st->container_devnm[0] = 0;
 
-       for (i = 0 ; superlist[i]; i++) {
+       for (i = 0; superlist[i]; i++) {
                int rv;
                ss = superlist[i];
                if (guess_type == guess_array && ss->add_to_super == NULL)
@@ -1197,7 +1330,7 @@ int get_dev_size(int fd, char *dname, unsigned long long *sizep)
                        ldsize <<= 9;
                } else {
                        if (dname)
-                               pr_err("Cannot get size of %s: %s\b",
+                               pr_err("Cannot get size of %s: %s\n",
                                        dname, strerror(errno));
                        return 0;
                }
@@ -1206,6 +1339,22 @@ int get_dev_size(int fd, char *dname, unsigned long long *sizep)
        return 1;
 }
 
+/* Return sector size of device in bytes */
+int get_dev_sector_size(int fd, char *dname, unsigned int *sectsizep)
+{
+       unsigned int sectsize;
+
+       if (ioctl(fd, BLKSSZGET, &sectsize) != 0) {
+               if (dname)
+                       pr_err("Cannot get sector size of %s: %s\n",
+                               dname, strerror(errno));
+               return 0;
+       }
+
+       *sectsizep = sectsize;
+       return 1;
+}
+
 /* Return true if this can only be a container, not a member device.
  * i.e. is and md device and size is zero
  */
@@ -1235,12 +1384,15 @@ static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart)
        unsigned long long curr_part_end;
        unsigned all_partitions, entry_size;
        unsigned part_nr;
+       unsigned int sector_size = 0;
 
        *endofpart = 0;
 
        BUILD_BUG_ON(sizeof(gpt) != 512);
        /* skip protective MBR */
-       lseek(fd, 512, SEEK_SET);
+       if (!get_dev_sector_size(fd, NULL, &sector_size))
+               return 0;
+       lseek(fd, sector_size, SEEK_SET);
        /* read GPT header */
        if (read(fd, &gpt, 512) != 512)
                return 0;
@@ -1260,6 +1412,8 @@ static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart)
 
        part = (struct GPT_part_entry *)buf;
 
+       /* set offset to third block (GPT entries) */
+       lseek(fd, sector_size*2, SEEK_SET);
        for (part_nr = 0; part_nr < all_partitions; part_nr++) {
                /* read partition entry */
                if (read(fd, buf, entry_size) != (ssize_t)entry_size)
@@ -1285,9 +1439,9 @@ static int get_gpt_last_partition_end(int fd, unsigned long long *endofpart)
 static int get_last_partition_end(int fd, unsigned long long *endofpart)
 {
        struct MBR boot_sect;
-       struct MBR_part_record *part;
        unsigned long long curr_part_end;
        unsigned part_nr;
+       unsigned int sector_size;
        int retval = 0;
 
        *endofpart = 0;
@@ -1302,26 +1456,34 @@ static int get_last_partition_end(int fd, unsigned long long *endofpart)
        if (boot_sect.magic == MBR_SIGNATURE_MAGIC) {
                retval = 1;
                /* found the correct signature */
-               part = boot_sect.parts;
 
                for (part_nr = 0; part_nr < MBR_PARTITIONS; part_nr++) {
+                       /*
+                        * Have to make every access through boot_sect rather
+                        * than using a pointer to the partition table (or an
+                        * entry), since the entries are not properly aligned.
+                        */
+
                        /* check for GPT type */
-                       if (part->part_type == MBR_GPT_PARTITION_TYPE) {
+                       if (boot_sect.parts[part_nr].part_type ==
+                           MBR_GPT_PARTITION_TYPE) {
                                retval = get_gpt_last_partition_end(fd, endofpart);
                                break;
                        }
                        /* check the last used lba for the current partition  */
-                       curr_part_end = __le32_to_cpu(part->first_sect_lba) +
-                               __le32_to_cpu(part->blocks_num);
+                       curr_part_end =
+                               __le32_to_cpu(boot_sect.parts[part_nr].first_sect_lba) +
+                               __le32_to_cpu(boot_sect.parts[part_nr].blocks_num);
                        if (curr_part_end > *endofpart)
                                *endofpart = curr_part_end;
-
-                       part++;
                }
        } else {
                /* Unknown partition table */
                retval = -1;
        }
+       /* calculate number of 512-byte blocks */
+       if (get_dev_sector_size(fd, NULL, &sector_size))
+               *endofpart *= (sector_size / 512);
  abort:
        return retval;
 }
@@ -1333,9 +1495,8 @@ int check_partitions(int fd, char *dname, unsigned long long freesize,
         * Check where the last partition ends
         */
        unsigned long long endofpart;
-       int ret;
 
-       if ((ret = get_last_partition_end(fd, &endofpart)) > 0) {
+       if (get_last_partition_end(fd, &endofpart) > 0) {
                /* There appears to be a partition table here */
                if (freesize == 0) {
                        /* partitions will not be visible in new device */
@@ -1640,6 +1801,36 @@ int remove_disk(int mdfd, struct supertype *st,
        return rv;
 }
 
+int hot_remove_disk(int mdfd, unsigned long dev, int force)
+{
+       int cnt = force ? 500 : 5;
+       int ret;
+
+       /* HOT_REMOVE_DISK can fail with EBUSY if there are
+        * outstanding IO requests to the device.
+        * In this case, it can be helpful to wait a little while,
+        * up to 5 seconds if 'force' is set, or 50 msec if not.
+        */
+       while ((ret = ioctl(mdfd, HOT_REMOVE_DISK, dev)) == -1 &&
+              errno == EBUSY &&
+              cnt-- > 0)
+               usleep(10000);
+
+       return ret;
+}
+
+int sys_hot_remove_disk(int statefd, int force)
+{
+       int cnt = force ? 500 : 5;
+       int ret;
+
+       while ((ret = write(statefd, "remove", 6)) == -1 &&
+              errno == EBUSY &&
+              cnt-- > 0)
+               usleep(10000);
+       return ret == 6 ? 0 : -1;
+}
+
 int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info)
 {
        /* Initialise kernel's knowledge of array.
@@ -1814,6 +2005,27 @@ __u32 random32(void)
        return rv;
 }
 
+void random_uuid(__u8 *buf)
+{
+       int fd, i, len;
+       __u32 r[4];
+
+       fd = open("/dev/urandom", O_RDONLY);
+       if (fd < 0)
+               goto use_random;
+       len = read(fd, buf, 16);
+       close(fd);
+       if (len != 16)
+               goto use_random;
+
+       return;
+
+use_random:
+       for (i = 0; i < 4; i++)
+               r[i] = random();
+       memcpy(buf, r, 16);
+}
+
 #ifndef MDASSEMBLE
 int flush_metadata_updates(struct supertype *st)
 {
@@ -1995,41 +2207,42 @@ void reopen_mddev(int mdfd)
        if (fd >= 0 && fd != mdfd)
                dup2(fd, mdfd);
 }
-#ifndef MDASSEMBLE
-int get_cluster_name(char **cluster_name)
-{
-        void *lib_handle = NULL;
-        int rv = -1;
 
-        cmap_handle_t handle;
-        static int (*initialize)(cmap_handle_t *handle);
-        static int (*get_string)(cmap_handle_t handle,
-                                const char *string,
-                                char **name);
-        static int (*finalize)(cmap_handle_t handle);
+#ifndef MDASSEMBLE
+static struct cmap_hooks *cmap_hooks = NULL;
+static int is_cmap_hooks_ready = 0;
 
+void set_cmap_hooks(void)
+{
+       cmap_hooks = xmalloc(sizeof(struct cmap_hooks));
+       cmap_hooks->cmap_handle = dlopen("libcmap.so.4", RTLD_NOW | RTLD_LOCAL);
+       if (!cmap_hooks->cmap_handle)
+               return;
 
-        lib_handle = dlopen("libcmap.so.4", RTLD_NOW | RTLD_LOCAL);
-        if (!lib_handle)
-                return rv;
+       cmap_hooks->initialize = dlsym(cmap_hooks->cmap_handle, "cmap_initialize");
+       cmap_hooks->get_string = dlsym(cmap_hooks->cmap_handle, "cmap_get_string");
+       cmap_hooks->finalize = dlsym(cmap_hooks->cmap_handle, "cmap_finalize");
 
-        initialize = dlsym(lib_handle, "cmap_initialize");
-        if (!initialize)
-                goto out;
+       if (!cmap_hooks->initialize || !cmap_hooks->get_string ||
+           !cmap_hooks->finalize)
+               dlclose(cmap_hooks->cmap_handle);
+       else
+               is_cmap_hooks_ready = 1;
+}
 
-        get_string = dlsym(lib_handle, "cmap_get_string");
-        if (!get_string)
-                goto out;
+int get_cluster_name(char **cluster_name)
+{
+        int rv = -1;
+       cmap_handle_t handle;
 
-        finalize = dlsym(lib_handle, "cmap_finalize");
-        if (!finalize)
-                goto out;
+       if (!is_cmap_hooks_ready)
+               return rv;
 
-        rv = initialize(&handle);
+        rv = cmap_hooks->initialize(&handle);
         if (rv != CS_OK)
                 goto out;
 
-        rv = get_string(handle, "totem.cluster_name", cluster_name);
+        rv = cmap_hooks->get_string(handle, "totem.cluster_name", cluster_name);
         if (rv != CS_OK) {
                 free(*cluster_name);
                 rv = -1;
@@ -2038,9 +2251,36 @@ int get_cluster_name(char **cluster_name)
 
         rv = 0;
 name_err:
-        finalize(handle);
+        cmap_hooks->finalize(handle);
 out:
-        dlclose(lib_handle);
         return rv;
 }
+
+void set_dlm_hooks(void)
+{
+       dlm_hooks = xmalloc(sizeof(struct dlm_hooks));
+       dlm_hooks->dlm_handle = dlopen("libdlm_lt.so.3", RTLD_NOW | RTLD_LOCAL);
+       if (!dlm_hooks->dlm_handle)
+               return;
+
+       dlm_hooks->create_lockspace = dlsym(dlm_hooks->dlm_handle, "dlm_create_lockspace");
+       dlm_hooks->release_lockspace = dlsym(dlm_hooks->dlm_handle, "dlm_release_lockspace");
+       dlm_hooks->ls_lock = dlsym(dlm_hooks->dlm_handle, "dlm_ls_lock");
+       dlm_hooks->ls_unlock = dlsym(dlm_hooks->dlm_handle, "dlm_ls_unlock");
+       dlm_hooks->ls_get_fd = dlsym(dlm_hooks->dlm_handle, "dlm_ls_get_fd");
+       dlm_hooks->dispatch = dlsym(dlm_hooks->dlm_handle, "dlm_dispatch");
+
+       if (!dlm_hooks->create_lockspace || !dlm_hooks->ls_lock ||
+           !dlm_hooks->ls_unlock || !dlm_hooks->release_lockspace ||
+           !dlm_hooks->ls_get_fd || !dlm_hooks->dispatch)
+               dlclose(dlm_hooks->dlm_handle);
+       else
+               is_dlm_hooks_ready = 1;
+}
+
+void set_hooks(void)
+{
+       set_dlm_hooks();
+       set_cmap_hooks();
+}
 #endif