git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - util.c
Show all bitmaps while examining bitmap
[thirdparty/mdadm.git] / util.c
diff --git a/util.c b/util.c
index 7da69ac1c7569b30c826ba9ff3c82456ca641b08..9ec4aefde8c1fb21ccf4d5ec8fe272cbefaf0a72 100644 (file)
--- a/util.c
+++ b/util.c
@@ -1,7 +1,7 @@
 /*
  * mdadm - manage Linux "md" devices aka RAID arrays.
  *
- * Copyright (C) 2001-2012 Neil Brown <neilb@suse.de>
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
  *
  *
  *    This program is free software; you can redistribute it and/or modify
 #include       <sys/wait.h>
 #include       <sys/un.h>
 #include       <sys/resource.h>
+#include       <sys/vfs.h>
+#include       <linux/magic.h>
 #include       <ctype.h>
 #include       <dirent.h>
 #include       <signal.h>
+#include       <dlfcn.h>
+#include       <stdint.h>
+#ifdef NO_COROSYNC
+ typedef uint64_t cmap_handle_t;
+ #define CS_OK 1
+#else
+ #include      <corosync/cmap.h>
+#endif
+
 
 /*
  * following taken from linux/blkpg.h because they aren't
@@ -305,7 +316,7 @@ int test_partition(int fd)
        if (ioctl(fd, BLKPG, &a) == 0)
                /* Very unlikely, but not a partition */
                return 0;
-       if (errno == ENXIO)
+       if (errno == ENXIO || errno == ENOTTY)
                /* not a partition */
                return 0;
 
@@ -366,6 +377,13 @@ int enough(int level, int raid_disks, int layout, int clean, char *avail)
        case 1:
                return avail_disks >= 1;
        case 4:
+               if (avail_disks == raid_disks - 1 &&
+                   !avail[raid_disks - 1])
+                       /* If just the parity device is missing, then we
+                        * have enough, even if not clean
+                        */
+                       return 1;
+               /* FALL THROUGH */
        case 5:
                if (clean)
                        return avail_disks >= raid_disks-1;
@@ -385,7 +403,6 @@ int enough_fd(int fd)
 {
        struct mdu_array_info_s array;
        struct mdu_disk_info_s disk;
-       int avail_disks = 0;
        int i, rv;
        char *avail;
 
@@ -405,7 +422,6 @@ int enough_fd(int fd)
                        continue;
                if (disk.raid_disk < 0 || disk.raid_disk >= array.raid_disks)
                        continue;
-               avail_disks++;
                avail[disk.raid_disk] = 1;
        }
        /* This is used on an active array, so assume it is clean */
@@ -509,7 +525,8 @@ int check_ext2(int fd, char *name)
         */
        unsigned char sb[1024];
        time_t mtime;
-       int size, bsize;
+       unsigned long long size;
+       int bsize;
        if (lseek(fd, 1024,0)!= 1024)
                return 0;
        if (read(fd, sb, 1024)!= 1024)
@@ -520,10 +537,10 @@ int check_ext2(int fd, char *name)
        mtime = sb[44]|(sb[45]|(sb[46]|sb[47]<<8)<<8)<<8;
        bsize = sb[24]|(sb[25]|(sb[26]|sb[27]<<8)<<8)<<8;
        size = sb[4]|(sb[5]|(sb[6]|sb[7]<<8)<<8)<<8;
+       size <<= bsize;
        pr_err("%s appears to contain an ext2fs file system\n",
                name);
-       cont_err("size=%dK  mtime=%s",
-               size*(1<<bsize), ctime(&mtime));
+       cont_err("size=%lluK  mtime=%s", size, ctime(&mtime));
        return 1;
 }
 
@@ -536,7 +553,7 @@ int check_reiser(int fd, char *name)
         *
         */
        unsigned char sb[1024];
-       unsigned long size;
+       unsigned long long size;
        if (lseek(fd, 64*1024, 0) != 64*1024)
                return 0;
        if (read(fd, sb, 1024) != 1024)
@@ -546,7 +563,7 @@ int check_reiser(int fd, char *name)
                return 0;
        pr_err("%s appears to contain a reiserfs file system\n",name);
        size = sb[0]|(sb[1]|(sb[2]|sb[3]<<8)<<8)<<8;
-       cont_err("size = %luK\n", size*4);
+       cont_err("size = %lluK\n", size*4);
 
        return 1;
 }
@@ -670,13 +687,13 @@ char *human_size(long long bytes)
        if (bytes < 5000*1024)
                buf[0] = 0;
        else if (bytes < 2*1024LL*1024LL*1024LL) {
-               long cMiB = (bytes / ( (1LL<<20) / 200LL ) +1) /2;
+               long cMiB = (bytes * 200LL / (1LL<<20) + 1) / 2;
                long cMB  = (bytes / ( 1000000LL / 200LL ) +1) /2;
                snprintf(buf, sizeof(buf), " (%ld.%02ld MiB %ld.%02ld MB)",
                        cMiB/100 , cMiB % 100,
                        cMB/100, cMB % 100);
        } else {
-               long cGiB = (bytes / ( (1LL<<30) / 200LL ) +1) /2;
+               long cGiB = (bytes * 200LL / (1LL<<30) +1) / 2;
                long cGB  = (bytes / (1000000000LL/200LL ) +1) /2;
                snprintf(buf, sizeof(buf), " (%ld.%02ld GiB %ld.%02ld GB)",
                        cGiB/100 , cGiB % 100,
@@ -705,11 +722,11 @@ char *human_size_brief(long long bytes, int prefix)
                buf[0] = 0;
        else if (prefix == IEC) {
                if (bytes < 2*1024LL*1024LL*1024LL) {
-                       long cMiB = (bytes / ( (1LL<<20) / 200LL ) +1) /2;
+                       long cMiB = (bytes * 200LL / (1LL<<20) +1) /2;
                        snprintf(buf, sizeof(buf), "%ld.%02ldMiB",
                                cMiB/100 , cMiB % 100);
                } else {
-                       long cGiB = (bytes / ( (1LL<<30) / 200LL ) +1) /2;
+                       long cGiB = (bytes * 200LL / (1LL<<30) +1) /2;
                        snprintf(buf, sizeof(buf), "%ld.%02ldGiB",
                                        cGiB/100 , cGiB % 100);
                }
@@ -778,7 +795,6 @@ int get_data_disks(int level, int layout, int raid_disks)
        return data_disks;
 }
 
-
 int devnm2devid(char *devnm)
 {
        /* First look in /sys/block/$DEVNM/dev for %d:%d
@@ -869,47 +885,22 @@ void put_md_name(char *name)
        if (strncmp(name, "/dev/.tmp.md", 12) == 0)
                unlink(name);
 }
+#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
 
-char *find_free_devnm(int use_partitions)
+int get_maj_min(char *dev, int *major, int *minor)
 {
-       static char devnm[32];
-       int devnum;
-       for (devnum = 127; devnum != 128;
-            devnum = devnum ? devnum-1 : (1<<20)-1) {
-
-               if (use_partitions)
-                       sprintf(devnm, "md_d%d", devnum);
-               else
-                       sprintf(devnm, "md%d", devnum);
-               if (mddev_busy(devnm))
-                       continue;
-               if (!conf_name_is_free(devnm))
-                       continue;
-               if (!use_udev()) {
-                       /* make sure it is new to /dev too, at least as a
-                        * non-standard */
-                       int devid = devnm2devid(devnm);
-                       if (devid) {
-                               char *dn = map_dev(major(devid),
-                                                  minor(devid), 0);
-                               if (dn && ! is_standard(dn, NULL))
-                                       continue;
-                       }
-               }
-               break;
-       }
-       if (devnum == 128)
-               return NULL;
-       return devnm;
+       char *e;
+       *major = strtoul(dev, &e, 0);
+       return (e > dev && *e == ':' && e[1] &&
+               (*minor = strtoul(e+1, &e, 0)) >= 0 &&
+               *e == 0);
 }
-#endif /* !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) */
 
 int dev_open(char *dev, int flags)
 {
        /* like 'open', but if 'dev' matches %d:%d, create a temp
         * block device and open that
         */
-       char *e;
        int fd = -1;
        char devname[32];
        int major;
@@ -918,10 +909,7 @@ int dev_open(char *dev, int flags)
        if (!dev) return -1;
        flags |= O_DIRECT;
 
-       major = strtoul(dev, &e, 0);
-       if (e > dev && *e == ':' && e[1] &&
-           (minor = strtoul(e+1, &e, 0)) >= 0 &&
-           *e == 0) {
+       if (get_maj_min(dev, &major, &minor)) {
                snprintf(devname, sizeof(devname), "/dev/.tmp.md.%d:%d:%d",
                         (int)getpid(), major, minor);
                if (mknod(devname, S_IFBLK|0600, makedev(major, minor)) == 0) {
@@ -963,6 +951,7 @@ int open_dev_excl(char *devnm)
        int i;
        int flags = O_RDWR;
        int devid = devnm2devid(devnm);
+       long delay = 1000;
 
        sprintf(buf, "%d:%d", major(devid), minor(devid));
        for (i = 0 ; i < 25 ; i++) {
@@ -975,7 +964,9 @@ int open_dev_excl(char *devnm)
                }
                if (errno != EBUSY)
                        return fd;
-               usleep(200000);
+               usleep(delay);
+               if (delay < 200000)
+                       delay *= 2;
        }
        return -1;
 }
@@ -998,6 +989,7 @@ void wait_for(char *dev, int fd)
 {
        int i;
        struct stat stb_want;
+       long delay = 1000;
 
        if (fstat(fd, &stb_want) != 0 ||
            (stb_want.st_mode & S_IFMT) != S_IFBLK)
@@ -1009,10 +1001,12 @@ void wait_for(char *dev, int fd)
                    (stb.st_mode & S_IFMT) == S_IFBLK &&
                    (stb.st_rdev == stb_want.st_rdev))
                        return;
-               usleep(200000);
+               usleep(delay);
+               if (delay < 200000)
+                       delay *= 2;
        }
        if (i == 25)
-               dprintf("%s: timeout waiting for %s\n", __func__, dev);
+               dprintf("timeout waiting for %s\n", dev);
 }
 
 struct superswitch *superlist[] =
@@ -1119,6 +1113,7 @@ struct supertype *dup_super(struct supertype *orig)
        st->max_devs = orig->max_devs;
        st->minor_version = orig->minor_version;
        st->ignore_hw_compat = orig->ignore_hw_compat;
+       st->data_offset = orig->data_offset;
        st->sb = NULL;
        st->info = NULL;
        return st;
@@ -1714,8 +1709,8 @@ int start_mdmon(char *devnm)
        char pathbuf[1024];
        char *paths[4] = {
                pathbuf,
-               "/sbin/mdmon",
-               "mdmon",
+               BINDIR "/mdmon",
+               "./mdmon",
                NULL
        };
 
@@ -1736,36 +1731,38 @@ int start_mdmon(char *devnm)
                pathbuf[0] = '\0';
 
        /* First try to run systemctl */
-       switch(fork()) {
-       case 0:
-               /* FIXME yuk. CLOSE_EXEC?? */
-               skipped = 0;
-               for (i = 3; skipped < 20; i++)
-                       if (close(i) < 0)
-                               skipped++;
-                       else
-                               skipped = 0;
-
-               /* Don't want to see error messages from systemctl.
-                * If the service doesn't exist, we start mdmon ourselves.
-                */
-               close(2);
-               open("/dev/null", O_WRONLY);
-               snprintf(pathbuf, sizeof(pathbuf), "mdmon@%s.service",
-                        devnm);
-               status = execl("/usr/bin/systemctl", "systemctl", "start",
-                              pathbuf, NULL);
-               status = execl("/bin/systemctl", "systemctl", "start",
-                              pathbuf, NULL);
-               exit(1);
-       case -1: pr_err("cannot run mdmon. "
-                        "Array remains readonly\n");
-               return -1;
-       default: /* parent - good */
-               pid = wait(&status);
-               if (pid >= 0 && status == 0)
-                       return 0;
-       }
+       if (!check_env("MDADM_NO_SYSTEMCTL"))
+               switch(fork()) {
+               case 0:
+                       /* FIXME yuk. CLOSE_EXEC?? */
+                       skipped = 0;
+                       for (i = 3; skipped < 20; i++)
+                               if (close(i) < 0)
+                                       skipped++;
+                               else
+                                       skipped = 0;
+
+                       /* Don't want to see error messages from
+                        * systemctl.  If the service doesn't exist,
+                        * we start mdmon ourselves.
+                        */
+                       close(2);
+                       open("/dev/null", O_WRONLY);
+                       snprintf(pathbuf, sizeof(pathbuf), "mdmon@%s.service",
+                                devnm);
+                       status = execl("/usr/bin/systemctl", "systemctl",
+                                      "start",
+                                      pathbuf, NULL);
+                       status = execl("/bin/systemctl", "systemctl", "start",
+                                      pathbuf, NULL);
+                       exit(1);
+               case -1: pr_err("cannot run mdmon. Array remains readonly\n");
+                       return -1;
+               default: /* parent - good */
+                       pid = wait(&status);
+                       if (pid >= 0 && status == 0)
+                               return 0;
+               }
 
        /* That failed, try running mdmon directly */
        switch(fork()) {
@@ -1780,18 +1777,16 @@ int start_mdmon(char *devnm)
 
                for (i = 0; paths[i]; i++)
                        if (paths[i][0]) {
-                               execl(paths[i], "mdmon",
+                               execl(paths[i], paths[i],
                                      devnm, NULL);
                        }
                exit(1);
-       case -1: pr_err("cannot run mdmon. "
-                        "Array remains readonly\n");
+       case -1: pr_err("cannot run mdmon. Array remains readonly\n");
                return -1;
        default: /* parent - good */
                pid = wait(&status);
                if (pid < 0 || status != 0) {
-                       pr_err("failed to launch mdmon. "
-                              "Array remains readonly\n");
+                       pr_err("failed to launch mdmon. Array remains readonly\n");
                        return -1;
                }
        }
@@ -1863,8 +1858,7 @@ int experimental(void)
        if (check_env("MDADM_EXPERIMENTAL"))
                return 1;
        else {
-               pr_err("To use this feature MDADM_EXPERIMENTAL"
-                               " environment variable has to be defined.\n");
+               pr_err("To use this feature MDADM_EXPERIMENTAL environment variable has to be defined.\n");
                return 0;
        }
 }
@@ -1964,3 +1958,78 @@ void enable_fds(int devices)
        lim.rlim_cur = fds;
        setrlimit(RLIMIT_NOFILE, &lim);
 }
+
+int in_initrd(void)
+{
+       /* Based on a similar function in systemd.  f_type is a signed long
+        * on s390x and MIPS, so compare RAMFS_MAGIC (0x858458f6) on the low
+        * 32 bits only to avoid sign extension problems. */
+       struct statfs rootfs;
+       unsigned long fstype;
+       if (statfs("/", &rootfs) < 0)
+               return 0;
+       fstype = (unsigned long)rootfs.f_type;
+       return fstype == TMPFS_MAGIC ||
+               (fstype & 0xFFFFFFFFUL) == ((unsigned long)RAMFS_MAGIC & 0xFFFFFFFFUL);
+}
+
+void reopen_mddev(int mdfd)
+{
+       /* Re-open without any O_EXCL, but keep the same fd number.  If the
+        * new open lands on a different descriptor, move it onto mdfd and
+        * close the spare so it is not leaked. */
+       char *devnm = fd2devnm(mdfd);
+       int fd;
+       close(mdfd);
+       fd = open_dev(devnm);
+       if (fd >= 0 && fd != mdfd) {
+               dup2(fd, mdfd);
+               close(fd);
+       }
+}
+
+int get_cluster_name(char **cluster_name)
+{
+        /* Fetch "totem.cluster_name" through corosync's cmap API.
+         * libcmap is loaded at run time with dlopen() rather than
+         * being linked in.
+         *
+         * Returns 0 once cmap_get_string() has stored the name in
+         * *cluster_name; returns -1 on any failure, in which case
+         * this function neither frees nor resets *cluster_name.
+         */
+        void *lib_handle;
+        int rv = -1;
+        cmap_handle_t handle;
+        int (*initialize)(cmap_handle_t *handle);
+        int (*get_string)(cmap_handle_t handle,
+                          const char *string,
+                          char **name);
+        int (*finalize)(cmap_handle_t handle);
+
+        lib_handle = dlopen("libcmap.so.4", RTLD_NOW | RTLD_LOCAL);
+        if (!lib_handle)
+                return rv;
+
+        initialize = dlsym(lib_handle, "cmap_initialize");
+        get_string = dlsym(lib_handle, "cmap_get_string");
+        finalize = dlsym(lib_handle, "cmap_finalize");
+        if (!initialize || !get_string || !finalize)
+                goto out;
+
+        /* Never hand a raw corosync status back to the caller: every
+         * failure is reported as -1.
+         */
+        if (initialize(&handle) != CS_OK)
+                goto out;
+
+        /* On failure *cluster_name was not assigned by this function,
+         * so it must not be freed here: it may be uninitialised or
+         * still owned by the caller.
+         */
+        if (get_string(handle, "totem.cluster_name", cluster_name) == CS_OK)
+                rv = 0;
+
+        finalize(handle);
+out:
+        dlclose(lib_handle);
+        return rv;
+}