git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - super-ddf.c
Release 3.2.6 - stability release
[thirdparty/mdadm.git] / super-ddf.c
index 8c5456834ff7c294f0d9c66580dbf10e827fdca1..2b0dabf4cbbebd86b805907e2804ec8de2b3892a 100644 (file)
@@ -1336,18 +1336,21 @@ static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *m
 {
        struct ddf_super *ddf = st->sb;
        int map_disks = info->array.raid_disks;
+       __u32 *cptr;
 
        if (ddf->currentconf) {
                getinfo_super_ddf_bvd(st, info, map);
                return;
        }
+       memset(info, 0, sizeof(*info));
 
        info->array.raid_disks    = __be16_to_cpu(ddf->phys->used_pdes);
        info->array.level         = LEVEL_CONTAINER;
        info->array.layout        = 0;
        info->array.md_minor      = -1;
-       info->array.ctime         = DECADE + __be32_to_cpu(*(__u32*)
-                                                        (ddf->anchor.guid+16));
+       cptr = (__u32 *)(ddf->anchor.guid + 16);
+       info->array.ctime         = DECADE + __be32_to_cpu(*cptr);
+
        info->array.utime         = 0;
        info->array.chunk_size    = 0;
        info->container_enough    = 1;
@@ -1373,6 +1376,7 @@ static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *m
 
        info->recovery_start = MaxSector;
        info->reshape_active = 0;
+       info->recovery_blocked = 0;
        info->name[0] = 0;
 
        info->array.major_version = -1;
@@ -1405,7 +1409,9 @@ static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, cha
        int j;
        struct dl *dl;
        int map_disks = info->array.raid_disks;
+       __u32 *cptr;
 
+       memset(info, 0, sizeof(*info));
        /* FIXME this returns BVD info - what if we want SVD ?? */
 
        info->array.raid_disks    = __be16_to_cpu(vc->conf.prim_elmnt_count);
@@ -1413,8 +1419,8 @@ static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, cha
        info->array.layout        = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
                                                  info->array.raid_disks);
        info->array.md_minor      = -1;
-       info->array.ctime         = DECADE +
-               __be32_to_cpu(*(__u32*)(vc->conf.guid+16));
+       cptr = (__u32 *)(vc->conf.guid + 16);
+       info->array.ctime         = DECADE + __be32_to_cpu(*cptr);
        info->array.utime         = DECADE + __be32_to_cpu(vc->conf.timestamp);
        info->array.chunk_size    = 512 << vc->conf.chunk_shift;
        info->custom_array_size   = 0;
@@ -1428,23 +1434,26 @@ static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, cha
        }
 
        for (dl = ddf->dlist; dl ; dl = dl->next)
-               if (dl->raiddisk == info->disk.raid_disk)
+               if (dl->raiddisk == ddf->currentdev)
                        break;
+
        info->disk.major = 0;
        info->disk.minor = 0;
+       info->disk.state = 0;
        if (dl) {
                info->disk.major = dl->major;
                info->disk.minor = dl->minor;
+               info->disk.raid_disk = dl->raiddisk;
+               info->disk.number = dl->pdnum;
+               info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
        }
-//     info->disk.number = __be32_to_cpu(ddf->disk.refnum);
-//     info->disk.raid_disk = find refnum in the table and use index;
-//     info->disk.state = ???;
 
        info->container_member = ddf->currentconf->vcnum;
 
        info->recovery_start = MaxSector;
        info->resync_start = 0;
        info->reshape_active = 0;
+       info->recovery_blocked = 0;
        if (!(ddf->virt->entries[info->container_member].state
              & DDF_state_inconsistent)  &&
            (ddf->virt->entries[info->container_member].init_state
@@ -2186,6 +2195,7 @@ static int add_to_super_ddf(struct supertype *st,
        struct phys_disk_entry *pde;
        unsigned int n, i;
        struct stat stb;
+       __u32 *tptr;
 
        if (ddf->currentconf) {
                add_to_super_ddf_bvd(st, dk, fd, devname);
@@ -2214,8 +2224,9 @@ static int add_to_super_ddf(struct supertype *st,
        tm = localtime(&now);
        sprintf(dd->disk.guid, "%8s%04d%02d%02d",
                T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
-       *(__u32*)(dd->disk.guid + 16) = random32();
-       *(__u32*)(dd->disk.guid + 20) = random32();
+       tptr = (__u32 *)(dd->disk.guid + 16);
+       *tptr++ = random32();
+       *tptr = random32();
 
        do {
                /* Cannot be bothered finding a CRC of some irrelevant details*/
@@ -2276,13 +2287,46 @@ static int add_to_super_ddf(struct supertype *st,
        return 0;
 }
 
+static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
+{
+       struct ddf_super *ddf = st->sb;
+       struct dl *dl;
+
+       /* mdmon has noticed that this disk (dk->major/dk->minor) has
+        * disappeared from the container.  Queue a DDF_Missing update so
+        * it disappears from the metadata and internal data structures
+        * too; ddf_process_update does most of the work (edits metadata,
+        * closes the file handle, attaches the memory where free_updates
+        * will free it).  Returns -1 if the disk is not in ddf->dlist or
+        * the update record cannot be allocated, otherwise 0.
+        */
+       for (dl = ddf->dlist; dl ; dl = dl->next)
+               if (dl->major == dk->major && dl->minor == dk->minor)
+                       break;
+       if (!dl)
+               return -1;
+
+       if (st->update_tail) {
+               int len = (sizeof(struct phys_disk) +
+                          sizeof(struct phys_disk_entry));
+               /* zeroed so no uninitialised bytes end up in the update */
+               struct phys_disk *pd = calloc(1, len);
+
+               if (!pd)
+                       return -1;
+               pd->magic = DDF_PHYS_RECORDS_MAGIC;
+               pd->used_pdes = __cpu_to_be16(dl->pdnum);
+               pd->entries[0].state = __cpu_to_be16(DDF_Missing);
+               append_metadata_update(st, pd, len);
+       }
+       return 0;
+}
+
 /*
  * This is the write_init_super method for a ddf container.  It is
  * called when creating a container or adding another device to a
  * container.
  */
-
-static unsigned char null_conf[4096+512];
+#define NULL_CONF_SZ   4096
 
 static int __write_init_super_ddf(struct supertype *st)
 {
@@ -2295,6 +2339,12 @@ static int __write_init_super_ddf(struct supertype *st)
        int attempts = 0;
        int successes = 0;
        unsigned long long size, sector;
+       char *null_aligned;
+
+       if (posix_memalign((void**)&null_aligned, 4096, NULL_CONF_SZ) != 0) {
+               return -ENOMEM;
+       }
+       memset(null_aligned, 0xff, NULL_CONF_SZ);
 
        /* try to write updated metadata,
         * if we catch a failure move on to the next disk
@@ -2364,14 +2414,11 @@ static int __write_init_super_ddf(struct supertype *st)
                                if (write(fd, &c->conf, conf_size) < 0)
                                        break;
                        } else {
-                               char *null_aligned = (char*)((((unsigned long)null_conf)+511)&~511UL);
-                               if (null_conf[0] != 0xff)
-                                       memset(null_conf, 0xff, sizeof(null_conf));
                                unsigned int togo = conf_size;
-                               while (togo > sizeof(null_conf)-512) {
-                                       if (write(fd, null_aligned, sizeof(null_conf)-512) < 0)
+                               while (togo > NULL_CONF_SZ) {
+                                       if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
                                                break;
-                                       togo -= sizeof(null_conf)-512;
+                                       togo -= NULL_CONF_SZ;
                                }
                                if (write(fd, null_aligned, togo) < 0)
                                        break;
@@ -2390,6 +2437,7 @@ static int __write_init_super_ddf(struct supertype *st)
                        continue;
                successes++;
        }
+       free(null_aligned);
 
        return attempts != successes;
 }
@@ -2517,7 +2565,7 @@ static int reserve_space(struct supertype *st, int raiddisks,
                                continue;
                        /* This is bigger than 'size', see if there are enough */
                        cnt = 0;
-                       for (dl2 = dl; dl2 ; dl2=dl2->next)
+                       for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
                                if (dl2->esize >= dl->esize)
                                        cnt++;
                        if (cnt >= raiddisks)
@@ -2581,7 +2629,7 @@ static int validate_geometry_ddf(struct supertype *st,
        if (chunk && *chunk == UnSet)
                *chunk = DEFAULT_CHUNK;
 
-
+       if (level == -1000000) level = LEVEL_CONTAINER;
        if (level == LEVEL_CONTAINER) {
                /* Must be a fresh device to add to a container */
                return validate_geometry_ddf_container(st, level, layout,
@@ -2927,6 +2975,7 @@ static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray
                unsigned int j;
                struct mdinfo *this;
                char *ep;
+               __u32 *cptr;
 
                if (subarray &&
                    (strtoul(subarray, &ep, 10) != vc->vcnum ||
@@ -2946,8 +2995,8 @@ static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray
                this->array.md_minor      = -1;
                this->array.major_version = -1;
                this->array.minor_version = -2;
-               this->array.ctime         = DECADE +
-                       __be32_to_cpu(*(__u32*)(vc->conf.guid+16));
+               cptr = (__u32 *)(vc->conf.guid + 16);
+               this->array.ctime         = DECADE + __be32_to_cpu(*cptr);
                this->array.utime         = DECADE +
                        __be32_to_cpu(vc->conf.timestamp);
                this->array.chunk_size    = 512 << vc->conf.chunk_shift;
@@ -2985,23 +3034,33 @@ static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray
                        struct mdinfo *dev;
                        struct dl *d;
                        int stt;
+                       int pd;
 
                        if (vc->conf.phys_refnum[i] == 0xFFFFFFFF)
                                continue;
 
-                       for (d = ddf->dlist; d ; d=d->next)
-                               if (d->disk.refnum == vc->conf.phys_refnum[i])
+                       for (pd = __be16_to_cpu(ddf->phys->used_pdes);
+                            pd--;)
+                               if (ddf->phys->entries[pd].refnum
+                                   == vc->conf.phys_refnum[i])
                                        break;
-                       if (d == NULL)
-                               /* Haven't found that one yet, maybe there are others */
+                       if (pd < 0)
                                continue;
-                       stt = __be16_to_cpu(ddf->phys->entries[d->pdnum].state);
+
+                       stt = __be16_to_cpu(ddf->phys->entries[pd].state);
                        if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
                            != DDF_Online)
                                continue;
 
                        this->array.working_disks++;
 
+                       for (d = ddf->dlist; d ; d=d->next)
+                               if (d->disk.refnum == vc->conf.phys_refnum[i])
+                                       break;
+                       if (d == NULL)
+                               /* Haven't found that one yet, maybe there are others */
+                               continue;
+
                        dev = malloc(sizeof(*dev));
                        memset(dev, 0, sizeof(*dev));
                        dev->next = this->devs;
@@ -3177,7 +3236,8 @@ static void ddf_set_disk(struct active_array *a, int n, int state)
 
        /* and find the 'dl' entry corresponding to that. */
        for (dl = ddf->dlist; dl; dl = dl->next)
-               if (mdi->disk.major == dl->major &&
+               if (mdi->state_fd >= 0 &&
+                   mdi->disk.major == dl->major &&
                    mdi->disk.minor == dl->minor)
                        break;
        if (!dl)
@@ -3239,6 +3299,8 @@ static void ddf_set_disk(struct active_array *a, int n, int state)
        case DDF_RAID1:
                if (working == 0)
                        state = DDF_state_failed;
+               else if (working == 2 && state == DDF_state_degraded)
+                       state = DDF_state_part_optimal;
                break;
        case DDF_RAID4:
        case DDF_RAID5:
@@ -3291,8 +3353,8 @@ static void ddf_process_update(struct supertype *st,
         * our actions.
         * Possible update are:
         *  DDF_PHYS_RECORDS_MAGIC
-        *    Add a new physical device.  Changes to this record
-        *    only happen implicitly.
+        *    Add a new physical device or remove an old one.
+        *    Changes to this record only happen implicitly.
         *    used_pdes is the device number.
         *  DDF_VIRT_RECORDS_MAGIC
         *    Add a new VD.  Possibly also change the 'access' bits.
@@ -3322,6 +3384,7 @@ static void ddf_process_update(struct supertype *st,
        struct dl *dl;
        unsigned int mppe;
        unsigned int ent;
+       unsigned int pdnum, pd2;
 
        dprintf("Process update %x\n", *magic);
 
@@ -3336,6 +3399,25 @@ static void ddf_process_update(struct supertype *st,
                ent = __be16_to_cpu(pd->used_pdes);
                if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
                        return;
+               if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
+                       struct dl **dlp;
+                       /* removing this disk. */
+                       ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
+                       for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
+                               struct dl *dl = *dlp;
+                               if (dl->pdnum == (signed)ent) {
+                                       close(dl->fd);
+                                       dl->fd = -1;
+                                       /* FIXME this doesn't free
+                                        * dl->devname */
+                                       update->space = dl;
+                                       *dlp = dl->next;
+                                       break;
+                               }
+                       }
+                       ddf->updates_pending = 1;
+                       return;
+               }
                if (!all_ff(ddf->phys->entries[ent].guid))
                        return;
                ddf->phys->entries[ent] = pd->entries[0];
@@ -3413,17 +3495,38 @@ static void ddf_process_update(struct supertype *st,
                                }
                        ddf->conflist = vcl;
                }
+               /* Set DDF_Transition on all Failed devices - to help
+                * us detect those that are no longer in use
+                */
+               for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
+                       if (ddf->phys->entries[pdnum].state
+                           & __be16_to_cpu(DDF_Failed))
+                               ddf->phys->entries[pdnum].state
+                                       |= __be16_to_cpu(DDF_Transition);
                /* Now make sure vlist is correct for each dl. */
                for (dl = ddf->dlist; dl; dl = dl->next) {
                        unsigned int dn;
                        unsigned int vn = 0;
+                       int in_degraded = 0;
                        for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
                                for (dn=0; dn < ddf->mppe ; dn++)
                                        if (vcl->conf.phys_refnum[dn] ==
                                            dl->disk.refnum) {
+                                               int vstate;
                                                dprintf("dev %d has %p at %d\n",
                                                        dl->pdnum, vcl, vn);
+                                               /* Clear the Transition flag */
+                                               if (ddf->phys->entries[dl->pdnum].state
+                                                   & __be16_to_cpu(DDF_Failed))
+                                                       ddf->phys->entries[dl->pdnum].state &=
+                                                               ~__be16_to_cpu(DDF_Transition);
+
                                                dl->vlist[vn++] = vcl;
+                                               vstate = ddf->virt->entries[vcl->vcnum].state
+                                                       & DDF_state_mask;
+                                               if (vstate == DDF_state_degraded ||
+                                                   vstate == DDF_state_part_optimal)
+                                                       in_degraded = 1;
                                                break;
                                        }
                        while (vn < ddf->max_part)
@@ -3431,8 +3534,14 @@ static void ddf_process_update(struct supertype *st,
                        if (dl->vlist[0]) {
                                ddf->phys->entries[dl->pdnum].type &=
                                        ~__cpu_to_be16(DDF_Global_Spare);
-                               ddf->phys->entries[dl->pdnum].type |=
-                                       __cpu_to_be16(DDF_Active_in_VD);
+                               if (!(ddf->phys->entries[dl->pdnum].type &
+                                     __cpu_to_be16(DDF_Active_in_VD))) {
+                                           ddf->phys->entries[dl->pdnum].type |=
+                                                   __cpu_to_be16(DDF_Active_in_VD);
+                                           if (in_degraded)
+                                                   ddf->phys->entries[dl->pdnum].state |=
+                                                           __cpu_to_be16(DDF_Rebuilding);
+                                   }
                        }
                        if (dl->spare) {
                                ddf->phys->entries[dl->pdnum].type &=
@@ -3448,6 +3557,33 @@ static void ddf_process_update(struct supertype *st,
                                                       DDF_Active_in_VD);
                        }
                }
+
+               /* Now remove any 'Failed' devices that are not part
+                * of any VD.  They will have the Transition flag set.
+                * Once done, we need to update all dl->pdnum numbers.
+                */
+               pd2 = 0;
+               for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
+                       if ((ddf->phys->entries[pdnum].state
+                            & __be16_to_cpu(DDF_Failed))
+                           && (ddf->phys->entries[pdnum].state
+                               & __be16_to_cpu(DDF_Transition)))
+                               /* skip this one */;
+                       else if (pdnum == pd2)
+                               pd2++;
+                       else {
+                               ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
+                               for (dl = ddf->dlist; dl; dl = dl->next)
+                                       if (dl->pdnum == (int)pdnum)
+                                               dl->pdnum = pd2;
+                               pd2++;
+                       }
+               ddf->phys->used_pdes = __cpu_to_be16(pd2);
+               while (pd2 < pdnum) {
+                       memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
+                       pd2++;
+               }
+
                ddf->updates_pending = 1;
                break;
        case DDF_SPARE_ASSIGN_MAGIC:
@@ -3582,6 +3718,10 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
                        } else if (ddf->phys->entries[dl->pdnum].type &
                                   __cpu_to_be16(DDF_Global_Spare)) {
                                is_global = 1;
+                       } else if (!(ddf->phys->entries[dl->pdnum].state &
+                                    __cpu_to_be16(DDF_Failed))) {
+                               /* we can possibly use some of this */
+                               is_global = 1;
                        }
                        if ( ! (is_dedicated ||
                                (is_global && global_ok))) {
@@ -3604,13 +3744,14 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
                                esize = ex[j].start - pos;
                                if (esize >= a->info.component_size)
                                        break;
-                               pos = ex[i].start + ex[i].size;
-                               i++;
-                       } while (ex[i-1].size);
+                               pos = ex[j].start + ex[j].size;
+                               j++;
+                       } while (ex[j-1].size);
 
                        free(ex);
                        if (esize < a->info.component_size) {
-                               dprintf("%x:%x has no room: %llu %llu\n", dl->major, dl->minor,
+                               dprintf("%x:%x has no room: %llu %llu\n",
+                                       dl->major, dl->minor,
                                        esize, a->info.component_size);
                                /* No room */
                                continue;
@@ -3670,6 +3811,7 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
        mu->buf = malloc(ddf->conf_rec_len * 512);
        mu->len = ddf->conf_rec_len * 512;
        mu->space = NULL;
+       mu->space_list = NULL;
        mu->next = *updates;
        vc = find_vdcr(ddf, a->info.container_member);
        memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
@@ -3723,6 +3865,7 @@ struct superswitch super_ddf = {
        .validate_geometry = validate_geometry_ddf,
        .write_init_super = write_init_super_ddf,
        .add_to_super   = add_to_super_ddf,
+       .remove_from_super = remove_from_super_ddf,
        .load_container = load_container_ddf,
 #endif
        .match_home     = match_home_ddf,