git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - super-ddf.c
ddf: fix up detection of failed/missing devices.
[thirdparty/mdadm.git] / super-ddf.c
index 92741dd8f8ddd9569b2059c7fd6069bb129aecd5..792936452dc55abc56fab9d76c630ce7a5d5f343 100644 (file)
@@ -2281,6 +2281,40 @@ static int add_to_super_ddf(struct supertype *st,
        return 0;
 }
 
+static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
+{
+       struct ddf_super *ddf = st->sb;
+       struct dl *dl;
+
+       /* mdmon has noticed that this disk (dk->major/dk->minor) has
+        * disappeared from the container.  Queue a DDF_PHYS_RECORDS_MAGIC
+        * update marking it DDF_Missing; ddf_process_update does the
+        * rest: it edits the metadata, closes the file handle and
+        * attaches the memory where free_updates will free it.
+        * Returns 0 on success, or -1 if the disk is not in ddf->dlist
+        * or the update record cannot be allocated.
+        */
+       for (dl = ddf->dlist; dl ; dl = dl->next)
+               if (dl->major == dk->major &&
+                   dl->minor == dk->minor)
+                       break;
+       if (!dl)
+               return -1;
+
+       if (st->update_tail) {
+               int len = (sizeof(struct phys_disk) +
+                          sizeof(struct phys_disk_entry));
+               struct phys_disk *pd = malloc(len);
+               if (!pd) /* out of memory: fail instead of writing via NULL */
+                       return -1;
+               pd->magic = DDF_PHYS_RECORDS_MAGIC;
+               pd->used_pdes = __cpu_to_be16(dl->pdnum);
+               pd->entries[0].state = __cpu_to_be16(DDF_Missing);
+               append_metadata_update(st, pd, len);
+       }
+       return 0;
+}
+
 /*
  * This is the write_init_super method for a ddf container.  It is
  * called when creating a container or adding another device to a
@@ -2999,23 +3033,33 @@ static struct mdinfo *container_content_ddf(struct supertype *st)
                        struct mdinfo *dev;
                        struct dl *d;
                        int stt;
+                       int pd;
 
                        if (vc->conf.phys_refnum[i] == 0xFFFFFFFF)
                                continue;
 
-                       for (d = ddf->dlist; d ; d=d->next)
-                               if (d->disk.refnum == vc->conf.phys_refnum[i])
+                       for (pd = __be16_to_cpu(ddf->phys->used_pdes);
+                            pd--;)
+                               if (ddf->phys->entries[pd].refnum
+                                   == vc->conf.phys_refnum[i])
                                        break;
-                       if (d == NULL)
-                               /* Haven't found that one yet, maybe there are others */
+                       if (pd < 0)
                                continue;
-                       stt = __be16_to_cpu(ddf->phys->entries[d->pdnum].state);
+
+                       stt = __be16_to_cpu(ddf->phys->entries[pd].state);
                        if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
                            != DDF_Online)
                                continue;
 
                        this->array.working_disks++;
 
+                       for (d = ddf->dlist; d ; d=d->next)
+                               if (d->disk.refnum == vc->conf.phys_refnum[i])
+                                       break;
+                       if (d == NULL)
+                               /* Haven't found that one yet, maybe there are others */
+                               continue;
+
                        dev = malloc(sizeof(*dev));
                        memset(dev, 0, sizeof(*dev));
                        dev->next = this->devs;
@@ -3151,6 +3195,9 @@ static int ddf_set_array_state(struct active_array *a, int consistent)
        return consistent;
 }
 
+#define container_of(ptr, type, member) ({                      \
+        const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
+        (type *)( (char *)__mptr - offsetof(type,member) );}) /* pointer to 'member' -> pointer to its enclosing 'type' */
 /*
  * The state of each disk is stored in the global phys_disk structure
  * in phys_disk.entries[n].state.
@@ -3172,20 +3219,43 @@ static void ddf_set_disk(struct active_array *a, int n, int state)
        struct vd_config *vc = find_vdcr(ddf, inst);
        int pd = find_phys(ddf, vc->phys_refnum[n]);
        int i, st, working;
+       struct mdinfo *mdi;
+       struct dl *dl;
 
        if (vc == NULL) {
                dprintf("ddf: cannot find instance %d!!\n", inst);
                return;
        }
-       if (pd < 0) {
-               /* disk doesn't currently exist. If it is now in_sync,
-                * insert it. */
+       /* Find the matching slot in 'info'. */
+       for (mdi = a->info.devs; mdi; mdi = mdi->next)
+               if (mdi->disk.raid_disk == n)
+                       break;
+       if (!mdi)
+               return;
+
+       /* and find the 'dl' entry corresponding to that. */
+       for (dl = ddf->dlist; dl; dl = dl->next)
+               if (mdi->state_fd >= 0 &&
+                   mdi->disk.major == dl->major &&
+                   mdi->disk.minor == dl->minor)
+                       break;
+       if (!dl)
+               return;
+
+       if (pd < 0 || pd != dl->pdnum) {
+               /* disk doesn't currently exist or has changed.
+                * If it is now in_sync, insert it. */
                if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
-                       /* Find dev 'n' in a->info->devs, determine the
-                        * ddf refnum, and set vc->phys_refnum and update
-                        * phys->entries[]
-                        */
-                       /* FIXME */
+                       struct vcl *vcl;
+                       pd = dl->pdnum;
+                       vc->phys_refnum[n] = dl->disk.refnum;
+                       vcl = container_of(vc, struct vcl, conf);
+                       vcl->lba_offset[n] = mdi->data_offset;
+                       ddf->phys->entries[pd].type &=
+                               ~__cpu_to_be16(DDF_Global_Spare);
+                       ddf->phys->entries[pd].type |=
+                               __cpu_to_be16(DDF_Active_in_VD);
+                       ddf->updates_pending = 1;
                }
        } else {
                int old = ddf->phys->entries[pd].state;
@@ -3228,6 +3298,8 @@ static void ddf_set_disk(struct active_array *a, int n, int state)
        case DDF_RAID1:
                if (working == 0)
                        state = DDF_state_failed;
+               else if (working == 2 && state == DDF_state_degraded)
+                       state = DDF_state_part_optimal;
                break;
        case DDF_RAID4:
        case DDF_RAID5:
@@ -3280,8 +3352,8 @@ static void ddf_process_update(struct supertype *st,
         * our actions.
         * Possible update are:
         *  DDF_PHYS_RECORDS_MAGIC
-        *    Add a new physical device.  Changes to this record
-        *    only happen implicitly.
+        *    Add a new physical device or remove an old one.
+        *    Changes to this record only happen implicitly.
         *    used_pdes is the device number.
         *  DDF_VIRT_RECORDS_MAGIC
         *    Add a new VD.  Possibly also change the 'access' bits.
@@ -3311,6 +3383,7 @@ static void ddf_process_update(struct supertype *st,
        struct dl *dl;
        unsigned int mppe;
        unsigned int ent;
+       unsigned int pdnum, pd2;
 
        dprintf("Process update %x\n", *magic);
 
@@ -3325,6 +3398,25 @@ static void ddf_process_update(struct supertype *st,
                ent = __be16_to_cpu(pd->used_pdes);
                if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
                        return;
+               if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
+                       struct dl **dlp;
+                       /* removing this disk. */
+                       ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
+                       for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
+                               struct dl *dl = *dlp;
+                               if (dl->pdnum == (signed)ent) {
+                                       close(dl->fd);
+                                       dl->fd = -1;
+                                       /* FIXME this doesn't free
+                                        * dl->devname */
+                                       update->space = dl;
+                                       *dlp = dl->next;
+                                       break;
+                               }
+                       }
+                       ddf->updates_pending = 1;
+                       return;
+               }
                if (!all_ff(ddf->phys->entries[ent].guid))
                        return;
                ddf->phys->entries[ent] = pd->entries[0];
@@ -3392,19 +3484,48 @@ static void ddf_process_update(struct supertype *st,
                        memcpy(&vcl->conf, vc, update->len);
                        vcl->lba_offset = (__u64*)
                                &vcl->conf.phys_refnum[mppe];
+                       for (ent = 0;
+                            ent < __be16_to_cpu(ddf->virt->populated_vdes);
+                            ent++)
+                               if (memcmp(vc->guid, ddf->virt->entries[ent].guid,
+                                          DDF_GUID_LEN) == 0) {
+                                       vcl->vcnum = ent;
+                                       break;
+                               }
                        ddf->conflist = vcl;
                }
+               /* Set DDF_Transition on all Failed devices - to help
+                * us detect those that are no longer in use
+                */
+               for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
+                       if (ddf->phys->entries[pdnum].state
+                           & __be16_to_cpu(DDF_Failed))
+                               ddf->phys->entries[pdnum].state
+                                       |= __be16_to_cpu(DDF_Transition);
                /* Now make sure vlist is correct for each dl. */
                for (dl = ddf->dlist; dl; dl = dl->next) {
                        unsigned int dn;
                        unsigned int vn = 0;
+                       int in_degraded = 0;
                        for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
                                for (dn=0; dn < ddf->mppe ; dn++)
                                        if (vcl->conf.phys_refnum[dn] ==
                                            dl->disk.refnum) {
+                                               int vstate;
                                                dprintf("dev %d has %p at %d\n",
                                                        dl->pdnum, vcl, vn);
+                                               /* Clear the Transition flag */
+                                               if (ddf->phys->entries[dl->pdnum].state
+                                                   & __be16_to_cpu(DDF_Failed))
+                                                       ddf->phys->entries[dl->pdnum].state &=
+                                                               ~__be16_to_cpu(DDF_Transition);
+
                                                dl->vlist[vn++] = vcl;
+                                               vstate = ddf->virt->entries[vcl->vcnum].state
+                                                       & DDF_state_mask;
+                                               if (vstate == DDF_state_degraded ||
+                                                   vstate == DDF_state_part_optimal)
+                                                       in_degraded = 1;
                                                break;
                                        }
                        while (vn < ddf->max_part)
@@ -3412,8 +3533,14 @@ static void ddf_process_update(struct supertype *st,
                        if (dl->vlist[0]) {
                                ddf->phys->entries[dl->pdnum].type &=
                                        ~__cpu_to_be16(DDF_Global_Spare);
-                               ddf->phys->entries[dl->pdnum].type |=
-                                       __cpu_to_be16(DDF_Active_in_VD);
+                               if (!(ddf->phys->entries[dl->pdnum].type &
+                                     __cpu_to_be16(DDF_Active_in_VD))) {
+                                           ddf->phys->entries[dl->pdnum].type |=
+                                                   __cpu_to_be16(DDF_Active_in_VD);
+                                           if (in_degraded)
+                                                   ddf->phys->entries[dl->pdnum].state |=
+                                                           __cpu_to_be16(DDF_Rebuilding);
+                                   }
                        }
                        if (dl->spare) {
                                ddf->phys->entries[dl->pdnum].type &=
@@ -3429,6 +3556,33 @@ static void ddf_process_update(struct supertype *st,
                                                       DDF_Active_in_VD);
                        }
                }
+
+               /* Now remove any 'Failed' devices that are not part
+                * of any VD.  They will have the Transition flag set.
+                * Once done, we need to update all dl->pdnum numbers.
+                */
+               pd2 = 0;
+               for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
+                       if ((ddf->phys->entries[pdnum].state
+                            & __be16_to_cpu(DDF_Failed))
+                           && (ddf->phys->entries[pdnum].state
+                               & __be16_to_cpu(DDF_Transition)))
+                               /* skip this one */;
+                       else if (pdnum == pd2)
+                               pd2++;
+                       else {
+                               ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
+                               for (dl = ddf->dlist; dl; dl = dl->next)
+                                       if (dl->pdnum == (int)pdnum)
+                                               dl->pdnum = pd2;
+                               pd2++;
+                       }
+               ddf->phys->used_pdes = __cpu_to_be16(pd2);
+               while (pd2 < pdnum) {
+                       memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
+                       pd2++;
+               }
+
                ddf->updates_pending = 1;
                break;
        case DDF_SPARE_ASSIGN_MAGIC:
@@ -3585,13 +3739,14 @@ static struct mdinfo *ddf_activate_spare(struct active_array *a,
                                esize = ex[j].start - pos;
                                if (esize >= a->info.component_size)
                                        break;
-                               pos = ex[i].start + ex[i].size;
-                               i++;
-                       } while (ex[i-1].size);
+                               pos = ex[j].start + ex[j].size;
+                               j++;
+                       } while (ex[j-1].size);
 
                        free(ex);
                        if (esize < a->info.component_size) {
-                               dprintf("%x:%x has no room: %llu %llu\n", dl->major, dl->minor,
+                               dprintf("%x:%x has no room: %llu %llu\n",
+                                       dl->major, dl->minor,
                                        esize, a->info.component_size);
                                /* No room */
                                continue;
@@ -3695,6 +3850,7 @@ struct superswitch super_ddf = {
        .validate_geometry = validate_geometry_ddf,
        .write_init_super = write_init_super_ddf,
        .add_to_super   = add_to_super_ddf,
+       .remove_from_super = remove_from_super_ddf,
 #endif
        .match_home     = match_home_ddf,
        .uuid_from_super= uuid_from_super_ddf,