* - When a device is added to the container, we add it to the metadata
* as a spare.
*
- * - assist with activating spares by opening relevant sysfs file.
+ * - Deal with degraded array
+ * We only do this when first noticing the array is degraded.
+ * This can be when we first see the array, when sync completes or
+ * when recovery completes.
+ *
+ * Check if number of failed devices suggests recovery is needed, and
+ * skip if not.
+ * Ask metadata to allocate a spare device
+ * Add device as not in_sync and give a role
+ * Update metadata.
+ * Open sysfs files and pass to monitor.
+ * Make sure that the monitor starts recovery.
*
* - Pass on metadata updates from external programs such as
* mdadm creating a new array.
free(aa);
}
+/*
+ * duplicate_aa - build a separate copy of an active_array.
+ *
+ * The array structure is copied by value, then the copy's 'next',
+ * 'replaces' and 'info.next' links are cleared.  The device list is
+ * rebuilt from freshly allocated copies of those source devices whose
+ * state_fd is non-negative; devices with a negative state_fd are left
+ * out of the copy.
+ *
+ * Returns the newly allocated array.
+ *
+ * NOTE(review): malloc() results are not checked here, matching the
+ * original behaviour; an allocation failure will fault on first use.
+ */
+static struct active_array *duplicate_aa(struct active_array *aa)
+{
+	struct active_array *newa = malloc(sizeof(*newa));
+	struct mdinfo **dp1, **dp2;
+
+	*newa = *aa;
+	newa->next = NULL;
+	newa->replaces = NULL;
+	newa->info.next = NULL;
+
+	/* dp2 always points at the link where the next copy is stored */
+	dp2 = &newa->info.devs;
+
+	for (dp1 = &aa->info.devs; *dp1; dp1 = &(*dp1)->next) {
+		struct mdinfo *d;
+		if ((*dp1)->state_fd < 0)
+			continue;
+
+		d = malloc(sizeof(*d));
+		*d = **dp1;
+		*dp2 = d;
+		dp2 = &d->next;
+	}
+	/* Terminate the new list explicitly.  The struct copies above carry
+	 * over the source's link values, so without this the copy would
+	 * still point into the original device list whenever the trailing
+	 * source devices (or all of them) were skipped.
+	 */
+	*dp2 = NULL;
+
+	return newa;
+}
+
+
static void write_wakeup(struct supertype *c)
{
static struct md_generic_cmd cmd = { .action = md_action_ping_monitor };
}
}
-void queue_metadata_update(struct metadata_update *mu)
+static void queue_metadata_update(struct metadata_update *mu)
{
struct metadata_update **qp;
* array ignoring any metadata on it.
* FIXME should we look for compatible metadata and take hints
* about spare assignment.... probably not.
- *
*/
if (mdstat->devcnt != container->devcnt) {
/* read /sys/block/NAME/md/dev-??/block/dev to find out
* being requested.
* Unfortunately decreases in raid_disks don't show up in
* mdstat until the reshape completes FIXME.
+ *
+ * Actually, we also want to handle degraded arrays here by
+ * trying to find and assign a spare.
+ * We do that whenever the monitor tells us to.
*/
// FIXME
a->info.array.raid_disks = mdstat->raid_disks;
a->info.array.chunk_size = mdstat->chunk_size;
// MORE
+ if (a->check_degraded) {
+ struct metadata_update *updates = NULL;
+ struct mdinfo *newdev;
+ struct active_array *newa;
+ wait_update_handled();
+ a->check_degraded = 0;
+
+ /* The array may not be degraded, this is just a good time
+ * to check.
+ */
+ newdev = a->container->ss->activate_spare(a, &updates);
+ if (newdev) {
+ struct mdinfo *d;
+ /* Cool, we can add a device or several. */
+ newa = duplicate_aa(a);
+ /* suspend recovery - maybe not needed */
+
+ /* Add device to array and set offset/size/slot.
+ * and open files for each newdev */
+ for (d = newdev; d ; d = d->next) {
+ struct mdinfo *newd;
+ if (sysfs_add_disk(&newa->info, d))
+ continue;
+ newd = newa->info.devs;
+ newd->state_fd = sysfs_open(a->devnum,
+ newd->sys_name,
+ "state");
+ newd->prev_state
+ = read_dev_state(newd->state_fd);
+ newd->curr_state = newd->prev_state;
+ }
+ queue_metadata_update(updates);
+ replace_array(a->container, a, newa);
+ sysfs_set_str(&a->info, NULL, "sync_action", "repair");
+ }
+ }
}
static void manage_new(struct mdstat_ent *mdstat,
"state");
newd->prev_state = read_dev_state(newd->state_fd);
- newd->curr_state = newd->curr_state;
+ newd->curr_state = newd->prev_state;
} else {
newd->state_fd = -1;
}
void (*process_update)(struct supertype *st,
struct metadata_update *update);
+ /* activate_spare will check if the array is degraded and, if it
+ * is, try to find some spare space in the container.
+ * On success, it adds appropriate updates (for process_update) to
+ * the 'updates' list and returns a list of 'mdinfo' identifying
+ * the device, or devices as there might be multiple missing
+ * devices and multiple spares available.
+ */
+ struct mdinfo *(*activate_spare)(struct active_array *a,
+ struct metadata_update **updates);
int major;
int swapuuid; /* true if uuid is bigending rather than hostendian */
* sync_action was 'resync' and becomes 'idle' and resync_start becomes
* MaxSector
* Notify metadata that sync is complete.
- * "Deal with Degraded"
*
* recovery completes
* sync_action changes from 'recover' to 'idle'
* Check each device state and mark metadata if 'faulty' or 'in_sync'.
- * "Deal with Degraded"
- *
- * deal with degraded array
- * We only do this when first noticing the array is degraded.
- * This can be when we first see the array, when sync completes or
- * when recovery completes.
- *
- * Check if number of failed devices suggests recovery is needed, and
- * skip if not.
- * Ask metadata for a spare device
- * Add device as not in_sync and give a role
- * Update metadata.
- * Start recovery.
*
* deal with resync
* This only happens on finding a new array... mdadm will have set
static int read_and_act(struct active_array *a)
{
- int check_degraded;
+ int check_degraded = 0;
int deactivate = 0;
struct mdinfo *mdi;
}
if (check_degraded) {
- // FIXME;
+ /* manager will do the actual check */
+ a->check_degraded = 1;
+ signal_manager();
}
a->container->ss->sync_metadata(a->container);
*ap = (*ap)->next;
discard_this = a->replaces;
a->replaces = NULL;
+ /* FIXME check if device->state_fd needs to be cleared? */
signal_manager();
}
if (a->container)