/* Device info for mdmon: */
int state_fd;
+ #define DS_FAULTY 1
+ #define DS_INSYNC 2
+ #define DS_WRITE_MOSTLY 4
+ #define DS_SPARE 8
+ #define DS_BLOCKED 16
+ #define DS_REMOVE 1024
int prev_state, curr_state, next_state;
};
void (*mark_clean)(struct active_array *a, unsigned long long sync_pos);
void (*mark_dirty)(struct active_array *a);
void (*mark_sync)(struct active_array *a, unsigned long long resync);
- void (*set_disk)(struct active_array *a, int n);
+ void (*set_disk)(struct active_array *a, int n, int state);
void (*sync_metadata)(struct active_array *a);
return (enum sync_action) match_word(buf, sync_actions);
}
-#define DS_FAULTY 1
-#define DS_INSYNC 2
-#define DS_WRITE_MOSTLY 4
-#define DS_SPARE 8
-#define DS_REMOVE 1024
-
int read_dev_state(int fd)
{
char buf[60];
cp = buf;
while (cp) {
- if (attr_match("faulty", cp))
+ if (attr_match(cp, "faulty"))
rv |= DS_FAULTY;
- if (attr_match("in_sync", cp))
+ if (attr_match(cp, "in_sync"))
rv |= DS_INSYNC;
- if (attr_match("write_mostly", cp))
+ if (attr_match(cp, "write_mostly"))
rv |= DS_WRITE_MOSTLY;
- if (attr_match("spare", cp))
+ if (attr_match(cp, "spare"))
rv |= DS_SPARE;
+ if (attr_match(cp, "blocked"))
+ rv |= DS_BLOCKED;
cp = strchr(cp, ',');
if (cp)
cp++;
*
* device fails
* detected by rd-N/state reporting "faulty"
- * mark device as 'failed' in metadata, the remove device
- * by writing 'remove' to rd/state.
+ * mark device as 'failed' in metadata, let the kernel release the
+ * device by writing '-blocked' to rd/state, and finally write 'remove' to
+ * rd/state
*
* sync completes
* sync_action was 'resync' and becomes 'idle' and resync_start becomes
a->curr_action = read_action(a->action_fd);
for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
mdi->next_state = 0;
- mdi->curr_state = read_dev_state(mdi->state_fd);
+ if (mdi->state_fd > 0)
+ mdi->curr_state = read_dev_state(mdi->state_fd);
}
if (a->curr_state <= inactive &&
if (a->curr_action == idle &&
a->prev_action == recover) {
for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
- a->container->ss->set_disk(a, mdi->disk.raid_disk);
+ a->container->ss->set_disk(a, mdi->disk.raid_disk,
+ mdi->curr_state);
if (! (mdi->curr_state & DS_INSYNC))
check_degraded = 1;
}
for (mdi = a->info.devs ; mdi ; mdi = mdi->next) {
if (mdi->curr_state & DS_FAULTY) {
- a->container->ss->set_disk(a, mdi->disk.raid_disk);
+ a->container->ss->set_disk(a, mdi->disk.raid_disk,
+ mdi->curr_state);
check_degraded = 1;
mdi->next_state = DS_REMOVE;
}
if (a->next_action != bad_action)
write_attr(sync_actions[a->next_action], a->action_fd);
for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
- if (mdi->next_state == DS_REMOVE)
- write_attr("remove", mdi->state_fd);
+ if (mdi->next_state == DS_REMOVE && mdi->state_fd > 0) {
+ int remove_err;
+
+ write_attr("-blocked", mdi->state_fd);
+ /* The kernel may not be able to remove the disk
+ * immediately; if the write fails we keep state_fd open
+ * and simply try again on the next event.
+ */
+ remove_err = write_attr("remove", mdi->state_fd);
+ if (!remove_err) {
+ close(mdi->state_fd);
+ mdi->state_fd = -1;
+ }
+ }
if (mdi->next_state & DS_INSYNC)
write_attr("+in_sync", mdi->state_fd);
}
}
}
-static void imsm_set_disk(struct active_array *a, int n)
+/*
+ * imsm_set_disk - fold the observed state of one array member into the
+ * in-memory IMSM metadata.
+ *
+ * @a:     array whose member is being reported
+ * @n:     slot of the member within the array's map; negative values are
+ *         ignored, values >= map->num_members are reported and ignored
+ * @state: DS_* bitmask for the member; only DS_FAULTY is examined here
+ *
+ * When DS_FAULTY is set and the disk is not yet flagged FAILED_DISK, the
+ * flag is recorded in the disk's status, the map state is re-evaluated
+ * (unless it is already IMSM_T_STATE_FAILED) and super->updates_pending
+ * is incremented.  Nothing is written out by this function itself.
+ */
+static void imsm_set_disk(struct active_array *a, int n, int state)
{
- fprintf(stderr, "imsm: set_disk %d\n", n);
+	int inst = a->info.container_member;
+	struct intel_super *super = a->container->sb;
+	struct imsm_dev *dev = get_imsm_dev(super->mpb, inst);
+	struct imsm_map *map = dev->vol.map;
+	struct imsm_disk *disk;
+	__u32 status;
+	int failed = 0;
+	int new_failure = 0;
+
+	/* a negative slot has nothing to record; stay silent as before */
+	if (n < 0)
+		return;
+
+	/* valid slots are 0..num_members-1: refuse anything beyond that
+	 * instead of indexing the map with an out-of-range slot
+	 */
+	if (n >= map->num_members) {
+		fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
+			n, map->num_members - 1);
+		return;
+	}
+
+	fprintf(stderr, "imsm: set_disk %d:%x\n", n, state);
+
+	disk = get_imsm_disk(super->mpb, get_imsm_disk_idx(map, n));
+
+	/* check if we have seen this failure before */
+	status = __le32_to_cpu(disk->status);
+	if ((state & DS_FAULTY) && !(status & FAILED_DISK)) {
+		status |= FAILED_DISK;
+		disk->status = __cpu_to_le32(status);
+		new_failure = 1;
+	}
+
+	/*
+	 * the number of failures has changed, count up 'failed' to determine
+	 * degraded / failed status (pointless once the map is already failed)
+	 */
+	if (new_failure && map->map_state != IMSM_T_STATE_FAILED)
+		failed = imsm_count_failed(super->mpb, map);
+
+	if (failed)
+		map->map_state = imsm_check_degraded(super->mpb, inst, failed);
+
+	/* flag the metadata as changed so the new failure gets recorded */
+	if (new_failure)
+		super->updates_pending++;
}
static int store_imsm_mpb(int fd, struct intel_super *super)