]> git.ipfire.org Git - thirdparty/mdadm.git/blobdiff - super-intel.c
Zeroout whole ppl space during creation/force assemble
[thirdparty/mdadm.git] / super-intel.c
index c84e7559fb93d970c67a6a80fb1d4cb68dc92a4a..65cdc9256df309c0e0f57fc0768c8fb6682a9eae 100644 (file)
@@ -92,6 +92,9 @@
 #define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056
 #define SECT_PER_MB_SHIFT 11
 #define MAX_SECTOR_SIZE 4096
+#define MULTIPLE_PPL_AREA_SIZE_IMSM (1024 * 1024) /* Size of the whole
+                                                  * multiple PPL area
+                                                  */
 
 /* Disk configuration info. */
 #define IMSM_MAX_DEVICES 255
@@ -207,6 +210,9 @@ struct imsm_dev {
 #define RWH_OFF 0
 #define RWH_DISTRIBUTED 1
 #define RWH_JOURNALING_DRIVE 2
+#define RWH_MULTIPLE_DISTRIBUTED 3
+#define RWH_MULTIPLE_PPLS_JOURNALING_DRIVE 4
+#define RWH_MULTIPLE_OFF 5
        __u8  rwh_policy; /* Raid Write Hole Policy */
        __u8  jd_serial[MAX_RAID_SERIAL_LEN]; /* Journal Drive serial number */
        __u8  filler1;
@@ -284,7 +290,7 @@ static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed"
                                 *  already been migrated and must
                                 *  be recovered from checkpoint area */
 
-#define PPL_ENTRY_SPACE (128 * 1024) /* Size of the PPL, without the header */
+#define PPL_ENTRY_SPACE (128 * 1024) /* Size of single PPL, without the header */
 
 struct migr_record {
        __u32 rec_status;           /* Status used to determine how to restart
@@ -1539,12 +1545,16 @@ static void print_imsm_dev(struct intel_super *super,
        printf("    Dirty State : %s\n", (dev->vol.dirty & RAIDVOL_DIRTY) ?
                                         "dirty" : "clean");
        printf("     RWH Policy : ");
-       if (dev->rwh_policy == RWH_OFF)
+       if (dev->rwh_policy == RWH_OFF || dev->rwh_policy == RWH_MULTIPLE_OFF)
                printf("off\n");
        else if (dev->rwh_policy == RWH_DISTRIBUTED)
                printf("PPL distributed\n");
        else if (dev->rwh_policy == RWH_JOURNALING_DRIVE)
                printf("PPL journaling drive\n");
+       else if (dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
+               printf("Multiple distributed PPLs\n");
+       else if (dev->rwh_policy == RWH_MULTIPLE_PPLS_JOURNALING_DRIVE)
+               printf("Multiple PPLs on journaling drive\n");
        else
                printf("<unknown:%d>\n", dev->rwh_policy);
 }
@@ -3294,10 +3304,16 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
        memset(info->uuid, 0, sizeof(info->uuid));
        info->recovery_start = MaxSector;
 
-       if (info->array.level == 5 && dev->rwh_policy == RWH_DISTRIBUTED) {
+       if (info->array.level == 5 &&
+           (dev->rwh_policy == RWH_DISTRIBUTED ||
+            dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)) {
                info->consistency_policy = CONSISTENCY_POLICY_PPL;
                info->ppl_sector = get_ppl_sector(super, super->current_vol);
-               info->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) >> 9;
+               if (dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
+                       info->ppl_size = MULTIPLE_PPL_AREA_SIZE_IMSM >> 9;
+               else
+                       info->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE)
+                                         >> 9;
        } else if (info->array.level <= 0) {
                info->consistency_policy = CONSISTENCY_POLICY_NONE;
        } else {
@@ -4023,7 +4039,7 @@ static void migrate(struct imsm_dev *dev, struct intel_super *super,
 
        /* duplicate and then set the target end state in map[0] */
        memcpy(dest, src, sizeof_imsm_map(src));
-       if (migr_type == MIGR_REBUILD || migr_type ==  MIGR_GEN_MIGR) {
+       if (migr_type == MIGR_GEN_MIGR) {
                __u32 ord;
                int i;
 
@@ -5390,9 +5406,9 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
        dev->my_vol_raid_dev_num = mpb->num_raid_devs_created;
 
        if (s->consistency_policy <= CONSISTENCY_POLICY_RESYNC) {
-               dev->rwh_policy = RWH_OFF;
+               dev->rwh_policy = RWH_MULTIPLE_OFF;
        } else if (s->consistency_policy == CONSISTENCY_POLICY_PPL) {
-               dev->rwh_policy = RWH_DISTRIBUTED;
+               dev->rwh_policy = RWH_MULTIPLE_DISTRIBUTED;
        } else {
                free(dev);
                free(dv);
@@ -6049,7 +6065,12 @@ static int write_init_ppl_imsm(struct supertype *st, struct mdinfo *info, int fd
        struct ppl_header *ppl_hdr;
        int ret;
 
-       ret = posix_memalign(&buf, 4096, PPL_HEADER_SIZE);
+       /* first clear entire ppl space */
+       ret = zero_disk_range(fd, info->ppl_sector, info->ppl_size);
+       if (ret)
+               return ret;
+
+       ret = posix_memalign(&buf, MAX_SECTOR_SIZE, PPL_HEADER_SIZE);
        if (ret) {
                pr_err("Failed to allocate PPL header buffer\n");
                return ret;
@@ -6090,11 +6111,14 @@ static int validate_ppl_imsm(struct supertype *st, struct mdinfo *info,
        struct imsm_dev *dev;
        struct imsm_map *map;
        __u32 idx;
+       unsigned int i;
+       unsigned long long ppl_offset = 0;
+       unsigned long long prev_gen_num = 0;
 
        if (disk->disk.raid_disk < 0)
                return 0;
 
-       if (posix_memalign(&buf, 4096, PPL_HEADER_SIZE)) {
+       if (posix_memalign(&buf, MAX_SECTOR_SIZE, PPL_HEADER_SIZE)) {
                pr_err("Failed to allocate PPL header buffer\n");
                return -1;
        }
@@ -6107,34 +6131,54 @@ static int validate_ppl_imsm(struct supertype *st, struct mdinfo *info,
        if (!d || d->index < 0 || is_failed(&d->disk))
                goto out;
 
-       if (lseek64(d->fd, info->ppl_sector * 512, SEEK_SET) < 0) {
-               perror("Failed to seek to PPL header location");
-               ret = -1;
-               goto out;
-       }
+       ret = 1;
+       while (ppl_offset < MULTIPLE_PPL_AREA_SIZE_IMSM) {
+               dprintf("Checking potential PPL at offset: %llu\n", ppl_offset);
 
-       if (read(d->fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
-               perror("Read PPL header failed");
-               ret = -1;
-               goto out;
-       }
+               if (lseek64(d->fd, info->ppl_sector * 512 + ppl_offset,
+                           SEEK_SET) < 0) {
+                       perror("Failed to seek to PPL header location");
+                       ret = -1;
+                       goto out;
+               }
 
-       ppl_hdr = buf;
+               if (read(d->fd, buf, PPL_HEADER_SIZE) != PPL_HEADER_SIZE) {
+                       perror("Read PPL header failed");
+                       ret = -1;
+                       goto out;
+               }
 
-       crc = __le32_to_cpu(ppl_hdr->checksum);
-       ppl_hdr->checksum = 0;
+               ppl_hdr = buf;
 
-       if (crc != ~crc32c_le(~0, buf, PPL_HEADER_SIZE)) {
-               dprintf("Wrong PPL header checksum on %s\n",
-                       d->devname);
-               ret = 1;
-       }
+               crc = __le32_to_cpu(ppl_hdr->checksum);
+               ppl_hdr->checksum = 0;
+
+               if (crc != ~crc32c_le(~0, buf, PPL_HEADER_SIZE)) {
+                       dprintf("Wrong PPL header checksum on %s\n",
+                               d->devname);
+                       goto out;
+               }
+
+               if (prev_gen_num > __le64_to_cpu(ppl_hdr->generation)) {
+                       /* previous was newest, it was already checked */
+                       goto out;
+               }
+
+               if ((__le32_to_cpu(ppl_hdr->signature) !=
+                             super->anchor->orig_family_num)) {
+                       dprintf("Wrong PPL header signature on %s\n",
+                               d->devname);
+                       ret = 1;
+                       goto out;
+               }
 
-       if (!ret && (__le32_to_cpu(ppl_hdr->signature) !=
-                     super->anchor->orig_family_num)) {
-               dprintf("Wrong PPL header signature on %s\n",
-                       d->devname);
-               ret = 1;
+               ret = 0;
+               prev_gen_num = __le64_to_cpu(ppl_hdr->generation);
+
+               ppl_offset += PPL_HEADER_SIZE;
+               for (i = 0; i < __le32_to_cpu(ppl_hdr->entries_count); i++)
+                       ppl_offset +=
+                                  __le32_to_cpu(ppl_hdr->entries[i].pp_size);
        }
 
 out:
@@ -7403,9 +7447,9 @@ static int update_subarray_imsm(struct supertype *st, char *subarray,
                        return 2;
 
                if (strcmp(update, "ppl") == 0)
-                       new_policy = RWH_DISTRIBUTED;
+                       new_policy = RWH_MULTIPLE_DISTRIBUTED;
                else
-                       new_policy = RWH_OFF;
+                       new_policy = RWH_MULTIPLE_OFF;
 
                if (st->update_tail) {
                        struct imsm_update_rwh_policy *u = xmalloc(sizeof(*u));
@@ -7669,7 +7713,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
                        } else {
                                info_d->component_size = blocks_per_member(map);
                        }
-                       info_d->consistency_policy = this->consistency_policy;
 
                        info_d->bb.supported = 1;
                        get_volume_badblocks(super->bbm_log, ord_to_idx(ord),
@@ -7936,14 +7979,35 @@ static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
        /* end process for initialization and rebuild only
         */
        if (is_gen_migration(dev) == 0) {
-               __u8 map_state;
-               int failed;
+               int failed = imsm_count_failed(super, dev, MAP_0);
 
-               failed = imsm_count_failed(super, dev, MAP_0);
-               map_state = imsm_check_degraded(super, dev, failed, MAP_0);
+               if (failed) {
+                       __u8 map_state;
+                       struct imsm_map *map = get_imsm_map(dev, MAP_0);
+                       struct imsm_map *map1;
+                       int i, ord, ord_map1;
+                       int rebuilt = 1;
 
-               if (failed)
-                       end_migration(dev, super, map_state);
+                       for (i = 0; i < map->num_members; i++) {
+                               ord = get_imsm_ord_tbl_ent(dev, i, MAP_0);
+                               if (!(ord & IMSM_ORD_REBUILD))
+                                       continue;
+
+                               map1 = get_imsm_map(dev, MAP_1);
+                               if (!map1)
+                                       continue;
+
+                               ord_map1 = __le32_to_cpu(map1->disk_ord_tbl[i]);
+                               if (ord_map1 & IMSM_ORD_REBUILD)
+                                       rebuilt = 0;
+                       }
+
+                       if (rebuilt) {
+                               map_state = imsm_check_degraded(super, dev,
+                                                               failed, MAP_0);
+                               end_migration(dev, super, map_state);
+                       }
+               }
        }
        for (dl = super->missing; dl; dl = dl->next)
                mark_missing(super, dev, &dl->disk, dl->index);
@@ -8185,7 +8249,8 @@ skip_mark_checkpoint:
                        dev->vol.dirty = RAIDVOL_CLEAN;
                } else {
                        dev->vol.dirty = RAIDVOL_DIRTY;
-                       if (dev->rwh_policy == RWH_DISTRIBUTED)
+                       if (dev->rwh_policy == RWH_DISTRIBUTED ||
+                           dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
                                dev->vol.dirty |= RAIDVOL_DSRECORD_VALID;
                }
                super->updates_pending++;
@@ -8225,8 +8290,10 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
        int failed;
        int ord;
        __u8 map_state;
+       int rebuild_done = 0;
+       int i;
 
-       ord = imsm_disk_slot_to_ord(a, n);
+       ord = get_imsm_ord_tbl_ent(dev, n, MAP_X);
        if (ord < 0)
                return;
 
@@ -8244,6 +8311,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
                struct imsm_map *migr_map = get_imsm_map(dev, MAP_1);
 
                set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
+               rebuild_done = 1;
                super->updates_pending++;
        }
 
@@ -8306,7 +8374,39 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
                                dprintf_cont(" Map state change");
                                end_migration(dev, super, map_state);
                                super->updates_pending++;
+                       } else if (!rebuild_done) {
+                               break;
                        }
+
+                       /* check if recovery is really finished */
+                       for (mdi = a->info.devs; mdi ; mdi = mdi->next)
+                               if (mdi->recovery_start != MaxSector) {
+                                       recovery_not_finished = 1;
+                                       break;
+                               }
+                       if (recovery_not_finished) {
+                               dprintf_cont("\n");
+                               dprintf("Rebuild has not finished yet, state not changed");
+                               if (a->last_checkpoint < mdi->recovery_start) {
+                                       a->last_checkpoint =
+                                               mdi->recovery_start;
+                                       super->updates_pending++;
+                               }
+                               break;
+                       }
+
+                       dprintf_cont(" Rebuild done, still degraded");
+                       dev->vol.migr_state = 0;
+                       set_migr_type(dev, 0);
+                       dev->vol.curr_migr_unit = 0;
+
+                       for (i = 0; i < map->num_members; i++) {
+                               int idx = get_imsm_ord_tbl_ent(dev, i, MAP_0);
+
+                               if (idx & IMSM_ORD_REBUILD)
+                                       map->failed_disk_num = i;
+                       }
+                       super->updates_pending++;
                        break;
                }
                if (is_gen_migration(dev)) {
@@ -8702,10 +8802,9 @@ static struct mdinfo *imsm_activate_spare(struct active_array *a,
                di->component_size = a->info.component_size;
                di->container_member = inst;
                di->bb.supported = 1;
-               if (dev->rwh_policy == RWH_DISTRIBUTED) {
-                       di->consistency_policy = CONSISTENCY_POLICY_PPL;
+               if (a->info.consistency_policy == CONSISTENCY_POLICY_PPL) {
                        di->ppl_sector = get_ppl_sector(super, inst);
-                       di->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE) >> 9;
+                       di->ppl_size = MULTIPLE_PPL_AREA_SIZE_IMSM >> 9;
                }
                super->random = random32();
                di->next = rv;
@@ -9936,7 +10035,7 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
        struct imsm_dev *dev;
        struct imsm_map *map;
        unsigned int i, j, num_members;
-       __u32 ord;
+       __u32 ord, ord_map0;
        struct bbm_log *log = super->bbm_log;
 
        dprintf("deleting device[%d] from imsm_super\n", index);
@@ -9958,12 +10057,13 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
                         * ord-flags to the first map
                         */
                        ord = get_imsm_ord_tbl_ent(dev, j, MAP_X);
+                       ord_map0 = get_imsm_ord_tbl_ent(dev, j, MAP_0);
 
                        if (ord_to_idx(ord) <= index)
                                continue;
 
                        map = get_imsm_map(dev, MAP_0);
-                       set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
+                       set_imsm_ord_tbl_ent(map, j, ord_map0 - 1);
                        map = get_imsm_map(dev, MAP_1);
                        if (map)
                                set_imsm_ord_tbl_ent(map, j, ord - 1);
@@ -10692,6 +10792,10 @@ static const char *imsm_get_disk_controller_domain(const char *path)
                        drv = "isci";
                else if (hba && hba->type == SYS_DEV_SATA)
                        drv = "ahci";
+               else if (hba && hba->type == SYS_DEV_VMD)
+                       drv = "vmd";
+               else if (hba && hba->type == SYS_DEV_NVME)
+                       drv = "nvme";
                else
                        drv = "unknown";
                dprintf("path: %s hba: %s attached: %s\n",
@@ -10874,8 +10978,7 @@ static int imsm_create_metadata_update_for_reshape(
         */
        spares = get_spares_for_grow(st);
 
-       if (spares == NULL
-           || delta_disks > spares->array.spare_disks) {
+       if (spares == NULL || delta_disks > spares->array.spare_disks) {
                pr_err("imsm: ERROR: Cannot get spare devices for %s.\n", geo->dev_name);
                i = -1;
                goto abort;