/*
 * super-intel.c - Intel(R) Matrix Storage Manager (IMSM) metadata support
 * (commit context: "imsm: manage a list of missing disks")
 */
1 /*
2 * mdadm - Intel(R) Matrix Storage Manager Support
3 *
4 * Copyright (C) 2002-2007 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define HAVE_STDINT_H 1
21 #include "mdadm.h"
22 #include "mdmon.h"
23 #include "sha1.h"
24 #include <values.h>
25 #include <scsi/sg.h>
26 #include <ctype.h>
27
28 /* MPB == Metadata Parameter Block */
29 #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
30 #define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
31 #define MPB_VERSION_RAID0 "1.0.00"
32 #define MPB_VERSION_RAID1 "1.1.00"
33 #define MPB_VERSION_RAID5 "1.2.02"
34 #define MAX_SIGNATURE_LENGTH 32
35 #define MAX_RAID_SERIAL_LEN 16
36 #define MPB_SECTOR_CNT 418
37 #define IMSM_RESERVED_SECTORS 4096
38
/* Disk configuration info. */
#define IMSM_MAX_DEVICES 255
/* On-disk per-disk entry in the MPB disk table.  All multi-byte fields
 * are stored little-endian; offsets in the comments are relative to the
 * start of the MPB.
 */
struct imsm_disk {
	__u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
	__u32 total_blocks;		 /* 0xE8 - 0xEB total blocks */
	__u32 scsi_id;			 /* 0xEC - 0xEF scsi ID */
	__u32 status;			 /* 0xF0 - 0xF3 */
#define SPARE_DISK      0x01  /* Spare */
#define CONFIGURED_DISK 0x02  /* Member of some RaidDev */
#define FAILED_DISK     0x04  /* Permanent failure */
#define USABLE_DISK     0x08  /* Fully usable unless FAILED_DISK is set */

#define	IMSM_DISK_FILLERS	5
	__u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
};

/* RAID map configuration infos. */
struct imsm_map {
	__u32 pba_of_lba0;	/* start address of partition */
	__u32 blocks_per_member;/* blocks per member */
	__u32 num_data_stripes;	/* number of data stripes */
	__u16 blocks_per_strip;
	__u8  map_state;	/* Normal, Uninitialized, Degraded, Failed */
#define IMSM_T_STATE_NORMAL 0
#define IMSM_T_STATE_UNINITIALIZED 1
#define IMSM_T_STATE_DEGRADED 2 /* FIXME: is this correct? */
#define IMSM_T_STATE_FAILED 3 /* FIXME: is this correct? */
	__u8  raid_level;
#define IMSM_T_RAID0 0
#define IMSM_T_RAID1 1
#define IMSM_T_RAID5 5		/* since metadata version 1.2.02 ? */
	__u8  num_members;	/* number of member disks */
	__u8  reserved[3];
	__u32 filler[7];	/* expansion area */
	/* top byte of a disk_ord_tbl entry flags a disk under rebuild */
#define IMSM_ORD_REBUILD (1 << 24)
	__u32 disk_ord_tbl[1];	/* disk_ord_tbl[num_members],
				 * top byte contains some flags
				 */
} __attribute__ ((packed));

/* Per-volume state; followed by one map, or two while migrating */
struct imsm_vol {
	__u32 curr_migr_unit;
	__u32 reserved;
	__u8  migr_state;	/* Normal or Migrating */
	__u8  migr_type;	/* Initializing, Rebuilding, ... */
	__u8  dirty;
	__u8  fill[1];
	__u32 filler[5];
	struct imsm_map map[1];
	/* here comes another one if migr_state */
} __attribute__ ((packed));

/* One raid device (volume) description; variable-sized via vol.map */
struct imsm_dev {
	__u8  volume[MAX_RAID_SERIAL_LEN];
	__u32 size_low;
	__u32 size_high;
	__u32 status;	/* Persistent RaidDev status */
	__u32 reserved_blocks; /* Reserved blocks at beginning of volume */
#define IMSM_DEV_FILLERS 12
	__u32 filler[IMSM_DEV_FILLERS];
	struct imsm_vol vol;
} __attribute__ ((packed));

/* The metadata anchor (MPB) as stored on disk */
struct imsm_super {
	__u8 sig[MAX_SIGNATURE_LENGTH];	/* 0x00 - 0x1F */
	__u32 check_sum;		/* 0x20 - 0x23 MPB Checksum */
	__u32 mpb_size;			/* 0x24 - 0x27 Size of MPB */
	__u32 family_num;		/* 0x28 - 0x2B Checksum from first time this config was written */
	__u32 generation_num;		/* 0x2C - 0x2F Incremented each time this array's MPB is written */
	__u32 error_log_size;		/* 0x30 - 0x33 in bytes */
	__u32 attributes;		/* 0x34 - 0x37 */
	__u8 num_disks;			/* 0x38 Number of configured disks */
	__u8 num_raid_devs;		/* 0x39 Number of configured volumes */
	__u8 error_log_pos;		/* 0x3A */
	__u8 fill[1];			/* 0x3B */
	__u32 cache_size;		/* 0x3c - 0x40 in mb */
	__u32 orig_family_num;		/* 0x40 - 0x43 original family num */
	__u32 pwr_cycle_count;		/* 0x44 - 0x47 simulated power cycle count for array */
	__u32 bbm_log_size;		/* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
#define IMSM_FILLERS 35
	__u32 filler[IMSM_FILLERS];	/* 0x4C - 0xD7 RAID_MPB_FILLERS */
	struct imsm_disk disk[1];	/* 0xD8 diskTbl[numDisks] */
	/* here comes imsm_dev[num_raid_devs] */
	/* here comes BBM logs */
} __attribute__ ((packed));

#define BBM_LOG_MAX_ENTRIES 254

/* One remapped bad-block record in the BBM log */
struct bbm_log_entry {
	__u64 defective_block_start;
#define UNREADABLE 0xFFFFFFFF
	__u32 spare_block_offset;
	__u16 remapped_marked_count;
	__u16 disk_ordinal;
} __attribute__ ((__packed__));

/* Bad Block Management log, stored after the raid devices in the MPB */
struct bbm_log {
	__u32 signature;	/* 0xABADB10C */
	__u32 entry_count;
	__u32 reserved_spare_block_count; /* 0 */
	__u32 reserved;		/* 0xFFFF */
	__u64 first_spare_lba;
	struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
} __attribute__ ((__packed__));
143
144
#ifndef MDASSEMBLE
/* human readable names indexed by IMSM_T_STATE_* */
static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
#endif
148
149 static unsigned int sector_count(__u32 bytes)
150 {
151 return ((bytes + (512-1)) & (~(512-1))) / 512;
152 }
153
154 static unsigned int mpb_sectors(struct imsm_super *mpb)
155 {
156 return sector_count(__le32_to_cpu(mpb->mpb_size));
157 }
158
/* internal representation of IMSM metadata */
struct intel_super {
	union {
		void *buf; /* O_DIRECT buffer for reading/writing metadata */
		struct imsm_super *anchor; /* immovable parameters */
	};
	size_t len; /* size of the 'buf' allocation */
	void *next_buf; /* for realloc'ing buf from the manager */
	size_t next_len;
	int updates_pending; /* count of pending updates for mdmon */
	int creating_imsm; /* flag to indicate container creation */
	int current_vol; /* index of raid device undergoing creation */
#define IMSM_MAX_RAID_DEVS 2
	/* parsed copies of the per-volume metadata from the anchor */
	struct imsm_dev *dev_tbl[IMSM_MAX_RAID_DEVS];
	/* one entry per physical disk; index -1 marks a spare,
	 * -2 a failed/unknown disk (see load_imsm_disk)
	 */
	struct dl {
		struct dl *next;
		int index;
		__u8 serial[MAX_RAID_SERIAL_LEN];
		int major, minor;
		char *devname;
		struct imsm_disk disk;
		int fd;
	} *disks;
	struct dl *add; /* list of disks to add while mdmon active */
	struct dl *missing; /* disks removed while we weren't looking */
	struct bbm_log *bbm_log;
};

/* a used region of a member disk, in blocks */
struct extent {
	unsigned long long start, size;
};

/* definition of messages passed to imsm_process_update */
enum imsm_update_type {
	update_activate_spare,
	update_create_array,
	update_add_disk,
};

struct imsm_update_activate_spare {
	enum imsm_update_type type;
	struct dl *dl;
	int slot;
	int array;
	struct imsm_update_activate_spare *next;
};

struct imsm_update_create_array {
	enum imsm_update_type type;
	int dev_idx;
	struct imsm_dev dev;
};

struct imsm_update_add_disk {
	enum imsm_update_type type;
};
215
/* Debug/test hook: returns non-zero when the environment requests that
 * device names be used in place of real SCSI serial numbers.
 */
static int imsm_env_devname_as_serial(void)
{
	char *val = getenv("IMSM_DEVNAME_AS_SERIAL");

	return val != NULL && atoi(val) == 1;
}
225
226
227 static struct supertype *match_metadata_desc_imsm(char *arg)
228 {
229 struct supertype *st;
230
231 if (strcmp(arg, "imsm") != 0 &&
232 strcmp(arg, "default") != 0
233 )
234 return NULL;
235
236 st = malloc(sizeof(*st));
237 memset(st, 0, sizeof(*st));
238 st->ss = &super_imsm;
239 st->max_devs = IMSM_MAX_DEVICES;
240 st->minor_version = 0;
241 st->sb = NULL;
242 return st;
243 }
244
245 #ifndef MDASSEMBLE
246 static __u8 *get_imsm_version(struct imsm_super *mpb)
247 {
248 return &mpb->sig[MPB_SIG_LEN];
249 }
250 #endif
251
252 /* retrieve a disk directly from the anchor when the anchor is known to be
253 * up-to-date, currently only at load time
254 */
255 static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
256 {
257 if (index >= mpb->num_disks)
258 return NULL;
259 return &mpb->disk[index];
260 }
261
262 #ifndef MDASSEMBLE
263 /* retrieve a disk from the parsed metadata */
264 static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
265 {
266 struct dl *d;
267
268 for (d = super->disks; d; d = d->next)
269 if (d->index == index)
270 return &d->disk;
271
272 return NULL;
273 }
274 #endif
275
276 /* generate a checksum directly from the anchor when the anchor is known to be
277 * up-to-date, currently only at load or write_super after coalescing
278 */
279 static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
280 {
281 __u32 end = mpb->mpb_size / sizeof(end);
282 __u32 *p = (__u32 *) mpb;
283 __u32 sum = 0;
284
285 while (end--)
286 sum += __le32_to_cpu(*p++);
287
288 return sum - __le32_to_cpu(mpb->check_sum);
289 }
290
291 static size_t sizeof_imsm_map(struct imsm_map *map)
292 {
293 return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
294 }
295
296 struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
297 {
298 struct imsm_map *map = &dev->vol.map[0];
299
300 if (second_map && !dev->vol.migr_state)
301 return NULL;
302 else if (second_map) {
303 void *ptr = map;
304
305 return ptr + sizeof_imsm_map(map);
306 } else
307 return map;
308
309 }
310
311 /* return the size of the device.
312 * migr_state increases the returned size if map[0] were to be duplicated
313 */
314 static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
315 {
316 size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
317 sizeof_imsm_map(get_imsm_map(dev, 0));
318
319 /* migrating means an additional map */
320 if (dev->vol.migr_state)
321 size += sizeof_imsm_map(get_imsm_map(dev, 1));
322 else if (migr_state)
323 size += sizeof_imsm_map(get_imsm_map(dev, 0));
324
325 return size;
326 }
327
328 static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
329 {
330 int offset;
331 int i;
332 void *_mpb = mpb;
333
334 if (index >= mpb->num_raid_devs)
335 return NULL;
336
337 /* devices start after all disks */
338 offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
339
340 for (i = 0; i <= index; i++)
341 if (i == index)
342 return _mpb + offset;
343 else
344 offset += sizeof_imsm_dev(_mpb + offset, 0);
345
346 return NULL;
347 }
348
349 static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
350 {
351 if (index >= super->anchor->num_raid_devs)
352 return NULL;
353 return super->dev_tbl[index];
354 }
355
356 static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev, int slot)
357 {
358 struct imsm_map *map;
359
360 if (dev->vol.migr_state)
361 map = get_imsm_map(dev, 1);
362 else
363 map = get_imsm_map(dev, 0);
364
365 /* top byte identifies disk under rebuild */
366 return __le32_to_cpu(map->disk_ord_tbl[slot]);
367 }
368
369 #define ord_to_idx(ord) (((ord) << 8) >> 8)
370 static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot)
371 {
372 __u32 ord = get_imsm_ord_tbl_ent(dev, slot);
373
374 return ord_to_idx(ord);
375 }
376
/* Store an ord-table entry (index plus flag byte) in on-disk byte order */
static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
{
	map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
}
381
382 static int get_imsm_raid_level(struct imsm_map *map)
383 {
384 if (map->raid_level == 1) {
385 if (map->num_members == 2)
386 return 1;
387 else
388 return 10;
389 }
390
391 return map->raid_level;
392 }
393
394 static int cmp_extent(const void *av, const void *bv)
395 {
396 const struct extent *a = av;
397 const struct extent *b = bv;
398 if (a->start < b->start)
399 return -1;
400 if (a->start > b->start)
401 return 1;
402 return 0;
403 }
404
/* Build a sorted list of the regions of physical disk 'dl' that are in
 * use by raid maps, terminated by a zero-size entry whose 'start' marks
 * where the metadata reservation begins.  Returns a malloc'd array the
 * caller must free, or NULL on allocation failure.
 */
static struct extent *get_extents(struct intel_super *super, struct dl *dl)
{
	/* find a list of used extents on the given physical device */
	struct extent *rv, *e;
	int i, j;
	int memberships = 0;
	__u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

	/* first pass: count maps referencing this disk so we know how
	 * many extents to allocate (+1 for the terminator)
	 */
	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);
		struct imsm_map *map = get_imsm_map(dev, 0);

		for (j = 0; j < map->num_members; j++) {
			__u32 index = get_imsm_disk_idx(dev, j);

			if (index == dl->index)
				memberships++;
		}
	}
	rv = malloc(sizeof(struct extent) * (memberships + 1));
	if (!rv)
		return NULL;
	e = rv;

	/* second pass: record start/size of each map on this disk */
	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);
		struct imsm_map *map = get_imsm_map(dev, 0);

		for (j = 0; j < map->num_members; j++) {
			__u32 index = get_imsm_disk_idx(dev, j);

			if (index == dl->index) {
				e->start = __le32_to_cpu(map->pba_of_lba0);
				e->size = __le32_to_cpu(map->blocks_per_member);
				e++;
			}
		}
	}
	qsort(rv, memberships, sizeof(*rv), cmp_extent);

	/* determine the start of the metadata
	 * when no raid devices are defined use the default
	 * ...otherwise allow the metadata to truncate the value
	 * as is the case with older versions of imsm
	 */
	if (memberships) {
		struct extent *last = &rv[memberships - 1];
		__u32 remainder;

		remainder = __le32_to_cpu(dl->disk.total_blocks) -
			    (last->start + last->size);
		if (reservation > remainder)
			reservation = remainder;
	}
	/* terminator: zero-size extent at the start of the reservation */
	e->start = __le32_to_cpu(dl->disk.total_blocks) - reservation;
	e->size = 0;
	return rv;
}
463
464 /* try to determine how much space is reserved for metadata from
465 * the last get_extents() entry, otherwise fallback to the
466 * default
467 */
468 static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
469 {
470 struct extent *e;
471 int i;
472 __u32 rv;
473
474 /* for spares just return a minimal reservation which will grow
475 * once the spare is picked up by an array
476 */
477 if (dl->index == -1)
478 return MPB_SECTOR_CNT;
479
480 e = get_extents(super, dl);
481 if (!e)
482 return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
483
484 /* scroll to last entry */
485 for (i = 0; e[i].size; i++)
486 continue;
487
488 rv = __le32_to_cpu(dl->disk.total_blocks) - e[i].start;
489
490 free(e);
491
492 return rv;
493 }
494
#ifndef MDASSEMBLE
/* Print a human readable summary of one raid device (volume) as part of
 * --examine output.  'index' is the disk-table index of the disk we are
 * examining; it is used to report which slot "This Slot" refers to.
 */
static void print_imsm_dev(struct imsm_dev *dev, int index)
{
	__u64 sz;
	int slot;
	struct imsm_map *map = get_imsm_map(dev, 0);
	__u32 ord;

	printf("\n");
	printf("[%s]:\n", dev->volume);
	printf("     RAID Level : %d\n", get_imsm_raid_level(map));
	printf("        Members : %d\n", map->num_members);
	/* find which slot (if any) this disk occupies in the volume */
	for (slot = 0; slot < map->num_members; slot++)
		if (index == get_imsm_disk_idx(dev, slot))
			break;
	if (slot < map->num_members) {
		ord = get_imsm_ord_tbl_ent(dev, slot);
		printf("      This Slot : %d%s\n", slot,
		       ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
	} else
		printf("      This Slot : ?\n");
	/* 64-bit array size is split across two little-endian words */
	sz = __le32_to_cpu(dev->size_high);
	sz <<= 32;
	sz += __le32_to_cpu(dev->size_low);
	printf("     Array Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	sz = __le32_to_cpu(map->blocks_per_member);
	printf("   Per Dev Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	printf("  Sector Offset : %u\n",
		__le32_to_cpu(map->pba_of_lba0));
	printf("    Num Stripes : %u\n",
		__le32_to_cpu(map->num_data_stripes));
	printf("     Chunk Size : %u KiB\n",
		__le16_to_cpu(map->blocks_per_strip) / 2);
	printf("       Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
	printf("  Migrate State : %s", dev->vol.migr_state ? "migrating" : "idle");
	if (dev->vol.migr_state)
		printf(": %s", dev->vol.migr_type ? "rebuilding" : "initializing");
	printf("\n");
	printf("      Map State : %s", map_state_str[map->map_state]);
	if (dev->vol.migr_state) {
		/* while migrating, show the source map state too
		 * (intentionally shadows the outer 'map')
		 */
		struct imsm_map *map = get_imsm_map(dev, 1);
		printf(" <-- %s", map_state_str[map->map_state]);
	}
	printf("\n");
	printf("    Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
}
543
544 static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
545 {
546 struct imsm_disk *disk = __get_imsm_disk(mpb, index);
547 char str[MAX_RAID_SERIAL_LEN + 1];
548 __u32 s;
549 __u64 sz;
550
551 if (index < 0)
552 return;
553
554 printf("\n");
555 snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
556 printf(" Disk%02d Serial : %s\n", index, str);
557 s = __le32_to_cpu(disk->status);
558 printf(" State :%s%s%s%s\n", s&SPARE_DISK ? " spare" : "",
559 s&CONFIGURED_DISK ? " active" : "",
560 s&FAILED_DISK ? " failed" : "",
561 s&USABLE_DISK ? " usable" : "");
562 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
563 sz = __le32_to_cpu(disk->total_blocks) - reserved;
564 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
565 human_size(sz * 512));
566 }
567
/* --examine: dump the anchor, this disk, all raid devices, then the
 * remaining disks.  NOTE(review): super->disks is dereferenced without a
 * check — presumably callers guarantee at least one loaded disk; confirm
 * against load_super paths.
 */
static void examine_super_imsm(struct supertype *st, char *homehost)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	char str[MAX_SIGNATURE_LENGTH];
	int i;
	__u32 sum;
	__u32 reserved = imsm_reserved_sectors(super, super->disks);

	snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
	printf("          Magic : %s\n", str);
	snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
	printf("        Version : %s\n", get_imsm_version(mpb));
	printf("         Family : %08x\n", __le32_to_cpu(mpb->family_num));
	printf("     Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
	sum = __le32_to_cpu(mpb->check_sum);
	printf("       Checksum : %08x %s\n", sum,
		__gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
	printf("    MPB Sectors : %d\n", mpb_sectors(mpb));
	printf("          Disks : %d\n", mpb->num_disks);
	printf("   RAID Devices : %d\n", mpb->num_raid_devs);
	/* the disk we were asked to examine is printed first */
	print_imsm_disk(mpb, super->disks->index, reserved);
	if (super->bbm_log) {
		struct bbm_log *log = super->bbm_log;

		printf("\n");
		printf("Bad Block Management Log:\n");
		printf("       Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
		printf("      Signature : %x\n", __le32_to_cpu(log->signature));
		printf("    Entry Count : %d\n", __le32_to_cpu(log->entry_count));
		printf("   Spare Blocks : %d\n",  __le32_to_cpu(log->reserved_spare_block_count));
		printf("    First Spare : %llx\n", __le64_to_cpu(log->first_spare_lba));
	}
	for (i = 0; i < mpb->num_raid_devs; i++)
		print_imsm_dev(__get_imsm_dev(mpb, i), super->disks->index);
	/* then every other disk in the table */
	for (i = 0; i < mpb->num_disks; i++) {
		if (i == super->disks->index)
			continue;
		print_imsm_disk(mpb, i, reserved);
	}
}
609
610 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info);
611
612 static void brief_examine_super_imsm(struct supertype *st)
613 {
614 /* We just write a generic DDF ARRAY entry
615 */
616 struct mdinfo info;
617 char nbuf[64];
618
619 getinfo_super_imsm(st, &info);
620 fname_from_uuid(st, &info, nbuf,'-');
621 printf("ARRAY /dev/imsm metadata=imsm UUID=%s\n", nbuf + 5);
622 }
623
/* --detail: not implemented yet; prints the function name as a stub */
static void detail_super_imsm(struct supertype *st, char *homehost)
{
	printf("%s\n", __FUNCTION__);
}
628
629 static void brief_detail_super_imsm(struct supertype *st)
630 {
631 struct mdinfo info;
632 char nbuf[64];
633 getinfo_super_imsm(st, &info);
634 fname_from_uuid(st, &info, nbuf,'-');
635 printf(" UUID=%s", nbuf + 5);
636 }
637 #endif
638
/* homehost matching: not implemented; always reports "no match" (-1)
 * and prints the function name as a stub
 */
static int match_home_imsm(struct supertype *st, char *homehost)
{
	printf("%s\n", __FUNCTION__);

	return -1;
}
645
static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
{
	/* The uuid returned here is used for:
	 *  uuid to put into bitmap file (Create, Grow)
	 *  uuid for backup header when saving critical section (Grow)
	 *  comparing uuids when re-adding a device into an array
	 *    In these cases the uuid required is that of the data-array,
	 *    not the device-set.
	 *  uuid to recognise same set when adding a missing device back
	 *    to an array.   This is a uuid for the device-set.
	 *
	 * For each of these we can make do with a truncated
	 * or hashed uuid rather than the original, as long as
	 * everyone agrees.
	 * In each case the uuid required is that of the data-array,
	 * not the device-set.
	 */
	/* imsm does not track uuid's so we synthesis one using sha1 on
	 * - The signature (Which is constant for all imsm array, but no matter)
	 * - the family_num of the container
	 * - the index number of the volume
	 * - the 'serial' number of the volume.
	 * Hopefully these are all constant.
	 */
	struct intel_super *super = st->sb;

	char buf[20];	/* sha1 digest is 20 bytes; only first 16 used */
	struct sha1_ctx ctx;
	struct imsm_dev *dev = NULL;

	sha1_init_ctx(&ctx);
	sha1_process_bytes(super->anchor->sig, MAX_SIGNATURE_LENGTH, &ctx);
	sha1_process_bytes(&super->anchor->family_num, sizeof(__u32), &ctx);
	/* when a specific volume is selected, mix in its identity so each
	 * volume gets a distinct uuid from the container's
	 */
	if (super->current_vol >= 0)
		dev = get_imsm_dev(super, super->current_vol);
	if (dev) {
		__u32 vol = super->current_vol;
		sha1_process_bytes(&vol, sizeof(vol), &ctx);
		sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
	}
	sha1_finish_ctx(&ctx, buf);
	memcpy(uuid, buf, 4*4);
}
689
#if 0
/* Disabled: parse the "major.minor.patch" version string out of the
 * signature.  NOTE(review): 'major' is parsed but never returned — only
 * minor (*m) and patch (*p) are reported; confirm intent before reviving.
 */
static void
get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
{
	__u8 *v = get_imsm_version(mpb);
	__u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
	char major[] = { 0, 0, 0 };
	char minor[] = { 0 ,0, 0 };
	char patch[] = { 0, 0, 0 };
	char *ver_parse[] = { major, minor, patch };
	int i, j;

	i = j = 0;
	while (*v != '\0' && v < end) {
		if (*v != '.' && j < 2)
			ver_parse[i][j++] = *v;
		else {
			i++;
			j = 0;
		}
		v++;
	}

	*m = strtol(minor, NULL, 0);
	*p = strtol(patch, NULL, 0);
}
#endif
717
718 static int imsm_level_to_layout(int level)
719 {
720 switch (level) {
721 case 0:
722 case 1:
723 return 0;
724 case 5:
725 case 6:
726 return ALGORITHM_LEFT_ASYMMETRIC;
727 case 10:
728 return 0x102;
729 }
730 return -1;
731 }
732
/* Fill 'info' with the details of the currently selected volume
 * (super->current_vol).  Called from getinfo_super_imsm when a volume,
 * rather than the container, is selected.
 */
static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
{
	struct intel_super *super = st->sb;
	struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
	struct imsm_map *map = get_imsm_map(dev, 0);

	info->container_member	  = super->current_vol;
	info->array.raid_disks    = map->num_members;
	info->array.level	  = get_imsm_raid_level(map);
	info->array.layout	  = imsm_level_to_layout(info->array.level);
	info->array.md_minor	  = -1;
	info->array.ctime	  = 0;
	info->array.utime	  = 0;
	/* blocks_per_strip is in 512-byte sectors; chunk_size in bytes */
	info->array.chunk_size	  = __le16_to_cpu(map->blocks_per_strip) << 9;
	info->array.state	  = !dev->vol.dirty;

	info->disk.major = 0;
	info->disk.minor = 0;

	info->data_offset	  = __le32_to_cpu(map->pba_of_lba0);
	info->component_size	  = __le32_to_cpu(map->blocks_per_member);
	memset(info->uuid, 0, sizeof(info->uuid));

	/* resync_start: 0 forces a resync, ~0ULL means fully in sync */
	if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty)
		info->resync_start = 0;
	else if (dev->vol.migr_state)
		info->resync_start = __le32_to_cpu(dev->vol.curr_migr_unit);
	else
		info->resync_start = ~0ULL;

	/* NOTE(review): assumes info->name holds at least
	 * MAX_RAID_SERIAL_LEN + 1 bytes — confirm against mdinfo
	 */
	strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
	info->name[MAX_RAID_SERIAL_LEN] = 0;

	info->array.major_version = -1;
	info->array.minor_version = -2;
	/* NOTE(review): devnum2devname() looks like it returns allocated
	 * memory that is never freed here — verify and fix upstream
	 */
	sprintf(info->text_version, "/%s/%d",
		devnum2devname(st->container_dev),
		info->container_member);
	info->safe_mode_delay = 4000;  /* 4 secs like the Matrix driver */
	uuid_from_super_imsm(st, info->uuid);
}
774
775
/* Fill 'info' with container-level details, or delegate to the volume
 * variant when a volume is currently selected via super->current_vol.
 */
static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info)
{
	struct intel_super *super = st->sb;
	struct imsm_disk *disk;
	__u32 s;

	if (super->current_vol >= 0) {
		getinfo_super_imsm_volume(st, info);
		return;
	}

	/* Set raid_disks to zero so that Assemble will always pull in valid
	 * spares
	 */
	info->array.raid_disks    = 0;
	info->array.level         = LEVEL_CONTAINER;
	info->array.layout        = 0;
	info->array.md_minor      = -1;
	info->array.ctime         = 0; /* N/A for imsm */
	info->array.utime         = 0;
	info->array.chunk_size    = 0;

	info->disk.major = 0;
	info->disk.minor = 0;
	info->disk.raid_disk = -1;
	info->reshape_active = 0;
	info->array.major_version = -1;
	info->array.minor_version = -2;
	strcpy(info->text_version, "imsm");
	info->safe_mode_delay = 0;
	info->disk.number = -1;
	info->disk.state = 0;
	info->name[0] = 0;

	/* describe the first loaded disk, if any */
	if (super->disks) {
		__u32 reserved = imsm_reserved_sectors(super, super->disks);

		disk = &super->disks->disk;
		info->disk.number = super->disks->index;
		info->disk.raid_disk = super->disks->index;
		/* the metadata reservation lives at the end of the disk */
		info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved;
		info->component_size = reserved;
		s = __le32_to_cpu(disk->status);
		info->disk.state  = s & CONFIGURED_DISK ? (1 << MD_DISK_ACTIVE) : 0;
		info->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0;
		info->disk.state |= s & USABLE_DISK ? (1 << MD_DISK_SYNC) : 0;
	}
	uuid_from_super_imsm(st, info->uuid);
}
825
/* Apply an --update directive to the metadata.  Currently a stub: the
 * recognized options fall through without modifying anything and 0 is
 * always returned.
 */
static int update_super_imsm(struct supertype *st, struct mdinfo *info,
			     char *update, char *devname, int verbose,
			     int uuid_set, char *homehost)
{
	/* FIXME */

	/* For 'assemble' and 'force' we need to return non-zero if any
	 * change was made.  For others, the return value is ignored.
	 * Update options are:
	 *  force-one : This device looks a bit old but needs to be included,
	 *        update age info appropriately.
	 *  assemble: clear any 'faulty' flag to allow this device to
	 *        be assembled.
	 *  force-array: Array is degraded but being forced, mark it clean
	 *         if that will be needed to assemble it.
	 *
	 *  newdev:  not used ????
	 *  grow:  Array has gained a new device - this is currently for
	 *        linear only
	 *  resync: mark as dirty so a resync will happen.
	 *  name:  update the name - preserving the homehost
	 *
	 * Following are not relevant for this imsm:
	 *  sparc2.2 : update from old dodgey metadata
	 *  super-minor: change the preferred_minor number
	 *  summaries:  update redundant counters.
	 *  uuid:  Change the uuid of the array to match watch is given
	 *  homehost:  update the recorded homehost
	 *  _reshape_progress: record new reshape_progress position.
	 */
	int rv = 0;
	//struct intel_super *super = st->sb;
	//struct imsm_super *mpb = super->mpb;

	if (strcmp(update, "grow") == 0) {
	}
	if (strcmp(update, "resync") == 0) {
		/* dev->vol.dirty = 1; */
	}

	/* IMSM has no concept of UUID or homehost */

	return rv;
}
870
871 static size_t disks_to_mpb_size(int disks)
872 {
873 size_t size;
874
875 size = sizeof(struct imsm_super);
876 size += (disks - 1) * sizeof(struct imsm_disk);
877 size += 2 * sizeof(struct imsm_dev);
878 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
879 size += (4 - 2) * sizeof(struct imsm_map);
880 /* 4 possible disk_ord_tbl's */
881 size += 4 * (disks - 1) * sizeof(__u32);
882
883 return size;
884 }
885
886 static __u64 avail_size_imsm(struct supertype *st, __u64 devsize)
887 {
888 if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
889 return 0;
890
891 return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
892 }
893
static int compare_super_imsm(struct supertype *st, struct supertype *tst)
{
	/*
	 * return:
	 *  0 same, or first was empty, and second was copied
	 *  1 second had wrong number
	 *  2 wrong uuid
	 *  3 wrong other info
	 */
	struct intel_super *first = st->sb;
	struct intel_super *sec = tst->sb;

	/* no first? adopt the second wholesale (ownership moves to st) */
	if (!first) {
		st->sb = tst->sb;
		tst->sb = NULL;
		return 0;
	}

	if (memcmp(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH) != 0)
		return 3;

	/* if an anchor does not have num_raid_devs set then it is a free
	 * floating spare
	 */
	if (first->anchor->num_raid_devs > 0 &&
	    sec->anchor->num_raid_devs > 0) {
		if (first->anchor->family_num != sec->anchor->family_num)
			return 3;
	}

	/* if 'first' is a spare promote it to a populated mpb with sec's
	 * family number
	 */
	if (first->anchor->num_raid_devs == 0 &&
	    sec->anchor->num_raid_devs > 0) {
		first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
		first->anchor->family_num = sec->anchor->family_num;
	}

	return 0;
}
935
936 static void fd2devname(int fd, char *name)
937 {
938 struct stat st;
939 char path[256];
940 char dname[100];
941 char *nm;
942 int rv;
943
944 name[0] = '\0';
945 if (fstat(fd, &st) != 0)
946 return;
947 sprintf(path, "/sys/dev/block/%d:%d",
948 major(st.st_rdev), minor(st.st_rdev));
949
950 rv = readlink(path, dname, sizeof(dname));
951 if (rv <= 0)
952 return;
953
954 dname[rv] = '\0';
955 nm = strrchr(dname, '/');
956 nm++;
957 snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
958 }
959
960
961 extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
962
963 static int imsm_read_serial(int fd, char *devname,
964 __u8 serial[MAX_RAID_SERIAL_LEN])
965 {
966 unsigned char scsi_serial[255];
967 int rv;
968 int rsp_len;
969 int len;
970 char *c, *rsp_buf;
971
972 memset(scsi_serial, 0, sizeof(scsi_serial));
973
974 rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));
975
976 if (rv && imsm_env_devname_as_serial()) {
977 memset(serial, 0, MAX_RAID_SERIAL_LEN);
978 fd2devname(fd, (char *) serial);
979 return 0;
980 }
981
982 if (rv != 0) {
983 if (devname)
984 fprintf(stderr,
985 Name ": Failed to retrieve serial for %s\n",
986 devname);
987 return rv;
988 }
989
990 /* trim leading whitespace */
991 rsp_len = scsi_serial[3];
992 rsp_buf = (char *) &scsi_serial[4];
993 c = rsp_buf;
994 while (isspace(*c))
995 c++;
996
997 /* truncate len to the end of rsp_buf if necessary */
998 if (c + MAX_RAID_SERIAL_LEN > rsp_buf + rsp_len)
999 len = rsp_len - (c - rsp_buf);
1000 else
1001 len = MAX_RAID_SERIAL_LEN;
1002
1003 /* initialize the buffer and copy rsp_buf characters */
1004 memset(serial, 0, MAX_RAID_SERIAL_LEN);
1005 memcpy(serial, c, len);
1006
1007 /* trim trailing whitespace starting with the last character copied */
1008 c = (char *) &serial[len - 1];
1009 while (isspace(*c) || *c == '\0')
1010 *c-- = '\0';
1011
1012 return 0;
1013 }
1014
/* Compare two serial-number fields; they are at most MAX_RAID_SERIAL_LEN
 * bytes and may not be NUL terminated, hence the bounded compare.
 */
static int serialcmp(__u8 *s1, __u8 *s2)
{
	return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN);
}

/* Copy a serial into a fixed-width field; strncpy's zero padding of the
 * remainder is the desired on-disk semantic here.
 */
static void serialcpy(__u8 *dest, __u8 *src)
{
	strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
}
1024
/* Add (or refresh) the disk behind 'fd' in super->disks, keyed by its
 * serial number, and resolve its index in the current anchor.
 * @keep_fd: when set, the fd is retained in the dl entry (replacing any
 *           previously held fd for a known disk).
 * Returns 0 on success, 2 on serial-read or allocation failure.
 */
static int
load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
{
	struct dl *dl;
	struct stat stb;
	int rv;
	int i;
	int alloc = 1;	/* cleared when the disk is already on the list */
	__u8 serial[MAX_RAID_SERIAL_LEN];

	rv = imsm_read_serial(fd, devname, serial);

	if (rv != 0)
		return 2;

	/* check if this is a disk we have seen before.  it may be a spare in
	 * super->disks while the current anchor believes it is a raid member,
	 * check if we need to update dl->index
	 */
	for (dl = super->disks; dl; dl = dl->next)
		if (serialcmp(dl->serial, serial) == 0)
			break;

	if (!dl)
		dl = malloc(sizeof(*dl));
	else
		alloc = 0;

	if (!dl) {
		if (devname)
			fprintf(stderr,
				Name ": failed to allocate disk buffer for %s\n",
				devname);
		return 2;
	}

	if (alloc) {
		/* new disk: fill in identity and link it into the list */
		fstat(fd, &stb);
		dl->major = major(stb.st_rdev);
		dl->minor = minor(stb.st_rdev);
		dl->next = super->disks;
		dl->fd = keep_fd ? fd : -1;
		dl->devname = devname ? strdup(devname) : NULL;
		serialcpy(dl->serial, serial);
		dl->index = -2;	/* unknown until found in the anchor below */
	} else if (keep_fd) {
		close(dl->fd);
		dl->fd = fd;
	}

	/* look up this disk's index in the current anchor */
	for (i = 0; i < super->anchor->num_disks; i++) {
		struct imsm_disk *disk_iter;

		disk_iter = __get_imsm_disk(super->anchor, i);

		if (serialcmp(disk_iter->serial, dl->serial) == 0) {
			__u32 status;

			dl->disk = *disk_iter;
			status = __le32_to_cpu(dl->disk.status);
			/* only set index on disks that are a member of a
			 * populated container, i.e. one with raid_devs
			 */
			if (status & FAILED_DISK)
				dl->index = -2;
			else if (status & SPARE_DISK)
				dl->index = -1;
			else
				dl->index = i;

			break;
		}
	}

	/* link new entries in only after they are fully initialized */
	if (alloc)
		super->disks = dl;

	return 0;
}
1105
/* copy a raid device record, sized for its current (non-migrating) layout */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	size_t nbytes = sizeof_imsm_dev(src, 0);

	memcpy(dest, src, nbytes);
}
1110
1111 #ifndef MDASSEMBLE
1112 /* When migrating map0 contains the 'destination' state while map1
1113 * contains the current state. When not migrating map0 contains the
1114 * current state. This routine assumes that map[0].map_state is set to
1115 * the current array state before being called.
1116 *
1117 * Migration is indicated by one of the following states
1118 * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed)
1119 * 2/ Initialize (migr_state=1 migr_type=0 map0state=normal
1120 * map1state=unitialized)
1121 * 3/ Verify (Resync) (migr_state=1 migr_type=1 map0state=normal
1122 * map1state=normal)
1123 * 4/ Rebuild (migr_state=1 migr_type=1 map0state=normal
1124 * map1state=degraded)
1125 */
1126 static void migrate(struct imsm_dev *dev, __u8 to_state, int rebuild_resync)
1127 {
1128 struct imsm_map *dest;
1129 struct imsm_map *src = get_imsm_map(dev, 0);
1130
1131 dev->vol.migr_state = 1;
1132 dev->vol.migr_type = rebuild_resync;
1133 dev->vol.curr_migr_unit = 0;
1134 dest = get_imsm_map(dev, 1);
1135
1136 memcpy(dest, src, sizeof_imsm_map(src));
1137 src->map_state = to_state;
1138 }
1139
1140 static void end_migration(struct imsm_dev *dev, __u8 map_state)
1141 {
1142 struct imsm_map *map = get_imsm_map(dev, 0);
1143
1144 dev->vol.migr_state = 0;
1145 dev->vol.curr_migr_unit = 0;
1146 map->map_state = map_state;
1147 }
1148 #endif
1149
1150 static int parse_raid_devices(struct intel_super *super)
1151 {
1152 int i;
1153 struct imsm_dev *dev_new;
1154 size_t len, len_migr;
1155 size_t space_needed = 0;
1156 struct imsm_super *mpb = super->anchor;
1157
1158 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1159 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
1160
1161 len = sizeof_imsm_dev(dev_iter, 0);
1162 len_migr = sizeof_imsm_dev(dev_iter, 1);
1163 if (len_migr > len)
1164 space_needed += len_migr - len;
1165
1166 dev_new = malloc(len_migr);
1167 if (!dev_new)
1168 return 1;
1169 imsm_copy_dev(dev_new, dev_iter);
1170 super->dev_tbl[i] = dev_new;
1171 }
1172
1173 /* ensure that super->buf is large enough when all raid devices
1174 * are migrating
1175 */
1176 if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
1177 void *buf;
1178
1179 len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512);
1180 if (posix_memalign(&buf, 512, len) != 0)
1181 return 1;
1182
1183 memcpy(buf, super->buf, len);
1184 free(super->buf);
1185 super->buf = buf;
1186 super->len = len;
1187 }
1188
1189 return 0;
1190 }
1191
1192 /* retrieve a pointer to the bbm log which starts after all raid devices */
1193 struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
1194 {
1195 void *ptr = NULL;
1196
1197 if (__le32_to_cpu(mpb->bbm_log_size)) {
1198 ptr = mpb;
1199 ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size);
1200 }
1201
1202 return ptr;
1203 }
1204
1205 static void __free_imsm(struct intel_super *super, int free_disks);
1206
1207 /* load_imsm_mpb - read matrix metadata
1208 * allocates super->mpb to be freed by free_super
1209 */
1210 static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
1211 {
1212 unsigned long long dsize;
1213 unsigned long long sectors;
1214 struct stat;
1215 struct imsm_super *anchor;
1216 __u32 check_sum;
1217 int rc;
1218
1219 get_dev_size(fd, NULL, &dsize);
1220
1221 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
1222 if (devname)
1223 fprintf(stderr,
1224 Name ": Cannot seek to anchor block on %s: %s\n",
1225 devname, strerror(errno));
1226 return 1;
1227 }
1228
1229 if (posix_memalign((void**)&anchor, 512, 512) != 0) {
1230 if (devname)
1231 fprintf(stderr,
1232 Name ": Failed to allocate imsm anchor buffer"
1233 " on %s\n", devname);
1234 return 1;
1235 }
1236 if (read(fd, anchor, 512) != 512) {
1237 if (devname)
1238 fprintf(stderr,
1239 Name ": Cannot read anchor block on %s: %s\n",
1240 devname, strerror(errno));
1241 free(anchor);
1242 return 1;
1243 }
1244
1245 if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
1246 if (devname)
1247 fprintf(stderr,
1248 Name ": no IMSM anchor on %s\n", devname);
1249 free(anchor);
1250 return 2;
1251 }
1252
1253 __free_imsm(super, 0);
1254 super->len = ROUND_UP(anchor->mpb_size, 512);
1255 if (posix_memalign(&super->buf, 512, super->len) != 0) {
1256 if (devname)
1257 fprintf(stderr,
1258 Name ": unable to allocate %zu byte mpb buffer\n",
1259 super->len);
1260 free(anchor);
1261 return 2;
1262 }
1263 memcpy(super->buf, anchor, 512);
1264
1265 sectors = mpb_sectors(anchor) - 1;
1266 free(anchor);
1267 if (!sectors) {
1268 rc = load_imsm_disk(fd, super, devname, 0);
1269 if (rc == 0)
1270 rc = parse_raid_devices(super);
1271 return rc;
1272 }
1273
1274 /* read the extended mpb */
1275 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
1276 if (devname)
1277 fprintf(stderr,
1278 Name ": Cannot seek to extended mpb on %s: %s\n",
1279 devname, strerror(errno));
1280 return 1;
1281 }
1282
1283 if (read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
1284 if (devname)
1285 fprintf(stderr,
1286 Name ": Cannot read extended mpb on %s: %s\n",
1287 devname, strerror(errno));
1288 return 2;
1289 }
1290
1291 check_sum = __gen_imsm_checksum(super->anchor);
1292 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
1293 if (devname)
1294 fprintf(stderr,
1295 Name ": IMSM checksum %x != %x on %s\n",
1296 check_sum, __le32_to_cpu(super->anchor->check_sum),
1297 devname);
1298 return 2;
1299 }
1300
1301 /* FIXME the BBM log is disk specific so we cannot use this global
1302 * buffer for all disks. Ok for now since we only look at the global
1303 * bbm_log_size parameter to gate assembly
1304 */
1305 super->bbm_log = __get_imsm_bbm_log(super->anchor);
1306
1307 rc = load_imsm_disk(fd, super, devname, 0);
1308 if (rc == 0)
1309 rc = parse_raid_devices(super);
1310
1311 return rc;
1312 }
1313
1314 static void __free_imsm_disk(struct dl *d)
1315 {
1316 if (d->fd >= 0)
1317 close(d->fd);
1318 if (d->devname)
1319 free(d->devname);
1320 free(d);
1321
1322 }
1323 static void free_imsm_disks(struct intel_super *super)
1324 {
1325 struct dl *d;
1326
1327 while (super->disks) {
1328 d = super->disks;
1329 super->disks = d->next;
1330 __free_imsm_disk(d);
1331 }
1332 while (super->missing) {
1333 d = super->missing;
1334 super->missing = d->next;
1335 __free_imsm_disk(d);
1336 }
1337
1338 }
1339
1340 /* free all the pieces hanging off of a super pointer */
1341 static void __free_imsm(struct intel_super *super, int free_disks)
1342 {
1343 int i;
1344
1345 if (super->buf) {
1346 free(super->buf);
1347 super->buf = NULL;
1348 }
1349 if (free_disks)
1350 free_imsm_disks(super);
1351 for (i = 0; i < IMSM_MAX_RAID_DEVS; i++)
1352 if (super->dev_tbl[i]) {
1353 free(super->dev_tbl[i]);
1354 super->dev_tbl[i] = NULL;
1355 }
1356 }
1357
/* release everything hanging off 'super' (including the disk lists),
 * then the struct itself
 */
static void free_imsm(struct intel_super *super)
{
	__free_imsm(super, 1);
	free(super);
}
1363
1364 static void free_super_imsm(struct supertype *st)
1365 {
1366 struct intel_super *super = st->sb;
1367
1368 if (!super)
1369 return;
1370
1371 free_imsm(super);
1372 st->sb = NULL;
1373 }
1374
1375 static struct intel_super *alloc_super(int creating_imsm)
1376 {
1377 struct intel_super *super = malloc(sizeof(*super));
1378
1379 if (super) {
1380 memset(super, 0, sizeof(*super));
1381 super->creating_imsm = creating_imsm;
1382 super->current_vol = -1;
1383 }
1384
1385 return super;
1386 }
1387
1388 #ifndef MDASSEMBLE
1389 /* find_missing - helper routine for load_super_imsm_all that identifies
1390 * disks that have disappeared from the system. This routine relies on
1391 * the mpb being uptodate, which it is at load time.
1392 */
1393 static int find_missing(struct intel_super *super)
1394 {
1395 int i;
1396 struct imsm_super *mpb = super->anchor;
1397 struct dl *dl;
1398 struct imsm_disk *disk;
1399 __u32 status;
1400
1401 for (i = 0; i < mpb->num_disks; i++) {
1402 disk = __get_imsm_disk(mpb, i);
1403 for (dl = super->disks; dl; dl = dl->next)
1404 if (serialcmp(dl->disk.serial, disk->serial) == 0)
1405 break;
1406 if (dl)
1407 continue;
1408 /* ok we have a 'disk' without a live entry in
1409 * super->disks
1410 */
1411 status = __le32_to_cpu(disk->status);
1412 if (status & FAILED_DISK || !(status & USABLE_DISK))
1413 continue; /* never mind, already marked */
1414
1415 dl = malloc(sizeof(*dl));
1416 if (!dl)
1417 return 1;
1418 dl->major = 0;
1419 dl->minor = 0;
1420 dl->fd = -1;
1421 dl->devname = strdup("missing");
1422 dl->index = i;
1423 serialcpy(dl->serial, disk->serial);
1424 dl->disk = *disk;
1425 dl->next = super->missing;
1426 super->missing = dl;
1427 }
1428
1429 return 0;
1430 }
1431
1432 static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
1433 char *devname, int keep_fd)
1434 {
1435 struct mdinfo *sra;
1436 struct intel_super *super;
1437 struct mdinfo *sd, *best = NULL;
1438 __u32 bestgen = 0;
1439 __u32 gen;
1440 char nm[20];
1441 int dfd;
1442 int rv;
1443
1444 /* check if this disk is a member of an active array */
1445 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
1446 if (!sra)
1447 return 1;
1448
1449 if (sra->array.major_version != -1 ||
1450 sra->array.minor_version != -2 ||
1451 strcmp(sra->text_version, "imsm") != 0)
1452 return 1;
1453
1454 super = alloc_super(0);
1455 if (!super)
1456 return 1;
1457
1458 /* find the most up to date disk in this array, skipping spares */
1459 for (sd = sra->devs; sd; sd = sd->next) {
1460 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
1461 dfd = dev_open(nm, keep_fd ? O_RDWR : O_RDONLY);
1462 if (!dfd) {
1463 free_imsm(super);
1464 return 2;
1465 }
1466 rv = load_imsm_mpb(dfd, super, NULL);
1467 if (!keep_fd)
1468 close(dfd);
1469 if (rv == 0) {
1470 if (super->anchor->num_raid_devs == 0)
1471 gen = 0;
1472 else
1473 gen = __le32_to_cpu(super->anchor->generation_num);
1474 if (!best || gen > bestgen) {
1475 bestgen = gen;
1476 best = sd;
1477 }
1478 } else {
1479 free_imsm(super);
1480 return 2;
1481 }
1482 }
1483
1484 if (!best) {
1485 free_imsm(super);
1486 return 1;
1487 }
1488
1489 /* load the most up to date anchor */
1490 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
1491 dfd = dev_open(nm, O_RDONLY);
1492 if (!dfd) {
1493 free_imsm(super);
1494 return 1;
1495 }
1496 rv = load_imsm_mpb(dfd, super, NULL);
1497 close(dfd);
1498 if (rv != 0) {
1499 free_imsm(super);
1500 return 2;
1501 }
1502
1503 /* re-parse the disk list with the current anchor */
1504 for (sd = sra->devs ; sd ; sd = sd->next) {
1505 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
1506 dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
1507 if (!dfd) {
1508 free_imsm(super);
1509 return 2;
1510 }
1511 load_imsm_disk(dfd, super, NULL, keep_fd);
1512 if (!keep_fd)
1513 close(dfd);
1514 }
1515
1516
1517 if (find_missing(super) != 0) {
1518 free_imsm(super);
1519 return 2;
1520 }
1521
1522 if (st->subarray[0]) {
1523 if (atoi(st->subarray) <= super->anchor->num_raid_devs)
1524 super->current_vol = atoi(st->subarray);
1525 else
1526 return 1;
1527 }
1528
1529 *sbp = super;
1530 st->container_dev = fd2devnum(fd);
1531 if (st->ss == NULL) {
1532 st->ss = &super_imsm;
1533 st->minor_version = 0;
1534 st->max_devs = IMSM_MAX_DEVICES;
1535 }
1536 st->loaded_container = 1;
1537
1538 return 0;
1539 }
1540 #endif
1541
1542 static int load_super_imsm(struct supertype *st, int fd, char *devname)
1543 {
1544 struct intel_super *super;
1545 int rv;
1546
1547 #ifndef MDASSEMBLE
1548 if (load_super_imsm_all(st, fd, &st->sb, devname, 1) == 0)
1549 return 0;
1550 #endif
1551 if (st->subarray[0])
1552 return 1; /* FIXME */
1553
1554 super = alloc_super(0);
1555 if (!super) {
1556 fprintf(stderr,
1557 Name ": malloc of %zu failed.\n",
1558 sizeof(*super));
1559 return 1;
1560 }
1561
1562 rv = load_imsm_mpb(fd, super, devname);
1563
1564 if (rv) {
1565 if (devname)
1566 fprintf(stderr,
1567 Name ": Failed to load all information "
1568 "sections on %s\n", devname);
1569 free_imsm(super);
1570 return rv;
1571 }
1572
1573 st->sb = super;
1574 if (st->ss == NULL) {
1575 st->ss = &super_imsm;
1576 st->minor_version = 0;
1577 st->max_devs = IMSM_MAX_DEVICES;
1578 }
1579 st->loaded_container = 0;
1580
1581 return 0;
1582 }
1583
1584 static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
1585 {
1586 if (info->level == 1)
1587 return 128;
1588 return info->chunk_size >> 9;
1589 }
1590
1591 static __u32 info_to_num_data_stripes(mdu_array_info_t *info)
1592 {
1593 __u32 num_stripes;
1594
1595 num_stripes = (info->size * 2) / info_to_blocks_per_strip(info);
1596 if (info->level == 1)
1597 num_stripes /= 2;
1598
1599 return num_stripes;
1600 }
1601
1602 static __u32 info_to_blocks_per_member(mdu_array_info_t *info)
1603 {
1604 return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1);
1605 }
1606
1607 static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
1608 unsigned long long size, char *name,
1609 char *homehost, int *uuid)
1610 {
1611 /* We are creating a volume inside a pre-existing container.
1612 * so st->sb is already set.
1613 */
1614 struct intel_super *super = st->sb;
1615 struct imsm_super *mpb = super->anchor;
1616 struct imsm_dev *dev;
1617 struct imsm_vol *vol;
1618 struct imsm_map *map;
1619 int idx = mpb->num_raid_devs;
1620 int i;
1621 unsigned long long array_blocks;
1622 __u32 offset = 0;
1623 size_t size_old, size_new;
1624
1625 if (mpb->num_raid_devs >= 2) {
1626 fprintf(stderr, Name": This imsm-container already has the "
1627 "maximum of 2 volumes\n");
1628 return 0;
1629 }
1630
1631 /* ensure the mpb is large enough for the new data */
1632 size_old = __le32_to_cpu(mpb->mpb_size);
1633 size_new = disks_to_mpb_size(info->nr_disks);
1634 if (size_new > size_old) {
1635 void *mpb_new;
1636 size_t size_round = ROUND_UP(size_new, 512);
1637
1638 if (posix_memalign(&mpb_new, 512, size_round) != 0) {
1639 fprintf(stderr, Name": could not allocate new mpb\n");
1640 return 0;
1641 }
1642 memcpy(mpb_new, mpb, size_old);
1643 free(mpb);
1644 mpb = mpb_new;
1645 super->anchor = mpb_new;
1646 mpb->mpb_size = __cpu_to_le32(size_new);
1647 memset(mpb_new + size_old, 0, size_round - size_old);
1648 }
1649 super->current_vol = idx;
1650 /* when creating the first raid device in this container set num_disks
1651 * to zero, i.e. delete this spare and add raid member devices in
1652 * add_to_super_imsm_volume()
1653 */
1654 if (super->current_vol == 0)
1655 mpb->num_disks = 0;
1656 sprintf(st->subarray, "%d", idx);
1657 dev = malloc(sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
1658 if (!dev) {
1659 fprintf(stderr, Name": could not allocate raid device\n");
1660 return 0;
1661 }
1662 strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
1663 array_blocks = calc_array_size(info->level, info->raid_disks,
1664 info->layout, info->chunk_size,
1665 info->size*2);
1666 dev->size_low = __cpu_to_le32((__u32) array_blocks);
1667 dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
1668 dev->status = __cpu_to_le32(0);
1669 dev->reserved_blocks = __cpu_to_le32(0);
1670 vol = &dev->vol;
1671 vol->migr_state = 0;
1672 vol->migr_type = 0;
1673 vol->dirty = 0;
1674 vol->curr_migr_unit = 0;
1675 for (i = 0; i < idx; i++) {
1676 struct imsm_dev *prev = get_imsm_dev(super, i);
1677 struct imsm_map *pmap = get_imsm_map(prev, 0);
1678
1679 offset += __le32_to_cpu(pmap->blocks_per_member);
1680 offset += IMSM_RESERVED_SECTORS;
1681 }
1682 map = get_imsm_map(dev, 0);
1683 map->pba_of_lba0 = __cpu_to_le32(offset);
1684 map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
1685 map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
1686 map->num_data_stripes = __cpu_to_le32(info_to_num_data_stripes(info));
1687 map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
1688 IMSM_T_STATE_NORMAL;
1689
1690 if (info->level == 1 && info->raid_disks > 2) {
1691 fprintf(stderr, Name": imsm does not support more than 2 disks"
1692 "in a raid1 volume\n");
1693 return 0;
1694 }
1695 if (info->level == 10)
1696 map->raid_level = 1;
1697 else
1698 map->raid_level = info->level;
1699
1700 map->num_members = info->raid_disks;
1701 for (i = 0; i < map->num_members; i++) {
1702 /* initialized in add_to_super */
1703 set_imsm_ord_tbl_ent(map, i, 0);
1704 }
1705 mpb->num_raid_devs++;
1706 super->dev_tbl[super->current_vol] = dev;
1707
1708 return 1;
1709 }
1710
1711 static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
1712 unsigned long long size, char *name,
1713 char *homehost, int *uuid)
1714 {
1715 /* This is primarily called by Create when creating a new array.
1716 * We will then get add_to_super called for each component, and then
1717 * write_init_super called to write it out to each device.
1718 * For IMSM, Create can create on fresh devices or on a pre-existing
1719 * array.
1720 * To create on a pre-existing array a different method will be called.
1721 * This one is just for fresh drives.
1722 */
1723 struct intel_super *super;
1724 struct imsm_super *mpb;
1725 size_t mpb_size;
1726
1727 if (!info) {
1728 st->sb = NULL;
1729 return 0;
1730 }
1731 if (st->sb)
1732 return init_super_imsm_volume(st, info, size, name, homehost,
1733 uuid);
1734
1735 super = alloc_super(1);
1736 if (!super)
1737 return 0;
1738 mpb_size = disks_to_mpb_size(info->nr_disks);
1739 if (posix_memalign(&super->buf, 512, mpb_size) != 0) {
1740 free(super);
1741 return 0;
1742 }
1743 mpb = super->buf;
1744 memset(mpb, 0, mpb_size);
1745
1746 memcpy(mpb->sig, MPB_SIGNATURE, strlen(MPB_SIGNATURE));
1747 memcpy(mpb->sig + strlen(MPB_SIGNATURE), MPB_VERSION_RAID5,
1748 strlen(MPB_VERSION_RAID5));
1749 mpb->mpb_size = mpb_size;
1750
1751 st->sb = super;
1752 return 1;
1753 }
1754
1755 #ifndef MDASSEMBLE
/* add_to_super_imsm_volume - slot the disk described by 'dk' into the
 * volume currently being created (super->current_vol)
 *
 * The disk must already be present in super->disks (added by
 * add_to_super_imsm).  It is given a metadata index if it was a spare,
 * recorded in the volume's ord table, and marked as a configured member.
 */
static void add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
				     int fd, char *devname)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct dl *dl;
	struct imsm_dev *dev;
	struct imsm_map *map;
	__u32 status;

	dev = get_imsm_dev(super, super->current_vol);
	map = get_imsm_map(dev, 0);

	/* find the previously registered entry for this disk */
	for (dl = super->disks; dl ; dl = dl->next)
		if (dl->major == dk->major &&
		    dl->minor == dk->minor)
			break;

	/* ignore unknown disks and disks that are not in-sync members */
	if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
		return;

	/* add a pristine spare to the metadata */
	if (dl->index < 0) {
		dl->index = super->anchor->num_disks;
		super->anchor->num_disks++;
	}
	set_imsm_ord_tbl_ent(map, dk->number, dl->index);
	status = CONFIGURED_DISK | USABLE_DISK;
	dl->disk.status = __cpu_to_le32(status);

	/* if we are creating the first raid device update the family number */
	if (super->current_vol == 0) {
		__u32 sum;
		struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
		struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index);

		/* publish the device and disk into the anchor first so the
		 * checksum below covers the real contents
		 */
		*_dev = *dev;
		*_disk = dl->disk;
		sum = __gen_imsm_checksum(mpb);
		mpb->family_num = __cpu_to_le32(sum);
	}
}
1798
1799 static void add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
1800 int fd, char *devname)
1801 {
1802 struct intel_super *super = st->sb;
1803 struct dl *dd;
1804 unsigned long long size;
1805 __u32 status, id;
1806 int rv;
1807 struct stat stb;
1808
1809 if (super->current_vol >= 0) {
1810 add_to_super_imsm_volume(st, dk, fd, devname);
1811 return;
1812 }
1813
1814 fstat(fd, &stb);
1815 dd = malloc(sizeof(*dd));
1816 if (!dd) {
1817 fprintf(stderr,
1818 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
1819 abort();
1820 }
1821 memset(dd, 0, sizeof(*dd));
1822 dd->major = major(stb.st_rdev);
1823 dd->minor = minor(stb.st_rdev);
1824 dd->index = -1;
1825 dd->devname = devname ? strdup(devname) : NULL;
1826 dd->fd = fd;
1827 rv = imsm_read_serial(fd, devname, dd->serial);
1828 if (rv) {
1829 fprintf(stderr,
1830 Name ": failed to retrieve scsi serial, aborting\n");
1831 free(dd);
1832 abort();
1833 }
1834
1835 get_dev_size(fd, NULL, &size);
1836 size /= 512;
1837 status = USABLE_DISK | SPARE_DISK;
1838 serialcpy(dd->disk.serial, dd->serial);
1839 dd->disk.total_blocks = __cpu_to_le32(size);
1840 dd->disk.status = __cpu_to_le32(status);
1841 if (sysfs_disk_to_scsi_id(fd, &id) == 0)
1842 dd->disk.scsi_id = __cpu_to_le32(id);
1843 else
1844 dd->disk.scsi_id = __cpu_to_le32(0);
1845
1846 if (st->update_tail) {
1847 dd->next = super->add;
1848 super->add = dd;
1849 } else {
1850 dd->next = super->disks;
1851 super->disks = dd;
1852 }
1853 }
1854
1855 static int store_imsm_mpb(int fd, struct intel_super *super);
1856
/* spare records have their own family number and do not have any defined raid
 * devices
 *
 * Writes a minimal single-disk mpb to each spare (index == -1) in
 * super->disks.  The anchor is rewritten in place and restored from
 * mpb_save before returning.  Returns 0 on success, 1 on write failure.
 */
static int write_super_imsm_spares(struct intel_super *super, int doclose)
{
	struct imsm_super mpb_save;
	struct imsm_super *mpb = super->anchor;
	__u32 sum;
	struct dl *d;

	/* temporarily turn the anchor into a one-disk spare record */
	mpb_save = *mpb;
	mpb->num_raid_devs = 0;
	mpb->num_disks = 1;
	/* NOTE(review): mpb_size and num fields are stored without
	 * __cpu_to_le32 here, unlike other writers -- confirm on big-endian
	 */
	mpb->mpb_size = sizeof(struct imsm_super);
	mpb->generation_num = __cpu_to_le32(1UL);

	for (d = super->disks; d; d = d->next) {
		if (d->index != -1)
			continue; /* not a spare */

		mpb->disk[0] = d->disk;
		/* family_num is derived from a checksum taken before
		 * check_sum is filled in, hence the two checksum passes
		 */
		sum = __gen_imsm_checksum(mpb);
		mpb->family_num = __cpu_to_le32(sum);
		sum = __gen_imsm_checksum(mpb);
		mpb->check_sum = __cpu_to_le32(sum);

		if (store_imsm_mpb(d->fd, super)) {
			fprintf(stderr, "%s: failed for device %d:%d %s\n",
				__func__, d->major, d->minor, strerror(errno));
			*mpb = mpb_save;
			return 1;
		}
		if (doclose) {
			close(d->fd);
			d->fd = -1;
		}
	}

	/* restore the real anchor contents */
	*mpb = mpb_save;
	return 0;
}
1898
/* write_super_imsm - publish the current metadata to every raid member
 *
 * Bumps the generation number, refreshes the per-disk records (including
 * missing disks, which keep their slot), re-copies the cached raid device
 * table into the anchor, recomputes size and checksum, and writes the mpb
 * to each member disk.  Spares are then handled by
 * write_super_imsm_spares().  Returns 0 on success.
 */
static int write_super_imsm(struct intel_super *super, int doclose)
{
	struct imsm_super *mpb = super->anchor;
	struct dl *d;
	__u32 generation;
	__u32 sum;
	int spares = 0;
	int i;
	/* header size minus the one static disk[] entry; real disk entries
	 * are added back below
	 */
	__u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);

	/* 'generation' is incremented everytime the metadata is written */
	generation = __le32_to_cpu(mpb->generation_num);
	generation++;
	mpb->generation_num = __cpu_to_le32(generation);

	mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;
	for (d = super->disks; d; d = d->next) {
		if (d->index == -1)
			spares++;
		else
			mpb->disk[d->index] = d->disk;
	}
	/* disks that have disappeared keep their slot in the mpb */
	for (d = super->missing; d; d = d->next)
		mpb->disk[d->index] = d->disk;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);

		imsm_copy_dev(dev, super->dev_tbl[i]);
		mpb_size += sizeof_imsm_dev(dev, 0);
	}
	mpb_size += __le32_to_cpu(mpb->bbm_log_size);
	mpb->mpb_size = __cpu_to_le32(mpb_size);

	/* recalculate checksum */
	sum = __gen_imsm_checksum(mpb);
	mpb->check_sum = __cpu_to_le32(sum);

	/* write the mpb for disks that compose raid devices */
	for (d = super->disks; d ; d = d->next) {
		if (d->index < 0)
			continue;
		if (store_imsm_mpb(d->fd, super))
			fprintf(stderr, "%s: failed for device %d:%d %s\n",
				__func__, d->major, d->minor, strerror(errno));
		if (doclose) {
			close(d->fd);
			d->fd = -1;
		}
	}

	if (spares)
		return write_super_imsm_spares(super, doclose);

	return 0;
}
1955
1956
1957 static int create_array(struct supertype *st)
1958 {
1959 size_t len;
1960 struct imsm_update_create_array *u;
1961 struct intel_super *super = st->sb;
1962 struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
1963
1964 len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0);
1965 u = malloc(len);
1966 if (!u) {
1967 fprintf(stderr, "%s: failed to allocate update buffer\n",
1968 __func__);
1969 return 1;
1970 }
1971
1972 u->type = update_create_array;
1973 u->dev_idx = super->current_vol;
1974 imsm_copy_dev(&u->dev, dev);
1975 append_metadata_update(st, u, len);
1976
1977 return 0;
1978 }
1979
1980 static int _add_disk(struct supertype *st)
1981 {
1982 struct intel_super *super = st->sb;
1983 size_t len;
1984 struct imsm_update_add_disk *u;
1985
1986 if (!super->add)
1987 return 0;
1988
1989 len = sizeof(*u);
1990 u = malloc(len);
1991 if (!u) {
1992 fprintf(stderr, "%s: failed to allocate update buffer\n",
1993 __func__);
1994 return 1;
1995 }
1996
1997 u->type = update_add_disk;
1998 append_metadata_update(st, u, len);
1999
2000 return 0;
2001 }
2002
2003 static int write_init_super_imsm(struct supertype *st)
2004 {
2005 if (st->update_tail) {
2006 /* queue the recently created array / added disk
2007 * as a metadata update */
2008 struct intel_super *super = st->sb;
2009 struct dl *d;
2010 int rv;
2011
2012 /* determine if we are creating a volume or adding a disk */
2013 if (super->current_vol < 0) {
2014 /* in the add disk case we are running in mdmon
2015 * context, so don't close fd's
2016 */
2017 return _add_disk(st);
2018 } else
2019 rv = create_array(st);
2020
2021 for (d = super->disks; d ; d = d->next) {
2022 close(d->fd);
2023 d->fd = -1;
2024 }
2025
2026 return rv;
2027 } else
2028 return write_super_imsm(st->sb, 1);
2029 }
2030 #endif
2031
/* store_zero_imsm - erase the imsm anchor by overwriting the
 * second-to-last sector of the device with zeros
 * Returns 0 on success, 1 on failure.
 */
static int store_zero_imsm(struct supertype *st, int fd)
{
	unsigned long long dsize;
	void *buf;
	int rv = 0;

	get_dev_size(fd, NULL, &dsize);

	/* first block is stored on second to last sector of the disk */
	if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
		return 1;

	if (posix_memalign(&buf, 512, 512) != 0)
		return 1;

	memset(buf, 0, 512);
	if (write(fd, buf, 512) != 512)
		rv = 1;
	/* 'buf' was previously leaked on every path */
	free(buf);
	return rv;
}
2051
2052 static int imsm_bbm_log_size(struct imsm_super *mpb)
2053 {
2054 return __le32_to_cpu(mpb->bbm_log_size);
2055 }
2056
2057 #ifndef MDASSEMBLE
2058 static int validate_geometry_imsm_container(struct supertype *st, int level,
2059 int layout, int raiddisks, int chunk,
2060 unsigned long long size, char *dev,
2061 unsigned long long *freesize,
2062 int verbose)
2063 {
2064 int fd;
2065 unsigned long long ldsize;
2066
2067 if (level != LEVEL_CONTAINER)
2068 return 0;
2069 if (!dev)
2070 return 1;
2071
2072 fd = open(dev, O_RDONLY|O_EXCL, 0);
2073 if (fd < 0) {
2074 if (verbose)
2075 fprintf(stderr, Name ": imsm: Cannot open %s: %s\n",
2076 dev, strerror(errno));
2077 return 0;
2078 }
2079 if (!get_dev_size(fd, dev, &ldsize)) {
2080 close(fd);
2081 return 0;
2082 }
2083 close(fd);
2084
2085 *freesize = avail_size_imsm(st, ldsize >> 9);
2086
2087 return 1;
2088 }
2089
2090 /* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
2091 * FIX ME add ahci details
2092 */
2093 static int validate_geometry_imsm_volume(struct supertype *st, int level,
2094 int layout, int raiddisks, int chunk,
2095 unsigned long long size, char *dev,
2096 unsigned long long *freesize,
2097 int verbose)
2098 {
2099 struct stat stb;
2100 struct intel_super *super = st->sb;
2101 struct dl *dl;
2102 unsigned long long pos = 0;
2103 unsigned long long maxsize;
2104 struct extent *e;
2105 int i;
2106
2107 if (level == LEVEL_CONTAINER)
2108 return 0;
2109
2110 if (level == 1 && raiddisks > 2) {
2111 if (verbose)
2112 fprintf(stderr, Name ": imsm does not support more "
2113 "than 2 in a raid1 configuration\n");
2114 return 0;
2115 }
2116
2117 /* We must have the container info already read in. */
2118 if (!super)
2119 return 0;
2120
2121 if (!dev) {
2122 /* General test: make sure there is space for
2123 * 'raiddisks' device extents of size 'size' at a given
2124 * offset
2125 */
2126 unsigned long long minsize = size*2 /* convert to blocks */;
2127 unsigned long long start_offset = ~0ULL;
2128 int dcnt = 0;
2129 if (minsize == 0)
2130 minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
2131 for (dl = super->disks; dl ; dl = dl->next) {
2132 int found = 0;
2133
2134 pos = 0;
2135 i = 0;
2136 e = get_extents(super, dl);
2137 if (!e) continue;
2138 do {
2139 unsigned long long esize;
2140 esize = e[i].start - pos;
2141 if (esize >= minsize)
2142 found = 1;
2143 if (found && start_offset == ~0ULL) {
2144 start_offset = pos;
2145 break;
2146 } else if (found && pos != start_offset) {
2147 found = 0;
2148 break;
2149 }
2150 pos = e[i].start + e[i].size;
2151 i++;
2152 } while (e[i-1].size);
2153 if (found)
2154 dcnt++;
2155 free(e);
2156 }
2157 if (dcnt < raiddisks) {
2158 if (verbose)
2159 fprintf(stderr, Name ": imsm: Not enough "
2160 "devices with space for this array "
2161 "(%d < %d)\n",
2162 dcnt, raiddisks);
2163 return 0;
2164 }
2165 return 1;
2166 }
2167 /* This device must be a member of the set */
2168 if (stat(dev, &stb) < 0)
2169 return 0;
2170 if ((S_IFMT & stb.st_mode) != S_IFBLK)
2171 return 0;
2172 for (dl = super->disks ; dl ; dl = dl->next) {
2173 if (dl->major == major(stb.st_rdev) &&
2174 dl->minor == minor(stb.st_rdev))
2175 break;
2176 }
2177 if (!dl) {
2178 if (verbose)
2179 fprintf(stderr, Name ": %s is not in the "
2180 "same imsm set\n", dev);
2181 return 0;
2182 }
2183 e = get_extents(super, dl);
2184 maxsize = 0;
2185 i = 0;
2186 if (e) do {
2187 unsigned long long esize;
2188 esize = e[i].start - pos;
2189 if (esize >= maxsize)
2190 maxsize = esize;
2191 pos = e[i].start + e[i].size;
2192 i++;
2193 } while (e[i-1].size);
2194 *freesize = maxsize;
2195
2196 return 1;
2197 }
2198
/* Validate a requested array geometry against imsm constraints.
 *
 * Returns 1 when the geometry is acceptable (reporting usable space via
 * *freesize where applicable), 0 when it is not.  Three cases:
 *   - level == LEVEL_CONTAINER: validate creation of a new container
 *   - st->sb already loaded: validate a new member volume of that container
 *   - otherwise: probe whether 'dev' is busy because it belongs to an imsm
 *     container, and if so load the container and validate the volume
 */
static int validate_geometry_imsm(struct supertype *st, int level, int layout,
				  int raiddisks, int chunk, unsigned long long size,
				  char *dev, unsigned long long *freesize,
				  int verbose)
{
	int fd, cfd;
	struct mdinfo *sra;

	/* if given unused devices create a container
	 * if given given devices in a container create a member volume
	 */
	if (level == LEVEL_CONTAINER) {
		/* Must be a fresh device to add to a container */
		return validate_geometry_imsm_container(st, level, layout,
							raiddisks, chunk, size,
							dev, freesize,
							verbose);
	}

	if (st->sb) {
		/* creating in a given container */
		return validate_geometry_imsm_volume(st, level, layout,
						     raiddisks, chunk, size,
						     dev, freesize, verbose);
	}

	/* limit creation to the following levels */
	if (!dev)
		switch (level) {
		case 0:
		case 1:
		case 10:
		case 5:
			break;
		default:
			return 1;
		}
	/* NOTE(review): when dev is NULL and the level is one of the
	 * supported cases above, control falls through to open(dev, ...)
	 * with a NULL path — presumably callers always supply a device
	 * in that situation; confirm against ->validate_geometry callers.
	 */

	/* This device needs to be a device in an 'imsm' container */
	fd = open(dev, O_RDONLY|O_EXCL, 0);
	if (fd >= 0) {
		/* the open succeeded, so the device is unused and cannot
		 * already be an imsm container member
		 */
		if (verbose)
			fprintf(stderr,
				Name ": Cannot create this array on device %s\n",
				dev);
		close(fd);
		return 0;
	}
	if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
		if (verbose)
			fprintf(stderr, Name ": Cannot open %s: %s\n",
				dev, strerror(errno));
		return 0;
	}
	/* Well, it is in use by someone, maybe an 'imsm' container. */
	cfd = open_container(fd);
	if (cfd < 0) {
		close(fd);
		if (verbose)
			fprintf(stderr, Name ": Cannot use %s: It is busy\n",
				dev);
		return 0;
	}
	sra = sysfs_read(cfd, 0, GET_VERSION);
	close(fd);
	if (sra && sra->array.major_version == -1 &&
	    strcmp(sra->text_version, "imsm") == 0) {
		/* This is a member of a imsm container. Load the container
		 * and try to create a volume
		 */
		struct intel_super *super;

		if (load_super_imsm_all(st, cfd, (void **) &super, NULL, 1) == 0) {
			st->sb = super;
			st->container_dev = fd2devnum(cfd);
			close(cfd);
			return validate_geometry_imsm_volume(st, level, layout,
							     raiddisks, chunk,
							     size, dev,
							     freesize, verbose);
		}
		close(cfd);
	} else /* may belong to another container */
		/* NOTE(review): cfd is not closed on this path — looks like
		 * a descriptor leak; verify before relying on this code.
		 */
		return 0;

	return 1;
}
2286 #endif /* MDASSEMBLE */
2287
2288 static struct mdinfo *container_content_imsm(struct supertype *st)
2289 {
2290 /* Given a container loaded by load_super_imsm_all,
2291 * extract information about all the arrays into
2292 * an mdinfo tree.
2293 *
2294 * For each imsm_dev create an mdinfo, fill it in,
2295 * then look for matching devices in super->disks
2296 * and create appropriate device mdinfo.
2297 */
2298 struct intel_super *super = st->sb;
2299 struct imsm_super *mpb = super->anchor;
2300 struct mdinfo *rest = NULL;
2301 int i;
2302
2303 /* do not assemble arrays that might have bad blocks */
2304 if (imsm_bbm_log_size(super->anchor)) {
2305 fprintf(stderr, Name ": BBM log found in metadata. "
2306 "Cannot activate array(s).\n");
2307 return NULL;
2308 }
2309
2310 for (i = 0; i < mpb->num_raid_devs; i++) {
2311 struct imsm_dev *dev = get_imsm_dev(super, i);
2312 struct imsm_map *map = get_imsm_map(dev, 0);
2313 struct mdinfo *this;
2314 int slot;
2315
2316 this = malloc(sizeof(*this));
2317 memset(this, 0, sizeof(*this));
2318 this->next = rest;
2319
2320 super->current_vol = i;
2321 getinfo_super_imsm_volume(st, this);
2322 for (slot = 0 ; slot < map->num_members; slot++) {
2323 struct mdinfo *info_d;
2324 struct dl *d;
2325 int idx;
2326 int skip;
2327 __u32 s;
2328 __u32 ord;
2329
2330 skip = 0;
2331 idx = get_imsm_disk_idx(dev, slot);
2332 ord = get_imsm_ord_tbl_ent(dev, slot);
2333 for (d = super->disks; d ; d = d->next)
2334 if (d->index == idx)
2335 break;
2336
2337 if (d == NULL)
2338 skip = 1;
2339
2340 s = d ? __le32_to_cpu(d->disk.status) : 0;
2341 if (s & FAILED_DISK)
2342 skip = 1;
2343 if (!(s & USABLE_DISK))
2344 skip = 1;
2345 if (ord & IMSM_ORD_REBUILD)
2346 skip = 1;
2347
2348 /*
2349 * if we skip some disks the array will be assmebled degraded;
2350 * reset resync start to avoid a dirty-degraded situation
2351 *
2352 * FIXME handle dirty degraded
2353 */
2354 if (skip && !dev->vol.dirty)
2355 this->resync_start = ~0ULL;
2356 if (skip)
2357 continue;
2358
2359 info_d = malloc(sizeof(*info_d));
2360 if (!info_d) {
2361 fprintf(stderr, Name ": failed to allocate disk"
2362 " for volume %s\n", (char *) dev->volume);
2363 free(this);
2364 this = rest;
2365 break;
2366 }
2367 memset(info_d, 0, sizeof(*info_d));
2368 info_d->next = this->devs;
2369 this->devs = info_d;
2370
2371 info_d->disk.number = d->index;
2372 info_d->disk.major = d->major;
2373 info_d->disk.minor = d->minor;
2374 info_d->disk.raid_disk = slot;
2375
2376 this->array.working_disks++;
2377
2378 info_d->events = __le32_to_cpu(mpb->generation_num);
2379 info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
2380 info_d->component_size = __le32_to_cpu(map->blocks_per_member);
2381 if (d->devname)
2382 strcpy(info_d->name, d->devname);
2383 }
2384 rest = this;
2385 }
2386
2387 return rest;
2388 }
2389
2390
2391 #ifndef MDASSEMBLE
2392 static int imsm_open_new(struct supertype *c, struct active_array *a,
2393 char *inst)
2394 {
2395 struct intel_super *super = c->sb;
2396 struct imsm_super *mpb = super->anchor;
2397
2398 if (atoi(inst) >= mpb->num_raid_devs) {
2399 fprintf(stderr, "%s: subarry index %d, out of range\n",
2400 __func__, atoi(inst));
2401 return -ENODEV;
2402 }
2403
2404 dprintf("imsm: open_new %s\n", inst);
2405 a->info.container_member = atoi(inst);
2406 return 0;
2407 }
2408
/* Derive the map state a volume should be in given 'failed' member disks.
 *
 * With no failures the state is NORMAL (or stays UNINITIALIZED).  With
 * failures the result depends on the raid level's redundancy: raid0 has
 * none, raid1 survives until all members fail, raid10 survives as long as
 * each mirror pair keeps one in-sync member, raid5 survives one failure.
 * Falls back to the current map_state for unknown levels.
 */
static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed)
{
	struct imsm_map *map = get_imsm_map(dev, 0);

	if (!failed)
		return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
			IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;

	switch (get_imsm_raid_level(map)) {
	case 0:
		/* raid0 has no redundancy: any failure is fatal */
		return IMSM_T_STATE_FAILED;
		break;
	case 1:
		/* raid1 survives until every member has failed */
		if (failed < map->num_members)
			return IMSM_T_STATE_DEGRADED;
		else
			return IMSM_T_STATE_FAILED;
		break;
	case 10:
	{
		/**
		 * check to see if any mirrors have failed, otherwise we
		 * are degraded.  Even numbered slots are mirrored on
		 * slot+1
		 */
		int i;
		int insync;

		for (i = 0; i < map->num_members; i++) {
			__u32 ord = get_imsm_ord_tbl_ent(dev, i);
			int idx = ord_to_idx(ord);
			struct imsm_disk *disk;

			/* reset the potential in-sync count on even-numbered
			 * slots.  num_copies is always 2 for imsm raid10
			 */
			if ((i & 1) == 0)
				insync = 2;

			disk = get_imsm_disk(super, idx);
			if (!disk ||
			    __le32_to_cpu(disk->status) & FAILED_DISK ||
			    ord & IMSM_ORD_REBUILD)
				insync--;

			/* no in-sync disks left in this mirror the
			 * array has failed
			 */
			if (insync == 0)
				return IMSM_T_STATE_FAILED;
		}

		return IMSM_T_STATE_DEGRADED;
	}
	case 5:
		/* raid5 tolerates exactly one failed member */
		if (failed < 2)
			return IMSM_T_STATE_DEGRADED;
		else
			return IMSM_T_STATE_FAILED;
		break;
	default:
		break;
	}

	return map->map_state;
}
2475
2476 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev)
2477 {
2478 int i;
2479 int failed = 0;
2480 struct imsm_disk *disk;
2481 struct imsm_map *map = get_imsm_map(dev, 0);
2482
2483 for (i = 0; i < map->num_members; i++) {
2484 __u32 ord = get_imsm_ord_tbl_ent(dev, i);
2485 int idx = ord_to_idx(ord);
2486
2487 disk = get_imsm_disk(super, idx);
2488 if (!disk ||
2489 __le32_to_cpu(disk->status) & FAILED_DISK ||
2490 ord & IMSM_ORD_REBUILD)
2491 failed++;
2492 }
2493
2494 return failed;
2495 }
2496
2497 static int is_resyncing(struct imsm_dev *dev)
2498 {
2499 struct imsm_map *migr_map;
2500
2501 if (!dev->vol.migr_state)
2502 return 0;
2503
2504 if (dev->vol.migr_type == 0)
2505 return 1;
2506
2507 migr_map = get_imsm_map(dev, 1);
2508
2509 if (migr_map->map_state == IMSM_T_STATE_NORMAL)
2510 return 1;
2511 else
2512 return 0;
2513 }
2514
2515 static int is_rebuilding(struct imsm_dev *dev)
2516 {
2517 struct imsm_map *migr_map;
2518
2519 if (!dev->vol.migr_state)
2520 return 0;
2521
2522 if (dev->vol.migr_type == 0)
2523 return 0;
2524
2525 migr_map = get_imsm_map(dev, 1);
2526
2527 if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
2528 return 1;
2529 else
2530 return 0;
2531 }
2532
2533 static void mark_failure(struct imsm_disk *disk)
2534 {
2535 __u32 status = __le32_to_cpu(disk->status);
2536
2537 if (status & FAILED_DISK)
2538 return;
2539 status |= FAILED_DISK;
2540 disk->status = __cpu_to_le32(status);
2541 disk->scsi_id = __cpu_to_le32(~(__u32)0);
2542 memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
2543 }
2544
/* Handle dirty -> clean transititions and resync.  Degraded and rebuild
 * states are handled in imsm_set_disk() with one exception, when a
 * resync is stopped due to a new failure this routine will set the
 * 'degraded' state for the array.
 *
 * 'consistent' == 2 means the array is being activated; any disks on the
 * missing list are marked failed first.  Returns the (possibly demoted)
 * consistent value that mdmon should record.
 */
static int imsm_set_array_state(struct active_array *a, int consistent)
{
	int inst = a->info.container_member;
	struct intel_super *super = a->container->sb;
	struct imsm_dev *dev = get_imsm_dev(super, inst);
	struct imsm_map *map = get_imsm_map(dev, 0);
	int failed = imsm_count_failed(super, dev);
	__u8 map_state = imsm_check_degraded(super, dev, failed);

	/* before we activate this array handle any missing disks */
	if (consistent == 2 && super->missing) {
		struct dl *dl;

		dprintf("imsm: mark missing\n");
		end_migration(dev, map_state);
		for (dl = super->missing; dl; dl = dl->next)
			mark_failure(&dl->disk);
		super->updates_pending++;
	}

	/* an activation cannot be reported clean unless the resync is
	 * complete, the map is NORMAL and no migration is in flight
	 */
	if (consistent == 2 &&
	    (!is_resync_complete(a) ||
	     map_state != IMSM_T_STATE_NORMAL ||
	     dev->vol.migr_state))
		consistent = 0;

	if (is_resync_complete(a)) {
		/* complete intialization / resync,
		 * recovery is completed in ->set_disk
		 */
		if (is_resyncing(dev)) {
			dprintf("imsm: mark resync done\n");
			end_migration(dev, map_state);
			super->updates_pending++;
		}
	} else if (!is_resyncing(dev) && !failed) {
		/* mark the start of the init process if nothing is failed */
		dprintf("imsm: mark resync start (%llu)\n", a->resync_start);
		map->map_state = map_state;
		migrate(dev, IMSM_T_STATE_NORMAL,
			map->map_state == IMSM_T_STATE_NORMAL);
		super->updates_pending++;
	}

	/* check if we can update the migration checkpoint */
	if (dev->vol.migr_state &&
	    __le32_to_cpu(dev->vol.curr_migr_unit) != a->resync_start) {
		dprintf("imsm: checkpoint migration (%llu)\n", a->resync_start);
		dev->vol.curr_migr_unit = __cpu_to_le32(a->resync_start);
		super->updates_pending++;
	}

	/* mark dirty / clean */
	if (dev->vol.dirty != !consistent) {
		dprintf("imsm: mark '%s' (%llu)\n",
			consistent ? "clean" : "dirty", a->resync_start);
		if (consistent)
			dev->vol.dirty = 0;
		else
			dev->vol.dirty = 1;
		super->updates_pending++;
	}
	return consistent;
}
2614
2615 static void imsm_set_disk(struct active_array *a, int n, int state)
2616 {
2617 int inst = a->info.container_member;
2618 struct intel_super *super = a->container->sb;
2619 struct imsm_dev *dev = get_imsm_dev(super, inst);
2620 struct imsm_map *map = get_imsm_map(dev, 0);
2621 struct imsm_disk *disk;
2622 int failed;
2623 __u32 status;
2624 __u32 ord;
2625 __u8 map_state;
2626
2627 if (n > map->num_members)
2628 fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
2629 n, map->num_members - 1);
2630
2631 if (n < 0)
2632 return;
2633
2634 dprintf("imsm: set_disk %d:%x\n", n, state);
2635
2636 ord = get_imsm_ord_tbl_ent(dev, n);
2637 disk = get_imsm_disk(super, ord_to_idx(ord));
2638
2639 /* check for new failures */
2640 status = __le32_to_cpu(disk->status);
2641 if ((state & DS_FAULTY) && !(status & FAILED_DISK)) {
2642 mark_failure(disk);
2643 super->updates_pending++;
2644 }
2645
2646 /* check if in_sync */
2647 if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD) {
2648 struct imsm_map *migr_map = get_imsm_map(dev, 1);
2649
2650 set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
2651 super->updates_pending++;
2652 }
2653
2654 failed = imsm_count_failed(super, dev);
2655 map_state = imsm_check_degraded(super, dev, failed);
2656
2657 /* check if recovery complete, newly degraded, or failed */
2658 if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) {
2659 end_migration(dev, map_state);
2660 super->updates_pending++;
2661 } else if (map_state == IMSM_T_STATE_DEGRADED &&
2662 map->map_state != map_state &&
2663 !dev->vol.migr_state) {
2664 dprintf("imsm: mark degraded\n");
2665 map->map_state = map_state;
2666 super->updates_pending++;
2667 } else if (map_state == IMSM_T_STATE_FAILED &&
2668 map->map_state != map_state) {
2669 dprintf("imsm: mark failed\n");
2670 end_migration(dev, map_state);
2671 super->updates_pending++;
2672 }
2673 }
2674
2675 static int store_imsm_mpb(int fd, struct intel_super *super)
2676 {
2677 struct imsm_super *mpb = super->anchor;
2678 __u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
2679 unsigned long long dsize;
2680 unsigned long long sectors;
2681
2682 get_dev_size(fd, NULL, &dsize);
2683
2684 if (mpb_size > 512) {
2685 /* -1 to account for anchor */
2686 sectors = mpb_sectors(mpb) - 1;
2687
2688 /* write the extended mpb to the sectors preceeding the anchor */
2689 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
2690 return 1;
2691
2692 if (write(fd, super->buf + 512, 512 * sectors) != 512 * sectors)
2693 return 1;
2694 }
2695
2696 /* first block is stored on second to last sector of the disk */
2697 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
2698 return 1;
2699
2700 if (write(fd, super->buf, 512) != 512)
2701 return 1;
2702
2703 return 0;
2704 }
2705
2706 static void imsm_sync_metadata(struct supertype *container)
2707 {
2708 struct intel_super *super = container->sb;
2709
2710 if (!super->updates_pending)
2711 return;
2712
2713 write_super_imsm(super, 0);
2714
2715 super->updates_pending = 0;
2716 }
2717
2718 static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
2719 {
2720 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
2721 int i = get_imsm_disk_idx(dev, idx);
2722 struct dl *dl;
2723
2724 for (dl = super->disks; dl; dl = dl->next)
2725 if (dl->index == i)
2726 break;
2727
2728 if (dl && __le32_to_cpu(dl->disk.status) & FAILED_DISK)
2729 dl = NULL;
2730
2731 if (dl)
2732 dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor);
2733
2734 return dl;
2735 }
2736
2737 static struct dl *imsm_add_spare(struct intel_super *super, int slot, struct active_array *a)
2738 {
2739 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
2740 int idx = get_imsm_disk_idx(dev, slot);
2741 struct imsm_map *map = get_imsm_map(dev, 0);
2742 unsigned long long esize;
2743 unsigned long long pos;
2744 struct mdinfo *d;
2745 struct extent *ex;
2746 int j;
2747 int found;
2748 __u32 array_start;
2749 __u32 status;
2750 struct dl *dl;
2751
2752 for (dl = super->disks; dl; dl = dl->next) {
2753 /* If in this array, skip */
2754 for (d = a->info.devs ; d ; d = d->next)
2755 if (d->state_fd >= 0 &&
2756 d->disk.major == dl->major &&
2757 d->disk.minor == dl->minor) {
2758 dprintf("%x:%x already in array\n", dl->major, dl->minor);
2759 break;
2760 }
2761 if (d)
2762 continue;
2763
2764 /* skip in use or failed drives */
2765 status = __le32_to_cpu(dl->disk.status);
2766 if (status & FAILED_DISK || idx == dl->index) {
2767 dprintf("%x:%x status ( %s%s)\n",
2768 dl->major, dl->minor,
2769 status & FAILED_DISK ? "failed " : "",
2770 idx == dl->index ? "in use " : "");
2771 continue;
2772 }
2773
2774 /* Does this unused device have the requisite free space?
2775 * We need a->info.component_size sectors
2776 */
2777 ex = get_extents(super, dl);
2778 if (!ex) {
2779 dprintf("cannot get extents\n");
2780 continue;
2781 }
2782 found = 0;
2783 j = 0;
2784 pos = 0;
2785 array_start = __le32_to_cpu(map->pba_of_lba0);
2786
2787 do {
2788 /* check that we can start at pba_of_lba0 with
2789 * a->info.component_size of space
2790 */
2791 esize = ex[j].start - pos;
2792 if (array_start >= pos &&
2793 array_start + a->info.component_size < ex[j].start) {
2794 found = 1;
2795 break;
2796 }
2797 pos = ex[j].start + ex[j].size;
2798 j++;
2799
2800 } while (ex[j-1].size);
2801
2802 free(ex);
2803 if (!found) {
2804 dprintf("%x:%x does not have %llu at %d\n",
2805 dl->major, dl->minor,
2806 a->info.component_size,
2807 __le32_to_cpu(map->pba_of_lba0));
2808 /* No room */
2809 continue;
2810 } else
2811 break;
2812 }
2813
2814 return dl;
2815 }
2816
static struct mdinfo *imsm_activate_spare(struct active_array *a,
					  struct metadata_update **updates)
{
	/**
	 * Find a device with unused free space and use it to replace a
	 * failed/vacant region in an array.  We replace failed regions one a
	 * array at a time.  The result is that a new spare disk will be added
	 * to the first failed array and after the monitor has finished
	 * propagating failures the remainder will be consumed.
	 *
	 * FIXME add a capability for mdmon to request spares from another
	 * container.
	 */

	struct intel_super *super = a->container->sb;
	int inst = a->info.container_member;
	struct imsm_dev *dev = get_imsm_dev(super, inst);
	struct imsm_map *map = get_imsm_map(dev, 0);
	int failed = a->info.array.raid_disks;
	struct mdinfo *rv = NULL;
	struct mdinfo *d;
	struct mdinfo *di;
	struct metadata_update *mu;
	struct dl *dl;
	struct imsm_update_activate_spare *u;
	int num_spares = 0;
	int i;

	/* count vacancies; any faulty device still holding its state_fd
	 * has not been removed by the kernel yet — wait for that first
	 */
	for (d = a->info.devs ; d ; d = d->next) {
		if ((d->curr_state & DS_FAULTY) &&
		    d->state_fd >= 0)
			/* wait for Removal to happen */
			return NULL;
		if (d->state_fd >= 0)
			failed--;
	}

	dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
		inst, failed, a->info.array.raid_disks, a->info.array.level);
	/* only act when the array is degraded (not healthy, not dead) */
	if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED)
		return NULL;

	/* For each slot, if it is not working, find a spare */
	for (i = 0; i < a->info.array.raid_disks; i++) {
		for (d = a->info.devs ; d ; d = d->next)
			if (d->disk.raid_disk == i)
				break;
		dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
		if (d && (d->state_fd >= 0))
			continue;

		/*
		 * OK, this device needs recovery.  Try to re-add the previous
		 * occupant of this slot, if this fails add a new spare
		 */
		dl = imsm_readd(super, i, a);
		if (!dl)
			dl = imsm_add_spare(super, i, a);
		if (!dl)
			continue;

		/* found a usable disk with enough space */
		di = malloc(sizeof(*di));
		/* NOTE(review): malloc result is dereferenced unchecked by
		 * memset below — verify an OOM policy before relying on this.
		 */
		memset(di, 0, sizeof(*di));

		/* dl->index will be -1 in the case we are activating a
		 * pristine spare.  imsm_process_update() will create a
		 * new index in this case.  Once a disk is found to be
		 * failed in all member arrays it is kicked from the
		 * metadata
		 */
		di->disk.number = dl->index;

		/* (ab)use di->devs to store a pointer to the device
		 * we chose
		 */
		di->devs = (struct mdinfo *) dl;

		di->disk.raid_disk = i;
		di->disk.major = dl->major;
		di->disk.minor = dl->minor;
		di->disk.state = 0;
		di->data_offset = __le32_to_cpu(map->pba_of_lba0);
		di->component_size = a->info.component_size;
		di->container_member = inst;
		di->next = rv;
		rv = di;
		num_spares++;
		dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
			i, di->data_offset);

		/* one slot per invocation — remaining vacancies are filled
		 * on subsequent monitor passes
		 */
		break;
	}

	if (!rv)
		/* No spares found */
		return rv;
	/* Now 'rv' has a list of devices to return.
	 * Create a metadata_update record to update the
	 * disk_ord_tbl for the array
	 */
	mu = malloc(sizeof(*mu));
	/* NOTE(review): mu and mu->buf are also unchecked allocations */
	mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
	mu->space = NULL;
	mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
	mu->next = *updates;
	u = (struct imsm_update_activate_spare *) mu->buf;

	for (di = rv ; di ; di = di->next) {
		u->type = update_activate_spare;
		u->dl = (struct dl *) di->devs;
		di->devs = NULL;
		u->slot = di->disk.raid_disk;
		u->array = inst;
		u->next = u + 1;
		u++;
	}
	(u-1)->next = NULL;
	*updates = mu;

	return rv;
}
2939
2940 static int disks_overlap(struct imsm_dev *d1, struct imsm_dev *d2)
2941 {
2942 struct imsm_map *m1 = get_imsm_map(d1, 0);
2943 struct imsm_map *m2 = get_imsm_map(d2, 0);
2944 int i;
2945 int j;
2946 int idx;
2947
2948 for (i = 0; i < m1->num_members; i++) {
2949 idx = get_imsm_disk_idx(d1, i);
2950 for (j = 0; j < m2->num_members; j++)
2951 if (idx == get_imsm_disk_idx(d2, j))
2952 return 1;
2953 }
2954
2955 return 0;
2956 }
2957
2958 static void imsm_delete(struct intel_super *super, struct dl **dlp, int index);
2959
/* Apply a metadata_update envelope in the monitor thread.
 *
 * Handles three record types: activating a spare (rewrite the ord table
 * and possibly retire the replaced disk), creating a new subarray
 * (guarding against racing creates and spare collisions), and folding
 * newly-added spares into the container's disk list.
 */
static void imsm_process_update(struct supertype *st,
			        struct metadata_update *update)
{
	/**
	 * crack open the metadata_update envelope to find the update record
	 * update can be one of:
	 * 	update_activate_spare - a spare device has replaced a failed
	 * 	device in an array, update the disk_ord_tbl.  If this disk is
	 * 	present in all member arrays then also clear the SPARE_DISK
	 * 	flag
	 */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb;
	enum imsm_update_type type = *(enum imsm_update_type *) update->buf;

	/* update requires a larger buf but the allocation failed */
	if (super->next_len && !super->next_buf) {
		super->next_len = 0;
		return;
	}

	/* swap in the larger mpb buffer prepared by imsm_prepare_update() */
	if (super->next_buf) {
		memcpy(super->next_buf, super->buf, super->len);
		free(super->buf);
		super->len = super->next_len;
		super->buf = super->next_buf;

		super->next_len = 0;
		super->next_buf = NULL;
	}

	mpb = super->anchor;

	switch (type) {
	case update_activate_spare: {
		struct imsm_update_activate_spare *u = (void *) update->buf;
		struct imsm_dev *dev = get_imsm_dev(super, u->array);
		struct imsm_map *map = get_imsm_map(dev, 0);
		struct imsm_map *migr_map;
		struct active_array *a;
		struct imsm_disk *disk;
		__u32 status;
		__u8 to_state;
		struct dl *dl;
		unsigned int found;
		int failed;
		int victim = get_imsm_disk_idx(dev, u->slot);
		int i;

		/* the update's dl pointer must match a disk we know about */
		for (dl = super->disks; dl; dl = dl->next)
			if (dl == u->dl)
				break;

		if (!dl) {
			fprintf(stderr, "error: imsm_activate_spare passed "
				"an unknown disk (index: %d)\n",
				u->dl->index);
			return;
		}

		super->updates_pending++;

		/* count failures (excluding rebuilds and the victim)
		 * to determine map[0] state
		 */
		failed = 0;
		for (i = 0; i < map->num_members; i++) {
			if (i == u->slot)
				continue;
			disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i));
			if (!disk ||
			    __le32_to_cpu(disk->status) & FAILED_DISK)
				failed++;
		}

		/* adding a pristine spare, assign a new index */
		if (dl->index < 0) {
			dl->index = super->anchor->num_disks;
			super->anchor->num_disks++;
		}
		/* promote the spare to a configured member */
		disk = &dl->disk;
		status = __le32_to_cpu(disk->status);
		status |= CONFIGURED_DISK;
		status &= ~SPARE_DISK;
		disk->status = __cpu_to_le32(status);

		/* mark rebuild: migrate to 'to_state' with the new member
		 * flagged IMSM_ORD_REBUILD in the migration map
		 */
		to_state = imsm_check_degraded(super, dev, failed);
		map->map_state = IMSM_T_STATE_DEGRADED;
		migrate(dev, to_state, 1);
		migr_map = get_imsm_map(dev, 1);
		set_imsm_ord_tbl_ent(map, u->slot, dl->index);
		set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD);

		/* count arrays using the victim in the metadata */
		found = 0;
		for (a = st->arrays; a ; a = a->next) {
			dev = get_imsm_dev(super, a->info.container_member);
			/* NOTE(review): this loop bounds on the outer 'map'
			 * (the activated array's map), not each dev's own
			 * map — looks wrong when member counts differ;
			 * verify before relying on it.
			 */
			for (i = 0; i < map->num_members; i++)
				if (victim == get_imsm_disk_idx(dev, i))
					found++;
		}

		/* delete the victim if it is no longer being
		 * utilized anywhere
		 */
		if (!found) {
			struct dl **dlp;

			/* We know that 'manager' isn't touching anything,
			 * so it is safe to delete
			 */
			for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
				if ((*dlp)->index == victim)
					break;

			/* victim may be on the missing list */
			if (!*dlp)
				for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next)
					if ((*dlp)->index == victim)
						break;
			imsm_delete(super, dlp, victim);
		}
		break;
	}
	case update_create_array: {
		/* someone wants to create a new array, we need to be aware of
		 * a few races/collisions:
		 * 1/ 'Create' called by two separate instances of mdadm
		 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
		 *    devices that have since been assimilated via
		 *    activate_spare.
		 * In the event this update can not be carried out mdadm will
		 * (FIX ME) notice that its update did not take hold.
		 */
		struct imsm_update_create_array *u = (void *) update->buf;
		struct imsm_dev *dev;
		struct imsm_map *map, *new_map;
		unsigned long long start, end;
		unsigned long long new_start, new_end;
		int i;
		int overlap = 0;

		/* handle racing creates: first come first serve */
		if (u->dev_idx < mpb->num_raid_devs) {
			dprintf("%s: subarray %d already defined\n",
				__func__, u->dev_idx);
			return;
		}

		/* check update is next in sequence */
		if (u->dev_idx != mpb->num_raid_devs) {
			dprintf("%s: can not create array %d expected index %d\n",
				__func__, u->dev_idx, mpb->num_raid_devs);
			return;
		}

		new_map = get_imsm_map(&u->dev, 0);
		new_start = __le32_to_cpu(new_map->pba_of_lba0);
		new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);

		/* handle activate_spare versus create race:
		 * check to make sure that overlapping arrays do not include
		 * overalpping disks
		 */
		for (i = 0; i < mpb->num_raid_devs; i++) {
			dev = get_imsm_dev(super, i);
			map = get_imsm_map(dev, 0);
			start = __le32_to_cpu(map->pba_of_lba0);
			end = start + __le32_to_cpu(map->blocks_per_member);
			if ((new_start >= start && new_start <= end) ||
			    (start >= new_start && start <= new_end))
				overlap = 1;
			if (overlap && disks_overlap(dev, &u->dev)) {
				dprintf("%s: arrays overlap\n", __func__);
				return;
			}
		}
		/* check num_members sanity */
		if (new_map->num_members > mpb->num_disks) {
			dprintf("%s: num_disks out of range\n", __func__);
			return;
		}

		/* check that prepare update was successful */
		if (!update->space) {
			dprintf("%s: prepare update failed\n", __func__);
			return;
		}

		/* take ownership of the buffer pre-allocated by
		 * imsm_prepare_update() and install the new dev
		 */
		super->updates_pending++;
		dev = update->space;
		map = get_imsm_map(dev, 0);
		update->space = NULL;
		imsm_copy_dev(dev, &u->dev);
		/* NOTE(review): this re-fetch of map is redundant — it was
		 * already fetched three lines up from the same dev.
		 */
		map = get_imsm_map(dev, 0);
		super->dev_tbl[u->dev_idx] = dev;
		mpb->num_raid_devs++;

		/* fix up flags */
		for (i = 0; i < map->num_members; i++) {
			struct imsm_disk *disk;
			__u32 status;

			disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i));
			status = __le32_to_cpu(disk->status);
			status |= CONFIGURED_DISK;
			status &= ~SPARE_DISK;
			disk->status = __cpu_to_le32(status);
		}
		break;
	}
	case update_add_disk:

		/* we may be able to repair some arrays if disks are
		 * being added */
		if (super->add) {
			struct active_array *a;
			for (a = st->arrays; a; a = a->next)
				a->check_degraded = 1;
		}
		/* add some spares to the metadata */
		while (super->add) {
			struct dl *al;

			al = super->add;
			super->add = al->next;
			al->next = super->disks;
			super->disks = al;
			dprintf("%s: added %x:%x\n",
				__func__, al->major, al->minor);
		}

		break;
	}
}
3196
3197 static void imsm_prepare_update(struct supertype *st,
3198 struct metadata_update *update)
3199 {
3200 /**
3201 * Allocate space to hold new disk entries, raid-device entries or a new
3202 * mpb if necessary. The manager synchronously waits for updates to
3203 * complete in the monitor, so new mpb buffers allocated here can be
3204 * integrated by the monitor thread without worrying about live pointers
3205 * in the manager thread.
3206 */
3207 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
3208 struct intel_super *super = st->sb;
3209 struct imsm_super *mpb = super->anchor;
3210 size_t buf_len;
3211 size_t len = 0;
3212
3213 switch (type) {
3214 case update_create_array: {
3215 struct imsm_update_create_array *u = (void *) update->buf;
3216
3217 len = sizeof_imsm_dev(&u->dev, 1);
3218 update->space = malloc(len);
3219 break;
3220 default:
3221 break;
3222 }
3223 }
3224
3225 /* check if we need a larger metadata buffer */
3226 if (super->next_buf)
3227 buf_len = super->next_len;
3228 else
3229 buf_len = super->len;
3230
3231 if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
3232 /* ok we need a larger buf than what is currently allocated
3233 * if this allocation fails process_update will notice that
3234 * ->next_len is set and ->next_buf is NULL
3235 */
3236 buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
3237 if (super->next_buf)
3238 free(super->next_buf);
3239
3240 super->next_len = buf_len;
3241 if (posix_memalign(&super->next_buf, buf_len, 512) != 0)
3242 super->next_buf = NULL;
3243 }
3244 }
3245
/* must be called while manager is quiesced */
static void imsm_delete(struct intel_super *super, struct dl **dlp, int index)
{
	struct imsm_super *mpb = super->anchor;
	struct dl *iter;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int i, j, num_members;
	__u32 ord;

	dprintf("%s: deleting device[%d] from imsm_super\n",
		__func__, index);

	/* shift all indexes down one */
	for (iter = super->disks; iter; iter = iter->next)
		if (iter->index > index)
			iter->index--;
	for (iter = super->missing; iter; iter = iter->next)
		if (iter->index > index)
			iter->index--;

	/* renumber every ord-table reference above the deleted index */
	for (i = 0; i < mpb->num_raid_devs; i++) {
		dev = get_imsm_dev(super, i);
		map = get_imsm_map(dev, 0);
		num_members = map->num_members;
		for (j = 0; j < num_members; j++) {
			/* update ord entries being careful not to propagate
			 * ord-flags to the first map
			 */
			ord = get_imsm_ord_tbl_ent(dev, j);

			if (ord_to_idx(ord) <= index)
				continue;

			/* map[0] gets the bare index (flags stripped);
			 * map[1], when present, keeps its ord flags
			 */
			map = get_imsm_map(dev, 0);
			set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
			map = get_imsm_map(dev, 1);
			if (map)
				set_imsm_ord_tbl_ent(map, j, ord - 1);
		}
	}

	mpb->num_disks--;
	super->updates_pending++;
	/* unlink and free the disk entry itself (list head passed by the
	 * caller may point into super->disks or super->missing)
	 */
	if (*dlp) {
		struct dl *dl = *dlp;

		*dlp = (*dlp)->next;
		__free_imsm_disk(dl);
	}
}
3297 #endif /* MDASSEMBLE */
3298
3299 struct superswitch super_imsm = {
3300 #ifndef MDASSEMBLE
3301 .examine_super = examine_super_imsm,
3302 .brief_examine_super = brief_examine_super_imsm,
3303 .detail_super = detail_super_imsm,
3304 .brief_detail_super = brief_detail_super_imsm,
3305 .write_init_super = write_init_super_imsm,
3306 .validate_geometry = validate_geometry_imsm,
3307 .add_to_super = add_to_super_imsm,
3308 #endif
3309 .match_home = match_home_imsm,
3310 .uuid_from_super= uuid_from_super_imsm,
3311 .getinfo_super = getinfo_super_imsm,
3312 .update_super = update_super_imsm,
3313
3314 .avail_size = avail_size_imsm,
3315
3316 .compare_super = compare_super_imsm,
3317
3318 .load_super = load_super_imsm,
3319 .init_super = init_super_imsm,
3320 .store_super = store_zero_imsm,
3321 .free_super = free_super_imsm,
3322 .match_metadata_desc = match_metadata_desc_imsm,
3323 .container_content = container_content_imsm,
3324
3325 .external = 1,
3326
3327 #ifndef MDASSEMBLE
3328 /* for mdmon */
3329 .open_new = imsm_open_new,
3330 .load_super = load_super_imsm,
3331 .set_array_state= imsm_set_array_state,
3332 .set_disk = imsm_set_disk,
3333 .sync_metadata = imsm_sync_metadata,
3334 .activate_spare = imsm_activate_spare,
3335 .process_update = imsm_process_update,
3336 .prepare_update = imsm_prepare_update,
3337 #endif /* MDASSEMBLE */
3338 };