]> git.ipfire.org Git - thirdparty/mdadm.git/blob - super-intel.c
imsm: return associated uuid for spares
[thirdparty/mdadm.git] / super-intel.c
1 /*
2 * mdadm - Intel(R) Matrix Storage Manager Support
3 *
4 * Copyright (C) 2002-2008 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define HAVE_STDINT_H 1
21 #include "mdadm.h"
22 #include "mdmon.h"
23 #include "sha1.h"
24 #include <values.h>
25 #include <scsi/sg.h>
26 #include <ctype.h>
27
28 /* MPB == Metadata Parameter Block */
29 #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
30 #define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
31 #define MPB_VERSION_RAID0 "1.0.00"
32 #define MPB_VERSION_RAID1 "1.1.00"
33 #define MPB_VERSION_RAID5 "1.2.02"
34 #define MAX_SIGNATURE_LENGTH 32
35 #define MAX_RAID_SERIAL_LEN 16
36 #define MPB_SECTOR_CNT 418
37 #define IMSM_RESERVED_SECTORS 4096
38
39 /* Disk configuration info. */
40 #define IMSM_MAX_DEVICES 255
/* On-disk per-disk record inside the IMSM anchor ("MPB").  The offsets
 * in the comments are relative to the start of the anchor.  Multi-byte
 * fields are little-endian on disk (callers use __le32_to_cpu).
 * NOTE(review): unlike the other metadata structs below, this one is
 * not declared __attribute__((packed)); all members happen to be
 * naturally aligned, but confirm the layout before adding fields.
 */
struct imsm_disk {
	__u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
	__u32 total_blocks;		 /* 0xE8 - 0xEB total blocks */
	__u32 scsi_id;			 /* 0xEC - 0xEF scsi ID */
	__u32 status;			 /* 0xF0 - 0xF3 */
#define SPARE_DISK      0x01  /* Spare */
#define CONFIGURED_DISK 0x02  /* Member of some RaidDev */
#define FAILED_DISK     0x04  /* Permanent failure */
#define USABLE_DISK     0x08  /* Fully usable unless FAILED_DISK is set */

#define	IMSM_DISK_FILLERS	5
	__u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
};
54
/* RAID map configuration infos.
 * Variable-length: disk_ord_tbl[] is declared with one element but
 * really holds num_members entries -- use sizeof_imsm_map() for the
 * actual size of a map.
 */
struct imsm_map {
	__u32 pba_of_lba0;	/* start address of partition */
	__u32 blocks_per_member;/* blocks per member */
	__u32 num_data_stripes;	/* number of data stripes */
	__u16 blocks_per_strip;
	__u8  map_state;	/* Normal, Uninitialized, Degraded, Failed */
#define IMSM_T_STATE_NORMAL 0
#define IMSM_T_STATE_UNINITIALIZED 1
#define IMSM_T_STATE_DEGRADED 2 /* FIXME: is this correct? */
#define IMSM_T_STATE_FAILED 3 /* FIXME: is this correct? */
	__u8  raid_level;
#define IMSM_T_RAID0 0
#define IMSM_T_RAID1 1
#define IMSM_T_RAID5 5		/* since metadata version 1.2.02 ? */
	__u8  num_members;	/* number of member disks */
	__u8  reserved[3];
	__u32 filler[7];	/* expansion area */
#define IMSM_ORD_REBUILD (1 << 24)
	__u32 disk_ord_tbl[1];	/* disk_ord_tbl[num_members],
				 * top byte contains some flags
				 */
} __attribute__ ((packed));
78
/* Per-volume state embedded in imsm_dev.  While a migration is in
 * progress a second imsm_map immediately follows map[0] (see
 * get_imsm_map()).
 */
struct imsm_vol {
	__u32 curr_migr_unit;	/* migration progress; reported as
				 * resync_start by getinfo */
	__u32 reserved;
	__u8 migr_state;	/* Normal or Migrating */
	__u8 migr_type;		/* Initializing, Rebuilding, ... */
	__u8 dirty;
	__u8 fill[1];
	__u32 filler[5];
	struct imsm_map map[1];	/* variable-length current map */
	/* here comes another one if migr_state */
} __attribute__ ((packed));
90
/* Per-raid-device record.  Variable length because of the embedded
 * map(s) -- use sizeof_imsm_dev() rather than sizeof.
 */
struct imsm_dev {
	__u8 volume[MAX_RAID_SERIAL_LEN];	/* ascii volume name */
	__u32 size_low;		/* array size in 512-byte sectors, low word */
	__u32 size_high;	/* ...and high word */
	__u32 status;	/* Persistent RaidDev status */
	__u32 reserved_blocks; /* Reserved blocks at beginning of volume */
#define IMSM_DEV_FILLERS 12
	__u32 filler[IMSM_DEV_FILLERS];
	struct imsm_vol vol;
} __attribute__ ((packed));
101
/* On-disk anchor ("MPB") layout.  Variable length: disk[] really
 * holds num_disks entries, followed by num_raid_devs imsm_dev records
 * and then the BBM log.  mpb_size covers the whole anchor and is the
 * range summed by __gen_imsm_checksum().  Multi-byte fields are
 * little-endian on disk.
 */
struct imsm_super {
	__u8 sig[MAX_SIGNATURE_LENGTH];	/* 0x00 - 0x1F */
	__u32 check_sum;		/* 0x20 - 0x23 MPB Checksum */
	__u32 mpb_size;			/* 0x24 - 0x27 Size of MPB */
	__u32 family_num;		/* 0x28 - 0x2B Checksum from first time this config was written */
	__u32 generation_num;		/* 0x2C - 0x2F Incremented each time this array's MPB is written */
	__u32 error_log_size;		/* 0x30 - 0x33 in bytes */
	__u32 attributes;		/* 0x34 - 0x37 */
	__u8 num_disks;			/* 0x38 Number of configured disks */
	__u8 num_raid_devs;		/* 0x39 Number of configured volumes */
	__u8 error_log_pos;		/* 0x3A */
	__u8 fill[1];			/* 0x3B */
	__u32 cache_size;		/* 0x3c - 0x40 in mb */
	__u32 orig_family_num;		/* 0x40 - 0x43 original family num */
	__u32 pwr_cycle_count;		/* 0x44 - 0x47 simulated power cycle count for array */
	__u32 bbm_log_size;		/* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
#define IMSM_FILLERS 35
	__u32 filler[IMSM_FILLERS];	/* 0x4C - 0xD7 RAID_MPB_FILLERS */
	struct imsm_disk disk[1];	/* 0xD8 diskTbl[numDisks] */
	/* here comes imsm_dev[num_raid_devs] */
	/* here comes BBM logs */
} __attribute__ ((packed));
124
125 #define BBM_LOG_MAX_ENTRIES 254
126
/* one remapped-region record in the bad block management log */
struct bbm_log_entry {
	__u64 defective_block_start;	/* LBA of first bad block */
#define UNREADABLE 0xFFFFFFFF
	__u32 spare_block_offset;
	__u16 remapped_marked_count;
	__u16 disk_ordinal;		/* index into the disk_ord_tbl */
} __attribute__ ((__packed__));
134
/* bad block management log; per the anchor comments it is stored
 * after the imsm_dev records
 */
struct bbm_log {
	__u32 signature;	/* 0xABADB10C */
	__u32 entry_count;	/* entries used in mapped_block_entries */
	__u32 reserved_spare_block_count; /* 0 */
	__u32 reserved;		/* 0xFFFF */
	__u64 first_spare_lba;
	struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
} __attribute__ ((__packed__));
143
144
#ifndef MDASSEMBLE
/* printable names for imsm_map.map_state, indexed by IMSM_T_STATE_* */
static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
#endif
148
/* number of whole 512-byte sectors needed to hold 'bytes' bytes */
static unsigned int sector_count(__u32 bytes)
{
	return (bytes + 511) / 512;
}
153
/* number of 512-byte sectors occupied by the anchor; mpb_size is
 * stored little-endian in the metadata
 */
static unsigned int mpb_sectors(struct imsm_super *mpb)
{
	return sector_count(__le32_to_cpu(mpb->mpb_size));
}
158
/* internal representation of IMSM metadata */
struct intel_super {
	union {
		void *buf; /* O_DIRECT buffer for reading/writing metadata */
		struct imsm_super *anchor; /* immovable parameters */
	};
	size_t len; /* size of the 'buf' allocation */
	void *next_buf; /* for realloc'ing buf from the manager */
	size_t next_len;
	int updates_pending; /* count of pending updates for mdmon */
	int creating_imsm; /* flag to indicate container creation */
	int current_vol; /* index of raid device undergoing creation */
#define IMSM_MAX_RAID_DEVS 2
	struct imsm_dev *dev_tbl[IMSM_MAX_RAID_DEVS];
	/* one entry per physical disk known to this container */
	struct dl {
		struct dl *next;
		int index;	/* slot in the anchor's disk table;
				 * -1 for spares, -2 for failed or
				 * not-yet-placed disks (see
				 * load_imsm_disk) */
		__u8 serial[MAX_RAID_SERIAL_LEN];
		int major, minor;
		char *devname;
		struct imsm_disk disk; /* copy of the anchor's record */
		int fd;
	} *disks;
	struct dl *add; /* list of disks to add while mdmon active */
	struct dl *missing; /* disks removed while we weren't looking */
	struct bbm_log *bbm_log;
};
186
/* a used region on a physical disk, in 512-byte blocks; a zero-size
 * entry terminates the array returned by get_extents()
 */
struct extent {
	unsigned long long start, size;
};
190
/* definition of messages passed to imsm_process_update */
enum imsm_update_type {
	update_activate_spare,
	update_create_array,
	update_add_disk,
};

/* request to move spare 'dl' into 'slot' of raid device 'array';
 * several activations may be chained via 'next'
 */
struct imsm_update_activate_spare {
	enum imsm_update_type type;
	struct dl *dl;
	int slot;
	int array;
	struct imsm_update_activate_spare *next;
};

/* request to create raid device 'dev' at index 'dev_idx' */
struct imsm_update_create_array {
	enum imsm_update_type type;
	int dev_idx;
	struct imsm_dev dev;
};

/* bare notification -- presumably the disks to add are queued on
 * intel_super.add (TODO confirm against imsm_process_update)
 */
struct imsm_update_add_disk {
	enum imsm_update_type type;
};
215
/* returns 1 when the IMSM_DEVNAME_AS_SERIAL environment variable is
 * set to "1" (use the device name in place of a SCSI serial number),
 * 0 otherwise
 */
static int imsm_env_devname_as_serial(void)
{
	char *val = getenv("IMSM_DEVNAME_AS_SERIAL");

	return (val && atoi(val) == 1) ? 1 : 0;
}
225
226
227 static struct supertype *match_metadata_desc_imsm(char *arg)
228 {
229 struct supertype *st;
230
231 if (strcmp(arg, "imsm") != 0 &&
232 strcmp(arg, "default") != 0
233 )
234 return NULL;
235
236 st = malloc(sizeof(*st));
237 memset(st, 0, sizeof(*st));
238 st->ss = &super_imsm;
239 st->max_devs = IMSM_MAX_DEVICES;
240 st->minor_version = 0;
241 st->sb = NULL;
242 return st;
243 }
244
#ifndef MDASSEMBLE
/* the ascii version string is stored in the anchor signature directly
 * after the constant MPB_SIGNATURE prefix
 */
static __u8 *get_imsm_version(struct imsm_super *mpb)
{
	return &mpb->sig[MPB_SIG_LEN];
}
#endif
251
/* retrieve a disk directly from the anchor when the anchor is known to be
 * up-to-date, currently only at load time.  Returns NULL when 'index'
 * is outside the disk table.
 */
static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
{
	if (index >= mpb->num_disks)
		return NULL;
	return &mpb->disk[index];
}
261
262 #ifndef MDASSEMBLE
263 /* retrieve a disk from the parsed metadata */
264 static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
265 {
266 struct dl *d;
267
268 for (d = super->disks; d; d = d->next)
269 if (d->index == index)
270 return &d->disk;
271
272 return NULL;
273 }
274 #endif
275
276 /* generate a checksum directly from the anchor when the anchor is known to be
277 * up-to-date, currently only at load or write_super after coalescing
278 */
279 static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
280 {
281 __u32 end = mpb->mpb_size / sizeof(end);
282 __u32 *p = (__u32 *) mpb;
283 __u32 sum = 0;
284
285 while (end--)
286 sum += __le32_to_cpu(*p++);
287
288 return sum - __le32_to_cpu(mpb->check_sum);
289 }
290
/* actual size of a map, accounting for the variable-length
 * disk_ord_tbl[] (declared with one element, holds num_members)
 */
static size_t sizeof_imsm_map(struct imsm_map *map)
{
	return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
}
295
/* return a pointer to the requested map of 'dev': map 0 is always
 * present; the second map directly follows the (variable-length)
 * first one and exists only while a migration is in progress.
 * Returns NULL when the second map is requested but there is no
 * migration.
 */
struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
{
	struct imsm_map *map = &dev->vol.map[0];

	if (second_map && !dev->vol.migr_state)
		return NULL;
	else if (second_map) {
		/* step over the first map; void * arithmetic keeps the
		 * byte-granular offset
		 */
		void *ptr = map;

		return ptr + sizeof_imsm_map(map);
	} else
		return map;

}
310
/* return the size of the device.
 * migr_state increases the returned size if map[0] were to be duplicated
 */
static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
{
	/* base struct minus the one-element placeholder map, plus the
	 * real size of the current map
	 */
	size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
		      sizeof_imsm_map(get_imsm_map(dev, 0));

	/* migrating means an additional map */
	if (dev->vol.migr_state)
		size += sizeof_imsm_map(get_imsm_map(dev, 1));
	else if (migr_state)
		/* caller wants room for a duplicate of map[0] */
		size += sizeof_imsm_map(get_imsm_map(dev, 0));

	return size;
}
327
328 static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
329 {
330 int offset;
331 int i;
332 void *_mpb = mpb;
333
334 if (index >= mpb->num_raid_devs)
335 return NULL;
336
337 /* devices start after all disks */
338 offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
339
340 for (i = 0; i <= index; i++)
341 if (i == index)
342 return _mpb + offset;
343 else
344 offset += sizeof_imsm_dev(_mpb + offset, 0);
345
346 return NULL;
347 }
348
/* retrieve raid device 'index' from the parsed per-volume table
 * (dev_tbl is presumably filled at load time -- TODO confirm against
 * the loader); NULL when the index is out of range
 */
static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
{
	if (index >= super->anchor->num_raid_devs)
		return NULL;
	return super->dev_tbl[index];
}
355
356 static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev, int slot)
357 {
358 struct imsm_map *map;
359
360 if (dev->vol.migr_state)
361 map = get_imsm_map(dev, 1);
362 else
363 map = get_imsm_map(dev, 0);
364
365 /* top byte identifies disk under rebuild */
366 return __le32_to_cpu(map->disk_ord_tbl[slot]);
367 }
368
/* strip the flag bits (e.g. IMSM_ORD_REBUILD) out of the top byte of
 * a disk_ord_tbl entry, leaving just the disk index
 */
#define ord_to_idx(ord) (((ord) << 8) >> 8)
static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot)
{
	__u32 ord = get_imsm_ord_tbl_ent(dev, slot);

	return ord_to_idx(ord);
}
376
/* store 'ord' (index plus flag bits) at 'slot', converting to the
 * on-disk little-endian representation
 */
static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
{
	map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
}
381
382 static int get_imsm_raid_level(struct imsm_map *map)
383 {
384 if (map->raid_level == 1) {
385 if (map->num_members == 2)
386 return 1;
387 else
388 return 10;
389 }
390
391 return map->raid_level;
392 }
393
394 static int cmp_extent(const void *av, const void *bv)
395 {
396 const struct extent *a = av;
397 const struct extent *b = bv;
398 if (a->start < b->start)
399 return -1;
400 if (a->start > b->start)
401 return 1;
402 return 0;
403 }
404
/* build a sorted array of the extents used by raid devices on disk
 * 'dl', terminated by a zero-size entry whose 'start' marks where the
 * trailing metadata reservation begins.  Caller frees the result;
 * NULL on allocation failure.
 */
static struct extent *get_extents(struct intel_super *super, struct dl *dl)
{
	/* find a list of used extents on the given physical device */
	struct extent *rv, *e;
	int i, j;
	int memberships = 0;
	__u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

	/* first pass: count this disk's memberships so we know how many
	 * extents to allocate (+1 for the terminating entry)
	 */
	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);
		struct imsm_map *map = get_imsm_map(dev, 0);

		for (j = 0; j < map->num_members; j++) {
			__u32 index = get_imsm_disk_idx(dev, j);

			if (index == dl->index)
				memberships++;
		}
	}
	rv = malloc(sizeof(struct extent) * (memberships + 1));
	if (!rv)
		return NULL;
	e = rv;

	/* second pass: record each member map's start block and size */
	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);
		struct imsm_map *map = get_imsm_map(dev, 0);

		for (j = 0; j < map->num_members; j++) {
			__u32 index = get_imsm_disk_idx(dev, j);

			if (index == dl->index) {
				e->start = __le32_to_cpu(map->pba_of_lba0);
				e->size = __le32_to_cpu(map->blocks_per_member);
				e++;
			}
		}
	}
	qsort(rv, memberships, sizeof(*rv), cmp_extent);

	/* determine the start of the metadata
	 * when no raid devices are defined use the default
	 * ...otherwise allow the metadata to truncate the value
	 * as is the case with older versions of imsm
	 */
	if (memberships) {
		struct extent *last = &rv[memberships - 1];
		__u32 remainder;

		remainder = __le32_to_cpu(dl->disk.total_blocks) -
			    (last->start + last->size);
		if (reservation > remainder)
			reservation = remainder;
	}
	/* terminating entry doubles as the metadata start marker */
	e->start = __le32_to_cpu(dl->disk.total_blocks) - reservation;
	e->size = 0;
	return rv;
}
463
/* try to determine how much space is reserved for metadata from
 * the last get_extents() entry, otherwise fallback to the
 * default
 */
static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
{
	struct extent *e;
	int i;
	__u32 rv;

	/* for spares just return a minimal reservation which will grow
	 * once the spare is picked up by an array
	 */
	if (dl->index == -1)
		return MPB_SECTOR_CNT;

	e = get_extents(super, dl);
	if (!e)
		return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

	/* scroll to last (zero-size, metadata-marker) entry */
	for (i = 0; e[i].size; i++)
		continue;

	/* everything from the metadata start to the end of the disk */
	rv = __le32_to_cpu(dl->disk.total_blocks) - e[i].start;

	free(e);

	return rv;
}
494
495 #ifndef MDASSEMBLE
496 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info);
497
/* pretty-print one raid device for --examine; 'index' is the disk
 * table index of the disk being examined, used to report which slot
 * of this volume it occupies
 */
static void print_imsm_dev(struct imsm_dev *dev, int index)
{
	__u64 sz;
	int slot;
	struct imsm_map *map = get_imsm_map(dev, 0);
	__u32 ord;

	printf("\n");
	printf("[%s]:\n", dev->volume);
	printf(" RAID Level : %d\n", get_imsm_raid_level(map));
	printf(" Members : %d\n", map->num_members);
	/* find which slot (if any) 'index' occupies in this volume */
	for (slot = 0; slot < map->num_members; slot++)
		if (index == get_imsm_disk_idx(dev, slot))
			break;
	if (slot < map->num_members) {
		ord = get_imsm_ord_tbl_ent(dev, slot);
		printf(" This Slot : %d%s\n", slot,
		       ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
	} else
		printf(" This Slot : ?\n");
	/* 64-bit array size split across two little-endian words */
	sz = __le32_to_cpu(dev->size_high);
	sz <<= 32;
	sz += __le32_to_cpu(dev->size_low);
	printf(" Array Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	sz = __le32_to_cpu(map->blocks_per_member);
	printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	printf(" Sector Offset : %u\n",
		__le32_to_cpu(map->pba_of_lba0));
	printf(" Num Stripes : %u\n",
		__le32_to_cpu(map->num_data_stripes));
	printf(" Chunk Size : %u KiB\n",
		__le16_to_cpu(map->blocks_per_strip) / 2);
	printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
	printf(" Migrate State : %s", dev->vol.migr_state ? "migrating" : "idle");
	if (dev->vol.migr_state)
		printf(": %s", dev->vol.migr_type ? "rebuilding" : "initializing");
	printf("\n");
	printf(" Map State : %s", map_state_str[map->map_state]);
	if (dev->vol.migr_state) {
		/* during migration also show the source map's state */
		struct imsm_map *map = get_imsm_map(dev, 1);
		printf(" <-- %s", map_state_str[map->map_state]);
	}
	printf("\n");
	printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
}
545
546 static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
547 {
548 struct imsm_disk *disk = __get_imsm_disk(mpb, index);
549 char str[MAX_RAID_SERIAL_LEN + 1];
550 __u32 s;
551 __u64 sz;
552
553 if (index < 0)
554 return;
555
556 printf("\n");
557 snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
558 printf(" Disk%02d Serial : %s\n", index, str);
559 s = __le32_to_cpu(disk->status);
560 printf(" State :%s%s%s%s\n", s&SPARE_DISK ? " spare" : "",
561 s&CONFIGURED_DISK ? " active" : "",
562 s&FAILED_DISK ? " failed" : "",
563 s&USABLE_DISK ? " usable" : "");
564 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
565 sz = __le32_to_cpu(disk->total_blocks) - reserved;
566 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
567 human_size(sz * 512));
568 }
569
/* --examine output for a container: anchor summary, the examined
 * disk, each raid device, then the remaining disks
 */
static void examine_super_imsm(struct supertype *st, char *homehost)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	char str[MAX_SIGNATURE_LENGTH];
	int i;
	struct mdinfo info;
	char nbuf[64];
	__u32 sum;
	__u32 reserved = imsm_reserved_sectors(super, super->disks);


	/* signature prefix and version are fixed-width, not terminated */
	snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
	printf(" Magic : %s\n", str);
	snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
	printf(" Version : %s\n", get_imsm_version(mpb));
	printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
	printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
	getinfo_super_imsm(st, &info);
	/* nbuf + 5 presumably skips a "UUID=" style prefix -- TODO
	 * confirm against fname_from_uuid
	 */
	fname_from_uuid(st, &info, nbuf,'-');
	printf(" UUID : %s\n", nbuf + 5);
	sum = __le32_to_cpu(mpb->check_sum);
	printf(" Checksum : %08x %s\n", sum,
		__gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
	printf(" MPB Sectors : %d\n", mpb_sectors(mpb));
	printf(" Disks : %d\n", mpb->num_disks);
	printf(" RAID Devices : %d\n", mpb->num_raid_devs);
	/* the disk we were asked about first... */
	print_imsm_disk(mpb, super->disks->index, reserved);
	if (super->bbm_log) {
		struct bbm_log *log = super->bbm_log;

		printf("\n");
		printf("Bad Block Management Log:\n");
		printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
		printf(" Signature : %x\n", __le32_to_cpu(log->signature));
		printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count));
		printf(" Spare Blocks : %d\n", __le32_to_cpu(log->reserved_spare_block_count));
		printf(" First Spare : %llx\n", __le64_to_cpu(log->first_spare_lba));
	}
	for (i = 0; i < mpb->num_raid_devs; i++)
		print_imsm_dev(__get_imsm_dev(mpb, i), super->disks->index);
	/* ...then every other disk in the table */
	for (i = 0; i < mpb->num_disks; i++) {
		if (i == super->disks->index)
			continue;
		print_imsm_disk(mpb, i, reserved);
	}
}
617
618 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info);
619
static void brief_examine_super_imsm(struct supertype *st)
{
	/* We just write a generic IMSM ARRAY entry */
	struct mdinfo info;
	char nbuf[64];

	getinfo_super_imsm(st, &info);
	/* nbuf + 5 presumably skips a "UUID=" style prefix -- TODO
	 * confirm against fname_from_uuid
	 */
	fname_from_uuid(st, &info, nbuf,'-');
	printf("ARRAY /dev/imsm metadata=imsm auto=md UUID=%s\n", nbuf + 5);
}
630
/* stub: detailed output is not implemented yet, only prints the
 * function name as a placeholder
 */
static void detail_super_imsm(struct supertype *st, char *homehost)
{
	printf("%s\n", __FUNCTION__);
}
635
/* emit just the UUID fragment for --brief --detail output */
static void brief_detail_super_imsm(struct supertype *st)
{
	struct mdinfo info;
	char nbuf[64];
	getinfo_super_imsm(st, &info);
	/* nbuf + 5 presumably skips a "UUID=" style prefix -- TODO
	 * confirm against fname_from_uuid
	 */
	fname_from_uuid(st, &info, nbuf,'-');
	printf(" UUID=%s", nbuf + 5);
}
644 #endif
645
/* stub: homehost matching is not implemented for imsm; prints the
 * function name and always reports no match (-1)
 */
static int match_home_imsm(struct supertype *st, char *homehost)
{
	printf("%s\n", __FUNCTION__);

	return -1;
}
652
static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
{
	/* The uuid returned here is used for:
	 *  uuid to put into bitmap file (Create, Grow)
	 *  uuid for backup header when saving critical section (Grow)
	 *  comparing uuids when re-adding a device into an array
	 *    In these cases the uuid required is that of the data-array,
	 *    not the device-set.
	 *  uuid to recognise same set when adding a missing device back
	 *    to an array.   This is a uuid for the device-set.
	 *
	 * For each of these we can make do with a truncated
	 * or hashed uuid rather than the original, as long as
	 * everyone agrees.
	 * In each case the uuid required is that of the data-array,
	 * not the device-set.
	 */
	/* imsm does not track uuid's so we synthesis one using sha1 on
	 * - The signature (Which is constant for all imsm array, but no matter)
	 * - the family_num of the container
	 * - the index number of the volume
	 * - the 'serial' number of the volume.
	 * Hopefully these are all constant.
	 */
	struct intel_super *super = st->sb;

	char buf[20];	/* sha1 digest is 20 bytes; only 16 are used */
	struct sha1_ctx ctx;
	struct imsm_dev *dev = NULL;

	sha1_init_ctx(&ctx);
	sha1_process_bytes(super->anchor->sig, MAX_SIGNATURE_LENGTH, &ctx);
	sha1_process_bytes(&super->anchor->family_num, sizeof(__u32), &ctx);
	if (super->current_vol >= 0)
		dev = get_imsm_dev(super, super->current_vol);
	if (dev) {
		/* NOTE(review): 'vol' is hashed in host byte order, so
		 * the synthesized uuid differs between endiannesses --
		 * confirm whether that is intended
		 */
		__u32 vol = super->current_vol;
		sha1_process_bytes(&vol, sizeof(vol), &ctx);
		sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
	}
	sha1_finish_ctx(&ctx, buf);
	/* truncate the digest to the 16-byte md uuid */
	memcpy(uuid, buf, 4*4);
}
696
#if 0
/* dead code kept for reference: parse the dotted version string in
 * the anchor signature into numeric components.
 * NOTE(review): 'major' is collected but never returned -- *m receives
 * the minor component; verify before resurrecting this.
 */
static void
get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
{
	__u8 *v = get_imsm_version(mpb);
	__u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
	char major[] = { 0, 0, 0 };
	char minor[] = { 0 ,0, 0 };
	char patch[] = { 0, 0, 0 };
	char *ver_parse[] = { major, minor, patch };
	int i, j;

	i = j = 0;
	while (*v != '\0' && v < end) {
		if (*v != '.' && j < 2)
			ver_parse[i][j++] = *v;
		else {
			i++;
			j = 0;
		}
		v++;
	}

	*m = strtol(minor, NULL, 0);
	*p = strtol(patch, NULL, 0);
}
#endif
724
725 static int imsm_level_to_layout(int level)
726 {
727 switch (level) {
728 case 0:
729 case 1:
730 return 0;
731 case 5:
732 case 6:
733 return ALGORITHM_LEFT_ASYMMETRIC;
734 case 10:
735 return 0x102;
736 }
737 return -1;
738 }
739
/* fill 'info' for the raid device selected by super->current_vol
 * (called from getinfo_super_imsm when a volume is selected)
 */
static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
{
	struct intel_super *super = st->sb;
	struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
	struct imsm_map *map = get_imsm_map(dev, 0);

	info->container_member = super->current_vol;
	info->array.raid_disks = map->num_members;
	info->array.level = get_imsm_raid_level(map);
	info->array.layout = imsm_level_to_layout(info->array.level);
	info->array.md_minor = -1;
	info->array.ctime = 0;
	info->array.utime = 0;
	/* blocks_per_strip is in 512-byte blocks; chunk_size in bytes */
	info->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) << 9;
	info->array.state = !dev->vol.dirty;

	info->disk.major = 0;
	info->disk.minor = 0;

	info->data_offset = __le32_to_cpu(map->pba_of_lba0);
	info->component_size = __le32_to_cpu(map->blocks_per_member);
	memset(info->uuid, 0, sizeof(info->uuid));

	/* uninitialized or dirty volumes resync from the beginning;
	 * a migrating volume reports its current progress; otherwise
	 * the volume is in sync (~0ULL)
	 */
	if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty)
		info->resync_start = 0;
	else if (dev->vol.migr_state)
		info->resync_start = __le32_to_cpu(dev->vol.curr_migr_unit);
	else
		info->resync_start = ~0ULL;

	strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
	info->name[MAX_RAID_SERIAL_LEN] = 0;

	info->array.major_version = -1;
	info->array.minor_version = -2;
	/* external-metadata version string: /<container>/<member> */
	sprintf(info->text_version, "/%s/%d",
		devnum2devname(st->container_dev),
		info->container_member);
	info->safe_mode_delay = 4000;  /* 4 secs like the Matrix driver */
	uuid_from_super_imsm(st, info->uuid);
}
781
782
/* fill 'info' for the container as a whole, or delegate to the
 * per-volume variant when a volume is currently selected
 */
static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info)
{
	struct intel_super *super = st->sb;
	struct imsm_disk *disk;
	__u32 s;

	if (super->current_vol >= 0) {
		getinfo_super_imsm_volume(st, info);
		return;
	}

	/* Set raid_disks to zero so that Assemble will always pull in valid
	 * spares
	 */
	info->array.raid_disks = 0;
	info->array.level = LEVEL_CONTAINER;
	info->array.layout = 0;
	info->array.md_minor = -1;
	info->array.ctime = 0; /* N/A for imsm */
	info->array.utime = 0;
	info->array.chunk_size = 0;

	info->disk.major = 0;
	info->disk.minor = 0;
	info->disk.raid_disk = -1;
	info->reshape_active = 0;
	info->array.major_version = -1;
	info->array.minor_version = -2;
	strcpy(info->text_version, "imsm");
	info->safe_mode_delay = 0;
	info->disk.number = -1;
	info->disk.state = 0;
	info->name[0] = 0;

	/* describe the local disk this handle was loaded from, if any */
	if (super->disks) {
		__u32 reserved = imsm_reserved_sectors(super, super->disks);

		disk = &super->disks->disk;
		info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved;
		info->component_size = reserved;
		s = __le32_to_cpu(disk->status);
		info->disk.state = s & CONFIGURED_DISK ? (1 << MD_DISK_ACTIVE) : 0;
		info->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0;
		info->disk.state |= s & SPARE_DISK ? 0 : (1 << MD_DISK_SYNC);
	}

	/* only call uuid_from_super_imsm when this disk is part of a populated container,
	 * ->compare_super may have updated the 'num_raid_devs' field for spares
	 */
	if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
		uuid_from_super_imsm(st, info->uuid);
	else
		/* free-floating spare: matches any array */
		memcpy(info->uuid, uuid_match_any, sizeof(int[4]));
}
837
/* apply an '--update=...' request to this metadata; largely a stub
 * for imsm so far (see the FIXME) -- only documents which update
 * types would be relevant
 */
static int update_super_imsm(struct supertype *st, struct mdinfo *info,
			     char *update, char *devname, int verbose,
			     int uuid_set, char *homehost)
{
	/* FIXME */

	/* For 'assemble' and 'force' we need to return non-zero if any
	 * change was made.  For others, the return value is ignored.
	 * Update options are:
	 *  force-one : This device looks a bit old but needs to be included,
	 *        update age info appropriately.
	 *  assemble: clear any 'faulty' flag to allow this device to
	 *        be assembled.
	 *  force-array: Array is degraded but being forced, mark it clean
	 *         if that will be needed to assemble it.
	 *
	 *  newdev:  not used ????
	 *  grow:  Array has gained a new device - this is currently for
	 *        linear only
	 *  resync: mark as dirty so a resync will happen.
	 *  name:  update the name - preserving the homehost
	 *
	 * Following are not relevant for this imsm:
	 *  sparc2.2 : update from old dodgey metadata
	 *  super-minor: change the preferred_minor number
	 *  summaries:  update redundant counters.
	 *  uuid:  Change the uuid of the array to match watch is given
	 *  homehost:  update the recorded homehost
	 *  _reshape_progress: record new reshape_progress position.
	 */
	int rv = 0;
	//struct intel_super *super = st->sb;
	//struct imsm_super *mpb = super->mpb;

	if (strcmp(update, "grow") == 0) {
	}
	if (strcmp(update, "resync") == 0) {
		/* dev->vol.dirty = 1; */
	}

	/* IMSM has no concept of UUID or homehost */

	return rv;
}
882
883 static size_t disks_to_mpb_size(int disks)
884 {
885 size_t size;
886
887 size = sizeof(struct imsm_super);
888 size += (disks - 1) * sizeof(struct imsm_disk);
889 size += 2 * sizeof(struct imsm_dev);
890 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
891 size += (4 - 2) * sizeof(struct imsm_map);
892 /* 4 possible disk_ord_tbl's */
893 size += 4 * (disks - 1) * sizeof(__u32);
894
895 return size;
896 }
897
898 static __u64 avail_size_imsm(struct supertype *st, __u64 devsize)
899 {
900 if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
901 return 0;
902
903 return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
904 }
905
static int compare_super_imsm(struct supertype *st, struct supertype *tst)
{
	/*
	 * return:
	 *  0 same, or first was empty, and second was copied
	 *  1 second had wrong number
	 *  2 wrong uuid
	 *  3 wrong other info
	 */
	struct intel_super *first = st->sb;
	struct intel_super *sec = tst->sb;

	/* adopt the second handle's metadata when we have none yet */
	if (!first) {
		st->sb = tst->sb;
		tst->sb = NULL;
		return 0;
	}

	if (memcmp(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH) != 0)
		return 3;

	/* if an anchor does not have num_raid_devs set then it is a free
	 * floating spare
	 */
	if (first->anchor->num_raid_devs > 0 &&
	    sec->anchor->num_raid_devs > 0) {
		if (first->anchor->family_num != sec->anchor->family_num)
			return 3;
	}

	/* if 'first' is a spare promote it to a populated mpb with sec's
	 * family number
	 */
	if (first->anchor->num_raid_devs == 0 &&
	    sec->anchor->num_raid_devs > 0) {
		/* NOTE: mutates the first anchor in place */
		first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
		first->anchor->family_num = sec->anchor->family_num;
	}

	return 0;
}
947
948 static void fd2devname(int fd, char *name)
949 {
950 struct stat st;
951 char path[256];
952 char dname[100];
953 char *nm;
954 int rv;
955
956 name[0] = '\0';
957 if (fstat(fd, &st) != 0)
958 return;
959 sprintf(path, "/sys/dev/block/%d:%d",
960 major(st.st_rdev), minor(st.st_rdev));
961
962 rv = readlink(path, dname, sizeof(dname));
963 if (rv <= 0)
964 return;
965
966 dname[rv] = '\0';
967 nm = strrchr(dname, '/');
968 nm++;
969 snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
970 }
971
972
973 extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
974
975 static int imsm_read_serial(int fd, char *devname,
976 __u8 serial[MAX_RAID_SERIAL_LEN])
977 {
978 unsigned char scsi_serial[255];
979 int rv;
980 int rsp_len;
981 int len;
982 char *c, *rsp_buf;
983
984 memset(scsi_serial, 0, sizeof(scsi_serial));
985
986 rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));
987
988 if (rv && imsm_env_devname_as_serial()) {
989 memset(serial, 0, MAX_RAID_SERIAL_LEN);
990 fd2devname(fd, (char *) serial);
991 return 0;
992 }
993
994 if (rv != 0) {
995 if (devname)
996 fprintf(stderr,
997 Name ": Failed to retrieve serial for %s\n",
998 devname);
999 return rv;
1000 }
1001
1002 /* trim leading whitespace */
1003 rsp_len = scsi_serial[3];
1004 rsp_buf = (char *) &scsi_serial[4];
1005 c = rsp_buf;
1006 while (isspace(*c))
1007 c++;
1008
1009 /* truncate len to the end of rsp_buf if necessary */
1010 if (c + MAX_RAID_SERIAL_LEN > rsp_buf + rsp_len)
1011 len = rsp_len - (c - rsp_buf);
1012 else
1013 len = MAX_RAID_SERIAL_LEN;
1014
1015 /* initialize the buffer and copy rsp_buf characters */
1016 memset(serial, 0, MAX_RAID_SERIAL_LEN);
1017 memcpy(serial, c, len);
1018
1019 /* trim trailing whitespace starting with the last character copied */
1020 c = (char *) &serial[len - 1];
1021 while (isspace(*c) || *c == '\0')
1022 *c-- = '\0';
1023
1024 return 0;
1025 }
1026
/* compare two fixed-width serial fields; strncmp contract (0 == equal) */
static int serialcmp(__u8 *s1, __u8 *s2)
{
	return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN);
}
1031
/* copy into a fixed-width serial field; strncpy is intentional here:
 * it zero-pads the remainder, and the field need not be NUL-terminated
 * when exactly MAX_RAID_SERIAL_LEN bytes long
 */
static void serialcpy(__u8 *dest, __u8 *src)
{
	strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
}
1036
/* read the serial of the disk behind 'fd' and merge it into
 * super->disks, creating a new entry or refreshing an existing one,
 * then resolve its index against the current anchor (-1 spare,
 * -2 failed/unknown).  Returns 0 on success, 2 on error.  When
 * keep_fd is set the descriptor is retained in the entry.
 */
static int
load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
{
	struct dl *dl;
	struct stat stb;
	int rv;
	int i;
	int alloc = 1;
	__u8 serial[MAX_RAID_SERIAL_LEN];

	rv = imsm_read_serial(fd, devname, serial);

	if (rv != 0)
		return 2;

	/* check if this is a disk we have seen before.  it may be a spare in
	 * super->disks while the current anchor believes it is a raid member,
	 * check if we need to update dl->index
	 */
	for (dl = super->disks; dl; dl = dl->next)
		if (serialcmp(dl->serial, serial) == 0)
			break;

	if (!dl)
		dl = malloc(sizeof(*dl));
	else
		alloc = 0;

	if (!dl) {
		if (devname)
			fprintf(stderr,
				Name ": failed to allocate disk buffer for %s\n",
				devname);
		return 2;
	}

	if (alloc) {
		/* fresh entry: record identity and link into the list */
		fstat(fd, &stb);
		dl->major = major(stb.st_rdev);
		dl->minor = minor(stb.st_rdev);
		dl->next = super->disks;
		dl->fd = keep_fd ? fd : -1;
		dl->devname = devname ? strdup(devname) : NULL;
		serialcpy(dl->serial, serial);
		dl->index = -2;
	} else if (keep_fd) {
		/* refresh the retained descriptor on a re-scan */
		close(dl->fd);
		dl->fd = fd;
	}

	/* look up this disk's index in the current anchor */
	for (i = 0; i < super->anchor->num_disks; i++) {
		struct imsm_disk *disk_iter;

		disk_iter = __get_imsm_disk(super->anchor, i);

		if (serialcmp(disk_iter->serial, dl->serial) == 0) {
			__u32 status;

			dl->disk = *disk_iter;
			status = __le32_to_cpu(dl->disk.status);
			/* only set index on disks that are a member of a
			 * populated container, i.e. one with raid_devs
			 */
			if (status & FAILED_DISK)
				dl->index = -2;
			else if (status & SPARE_DISK)
				dl->index = -1;
			else
				dl->index = i;

			break;
		}
	}

	/* no match, maybe a stale failed drive */
	if (i == super->anchor->num_disks && dl->index >= 0) {
		dl->disk = *__get_imsm_disk(super->anchor, dl->index);
		if (__le32_to_cpu(dl->disk.status) & FAILED_DISK)
			dl->index = -2;
	}

	if (alloc)
		super->disks = dl;

	return 0;
}
1124
/* copy one raid device definition; sizeof_imsm_dev(src, 0) sizes the
 * device without a second (migration) map — see parse_raid_devices()
 */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	memcpy(dest, src, sizeof_imsm_dev(src, 0));
}
1129
1130 #ifndef MDASSEMBLE
1131 /* When migrating map0 contains the 'destination' state while map1
1132 * contains the current state. When not migrating map0 contains the
1133 * current state. This routine assumes that map[0].map_state is set to
1134 * the current array state before being called.
1135 *
1136 * Migration is indicated by one of the following states
1137 * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed)
1138 * 2/ Initialize (migr_state=1 migr_type=0 map0state=normal
1139 * map1state=unitialized)
1140 * 3/ Verify (Resync) (migr_state=1 migr_type=1 map0state=normal
1141 * map1state=normal)
1142 * 4/ Rebuild (migr_state=1 migr_type=1 map0state=normal
1143 * map1state=degraded)
1144 */
/* migrate - enter migration towards @to_state (see state table above) */
static void migrate(struct imsm_dev *dev, __u8 to_state, int rebuild_resync)
{
	struct imsm_map *dest;
	struct imsm_map *src = get_imsm_map(dev, 0);

	/* activate the migration record before a second map exists */
	dev->vol.migr_state = 1;
	dev->vol.migr_type = rebuild_resync;
	dev->vol.curr_migr_unit = 0;
	dest = get_imsm_map(dev, 1);

	/* preserve the current state in map1, then advance map0 to the
	 * destination state — the copy must happen before the update
	 */
	memcpy(dest, src, sizeof_imsm_map(src));
	src->map_state = to_state;
}
1158
1159 static void end_migration(struct imsm_dev *dev, __u8 map_state)
1160 {
1161 struct imsm_map *map = get_imsm_map(dev, 0);
1162
1163 dev->vol.migr_state = 0;
1164 dev->vol.curr_migr_unit = 0;
1165 map->map_state = map_state;
1166 }
1167 #endif
1168
1169 static int parse_raid_devices(struct intel_super *super)
1170 {
1171 int i;
1172 struct imsm_dev *dev_new;
1173 size_t len, len_migr;
1174 size_t space_needed = 0;
1175 struct imsm_super *mpb = super->anchor;
1176
1177 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1178 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
1179
1180 len = sizeof_imsm_dev(dev_iter, 0);
1181 len_migr = sizeof_imsm_dev(dev_iter, 1);
1182 if (len_migr > len)
1183 space_needed += len_migr - len;
1184
1185 dev_new = malloc(len_migr);
1186 if (!dev_new)
1187 return 1;
1188 imsm_copy_dev(dev_new, dev_iter);
1189 super->dev_tbl[i] = dev_new;
1190 }
1191
1192 /* ensure that super->buf is large enough when all raid devices
1193 * are migrating
1194 */
1195 if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
1196 void *buf;
1197
1198 len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512);
1199 if (posix_memalign(&buf, 512, len) != 0)
1200 return 1;
1201
1202 memcpy(buf, super->buf, len);
1203 free(super->buf);
1204 super->buf = buf;
1205 super->len = len;
1206 }
1207
1208 return 0;
1209 }
1210
1211 /* retrieve a pointer to the bbm log which starts after all raid devices */
1212 struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
1213 {
1214 void *ptr = NULL;
1215
1216 if (__le32_to_cpu(mpb->bbm_log_size)) {
1217 ptr = mpb;
1218 ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size);
1219 }
1220
1221 return ptr;
1222 }
1223
1224 static void __free_imsm(struct intel_super *super, int free_disks);
1225
1226 /* load_imsm_mpb - read matrix metadata
1227 * allocates super->mpb to be freed by free_super
1228 */
1229 static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
1230 {
1231 unsigned long long dsize;
1232 unsigned long long sectors;
1233 struct stat;
1234 struct imsm_super *anchor;
1235 __u32 check_sum;
1236 int rc;
1237
1238 get_dev_size(fd, NULL, &dsize);
1239
1240 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
1241 if (devname)
1242 fprintf(stderr,
1243 Name ": Cannot seek to anchor block on %s: %s\n",
1244 devname, strerror(errno));
1245 return 1;
1246 }
1247
1248 if (posix_memalign((void**)&anchor, 512, 512) != 0) {
1249 if (devname)
1250 fprintf(stderr,
1251 Name ": Failed to allocate imsm anchor buffer"
1252 " on %s\n", devname);
1253 return 1;
1254 }
1255 if (read(fd, anchor, 512) != 512) {
1256 if (devname)
1257 fprintf(stderr,
1258 Name ": Cannot read anchor block on %s: %s\n",
1259 devname, strerror(errno));
1260 free(anchor);
1261 return 1;
1262 }
1263
1264 if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
1265 if (devname)
1266 fprintf(stderr,
1267 Name ": no IMSM anchor on %s\n", devname);
1268 free(anchor);
1269 return 2;
1270 }
1271
1272 __free_imsm(super, 0);
1273 super->len = ROUND_UP(anchor->mpb_size, 512);
1274 if (posix_memalign(&super->buf, 512, super->len) != 0) {
1275 if (devname)
1276 fprintf(stderr,
1277 Name ": unable to allocate %zu byte mpb buffer\n",
1278 super->len);
1279 free(anchor);
1280 return 2;
1281 }
1282 memcpy(super->buf, anchor, 512);
1283
1284 sectors = mpb_sectors(anchor) - 1;
1285 free(anchor);
1286 if (!sectors) {
1287 rc = load_imsm_disk(fd, super, devname, 0);
1288 if (rc == 0)
1289 rc = parse_raid_devices(super);
1290 return rc;
1291 }
1292
1293 /* read the extended mpb */
1294 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
1295 if (devname)
1296 fprintf(stderr,
1297 Name ": Cannot seek to extended mpb on %s: %s\n",
1298 devname, strerror(errno));
1299 return 1;
1300 }
1301
1302 if (read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
1303 if (devname)
1304 fprintf(stderr,
1305 Name ": Cannot read extended mpb on %s: %s\n",
1306 devname, strerror(errno));
1307 return 2;
1308 }
1309
1310 check_sum = __gen_imsm_checksum(super->anchor);
1311 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
1312 if (devname)
1313 fprintf(stderr,
1314 Name ": IMSM checksum %x != %x on %s\n",
1315 check_sum, __le32_to_cpu(super->anchor->check_sum),
1316 devname);
1317 return 2;
1318 }
1319
1320 /* FIXME the BBM log is disk specific so we cannot use this global
1321 * buffer for all disks. Ok for now since we only look at the global
1322 * bbm_log_size parameter to gate assembly
1323 */
1324 super->bbm_log = __get_imsm_bbm_log(super->anchor);
1325
1326 rc = load_imsm_disk(fd, super, devname, 0);
1327 if (rc == 0)
1328 rc = parse_raid_devices(super);
1329
1330 return rc;
1331 }
1332
1333 static void __free_imsm_disk(struct dl *d)
1334 {
1335 if (d->fd >= 0)
1336 close(d->fd);
1337 if (d->devname)
1338 free(d->devname);
1339 free(d);
1340
1341 }
1342 static void free_imsm_disks(struct intel_super *super)
1343 {
1344 struct dl *d;
1345
1346 while (super->disks) {
1347 d = super->disks;
1348 super->disks = d->next;
1349 __free_imsm_disk(d);
1350 }
1351 while (super->missing) {
1352 d = super->missing;
1353 super->missing = d->next;
1354 __free_imsm_disk(d);
1355 }
1356
1357 }
1358
1359 /* free all the pieces hanging off of a super pointer */
1360 static void __free_imsm(struct intel_super *super, int free_disks)
1361 {
1362 int i;
1363
1364 if (super->buf) {
1365 free(super->buf);
1366 super->buf = NULL;
1367 }
1368 if (free_disks)
1369 free_imsm_disks(super);
1370 for (i = 0; i < IMSM_MAX_RAID_DEVS; i++)
1371 if (super->dev_tbl[i]) {
1372 free(super->dev_tbl[i]);
1373 super->dev_tbl[i] = NULL;
1374 }
1375 }
1376
/* release a super and everything hanging off of it, disks included */
static void free_imsm(struct intel_super *super)
{
	__free_imsm(super, 1);
	free(super);
}
1382
1383 static void free_super_imsm(struct supertype *st)
1384 {
1385 struct intel_super *super = st->sb;
1386
1387 if (!super)
1388 return;
1389
1390 free_imsm(super);
1391 st->sb = NULL;
1392 }
1393
1394 static struct intel_super *alloc_super(int creating_imsm)
1395 {
1396 struct intel_super *super = malloc(sizeof(*super));
1397
1398 if (super) {
1399 memset(super, 0, sizeof(*super));
1400 super->creating_imsm = creating_imsm;
1401 super->current_vol = -1;
1402 }
1403
1404 return super;
1405 }
1406
1407 #ifndef MDASSEMBLE
1408 /* find_missing - helper routine for load_super_imsm_all that identifies
1409 * disks that have disappeared from the system. This routine relies on
1410 * the mpb being uptodate, which it is at load time.
1411 */
/* returns 0 on success, 1 on allocation failure */
static int find_missing(struct intel_super *super)
{
	int i;
	struct imsm_super *mpb = super->anchor;
	struct dl *dl;
	struct imsm_disk *disk;
	__u32 status;

	/* every disk listed in the anchor should have a live entry in
	 * super->disks; any that do not are recorded on super->missing
	 */
	for (i = 0; i < mpb->num_disks; i++) {
		disk = __get_imsm_disk(mpb, i);
		for (dl = super->disks; dl; dl = dl->next)
			if (serialcmp(dl->disk.serial, disk->serial) == 0)
				break;
		if (dl)
			continue;
		/* ok we have a 'disk' without a live entry in
		 * super->disks
		 */
		status = __le32_to_cpu(disk->status);
		if (status & FAILED_DISK || !(status & USABLE_DISK))
			continue; /* never mind, already marked */

		/* build a placeholder entry (no device node, no fd) */
		dl = malloc(sizeof(*dl));
		if (!dl)
			return 1;
		dl->major = 0;
		dl->minor = 0;
		dl->fd = -1;
		dl->devname = strdup("missing");
		dl->index = i;
		serialcpy(dl->serial, disk->serial);
		dl->disk = *disk;
		dl->next = super->missing;
		super->missing = dl;
	}

	return 0;
}
1450
1451 static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
1452 char *devname, int keep_fd)
1453 {
1454 struct mdinfo *sra;
1455 struct intel_super *super;
1456 struct mdinfo *sd, *best = NULL;
1457 __u32 bestgen = 0;
1458 __u32 gen;
1459 char nm[20];
1460 int dfd;
1461 int rv;
1462
1463 /* check if this disk is a member of an active array */
1464 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
1465 if (!sra)
1466 return 1;
1467
1468 if (sra->array.major_version != -1 ||
1469 sra->array.minor_version != -2 ||
1470 strcmp(sra->text_version, "imsm") != 0)
1471 return 1;
1472
1473 super = alloc_super(0);
1474 if (!super)
1475 return 1;
1476
1477 /* find the most up to date disk in this array, skipping spares */
1478 for (sd = sra->devs; sd; sd = sd->next) {
1479 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
1480 dfd = dev_open(nm, keep_fd ? O_RDWR : O_RDONLY);
1481 if (!dfd) {
1482 free_imsm(super);
1483 return 2;
1484 }
1485 rv = load_imsm_mpb(dfd, super, NULL);
1486 if (!keep_fd)
1487 close(dfd);
1488 if (rv == 0) {
1489 if (super->anchor->num_raid_devs == 0)
1490 gen = 0;
1491 else
1492 gen = __le32_to_cpu(super->anchor->generation_num);
1493 if (!best || gen > bestgen) {
1494 bestgen = gen;
1495 best = sd;
1496 }
1497 } else {
1498 free_imsm(super);
1499 return 2;
1500 }
1501 }
1502
1503 if (!best) {
1504 free_imsm(super);
1505 return 1;
1506 }
1507
1508 /* load the most up to date anchor */
1509 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
1510 dfd = dev_open(nm, O_RDONLY);
1511 if (!dfd) {
1512 free_imsm(super);
1513 return 1;
1514 }
1515 rv = load_imsm_mpb(dfd, super, NULL);
1516 close(dfd);
1517 if (rv != 0) {
1518 free_imsm(super);
1519 return 2;
1520 }
1521
1522 /* re-parse the disk list with the current anchor */
1523 for (sd = sra->devs ; sd ; sd = sd->next) {
1524 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
1525 dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
1526 if (!dfd) {
1527 free_imsm(super);
1528 return 2;
1529 }
1530 load_imsm_disk(dfd, super, NULL, keep_fd);
1531 if (!keep_fd)
1532 close(dfd);
1533 }
1534
1535
1536 if (find_missing(super) != 0) {
1537 free_imsm(super);
1538 return 2;
1539 }
1540
1541 if (st->subarray[0]) {
1542 if (atoi(st->subarray) <= super->anchor->num_raid_devs)
1543 super->current_vol = atoi(st->subarray);
1544 else
1545 return 1;
1546 }
1547
1548 *sbp = super;
1549 st->container_dev = fd2devnum(fd);
1550 if (st->ss == NULL) {
1551 st->ss = &super_imsm;
1552 st->minor_version = 0;
1553 st->max_devs = IMSM_MAX_DEVICES;
1554 }
1555 st->loaded_container = 1;
1556
1557 return 0;
1558 }
1559 #endif
1560
/* load_super_imsm - load metadata given a single device
 *
 * First try treating @fd as a member of an active container (loading the
 * whole container); otherwise read this device's mpb directly.
 * Returns 0 on success.
 */
static int load_super_imsm(struct supertype *st, int fd, char *devname)
{
	struct intel_super *super;
	int rv;

#ifndef MDASSEMBLE
	if (load_super_imsm_all(st, fd, &st->sb, devname, 1) == 0)
		return 0;
#endif
	if (st->subarray[0])
		return 1; /* FIXME */

	super = alloc_super(0);
	if (!super) {
		fprintf(stderr,
			Name ": malloc of %zu failed.\n",
			sizeof(*super));
		return 1;
	}

	rv = load_imsm_mpb(fd, super, devname);

	if (rv) {
		if (devname)
			fprintf(stderr,
				Name ": Failed to load all information "
				"sections on %s\n", devname);
		free_imsm(super);
		return rv;
	}

	st->sb = super;
	/* fill in supertype defaults when invoked generically */
	if (st->ss == NULL) {
		st->ss = &super_imsm;
		st->minor_version = 0;
		st->max_devs = IMSM_MAX_DEVICES;
	}
	st->loaded_container = 0;

	return 0;
}
1602
1603 static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
1604 {
1605 if (info->level == 1)
1606 return 128;
1607 return info->chunk_size >> 9;
1608 }
1609
1610 static __u32 info_to_num_data_stripes(mdu_array_info_t *info)
1611 {
1612 __u32 num_stripes;
1613
1614 num_stripes = (info->size * 2) / info_to_blocks_per_strip(info);
1615 if (info->level == 1)
1616 num_stripes /= 2;
1617
1618 return num_stripes;
1619 }
1620
/* member size in 512b blocks, rounded down to a whole number of strips;
 * the mask trick relies on blocks_per_strip being a power of two
 */
static __u32 info_to_blocks_per_member(mdu_array_info_t *info)
{
	return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1);
}
1625
1626 static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
1627 unsigned long long size, char *name,
1628 char *homehost, int *uuid)
1629 {
1630 /* We are creating a volume inside a pre-existing container.
1631 * so st->sb is already set.
1632 */
1633 struct intel_super *super = st->sb;
1634 struct imsm_super *mpb = super->anchor;
1635 struct imsm_dev *dev;
1636 struct imsm_vol *vol;
1637 struct imsm_map *map;
1638 int idx = mpb->num_raid_devs;
1639 int i;
1640 unsigned long long array_blocks;
1641 __u32 offset = 0;
1642 size_t size_old, size_new;
1643
1644 if (mpb->num_raid_devs >= 2) {
1645 fprintf(stderr, Name": This imsm-container already has the "
1646 "maximum of 2 volumes\n");
1647 return 0;
1648 }
1649
1650 /* ensure the mpb is large enough for the new data */
1651 size_old = __le32_to_cpu(mpb->mpb_size);
1652 size_new = disks_to_mpb_size(info->nr_disks);
1653 if (size_new > size_old) {
1654 void *mpb_new;
1655 size_t size_round = ROUND_UP(size_new, 512);
1656
1657 if (posix_memalign(&mpb_new, 512, size_round) != 0) {
1658 fprintf(stderr, Name": could not allocate new mpb\n");
1659 return 0;
1660 }
1661 memcpy(mpb_new, mpb, size_old);
1662 free(mpb);
1663 mpb = mpb_new;
1664 super->anchor = mpb_new;
1665 mpb->mpb_size = __cpu_to_le32(size_new);
1666 memset(mpb_new + size_old, 0, size_round - size_old);
1667 }
1668 super->current_vol = idx;
1669 /* when creating the first raid device in this container set num_disks
1670 * to zero, i.e. delete this spare and add raid member devices in
1671 * add_to_super_imsm_volume()
1672 */
1673 if (super->current_vol == 0)
1674 mpb->num_disks = 0;
1675 sprintf(st->subarray, "%d", idx);
1676 dev = malloc(sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
1677 if (!dev) {
1678 fprintf(stderr, Name": could not allocate raid device\n");
1679 return 0;
1680 }
1681 strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
1682 array_blocks = calc_array_size(info->level, info->raid_disks,
1683 info->layout, info->chunk_size,
1684 info->size*2);
1685 dev->size_low = __cpu_to_le32((__u32) array_blocks);
1686 dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
1687 dev->status = __cpu_to_le32(0);
1688 dev->reserved_blocks = __cpu_to_le32(0);
1689 vol = &dev->vol;
1690 vol->migr_state = 0;
1691 vol->migr_type = 0;
1692 vol->dirty = 0;
1693 vol->curr_migr_unit = 0;
1694 for (i = 0; i < idx; i++) {
1695 struct imsm_dev *prev = get_imsm_dev(super, i);
1696 struct imsm_map *pmap = get_imsm_map(prev, 0);
1697
1698 offset += __le32_to_cpu(pmap->blocks_per_member);
1699 offset += IMSM_RESERVED_SECTORS;
1700 }
1701 map = get_imsm_map(dev, 0);
1702 map->pba_of_lba0 = __cpu_to_le32(offset);
1703 map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
1704 map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
1705 map->num_data_stripes = __cpu_to_le32(info_to_num_data_stripes(info));
1706 map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
1707 IMSM_T_STATE_NORMAL;
1708
1709 if (info->level == 1 && info->raid_disks > 2) {
1710 fprintf(stderr, Name": imsm does not support more than 2 disks"
1711 "in a raid1 volume\n");
1712 return 0;
1713 }
1714 if (info->level == 10)
1715 map->raid_level = 1;
1716 else
1717 map->raid_level = info->level;
1718
1719 map->num_members = info->raid_disks;
1720 for (i = 0; i < map->num_members; i++) {
1721 /* initialized in add_to_super */
1722 set_imsm_ord_tbl_ent(map, i, 0);
1723 }
1724 mpb->num_raid_devs++;
1725 super->dev_tbl[super->current_vol] = dev;
1726
1727 return 1;
1728 }
1729
1730 static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
1731 unsigned long long size, char *name,
1732 char *homehost, int *uuid)
1733 {
1734 /* This is primarily called by Create when creating a new array.
1735 * We will then get add_to_super called for each component, and then
1736 * write_init_super called to write it out to each device.
1737 * For IMSM, Create can create on fresh devices or on a pre-existing
1738 * array.
1739 * To create on a pre-existing array a different method will be called.
1740 * This one is just for fresh drives.
1741 */
1742 struct intel_super *super;
1743 struct imsm_super *mpb;
1744 size_t mpb_size;
1745
1746 if (!info) {
1747 st->sb = NULL;
1748 return 0;
1749 }
1750 if (st->sb)
1751 return init_super_imsm_volume(st, info, size, name, homehost,
1752 uuid);
1753
1754 super = alloc_super(1);
1755 if (!super)
1756 return 0;
1757 mpb_size = disks_to_mpb_size(info->nr_disks);
1758 if (posix_memalign(&super->buf, 512, mpb_size) != 0) {
1759 free(super);
1760 return 0;
1761 }
1762 mpb = super->buf;
1763 memset(mpb, 0, mpb_size);
1764
1765 memcpy(mpb->sig, MPB_SIGNATURE, strlen(MPB_SIGNATURE));
1766 memcpy(mpb->sig + strlen(MPB_SIGNATURE), MPB_VERSION_RAID5,
1767 strlen(MPB_VERSION_RAID5));
1768 mpb->mpb_size = mpb_size;
1769
1770 st->sb = super;
1771 return 1;
1772 }
1773
1774 #ifndef MDASSEMBLE
/* add_to_super_imsm_volume - make disk @dk a member of the volume being
 * created (super->current_vol)
 *
 * The disk must already be listed in super->disks and be destined to be
 * in-sync (MD_DISK_SYNC); otherwise the call is a silent no-op.
 */
static void add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
				     int fd, char *devname)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct dl *dl;
	struct imsm_dev *dev;
	struct imsm_map *map;
	__u32 status;

	dev = get_imsm_dev(super, super->current_vol);
	map = get_imsm_map(dev, 0);

	/* find the list entry by device number */
	for (dl = super->disks; dl ; dl = dl->next)
		if (dl->major == dk->major &&
		    dl->minor == dk->minor)
			break;

	if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
		return;

	/* add a pristine spare to the metadata */
	if (dl->index < 0) {
		dl->index = super->anchor->num_disks;
		super->anchor->num_disks++;
	}
	set_imsm_ord_tbl_ent(map, dk->number, dl->index);
	status = CONFIGURED_DISK | USABLE_DISK;
	dl->disk.status = __cpu_to_le32(status);

	/* if we are creating the first raid device update the family number */
	if (super->current_vol == 0) {
		__u32 sum;
		struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
		struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index);

		/* fold the dev/disk records into the anchor first so the
		 * checksum that seeds family_num covers them
		 */
		*_dev = *dev;
		*_disk = dl->disk;
		sum = __gen_imsm_checksum(mpb);
		mpb->family_num = __cpu_to_le32(sum);
	}
}
1817
/* add_to_super_imsm - add disk @dk (open as @fd) to the container, or
 * delegate to add_to_super_imsm_volume() when a volume is being created
 *
 * New disks enter the metadata as usable spares.  When updates are being
 * queued (st->update_tail) the entry is parked on super->add for later
 * processing instead of going straight onto super->disks.
 * Allocation/serial failures abort() — this runs during Create where
 * there is no sane recovery.
 */
static void add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
			      int fd, char *devname)
{
	struct intel_super *super = st->sb;
	struct dl *dd;
	unsigned long long size;
	__u32 status, id;
	int rv;
	struct stat stb;

	if (super->current_vol >= 0) {
		add_to_super_imsm_volume(st, dk, fd, devname);
		return;
	}

	fstat(fd, &stb);
	dd = malloc(sizeof(*dd));
	if (!dd) {
		fprintf(stderr,
			Name ": malloc failed %s:%d.\n", __func__, __LINE__);
		abort();
	}
	memset(dd, 0, sizeof(*dd));
	dd->major = major(stb.st_rdev);
	dd->minor = minor(stb.st_rdev);
	dd->index = -1;
	dd->devname = devname ? strdup(devname) : NULL;
	dd->fd = fd;
	rv = imsm_read_serial(fd, devname, dd->serial);
	if (rv) {
		fprintf(stderr,
			Name ": failed to retrieve scsi serial, aborting\n");
		free(dd);
		abort();
	}

	/* record capacity (converted to 512b sectors) and spare status */
	get_dev_size(fd, NULL, &size);
	size /= 512;
	status = USABLE_DISK | SPARE_DISK;
	serialcpy(dd->disk.serial, dd->serial);
	dd->disk.total_blocks = __cpu_to_le32(size);
	dd->disk.status = __cpu_to_le32(status);
	if (sysfs_disk_to_scsi_id(fd, &id) == 0)
		dd->disk.scsi_id = __cpu_to_le32(id);
	else
		dd->disk.scsi_id = __cpu_to_le32(0);

	if (st->update_tail) {
		dd->next = super->add;
		super->add = dd;
	} else {
		dd->next = super->disks;
		super->disks = dd;
	}
}
1873
1874 static int store_imsm_mpb(int fd, struct intel_super *super);
1875
1876 /* spare records have their own family number and do not have any defined raid
1877 * devices
1878 */
1879 static int write_super_imsm_spares(struct intel_super *super, int doclose)
1880 {
1881 struct imsm_super mpb_save;
1882 struct imsm_super *mpb = super->anchor;
1883 __u32 sum;
1884 struct dl *d;
1885
1886 mpb_save = *mpb;
1887 mpb->num_raid_devs = 0;
1888 mpb->num_disks = 1;
1889 mpb->mpb_size = sizeof(struct imsm_super);
1890 mpb->generation_num = __cpu_to_le32(1UL);
1891
1892 for (d = super->disks; d; d = d->next) {
1893 if (d->index != -1)
1894 continue;
1895
1896 mpb->disk[0] = d->disk;
1897 sum = __gen_imsm_checksum(mpb);
1898 mpb->family_num = __cpu_to_le32(sum);
1899 sum = __gen_imsm_checksum(mpb);
1900 mpb->check_sum = __cpu_to_le32(sum);
1901
1902 if (store_imsm_mpb(d->fd, super)) {
1903 fprintf(stderr, "%s: failed for device %d:%d %s\n",
1904 __func__, d->major, d->minor, strerror(errno));
1905 *mpb = mpb_save;
1906 return 1;
1907 }
1908 if (doclose) {
1909 close(d->fd);
1910 d->fd = -1;
1911 }
1912 }
1913
1914 *mpb = mpb_save;
1915 return 0;
1916 }
1917
/* write_super_imsm - serialize the anchor to every raid-member disk,
 * then write spare records via write_super_imsm_spares()
 *
 * Returns 0 on success (per-disk store failures are reported but do not
 * abort the remaining disks).
 */
static int write_super_imsm(struct intel_super *super, int doclose)
{
	struct imsm_super *mpb = super->anchor;
	struct dl *d;
	__u32 generation;
	__u32 sum;
	int spares = 0;
	int i;
	/* base size excludes the disk table; disk[] entries are added below */
	__u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);

	/* 'generation' is incremented everytime the metadata is written */
	generation = __le32_to_cpu(mpb->generation_num);
	generation++;
	mpb->generation_num = __cpu_to_le32(generation);

	/* refresh the anchor's disk table from the live and missing lists;
	 * spares are counted and handled separately at the end
	 */
	mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;
	for (d = super->disks; d; d = d->next) {
		if (d->index == -1)
			spares++;
		else
			mpb->disk[d->index] = d->disk;
	}
	for (d = super->missing; d; d = d->next)
		mpb->disk[d->index] = d->disk;

	/* copy the current raid device definitions back into the anchor */
	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);

		imsm_copy_dev(dev, super->dev_tbl[i]);
		mpb_size += sizeof_imsm_dev(dev, 0);
	}
	mpb_size += __le32_to_cpu(mpb->bbm_log_size);
	mpb->mpb_size = __cpu_to_le32(mpb_size);

	/* recalculate checksum */
	sum = __gen_imsm_checksum(mpb);
	mpb->check_sum = __cpu_to_le32(sum);

	/* write the mpb for disks that compose raid devices */
	for (d = super->disks; d ; d = d->next) {
		if (d->index < 0)
			continue;
		if (store_imsm_mpb(d->fd, super))
			fprintf(stderr, "%s: failed for device %d:%d %s\n",
				__func__, d->major, d->minor, strerror(errno));
		if (doclose) {
			close(d->fd);
			d->fd = -1;
		}
	}

	if (spares)
		return write_super_imsm_spares(super, doclose);

	return 0;
}
1974
1975
/* create_array - queue an update_create_array metadata update for the
 * volume currently being created (super->current_vol)
 *
 * Returns 0 on success, 1 on allocation failure.
 */
static int create_array(struct supertype *st)
{
	size_t len;
	struct imsm_update_create_array *u;
	struct intel_super *super = st->sb;
	struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);

	/* the update embeds the full variable-length imsm_dev */
	len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0);
	u = malloc(len);
	if (!u) {
		fprintf(stderr, "%s: failed to allocate update buffer\n",
			__func__);
		return 1;
	}

	u->type = update_create_array;
	u->dev_idx = super->current_vol;
	imsm_copy_dev(&u->dev, dev);
	append_metadata_update(st, u, len);

	return 0;
}
1998
1999 static int _add_disk(struct supertype *st)
2000 {
2001 struct intel_super *super = st->sb;
2002 size_t len;
2003 struct imsm_update_add_disk *u;
2004
2005 if (!super->add)
2006 return 0;
2007
2008 len = sizeof(*u);
2009 u = malloc(len);
2010 if (!u) {
2011 fprintf(stderr, "%s: failed to allocate update buffer\n",
2012 __func__);
2013 return 1;
2014 }
2015
2016 u->type = update_add_disk;
2017 append_metadata_update(st, u, len);
2018
2019 return 0;
2020 }
2021
2022 static int write_init_super_imsm(struct supertype *st)
2023 {
2024 if (st->update_tail) {
2025 /* queue the recently created array / added disk
2026 * as a metadata update */
2027 struct intel_super *super = st->sb;
2028 struct dl *d;
2029 int rv;
2030
2031 /* determine if we are creating a volume or adding a disk */
2032 if (super->current_vol < 0) {
2033 /* in the add disk case we are running in mdmon
2034 * context, so don't close fd's
2035 */
2036 return _add_disk(st);
2037 } else
2038 rv = create_array(st);
2039
2040 for (d = super->disks; d ; d = d->next) {
2041 close(d->fd);
2042 d->fd = -1;
2043 }
2044
2045 return rv;
2046 } else
2047 return write_super_imsm(st->sb, 1);
2048 }
2049 #endif
2050
/* store_zero_imsm - erase the anchor by writing a zeroed sector over it
 *
 * Returns 0 on success, 1 on seek/allocation/write failure.
 */
static int store_zero_imsm(struct supertype *st, int fd)
{
	unsigned long long dsize;
	void *buf;
	int rv;

	get_dev_size(fd, NULL, &dsize);

	/* first block is stored on second to last sector of the disk */
	if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
		return 1;

	if (posix_memalign(&buf, 512, 512) != 0)
		return 1;

	memset(buf, 0, 512);
	rv = write(fd, buf, 512) != 512;
	free(buf);	/* the old code leaked this buffer on every path */
	return rv;
}
2070
/* size, in bytes, of the bad block management log (0 when absent) */
static int imsm_bbm_log_size(struct imsm_super *mpb)
{
	return __le32_to_cpu(mpb->bbm_log_size);
}
2075
2076 #ifndef MDASSEMBLE
2077 static int validate_geometry_imsm_container(struct supertype *st, int level,
2078 int layout, int raiddisks, int chunk,
2079 unsigned long long size, char *dev,
2080 unsigned long long *freesize,
2081 int verbose)
2082 {
2083 int fd;
2084 unsigned long long ldsize;
2085
2086 if (level != LEVEL_CONTAINER)
2087 return 0;
2088 if (!dev)
2089 return 1;
2090
2091 fd = open(dev, O_RDONLY|O_EXCL, 0);
2092 if (fd < 0) {
2093 if (verbose)
2094 fprintf(stderr, Name ": imsm: Cannot open %s: %s\n",
2095 dev, strerror(errno));
2096 return 0;
2097 }
2098 if (!get_dev_size(fd, dev, &ldsize)) {
2099 close(fd);
2100 return 0;
2101 }
2102 close(fd);
2103
2104 *freesize = avail_size_imsm(st, ldsize >> 9);
2105
2106 return 1;
2107 }
2108
2109 /* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
2110 * FIX ME add ahci details
2111 */
2112 static int validate_geometry_imsm_volume(struct supertype *st, int level,
2113 int layout, int raiddisks, int chunk,
2114 unsigned long long size, char *dev,
2115 unsigned long long *freesize,
2116 int verbose)
2117 {
2118 struct stat stb;
2119 struct intel_super *super = st->sb;
2120 struct dl *dl;
2121 unsigned long long pos = 0;
2122 unsigned long long maxsize;
2123 struct extent *e;
2124 int i;
2125
2126 if (level == LEVEL_CONTAINER)
2127 return 0;
2128
2129 if (level == 1 && raiddisks > 2) {
2130 if (verbose)
2131 fprintf(stderr, Name ": imsm does not support more "
2132 "than 2 in a raid1 configuration\n");
2133 return 0;
2134 }
2135
2136 /* We must have the container info already read in. */
2137 if (!super)
2138 return 0;
2139
2140 if (!dev) {
2141 /* General test: make sure there is space for
2142 * 'raiddisks' device extents of size 'size' at a given
2143 * offset
2144 */
2145 unsigned long long minsize = size*2 /* convert to blocks */;
2146 unsigned long long start_offset = ~0ULL;
2147 int dcnt = 0;
2148 if (minsize == 0)
2149 minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
2150 for (dl = super->disks; dl ; dl = dl->next) {
2151 int found = 0;
2152
2153 pos = 0;
2154 i = 0;
2155 e = get_extents(super, dl);
2156 if (!e) continue;
2157 do {
2158 unsigned long long esize;
2159 esize = e[i].start - pos;
2160 if (esize >= minsize)
2161 found = 1;
2162 if (found && start_offset == ~0ULL) {
2163 start_offset = pos;
2164 break;
2165 } else if (found && pos != start_offset) {
2166 found = 0;
2167 break;
2168 }
2169 pos = e[i].start + e[i].size;
2170 i++;
2171 } while (e[i-1].size);
2172 if (found)
2173 dcnt++;
2174 free(e);
2175 }
2176 if (dcnt < raiddisks) {
2177 if (verbose)
2178 fprintf(stderr, Name ": imsm: Not enough "
2179 "devices with space for this array "
2180 "(%d < %d)\n",
2181 dcnt, raiddisks);
2182 return 0;
2183 }
2184 return 1;
2185 }
2186 /* This device must be a member of the set */
2187 if (stat(dev, &stb) < 0)
2188 return 0;
2189 if ((S_IFMT & stb.st_mode) != S_IFBLK)
2190 return 0;
2191 for (dl = super->disks ; dl ; dl = dl->next) {
2192 if (dl->major == major(stb.st_rdev) &&
2193 dl->minor == minor(stb.st_rdev))
2194 break;
2195 }
2196 if (!dl) {
2197 if (verbose)
2198 fprintf(stderr, Name ": %s is not in the "
2199 "same imsm set\n", dev);
2200 return 0;
2201 }
2202 e = get_extents(super, dl);
2203 maxsize = 0;
2204 i = 0;
2205 if (e) do {
2206 unsigned long long esize;
2207 esize = e[i].start - pos;
2208 if (esize >= maxsize)
2209 maxsize = esize;
2210 pos = e[i].start + e[i].size;
2211 i++;
2212 } while (e[i-1].size);
2213 *freesize = maxsize;
2214
2215 return 1;
2216 }
2217
static int validate_geometry_imsm(struct supertype *st, int level, int layout,
				  int raiddisks, int chunk, unsigned long long size,
				  char *dev, unsigned long long *freesize,
				  int verbose)
{
	/* Top-level geometry validation dispatcher:
	 * - LEVEL_CONTAINER -> container validation on fresh devices
	 * - st->sb already loaded -> volume validation inside that container
	 * - otherwise @dev must itself be a member of an imsm container,
	 *   which is located, loaded, and then validated as a volume.
	 * Returns 1 when the geometry is acceptable, 0 otherwise.
	 */
	int fd, cfd;
	struct mdinfo *sra;

	/* if given unused devices create a container
	 * if given given devices in a container create a member volume
	 */
	if (level == LEVEL_CONTAINER) {
		/* Must be a fresh device to add to a container */
		return validate_geometry_imsm_container(st, level, layout,
							raiddisks, chunk, size,
							dev, freesize,
							verbose);
	}

	if (st->sb) {
		/* creating in a given container */
		return validate_geometry_imsm_volume(st, level, layout,
						     raiddisks, chunk, size,
						     dev, freesize, verbose);
	}

	/* limit creation to the following levels */
	if (!dev)
		switch (level) {
		case 0:
		case 1:
		case 10:
		case 5:
			break;
		default:
			return 1;
		}

	/* This device needs to be a device in an 'imsm' container */
	fd = open(dev, O_RDONLY|O_EXCL, 0);
	if (fd >= 0) {
		/* an exclusive open succeeding means nobody (and hence no
		 * container) is using the device -- refuse it
		 */
		if (verbose)
			fprintf(stderr,
				Name ": Cannot create this array on device %s\n",
				dev);
		close(fd);
		return 0;
	}
	if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
		if (verbose)
			fprintf(stderr, Name ": Cannot open %s: %s\n",
				dev, strerror(errno));
		return 0;
	}
	/* Well, it is in use by someone, maybe an 'imsm' container. */
	cfd = open_container(fd);
	if (cfd < 0) {
		close(fd);
		if (verbose)
			fprintf(stderr, Name ": Cannot use %s: It is busy\n",
				dev);
		return 0;
	}
	sra = sysfs_read(cfd, 0, GET_VERSION);
	close(fd);
	/* NOTE(review): sra is never freed on any path below, and cfd is
	 * leaked on the "may belong to another container" return
	 */
	if (sra && sra->array.major_version == -1 &&
	    strcmp(sra->text_version, "imsm") == 0) {
		/* This is a member of a imsm container. Load the container
		 * and try to create a volume
		 */
		struct intel_super *super;

		if (load_super_imsm_all(st, cfd, (void **) &super, NULL, 1) == 0) {
			st->sb = super;
			st->container_dev = fd2devnum(cfd);
			close(cfd);
			return validate_geometry_imsm_volume(st, level, layout,
							     raiddisks, chunk,
							     size, dev,
							     freesize, verbose);
		}
		close(cfd);
	} else /* may belong to another container */
		return 0;

	return 1;
}
2305 #endif /* MDASSEMBLE */
2306
static struct mdinfo *container_content_imsm(struct supertype *st)
{
	/* Given a container loaded by load_super_imsm_all,
	 * extract information about all the arrays into
	 * an mdinfo tree.
	 *
	 * For each imsm_dev create an mdinfo, fill it in,
	 * then look for matching devices in super->disks
	 * and create appropriate device mdinfo.
	 */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct mdinfo *rest = NULL;
	int i;

	/* do not assemble arrays that might have bad blocks */
	if (imsm_bbm_log_size(super->anchor)) {
		fprintf(stderr, Name ": BBM log found in metadata. "
				"Cannot activate array(s).\n");
		return NULL;
	}

	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);
		struct imsm_map *map = get_imsm_map(dev, 0);
		struct mdinfo *this;
		int slot;

		/* NOTE(review): this malloc is unchecked; a failure would
		 * crash in the memset below
		 */
		this = malloc(sizeof(*this));
		memset(this, 0, sizeof(*this));
		this->next = rest;

		/* getinfo_super_imsm_volume reports on super->current_vol */
		super->current_vol = i;
		getinfo_super_imsm_volume(st, this);
		for (slot = 0 ; slot < map->num_members; slot++) {
			struct mdinfo *info_d;
			struct dl *d;
			int idx;
			int skip;
			__u32 s;
			__u32 ord;

			skip = 0;
			idx = get_imsm_disk_idx(dev, slot);
			ord = get_imsm_ord_tbl_ent(dev, slot);
			/* find the disk record backing this slot */
			for (d = super->disks; d ; d = d->next)
				if (d->index == idx)
					break;

			/* skip slots that are missing, failed, unusable,
			 * or still rebuilding
			 */
			if (d == NULL)
				skip = 1;

			s = d ? __le32_to_cpu(d->disk.status) : 0;
			if (s & FAILED_DISK)
				skip = 1;
			if (!(s & USABLE_DISK))
				skip = 1;
			if (ord & IMSM_ORD_REBUILD)
				skip = 1;

			/*
			 * if we skip some disks the array will be assembled degraded;
			 * reset resync start to avoid a dirty-degraded situation
			 *
			 * FIXME handle dirty degraded
			 */
			if (skip && !dev->vol.dirty)
				this->resync_start = ~0ULL;
			if (skip)
				continue;

			info_d = malloc(sizeof(*info_d));
			if (!info_d) {
				fprintf(stderr, Name ": failed to allocate disk"
					" for volume %s\n", (char *) dev->volume);
				/* NOTE(review): info_d entries already linked
				 * on this->devs leak here
				 */
				free(this);
				this = rest;
				break;
			}
			memset(info_d, 0, sizeof(*info_d));
			info_d->next = this->devs;
			this->devs = info_d;

			info_d->disk.number = d->index;
			info_d->disk.major = d->major;
			info_d->disk.minor = d->minor;
			info_d->disk.raid_disk = slot;

			this->array.working_disks++;

			info_d->events = __le32_to_cpu(mpb->generation_num);
			info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
			info_d->component_size = __le32_to_cpu(map->blocks_per_member);
			if (d->devname)
				strcpy(info_d->name, d->devname);
		}
		rest = this;
	}

	return rest;
}
2408
2409
2410 #ifndef MDASSEMBLE
2411 static int imsm_open_new(struct supertype *c, struct active_array *a,
2412 char *inst)
2413 {
2414 struct intel_super *super = c->sb;
2415 struct imsm_super *mpb = super->anchor;
2416
2417 if (atoi(inst) >= mpb->num_raid_devs) {
2418 fprintf(stderr, "%s: subarry index %d, out of range\n",
2419 __func__, atoi(inst));
2420 return -ENODEV;
2421 }
2422
2423 dprintf("imsm: open_new %s\n", inst);
2424 a->info.container_member = atoi(inst);
2425 return 0;
2426 }
2427
static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed)
{
	/* Map a count of failed members for @dev onto the imsm map state
	 * that should be recorded: NORMAL (or UNINITIALIZED) when nothing
	 * failed, DEGRADED/FAILED per raid level otherwise.  Unknown
	 * levels fall through and keep the current map state.
	 */
	struct imsm_map *map = get_imsm_map(dev, 0);

	if (!failed)
		return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
			IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;

	switch (get_imsm_raid_level(map)) {
	case 0:
		/* raid0 has no redundancy: any failure is fatal */
		return IMSM_T_STATE_FAILED;
		break;
	case 1:
		/* raid1 survives until every member is gone */
		if (failed < map->num_members)
			return IMSM_T_STATE_DEGRADED;
		else
			return IMSM_T_STATE_FAILED;
		break;
	case 10:
	{
		/**
		 * check to see if any mirrors have failed, otherwise we
		 * are degraded. Even numbered slots are mirrored on
		 * slot+1
		 */
		int i;
		int insync;

		for (i = 0; i < map->num_members; i++) {
			__u32 ord = get_imsm_ord_tbl_ent(dev, i);
			int idx = ord_to_idx(ord);
			struct imsm_disk *disk;

			/* reset the potential in-sync count on even-numbered
			 * slots. num_copies is always 2 for imsm raid10
			 */
			if ((i & 1) == 0)
				insync = 2;

			/* a missing, failed, or rebuilding member does not
			 * count towards this mirror pair
			 */
			disk = get_imsm_disk(super, idx);
			if (!disk ||
			    __le32_to_cpu(disk->status) & FAILED_DISK ||
			    ord & IMSM_ORD_REBUILD)
				insync--;

			/* no in-sync disks left in this mirror the
			 * array has failed
			 */
			if (insync == 0)
				return IMSM_T_STATE_FAILED;
		}

		return IMSM_T_STATE_DEGRADED;
	}
	case 5:
		/* raid5 tolerates exactly one failed member (failed == 0
		 * was handled above)
		 */
		if (failed < 2)
			return IMSM_T_STATE_DEGRADED;
		else
			return IMSM_T_STATE_FAILED;
		break;
	default:
		break;
	}

	return map->map_state;
}
2494
2495 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev)
2496 {
2497 int i;
2498 int failed = 0;
2499 struct imsm_disk *disk;
2500 struct imsm_map *map = get_imsm_map(dev, 0);
2501
2502 for (i = 0; i < map->num_members; i++) {
2503 __u32 ord = get_imsm_ord_tbl_ent(dev, i);
2504 int idx = ord_to_idx(ord);
2505
2506 disk = get_imsm_disk(super, idx);
2507 if (!disk ||
2508 __le32_to_cpu(disk->status) & FAILED_DISK ||
2509 ord & IMSM_ORD_REBUILD)
2510 failed++;
2511 }
2512
2513 return failed;
2514 }
2515
2516 static int is_resyncing(struct imsm_dev *dev)
2517 {
2518 struct imsm_map *migr_map;
2519
2520 if (!dev->vol.migr_state)
2521 return 0;
2522
2523 if (dev->vol.migr_type == 0)
2524 return 1;
2525
2526 migr_map = get_imsm_map(dev, 1);
2527
2528 if (migr_map->map_state == IMSM_T_STATE_NORMAL)
2529 return 1;
2530 else
2531 return 0;
2532 }
2533
2534 static int is_rebuilding(struct imsm_dev *dev)
2535 {
2536 struct imsm_map *migr_map;
2537
2538 if (!dev->vol.migr_state)
2539 return 0;
2540
2541 if (dev->vol.migr_type == 0)
2542 return 0;
2543
2544 migr_map = get_imsm_map(dev, 1);
2545
2546 if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
2547 return 1;
2548 else
2549 return 0;
2550 }
2551
2552 static void mark_failure(struct imsm_disk *disk)
2553 {
2554 __u32 status = __le32_to_cpu(disk->status);
2555
2556 if (status & FAILED_DISK)
2557 return;
2558 status |= FAILED_DISK;
2559 disk->status = __cpu_to_le32(status);
2560 disk->scsi_id = __cpu_to_le32(~(__u32)0);
2561 memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
2562 }
2563
/* Handle dirty -> clean transitions and resync. Degraded and rebuild
 * states are handled in imsm_set_disk() with one exception, when a
 * resync is stopped due to a new failure this routine will set the
 * 'degraded' state for the array.
 */
static int imsm_set_array_state(struct active_array *a, int consistent)
{
	int inst = a->info.container_member;
	struct intel_super *super = a->container->sb;
	struct imsm_dev *dev = get_imsm_dev(super, inst);
	struct imsm_map *map = get_imsm_map(dev, 0);
	int failed = imsm_count_failed(super, dev);
	__u8 map_state = imsm_check_degraded(super, dev, failed);

	/* before we activate this array handle any missing disks */
	if (consistent == 2 && super->missing) {
		struct dl *dl;

		dprintf("imsm: mark missing\n");
		end_migration(dev, map_state);
		for (dl = super->missing; dl; dl = dl->next)
			mark_failure(&dl->disk);
		super->updates_pending++;
	}

	/* a request of 2 is only honored as clean when resync is
	 * complete, the map is NORMAL, and no migration is active;
	 * otherwise downgrade it to dirty
	 */
	if (consistent == 2 &&
	    (!is_resync_complete(a) ||
	     map_state != IMSM_T_STATE_NORMAL ||
	     dev->vol.migr_state))
		consistent = 0;

	if (is_resync_complete(a)) {
		/* complete initialization / resync,
		 * recovery is completed in ->set_disk
		 */
		if (is_resyncing(dev)) {
			dprintf("imsm: mark resync done\n");
			end_migration(dev, map_state);
			super->updates_pending++;
		}
	} else if (!is_resyncing(dev) && !failed) {
		/* mark the start of the init process if nothing is failed */
		dprintf("imsm: mark resync start (%llu)\n", a->resync_start);
		map->map_state = map_state;
		migrate(dev, IMSM_T_STATE_NORMAL,
			map->map_state == IMSM_T_STATE_NORMAL);
		super->updates_pending++;
	}

	/* check if we can update the migration checkpoint */
	if (dev->vol.migr_state &&
	    __le32_to_cpu(dev->vol.curr_migr_unit) != a->resync_start) {
		dprintf("imsm: checkpoint migration (%llu)\n", a->resync_start);
		dev->vol.curr_migr_unit = __cpu_to_le32(a->resync_start);
		super->updates_pending++;
	}

	/* mark dirty / clean */
	if (dev->vol.dirty != !consistent) {
		dprintf("imsm: mark '%s' (%llu)\n",
			consistent ? "clean" : "dirty", a->resync_start);
		if (consistent)
			dev->vol.dirty = 0;
		else
			dev->vol.dirty = 1;
		super->updates_pending++;
	}
	return consistent;
}
2633
2634 static void imsm_set_disk(struct active_array *a, int n, int state)
2635 {
2636 int inst = a->info.container_member;
2637 struct intel_super *super = a->container->sb;
2638 struct imsm_dev *dev = get_imsm_dev(super, inst);
2639 struct imsm_map *map = get_imsm_map(dev, 0);
2640 struct imsm_disk *disk;
2641 int failed;
2642 __u32 status;
2643 __u32 ord;
2644 __u8 map_state;
2645
2646 if (n > map->num_members)
2647 fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
2648 n, map->num_members - 1);
2649
2650 if (n < 0)
2651 return;
2652
2653 dprintf("imsm: set_disk %d:%x\n", n, state);
2654
2655 ord = get_imsm_ord_tbl_ent(dev, n);
2656 disk = get_imsm_disk(super, ord_to_idx(ord));
2657
2658 /* check for new failures */
2659 status = __le32_to_cpu(disk->status);
2660 if ((state & DS_FAULTY) && !(status & FAILED_DISK)) {
2661 mark_failure(disk);
2662 super->updates_pending++;
2663 }
2664
2665 /* check if in_sync */
2666 if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD) {
2667 struct imsm_map *migr_map = get_imsm_map(dev, 1);
2668
2669 set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
2670 super->updates_pending++;
2671 }
2672
2673 failed = imsm_count_failed(super, dev);
2674 map_state = imsm_check_degraded(super, dev, failed);
2675
2676 /* check if recovery complete, newly degraded, or failed */
2677 if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) {
2678 end_migration(dev, map_state);
2679 super->updates_pending++;
2680 } else if (map_state == IMSM_T_STATE_DEGRADED &&
2681 map->map_state != map_state &&
2682 !dev->vol.migr_state) {
2683 dprintf("imsm: mark degraded\n");
2684 map->map_state = map_state;
2685 super->updates_pending++;
2686 } else if (map_state == IMSM_T_STATE_FAILED &&
2687 map->map_state != map_state) {
2688 dprintf("imsm: mark failed\n");
2689 end_migration(dev, map_state);
2690 super->updates_pending++;
2691 }
2692 }
2693
static int store_imsm_mpb(int fd, struct intel_super *super)
{
	/* Write the metadata to @fd: the 512-byte anchor goes to the
	 * second-to-last sector of the device, any extended mpb data to
	 * the sectors immediately before it.  Returns 0 on success,
	 * 1 on any seek or short/failed write.
	 */
	struct imsm_super *mpb = super->anchor;
	__u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
	unsigned long long dsize;
	unsigned long long sectors;

	get_dev_size(fd, NULL, &dsize);

	if (mpb_size > 512) {
		/* -1 to account for anchor */
		sectors = mpb_sectors(mpb) - 1;

		/* write the extended mpb to the sectors preceding the anchor */
		if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
			return 1;

		if (write(fd, super->buf + 512, 512 * sectors) != 512 * sectors)
			return 1;
	}

	/* first block is stored on second to last sector of the disk */
	if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
		return 1;

	if (write(fd, super->buf, 512) != 512)
		return 1;

	return 0;
}
2724
2725 static void imsm_sync_metadata(struct supertype *container)
2726 {
2727 struct intel_super *super = container->sb;
2728
2729 if (!super->updates_pending)
2730 return;
2731
2732 write_super_imsm(super, 0);
2733
2734 super->updates_pending = 0;
2735 }
2736
2737 static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
2738 {
2739 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
2740 int i = get_imsm_disk_idx(dev, idx);
2741 struct dl *dl;
2742
2743 for (dl = super->disks; dl; dl = dl->next)
2744 if (dl->index == i)
2745 break;
2746
2747 if (dl && __le32_to_cpu(dl->disk.status) & FAILED_DISK)
2748 dl = NULL;
2749
2750 if (dl)
2751 dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor);
2752
2753 return dl;
2754 }
2755
2756 static struct dl *imsm_add_spare(struct intel_super *super, int slot, struct active_array *a)
2757 {
2758 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
2759 int idx = get_imsm_disk_idx(dev, slot);
2760 struct imsm_map *map = get_imsm_map(dev, 0);
2761 unsigned long long esize;
2762 unsigned long long pos;
2763 struct mdinfo *d;
2764 struct extent *ex;
2765 int j;
2766 int found;
2767 __u32 array_start;
2768 __u32 status;
2769 struct dl *dl;
2770
2771 for (dl = super->disks; dl; dl = dl->next) {
2772 /* If in this array, skip */
2773 for (d = a->info.devs ; d ; d = d->next)
2774 if (d->state_fd >= 0 &&
2775 d->disk.major == dl->major &&
2776 d->disk.minor == dl->minor) {
2777 dprintf("%x:%x already in array\n", dl->major, dl->minor);
2778 break;
2779 }
2780 if (d)
2781 continue;
2782
2783 /* skip in use or failed drives */
2784 status = __le32_to_cpu(dl->disk.status);
2785 if (status & FAILED_DISK || idx == dl->index) {
2786 dprintf("%x:%x status ( %s%s)\n",
2787 dl->major, dl->minor,
2788 status & FAILED_DISK ? "failed " : "",
2789 idx == dl->index ? "in use " : "");
2790 continue;
2791 }
2792
2793 /* Does this unused device have the requisite free space?
2794 * We need a->info.component_size sectors
2795 */
2796 ex = get_extents(super, dl);
2797 if (!ex) {
2798 dprintf("cannot get extents\n");
2799 continue;
2800 }
2801 found = 0;
2802 j = 0;
2803 pos = 0;
2804 array_start = __le32_to_cpu(map->pba_of_lba0);
2805
2806 do {
2807 /* check that we can start at pba_of_lba0 with
2808 * a->info.component_size of space
2809 */
2810 esize = ex[j].start - pos;
2811 if (array_start >= pos &&
2812 array_start + a->info.component_size < ex[j].start) {
2813 found = 1;
2814 break;
2815 }
2816 pos = ex[j].start + ex[j].size;
2817 j++;
2818
2819 } while (ex[j-1].size);
2820
2821 free(ex);
2822 if (!found) {
2823 dprintf("%x:%x does not have %llu at %d\n",
2824 dl->major, dl->minor,
2825 a->info.component_size,
2826 __le32_to_cpu(map->pba_of_lba0));
2827 /* No room */
2828 continue;
2829 } else
2830 break;
2831 }
2832
2833 return dl;
2834 }
2835
static struct mdinfo *imsm_activate_spare(struct active_array *a,
					  struct metadata_update **updates)
{
	/**
	 * Find a device with unused free space and use it to replace a
	 * failed/vacant region in an array. We replace failed regions one a
	 * array at a time. The result is that a new spare disk will be added
	 * to the first failed array and after the monitor has finished
	 * propagating failures the remainder will be consumed.
	 *
	 * FIXME add a capability for mdmon to request spares from another
	 * container.
	 */

	struct intel_super *super = a->container->sb;
	int inst = a->info.container_member;
	struct imsm_dev *dev = get_imsm_dev(super, inst);
	struct imsm_map *map = get_imsm_map(dev, 0);
	int failed = a->info.array.raid_disks;
	struct mdinfo *rv = NULL;
	struct mdinfo *d;
	struct mdinfo *di;
	struct metadata_update *mu;
	struct dl *dl;
	struct imsm_update_activate_spare *u;
	int num_spares = 0;
	int i;

	/* 'failed' starts at raid_disks and is decremented for every
	 * active member, leaving the count of vacant/failed slots
	 */
	for (d = a->info.devs ; d ; d = d->next) {
		if ((d->curr_state & DS_FAULTY) &&
			d->state_fd >= 0)
			/* wait for Removal to happen */
			return NULL;
		if (d->state_fd >= 0)
			failed--;
	}

	dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
		inst, failed, a->info.array.raid_disks, a->info.array.level);
	if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED)
		return NULL;

	/* For each slot, if it is not working, find a spare */
	for (i = 0; i < a->info.array.raid_disks; i++) {
		for (d = a->info.devs ; d ; d = d->next)
			if (d->disk.raid_disk == i)
				break;
		dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
		if (d && (d->state_fd >= 0))
			continue;

		/*
		 * OK, this device needs recovery. Try to re-add the previous
		 * occupant of this slot, if this fails add a new spare
		 */
		dl = imsm_readd(super, i, a);
		if (!dl)
			dl = imsm_add_spare(super, i, a);
		if (!dl)
			continue;

		/* found a usable disk with enough space */
		di = malloc(sizeof(*di));
		if (!di)
			continue;
		memset(di, 0, sizeof(*di));

		/* dl->index will be -1 in the case we are activating a
		 * pristine spare. imsm_process_update() will create a
		 * new index in this case. Once a disk is found to be
		 * failed in all member arrays it is kicked from the
		 * metadata
		 */
		di->disk.number = dl->index;

		/* (ab)use di->devs to store a pointer to the device
		 * we chose
		 */
		di->devs = (struct mdinfo *) dl;

		di->disk.raid_disk = i;
		di->disk.major = dl->major;
		di->disk.minor = dl->minor;
		di->disk.state = 0;
		di->data_offset = __le32_to_cpu(map->pba_of_lba0);
		di->component_size = a->info.component_size;
		di->container_member = inst;
		di->next = rv;
		rv = di;
		num_spares++;
		dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
			i, di->data_offset);

		/* only one spare is activated per call; remaining slots
		 * are handled on subsequent monitor passes
		 */
		break;
	}

	if (!rv)
		/* No spares found */
		return rv;
	/* Now 'rv' has a list of devices to return.
	 * Create a metadata_update record to update the
	 * disk_ord_tbl for the array
	 */
	mu = malloc(sizeof(*mu));
	if (mu) {
		mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
		if (mu->buf == NULL) {
			free(mu);
			mu = NULL;
		}
	}
	if (!mu) {
		/* allocation failed: drop the chosen spares and retry on
		 * a later pass
		 */
		while (rv) {
			struct mdinfo *n = rv->next;

			free(rv);
			rv = n;
		}
		return NULL;
	}

	mu->space = NULL;
	mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
	mu->next = *updates;
	u = (struct imsm_update_activate_spare *) mu->buf;

	/* build one update record per chosen spare, chained via u->next */
	for (di = rv ; di ; di = di->next) {
		u->type = update_activate_spare;
		u->dl = (struct dl *) di->devs;
		di->devs = NULL;
		u->slot = di->disk.raid_disk;
		u->array = inst;
		u->next = u + 1;
		u++;
	}
	(u-1)->next = NULL;
	*updates = mu;

	return rv;
}
2976
2977 static int disks_overlap(struct imsm_dev *d1, struct imsm_dev *d2)
2978 {
2979 struct imsm_map *m1 = get_imsm_map(d1, 0);
2980 struct imsm_map *m2 = get_imsm_map(d2, 0);
2981 int i;
2982 int j;
2983 int idx;
2984
2985 for (i = 0; i < m1->num_members; i++) {
2986 idx = get_imsm_disk_idx(d1, i);
2987 for (j = 0; j < m2->num_members; j++)
2988 if (idx == get_imsm_disk_idx(d2, j))
2989 return 1;
2990 }
2991
2992 return 0;
2993 }
2994
2995 static void imsm_delete(struct intel_super *super, struct dl **dlp, int index);
2996
static void imsm_process_update(struct supertype *st,
			        struct metadata_update *update)
{
	/**
	 * crack open the metadata_update envelope to find the update record
	 * update can be one of:
	 *	update_activate_spare - a spare device has replaced a failed
	 *	device in an array, update the disk_ord_tbl.  If this disk is
	 *	present in all member arrays then also clear the SPARE_DISK
	 *	flag
	 */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb;
	enum imsm_update_type type = *(enum imsm_update_type *) update->buf;

	/* update requires a larger buf but the allocation failed */
	if (super->next_len && !super->next_buf) {
		super->next_len = 0;
		return;
	}

	/* a larger buffer was allocated in imsm_prepare_update(): migrate
	 * the current metadata into it
	 */
	if (super->next_buf) {
		memcpy(super->next_buf, super->buf, super->len);
		free(super->buf);
		super->len = super->next_len;
		super->buf = super->next_buf;

		super->next_len = 0;
		super->next_buf = NULL;
	}

	mpb = super->anchor;

	switch (type) {
	case update_activate_spare: {
		struct imsm_update_activate_spare *u = (void *) update->buf;
		struct imsm_dev *dev = get_imsm_dev(super, u->array);
		struct imsm_map *map = get_imsm_map(dev, 0);
		struct imsm_map *migr_map;
		struct active_array *a;
		struct imsm_disk *disk;
		__u32 status;
		__u8 to_state;
		struct dl *dl;
		unsigned int found;
		int failed;
		int victim = get_imsm_disk_idx(dev, u->slot);
		int i;

		/* sanity check: the chosen spare must be a known disk */
		for (dl = super->disks; dl; dl = dl->next)
			if (dl == u->dl)
				break;

		if (!dl) {
			fprintf(stderr, "error: imsm_activate_spare passed "
				"an unknown disk (index: %d)\n",
				u->dl->index);
			return;
		}

		super->updates_pending++;

		/* count failures (excluding rebuilds and the victim)
		 * to determine map[0] state
		 */
		failed = 0;
		for (i = 0; i < map->num_members; i++) {
			if (i == u->slot)
				continue;
			disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i));
			if (!disk ||
			    __le32_to_cpu(disk->status) & FAILED_DISK)
				failed++;
		}

		/* adding a pristine spare, assign a new index */
		if (dl->index < 0) {
			dl->index = super->anchor->num_disks;
			super->anchor->num_disks++;
		}
		/* the spare becomes a configured member */
		disk = &dl->disk;
		status = __le32_to_cpu(disk->status);
		status |= CONFIGURED_DISK;
		status &= ~SPARE_DISK;
		disk->status = __cpu_to_le32(status);

		/* mark rebuild: start a migration whose destination map has
		 * the new disk flagged IMSM_ORD_REBUILD in the slot
		 */
		to_state = imsm_check_degraded(super, dev, failed);
		map->map_state = IMSM_T_STATE_DEGRADED;
		migrate(dev, to_state, 1);
		migr_map = get_imsm_map(dev, 1);
		set_imsm_ord_tbl_ent(map, u->slot, dl->index);
		set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD);

		/* count arrays using the victim in the metadata */
		found = 0;
		for (a = st->arrays; a ; a = a->next) {
			dev = get_imsm_dev(super, a->info.container_member);
			/* NOTE(review): this loop bound uses 'map', which
			 * still refers to the updated array, not the array
			 * 'dev' now points at -- confirm member counts
			 * always match
			 */
			for (i = 0; i < map->num_members; i++)
				if (victim == get_imsm_disk_idx(dev, i))
					found++;
		}

		/* delete the victim if it is no longer being
		 * utilized anywhere
		 */
		if (!found) {
			struct dl **dlp;

			/* We know that 'manager' isn't touching anything,
			 * so it is safe to delete
			 */
			for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
				if ((*dlp)->index == victim)
					break;

			/* victim may be on the missing list */
			if (!*dlp)
				for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next)
					if ((*dlp)->index == victim)
						break;
			imsm_delete(super, dlp, victim);
		}
		break;
	}
	case update_create_array: {
		/* someone wants to create a new array, we need to be aware of
		 * a few races/collisions:
		 * 1/ 'Create' called by two separate instances of mdadm
		 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
		 *    devices that have since been assimilated via
		 *    activate_spare.
		 * In the event this update can not be carried out mdadm will
		 * (FIX ME) notice that its update did not take hold.
		 */
		struct imsm_update_create_array *u = (void *) update->buf;
		struct imsm_dev *dev;
		struct imsm_map *map, *new_map;
		unsigned long long start, end;
		unsigned long long new_start, new_end;
		int i;
		int overlap = 0;

		/* handle racing creates: first come first serve */
		if (u->dev_idx < mpb->num_raid_devs) {
			dprintf("%s: subarray %d already defined\n",
				__func__, u->dev_idx);
			return;
		}

		/* check update is next in sequence */
		if (u->dev_idx != mpb->num_raid_devs) {
			dprintf("%s: can not create array %d expected index %d\n",
				__func__, u->dev_idx, mpb->num_raid_devs);
			return;
		}

		new_map = get_imsm_map(&u->dev, 0);
		new_start = __le32_to_cpu(new_map->pba_of_lba0);
		new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);

		/* handle activate_spare versus create race:
		 * check to make sure that overlapping arrays do not include
		 * overalpping disks
		 */
		for (i = 0; i < mpb->num_raid_devs; i++) {
			dev = get_imsm_dev(super, i);
			map = get_imsm_map(dev, 0);
			start = __le32_to_cpu(map->pba_of_lba0);
			end = start + __le32_to_cpu(map->blocks_per_member);
			if ((new_start >= start && new_start <= end) ||
			    (start >= new_start && start <= new_end))
				overlap = 1;
			if (overlap && disks_overlap(dev, &u->dev)) {
				dprintf("%s: arrays overlap\n", __func__);
				return;
			}
		}
		/* check num_members sanity */
		if (new_map->num_members > mpb->num_disks) {
			dprintf("%s: num_disks out of range\n", __func__);
			return;
		}

		/* check that prepare update was successful */
		if (!update->space) {
			dprintf("%s: prepare update failed\n", __func__);
			return;
		}

		/* install the new device, using the buffer pre-allocated by
		 * imsm_prepare_update()
		 */
		super->updates_pending++;
		dev = update->space;
		/* NOTE(review): this first get_imsm_map() result is
		 * immediately overwritten below -- redundant assignment
		 */
		map = get_imsm_map(dev, 0);
		update->space = NULL;
		imsm_copy_dev(dev, &u->dev);
		map = get_imsm_map(dev, 0);
		super->dev_tbl[u->dev_idx] = dev;
		mpb->num_raid_devs++;

		/* fix up flags */
		for (i = 0; i < map->num_members; i++) {
			struct imsm_disk *disk;
			__u32 status;

			disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i));
			status = __le32_to_cpu(disk->status);
			status |= CONFIGURED_DISK;
			status &= ~SPARE_DISK;
			disk->status = __cpu_to_le32(status);
		}
		break;
	}
	case update_add_disk:

		/* we may be able to repair some arrays if disks are
		 * being added */
		if (super->add) {
			struct active_array *a;

			super->updates_pending++;
			for (a = st->arrays; a; a = a->next)
				a->check_degraded = 1;
		}
		/* add some spares to the metadata */
		while (super->add) {
			struct dl *al;

			al = super->add;
			super->add = al->next;
			al->next = super->disks;
			super->disks = al;
			dprintf("%s: added %x:%x\n",
				__func__, al->major, al->minor);
		}

		break;
	}
}
3235
3236 static void imsm_prepare_update(struct supertype *st,
3237 struct metadata_update *update)
3238 {
3239 /**
3240 * Allocate space to hold new disk entries, raid-device entries or a new
3241 * mpb if necessary. The manager synchronously waits for updates to
3242 * complete in the monitor, so new mpb buffers allocated here can be
3243 * integrated by the monitor thread without worrying about live pointers
3244 * in the manager thread.
3245 */
3246 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
3247 struct intel_super *super = st->sb;
3248 struct imsm_super *mpb = super->anchor;
3249 size_t buf_len;
3250 size_t len = 0;
3251
3252 switch (type) {
3253 case update_create_array: {
3254 struct imsm_update_create_array *u = (void *) update->buf;
3255
3256 len = sizeof_imsm_dev(&u->dev, 1);
3257 update->space = malloc(len);
3258 break;
3259 default:
3260 break;
3261 }
3262 }
3263
3264 /* check if we need a larger metadata buffer */
3265 if (super->next_buf)
3266 buf_len = super->next_len;
3267 else
3268 buf_len = super->len;
3269
3270 if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
3271 /* ok we need a larger buf than what is currently allocated
3272 * if this allocation fails process_update will notice that
3273 * ->next_len is set and ->next_buf is NULL
3274 */
3275 buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
3276 if (super->next_buf)
3277 free(super->next_buf);
3278
3279 super->next_len = buf_len;
3280 if (posix_memalign(&super->next_buf, buf_len, 512) != 0)
3281 super->next_buf = NULL;
3282 }
3283 }
3284
/* must be called while manager is quiesced */
static void imsm_delete(struct intel_super *super, struct dl **dlp, int index)
{
	/* Remove the disk at metadata position @index: shift every higher
	 * disk index down by one (both in the disk lists and in every
	 * array's ord table), decrement num_disks, and unlink/free the
	 * entry at *dlp if present.
	 */
	struct imsm_super *mpb = super->anchor;
	struct dl *iter;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int i, j, num_members;
	__u32 ord;

	dprintf("%s: deleting device[%d] from imsm_super\n",
		__func__, index);

	/* shift all indexes down one */
	for (iter = super->disks; iter; iter = iter->next)
		if (iter->index > index)
			iter->index--;
	for (iter = super->missing; iter; iter = iter->next)
		if (iter->index > index)
			iter->index--;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		dev = get_imsm_dev(super, i);
		map = get_imsm_map(dev, 0);
		num_members = map->num_members;
		for (j = 0; j < num_members; j++) {
			/* update ord entries being careful not to propagate
			 * ord-flags to the first map
			 */
			ord = get_imsm_ord_tbl_ent(dev, j);

			if (ord_to_idx(ord) <= index)
				continue;

			/* map 0 gets the bare decremented index; the
			 * migration map (if any) keeps the ord flags
			 */
			map = get_imsm_map(dev, 0);
			set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
			map = get_imsm_map(dev, 1);
			if (map)
				set_imsm_ord_tbl_ent(map, j, ord - 1);
		}
	}

	mpb->num_disks--;
	super->updates_pending++;
	if (*dlp) {
		struct dl *dl = *dlp;

		*dlp = (*dlp)->next;
		__free_imsm_disk(dl);
	}
}
3336 #endif /* MDASSEMBLE */
3337
3338 struct superswitch super_imsm = {
3339 #ifndef MDASSEMBLE
3340 .examine_super = examine_super_imsm,
3341 .brief_examine_super = brief_examine_super_imsm,
3342 .detail_super = detail_super_imsm,
3343 .brief_detail_super = brief_detail_super_imsm,
3344 .write_init_super = write_init_super_imsm,
3345 .validate_geometry = validate_geometry_imsm,
3346 .add_to_super = add_to_super_imsm,
3347 #endif
3348 .match_home = match_home_imsm,
3349 .uuid_from_super= uuid_from_super_imsm,
3350 .getinfo_super = getinfo_super_imsm,
3351 .update_super = update_super_imsm,
3352
3353 .avail_size = avail_size_imsm,
3354
3355 .compare_super = compare_super_imsm,
3356
3357 .load_super = load_super_imsm,
3358 .init_super = init_super_imsm,
3359 .store_super = store_zero_imsm,
3360 .free_super = free_super_imsm,
3361 .match_metadata_desc = match_metadata_desc_imsm,
3362 .container_content = container_content_imsm,
3363
3364 .external = 1,
3365
3366 #ifndef MDASSEMBLE
3367 /* for mdmon */
3368 .open_new = imsm_open_new,
3369 .load_super = load_super_imsm,
3370 .set_array_state= imsm_set_array_state,
3371 .set_disk = imsm_set_disk,
3372 .sync_metadata = imsm_sync_metadata,
3373 .activate_spare = imsm_activate_spare,
3374 .process_update = imsm_process_update,
3375 .prepare_update = imsm_prepare_update,
3376 #endif /* MDASSEMBLE */
3377 };