mdadm / super-intel.c
imsm: refactor mpb handling into parse and coalesce
1 /*
2 * mdadm - Intel(R) Matrix Storage Manager Support
3 *
4 * Copyright (C) 2002-2007 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "mdadm.h"
21 #include "mdmon.h"
22 #include <values.h>
23 #include <scsi/sg.h>
24 #include <ctype.h>
25
26 /* MPB == Metadata Parameter Block */
27 #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
28 #define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
29 #define MPB_VERSION_RAID0 "1.0.00"
30 #define MPB_VERSION_RAID1 "1.1.00"
31 #define MPB_VERSION_RAID5 "1.2.02"
32 #define MAX_SIGNATURE_LENGTH 32
33 #define MAX_RAID_SERIAL_LEN 16
34 #define MPB_SECTOR_CNT 418
35 #define IMSM_RESERVED_SECTORS 4096
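/* On-disk placement (see load_imsm_mpb/store_imsm_mpb below): the 512-byte
 * anchor block lives in the second-to-last sector of each member device
 * (offset dsize - 2 * 512), and any extended mpb sectors sit immediately
 * before it.  IMSM_RESERVED_SECTORS is extra space kept free at the end of
 * each disk, beyond the MPB_SECTOR_CNT metadata sectors, when sizing
 * volumes and reporting usable capacity.
 */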
36
37 /* Disk configuration info. */
38 #define IMSM_MAX_DEVICES 255
39 struct imsm_disk {
40 __u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
41 __u32 total_blocks; /* 0xE8 - 0xEB total blocks */
42 __u32 scsi_id; /* 0xEC - 0xEF scsi ID */
43 __u32 status; /* 0xF0 - 0xF3 */
44 #define SPARE_DISK 0x01 /* Spare */
45 #define CONFIGURED_DISK 0x02 /* Member of some RaidDev */
46 #define FAILED_DISK 0x04 /* Permanent failure */
47 #define USABLE_DISK 0x08 /* Fully usable unless FAILED_DISK is set */
48
49 #define IMSM_DISK_FILLERS 5
50 __u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
51 };
52
53 /* RAID map configuration infos. */
54 struct imsm_map {
55 __u32 pba_of_lba0; /* start address of partition */
56 __u32 blocks_per_member;/* blocks per member */
57 __u32 num_data_stripes; /* number of data stripes */
58 __u16 blocks_per_strip;
59 __u8 map_state; /* Normal, Uninitialized, Degraded, Failed */
60 #define IMSM_T_STATE_NORMAL 0
61 #define IMSM_T_STATE_UNINITIALIZED 1
62 #define IMSM_T_STATE_DEGRADED 2 /* FIXME: is this correct? */
63 #define IMSM_T_STATE_FAILED 3 /* FIXME: is this correct? */
64 __u8 raid_level;
65 #define IMSM_T_RAID0 0
66 #define IMSM_T_RAID1 1
67 #define IMSM_T_RAID5 5 /* since metadata version 1.2.02 ? */
68 __u8 num_members; /* number of member disks */
69 __u8 reserved[3];
70 __u32 filler[7]; /* expansion area */
71 __u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members],
72 top byte special */
73 } __attribute__ ((packed));
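/* Note: imsm_map is variable length -- disk_ord_tbl[] really holds
 * num_members entries even though only one is declared, so sizes must be
 * computed with sizeof_imsm_dev() rather than sizeof().  The top byte of
 * each disk_ord_tbl entry appears to carry per-slot flags (the code below
 * only ever masks it off); the low 24 bits are the index into the mpb
 * disk table.
 */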
74
75 struct imsm_vol {
76 __u32 reserved[2];
77 __u8 migr_state; /* Normal or Migrating */
78 __u8 migr_type; /* Initializing, Rebuilding, ... */
79 __u8 dirty;
80 __u8 fill[1];
81 __u32 filler[5];
82 struct imsm_map map[1];
83 /* here comes another one if migr_state */
84 } __attribute__ ((packed));
85
86 struct imsm_dev {
87 __u8 volume[MAX_RAID_SERIAL_LEN];
88 __u32 size_low;
89 __u32 size_high;
90 __u32 status; /* Persistent RaidDev status */
91 __u32 reserved_blocks; /* Reserved blocks at beginning of volume */
92 #define IMSM_DEV_FILLERS 12
93 __u32 filler[IMSM_DEV_FILLERS];
94 struct imsm_vol vol;
95 } __attribute__ ((packed));
96
97 struct imsm_super {
98 __u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */
99 __u32 check_sum; /* 0x20 - 0x23 MPB Checksum */
100 __u32 mpb_size; /* 0x24 - 0x27 Size of MPB */
101 __u32 family_num; /* 0x28 - 0x2B Checksum from first time this config was written */
102 __u32 generation_num; /* 0x2C - 0x2F Incremented each time this array's MPB is written */
103 __u32 reserved[2]; /* 0x30 - 0x37 */
104 __u8 num_disks; /* 0x38 Number of configured disks */
105 __u8 num_raid_devs; /* 0x39 Number of configured volumes */
106 __u8 fill[2]; /* 0x3A - 0x3B */
107 #define IMSM_FILLERS 39
108 __u32 filler[IMSM_FILLERS]; /* 0x3C - 0xD7 RAID_MPB_FILLERS */
109 struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */
110 /* here comes imsm_dev[num_raid_devs] */
111 } __attribute__ ((packed));
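/* A complete mpb is laid out as:
 *
 *   struct imsm_super              anchor header (offsets noted above)
 *   struct imsm_disk[num_disks]    disk table starting at 0xD8
 *   imsm_dev[num_raid_devs]        variable-length raid device records
 *
 * mpb_size covers the whole structure, so anything beyond the first 512
 * bytes spills into the extended mpb sectors handled by load_imsm_mpb()
 * and store_imsm_mpb().
 */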
112
113 #ifndef MDASSEMBLE
114 static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
115 #endif
116
117 static unsigned int sector_count(__u32 bytes)
118 {
119 return ((bytes + (512-1)) & (~(512-1))) / 512;
120 }
121
122 static unsigned int mpb_sectors(struct imsm_super *mpb)
123 {
124 return sector_count(__le32_to_cpu(mpb->mpb_size));
125 }
126
127 /* internal representation of IMSM metadata */
128 struct intel_super {
129 union {
130 void *buf; /* O_DIRECT buffer for reading/writing metadata */
131 struct imsm_super *anchor; /* immovable parameters */
132 };
133 size_t len; /* size of the 'buf' allocation */
134 int updates_pending; /* count of pending updates for mdmon */
135 int creating_imsm; /* flag to indicate container creation */
136 int current_vol; /* index of raid device undergoing creation */
137 #define IMSM_MAX_DISKS 6
138 struct imsm_disk *disk_tbl[IMSM_MAX_DISKS];
139 #define IMSM_MAX_RAID_DEVS 2
140 struct imsm_dev *dev_tbl[IMSM_MAX_RAID_DEVS];
141 struct dl {
142 struct dl *next;
143 int index;
144 __u8 serial[MAX_RAID_SERIAL_LEN];
145 int major, minor;
146 char *devname;
147 int fd;
148 } *disks;
149 };
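/* 'buf' and 'anchor' alias the same O_DIRECT allocation: raw metadata is
 * read into buf and interpreted through anchor.  disk_tbl[] and dev_tbl[]
 * hold separately allocated, parsed copies (see load_imsm_disk() and
 * parse_raid_devices()); write_super_imsm() coalesces them back into the
 * anchor before the buffer is written out.
 */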
150
151 struct extent {
152 unsigned long long start, size;
153 };
154
155 /* definition of messages passed to imsm_process_update */
156 enum imsm_update_type {
157 update_activate_spare,
158 update_create_array,
159 };
160
161 struct imsm_update_activate_spare {
162 enum imsm_update_type type;
163 int disk_idx;
164 int slot;
165 int array;
166 struct imsm_update_activate_spare *next;
167 };
168
169 struct imsm_update_create_array {
170 enum imsm_update_type type;
171 struct imsm_dev dev;
172 int dev_idx;
173 };
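/* Each update message begins with its imsm_update_type so the receiver can
 * dispatch on it.  The messages are queued from mdadm via
 * append_metadata_update() (see write_init_super_imsm() below) and consumed
 * by mdmon's imsm_process_update(), which is not part of this portion of
 * the file.
 */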
174
175 static int imsm_env_devname_as_serial(void)
176 {
177 char *val = getenv("IMSM_DEVNAME_AS_SERIAL");
178
179 if (val && atoi(val) == 1)
180 return 1;
181
182 return 0;
183 }
184
185
186 static struct supertype *match_metadata_desc_imsm(char *arg)
187 {
188 struct supertype *st;
189
190 if (strcmp(arg, "imsm") != 0 &&
191 strcmp(arg, "default") != 0
192 )
193 return NULL;
194
195 st = malloc(sizeof(*st));
196 memset(st, 0, sizeof(*st));
197 st->ss = &super_imsm;
198 st->max_devs = IMSM_MAX_DEVICES;
199 st->minor_version = 0;
200 st->sb = NULL;
201 return st;
202 }
203
204 static __u8 *get_imsm_version(struct imsm_super *mpb)
205 {
206 return &mpb->sig[MPB_SIG_LEN];
207 }
208
209 /* retrieve a disk directly from the anchor when the anchor is known to be
210 * up-to-date, currently only at load time
211 */
212 static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
213 {
214 if (index >= mpb->num_disks)
215 return NULL;
216 return &mpb->disk[index];
217 }
218
219 static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
220 {
221 if (index >= super->anchor->num_disks)
222 return NULL;
223 return super->disk_tbl[index];
224 }
225
226 /* generate a checksum directly from the anchor when the anchor is known to be
227 * up-to-date, currently only at load or write_super after coalescing
228 */
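/* The sum is taken over the full mpb with the stored check_sum field
 * included, then the stored value is subtracted back out -- equivalent to
 * summing with check_sum treated as zero.  A minimal verification sketch,
 * matching the use in examine_super_imsm() and load_imsm_mpb():
 *
 *   __u32 sum = __gen_imsm_checksum(mpb);
 *   if (sum != __le32_to_cpu(mpb->check_sum))
 *           reject the metadata as stale or corrupt;
 */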
229 static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
230 {
231 __u32 end = __le32_to_cpu(mpb->mpb_size) / sizeof(end);
232 __u32 *p = (__u32 *) mpb;
233 __u32 sum = 0;
234
235 while (end--)
236 sum += __le32_to_cpu(*p++);
237
238 return sum - __le32_to_cpu(mpb->check_sum);
239 }
240
241 static size_t sizeof_imsm_dev(struct imsm_dev *dev)
242 {
243 size_t size = sizeof(*dev);
244
245 /* each map has disk_ord_tbl[num_members - 1] additional space */
246 size += sizeof(__u32) * (dev->vol.map[0].num_members - 1);
247
248 /* migrating means an additional map */
249 if (dev->vol.migr_state) {
250 size += sizeof(struct imsm_map);
251 size += sizeof(__u32) * (dev->vol.map[1].num_members - 1);
252 }
253
254 return size;
255 }
256
257 static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
258 {
259 int offset;
260 int i;
261 void *_mpb = mpb;
262
263 if (index >= mpb->num_raid_devs)
264 return NULL;
265
266 /* devices start after all disks */
267 offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
268
269 for (i = 0; i <= index; i++)
270 if (i == index)
271 return _mpb + offset;
272 else
273 offset += sizeof_imsm_dev(_mpb + offset);
274
275 return NULL;
276 }
277
278 static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
279 {
280 if (index >= super->anchor->num_raid_devs)
281 return NULL;
282 return super->dev_tbl[index];
283 }
284
285 static __u32 get_imsm_disk_idx(struct imsm_map *map, int slot)
286 {
287 __u32 *ord_tbl = &map->disk_ord_tbl[slot];
288
289 /* top byte is 'special' */
290 return __le32_to_cpu(*ord_tbl) & ~(0xff << 24);
291 }
292
293 static int get_imsm_raid_level(struct imsm_map *map)
294 {
295 if (map->raid_level == 1) {
296 if (map->num_members == 2)
297 return 1;
298 else
299 return 10;
300 }
301
302 return map->raid_level;
303 }
304
305 static int cmp_extent(const void *av, const void *bv)
306 {
307 const struct extent *a = av;
308 const struct extent *b = bv;
309 if (a->start < b->start)
310 return -1;
311 if (a->start > b->start)
312 return 1;
313 return 0;
314 }
315
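/* get_extents() returns a malloc'd, start-sorted array with one entry per
 * raid member region on 'dl', terminated by a size-0 entry whose start
 * marks where the reserved metadata area begins at the end of the disk.
 * validate_geometry_imsm_volume() and imsm_activate_spare() walk it to
 * find free gaps between (and after) the used regions.
 */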
316 static struct extent *get_extents(struct intel_super *super, struct dl *dl)
317 {
318 /* find a list of used extents on the given physical device */
319 struct imsm_disk *disk;
320 struct extent *rv, *e;
321 int i, j;
322 int memberships = 0;
323
324 disk = get_imsm_disk(super, dl->index);
325 if (!disk)
326 return NULL;
327
328 for (i = 0; i < super->anchor->num_raid_devs; i++) {
329 struct imsm_dev *dev = get_imsm_dev(super, i);
330 struct imsm_map *map = dev->vol.map;
331
332 for (j = 0; j < map->num_members; j++) {
333 __u32 index = get_imsm_disk_idx(map, j);
334
335 if (index == dl->index)
336 memberships++;
337 }
338 }
339 rv = malloc(sizeof(struct extent) * (memberships + 1));
340 if (!rv)
341 return NULL;
342 e = rv;
343
344 for (i = 0; i < super->anchor->num_raid_devs; i++) {
345 struct imsm_dev *dev = get_imsm_dev(super, i);
346 struct imsm_map *map = dev->vol.map;
347
348 for (j = 0; j < map->num_members; j++) {
349 __u32 index = get_imsm_disk_idx(map, j);
350
351 if (index == dl->index) {
352 e->start = __le32_to_cpu(map->pba_of_lba0);
353 e->size = __le32_to_cpu(map->blocks_per_member);
354 e++;
355 }
356 }
357 }
358 qsort(rv, memberships, sizeof(*rv), cmp_extent);
359
360 e->start = __le32_to_cpu(disk->total_blocks) -
361 (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
362 e->size = 0;
363 return rv;
364 }
365
366 #ifndef MDASSEMBLE
367 static void print_imsm_dev(struct imsm_dev *dev, int index)
368 {
369 __u64 sz;
370 int slot;
371 struct imsm_map *map = dev->vol.map;
372
373 printf("\n");
374 printf("[%s]:\n", dev->volume);
375 printf(" RAID Level : %d\n", get_imsm_raid_level(map));
376 printf(" Members : %d\n", map->num_members);
377 for (slot = 0; slot < map->num_members; slot++)
378 if (index == get_imsm_disk_idx(map, slot))
379 break;
380 if (slot < map->num_members)
381 printf(" This Slot : %d\n", slot);
382 else
383 printf(" This Slot : ?\n");
384 sz = __le32_to_cpu(dev->size_high);
385 sz <<= 32;
386 sz += __le32_to_cpu(dev->size_low);
387 printf(" Array Size : %llu%s\n", (unsigned long long)sz,
388 human_size(sz * 512));
389 sz = __le32_to_cpu(map->blocks_per_member);
390 printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
391 human_size(sz * 512));
392 printf(" Sector Offset : %u\n",
393 __le32_to_cpu(map->pba_of_lba0));
394 printf(" Num Stripes : %u\n",
395 __le32_to_cpu(map->num_data_stripes));
396 printf(" Chunk Size : %u KiB\n",
397 __le16_to_cpu(map->blocks_per_strip) / 2);
398 printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
399 printf(" Migrate State : %s\n", dev->vol.migr_state ? "migrating" : "idle");
400 printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
401 printf(" Map State : %s\n", map_state_str[map->map_state]);
402 }
403
404 static void print_imsm_disk(struct imsm_super *mpb, int index)
405 {
406 struct imsm_disk *disk = __get_imsm_disk(mpb, index);
407 char str[MAX_RAID_SERIAL_LEN];
408 __u32 s;
409 __u64 sz;
410
411 if (index < 0)
412 return;
413
414 printf("\n");
415 snprintf(str, MAX_RAID_SERIAL_LEN, "%s", disk->serial);
416 printf(" Disk%02d Serial : %s\n", index, str);
417 s = __le32_to_cpu(disk->status);
418 printf(" State :%s%s%s%s\n", s&SPARE_DISK ? " spare" : "",
419 s&CONFIGURED_DISK ? " active" : "",
420 s&FAILED_DISK ? " failed" : "",
421 s&USABLE_DISK ? " usable" : "");
422 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
423 sz = __le32_to_cpu(disk->total_blocks) -
424 (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS * mpb->num_raid_devs);
425 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
426 human_size(sz * 512));
427 }
428
429 static void examine_super_imsm(struct supertype *st, char *homehost)
430 {
431 struct intel_super *super = st->sb;
432 struct imsm_super *mpb = super->anchor;
433 char str[MAX_SIGNATURE_LENGTH];
434 int i;
435 __u32 sum;
436
437 snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
438 printf(" Magic : %s\n", str);
439 snprintf(str, sizeof(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
440 printf(" Version : %s\n", str);
441 printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
442 printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
443 sum = __le32_to_cpu(mpb->check_sum);
444 printf(" Checksum : %08x %s\n", sum,
445 __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
446 printf(" MPB Sectors : %d\n", mpb_sectors(mpb));
447 printf(" Disks : %d\n", mpb->num_disks);
448 printf(" RAID Devices : %d\n", mpb->num_raid_devs);
449 print_imsm_disk(mpb, super->disks->index);
450 for (i = 0; i < mpb->num_raid_devs; i++)
451 print_imsm_dev(__get_imsm_dev(mpb, i), super->disks->index);
452 for (i = 0; i < mpb->num_disks; i++) {
453 if (i == super->disks->index)
454 continue;
455 print_imsm_disk(mpb, i);
456 }
457 }
458
459 static void brief_examine_super_imsm(struct supertype *st)
460 {
461 struct intel_super *super = st->sb;
462
463 printf("ARRAY /dev/imsm family=%08x metadata=external:imsm\n",
464 __le32_to_cpu(super->anchor->family_num));
465 }
466
467 static void detail_super_imsm(struct supertype *st, char *homehost)
468 {
469 printf("%s\n", __FUNCTION__);
470 }
471
472 static void brief_detail_super_imsm(struct supertype *st)
473 {
474 printf("%s\n", __FUNCTION__);
475 }
476 #endif
477
478 static int match_home_imsm(struct supertype *st, char *homehost)
479 {
480 printf("%s\n", __FUNCTION__);
481
482 return 0;
483 }
484
485 static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
486 {
487 printf("%s\n", __FUNCTION__);
488 }
489
490 #if 0
491 static void
492 get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
493 {
494 __u8 *v = get_imsm_version(mpb);
495 __u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
496 char major[] = { 0, 0, 0 };
497 char minor[] = { 0 ,0, 0 };
498 char patch[] = { 0, 0, 0 };
499 char *ver_parse[] = { major, minor, patch };
500 int i, j;
501
502 i = j = 0;
503 while (*v != '\0' && v < end) {
504 if (*v != '.' && j < 2)
505 ver_parse[i][j++] = *v;
506 else {
507 i++;
508 j = 0;
509 }
510 v++;
511 }
512
513 *m = strtol(minor, NULL, 0);
514 *p = strtol(patch, NULL, 0);
515 }
516 #endif
517
518 static int imsm_level_to_layout(int level)
519 {
520 switch (level) {
521 case 0:
522 case 1:
523 return 0;
524 case 5:
525 case 6:
526 return ALGORITHM_LEFT_SYMMETRIC;
527 case 10:
528 return 0x102; //FIXME is this correct?
529 }
530 return -1;
531 }
532
533 static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
534 {
535 struct intel_super *super = st->sb;
536 struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
537 struct imsm_map *map = &dev->vol.map[0];
538
539 info->container_member = super->current_vol;
540 info->array.raid_disks = map->num_members;
541 info->array.level = get_imsm_raid_level(map);
542 info->array.layout = imsm_level_to_layout(info->array.level);
543 info->array.md_minor = -1;
544 info->array.ctime = 0;
545 info->array.utime = 0;
546 info->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) * 512;
547
548 info->data_offset = __le32_to_cpu(map->pba_of_lba0);
549 info->component_size = __le32_to_cpu(map->blocks_per_member);
550
551 info->disk.major = 0;
552 info->disk.minor = 0;
553
554 sprintf(info->text_version, "/%s/%d",
555 devnum2devname(st->container_dev),
556 info->container_member);
557 }
558
559
560 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info)
561 {
562 struct intel_super *super = st->sb;
563 struct imsm_disk *disk;
564 __u32 s;
565
566 if (super->current_vol >= 0) {
567 getinfo_super_imsm_volume(st, info);
568 return;
569 }
570 info->array.raid_disks = super->anchor->num_disks;
571 info->array.level = LEVEL_CONTAINER;
572 info->array.layout = 0;
573 info->array.md_minor = -1;
574 info->array.ctime = 0; /* N/A for imsm */
575 info->array.utime = 0;
576 info->array.chunk_size = 0;
577
578 info->disk.major = 0;
579 info->disk.minor = 0;
580 info->disk.raid_disk = -1;
581 info->reshape_active = 0;
582 strcpy(info->text_version, "imsm");
583 info->disk.number = -1;
584 info->disk.state = 0;
585
586 if (super->disks) {
587 disk = get_imsm_disk(super, super->disks->index);
588 if (!disk) {
589 info->disk.number = -1;
590 info->disk.raid_disk = -1;
591 return;
592 }
593 info->disk.number = super->disks->index;
594 info->disk.raid_disk = super->disks->index;
595 info->data_offset = __le32_to_cpu(disk->total_blocks) -
596 (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
597 info->component_size = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
598 s = __le32_to_cpu(disk->status);
599 info->disk.state = s & CONFIGURED_DISK ? (1 << MD_DISK_ACTIVE) : 0;
600 info->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0;
601 info->disk.state |= s & USABLE_DISK ? (1 << MD_DISK_SYNC) : 0;
602 }
603 }
604
605 static int update_super_imsm(struct supertype *st, struct mdinfo *info,
606 char *update, char *devname, int verbose,
607 int uuid_set, char *homehost)
608 {
609 /* FIXME */
610
611 /* For 'assemble' and 'force' we need to return non-zero if any
612 * change was made. For others, the return value is ignored.
613 * Update options are:
614 * force-one : This device looks a bit old but needs to be included,
615 * update age info appropriately.
616 * assemble: clear any 'faulty' flag to allow this device to
617 * be assembled.
618 * force-array: Array is degraded but being forced, mark it clean
619 * if that will be needed to assemble it.
620 *
621 * newdev: not used ????
622 * grow: Array has gained a new device - this is currently for
623 * linear only
624 * resync: mark as dirty so a resync will happen.
625 * name: update the name - preserving the homehost
626 *
627 * Following are not relevant for this imsm:
628 * sparc2.2 : update from old dodgy metadata
629 * super-minor: change the preferred_minor number
630 * summaries: update redundant counters.
631 * uuid: Change the uuid of the array to match what is given
632 * homehost: update the recorded homehost
633 * _reshape_progress: record new reshape_progress position.
634 */
635 int rv = 0;
636 //struct intel_super *super = st->sb;
637 //struct imsm_super *mpb = super->mpb;
638
639 if (strcmp(update, "grow") == 0) {
640 }
641 if (strcmp(update, "resync") == 0) {
642 /* dev->vol.dirty = 1; */
643 }
644
645 /* IMSM has no concept of UUID or homehost */
646
647 return rv;
648 }
649
650 static size_t disks_to_mpb_size(int disks)
651 {
652 size_t size;
653
654 size = sizeof(struct imsm_super);
655 size += (disks - 1) * sizeof(struct imsm_disk);
656 size += 2 * sizeof(struct imsm_dev);
657 /* up to 2 maps per raid device (minus the 2 imsm_maps already counted in the imsm_devs) */
658 size += (4 - 2) * sizeof(struct imsm_map);
659 /* 4 possible disk_ord_tbl's */
660 size += 4 * (disks - 1) * sizeof(__u32);
661
662 return size;
663 }
664
665 static __u64 avail_size_imsm(struct supertype *st, __u64 devsize)
666 {
667 if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
668 return 0;
669
670 return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
671 }
672
673 static int compare_super_imsm(struct supertype *st, struct supertype *tst)
674 {
675 /*
676 * return:
677 * 0 same, or first was empty, and second was copied
678 * 1 second had wrong number
679 * 2 wrong uuid
680 * 3 wrong other info
681 */
682 struct intel_super *first = st->sb;
683 struct intel_super *sec = tst->sb;
684
685 if (!first) {
686 st->sb = tst->sb;
687 tst->sb = NULL;
688 return 0;
689 }
690
691 if (memcmp(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH) != 0)
692 return 3;
693 if (first->anchor->family_num != sec->anchor->family_num)
694 return 3;
695 if (first->anchor->mpb_size != sec->anchor->mpb_size)
696 return 3;
697 if (first->anchor->check_sum != sec->anchor->check_sum)
698 return 3;
699
700 return 0;
701 }
702
703 static void fd2devname(int fd, char *name)
704 {
705 struct stat st;
706 char path[256];
707 char dname[100];
708 char *nm;
709 int rv;
710
711 name[0] = '\0';
712 if (fstat(fd, &st) != 0)
713 return;
714 sprintf(path, "/sys/dev/block/%d:%d",
715 major(st.st_rdev), minor(st.st_rdev));
716
717 rv = readlink(path, dname, sizeof(dname));
718 if (rv <= 0)
719 return;
720
721 dname[rv] = '\0';
722 nm = strrchr(dname, '/');
723 nm++;
724 snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
725 }
726
727
728 extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
729
730 static int imsm_read_serial(int fd, char *devname,
731 __u8 serial[MAX_RAID_SERIAL_LEN])
732 {
733 unsigned char scsi_serial[255];
734 int rv;
735 int rsp_len;
736 int i, cnt;
737
738 memset(scsi_serial, 0, sizeof(scsi_serial));
739
740 if (imsm_env_devname_as_serial()) {
741 char name[MAX_RAID_SERIAL_LEN];
742
743 fd2devname(fd, name);
744 strcpy((char *) serial, name);
745 return 0;
746 }
747
748 rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));
749
750 if (rv != 0) {
751 if (devname)
752 fprintf(stderr,
753 Name ": Failed to retrieve serial for %s\n",
754 devname);
755 return rv;
756 }
757
758 rsp_len = scsi_serial[3];
759 for (i = 0, cnt = 0; i < rsp_len; i++) {
760 if (!isspace(scsi_serial[4 + i]))
761 serial[cnt++] = scsi_serial[4 + i];
762 if (cnt == MAX_RAID_SERIAL_LEN)
763 break;
764 }
765
766 serial[MAX_RAID_SERIAL_LEN - 1] = '\0';
767
768 return 0;
769 }
770
771 static int
772 load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
773 {
774 struct dl *dl;
775 struct stat stb;
776 struct imsm_disk *disk;
777 int rv;
778 int i;
779
780 dl = malloc(sizeof(*dl));
781 disk = malloc(sizeof(*disk));
782 if (!dl || !disk) {
783 if (devname)
784 fprintf(stderr,
785 Name ": failed to allocate disk buffer for %s\n",
786 devname);
787 if (disk)
788 free(disk);
789 if (dl)
790 free(dl);
791 return 2;
792 }
793 memset(dl, 0, sizeof(*dl));
794 memset(disk, 0, sizeof(*disk));
795
796 fstat(fd, &stb);
797 dl->major = major(stb.st_rdev);
798 dl->minor = minor(stb.st_rdev);
799 dl->next = super->disks;
800 dl->fd = keep_fd ? fd : -1;
801 dl->devname = devname ? strdup(devname) : NULL;
802 dl->index = -1;
803 super->disks = dl;
804 rv = imsm_read_serial(fd, devname, dl->serial);
805
806 if (rv != 0)
807 return 2;
808
809 /* look up this disk's index */
810 for (i = 0; i < super->anchor->num_disks; i++) {
811 struct imsm_disk *disk_iter;
812
813 disk_iter = __get_imsm_disk(super->anchor, i);
814
815 if (memcmp(disk_iter->serial, dl->serial,
816 MAX_RAID_SERIAL_LEN) == 0) {
817 *disk = *disk_iter;
818 super->disk_tbl[i] = disk;
819 dl->index = i;
820 break;
821 }
822 }
823
824 if (i == super->anchor->num_disks) {
825 if (devname)
826 fprintf(stderr,
827 Name ": failed to match serial \'%s\' for %s\n",
828 dl->serial, devname);
829 free(disk);
830 return 0;
831 }
832
833 return 0;
834 }
835
836 static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
837 {
838 int i;
839
840 *dest = *src;
841
842 for (i = 0; i < src->vol.map[0].num_members; i++)
843 dest->vol.map[0].disk_ord_tbl[i] = src->vol.map[0].disk_ord_tbl[i];
844
845 if (!src->vol.migr_state)
846 return;
847
848 dest->vol.map[1] = src->vol.map[1];
849 for (i = 0; i < src->vol.map[1].num_members; i++)
850 dest->vol.map[1].disk_ord_tbl[i] = src->vol.map[1].disk_ord_tbl[i];
851 }
852
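/* parse_raid_devices - the "parse" half of mpb handling: copy each
 * variable-length imsm_dev record out of the anchor into its own
 * allocation in super->dev_tbl[] so it can be updated independently until
 * write_super_imsm() coalesces everything back into the anchor.
 */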
853 static int parse_raid_devices(struct intel_super *super)
854 {
855 int i;
856 struct imsm_dev *dev_new;
857 size_t len;
858
859 for (i = 0; i < super->anchor->num_raid_devs; i++) {
860 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
861
862 len = sizeof_imsm_dev(dev_iter);
863 dev_new = malloc(len);
864 if (!dev_new)
865 return 1;
866 imsm_copy_dev(dev_new, dev_iter);
867 super->dev_tbl[i] = dev_new;
868 }
869
870 return 0;
871 }
872
873 /* load_imsm_mpb - read matrix metadata
874 * allocates super->buf to be freed by free_super
875 */
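/* Rough sequence, assuming 512-byte sectors:
 *   1. seek to dsize - 1024 and read the 512-byte anchor
 *   2. check MPB_SIGNATURE and size the full buffer from mpb_size
 *   3. if the mpb spans more than one sector, read the remaining sectors,
 *      which sit immediately before the anchor on disk
 *   4. verify the checksum, then load this disk and parse the raid devs
 */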
876 static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
877 {
878 unsigned long long dsize;
879 unsigned long long sectors;
881 struct imsm_super *anchor;
882 __u32 check_sum;
883 int rc;
884
885 get_dev_size(fd, NULL, &dsize);
886
887 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
888 if (devname)
889 fprintf(stderr,
890 Name ": Cannot seek to anchor block on %s: %s\n",
891 devname, strerror(errno));
892 return 1;
893 }
894
895 if (posix_memalign((void**)&anchor, 512, 512) != 0) {
896 if (devname)
897 fprintf(stderr,
898 Name ": Failed to allocate imsm anchor buffer"
899 " on %s\n", devname);
900 return 1;
901 }
902 if (read(fd, anchor, 512) != 512) {
903 if (devname)
904 fprintf(stderr,
905 Name ": Cannot read anchor block on %s: %s\n",
906 devname, strerror(errno));
907 free(anchor);
908 return 1;
909 }
910
911 if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
912 if (devname)
913 fprintf(stderr,
914 Name ": no IMSM anchor on %s\n", devname);
915 free(anchor);
916 return 2;
917 }
918
919 super->len = __le32_to_cpu(anchor->mpb_size);
920 super->len = ROUND_UP(super->len, 512);
921 if (posix_memalign(&super->buf, 512, super->len) != 0) {
922 if (devname)
923 fprintf(stderr,
924 Name ": unable to allocate %zu byte mpb buffer\n",
925 super->len);
926 free(anchor);
927 return 2;
928 }
929 memcpy(super->buf, anchor, 512);
930
931 sectors = mpb_sectors(anchor) - 1;
932 free(anchor);
933 if (!sectors) {
934 rc = load_imsm_disk(fd, super, devname, 0);
935 if (rc == 0)
936 rc = parse_raid_devices(super);
937 return rc;
938 }
939
940 /* read the extended mpb */
941 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
942 if (devname)
943 fprintf(stderr,
944 Name ": Cannot seek to extended mpb on %s: %s\n",
945 devname, strerror(errno));
946 return 1;
947 }
948
949 if (read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
950 if (devname)
951 fprintf(stderr,
952 Name ": Cannot read extended mpb on %s: %s\n",
953 devname, strerror(errno));
954 return 2;
955 }
956
957 check_sum = __gen_imsm_checksum(super->anchor);
958 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
959 if (devname)
960 fprintf(stderr,
961 Name ": IMSM checksum %x != %x on %s\n",
962 check_sum, __le32_to_cpu(super->anchor->check_sum),
963 devname);
964 return 2;
965 }
966
967 rc = load_imsm_disk(fd, super, devname, 0);
968 if (rc == 0)
969 rc = parse_raid_devices(super);
970 return rc;
971 }
972
973 static void free_imsm_disks(struct intel_super *super)
974 {
975 int i;
976
977 while (super->disks) {
978 struct dl *d = super->disks;
979
980 super->disks = d->next;
981 if (d->fd >= 0)
982 close(d->fd);
983 if (d->devname)
984 free(d->devname);
985 free(d);
986 }
987 for (i = 0; i < IMSM_MAX_DISKS; i++)
988 if (super->disk_tbl[i]) {
989 free(super->disk_tbl[i]);
990 super->disk_tbl[i] = NULL;
991 }
992 }
993
994 static void free_imsm(struct intel_super *super)
995 {
996 int i;
997
998 if (super->buf)
999 free(super->buf);
1000 free_imsm_disks(super);
1001 for (i = 0; i < IMSM_MAX_RAID_DEVS; i++)
1002 if (super->dev_tbl[i])
1003 free(super->dev_tbl[i]);
1004 free(super);
1005 }
1006
1007
1008 static void free_super_imsm(struct supertype *st)
1009 {
1010 struct intel_super *super = st->sb;
1011
1012 if (!super)
1013 return;
1014
1015 free_imsm(super);
1016 st->sb = NULL;
1017 }
1018
1019 static struct intel_super *alloc_super(int creating_imsm)
1020 {
1021 struct intel_super *super = malloc(sizeof(*super));
1022
1023 if (super) {
1024 memset(super, 0, sizeof(*super));
1025 super->creating_imsm = creating_imsm;
1026 super->current_vol = -1;
1027 }
1028
1029 return super;
1030 }
1031
1032 #ifndef MDASSEMBLE
1033 static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
1034 char *devname, int keep_fd)
1035 {
1036 struct mdinfo *sra;
1037 struct intel_super *super;
1038 struct mdinfo *sd, *best = NULL;
1039 __u32 bestgen = 0;
1040 __u32 gen;
1041 char nm[20];
1042 int dfd;
1043 int rv;
1044
1045 /* check if this disk is a member of an active array */
1046 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
1047 if (!sra)
1048 return 1;
1049
1050 if (sra->array.major_version != -1 ||
1051 sra->array.minor_version != -2 ||
1052 strcmp(sra->text_version, "imsm") != 0)
1053 return 1;
1054
1055 super = alloc_super(0);
1056 if (!super)
1057 return 1;
1058
1059 /* find the most up to date disk in this array */
1060 for (sd = sra->devs; sd; sd = sd->next) {
1061 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
1062 dfd = dev_open(nm, keep_fd ? O_RDWR : O_RDONLY);
1063 if (dfd < 0) {
1064 free_imsm(super);
1065 return 2;
1066 }
1067 rv = load_imsm_mpb(dfd, super, NULL);
1068 if (!keep_fd)
1069 close(dfd);
1070 if (rv == 0) {
1071 gen = __le32_to_cpu(super->anchor->generation_num);
1072 if (!best || gen > bestgen) {
1073 bestgen = gen;
1074 best = sd;
1075 }
1076 } else {
1077 free_imsm(super);
1078 return 2;
1079 }
1080 }
1081
1082 if (!best) {
1083 free_imsm(super);
1084 return 1;
1085 }
1086
1087 /* load the most up to date anchor */
1088 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
1089 dfd = dev_open(nm, O_RDONLY);
1090 if (dfd < 0) {
1091 free_imsm(super);
1092 return 1;
1093 }
1094 rv = load_imsm_mpb(dfd, super, NULL);
1095 close(dfd);
1096 if (rv != 0) {
1097 free_imsm(super);
1098 return 2;
1099 }
1100
1101 /* reset the disk list */
1102 free_imsm_disks(super);
1103
1104 /* populate disk list */
1105 for (sd = sra->devs ; sd ; sd = sd->next) {
1106 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
1107 dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
1108 if (dfd < 0) {
1109 free_imsm(super);
1110 return 2;
1111 }
1112 load_imsm_disk(dfd, super, NULL, keep_fd);
1113 if (!keep_fd)
1114 close(dfd);
1115 }
1116
1117 if (st->subarray[0]) {
1118 if (atoi(st->subarray) < super->anchor->num_raid_devs)
1119 super->current_vol = atoi(st->subarray);
1120 else
1121 return 1;
1122 }
1123
1124 *sbp = super;
1125 if (st->ss == NULL) {
1126 st->ss = &super_imsm;
1127 st->minor_version = 0;
1128 st->max_devs = IMSM_MAX_DEVICES;
1129 st->container_dev = fd2devnum(fd);
1130 }
1131
1132 return 0;
1133 }
1134 #endif
1135
1136 static int load_super_imsm(struct supertype *st, int fd, char *devname)
1137 {
1138 struct intel_super *super;
1139 int rv;
1140
1141 #ifndef MDASSEMBLE
1142 if (load_super_imsm_all(st, fd, &st->sb, devname, 1) == 0)
1143 return 0;
1144 #endif
1145 if (st->subarray[0])
1146 return 1; /* FIXME */
1147
1148 super = alloc_super(0);
1149 if (!super) {
1150 fprintf(stderr,
1151 Name ": malloc of %zu failed.\n",
1152 sizeof(*super));
1153 return 1;
1154 }
1155
1156 rv = load_imsm_mpb(fd, super, devname);
1157
1158 if (rv) {
1159 if (devname)
1160 fprintf(stderr,
1161 Name ": Failed to load all information "
1162 "sections on %s\n", devname);
1163 free_imsm(super);
1164 return rv;
1165 }
1166
1167 st->sb = super;
1168 if (st->ss == NULL) {
1169 st->ss = &super_imsm;
1170 st->minor_version = 0;
1171 st->max_devs = IMSM_MAX_DEVICES;
1172 }
1173
1174 return 0;
1175 }
1176
1177 static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
1178 {
1179 if (info->level == 1)
1180 return 128;
1181 return info->chunk_size >> 9;
1182 }
1183
1184 static __u32 info_to_num_data_stripes(mdu_array_info_t *info)
1185 {
1186 __u32 num_stripes;
1187
1188 num_stripes = (info->size * 2) / info_to_blocks_per_strip(info);
1189 if (info->level == 1)
1190 num_stripes /= 2;
1191
1192 return num_stripes;
1193 }
1194
1195 static __u32 info_to_blocks_per_member(mdu_array_info_t *info)
1196 {
1197 return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1);
1198 }
1199
1200 static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
1201 unsigned long long size, char *name,
1202 char *homehost, int *uuid)
1203 {
1204 /* We are creating a volume inside a pre-existing container.
1205 * so st->sb is already set.
1206 */
1207 struct intel_super *super = st->sb;
1208 struct imsm_super *mpb = super->anchor;
1209 struct imsm_dev *dev;
1210 struct imsm_vol *vol;
1211 struct imsm_map *map;
1212 int idx = mpb->num_raid_devs;
1213 int i;
1214 unsigned long long array_blocks;
1215 __u32 offset = 0;
1216 size_t size_old, size_new;
1217
1218 if (mpb->num_raid_devs >= 2) {
1219 fprintf(stderr, Name": This imsm-container already has the "
1220 "maximum of 2 volumes\n");
1221 return 0;
1222 }
1223
1224 /* ensure the mpb is large enough for the new data */
1225 size_old = __le32_to_cpu(mpb->mpb_size);
1226 size_new = disks_to_mpb_size(info->nr_disks);
1227 if (size_new > size_old) {
1228 void *mpb_new;
1229 size_t size_round = ROUND_UP(size_new, 512);
1230
1231 if (posix_memalign(&mpb_new, 512, size_round) != 0) {
1232 fprintf(stderr, Name": could not allocate new mpb\n");
1233 return 0;
1234 }
1235 memcpy(mpb_new, mpb, size_old);
1236 free(mpb);
1237 mpb = mpb_new;
1238 super->anchor = mpb_new;
1239 mpb->mpb_size = __cpu_to_le32(size_new);
1240 memset(mpb_new + size_old, 0, size_round - size_old);
1241 }
1242 super->current_vol = idx;
1243 sprintf(st->subarray, "%d", idx);
1244 dev = malloc(sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
1245 if (!dev) {
1246 fprintf(stderr, Name": could not allocate raid device\n");
1247 return 0;
1248 }
1249 strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
1250 array_blocks = calc_array_size(info->level, info->raid_disks,
1251 info->layout, info->chunk_size,
1252 info->size*2);
1253 dev->size_low = __cpu_to_le32((__u32) array_blocks);
1254 dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
1255 dev->status = __cpu_to_le32(0);
1256 dev->reserved_blocks = __cpu_to_le32(0);
1257 vol = &dev->vol;
1258 vol->migr_state = 0;
1259 vol->migr_type = 0;
1260 vol->dirty = 0;
1261 for (i = 0; i < idx; i++) {
1262 struct imsm_dev *prev = get_imsm_dev(super, i);
1263 struct imsm_map *pmap = &prev->vol.map[0];
1264
1265 offset += __le32_to_cpu(pmap->blocks_per_member);
1266 offset += IMSM_RESERVED_SECTORS;
1267 }
1268 map = &vol->map[0];
1269 map->pba_of_lba0 = __cpu_to_le32(offset);
1270 map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
1271 map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
1272 map->num_data_stripes = __cpu_to_le32(info_to_num_data_stripes(info));
1273 map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
1274 IMSM_T_STATE_NORMAL;
1275
1276 if (info->level == 1 && info->raid_disks > 2) {
1277 fprintf(stderr, Name": imsm does not support more than 2 disks "
1278 "in a raid1 volume\n");
1279 return 0;
1280 }
1281 if (info->level == 10)
1282 map->raid_level = 1;
1283 else
1284 map->raid_level = info->level;
1285
1286 map->num_members = info->raid_disks;
1287 for (i = 0; i < map->num_members; i++) {
1288 /* initialized in add_to_super */
1289 map->disk_ord_tbl[i] = __cpu_to_le32(0);
1290 }
1291 mpb->num_raid_devs++;
1292 super->dev_tbl[super->current_vol] = dev;
1293
1294 return 1;
1295 }
1296
1297 static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
1298 unsigned long long size, char *name,
1299 char *homehost, int *uuid)
1300 {
1301 /* This is primarily called by Create when creating a new array.
1302 * We will then get add_to_super called for each component, and then
1303 * write_init_super called to write it out to each device.
1304 * For IMSM, Create can create on fresh devices or on a pre-existing
1305 * array.
1306 * To create on a pre-existing array a different method will be called.
1307 * This one is just for fresh drives.
1308 */
1309 struct intel_super *super;
1310 struct imsm_super *mpb;
1311 size_t mpb_size;
1312
1313 if (!info) {
1314 st->sb = NULL;
1315 return 0;
1316 }
1317 if (st->sb)
1318 return init_super_imsm_volume(st, info, size, name, homehost,
1319 uuid);
1320
1321 super = alloc_super(1);
1322 if (!super)
1323 return 0;
1324 mpb_size = disks_to_mpb_size(info->nr_disks);
1325 if (posix_memalign(&super->buf, 512, mpb_size) != 0) {
1326 free(super);
1327 return 0;
1328 }
1329 mpb = super->buf;
1330 memset(mpb, 0, mpb_size);
1331
1332 memcpy(mpb->sig, MPB_SIGNATURE, strlen(MPB_SIGNATURE));
1333 memcpy(mpb->sig + strlen(MPB_SIGNATURE), MPB_VERSION_RAID5,
1334 strlen(MPB_VERSION_RAID5));
1335 mpb->mpb_size = __cpu_to_le32(mpb_size);
1336
1337 st->sb = super;
1338 return 1;
1339 }
1340
1341 static void add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
1342 int fd, char *devname)
1343 {
1344 struct intel_super *super = st->sb;
1345 struct dl *dl;
1346 struct imsm_dev *dev;
1347 struct imsm_map *map;
1348 struct imsm_disk *disk;
1349 __u32 status;
1350
1351 dev = get_imsm_dev(super, super->current_vol);
1352 map = &dev->vol.map[0];
1353
1354 for (dl = super->disks; dl ; dl = dl->next)
1355 if (dl->major == dk->major &&
1356 dl->minor == dk->minor)
1357 break;
1358 if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
1359 return;
1360
1361 map->disk_ord_tbl[dk->number] = __cpu_to_le32(dl->index);
1362
1363 disk = get_imsm_disk(super, dl->index);
1364 status = CONFIGURED_DISK | USABLE_DISK;
1365 disk->status = __cpu_to_le32(status);
1366 }
1367
1368 static void add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
1369 int fd, char *devname)
1370 {
1371 struct intel_super *super = st->sb;
1372 struct imsm_super *mpb = super->anchor;
1373 struct imsm_disk *disk;
1374 struct dl *dd;
1375 unsigned long long size;
1376 __u32 status, id;
1377 int rv;
1378 struct stat stb;
1379
1380 if (super->current_vol >= 0) {
1381 add_to_super_imsm_volume(st, dk, fd, devname);
1382 return;
1383 }
1384
1385 fstat(fd, &stb);
1386 dd = malloc(sizeof(*dd));
1387 disk = malloc(sizeof(*disk));
1388 if (!dd || !disk) {
1389 fprintf(stderr,
1390 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
1391 if (dd)
1392 free(dd);
1393 if (disk)
1394 free(disk);
1395 abort();
1396 }
1397 memset(dd, 0, sizeof(*dd));
1398 memset(disk, 0, sizeof(*disk));
1399 dd->major = major(stb.st_rdev);
1400 dd->minor = minor(stb.st_rdev);
1401 dd->index = dk->number;
1402 dd->devname = devname ? strdup(devname) : NULL;
1403 dd->next = super->disks;
1404 dd->fd = fd;
1405 rv = imsm_read_serial(fd, devname, dd->serial);
1406 if (rv) {
1407 fprintf(stderr,
1408 Name ": failed to retrieve scsi serial, aborting\n");
1409 free(dd);
1410 free(disk);
1411 abort();
1412 }
1413
1414 if (mpb->num_disks <= dk->number)
1415 mpb->num_disks = dk->number + 1;
1416
1417 get_dev_size(fd, NULL, &size);
1418 size /= 512;
1419 status = USABLE_DISK | SPARE_DISK;
1420 strcpy((char *) disk->serial, (char *) dd->serial);
1421 disk->total_blocks = __cpu_to_le32(size);
1422 disk->status = __cpu_to_le32(status);
1423 if (sysfs_disk_to_scsi_id(fd, &id) == 0)
1424 disk->scsi_id = __cpu_to_le32(id);
1425 else
1426 disk->scsi_id = __cpu_to_le32(0);
1427 super->disk_tbl[dd->index] = disk;
1428
1429 /* update the family number if we are creating a container */
1430 if (super->creating_imsm) {
1431 disk = __get_imsm_disk(mpb, dd->index);
1432 *disk = *super->disk_tbl[dd->index]; /* copy in new disk */
1433 mpb->family_num = __cpu_to_le32(__gen_imsm_checksum(mpb));
1434 }
1435
1436 super->disks = dd;
1437 }
1438
1439 static int store_imsm_mpb(int fd, struct intel_super *super);
1440
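/* write_super_imsm - the "coalesce" half of mpb handling: fold the parsed
 * disk_tbl[] and dev_tbl[] copies back into the anchor buffer, bump
 * generation_num, recompute the checksum, and store the buffer to every
 * disk in the container.  Returns 1 on success, 0 on failure.
 */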
1441 static int write_super_imsm(struct intel_super *super, int doclose)
1442 {
1443 struct imsm_super *mpb = super->anchor;
1444 struct dl *d;
1445 __u32 generation;
1446 __u32 sum;
1447 int i;
1448
1449 /* 'generation' is incremented every time the metadata is written */
1450 generation = __le32_to_cpu(mpb->generation_num);
1451 generation++;
1452 mpb->generation_num = __cpu_to_le32(generation);
1453
1454 for (i = 0; i < mpb->num_disks; i++)
1455 mpb->disk[i] = *super->disk_tbl[i];
1456 for (i = 0; i < mpb->num_raid_devs; i++) {
1457 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1458
1459 imsm_copy_dev(dev, super->dev_tbl[i]);
1460 }
1461
1462 /* recalculate checksum */
1463 sum = __gen_imsm_checksum(mpb);
1464 mpb->check_sum = __cpu_to_le32(sum);
1465
1466 for (d = super->disks; d ; d = d->next) {
1467 if (store_imsm_mpb(d->fd, super)) {
1468 fprintf(stderr, "%s: failed for device %d:%d %s\n",
1469 __func__, d->major, d->minor, strerror(errno));
1470 return 0;
1471 }
1472 if (doclose) {
1473 close(d->fd);
1474 d->fd = -1;
1475 }
1476 }
1477
1478 return 1;
1479 }
1480
1481 static int write_init_super_imsm(struct supertype *st)
1482 {
1483 if (st->update_tail) {
1484 /* queue the recently created array as a metadata update */
1485 size_t len;
1486 struct imsm_update_create_array *u;
1487 struct intel_super *super = st->sb;
1488 struct imsm_dev *dev;
1489 struct dl *d;
1490
1491 if (super->current_vol < 0 ||
1492 !(dev = get_imsm_dev(super, super->current_vol))) {
1493 fprintf(stderr, "%s: could not determine sub-array\n",
1494 __func__);
1495 return 1;
1496 }
1497
1498
1499 len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev);
1500 u = malloc(len);
1501 if (!u) {
1502 fprintf(stderr, "%s: failed to allocate update buffer\n",
1503 __func__);
1504 return 1;
1505 }
1506
1507 u->type = update_create_array;
1508 u->dev_idx = super->current_vol;
1509 imsm_copy_dev(&u->dev, dev);
1510 append_metadata_update(st, u, len);
1511
1512 for (d = super->disks; d ; d = d->next) {
1513 close(d->fd);
1514 d->fd = -1;
1515 }
1516
1517 return 0;
1518 } else
1519 return write_super_imsm(st->sb, 1);
1520 }
1521
1522 static int store_zero_imsm(struct supertype *st, int fd)
1523 {
1524 unsigned long long dsize;
1525 void *buf;
1526
1527 get_dev_size(fd, NULL, &dsize);
1528
1529 /* first block is stored on second to last sector of the disk */
1530 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
1531 return 1;
1532
1533 if (posix_memalign(&buf, 512, 512) != 0)
1534 return 1;
1535
1536 memset(buf, 0, 512);
1537 if (write(fd, buf, 512) != 512)
1538 return 1;
1539 return 0;
1540 }
1541
1542 static int validate_geometry_imsm_container(struct supertype *st, int level,
1543 int layout, int raiddisks, int chunk,
1544 unsigned long long size, char *dev,
1545 unsigned long long *freesize,
1546 int verbose)
1547 {
1548 int fd;
1549 unsigned long long ldsize;
1550
1551 if (level != LEVEL_CONTAINER)
1552 return 0;
1553 if (!dev)
1554 return 1;
1555
1556 fd = open(dev, O_RDONLY|O_EXCL, 0);
1557 if (fd < 0) {
1558 if (verbose)
1559 fprintf(stderr, Name ": imsm: Cannot open %s: %s\n",
1560 dev, strerror(errno));
1561 return 0;
1562 }
1563 if (!get_dev_size(fd, dev, &ldsize)) {
1564 close(fd);
1565 return 0;
1566 }
1567 close(fd);
1568
1569 *freesize = avail_size_imsm(st, ldsize >> 9);
1570
1571 return 1;
1572 }
1573
1574 /* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
1575 * FIX ME add ahci details
1576 */
1577 static int validate_geometry_imsm_volume(struct supertype *st, int level,
1578 int layout, int raiddisks, int chunk,
1579 unsigned long long size, char *dev,
1580 unsigned long long *freesize,
1581 int verbose)
1582 {
1583 struct stat stb;
1584 struct intel_super *super = st->sb;
1585 struct dl *dl;
1586 unsigned long long pos = 0;
1587 unsigned long long maxsize;
1588 struct extent *e;
1589 int i;
1590
1591 if (level == LEVEL_CONTAINER)
1592 return 0;
1593
1594 if (level == 1 && raiddisks > 2) {
1595 if (verbose)
1596 fprintf(stderr, Name ": imsm does not support more "
1597 "than 2 in a raid1 configuration\n");
1598 return 0;
1599 }
1600
1601 /* We must have the container info already read in. */
1602 if (!super)
1603 return 0;
1604
1605 if (!dev) {
1606 /* General test: make sure there is space for
1607 * 'raiddisks' device extents of size 'size' at a given
1608 * offset
1609 */
1610 unsigned long long minsize = size*2 /* convert to blocks */;
1611 unsigned long long start_offset = ~0ULL;
1612 int dcnt = 0;
1613 if (minsize == 0)
1614 minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
1615 for (dl = super->disks; dl ; dl = dl->next) {
1616 int found = 0;
1617
1618 pos = 0;
1619 i = 0;
1620 e = get_extents(super, dl);
1621 if (!e) continue;
1622 do {
1623 unsigned long long esize;
1624 esize = e[i].start - pos;
1625 if (esize >= minsize)
1626 found = 1;
1627 if (found && start_offset == ~0ULL) {
1628 start_offset = pos;
1629 break;
1630 } else if (found && pos != start_offset) {
1631 found = 0;
1632 break;
1633 }
1634 pos = e[i].start + e[i].size;
1635 i++;
1636 } while (e[i-1].size);
1637 if (found)
1638 dcnt++;
1639 free(e);
1640 }
1641 if (dcnt < raiddisks) {
1642 if (verbose)
1643 fprintf(stderr, Name ": imsm: Not enough "
1644 "devices with space for this array "
1645 "(%d < %d)\n",
1646 dcnt, raiddisks);
1647 return 0;
1648 }
1649 return 1;
1650 }
1651 /* This device must be a member of the set */
1652 if (stat(dev, &stb) < 0)
1653 return 0;
1654 if ((S_IFMT & stb.st_mode) != S_IFBLK)
1655 return 0;
1656 for (dl = super->disks ; dl ; dl = dl->next) {
1657 if (dl->major == major(stb.st_rdev) &&
1658 dl->minor == minor(stb.st_rdev))
1659 break;
1660 }
1661 if (!dl) {
1662 if (verbose)
1663 fprintf(stderr, Name ": %s is not in the "
1664 "same imsm set\n", dev);
1665 return 0;
1666 }
1667 e = get_extents(super, dl);
1668 maxsize = 0;
1669 i = 0;
1670 if (e) do {
1671 unsigned long long esize;
1672 esize = e[i].start - pos;
1673 if (esize >= maxsize)
1674 maxsize = esize;
1675 pos = e[i].start + e[i].size;
1676 i++;
1677 } while (e[i-1].size);
1678 *freesize = maxsize;
1679
1680 return 1;
1681 }
1682
1683 static int validate_geometry_imsm(struct supertype *st, int level, int layout,
1684 int raiddisks, int chunk, unsigned long long size,
1685 char *dev, unsigned long long *freesize,
1686 int verbose)
1687 {
1688 int fd, cfd;
1689 struct mdinfo *sra;
1690
1691 /* if given unused devices, create a container
1692 * if given devices in a container, create a member volume
1693 */
1694 if (level == LEVEL_CONTAINER) {
1695 /* Must be a fresh device to add to a container */
1696 return validate_geometry_imsm_container(st, level, layout,
1697 raiddisks, chunk, size,
1698 dev, freesize,
1699 verbose);
1700 }
1701
1702 if (st->sb) {
1703 /* creating in a given container */
1704 return validate_geometry_imsm_volume(st, level, layout,
1705 raiddisks, chunk, size,
1706 dev, freesize, verbose);
1707 }
1708
1709 /* limit creation to the following levels */
1710 if (!dev)
1711 switch (level) {
1712 case 0:
1713 case 1:
1714 case 10:
1715 case 5:
1716 break;
1717 default:
1718 return 1;
1719 }
1720
1721 /* This device needs to be a device in an 'imsm' container */
1722 fd = open(dev, O_RDONLY|O_EXCL, 0);
1723 if (fd >= 0) {
1724 if (verbose)
1725 fprintf(stderr,
1726 Name ": Cannot create this array on device %s\n",
1727 dev);
1728 close(fd);
1729 return 0;
1730 }
1731 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
1732 if (verbose)
1733 fprintf(stderr, Name ": Cannot open %s: %s\n",
1734 dev, strerror(errno));
1735 return 0;
1736 }
1737 /* Well, it is in use by someone, maybe an 'imsm' container. */
1738 cfd = open_container(fd);
1739 if (cfd < 0) {
1740 close(fd);
1741 if (verbose)
1742 fprintf(stderr, Name ": Cannot use %s: It is busy\n",
1743 dev);
1744 return 0;
1745 }
1746 sra = sysfs_read(cfd, 0, GET_VERSION);
1747 close(fd);
1748 if (sra && sra->array.major_version == -1 &&
1749 strcmp(sra->text_version, "imsm") == 0) {
1750 /* This is a member of a imsm container. Load the container
1751 * and try to create a volume
1752 */
1753 struct intel_super *super;
1754
1755 if (load_super_imsm_all(st, cfd, (void **) &super, NULL, 1) == 0) {
1756 st->sb = super;
1757 st->container_dev = fd2devnum(cfd);
1758 close(cfd);
1759 return validate_geometry_imsm_volume(st, level, layout,
1760 raiddisks, chunk,
1761 size, dev,
1762 freesize, verbose);
1763 }
1764 close(cfd);
1765 } else /* may belong to another container */
1766 return 0;
1767
1768 return 1;
1769 }
1770
1771 static struct mdinfo *container_content_imsm(struct supertype *st)
1772 {
1773 /* Given a container loaded by load_super_imsm_all,
1774 * extract information about all the arrays into
1775 * an mdinfo tree.
1776 *
1777 * For each imsm_dev create an mdinfo, fill it in,
1778 * then look for matching devices in super->disks
1779 * and create appropriate device mdinfo.
1780 */
1781 struct intel_super *super = st->sb;
1782 struct imsm_super *mpb = super->anchor;
1783 struct mdinfo *rest = NULL;
1784 int i;
1785
1786 for (i = 0; i < mpb->num_raid_devs; i++) {
1787 struct imsm_dev *dev = get_imsm_dev(super, i);
1788 struct imsm_vol *vol = &dev->vol;
1789 struct imsm_map *map = vol->map;
1790 struct mdinfo *this;
1791 int slot;
1792
1793 this = malloc(sizeof(*this));
1794 memset(this, 0, sizeof(*this));
1795 this->next = rest;
1796 rest = this;
1797
1798 this->array.level = get_imsm_raid_level(map);
1799 this->array.raid_disks = map->num_members;
1800 this->array.layout = imsm_level_to_layout(this->array.level);
1801 this->array.md_minor = -1;
1802 this->array.ctime = 0;
1803 this->array.utime = 0;
1804 this->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) << 9;
1805 this->array.state = !vol->dirty;
1806 this->container_member = i;
1807 if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty)
1808 this->resync_start = 0;
1809 else
1810 this->resync_start = ~0ULL;
1811
1812 strncpy(this->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
1813 this->name[MAX_RAID_SERIAL_LEN] = 0;
1814
1815 sprintf(this->text_version, "/%s/%d",
1816 devnum2devname(st->container_dev),
1817 this->container_member);
1818
1819 memset(this->uuid, 0, sizeof(this->uuid));
1820
1821 this->component_size = __le32_to_cpu(map->blocks_per_member);
1822
1823 for (slot = 0 ; slot < map->num_members; slot++) {
1824 struct imsm_disk *disk;
1825 struct mdinfo *info_d;
1826 struct dl *d;
1827 int idx;
1828 __u32 s;
1829
1830 idx = __le32_to_cpu(map->disk_ord_tbl[slot]) & ~(0xff << 24);
1831 for (d = super->disks; d ; d = d->next)
1832 if (d->index == idx)
1833 break;
1834
1835 if (d == NULL)
1836 break; /* shouldn't this be continue ?? */
1837
1838 info_d = malloc(sizeof(*info_d));
1839 if (!info_d)
1840 break; /* ditto ?? */
1841 memset(info_d, 0, sizeof(*info_d));
1842 info_d->next = this->devs;
1843 this->devs = info_d;
1844
1845 disk = get_imsm_disk(super, idx);
1846 s = __le32_to_cpu(disk->status);
1847
1848 info_d->disk.number = d->index;
1849 info_d->disk.major = d->major;
1850 info_d->disk.minor = d->minor;
1851 info_d->disk.raid_disk = slot;
1852 info_d->disk.state = s & CONFIGURED_DISK ? (1 << MD_DISK_ACTIVE) : 0;
1853 info_d->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0;
1854 info_d->disk.state |= s & USABLE_DISK ? (1 << MD_DISK_SYNC) : 0;
1855
1856 this->array.working_disks++;
1857
1858 info_d->events = __le32_to_cpu(mpb->generation_num);
1859 info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
1860 info_d->component_size = __le32_to_cpu(map->blocks_per_member);
1861 if (d->devname)
1862 strcpy(info_d->name, d->devname);
1863 }
1864 }
1865
1866 return rest;
1867 }
1868
1869
1870 static int imsm_open_new(struct supertype *c, struct active_array *a,
1871 char *inst)
1872 {
1873 struct intel_super *super = c->sb;
1874 struct imsm_super *mpb = super->anchor;
1875
1876 if (atoi(inst) >= mpb->num_raid_devs) {
1877 fprintf(stderr, "%s: subarry index %d, out of range\n",
1878 __func__, atoi(inst));
1879 return -ENODEV;
1880 }
1881
1882 dprintf("imsm: open_new %s\n", inst);
1883 a->info.container_member = atoi(inst);
1884 return 0;
1885 }
1886
1887 static __u8 imsm_check_degraded(struct intel_super *super, int n, int failed)
1888 {
1889 struct imsm_dev *dev = get_imsm_dev(super, n);
1890 struct imsm_map *map = dev->vol.map;
1891
1892 if (!failed)
1893 return map->map_state;
1894
1895 switch (get_imsm_raid_level(map)) {
1896 case 0:
1897 return IMSM_T_STATE_FAILED;
1898 break;
1899 case 1:
1900 if (failed < map->num_members)
1901 return IMSM_T_STATE_DEGRADED;
1902 else
1903 return IMSM_T_STATE_FAILED;
1904 break;
1905 case 10:
1906 {
1907 /**
1908 * check to see if any mirrors have failed,
1909 * otherwise we are degraded
1910 */
1911 int device_per_mirror = 2; /* FIXME is this always the case?
1912 * and are they always adjacent?
1913 */
1914 int failed = 0;
1915 int i;
1916
1917 for (i = 0; i < map->num_members; i++) {
1918 int idx = get_imsm_disk_idx(map, i);
1919 struct imsm_disk *disk = get_imsm_disk(super, idx);
1920
1921 if (__le32_to_cpu(disk->status) & FAILED_DISK)
1922 failed++;
1923
1924 if (failed >= device_per_mirror)
1925 return IMSM_T_STATE_FAILED;
1926
1927 /* reset 'failed' for next mirror set */
1928 if (!((i + 1) % device_per_mirror))
1929 failed = 0;
1930 }
1931
1932 return IMSM_T_STATE_DEGRADED;
1933 }
1934 case 5:
1935 if (failed < 2)
1936 return IMSM_T_STATE_DEGRADED;
1937 else
1938 return IMSM_T_STATE_FAILED;
1939 break;
1940 default:
1941 break;
1942 }
1943
1944 return map->map_state;
1945 }
1946
1947 static int imsm_count_failed(struct intel_super *super, struct imsm_map *map)
1948 {
1949 int i;
1950 int failed = 0;
1951 struct imsm_disk *disk;
1952
1953 for (i = 0; i < map->num_members; i++) {
1954 int idx = get_imsm_disk_idx(map, i);
1955
1956 disk = get_imsm_disk(super, idx);
1957 if (__le32_to_cpu(disk->status) & FAILED_DISK)
1958 failed++;
1959 }
1960
1961 return failed;
1962 }
1963
1964 static void imsm_set_array_state(struct active_array *a, int consistent)
1965 {
1966 int inst = a->info.container_member;
1967 struct intel_super *super = a->container->sb;
1968 struct imsm_dev *dev = get_imsm_dev(super, inst);
1969 struct imsm_map *map = &dev->vol.map[0];
1970 int dirty = !consistent;
1971 int failed;
1972 __u8 map_state;
1973
1974 if (a->resync_start == ~0ULL) {
1975 failed = imsm_count_failed(super, map);
1976 map_state = imsm_check_degraded(super, inst, failed);
1977 if (!failed)
1978 map_state = IMSM_T_STATE_NORMAL;
1979 if (map->map_state != map_state) {
1980 dprintf("imsm: map_state %d: %d\n",
1981 inst, map_state);
1982 map->map_state = map_state;
1983 super->updates_pending++;
1984 }
1985 }
1986
1987 if (dev->vol.dirty != dirty) {
1988 dprintf("imsm: mark '%s' (%llu)\n",
1989 dirty?"dirty":"clean", a->resync_start);
1990
1991 dev->vol.dirty = dirty;
1992 super->updates_pending++;
1993 }
1994 }
1995
1996 static void imsm_set_disk(struct active_array *a, int n, int state)
1997 {
1998 int inst = a->info.container_member;
1999 struct intel_super *super = a->container->sb;
2000 struct imsm_dev *dev = get_imsm_dev(super, inst);
2001 struct imsm_map *map = dev->vol.map;
2002 struct imsm_disk *disk;
2003 __u32 status;
2004 int failed = 0;
2005 int new_failure = 0;
2006
2007 if (n >= map->num_members)
2008 fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
2009 n, map->num_members - 1);
2010
2011 if (n < 0)
2012 return;
2013
2014 dprintf("imsm: set_disk %d:%x\n", n, state);
2015
2016 disk = get_imsm_disk(super, get_imsm_disk_idx(map, n));
2017
2018 /* check for new failures */
2019 status = __le32_to_cpu(disk->status);
2020 if ((state & DS_FAULTY) && !(status & FAILED_DISK)) {
2021 status |= FAILED_DISK;
2022 disk->status = __cpu_to_le32(status);
2023 new_failure = 1;
2024 super->updates_pending++;
2025 }
2026
2027 /* if the number of failures has changed, count up 'failed' to determine
2028 * the degraded / failed status
2029 */
2030 if (new_failure && map->map_state != IMSM_T_STATE_FAILED)
2031 failed = imsm_count_failed(super, map);
2032
2033 /* determine map_state based on failed or in_sync count */
2034 if (failed)
2035 map->map_state = imsm_check_degraded(super, inst, failed);
2036 else if (map->map_state == IMSM_T_STATE_DEGRADED) {
2037 struct mdinfo *d;
2038 int working = 0;
2039
2040 for (d = a->info.devs ; d ; d = d->next)
2041 if (d->curr_state & DS_INSYNC)
2042 working++;
2043
2044 if (working == a->info.array.raid_disks) {
2045 map->map_state = IMSM_T_STATE_NORMAL;
2046 super->updates_pending++;
2047 }
2048 }
2049 }
2050
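/* Write the mpb to one member disk.  Counting 512-byte sectors back
 * from the end of the device: the last sector is left untouched, the
 * second-to-last sector holds the first 512 bytes of the mpb (the
 * anchor), and an mpb larger than one sector spills into the sectors
 * immediately preceding the anchor.
 */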
2051 static int store_imsm_mpb(int fd, struct intel_super *super)
2052 {
2053 struct imsm_super *mpb = super->anchor;
2054 __u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
2055 unsigned long long dsize;
2056 unsigned long long sectors;
2057
2058 get_dev_size(fd, NULL, &dsize);
2059
2060 if (mpb_size > 512) {
2061 /* -1 to account for anchor */
2062 sectors = mpb_sectors(mpb) - 1;
2063
2064 		/* write the extended mpb to the sectors preceding the anchor */
2065 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
2066 return 1;
2067
2068 if (write(fd, super->buf + 512, 512 * sectors) != 512 * sectors)
2069 return 1;
2070 }
2071
2072 /* first block is stored on second to last sector of the disk */
2073 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
2074 return 1;
2075
2076 if (write(fd, super->buf, 512) != 512)
2077 return 1;
2078
2079 return 0;
2080 }
2081
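/* Flush any metadata changes queued via updates_pending; a no-op when
 * nothing has changed since the last write.
 */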
2082 static void imsm_sync_metadata(struct supertype *container)
2083 {
2084 struct intel_super *super = container->sb;
2085
2086 if (!super->updates_pending)
2087 return;
2088
2089 write_super_imsm(super, 0);
2090
2091 super->updates_pending = 0;
2092 }
2093
2094 static struct mdinfo *imsm_activate_spare(struct active_array *a,
2095 struct metadata_update **updates)
2096 {
2097 /**
2098 * Take a device that is marked spare in the metadata and use it to
2099 * replace a failed/vacant slot in an array. There may be a case where
2100 * a device is failed in one array but active in a second.
2101 * imsm_process_update catches this case and does not clear the SPARE_DISK
2102 * flag, allowing the second array to start using the device on failure.
2103 * SPARE_DISK is cleared when all arrays are using a device.
2104 *
2105 * FIXME: is this a valid use of SPARE_DISK?
2106 */
2107
2108 struct intel_super *super = a->container->sb;
2109 int inst = a->info.container_member;
2110 struct imsm_dev *dev = get_imsm_dev(super, inst);
2111 struct imsm_map *map = dev->vol.map;
2112 int failed = a->info.array.raid_disks;
2113 struct mdinfo *rv = NULL;
2114 struct mdinfo *d;
2115 struct mdinfo *di;
2116 struct metadata_update *mu;
2117 struct dl *dl;
2118 struct imsm_update_activate_spare *u;
2119 int num_spares = 0;
2120 int i;
2121
2122 for (d = a->info.devs ; d ; d = d->next) {
2123 if ((d->curr_state & DS_FAULTY) &&
2124 d->state_fd >= 0)
2125 /* wait for Removal to happen */
2126 return NULL;
2127 if (d->state_fd >= 0)
2128 failed--;
2129 }
2130
2131 dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
2132 inst, failed, a->info.array.raid_disks, a->info.array.level);
2133 if (imsm_check_degraded(super, inst, failed) != IMSM_T_STATE_DEGRADED)
2134 return NULL;
2135
2136 /* For each slot, if it is not working, find a spare */
2137 dl = super->disks;
2138 for (i = 0; i < a->info.array.raid_disks; i++) {
2139 for (d = a->info.devs ; d ; d = d->next)
2140 if (d->disk.raid_disk == i)
2141 break;
2142 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
2143 if (d && (d->state_fd >= 0))
2144 continue;
2145
2146 /* OK, this device needs recovery. Find a spare */
2147 for ( ; dl ; dl = dl->next) {
2148 unsigned long long esize;
2149 unsigned long long pos;
2150 struct mdinfo *d2;
2151 struct extent *ex;
2152 struct imsm_disk *disk;
2153 int j;
2154 int found;
2155 __u32 array_start;
2156
2157 /* If in this array, skip */
2158 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
2159 if (d2->disk.major == dl->major &&
2160 d2->disk.minor == dl->minor) {
2161 dprintf("%x:%x already in array\n", dl->major, dl->minor);
2162 break;
2163 }
2164 if (d2)
2165 continue;
2166
2167 /* is this unused device marked as a spare? */
2168 disk = get_imsm_disk(super, dl->index);
2169 if (!(__le32_to_cpu(disk->status) & SPARE_DISK))
2170 continue;
2171
2172 /* We are allowed to use this device - is there space?
2173 * We need a->info.component_size sectors */
2174 ex = get_extents(super, dl);
2175 if (!ex) {
2176 dprintf("cannot get extents\n");
2177 continue;
2178 }
2179 found = 0;
2180 j = 0;
2181 pos = 0;
2182 array_start = __le32_to_cpu(map->pba_of_lba0);
2183
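			/* Walk the gaps between the extents already in use on
			 * this disk (the list is terminated by a zero-length
			 * entry) looking for room for this array.
			 */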
2184 do {
2185 /* check that we can start at pba_of_lba0 with
2186 * a->info.component_size of space
2187 */
2188 esize = ex[j].start - pos;
2189 if (array_start >= pos &&
2190 array_start + a->info.component_size < ex[j].start) {
2191 found = 1;
2192 break;
2193 }
2194 pos = ex[j].start + ex[j].size;
2195 j++;
2196
2197 } while (ex[j-1].size);
2198
2199 free(ex);
2200 if (!found) {
2201 dprintf("%x:%x does not have %llu at %d\n",
2202 dl->major, dl->minor,
2203 a->info.component_size,
2204 __le32_to_cpu(map->pba_of_lba0));
2205 /* No room */
2206 continue;
2207 }
2208
2209 /* found a usable disk with enough space */
2210 di = malloc(sizeof(*di));
2211 memset(di, 0, sizeof(*di));
2212 di->disk.number = dl->index;
2213 di->disk.raid_disk = i;
2214 di->disk.major = dl->major;
2215 di->disk.minor = dl->minor;
2216 di->disk.state = 0;
2217 di->data_offset = array_start;
2218 di->component_size = a->info.component_size;
2219 di->container_member = inst;
2220 di->next = rv;
2221 rv = di;
2222 num_spares++;
2223 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
2224 i, pos);
2225
2226 break;
2227 }
2228 }
2229
2230 if (!rv)
2231 /* No spares found */
2232 return rv;
2233 /* Now 'rv' has a list of devices to return.
2234 * Create a metadata_update record to update the
2235 * disk_ord_tbl for the array
2236 */
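	/* The update buffer holds num_spares consecutive
	 * imsm_update_activate_spare records; each record's ->next points
	 * at the record that follows and the final ->next is cleared after
	 * the loop below.
	 */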
2237 mu = malloc(sizeof(*mu));
2238 mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
2239 mu->space = NULL;
2240 mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
2241 mu->next = *updates;
2242 u = (struct imsm_update_activate_spare *) mu->buf;
2243
2244 for (di = rv ; di ; di = di->next) {
2245 u->type = update_activate_spare;
2246 u->disk_idx = di->disk.number;
2247 u->slot = di->disk.raid_disk;
2248 u->array = inst;
2249 u->next = u + 1;
2250 u++;
2251 }
2252 (u-1)->next = NULL;
2253 *updates = mu;
2254
2255 return rv;
2256 }
2257
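/* Population count via Kernighan's trick: 'field &= field - 1' clears
 * the lowest set bit, so the number of iterations equals the number of
 * set bits.  Used in imsm_process_update() to compare the set of
 * arrays that could absorb a spare with the set that already have.
 */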
2258 static int weight(unsigned int field)
2259 {
2260 int weight;
2261
2262 for (weight = 0; field; weight++)
2263 field &= field - 1;
2264
2265 return weight;
2266 }
2267
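/* Return 1 if the two maps share at least one member disk index. */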
2268 static int disks_overlap(struct imsm_map *m1, struct imsm_map *m2)
2269 {
2270 int i;
2271 int j;
2272 int idx;
2273
2274 for (i = 0; i < m1->num_members; i++) {
2275 idx = get_imsm_disk_idx(m1, i);
2276 for (j = 0; j < m2->num_members; j++)
2277 if (idx == get_imsm_disk_idx(m2, j))
2278 return 1;
2279 }
2280
2281 return 0;
2282 }
2283
2284 static void imsm_process_update(struct supertype *st,
2285 struct metadata_update *update)
2286 {
2287 /**
2288 * crack open the metadata_update envelope to find the update record
2289 * update can be one of:
2290 * update_activate_spare - a spare device has replaced a failed
2291 * device in an array, update the disk_ord_tbl. If this disk is
2292 * present in all member arrays then also clear the SPARE_DISK
2293 * flag
 * update_create_array - a new subarray is being created; sanity
 * check the request and install the pre-allocated imsm_dev
2294 */
2295 struct intel_super *super = st->sb;
2296 struct imsm_super *mpb = super->anchor;
2297 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
2298
2299 switch (type) {
2300 case update_activate_spare: {
2301 struct imsm_update_activate_spare *u = (void *) update->buf;
2302 struct imsm_dev *dev = get_imsm_dev(super, u->array);
2303 struct imsm_map *map = &dev->vol.map[0];
2304 struct active_array *a;
2305 struct imsm_disk *disk;
2306 __u32 status;
2307 struct dl *dl;
2308 struct mdinfo *d;
2309 unsigned int members;
2310 unsigned int found;
2311 int victim;
2312 int i;
2313
2314 for (dl = super->disks; dl; dl = dl->next)
2315 if (dl->index == u->disk_idx)
2316 break;
2317
2318 if (!dl) {
2319 fprintf(stderr, "error: imsm_activate_spare passed "
2320 "an unknown disk_idx: %d\n", u->disk_idx);
2321 return;
2322 }
2323
2324 super->updates_pending++;
2325
2326 victim = get_imsm_disk_idx(map, u->slot);
2327 map->disk_ord_tbl[u->slot] = __cpu_to_le32(u->disk_idx);
2328 disk = get_imsm_disk(super, u->disk_idx);
2329 status = __le32_to_cpu(disk->status);
2330 status |= CONFIGURED_DISK;
2331 disk->status = __cpu_to_le32(status);
2332
2333 /* map unique/live arrays using the spare */
2334 members = 0;
2335 found = 0;
2336 for (a = st->arrays; a; a = a->next) {
2337 int inst = a->info.container_member;
2338
2339 dev = get_imsm_dev(super, inst);
2340 map = &dev->vol.map[0];
2341 if (map->raid_level > 0)
2342 members |= 1 << inst;
2343 for (d = a->info.devs; d; d = d->next)
2344 if (d->disk.major == dl->major &&
2345 d->disk.minor == dl->minor)
2346 found |= 1 << inst;
2347 }
2348
2349 		/* until every array that can absorb this disk has absorbed it,
2350 		 * the disk can still be considered a spare
2351 		 */
2352 if (weight(found) >= weight(members)) {
2353 status = __le32_to_cpu(disk->status);
2354 status &= ~SPARE_DISK;
2355 disk->status = __cpu_to_le32(status);
2356 }
2357
2358 /* count arrays using the victim in the metadata */
2359 found = 0;
2360 for (a = st->arrays; a ; a = a->next) {
2361 dev = get_imsm_dev(super, a->info.container_member);
2362 map = &dev->vol.map[0];
2363 for (i = 0; i < map->num_members; i++)
2364 if (victim == get_imsm_disk_idx(map, i))
2365 found++;
2366 }
2367
2368 /* clear some flags if the victim is no longer being
2369 * utilized anywhere
2370 */
2371 disk = get_imsm_disk(super, victim);
2372 if (!found) {
2373 status = __le32_to_cpu(disk->status);
2374 status &= ~(CONFIGURED_DISK | USABLE_DISK);
2375 disk->status = __cpu_to_le32(status);
2376 }
2377 break;
2378 }
2379 case update_create_array: {
2380 /* someone wants to create a new array, we need to be aware of
2381 * a few races/collisions:
2382 * 1/ 'Create' called by two separate instances of mdadm
2383 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
2384 * devices that have since been assimilated via
2385 * activate_spare.
2386 * In the event this update can not be carried out mdadm will
2387 * (FIX ME) notice that its update did not take hold.
2388 */
2389 struct imsm_update_create_array *u = (void *) update->buf;
2390 struct imsm_dev *dev;
2391 struct imsm_map *map, *new_map;
2392 unsigned long long start, end;
2393 unsigned long long new_start, new_end;
2394 int i;
2395 int overlap = 0;
2396
2397 /* handle racing creates: first come first serve */
2398 if (u->dev_idx < mpb->num_raid_devs) {
2399 dprintf("%s: subarray %d already defined\n",
2400 __func__, u->dev_idx);
2401 return;
2402 }
2403
2404 /* check update is next in sequence */
2405 if (u->dev_idx != mpb->num_raid_devs) {
2406 dprintf("%s: can not create arrays out of sequence\n",
2407 __func__);
2408 return;
2409 }
2410
2411 new_map = &u->dev.vol.map[0];
2412 new_start = __le32_to_cpu(new_map->pba_of_lba0);
2413 new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);
2414
2415 /* handle activate_spare versus create race:
2416 * check to make sure that overlapping arrays do not include
2417 		 * overlapping disks
2418 */
2419 for (i = 0; i < mpb->num_raid_devs; i++) {
2420 dev = get_imsm_dev(super, i);
2421 map = &dev->vol.map[0];
2422 start = __le32_to_cpu(map->pba_of_lba0);
2423 end = start + __le32_to_cpu(map->blocks_per_member);
2424 if ((new_start >= start && new_start <= end) ||
2425 (start >= new_start && start <= new_end))
2426 overlap = 1;
2427 if (overlap && disks_overlap(map, new_map)) {
2428 dprintf("%s: arrays overlap\n", __func__);
2429 return;
2430 }
2431 }
2432 /* check num_members sanity */
2433 if (new_map->num_members > mpb->num_disks) {
2434 dprintf("%s: num_disks out of range\n", __func__);
2435 return;
2436 }
2437
2438 /* check that prepare update was successful */
2439 if (!update->space) {
2440 dprintf("%s: prepare update failed\n", __func__);
2441 return;
2442 }
2443
2444 super->updates_pending++;
2445 dev = update->space;
2446 update->space = NULL;
2447 imsm_copy_dev(dev, &u->dev);
2448 super->dev_tbl[u->dev_idx] = dev;
2449 mpb->num_raid_devs++;
2450
2451 /* fix up flags, if arrays overlap then the drives can not be
2452 * spares
2453 */
2454 		for (i = 0; i < new_map->num_members; i++) {
2455 			struct imsm_disk *disk;
2456 			__u32 status;
2457
2458 			disk = get_imsm_disk(super, get_imsm_disk_idx(new_map, i));
2459 status = __le32_to_cpu(disk->status);
2460 status |= CONFIGURED_DISK;
2461 if (overlap)
2462 status &= ~SPARE_DISK;
2463 disk->status = __cpu_to_le32(status);
2464 }
2465 break;
2466 }
2467 }
2468 }
2469
2470 static void imsm_prepare_update(struct supertype *st,
2471 struct metadata_update *update)
2472 {
2473 /**
2474 * Allocate space to hold new disk entries, raid-device entries or a
2475 * new mpb if necessary. We currently maintain an mpb large enough to
2476 * hold 2 subarrays for the given number of disks. This may not be
2477 * sufficient when reshaping.
2478 *
2479 * FIX ME handle the reshape case.
2480 *
2481 * The monitor will be able to safely change super->mpb by arranging
2482 * for it to be freed in check_update_queue(). I.e. the monitor thread
2483 * will start using the new pointer and the manager can continue to use
2484 * the old value until check_update_queue() runs.
2485 */
2486 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
2487
2488 switch (type) {
2489 case update_create_array: {
2490 struct imsm_update_create_array *u = (void *) update->buf;
2491 size_t len = sizeof_imsm_dev(&u->dev);
2492
2493 update->space = malloc(len);
2494 		break;
2495 	}
2496 	default:
2497 		break;
2498 	}
2499
2500 return;
2501 }
2502
2503 struct superswitch super_imsm = {
2504 #ifndef MDASSEMBLE
2505 .examine_super = examine_super_imsm,
2506 .brief_examine_super = brief_examine_super_imsm,
2507 .detail_super = detail_super_imsm,
2508 .brief_detail_super = brief_detail_super_imsm,
2509 .write_init_super = write_init_super_imsm,
2510 #endif
2511 .match_home = match_home_imsm,
2512 .uuid_from_super= uuid_from_super_imsm,
2513 .getinfo_super = getinfo_super_imsm,
2514 .update_super = update_super_imsm,
2515
2516 .avail_size = avail_size_imsm,
2517
2518 .compare_super = compare_super_imsm,
2519
2520 .load_super = load_super_imsm,
2521 .init_super = init_super_imsm,
2522 .add_to_super = add_to_super_imsm,
2523 .store_super = store_zero_imsm,
2524 .free_super = free_super_imsm,
2525 .match_metadata_desc = match_metadata_desc_imsm,
2526 .container_content = container_content_imsm,
2527
2528 .validate_geometry = validate_geometry_imsm,
2529 .external = 1,
2530
2531 /* for mdmon */
2532 .open_new = imsm_open_new,
2534 .set_array_state= imsm_set_array_state,
2535 .set_disk = imsm_set_disk,
2536 .sync_metadata = imsm_sync_metadata,
2537 .activate_spare = imsm_activate_spare,
2538 .process_update = imsm_process_update,
2539 .prepare_update = imsm_prepare_update,
2540 };