]> git.ipfire.org Git - thirdparty/mdadm.git/blob - super-intel.c
imsm: provide a detail_platform method
[thirdparty/mdadm.git] / super-intel.c
1 /*
2 * mdadm - Intel(R) Matrix Storage Manager Support
3 *
4 * Copyright (C) 2002-2008 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define HAVE_STDINT_H 1
21 #include "mdadm.h"
22 #include "mdmon.h"
23 #include "sha1.h"
24 #include "platform-intel.h"
25 #include <values.h>
26 #include <scsi/sg.h>
27 #include <ctype.h>
28 #include <dirent.h>
29
30 /* MPB == Metadata Parameter Block */
31 #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
32 #define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
33 #define MPB_VERSION_RAID0 "1.0.00"
34 #define MPB_VERSION_RAID1 "1.1.00"
35 #define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
36 #define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
37 #define MPB_VERSION_RAID5 "1.2.02"
38 #define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
39 #define MPB_VERSION_CNG "1.2.06"
40 #define MPB_VERSION_ATTRIBS "1.3.00"
41 #define MAX_SIGNATURE_LENGTH 32
42 #define MAX_RAID_SERIAL_LEN 16
43
44 #define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
45 #define MPB_ATTRIB_PM __cpu_to_le32(0x40000000)
46 #define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000)
47 #define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001)
48 #define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002)
49 #define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004)
50 #define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008)
51 #define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010)
52 #define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)
53
54 #define MPB_SECTOR_CNT 418
55 #define IMSM_RESERVED_SECTORS 4096
56
57 /* Disk configuration info. */
58 #define IMSM_MAX_DEVICES 255
struct imsm_disk {
	/* On-disk record for one member disk.  The offsets in the comments
	 * (0xD8...) are relative to the start of the mpb, where the first
	 * disk entry is embedded (see imsm_super.disk[]).
	 */
	__u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
	__u32 total_blocks;		 /* 0xE8 - 0xEB total blocks */
	__u32 scsi_id;			 /* 0xEC - 0xEF scsi ID */
	/* bit flags stored in 'status' below */
#define SPARE_DISK      __cpu_to_le32(0x01)  /* Spare */
#define CONFIGURED_DISK __cpu_to_le32(0x02)  /* Member of some RaidDev */
#define FAILED_DISK     __cpu_to_le32(0x04)  /* Permanent failure */
#define	USABLE_DISK     __cpu_to_le32(0x08)  /* Fully usable unless FAILED_DISK is set */
	__u32 status;			 /* 0xF0 - 0xF3 */
	__u32 owner_cfg_num; /* which config 0,1,2... owns this disk */
#define	IMSM_DISK_FILLERS	4
	__u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
};
72
/* RAID map configuration infos.
 * Variable-length: disk_ord_tbl[] really holds num_members entries, so
 * always size instances with sizeof_imsm_map(), never sizeof(struct imsm_map).
 */
struct imsm_map {
	__u32 pba_of_lba0;	/* start address of partition */
	__u32 blocks_per_member;/* blocks per member */
	__u32 num_data_stripes;	/* number of data stripes */
	__u16 blocks_per_strip;
	__u8  map_state;	/* Normal, Uninitialized, Degraded, Failed */
#define IMSM_T_STATE_NORMAL 0
#define IMSM_T_STATE_UNINITIALIZED 1
#define IMSM_T_STATE_DEGRADED 2
#define IMSM_T_STATE_FAILED 3
	__u8  raid_level;
#define IMSM_T_RAID0 0
#define IMSM_T_RAID1 1
#define IMSM_T_RAID5 5		/* since metadata version 1.2.02 ? */
	__u8  num_members;	/* number of member disks */
	__u8  num_domains;	/* number of parity domains */
	__u8  failed_disk_num;  /* valid only when state is degraded */
	__u8  reserved[1];
	__u32 filler[7];	/* expansion area */
	/* flag set in a disk_ord_tbl entry's top byte while that disk is
	 * being rebuilt; strip with ord_to_idx() to get the disk index
	 */
#define IMSM_ORD_REBUILD (1 << 24)
	__u32 disk_ord_tbl[1];	/* disk_ord_tbl[num_members],
				 * top byte contains some flags
				 */
} __attribute__ ((packed));
98
struct imsm_vol {
	/* Per-volume migration/consistency state, embedded in imsm_dev and
	 * followed by one map -- or two maps while a migration is active.
	 */
	__u32 curr_migr_unit;
	__u32 checkpoint_id;	/* id to access curr_migr_unit */
	__u8  migr_state;	/* Normal or Migrating */
#define MIGR_INIT 0
#define MIGR_REBUILD 1
#define MIGR_VERIFY 2 /* analagous to echo check > sync_action */
#define MIGR_GEN_MIGR 3
#define MIGR_STATE_CHANGE 4
	__u8  migr_type;	/* Initializing, Rebuilding, ... */
	__u8  dirty;		/* non-zero: volume was not cleanly shut down */
	__u8  fs_state;		/* fast-sync state for CnG (0xff == disabled) */
	__u16 verify_errors;	/* number of mismatches */
	__u16 bad_blocks;	/* number of bad blocks during verify */
	__u32 filler[4];
	struct imsm_map map[1];	/* variable length, see sizeof_imsm_map() */
	/* here comes another one if migr_state */
} __attribute__ ((packed));
117
struct imsm_dev {
	/* One raid volume record.  Variable length: the embedded imsm_vol
	 * carries one or two variable-length maps (see sizeof_imsm_dev()).
	 */
	__u8  volume[MAX_RAID_SERIAL_LEN];
	__u32 size_low;		/* volume size in sectors, low 32 bits */
	__u32 size_high;	/* volume size in sectors, high 32 bits */
	/* bit flags stored in 'status' below */
#define DEV_BOOTABLE		__cpu_to_le32(0x01)
#define DEV_BOOT_DEVICE		__cpu_to_le32(0x02)
#define DEV_READ_COALESCING	__cpu_to_le32(0x04)
#define DEV_WRITE_COALESCING	__cpu_to_le32(0x08)
#define DEV_LAST_SHUTDOWN_DIRTY	__cpu_to_le32(0x10)
#define DEV_HIDDEN_AT_BOOT	__cpu_to_le32(0x20)
#define DEV_CURRENTLY_HIDDEN	__cpu_to_le32(0x40)
#define DEV_VERIFY_AND_FIX	__cpu_to_le32(0x80)
#define DEV_MAP_STATE_UNINIT	__cpu_to_le32(0x100)
#define DEV_NO_AUTO_RECOVERY	__cpu_to_le32(0x200)
#define DEV_CLONE_N_GO		__cpu_to_le32(0x400)
#define DEV_CLONE_MAN_SYNC	__cpu_to_le32(0x800)
#define DEV_CNG_MASTER_DISK_NUM	__cpu_to_le32(0x1000)
	__u32 status;	/* Persistent RaidDev status */
	__u32 reserved_blocks; /* Reserved blocks at beginning of volume */
	__u8  migr_priority;
	__u8  num_sub_vols;
	__u8  tid;
	__u8  cng_master_disk;
	__u16 cache_policy;
	__u8  cng_state;
	__u8  cng_sub_state;
#define IMSM_DEV_FILLERS 10
	__u32 filler[IMSM_DEV_FILLERS];
	struct imsm_vol vol;
} __attribute__ ((packed));
148
struct imsm_super {
	/* The on-disk anchor ("MPB").  The fixed header is followed by a
	 * variable-length disk table, then the imsm_dev records (located
	 * by walking, see __get_imsm_dev()), then the BBM log.
	 */
	__u8 sig[MAX_SIGNATURE_LENGTH];	/* 0x00 - 0x1F */
	__u32 check_sum;		/* 0x20 - 0x23 MPB Checksum */
	__u32 mpb_size;			/* 0x24 - 0x27 Size of MPB */
	__u32 family_num;		/* 0x28 - 0x2B Checksum from first time this config was written */
	__u32 generation_num;		/* 0x2C - 0x2F Incremented each time this array's MPB is written */
	__u32 error_log_size;		/* 0x30 - 0x33 in bytes */
	__u32 attributes;		/* 0x34 - 0x37 */
	__u8 num_disks;			/* 0x38 Number of configured disks */
	__u8 num_raid_devs;		/* 0x39 Number of configured volumes */
	__u8 error_log_pos;		/* 0x3A */
	__u8 fill[1];			/* 0x3B */
	__u32 cache_size;		/* 0x3c - 0x40 in mb */
	__u32 orig_family_num;		/* 0x40 - 0x43 original family num */
	__u32 pwr_cycle_count;		/* 0x44 - 0x47 simulated power cycle count for array */
	__u32 bbm_log_size;		/* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
#define IMSM_FILLERS 35
	__u32 filler[IMSM_FILLERS];	/* 0x4C - 0xD7 RAID_MPB_FILLERS */
	struct imsm_disk disk[1];	/* 0xD8 diskTbl[numDisks] */
	/* here comes imsm_dev[num_raid_devs] */
	/* here comes BBM logs */
} __attribute__ ((packed));
171
172 #define BBM_LOG_MAX_ENTRIES 254
173
/* one remapping record in the bad block management log */
struct bbm_log_entry {
	__u64 defective_block_start;	/* lba of the defective region */
#define UNREADABLE 0xFFFFFFFF
	__u32 spare_block_offset;	/* offset into the spare area, or UNREADABLE */
	__u16 remapped_marked_count;
	__u16 disk_ordinal;		/* which member disk the entry refers to */
} __attribute__ ((__packed__));
181
/* bad block management log, stored after the imsm_dev records
 * (size given by imsm_super.bbm_log_size)
 */
struct bbm_log {
	__u32 signature;	/* 0xABADB10C */
	__u32 entry_count;
	__u32 reserved_spare_block_count; /* 0 */
	__u32 reserved; /* 0xFFFF */
	__u64 first_spare_lba;
	struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
} __attribute__ ((__packed__));
190
191
192 #ifndef MDASSEMBLE
193 static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
194 #endif
195
/* number of 512-byte sectors needed to hold 'bytes' (round up) */
static unsigned int sector_count(__u32 bytes)
{
	return (bytes + 511) / 512;
}
200
/* number of 512-byte sectors occupied by the on-disk mpb */
static unsigned int mpb_sectors(struct imsm_super *mpb)
{
	return sector_count(__le32_to_cpu(mpb->mpb_size));
}
205
/* internal (in-core) representation of IMSM metadata */
struct intel_super {
	union {
		void *buf; /* O_DIRECT buffer for reading/writing metadata */
		struct imsm_super *anchor; /* immovable parameters */
	};
	size_t len; /* size of the 'buf' allocation */
	void *next_buf; /* for realloc'ing buf from the manager */
	size_t next_len;
	int updates_pending; /* count of pending updates for mdmon */
	int creating_imsm; /* flag to indicate container creation */
	int current_vol; /* index of raid device undergoing creation */
	__u32 create_offset; /* common start for 'current_vol' */
#define IMSM_MAX_RAID_DEVS 2
	/* parsed copies of the volume records, indexed by volume number */
	struct imsm_dev *dev_tbl[IMSM_MAX_RAID_DEVS];
	/* one entry per physical disk known to this container */
	struct dl {
		struct dl *next;
		int index;	/* slot in the anchor disk table, -1 for spares */
		__u8 serial[MAX_RAID_SERIAL_LEN];
		int major, minor;
		char *devname;
		struct imsm_disk disk;
		int fd;
		int extent_cnt;
		struct extent *e; /* for determining freespace @ create */
	} *disks;
	struct dl *add; /* list of disks to add while mdmon active */
	struct dl *missing; /* disks removed while we weren't looking */
	struct bbm_log *bbm_log;
	const char *hba; /* device path of the raid controller for this metadata */
	const struct imsm_orom *orom; /* platform firmware support */
};
238
/* one contiguous used region on a member disk, in sectors; arrays of
 * these from get_extents() end with a size == 0 sentinel entry
 */
struct extent {
	unsigned long long start, size;
};
242
/* definition of messages passed to imsm_process_update; the type tag is
 * the first member of every imsm_update_* struct below
 */
enum imsm_update_type {
	update_activate_spare,
	update_create_array,
	update_add_disk,
};
249
/* request to place spare 'dl' into 'slot' of raid device number 'array';
 * requests can be chained through 'next'
 */
struct imsm_update_activate_spare {
	enum imsm_update_type type;
	struct dl *dl;
	int slot;
	int array;
	struct imsm_update_activate_spare *next;
};
257
/* serial-number entry appended after an imsm_update_create_array message
 * (see get_disk_info() for how the list is located)
 */
struct disk_info {
	__u8 serial[MAX_RAID_SERIAL_LEN];
};
261
struct imsm_update_create_array {
	/* variable length: 'dev' carries its trailing map(s), and a
	 * disk_info array follows the whole record (see get_disk_info())
	 */
	enum imsm_update_type type;
	int dev_idx;	/* volume index the new array occupies */
	struct imsm_dev dev;
};
267
/* no payload; the disks to add are presumably taken from the
 * intel_super 'add' list -- NOTE(review): confirm against
 * imsm_process_update
 */
struct imsm_update_add_disk {
	enum imsm_update_type type;
};
271
272 static struct supertype *match_metadata_desc_imsm(char *arg)
273 {
274 struct supertype *st;
275
276 if (strcmp(arg, "imsm") != 0 &&
277 strcmp(arg, "default") != 0
278 )
279 return NULL;
280
281 st = malloc(sizeof(*st));
282 memset(st, 0, sizeof(*st));
283 st->ss = &super_imsm;
284 st->max_devs = IMSM_MAX_DEVICES;
285 st->minor_version = 0;
286 st->sb = NULL;
287 return st;
288 }
289
290 #ifndef MDASSEMBLE
291 static __u8 *get_imsm_version(struct imsm_super *mpb)
292 {
293 return &mpb->sig[MPB_SIG_LEN];
294 }
295 #endif
296
297 /* retrieve a disk directly from the anchor when the anchor is known to be
298 * up-to-date, currently only at load time
299 */
300 static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
301 {
302 if (index >= mpb->num_disks)
303 return NULL;
304 return &mpb->disk[index];
305 }
306
307 #ifndef MDASSEMBLE
308 /* retrieve a disk from the parsed metadata */
309 static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
310 {
311 struct dl *d;
312
313 for (d = super->disks; d; d = d->next)
314 if (d->index == index)
315 return &d->disk;
316
317 return NULL;
318 }
319 #endif
320
321 /* generate a checksum directly from the anchor when the anchor is known to be
322 * up-to-date, currently only at load or write_super after coalescing
323 */
324 static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
325 {
326 __u32 end = mpb->mpb_size / sizeof(end);
327 __u32 *p = (__u32 *) mpb;
328 __u32 sum = 0;
329
330 while (end--) {
331 sum += __le32_to_cpu(*p);
332 p++;
333 }
334
335 return sum - __le32_to_cpu(mpb->check_sum);
336 }
337
338 static size_t sizeof_imsm_map(struct imsm_map *map)
339 {
340 return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
341 }
342
343 struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
344 {
345 struct imsm_map *map = &dev->vol.map[0];
346
347 if (second_map && !dev->vol.migr_state)
348 return NULL;
349 else if (second_map) {
350 void *ptr = map;
351
352 return ptr + sizeof_imsm_map(map);
353 } else
354 return map;
355
356 }
357
358 /* return the size of the device.
359 * migr_state increases the returned size if map[0] were to be duplicated
360 */
361 static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
362 {
363 size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
364 sizeof_imsm_map(get_imsm_map(dev, 0));
365
366 /* migrating means an additional map */
367 if (dev->vol.migr_state)
368 size += sizeof_imsm_map(get_imsm_map(dev, 1));
369 else if (migr_state)
370 size += sizeof_imsm_map(get_imsm_map(dev, 0));
371
372 return size;
373 }
374
375 #ifndef MDASSEMBLE
/* retrieve disk serial number list from a metadata update:
 * the disk_info array sits immediately after the update message, whose
 * embedded imsm_dev is variable length -- so subtract the declared
 * sizeof(struct imsm_dev) and add the true on-disk size instead
 */
static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
{
	void *u = update;
	struct disk_info *inf;

	inf = u + sizeof(*update) - sizeof(struct imsm_dev) +
	      sizeof_imsm_dev(&update->dev, 0);

	return inf;
}
387 #endif
388
389 static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
390 {
391 int offset;
392 int i;
393 void *_mpb = mpb;
394
395 if (index >= mpb->num_raid_devs)
396 return NULL;
397
398 /* devices start after all disks */
399 offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
400
401 for (i = 0; i <= index; i++)
402 if (i == index)
403 return _mpb + offset;
404 else
405 offset += sizeof_imsm_dev(_mpb + offset, 0);
406
407 return NULL;
408 }
409
410 static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
411 {
412 if (index >= super->anchor->num_raid_devs)
413 return NULL;
414 return super->dev_tbl[index];
415 }
416
417 static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev, int slot)
418 {
419 struct imsm_map *map;
420
421 if (dev->vol.migr_state)
422 map = get_imsm_map(dev, 1);
423 else
424 map = get_imsm_map(dev, 0);
425
426 /* top byte identifies disk under rebuild */
427 return __le32_to_cpu(map->disk_ord_tbl[slot]);
428 }
429
430 #define ord_to_idx(ord) (((ord) << 8) >> 8)
431 static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot)
432 {
433 __u32 ord = get_imsm_ord_tbl_ent(dev, slot);
434
435 return ord_to_idx(ord);
436 }
437
/* store 'ord' (disk index plus optional flag bits) into the map's
 * ordinal table at 'slot', converting to on-disk little-endian
 */
static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
{
	map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
}
442
443 static int get_imsm_raid_level(struct imsm_map *map)
444 {
445 if (map->raid_level == 1) {
446 if (map->num_members == 2)
447 return 1;
448 else
449 return 10;
450 }
451
452 return map->raid_level;
453 }
454
455 static int cmp_extent(const void *av, const void *bv)
456 {
457 const struct extent *a = av;
458 const struct extent *b = bv;
459 if (a->start < b->start)
460 return -1;
461 if (a->start > b->start)
462 return 1;
463 return 0;
464 }
465
466 static int count_memberships(struct dl *dl, struct intel_super *super)
467 {
468 int memberships = 0;
469 int i, j;
470
471 for (i = 0; i < super->anchor->num_raid_devs; i++) {
472 struct imsm_dev *dev = get_imsm_dev(super, i);
473 struct imsm_map *map = get_imsm_map(dev, 0);
474
475 for (j = 0; j < map->num_members; j++) {
476 __u32 index = get_imsm_disk_idx(dev, j);
477
478 if (index == dl->index)
479 memberships++;
480 }
481 }
482
483 return memberships;
484 }
485
/* Build a sorted array of the used extents on physical device 'dl'.
 * The array has count_memberships() entries plus a final sentinel whose
 * size is 0 and whose start marks where the metadata reservation begins.
 * Returns NULL on allocation failure; caller frees the result.
 */
static struct extent *get_extents(struct intel_super *super, struct dl *dl)
{
	/* find a list of used extents on the given physical device */
	struct extent *rv, *e;
	int i, j;
	int memberships = count_memberships(dl, super);
	__u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

	rv = malloc(sizeof(struct extent) * (memberships + 1));
	if (!rv)
		return NULL;
	e = rv;

	/* collect one extent per raid device that references this disk */
	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);
		struct imsm_map *map = get_imsm_map(dev, 0);

		for (j = 0; j < map->num_members; j++) {
			__u32 index = get_imsm_disk_idx(dev, j);

			if (index == dl->index) {
				e->start = __le32_to_cpu(map->pba_of_lba0);
				e->size = __le32_to_cpu(map->blocks_per_member);
				e++;
			}
		}
	}
	qsort(rv, memberships, sizeof(*rv), cmp_extent);

	/* determine the start of the metadata
	 * when no raid devices are defined use the default
	 * ...otherwise allow the metadata to truncate the value
	 * as is the case with older versions of imsm
	 */
	if (memberships) {
		struct extent *last = &rv[memberships - 1];
		__u32 remainder;

		/* space left between the highest extent and end of disk */
		remainder = __le32_to_cpu(dl->disk.total_blocks) -
			    (last->start + last->size);
		/* round down to 1k block to satisfy precision of the kernel
		 * 'size' interface
		 */
		remainder &= ~1UL;
		/* make sure remainder is still sane */
		if (remainder < ROUND_UP(super->len, 512) >> 9)
			remainder = ROUND_UP(super->len, 512) >> 9;
		if (reservation > remainder)
			reservation = remainder;
	}
	/* sentinel entry: start of the reserved metadata region */
	e->start = __le32_to_cpu(dl->disk.total_blocks) - reservation;
	e->size = 0;
	return rv;
}
540
/* try to determine how much space is reserved for metadata from
 * the last get_extents() entry (its sentinel marks where the reserved
 * region starts), otherwise fallback to the default
 */
static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
{
	struct extent *e;
	int i;
	__u32 rv;

	/* for spares just return a minimal reservation which will grow
	 * once the spare is picked up by an array
	 */
	if (dl->index == -1)
		return MPB_SECTOR_CNT;

	e = get_extents(super, dl);
	if (!e)
		return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

	/* scroll to last entry (the size == 0 sentinel) */
	for (i = 0; e[i].size; i++)
		continue;

	/* reservation == everything from the sentinel to end of disk */
	rv = __le32_to_cpu(dl->disk.total_blocks) - e[i].start;

	free(e);

	return rv;
}
571
572 #ifndef MDASSEMBLE
/* --examine output for one volume; 'disk_idx' identifies the disk the
 * metadata was read from so its slot can be reported as "This Slot"
 */
static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx)
{
	__u64 sz;
	int slot;
	struct imsm_map *map = get_imsm_map(dev, 0);
	__u32 ord;

	printf("\n");
	printf("[%.16s]:\n", dev->volume);
	printf(" UUID : %s\n", uuid);
	printf(" RAID Level : %d\n", get_imsm_raid_level(map));
	printf(" Members : %d\n", map->num_members);
	/* find which slot (if any) this disk occupies in the volume */
	for (slot = 0; slot < map->num_members; slot++)
		if (disk_idx== get_imsm_disk_idx(dev, slot))
			break;
	if (slot < map->num_members) {
		ord = get_imsm_ord_tbl_ent(dev, slot);
		printf(" This Slot : %d%s\n", slot,
		       ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
	} else
		printf(" This Slot : ?\n");
	/* volume size is a 64-bit sector count split across two fields */
	sz = __le32_to_cpu(dev->size_high);
	sz <<= 32;
	sz += __le32_to_cpu(dev->size_low);
	printf(" Array Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	sz = __le32_to_cpu(map->blocks_per_member);
	printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	printf(" Sector Offset : %u\n",
		__le32_to_cpu(map->pba_of_lba0));
	printf(" Num Stripes : %u\n",
		__le32_to_cpu(map->num_data_stripes));
	printf(" Chunk Size : %u KiB\n",
		__le16_to_cpu(map->blocks_per_strip) / 2);
	printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
	printf(" Migrate State : %s", dev->vol.migr_state ? "migrating" : "idle");
	if (dev->vol.migr_state)
		printf(": %s", dev->vol.migr_type ? "rebuilding" : "initializing");
	printf("\n");
	printf(" Map State : %s", map_state_str[map->map_state]);
	if (dev->vol.migr_state) {
		/* show the transition: second map holds the source state */
		struct imsm_map *map = get_imsm_map(dev, 1);
		printf(" <-- %s", map_state_str[map->map_state]);
	}
	printf("\n");
	printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
}
621
622 static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
623 {
624 struct imsm_disk *disk = __get_imsm_disk(mpb, index);
625 char str[MAX_RAID_SERIAL_LEN + 1];
626 __u32 s;
627 __u64 sz;
628
629 if (index < 0)
630 return;
631
632 printf("\n");
633 snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
634 printf(" Disk%02d Serial : %s\n", index, str);
635 s = disk->status;
636 printf(" State :%s%s%s%s\n", s&SPARE_DISK ? " spare" : "",
637 s&CONFIGURED_DISK ? " active" : "",
638 s&FAILED_DISK ? " failed" : "",
639 s&USABLE_DISK ? " usable" : "");
640 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
641 sz = __le32_to_cpu(disk->total_blocks) - reserved;
642 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
643 human_size(sz * 512));
644 }
645
646 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info);
647
/* mdadm --examine: dump the anchor header, the local disk, the BBM log
 * (if present), every volume, and then the remaining disks
 */
static void examine_super_imsm(struct supertype *st, char *homehost)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	char str[MAX_SIGNATURE_LENGTH];
	int i;
	struct mdinfo info;
	char nbuf[64];
	__u32 sum;
	__u32 reserved = imsm_reserved_sectors(super, super->disks);


	snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
	printf(" Magic : %s\n", str);
	snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
	printf(" Version : %s\n", get_imsm_version(mpb));
	printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
	printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
	getinfo_super_imsm(st, &info);
	fname_from_uuid(st, &info, nbuf,'-');
	printf(" UUID : %s\n", nbuf + 5);
	/* recompute the checksum to flag corrupted metadata */
	sum = __le32_to_cpu(mpb->check_sum);
	printf(" Checksum : %08x %s\n", sum,
		__gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
	printf(" MPB Sectors : %d\n", mpb_sectors(mpb));
	printf(" Disks : %d\n", mpb->num_disks);
	printf(" RAID Devices : %d\n", mpb->num_raid_devs);
	/* the disk this metadata was read from is printed first */
	print_imsm_disk(mpb, super->disks->index, reserved);
	if (super->bbm_log) {
		struct bbm_log *log = super->bbm_log;

		printf("\n");
		printf("Bad Block Management Log:\n");
		printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
		printf(" Signature : %x\n", __le32_to_cpu(log->signature));
		printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count));
		printf(" Spare Blocks : %d\n", __le32_to_cpu(log->reserved_spare_block_count));
		printf(" First Spare : %llx\n", __le64_to_cpu(log->first_spare_lba));
	}
	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct mdinfo info;
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);

		/* getinfo reports on the volume selected by current_vol */
		super->current_vol = i;
		getinfo_super_imsm(st, &info);
		fname_from_uuid(st, &info, nbuf, '-');
		print_imsm_dev(dev, nbuf + 5, super->disks->index);
	}
	for (i = 0; i < mpb->num_disks; i++) {
		if (i == super->disks->index)
			continue; /* already printed above */
		print_imsm_disk(mpb, i, reserved);
	}
}
702
/* mdadm --examine --brief: emit mdadm.conf ARRAY lines, one for the
 * container and one per member volume
 */
static void brief_examine_super_imsm(struct supertype *st)
{
	/* We just write a generic IMSM ARRAY entry */
	struct mdinfo info;
	char nbuf[64];
	char nbuf1[64];
	struct intel_super *super = st->sb;
	int i;

	if (!super->anchor->num_raid_devs)
		return;

	getinfo_super_imsm(st, &info);
	fname_from_uuid(st, &info, nbuf,'-');
	printf("ARRAY metadata=imsm auto=md UUID=%s\n", nbuf + 5);
	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);

		/* getinfo reports on the volume selected by current_vol */
		super->current_vol = i;
		getinfo_super_imsm(st, &info);
		fname_from_uuid(st, &info, nbuf1,'-');
		printf("ARRAY /dev/md/%.16s container=%s\n"
		       " member=%d auto=mdp UUID=%s\n",
		       dev->volume, nbuf + 5, i, nbuf1 + 5);
	}
}
729
730 static void detail_super_imsm(struct supertype *st, char *homehost)
731 {
732 struct mdinfo info;
733 char nbuf[64];
734
735 getinfo_super_imsm(st, &info);
736 fname_from_uuid(st, &info, nbuf,'-');
737 printf("\n UUID : %s\n", nbuf + 5);
738 }
739
740 static void brief_detail_super_imsm(struct supertype *st)
741 {
742 struct mdinfo info;
743 char nbuf[64];
744 getinfo_super_imsm(st, &info);
745 fname_from_uuid(st, &info, nbuf,'-');
746 printf(" UUID=%s", nbuf + 5);
747 }
748
749 static int imsm_read_serial(int fd, char *devname, __u8 *serial);
750 static void fd2devname(int fd, char *name);
751
752 static int imsm_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
753 {
754 /* dump an unsorted list of devices attached to ahci, as well as
755 * non-connected ports
756 */
757 int hba_len = strlen(hba_path) + 1;
758 struct dirent *ent;
759 DIR *dir;
760 char *path = NULL;
761 int err = 0;
762 unsigned long port_mask = (1 << port_count) - 1;
763
764 if (port_count > sizeof(port_mask) * 8) {
765 if (verbose)
766 fprintf(stderr, Name ": port_count %d out of range\n", port_count);
767 return 2;
768 }
769
770 /* scroll through /sys/dev/block looking for devices attached to
771 * this hba
772 */
773 dir = opendir("/sys/dev/block");
774 for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
775 int fd;
776 char model[64];
777 char vendor[64];
778 char buf[1024];
779 int major, minor;
780 char *device;
781 char *c;
782 int port;
783 int type;
784
785 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
786 continue;
787 path = devt_to_devpath(makedev(major, minor));
788 if (!path)
789 continue;
790 if (!path_attached_to_hba(path, hba_path)) {
791 free(path);
792 path = NULL;
793 continue;
794 }
795
796 /* retrieve the scsi device type */
797 if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
798 if (verbose)
799 fprintf(stderr, Name ": failed to allocate 'device'\n");
800 err = 2;
801 break;
802 }
803 sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
804 if (load_sys(device, buf) != 0) {
805 if (verbose)
806 fprintf(stderr, Name ": failed to read device type for %s\n",
807 path);
808 err = 2;
809 free(device);
810 break;
811 }
812 type = strtoul(buf, NULL, 10);
813
814 /* if it's not a disk print the vendor and model */
815 if (!(type == 0 || type == 7 || type == 14)) {
816 vendor[0] = '\0';
817 model[0] = '\0';
818 sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
819 if (load_sys(device, buf) == 0) {
820 strncpy(vendor, buf, sizeof(vendor));
821 vendor[sizeof(vendor) - 1] = '\0';
822 c = (char *) &vendor[sizeof(vendor) - 1];
823 while (isspace(*c) || *c == '\0')
824 *c-- = '\0';
825
826 }
827 sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
828 if (load_sys(device, buf) == 0) {
829 strncpy(model, buf, sizeof(model));
830 model[sizeof(model) - 1] = '\0';
831 c = (char *) &model[sizeof(model) - 1];
832 while (isspace(*c) || *c == '\0')
833 *c-- = '\0';
834 }
835
836 if (vendor[0] && model[0])
837 sprintf(buf, "%.64s %.64s", vendor, model);
838 else
839 switch (type) { /* numbers from hald/linux/device.c */
840 case 1: sprintf(buf, "tape"); break;
841 case 2: sprintf(buf, "printer"); break;
842 case 3: sprintf(buf, "processor"); break;
843 case 4:
844 case 5: sprintf(buf, "cdrom"); break;
845 case 6: sprintf(buf, "scanner"); break;
846 case 8: sprintf(buf, "media_changer"); break;
847 case 9: sprintf(buf, "comm"); break;
848 case 12: sprintf(buf, "raid"); break;
849 default: sprintf(buf, "unknown");
850 }
851 } else
852 buf[0] = '\0';
853 free(device);
854
855 /* chop device path to 'host%d' and calculate the port number */
856 c = strchr(&path[hba_len], '/');
857 *c = '\0';
858 if (sscanf(&path[hba_len], "host%d", &port) == 1)
859 port -= host_base;
860 else {
861 if (verbose) {
862 *c = '/'; /* repair the full string */
863 fprintf(stderr, Name ": failed to determine port number for %s\n",
864 path);
865 }
866 err = 2;
867 break;
868 }
869
870 /* mark this port as used */
871 port_mask &= ~(1 << port);
872
873 /* print out the device information */
874 if (buf[0]) {
875 printf(" Port%d : - non-disk device (%s) -\n", port, buf);
876 continue;
877 }
878
879 fd = dev_open(ent->d_name, O_RDONLY);
880 if (fd < 0)
881 printf(" Port%d : - disk info unavailable -\n", port);
882 else {
883 fd2devname(fd, buf);
884 printf(" Port%d : %s", port, buf);
885 if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
886 printf(" (%s)\n", buf);
887 else
888 printf("()\n");
889 }
890 close(fd);
891 free(path);
892 path = NULL;
893 }
894 if (path)
895 free(path);
896 if (dir)
897 closedir(dir);
898 if (err == 0) {
899 int i;
900
901 for (i = 0; i < port_count; i++)
902 if (port_mask & (1 << i))
903 printf(" Port%d : - no device attached -\n", i);
904 }
905
906 return err;
907 }
908
909 static int detail_platform_imsm(int verbose)
910 {
911 /* There are two components to imsm platform support, the ahci SATA
912 * controller and the option-rom. To find the SATA controller we
913 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
914 * controller with the Intel vendor id is present. This approach
915 * allows mdadm to leverage the kernel's ahci detection logic, with the
916 * caveat that if ahci.ko is not loaded mdadm will not be able to
917 * detect platform raid capabilities. The option-rom resides in a
918 * platform "Adapter ROM". We scan for its signature to retrieve the
919 * platform capabilities. If raid support is disabled in the BIOS the
920 * option-rom capability structure will not be available.
921 */
922 const struct imsm_orom *orom;
923 struct sys_dev *list, *hba;
924 DIR *dir;
925 struct dirent *ent;
926 const char *hba_path;
927 int host_base = 0;
928 int port_count = 0;
929
930 list = find_driver_devices("pci", "ahci");
931 for (hba = list; hba; hba = hba->next)
932 if (devpath_to_vendor(hba->path) == 0x8086)
933 break;
934
935 if (!hba) {
936 if (verbose)
937 fprintf(stderr, Name ": unable to find active ahci controller\n");
938 free_sys_dev(&list);
939 return 2;
940 } else if (verbose)
941 fprintf(stderr, Name ": found Intel SATA AHCI Controller\n");
942 hba_path = hba->path;
943 hba->path = NULL;
944 free_sys_dev(&list);
945
946 orom = find_imsm_orom();
947 if (!orom) {
948 if (verbose)
949 fprintf(stderr, Name ": imsm option-rom not found\n");
950 return 2;
951 }
952
953 printf(" Platform : Intel(R) Matrix Storage Manager\n");
954 printf(" Version : %d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
955 orom->hotfix_ver, orom->build);
956 printf(" RAID Levels :%s%s%s%s%s\n",
957 imsm_orom_has_raid0(orom) ? " raid0" : "",
958 imsm_orom_has_raid1(orom) ? " raid1" : "",
959 imsm_orom_has_raid1e(orom) ? " raid1e" : "",
960 imsm_orom_has_raid10(orom) ? " raid10" : "",
961 imsm_orom_has_raid5(orom) ? " raid5" : "");
962 printf(" Max Disks : %d\n", orom->tds);
963 printf(" Max Volumes : %d\n", orom->vpa);
964 printf(" I/O Controller : %s\n", hba_path);
965
966 /* find the smallest scsi host number to determine a port number base */
967 dir = opendir(hba_path);
968 for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
969 int host;
970
971 if (sscanf(ent->d_name, "host%d", &host) != 1)
972 continue;
973 if (port_count == 0)
974 host_base = host;
975 else if (host < host_base)
976 host_base = host;
977
978 if (host + 1 > port_count + host_base)
979 port_count = host + 1 - host_base;
980
981 }
982 if (dir)
983 closedir(dir);
984
985 if (!port_count || imsm_enumerate_ports(hba_path, port_count,
986 host_base, verbose) != 0) {
987 if (verbose)
988 fprintf(stderr, Name ": failed to enumerate ports\n");
989 return 2;
990 }
991
992 return 0;
993 }
994 #endif
995
static int match_home_imsm(struct supertype *st, char *homehost)
{
	/* the imsm metadata format does not specify any host
	 * identification information. We return -1 since we can never
	 * confirm nor deny whether a given array is "meant" for this
	 * host. We rely on compare_super and the 'family_num' field to
	 * exclude member disks that do not belong, and we rely on
	 * mdadm.conf to specify the arrays that should be assembled.
	 * Auto-assembly may still pick up "foreign" arrays.
	 */
	return -1;
}
1009
static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
{
	/* The uuid returned here is used for:
	 *  uuid to put into bitmap file (Create, Grow)
	 *  uuid for backup header when saving critical section (Grow)
	 *  comparing uuids when re-adding a device into an array
	 *    In these cases the uuid required is that of the data-array,
	 *    not the device-set.
	 *  uuid to recognise same set when adding a missing device back
	 *    to an array.   This is a uuid for the device-set.
	 *
	 * For each of these we can make do with a truncated
	 * or hashed uuid rather than the original, as long as
	 * everyone agrees.
	 * In each case the uuid required is that of the data-array,
	 * not the device-set.
	 */
	/* imsm does not track uuid's so we synthesis one using sha1 on
	 * - The signature (Which is constant for all imsm array, but no matter)
	 * - the family_num of the container
	 * - the index number of the volume
	 * - the 'serial' number of the volume.
	 * Hopefully these are all constant.
	 */
	struct intel_super *super = st->sb;

	char buf[20];	/* sha1 digest is 20 bytes; only first 16 used */
	struct sha1_ctx ctx;
	struct imsm_dev *dev = NULL;

	sha1_init_ctx(&ctx);
	sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx);
	sha1_process_bytes(&super->anchor->family_num, sizeof(__u32), &ctx);
	/* current_vol < 0 means the container itself: hash only the
	 * signature and family_num, giving a device-set uuid
	 */
	if (super->current_vol >= 0)
		dev = get_imsm_dev(super, super->current_vol);
	if (dev) {
		__u32 vol = super->current_vol;
		sha1_process_bytes(&vol, sizeof(vol), &ctx);
		sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
	}
	sha1_finish_ctx(&ctx, buf);
	memcpy(uuid, buf, 4*4);
}
1053
#if 0
/* Parse the dotted version string at the end of the mpb signature
 * (e.g. "1.2.02") into numeric components.
 * NOTE(review): compiled out.  Despite the parameter name, *m receives
 * the MINOR number; the major component is parsed but never returned.
 */
static void
get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
{
	__u8 *v = get_imsm_version(mpb);
	__u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
	char major[] = { 0, 0, 0 };
	char minor[] = { 0 ,0, 0 };
	char patch[] = { 0, 0, 0 };
	char *ver_parse[] = { major, minor, patch };
	int i, j;

	i = j = 0;
	while (*v != '\0' && v < end) {
		/* accumulate up to two digits per component; '.' advances
		 * to the next component */
		if (*v != '.' && j < 2)
			ver_parse[i][j++] = *v;
		else {
			i++;
			j = 0;
		}
		v++;
	}

	*m = strtol(minor, NULL, 0);
	*p = strtol(patch, NULL, 0);
}
#endif
1081
1082 static int imsm_level_to_layout(int level)
1083 {
1084 switch (level) {
1085 case 0:
1086 case 1:
1087 return 0;
1088 case 5:
1089 case 6:
1090 return ALGORITHM_LEFT_ASYMMETRIC;
1091 case 10:
1092 return 0x102;
1093 }
1094 return -1;
1095 }
1096
/* Fill 'info' from the currently selected volume (super->current_vol).
 * Only reached via getinfo_super_imsm() when a volume is selected.
 */
static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
{
	struct intel_super *super = st->sb;
	struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
	struct imsm_map *map = get_imsm_map(dev, 0);

	info->container_member = super->current_vol;
	info->array.raid_disks = map->num_members;
	info->array.level = get_imsm_raid_level(map);
	info->array.layout = imsm_level_to_layout(info->array.level);
	info->array.md_minor = -1;
	info->array.ctime = 0;
	info->array.utime = 0;
	info->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) << 9;	/* sectors -> bytes */
	info->array.state = !dev->vol.dirty;	/* 1 == clean */

	info->disk.major = 0;
	info->disk.minor = 0;

	info->data_offset = __le32_to_cpu(map->pba_of_lba0);
	info->component_size = __le32_to_cpu(map->blocks_per_member);
	memset(info->uuid, 0, sizeof(info->uuid));

	/* resync_start: 0 == needs resync from the start, ~0ULL == in sync,
	 * otherwise the migration checkpoint */
	if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty)
		info->resync_start = 0;
	else if (dev->vol.migr_state)
		info->resync_start = __le32_to_cpu(dev->vol.curr_migr_unit);
	else
		info->resync_start = ~0ULL;

	/* dev->volume may use all MAX_RAID_SERIAL_LEN bytes without a NUL;
	 * terminate one past the copy (assumes info->name holds at least
	 * MAX_RAID_SERIAL_LEN + 1 bytes -- TODO confirm against mdinfo) */
	strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
	info->name[MAX_RAID_SERIAL_LEN] = 0;

	info->array.major_version = -1;
	info->array.minor_version = -2;
	/* NOTE(review): devnum2devname() presumably returns allocated
	 * storage that is never freed here -- verify */
	sprintf(info->text_version, "/%s/%d",
		devnum2devname(st->container_dev),
		info->container_member);
	info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */
	uuid_from_super_imsm(st, info->uuid);
}
1138
1139
/* Fill 'info' describing the container as a whole, or delegate to the
 * volume-level helper when a volume is currently selected.
 */
static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info)
{
	struct intel_super *super = st->sb;
	struct imsm_disk *disk;
	__u32 s;

	if (super->current_vol >= 0) {
		getinfo_super_imsm_volume(st, info);
		return;
	}

	/* Set raid_disks to zero so that Assemble will always pull in valid
	 * spares
	 */
	info->array.raid_disks = 0;
	info->array.level = LEVEL_CONTAINER;
	info->array.layout = 0;
	info->array.md_minor = -1;
	info->array.ctime = 0; /* N/A for imsm */
	info->array.utime = 0;
	info->array.chunk_size = 0;

	info->disk.major = 0;
	info->disk.minor = 0;
	info->disk.raid_disk = -1;
	info->reshape_active = 0;
	info->array.major_version = -1;
	info->array.minor_version = -2;
	strcpy(info->text_version, "imsm");
	info->safe_mode_delay = 0;
	info->disk.number = -1;
	info->disk.state = 0;
	info->name[0] = 0;

	if (super->disks) {
		/* describe the first disk on the container's disk list;
		 * the metadata occupies the reserved area at the end */
		__u32 reserved = imsm_reserved_sectors(super, super->disks);

		disk = &super->disks->disk;
		info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved;
		info->component_size = reserved;
		s = disk->status;
		info->disk.state = s & CONFIGURED_DISK ? (1 << MD_DISK_ACTIVE) : 0;
		info->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0;
		info->disk.state |= s & SPARE_DISK ? 0 : (1 << MD_DISK_SYNC);
	}

	/* only call uuid_from_super_imsm when this disk is part of a populated
	 * container; ->compare_super may have updated the 'num_raid_devs'
	 * field for spares
	 */
	if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
		uuid_from_super_imsm(st, info->uuid);
	else
		memcpy(info->uuid, uuid_match_any, sizeof(int[4]));
}
1194
/* FIXME: no metadata updates are actually implemented for imsm yet.
 *
 * For 'assemble' and 'force' the return value must be non-zero when a
 * change was made; for other updates it is ignored.  Update options:
 *   force-one:   device looks old but must be included; refresh age info
 *   assemble:    clear any 'faulty' flag so the device can be assembled
 *   force-array: array is degraded but forced; mark clean if needed
 *   newdev:      not used ????
 *   grow:        array gained a device (linear only today)
 *   resync:      mark dirty so a resync happens
 *   name:        update the name, preserving the homehost
 * Not relevant for imsm: sparc2.2, super-minor, summaries, uuid,
 * homehost, _reshape_progress.
 */
static int update_super_imsm(struct supertype *st, struct mdinfo *info,
			     char *update, char *devname, int verbose,
			     int uuid_set, char *homehost)
{
	int changed = 0;

	if (strcmp(update, "grow") == 0) {
		/* not implemented */
	}
	if (strcmp(update, "resync") == 0) {
		/* would set dev->vol.dirty = 1 */
	}

	/* IMSM has no concept of UUID or homehost */

	return changed;
}
1239
1240 static size_t disks_to_mpb_size(int disks)
1241 {
1242 size_t size;
1243
1244 size = sizeof(struct imsm_super);
1245 size += (disks - 1) * sizeof(struct imsm_disk);
1246 size += 2 * sizeof(struct imsm_dev);
1247 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
1248 size += (4 - 2) * sizeof(struct imsm_map);
1249 /* 4 possible disk_ord_tbl's */
1250 size += 4 * (disks - 1) * sizeof(__u32);
1251
1252 return size;
1253 }
1254
1255 static __u64 avail_size_imsm(struct supertype *st, __u64 devsize)
1256 {
1257 if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
1258 return 0;
1259
1260 return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
1261 }
1262
1263 static int compare_super_imsm(struct supertype *st, struct supertype *tst)
1264 {
1265 /*
1266 * return:
1267 * 0 same, or first was empty, and second was copied
1268 * 1 second had wrong number
1269 * 2 wrong uuid
1270 * 3 wrong other info
1271 */
1272 struct intel_super *first = st->sb;
1273 struct intel_super *sec = tst->sb;
1274
1275 if (!first) {
1276 st->sb = tst->sb;
1277 tst->sb = NULL;
1278 return 0;
1279 }
1280
1281 if (memcmp(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH) != 0)
1282 return 3;
1283
1284 /* if an anchor does not have num_raid_devs set then it is a free
1285 * floating spare
1286 */
1287 if (first->anchor->num_raid_devs > 0 &&
1288 sec->anchor->num_raid_devs > 0) {
1289 if (first->anchor->family_num != sec->anchor->family_num)
1290 return 3;
1291 }
1292
1293 /* if 'first' is a spare promote it to a populated mpb with sec's
1294 * family number
1295 */
1296 if (first->anchor->num_raid_devs == 0 &&
1297 sec->anchor->num_raid_devs > 0) {
1298 int i;
1299
1300 /* we need to copy raid device info from sec if an allocation
1301 * fails here we don't associate the spare
1302 */
1303 for (i = 0; i < sec->anchor->num_raid_devs; i++) {
1304 first->dev_tbl[i] = malloc(sizeof(struct imsm_dev));
1305 if (!first->dev_tbl) {
1306 while (--i >= 0) {
1307 free(first->dev_tbl[i]);
1308 first->dev_tbl[i] = NULL;
1309 }
1310 fprintf(stderr, "imsm: failed to associate spare\n");
1311 return 3;
1312 }
1313 *first->dev_tbl[i] = *sec->dev_tbl[i];
1314 }
1315
1316 first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
1317 first->anchor->family_num = sec->anchor->family_num;
1318 }
1319
1320 return 0;
1321 }
1322
1323 static void fd2devname(int fd, char *name)
1324 {
1325 struct stat st;
1326 char path[256];
1327 char dname[100];
1328 char *nm;
1329 int rv;
1330
1331 name[0] = '\0';
1332 if (fstat(fd, &st) != 0)
1333 return;
1334 sprintf(path, "/sys/dev/block/%d:%d",
1335 major(st.st_rdev), minor(st.st_rdev));
1336
1337 rv = readlink(path, dname, sizeof(dname));
1338 if (rv <= 0)
1339 return;
1340
1341 dname[rv] = '\0';
1342 nm = strrchr(dname, '/');
1343 nm++;
1344 snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
1345 }
1346
1347
1348 extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
1349
1350 static int imsm_read_serial(int fd, char *devname,
1351 __u8 serial[MAX_RAID_SERIAL_LEN])
1352 {
1353 unsigned char scsi_serial[255];
1354 int rv;
1355 int rsp_len;
1356 int len;
1357 char *c, *rsp_buf;
1358
1359 memset(scsi_serial, 0, sizeof(scsi_serial));
1360
1361 rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));
1362
1363 if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) {
1364 memset(serial, 0, MAX_RAID_SERIAL_LEN);
1365 fd2devname(fd, (char *) serial);
1366 return 0;
1367 }
1368
1369 if (rv != 0) {
1370 if (devname)
1371 fprintf(stderr,
1372 Name ": Failed to retrieve serial for %s\n",
1373 devname);
1374 return rv;
1375 }
1376
1377 /* trim leading whitespace */
1378 rsp_len = scsi_serial[3];
1379 rsp_buf = (char *) &scsi_serial[4];
1380 c = rsp_buf;
1381 while (isspace(*c))
1382 c++;
1383
1384 /* truncate len to the end of rsp_buf if necessary */
1385 if (c + MAX_RAID_SERIAL_LEN > rsp_buf + rsp_len)
1386 len = rsp_len - (c - rsp_buf);
1387 else
1388 len = MAX_RAID_SERIAL_LEN;
1389
1390 /* initialize the buffer and copy rsp_buf characters */
1391 memset(serial, 0, MAX_RAID_SERIAL_LEN);
1392 memcpy(serial, c, len);
1393
1394 /* trim trailing whitespace starting with the last character copied */
1395 c = (char *) &serial[len - 1];
1396 while (isspace(*c) || *c == '\0')
1397 *c-- = '\0';
1398
1399 return 0;
1400 }
1401
/* Compare two disk serials (strcmp-style result, bounded). */
static int serialcmp(__u8 *s1, __u8 *s2)
{
	return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN);
}
1406
/* Copy a disk serial.  strncpy semantics: the destination is NOT
 * NUL-terminated when src occupies all MAX_RAID_SERIAL_LEN bytes,
 * which is why serialcmp() above is also bounded.
 */
static void serialcpy(__u8 *dest, __u8 *src)
{
	strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
}
1411
1412 static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
1413 {
1414 struct dl *dl;
1415
1416 for (dl = super->disks; dl; dl = dl->next)
1417 if (serialcmp(dl->serial, serial) == 0)
1418 break;
1419
1420 return dl;
1421 }
1422
/* Read the serial of the disk behind 'fd' and add it to super->disks,
 * or refresh an existing entry (a disk seen earlier as a spare may be a
 * raid member according to the current anchor).  Returns 0 on success,
 * 2 on failure.
 */
static int
load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
{
	struct dl *dl;
	struct stat stb;
	int rv;
	int i;
	int alloc = 1;	/* cleared when reusing an existing list entry */
	__u8 serial[MAX_RAID_SERIAL_LEN];

	rv = imsm_read_serial(fd, devname, serial);

	if (rv != 0)
		return 2;

	/* check if this is a disk we have seen before. it may be a spare in
	 * super->disks while the current anchor believes it is a raid member,
	 * check if we need to update dl->index
	 */
	dl = serial_to_dl(serial, super);
	if (!dl)
		dl = malloc(sizeof(*dl));
	else
		alloc = 0;

	if (!dl) {
		if (devname)
			fprintf(stderr,
				Name ": failed to allocate disk buffer for %s\n",
				devname);
		return 2;
	}

	if (alloc) {
		/* fresh entry: record identity; linked onto super->disks
		 * only after full success (see bottom) */
		fstat(fd, &stb);
		dl->major = major(stb.st_rdev);
		dl->minor = minor(stb.st_rdev);
		dl->next = super->disks;
		dl->fd = keep_fd ? fd : -1;
		dl->devname = devname ? strdup(devname) : NULL;
		serialcpy(dl->serial, serial);
		dl->index = -2;	/* -2 == not (yet) referenced by the anchor */
		dl->e = NULL;
	} else if (keep_fd) {
		/* replace any previously cached fd with the fresh one */
		close(dl->fd);
		dl->fd = fd;
	}

	/* look up this disk's index in the current anchor */
	for (i = 0; i < super->anchor->num_disks; i++) {
		struct imsm_disk *disk_iter;

		disk_iter = __get_imsm_disk(super->anchor, i);

		if (serialcmp(disk_iter->serial, dl->serial) == 0) {
			dl->disk = *disk_iter;
			/* only set index on disks that are a member of a
			 * populated container, i.e. one with raid_devs
			 */
			if (dl->disk.status & FAILED_DISK)
				dl->index = -2;
			else if (dl->disk.status & SPARE_DISK)
				dl->index = -1;
			else
				dl->index = i;

			break;
		}
	}

	/* no match, maybe a stale failed drive */
	if (i == super->anchor->num_disks && dl->index >= 0) {
		dl->disk = *__get_imsm_disk(super->anchor, dl->index);
		if (dl->disk.status & FAILED_DISK)
			dl->index = -2;
	}

	if (alloc)
		super->disks = dl;

	return 0;
}
1505
/* Copy a raid device description.  The second argument to
 * sizeof_imsm_dev() is 0 -- presumably "without the second (migration)
 * map"; confirm against its definition.
 */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	memcpy(dest, src, sizeof_imsm_dev(src, 0));
}
1510
1511 #ifndef MDASSEMBLE
/* When migrating map0 contains the 'destination' state while map1
 * contains the current state.  When not migrating map0 contains the
 * current state.  This routine assumes that map[0].map_state is set to
 * the current array state before being called.
 *
 * Migration is indicated by one of the following states
 * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed)
 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
 *    map1state=unitialized)
 * 3/ Verify (Resync) (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
 *    map1state=normal)
 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
 *    map1state=degraded)
 */
static void migrate(struct imsm_dev *dev, __u8 to_state, int rebuild_resync)
{
	struct imsm_map *dest;
	struct imsm_map *src = get_imsm_map(dev, 0);

	dev->vol.migr_state = 1;
	dev->vol.migr_type = rebuild_resync;
	dev->vol.curr_migr_unit = 0;
	/* NOTE(review): dest is fetched only after migr_state is set --
	 * get_imsm_map(dev, 1) presumably resolves the second map only
	 * while a migration is active; preserve this ordering */
	dest = get_imsm_map(dev, 1);

	/* snapshot the current state into map1, then mark map0 with the
	 * migration target state */
	memcpy(dest, src, sizeof_imsm_map(src));
	src->map_state = to_state;
}
1539
1540 static void end_migration(struct imsm_dev *dev, __u8 map_state)
1541 {
1542 struct imsm_map *map = get_imsm_map(dev, 0);
1543
1544 dev->vol.migr_state = 0;
1545 dev->vol.curr_migr_unit = 0;
1546 map->map_state = map_state;
1547 }
1548 #endif
1549
1550 static int parse_raid_devices(struct intel_super *super)
1551 {
1552 int i;
1553 struct imsm_dev *dev_new;
1554 size_t len, len_migr;
1555 size_t space_needed = 0;
1556 struct imsm_super *mpb = super->anchor;
1557
1558 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1559 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
1560
1561 len = sizeof_imsm_dev(dev_iter, 0);
1562 len_migr = sizeof_imsm_dev(dev_iter, 1);
1563 if (len_migr > len)
1564 space_needed += len_migr - len;
1565
1566 dev_new = malloc(len_migr);
1567 if (!dev_new)
1568 return 1;
1569 imsm_copy_dev(dev_new, dev_iter);
1570 super->dev_tbl[i] = dev_new;
1571 }
1572
1573 /* ensure that super->buf is large enough when all raid devices
1574 * are migrating
1575 */
1576 if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
1577 void *buf;
1578
1579 len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512);
1580 if (posix_memalign(&buf, 512, len) != 0)
1581 return 1;
1582
1583 memcpy(buf, super->buf, len);
1584 free(super->buf);
1585 super->buf = buf;
1586 super->len = len;
1587 }
1588
1589 return 0;
1590 }
1591
1592 /* retrieve a pointer to the bbm log which starts after all raid devices */
1593 struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
1594 {
1595 void *ptr = NULL;
1596
1597 if (__le32_to_cpu(mpb->bbm_log_size)) {
1598 ptr = mpb;
1599 ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size);
1600 }
1601
1602 return ptr;
1603 }
1604
1605 static void __free_imsm(struct intel_super *super, int free_disks);
1606
1607 /* load_imsm_mpb - read matrix metadata
1608 * allocates super->mpb to be freed by free_super
1609 */
1610 static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
1611 {
1612 unsigned long long dsize;
1613 unsigned long long sectors;
1614 struct stat;
1615 struct imsm_super *anchor;
1616 __u32 check_sum;
1617 int rc;
1618
1619 get_dev_size(fd, NULL, &dsize);
1620
1621 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
1622 if (devname)
1623 fprintf(stderr,
1624 Name ": Cannot seek to anchor block on %s: %s\n",
1625 devname, strerror(errno));
1626 return 1;
1627 }
1628
1629 if (posix_memalign((void**)&anchor, 512, 512) != 0) {
1630 if (devname)
1631 fprintf(stderr,
1632 Name ": Failed to allocate imsm anchor buffer"
1633 " on %s\n", devname);
1634 return 1;
1635 }
1636 if (read(fd, anchor, 512) != 512) {
1637 if (devname)
1638 fprintf(stderr,
1639 Name ": Cannot read anchor block on %s: %s\n",
1640 devname, strerror(errno));
1641 free(anchor);
1642 return 1;
1643 }
1644
1645 if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
1646 if (devname)
1647 fprintf(stderr,
1648 Name ": no IMSM anchor on %s\n", devname);
1649 free(anchor);
1650 return 2;
1651 }
1652
1653 __free_imsm(super, 0);
1654 super->len = ROUND_UP(anchor->mpb_size, 512);
1655 if (posix_memalign(&super->buf, 512, super->len) != 0) {
1656 if (devname)
1657 fprintf(stderr,
1658 Name ": unable to allocate %zu byte mpb buffer\n",
1659 super->len);
1660 free(anchor);
1661 return 2;
1662 }
1663 memcpy(super->buf, anchor, 512);
1664
1665 sectors = mpb_sectors(anchor) - 1;
1666 free(anchor);
1667 if (!sectors) {
1668 rc = load_imsm_disk(fd, super, devname, 0);
1669 if (rc == 0)
1670 rc = parse_raid_devices(super);
1671 return rc;
1672 }
1673
1674 /* read the extended mpb */
1675 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
1676 if (devname)
1677 fprintf(stderr,
1678 Name ": Cannot seek to extended mpb on %s: %s\n",
1679 devname, strerror(errno));
1680 return 1;
1681 }
1682
1683 if (read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
1684 if (devname)
1685 fprintf(stderr,
1686 Name ": Cannot read extended mpb on %s: %s\n",
1687 devname, strerror(errno));
1688 return 2;
1689 }
1690
1691 check_sum = __gen_imsm_checksum(super->anchor);
1692 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
1693 if (devname)
1694 fprintf(stderr,
1695 Name ": IMSM checksum %x != %x on %s\n",
1696 check_sum, __le32_to_cpu(super->anchor->check_sum),
1697 devname);
1698 return 2;
1699 }
1700
1701 /* FIXME the BBM log is disk specific so we cannot use this global
1702 * buffer for all disks. Ok for now since we only look at the global
1703 * bbm_log_size parameter to gate assembly
1704 */
1705 super->bbm_log = __get_imsm_bbm_log(super->anchor);
1706
1707 rc = load_imsm_disk(fd, super, devname, 0);
1708 if (rc == 0)
1709 rc = parse_raid_devices(super);
1710
1711 return rc;
1712 }
1713
1714 static void __free_imsm_disk(struct dl *d)
1715 {
1716 if (d->fd >= 0)
1717 close(d->fd);
1718 if (d->devname)
1719 free(d->devname);
1720 if (d->e)
1721 free(d->e);
1722 free(d);
1723
1724 }
1725 static void free_imsm_disks(struct intel_super *super)
1726 {
1727 struct dl *d;
1728
1729 while (super->disks) {
1730 d = super->disks;
1731 super->disks = d->next;
1732 __free_imsm_disk(d);
1733 }
1734 while (super->missing) {
1735 d = super->missing;
1736 super->missing = d->next;
1737 __free_imsm_disk(d);
1738 }
1739
1740 }
1741
1742 /* free all the pieces hanging off of a super pointer */
1743 static void __free_imsm(struct intel_super *super, int free_disks)
1744 {
1745 int i;
1746
1747 if (super->buf) {
1748 free(super->buf);
1749 super->buf = NULL;
1750 }
1751 if (free_disks)
1752 free_imsm_disks(super);
1753 for (i = 0; i < IMSM_MAX_RAID_DEVS; i++)
1754 if (super->dev_tbl[i]) {
1755 free(super->dev_tbl[i]);
1756 super->dev_tbl[i] = NULL;
1757 }
1758 if (super->hba) {
1759 free((void *) super->hba);
1760 super->hba = NULL;
1761 }
1762 }
1763
/* Release a super and everything attached to it, disks included. */
static void free_imsm(struct intel_super *super)
{
	__free_imsm(super, 1);
	free(super);
}
1769
1770 static void free_super_imsm(struct supertype *st)
1771 {
1772 struct intel_super *super = st->sb;
1773
1774 if (!super)
1775 return;
1776
1777 free_imsm(super);
1778 st->sb = NULL;
1779 }
1780
/* Allocate and zero an intel_super, probe the platform option-rom
 * capabilities and remember the path of the first Intel AHCI
 * controller.  Returns NULL on allocation failure.
 */
static struct intel_super *alloc_super(int creating_imsm)
{
	struct intel_super *super = malloc(sizeof(*super));

	if (super) {
		memset(super, 0, sizeof(*super));
		super->creating_imsm = creating_imsm;
		super->current_vol = -1;	/* no volume selected yet */
		super->create_offset = ~((__u32 ) 0);	/* "unset" sentinel */
		/* platform (option-rom) constraints can be bypassed via env */
		if (!check_env("IMSM_NO_PLATFORM"))
			super->orom = find_imsm_orom();
		if (super->orom) {
			struct sys_dev *list, *ent;

			/* find the first intel ahci controller */
			list = find_driver_devices("pci", "ahci");
			for (ent = list; ent; ent = ent->next)
				if (devpath_to_vendor(ent->path) == 0x8086)
					break;
			if (ent) {
				/* take ownership of the path string so
				 * free_sys_dev() does not release it */
				super->hba = ent->path;
				ent->path = NULL;
			}
			free_sys_dev(&list);
		}
	}

	return super;
}
1810
1811 #ifndef MDASSEMBLE
1812 /* find_missing - helper routine for load_super_imsm_all that identifies
1813 * disks that have disappeared from the system. This routine relies on
1814 * the mpb being uptodate, which it is at load time.
1815 */
1816 static int find_missing(struct intel_super *super)
1817 {
1818 int i;
1819 struct imsm_super *mpb = super->anchor;
1820 struct dl *dl;
1821 struct imsm_disk *disk;
1822
1823 for (i = 0; i < mpb->num_disks; i++) {
1824 disk = __get_imsm_disk(mpb, i);
1825 dl = serial_to_dl(disk->serial, super);
1826 if (dl)
1827 continue;
1828 /* ok we have a 'disk' without a live entry in
1829 * super->disks
1830 */
1831 if (disk->status & FAILED_DISK || !(disk->status & USABLE_DISK))
1832 continue; /* never mind, already marked */
1833
1834 dl = malloc(sizeof(*dl));
1835 if (!dl)
1836 return 1;
1837 dl->major = 0;
1838 dl->minor = 0;
1839 dl->fd = -1;
1840 dl->devname = strdup("missing");
1841 dl->index = i;
1842 serialcpy(dl->serial, disk->serial);
1843 dl->disk = *disk;
1844 dl->next = super->missing;
1845 super->missing = dl;
1846 }
1847
1848 return 0;
1849 }
1850
1851 static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
1852 char *devname, int keep_fd)
1853 {
1854 struct mdinfo *sra;
1855 struct intel_super *super;
1856 struct mdinfo *sd, *best = NULL;
1857 __u32 bestgen = 0;
1858 __u32 gen;
1859 char nm[20];
1860 int dfd;
1861 int rv;
1862
1863 /* check if this disk is a member of an active array */
1864 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
1865 if (!sra)
1866 return 1;
1867
1868 if (sra->array.major_version != -1 ||
1869 sra->array.minor_version != -2 ||
1870 strcmp(sra->text_version, "imsm") != 0)
1871 return 1;
1872
1873 super = alloc_super(0);
1874 if (!super)
1875 return 1;
1876
1877 /* find the most up to date disk in this array, skipping spares */
1878 for (sd = sra->devs; sd; sd = sd->next) {
1879 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
1880 dfd = dev_open(nm, keep_fd ? O_RDWR : O_RDONLY);
1881 if (!dfd) {
1882 free_imsm(super);
1883 return 2;
1884 }
1885 rv = load_imsm_mpb(dfd, super, NULL);
1886 if (!keep_fd)
1887 close(dfd);
1888 if (rv == 0) {
1889 if (super->anchor->num_raid_devs == 0)
1890 gen = 0;
1891 else
1892 gen = __le32_to_cpu(super->anchor->generation_num);
1893 if (!best || gen > bestgen) {
1894 bestgen = gen;
1895 best = sd;
1896 }
1897 } else {
1898 free_imsm(super);
1899 return 2;
1900 }
1901 }
1902
1903 if (!best) {
1904 free_imsm(super);
1905 return 1;
1906 }
1907
1908 /* load the most up to date anchor */
1909 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
1910 dfd = dev_open(nm, O_RDONLY);
1911 if (!dfd) {
1912 free_imsm(super);
1913 return 1;
1914 }
1915 rv = load_imsm_mpb(dfd, super, NULL);
1916 close(dfd);
1917 if (rv != 0) {
1918 free_imsm(super);
1919 return 2;
1920 }
1921
1922 /* re-parse the disk list with the current anchor */
1923 for (sd = sra->devs ; sd ; sd = sd->next) {
1924 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
1925 dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
1926 if (!dfd) {
1927 free_imsm(super);
1928 return 2;
1929 }
1930 load_imsm_disk(dfd, super, NULL, keep_fd);
1931 if (!keep_fd)
1932 close(dfd);
1933 }
1934
1935
1936 if (find_missing(super) != 0) {
1937 free_imsm(super);
1938 return 2;
1939 }
1940
1941 if (st->subarray[0]) {
1942 if (atoi(st->subarray) <= super->anchor->num_raid_devs)
1943 super->current_vol = atoi(st->subarray);
1944 else
1945 return 1;
1946 }
1947
1948 *sbp = super;
1949 st->container_dev = fd2devnum(fd);
1950 if (st->ss == NULL) {
1951 st->ss = &super_imsm;
1952 st->minor_version = 0;
1953 st->max_devs = IMSM_MAX_DEVICES;
1954 }
1955 st->loaded_container = 1;
1956
1957 return 0;
1958 }
1959 #endif
1960
1961 static int load_super_imsm(struct supertype *st, int fd, char *devname)
1962 {
1963 struct intel_super *super;
1964 int rv;
1965
1966 #ifndef MDASSEMBLE
1967 if (load_super_imsm_all(st, fd, &st->sb, devname, 1) == 0)
1968 return 0;
1969 #endif
1970 if (st->subarray[0])
1971 return 1; /* FIXME */
1972
1973 super = alloc_super(0);
1974 if (!super) {
1975 fprintf(stderr,
1976 Name ": malloc of %zu failed.\n",
1977 sizeof(*super));
1978 return 1;
1979 }
1980
1981 rv = load_imsm_mpb(fd, super, devname);
1982
1983 if (rv) {
1984 if (devname)
1985 fprintf(stderr,
1986 Name ": Failed to load all information "
1987 "sections on %s\n", devname);
1988 free_imsm(super);
1989 return rv;
1990 }
1991
1992 st->sb = super;
1993 if (st->ss == NULL) {
1994 st->ss = &super_imsm;
1995 st->minor_version = 0;
1996 st->max_devs = IMSM_MAX_DEVICES;
1997 }
1998 st->loaded_container = 0;
1999
2000 return 0;
2001 }
2002
2003 static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
2004 {
2005 if (info->level == 1)
2006 return 128;
2007 return info->chunk_size >> 9;
2008 }
2009
2010 static __u32 info_to_num_data_stripes(mdu_array_info_t *info)
2011 {
2012 __u32 num_stripes;
2013
2014 num_stripes = (info->size * 2) / info_to_blocks_per_strip(info);
2015 if (info->level == 1)
2016 num_stripes /= 2;
2017
2018 return num_stripes;
2019 }
2020
/* Member size in 512-byte sectors, rounded down to a whole strip.
 * NOTE(review): the mask assumes blocks_per_strip is a power of two --
 * confirm callers guarantee that.
 */
static __u32 info_to_blocks_per_member(mdu_array_info_t *info)
{
	return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1);
}
2025
/* Rewrite the mpb signature version suffix and attribute bits to the
 * minimum metadata level required by the container's current volumes.
 */
static void imsm_update_version_info(struct intel_super *super)
{
	/* update the version and attributes */
	struct imsm_super *mpb = super->anchor;
	char *version;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int i;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		dev = get_imsm_dev(super, i);
		map = get_imsm_map(dev, 0);
		/* any volume exceeding 32 bits of sectors needs 2TB support */
		if (__le32_to_cpu(dev->size_high) > 0)
			mpb->attributes |= MPB_ATTRIB_2TB;

		/* FIXME detect when an array spans a port multiplier */
#if 0
		mpb->attributes |= MPB_ATTRIB_PM;
#endif

		if (mpb->num_raid_devs > 1 ||
		    mpb->attributes != MPB_ATTRIB_CHECKSUM_VERIFY) {
			/* several volumes, or attributes beyond the bare
			 * checksum bit, require the attribute-capable version */
			version = MPB_VERSION_ATTRIBS;
			switch (get_imsm_raid_level(map)) {
			case 0: mpb->attributes |= MPB_ATTRIB_RAID0; break;
			case 1: mpb->attributes |= MPB_ATTRIB_RAID1; break;
			case 10: mpb->attributes |= MPB_ATTRIB_RAID10; break;
			case 5: mpb->attributes |= MPB_ATTRIB_RAID5; break;
			}
		} else {
			/* single plain volume: pick the oldest version
			 * string that can express it */
			if (map->num_members >= 5)
				version = MPB_VERSION_5OR6_DISK_ARRAY;
			else if (dev->status == DEV_CLONE_N_GO)
				/* NOTE(review): equality test on 'status'
				 * looks like it should be a flag test
				 * (dev->status & DEV_CLONE_N_GO) -- confirm */
				version = MPB_VERSION_CNG;
			else if (get_imsm_raid_level(map) == 5)
				version = MPB_VERSION_RAID5;
			else if (map->num_members >= 3)
				version = MPB_VERSION_3OR4_DISK_ARRAY;
			else if (get_imsm_raid_level(map) == 1)
				version = MPB_VERSION_RAID1;
			else
				version = MPB_VERSION_RAID0;
		}
		/* overwrite the version suffix that follows the signature */
		strcpy(((char *) mpb->sig) + strlen(MPB_SIGNATURE), version);
	}
}
2072
2073 static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
2074 unsigned long long size, char *name,
2075 char *homehost, int *uuid)
2076 {
2077 /* We are creating a volume inside a pre-existing container.
2078 * so st->sb is already set.
2079 */
2080 struct intel_super *super = st->sb;
2081 struct imsm_super *mpb = super->anchor;
2082 struct imsm_dev *dev;
2083 struct imsm_vol *vol;
2084 struct imsm_map *map;
2085 int idx = mpb->num_raid_devs;
2086 int i;
2087 unsigned long long array_blocks;
2088 size_t size_old, size_new;
2089
2090 if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
2091 fprintf(stderr, Name": This imsm-container already has the "
2092 "maximum of %d volumes\n", super->orom->vpa);
2093 return 0;
2094 }
2095
2096 /* ensure the mpb is large enough for the new data */
2097 size_old = __le32_to_cpu(mpb->mpb_size);
2098 size_new = disks_to_mpb_size(info->nr_disks);
2099 if (size_new > size_old) {
2100 void *mpb_new;
2101 size_t size_round = ROUND_UP(size_new, 512);
2102
2103 if (posix_memalign(&mpb_new, 512, size_round) != 0) {
2104 fprintf(stderr, Name": could not allocate new mpb\n");
2105 return 0;
2106 }
2107 memcpy(mpb_new, mpb, size_old);
2108 free(mpb);
2109 mpb = mpb_new;
2110 super->anchor = mpb_new;
2111 mpb->mpb_size = __cpu_to_le32(size_new);
2112 memset(mpb_new + size_old, 0, size_round - size_old);
2113 }
2114 super->current_vol = idx;
2115 /* when creating the first raid device in this container set num_disks
2116 * to zero, i.e. delete this spare and add raid member devices in
2117 * add_to_super_imsm_volume()
2118 */
2119 if (super->current_vol == 0)
2120 mpb->num_disks = 0;
2121 sprintf(st->subarray, "%d", idx);
2122 dev = malloc(sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
2123 if (!dev) {
2124 fprintf(stderr, Name": could not allocate raid device\n");
2125 return 0;
2126 }
2127 strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
2128 if (info->level == 1)
2129 array_blocks = info_to_blocks_per_member(info);
2130 else
2131 array_blocks = calc_array_size(info->level, info->raid_disks,
2132 info->layout, info->chunk_size,
2133 info->size*2);
2134 dev->size_low = __cpu_to_le32((__u32) array_blocks);
2135 dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
2136 dev->status = __cpu_to_le32(0);
2137 dev->reserved_blocks = __cpu_to_le32(0);
2138 vol = &dev->vol;
2139 vol->migr_state = 0;
2140 vol->migr_type = MIGR_INIT;
2141 vol->dirty = 0;
2142 vol->curr_migr_unit = 0;
2143 map = get_imsm_map(dev, 0);
2144 map->pba_of_lba0 = __cpu_to_le32(super->create_offset);
2145 map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
2146 map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
2147 map->num_data_stripes = __cpu_to_le32(info_to_num_data_stripes(info));
2148 map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
2149 IMSM_T_STATE_NORMAL;
2150
2151 if (info->level == 1 && info->raid_disks > 2) {
2152 fprintf(stderr, Name": imsm does not support more than 2 disks"
2153 "in a raid1 volume\n");
2154 return 0;
2155 }
2156 if (info->level == 10) {
2157 map->raid_level = 1;
2158 map->num_domains = info->raid_disks / 2;
2159 } else {
2160 map->raid_level = info->level;
2161 map->num_domains = !!map->raid_level;
2162 }
2163
2164 map->num_members = info->raid_disks;
2165 for (i = 0; i < map->num_members; i++) {
2166 /* initialized in add_to_super */
2167 set_imsm_ord_tbl_ent(map, i, 0);
2168 }
2169 mpb->num_raid_devs++;
2170 super->dev_tbl[super->current_vol] = dev;
2171
2172 imsm_update_version_info(super);
2173
2174 return 1;
2175 }
2176
2177 static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
2178 unsigned long long size, char *name,
2179 char *homehost, int *uuid)
2180 {
2181 /* This is primarily called by Create when creating a new array.
2182 * We will then get add_to_super called for each component, and then
2183 * write_init_super called to write it out to each device.
2184 * For IMSM, Create can create on fresh devices or on a pre-existing
2185 * array.
2186 * To create on a pre-existing array a different method will be called.
2187 * This one is just for fresh drives.
2188 */
2189 struct intel_super *super;
2190 struct imsm_super *mpb;
2191 size_t mpb_size;
2192 char *version;
2193
2194 if (!info) {
2195 st->sb = NULL;
2196 return 0;
2197 }
2198 if (st->sb)
2199 return init_super_imsm_volume(st, info, size, name, homehost,
2200 uuid);
2201
2202 super = alloc_super(1);
2203 if (!super)
2204 return 0;
2205 mpb_size = disks_to_mpb_size(info->nr_disks);
2206 if (posix_memalign(&super->buf, 512, mpb_size) != 0) {
2207 free(super);
2208 return 0;
2209 }
2210 mpb = super->buf;
2211 memset(mpb, 0, mpb_size);
2212
2213 mpb->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
2214
2215 version = (char *) mpb->sig;
2216 strcpy(version, MPB_SIGNATURE);
2217 version += strlen(MPB_SIGNATURE);
2218 strcpy(version, MPB_VERSION_RAID0);
2219 mpb->mpb_size = mpb_size;
2220
2221 st->sb = super;
2222 return 1;
2223 }
2224
2225 #ifndef MDASSEMBLE
2226 static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
2227 int fd, char *devname)
2228 {
2229 struct intel_super *super = st->sb;
2230 struct imsm_super *mpb = super->anchor;
2231 struct dl *dl;
2232 struct imsm_dev *dev;
2233 struct imsm_map *map;
2234
2235 dev = get_imsm_dev(super, super->current_vol);
2236 map = get_imsm_map(dev, 0);
2237
2238 if (! (dk->state & (1<<MD_DISK_SYNC))) {
2239 fprintf(stderr, Name ": %s: Cannot add spare devices to IMSM volume\n",
2240 devname);
2241 return 1;
2242 }
2243
2244 for (dl = super->disks; dl ; dl = dl->next)
2245 if (dl->major == dk->major &&
2246 dl->minor == dk->minor)
2247 break;
2248
2249 if (!dl) {
2250 fprintf(stderr, Name ": %s is not a member of the same container\n", devname);
2251 return 1;
2252 }
2253
2254 /* add a pristine spare to the metadata */
2255 if (dl->index < 0) {
2256 dl->index = super->anchor->num_disks;
2257 super->anchor->num_disks++;
2258 }
2259 set_imsm_ord_tbl_ent(map, dk->number, dl->index);
2260 dl->disk.status = CONFIGURED_DISK | USABLE_DISK;
2261
2262 /* if we are creating the first raid device update the family number */
2263 if (super->current_vol == 0) {
2264 __u32 sum;
2265 struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
2266 struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index);
2267
2268 *_dev = *dev;
2269 *_disk = dl->disk;
2270 sum = __gen_imsm_checksum(mpb);
2271 mpb->family_num = __cpu_to_le32(sum);
2272 }
2273
2274 return 0;
2275 }
2276
2277 static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
2278 int fd, char *devname)
2279 {
2280 struct intel_super *super = st->sb;
2281 struct dl *dd;
2282 unsigned long long size;
2283 __u32 id;
2284 int rv;
2285 struct stat stb;
2286
2287 /* if we are on an RAID enabled platform check that the disk is
2288 * attached to the raid controller
2289 */
2290 if (super->hba && !disk_attached_to_hba(fd, super->hba)) {
2291 fprintf(stderr,
2292 Name ": %s is not attached to the raid controller: %s\n",
2293 devname ? : "disk", super->hba);
2294 return 1;
2295 }
2296
2297 if (super->current_vol >= 0)
2298 return add_to_super_imsm_volume(st, dk, fd, devname);
2299
2300 fstat(fd, &stb);
2301 dd = malloc(sizeof(*dd));
2302 if (!dd) {
2303 fprintf(stderr,
2304 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
2305 return 1;
2306 }
2307 memset(dd, 0, sizeof(*dd));
2308 dd->major = major(stb.st_rdev);
2309 dd->minor = minor(stb.st_rdev);
2310 dd->index = -1;
2311 dd->devname = devname ? strdup(devname) : NULL;
2312 dd->fd = fd;
2313 rv = imsm_read_serial(fd, devname, dd->serial);
2314 if (rv) {
2315 fprintf(stderr,
2316 Name ": failed to retrieve scsi serial, aborting\n");
2317 free(dd);
2318 abort();
2319 }
2320
2321 get_dev_size(fd, NULL, &size);
2322 size /= 512;
2323 serialcpy(dd->disk.serial, dd->serial);
2324 dd->disk.total_blocks = __cpu_to_le32(size);
2325 dd->disk.status = USABLE_DISK | SPARE_DISK;
2326 if (sysfs_disk_to_scsi_id(fd, &id) == 0)
2327 dd->disk.scsi_id = __cpu_to_le32(id);
2328 else
2329 dd->disk.scsi_id = __cpu_to_le32(0);
2330
2331 if (st->update_tail) {
2332 dd->next = super->add;
2333 super->add = dd;
2334 } else {
2335 dd->next = super->disks;
2336 super->disks = dd;
2337 }
2338
2339 return 0;
2340 }
2341
2342 static int store_imsm_mpb(int fd, struct intel_super *super);
2343
2344 /* spare records have their own family number and do not have any defined raid
2345 * devices
2346 */
2347 static int write_super_imsm_spares(struct intel_super *super, int doclose)
2348 {
2349 struct imsm_super mpb_save;
2350 struct imsm_super *mpb = super->anchor;
2351 __u32 sum;
2352 struct dl *d;
2353
2354 mpb_save = *mpb;
2355 mpb->num_raid_devs = 0;
2356 mpb->num_disks = 1;
2357 mpb->mpb_size = sizeof(struct imsm_super);
2358 mpb->generation_num = __cpu_to_le32(1UL);
2359
2360 for (d = super->disks; d; d = d->next) {
2361 if (d->index != -1)
2362 continue;
2363
2364 mpb->disk[0] = d->disk;
2365 sum = __gen_imsm_checksum(mpb);
2366 mpb->family_num = __cpu_to_le32(sum);
2367 sum = __gen_imsm_checksum(mpb);
2368 mpb->check_sum = __cpu_to_le32(sum);
2369
2370 if (store_imsm_mpb(d->fd, super)) {
2371 fprintf(stderr, "%s: failed for device %d:%d %s\n",
2372 __func__, d->major, d->minor, strerror(errno));
2373 *mpb = mpb_save;
2374 return 1;
2375 }
2376 if (doclose) {
2377 close(d->fd);
2378 d->fd = -1;
2379 }
2380 }
2381
2382 *mpb = mpb_save;
2383 return 0;
2384 }
2385
/* Assemble the in-memory anchor (disk table, raid devices, sizes,
 * checksum) and write it to every disk that belongs to a raid device.
 * Spare disks (index == -1) are handled separately afterwards via
 * write_super_imsm_spares(). Returns 0 on success; per-disk write
 * failures are reported but do not stop the loop.
 */
static int write_super_imsm(struct intel_super *super, int doclose)
{
	struct imsm_super *mpb = super->anchor;
	struct dl *d;
	__u32 generation;
	__u32 sum;
	int spares = 0;
	int i;
	/* base size excludes the disk[] flexible tail's first entry */
	__u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);

	/* 'generation' is incremented everytime the metadata is written */
	generation = __le32_to_cpu(mpb->generation_num);
	generation++;
	mpb->generation_num = __cpu_to_le32(generation);

	mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;
	for (d = super->disks; d; d = d->next) {
		if (d->index == -1)
			spares++;
		else
			mpb->disk[d->index] = d->disk;
	}
	/* missing disks keep their slot so the array stays degraded,
	 * not scrambled
	 */
	for (d = super->missing; d; d = d->next)
		mpb->disk[d->index] = d->disk;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);

		imsm_copy_dev(dev, super->dev_tbl[i]);
		mpb_size += sizeof_imsm_dev(dev, 0);
	}
	mpb_size += __le32_to_cpu(mpb->bbm_log_size);
	mpb->mpb_size = __cpu_to_le32(mpb_size);

	/* recalculate checksum */
	sum = __gen_imsm_checksum(mpb);
	mpb->check_sum = __cpu_to_le32(sum);

	/* write the mpb for disks that compose raid devices */
	for (d = super->disks; d ; d = d->next) {
		if (d->index < 0)
			continue;
		if (store_imsm_mpb(d->fd, super))
			fprintf(stderr, "%s: failed for device %d:%d %s\n",
				__func__, d->major, d->minor, strerror(errno));
		if (doclose) {
			close(d->fd);
			d->fd = -1;
		}
	}

	if (spares)
		return write_super_imsm_spares(super, doclose);

	return 0;
}
2442
2443
2444 static int create_array(struct supertype *st)
2445 {
2446 size_t len;
2447 struct imsm_update_create_array *u;
2448 struct intel_super *super = st->sb;
2449 struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
2450 struct imsm_map *map = get_imsm_map(dev, 0);
2451 struct disk_info *inf;
2452 struct imsm_disk *disk;
2453 int i;
2454 int idx;
2455
2456 len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0) +
2457 sizeof(*inf) * map->num_members;
2458 u = malloc(len);
2459 if (!u) {
2460 fprintf(stderr, "%s: failed to allocate update buffer\n",
2461 __func__);
2462 return 1;
2463 }
2464
2465 u->type = update_create_array;
2466 u->dev_idx = super->current_vol;
2467 imsm_copy_dev(&u->dev, dev);
2468 inf = get_disk_info(u);
2469 for (i = 0; i < map->num_members; i++) {
2470 idx = get_imsm_disk_idx(dev, i);
2471 disk = get_imsm_disk(super, idx);
2472 serialcpy(inf[i].serial, disk->serial);
2473 }
2474 append_metadata_update(st, u, len);
2475
2476 return 0;
2477 }
2478
2479 static int _add_disk(struct supertype *st)
2480 {
2481 struct intel_super *super = st->sb;
2482 size_t len;
2483 struct imsm_update_add_disk *u;
2484
2485 if (!super->add)
2486 return 0;
2487
2488 len = sizeof(*u);
2489 u = malloc(len);
2490 if (!u) {
2491 fprintf(stderr, "%s: failed to allocate update buffer\n",
2492 __func__);
2493 return 1;
2494 }
2495
2496 u->type = update_add_disk;
2497 append_metadata_update(st, u, len);
2498
2499 return 0;
2500 }
2501
2502 static int write_init_super_imsm(struct supertype *st)
2503 {
2504 if (st->update_tail) {
2505 /* queue the recently created array / added disk
2506 * as a metadata update */
2507 struct intel_super *super = st->sb;
2508 struct dl *d;
2509 int rv;
2510
2511 /* determine if we are creating a volume or adding a disk */
2512 if (super->current_vol < 0) {
2513 /* in the add disk case we are running in mdmon
2514 * context, so don't close fd's
2515 */
2516 return _add_disk(st);
2517 } else
2518 rv = create_array(st);
2519
2520 for (d = super->disks; d ; d = d->next) {
2521 close(d->fd);
2522 d->fd = -1;
2523 }
2524
2525 return rv;
2526 } else
2527 return write_super_imsm(st->sb, 1);
2528 }
2529 #endif
2530
2531 static int store_zero_imsm(struct supertype *st, int fd)
2532 {
2533 unsigned long long dsize;
2534 void *buf;
2535
2536 get_dev_size(fd, NULL, &dsize);
2537
2538 /* first block is stored on second to last sector of the disk */
2539 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
2540 return 1;
2541
2542 if (posix_memalign(&buf, 512, 512) != 0)
2543 return 1;
2544
2545 memset(buf, 0, 512);
2546 if (write(fd, buf, 512) != 512)
2547 return 1;
2548 return 0;
2549 }
2550
2551 static int imsm_bbm_log_size(struct imsm_super *mpb)
2552 {
2553 return __le32_to_cpu(mpb->bbm_log_size);
2554 }
2555
2556 #ifndef MDASSEMBLE
2557 static int validate_geometry_imsm_container(struct supertype *st, int level,
2558 int layout, int raiddisks, int chunk,
2559 unsigned long long size, char *dev,
2560 unsigned long long *freesize,
2561 int verbose)
2562 {
2563 int fd;
2564 unsigned long long ldsize;
2565 const struct imsm_orom *orom;
2566
2567 if (level != LEVEL_CONTAINER)
2568 return 0;
2569 if (!dev)
2570 return 1;
2571
2572 if (check_env("IMSM_NO_PLATFORM"))
2573 orom = NULL;
2574 else
2575 orom = find_imsm_orom();
2576 if (orom && raiddisks > orom->tds) {
2577 if (verbose)
2578 fprintf(stderr, Name ": %d exceeds maximum number of"
2579 " platform supported disks: %d\n",
2580 raiddisks, orom->tds);
2581 return 0;
2582 }
2583
2584 fd = open(dev, O_RDONLY|O_EXCL, 0);
2585 if (fd < 0) {
2586 if (verbose)
2587 fprintf(stderr, Name ": imsm: Cannot open %s: %s\n",
2588 dev, strerror(errno));
2589 return 0;
2590 }
2591 if (!get_dev_size(fd, dev, &ldsize)) {
2592 close(fd);
2593 return 0;
2594 }
2595 close(fd);
2596
2597 *freesize = avail_size_imsm(st, ldsize >> 9);
2598
2599 return 1;
2600 }
2601
2602 static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
2603 {
2604 const unsigned long long base_start = e[*idx].start;
2605 unsigned long long end = base_start + e[*idx].size;
2606 int i;
2607
2608 if (base_start == end)
2609 return 0;
2610
2611 *idx = *idx + 1;
2612 for (i = *idx; i < num_extents; i++) {
2613 /* extend overlapping extents */
2614 if (e[i].start >= base_start &&
2615 e[i].start <= end) {
2616 if (e[i].size == 0)
2617 return 0;
2618 if (e[i].start + e[i].size > end)
2619 end = e[i].start + e[i].size;
2620 } else if (e[i].start > end) {
2621 *idx = i;
2622 break;
2623 }
2624 }
2625
2626 return end - base_start;
2627 }
2628
2629 static unsigned long long merge_extents(struct intel_super *super, int sum_extents)
2630 {
2631 /* build a composite disk with all known extents and generate a new
2632 * 'maxsize' given the "all disks in an array must share a common start
2633 * offset" constraint
2634 */
2635 struct extent *e = calloc(sum_extents, sizeof(*e));
2636 struct dl *dl;
2637 int i, j;
2638 int start_extent;
2639 unsigned long long pos;
2640 unsigned long long start;
2641 unsigned long long maxsize;
2642 unsigned long reserve;
2643
2644 if (!e)
2645 return ~0ULL; /* error */
2646
2647 /* coalesce and sort all extents. also, check to see if we need to
2648 * reserve space between member arrays
2649 */
2650 j = 0;
2651 for (dl = super->disks; dl; dl = dl->next) {
2652 if (!dl->e)
2653 continue;
2654 for (i = 0; i < dl->extent_cnt; i++)
2655 e[j++] = dl->e[i];
2656 }
2657 qsort(e, sum_extents, sizeof(*e), cmp_extent);
2658
2659 /* merge extents */
2660 i = 0;
2661 j = 0;
2662 while (i < sum_extents) {
2663 e[j].start = e[i].start;
2664 e[j].size = find_size(e, &i, sum_extents);
2665 j++;
2666 if (e[j-1].size == 0)
2667 break;
2668 }
2669
2670 pos = 0;
2671 maxsize = 0;
2672 start_extent = 0;
2673 i = 0;
2674 do {
2675 unsigned long long esize;
2676
2677 esize = e[i].start - pos;
2678 if (esize >= maxsize) {
2679 maxsize = esize;
2680 start = pos;
2681 start_extent = i;
2682 }
2683 pos = e[i].start + e[i].size;
2684 i++;
2685 } while (e[i-1].size);
2686 free(e);
2687
2688 if (start_extent > 0)
2689 reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
2690 else
2691 reserve = 0;
2692
2693 if (maxsize < reserve)
2694 return ~0ULL;
2695
2696 super->create_offset = ~((__u32) 0);
2697 if (start + reserve > super->create_offset)
2698 return ~0ULL; /* start overflows create_offset */
2699 super->create_offset = start + reserve;
2700
2701 return maxsize - reserve;
2702 }
2703
/* Return non-zero when 'level' (with 'raiddisks' members) is allowed:
 * levels 4, 6 and negatives never are; without an option-ROM anything
 * else goes; with one, defer to the orom capability bits.
 */
static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
{
	if (level == 4 || level == 6 || level < 0)
		return 0;

	/* not on an Intel RAID platform so anything goes */
	if (!orom)
		return 1;

	switch (level) {
	case 0:
		return imsm_orom_has_raid0(orom);
	case 1:
		/* >2-disk raid1 maps to imsm raid1e */
		return raiddisks > 2 ? imsm_orom_has_raid1e(orom)
				     : imsm_orom_has_raid1(orom);
	case 5:
		return imsm_orom_has_raid5(orom);
	case 10:
		return imsm_orom_has_raid10(orom);
	}

	return 0;
}
2726
/* NOTE: shadows stdio's vprintf within this file; only prints when the
 * local 'verbose' flag is set.
 */
#define vprintf(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
/* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
 * FIX ME add ahci details
 *
 * Check that a volume of the requested level/layout/chunk/size can be
 * created inside an already-loaded container. With dev == NULL this is
 * a general capacity test over all container members; with a specific
 * dev it verifies membership and records the merged free-space size in
 * *freesize. Returns 1 if the geometry is acceptable, 0 otherwise.
 */
static int validate_geometry_imsm_volume(struct supertype *st, int level,
					 int layout, int raiddisks, int chunk,
					 unsigned long long size, char *dev,
					 unsigned long long *freesize,
					 int verbose)
{
	struct stat stb;
	struct intel_super *super = st->sb;
	struct dl *dl;
	unsigned long long pos = 0;
	unsigned long long maxsize;
	struct extent *e;
	int i;

	/* We must have the container info already read in. */
	if (!super)
		return 0;

	/* platform (option-ROM) constraints: level, chunk, layout */
	if (!is_raid_level_supported(super->orom, level, raiddisks)) {
		vprintf(": platform does not support raid level: %d\n", level);
		return 0;
	}
	if (super->orom && !imsm_orom_has_chunk(super->orom, chunk)) {
		vprintf(": platform does not support a chunk size of: %d\n", chunk);
		return 0;
	}
	if (layout != imsm_level_to_layout(level)) {
		if (level == 5)
			vprintf(": imsm raid 5 only supports the left-asymmetric layout\n");
		else if (level == 10)
			vprintf(": imsm raid 10 only supports the n2 layout\n");
		else
			vprintf(": imsm unknown layout %#x for this raid level %d\n",
				layout, level);
		return 0;
	}

	if (!dev) {
		/* General test: make sure there is space for
		 * 'raiddisks' device extents of size 'size' at a given
		 * offset
		 */
		unsigned long long minsize = size*2 /* convert to blocks */;
		unsigned long long start_offset = ~0ULL;
		int dcnt = 0;
		if (minsize == 0)
			minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
		for (dl = super->disks; dl ; dl = dl->next) {
			int found = 0;

			pos = 0;
			i = 0;
			e = get_extents(super, dl);
			if (!e) continue;
			/* walk the gaps between extents; all disks must
			 * offer a big-enough gap at the same offset
			 */
			do {
				unsigned long long esize;
				esize = e[i].start - pos;
				if (esize >= minsize)
					found = 1;
				if (found && start_offset == ~0ULL) {
					start_offset = pos;
					break;
				} else if (found && pos != start_offset) {
					found = 0;
					break;
				}
				pos = e[i].start + e[i].size;
				i++;
			} while (e[i-1].size);
			if (found)
				dcnt++;
			free(e);
		}
		if (dcnt < raiddisks) {
			if (verbose)
				fprintf(stderr, Name ": imsm: Not enough "
					"devices with space for this array "
					"(%d < %d)\n",
					dcnt, raiddisks);
			return 0;
		}
		return 1;
	}

	/* This device must be a member of the set */
	if (stat(dev, &stb) < 0)
		return 0;
	if ((S_IFMT & stb.st_mode) != S_IFBLK)
		return 0;
	for (dl = super->disks ; dl ; dl = dl->next) {
		if (dl->major == major(stb.st_rdev) &&
		    dl->minor == minor(stb.st_rdev))
			break;
	}
	if (!dl) {
		if (verbose)
			fprintf(stderr, Name ": %s is not in the "
				"same imsm set\n", dev);
		return 0;
	}

	/* retrieve the largest free space block */
	e = get_extents(super, dl);
	maxsize = 0;
	i = 0;
	if (e) {
		do {
			unsigned long long esize;

			esize = e[i].start - pos;
			if (esize >= maxsize)
				maxsize = esize;
			pos = e[i].start + e[i].size;
			i++;
		} while (e[i-1].size);
		/* stash the extents on dl for merge_extents() below */
		dl->e = e;
		dl->extent_cnt = i;
	} else {
		if (verbose)
			fprintf(stderr, Name ": unable to determine free space for: %s\n",
				dev);
		return 0;
	}
	if (maxsize < size) {
		if (verbose)
			fprintf(stderr, Name ": %s not enough space (%llu < %llu)\n",
				dev, maxsize, size);
		return 0;
	}

	/* count total number of extents for merge */
	i = 0;
	for (dl = super->disks; dl; dl = dl->next)
		if (dl->e)
			i += dl->extent_cnt;

	/* merge sets super->create_offset as a side effect */
	maxsize = merge_extents(super, i);
	if (maxsize < size) {
		if (verbose)
			fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n",
				maxsize, size);
		return 0;
	} else if (maxsize == ~0ULL) {
		if (verbose)
			fprintf(stderr, Name ": failed to merge %d extents\n", i);
		return 0;
	}

	*freesize = maxsize;

	return 1;
}
2883
/* Top-level geometry validation dispatcher: container requests go to
 * validate_geometry_imsm_container(); with a loaded superblock, volume
 * requests go to validate_geometry_imsm_volume(); otherwise try to
 * treat 'dev' as a member of an existing imsm container and load it.
 * Returns 1 on acceptance, 0 on rejection.
 */
static int validate_geometry_imsm(struct supertype *st, int level, int layout,
				  int raiddisks, int chunk, unsigned long long size,
				  char *dev, unsigned long long *freesize,
				  int verbose)
{
	int fd, cfd;
	struct mdinfo *sra;

	/* if given unused devices create a container
	 * if given given devices in a container create a member volume
	 */
	if (level == LEVEL_CONTAINER) {
		/* Must be a fresh device to add to a container */
		return validate_geometry_imsm_container(st, level, layout,
							raiddisks, chunk, size,
							dev, freesize,
							verbose);
	}

	if (st->sb) {
		/* creating in a given container */
		return validate_geometry_imsm_volume(st, level, layout,
						     raiddisks, chunk, size,
						     dev, freesize, verbose);
	}

	/* limit creation to the following levels */
	if (!dev)
		switch (level) {
		case 0:
		case 1:
		case 10:
		case 5:
			break;
		default:
			return 1;
		}
	/* NOTE(review): when dev is NULL and the level is one of the
	 * above, control falls through to open(NULL, ...) below — verify
	 * callers never pass dev == NULL with these levels
	 */

	/* This device needs to be a device in an 'imsm' container */
	fd = open(dev, O_RDONLY|O_EXCL, 0);
	if (fd >= 0) {
		/* O_EXCL succeeded, so the device is unused — it cannot
		 * already be an imsm container member
		 */
		if (verbose)
			fprintf(stderr,
				Name ": Cannot create this array on device %s\n",
				dev);
		close(fd);
		return 0;
	}
	if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
		if (verbose)
			fprintf(stderr, Name ": Cannot open %s: %s\n",
				dev, strerror(errno));
		return 0;
	}
	/* Well, it is in use by someone, maybe an 'imsm' container. */
	cfd = open_container(fd);
	if (cfd < 0) {
		close(fd);
		if (verbose)
			fprintf(stderr, Name ": Cannot use %s: It is busy\n",
				dev);
		return 0;
	}
	sra = sysfs_read(cfd, 0, GET_VERSION);
	close(fd);
	/* NOTE(review): sra is never freed in this function — possible
	 * leak; confirm against sysfs_free usage elsewhere
	 */
	if (sra && sra->array.major_version == -1 &&
	    strcmp(sra->text_version, "imsm") == 0) {
		/* This is a member of a imsm container. Load the container
		 * and try to create a volume
		 */
		struct intel_super *super;

		if (load_super_imsm_all(st, cfd, (void **) &super, NULL, 1) == 0) {
			st->sb = super;
			st->container_dev = fd2devnum(cfd);
			close(cfd);
			return validate_geometry_imsm_volume(st, level, layout,
							     raiddisks, chunk,
							     size, dev,
							     freesize, verbose);
		}
		close(cfd);
	} else /* may belong to another container */
		return 0;

	return 1;
}
2971 #endif /* MDASSEMBLE */
2972
2973 static struct mdinfo *container_content_imsm(struct supertype *st)
2974 {
2975 /* Given a container loaded by load_super_imsm_all,
2976 * extract information about all the arrays into
2977 * an mdinfo tree.
2978 *
2979 * For each imsm_dev create an mdinfo, fill it in,
2980 * then look for matching devices in super->disks
2981 * and create appropriate device mdinfo.
2982 */
2983 struct intel_super *super = st->sb;
2984 struct imsm_super *mpb = super->anchor;
2985 struct mdinfo *rest = NULL;
2986 int i;
2987
2988 /* do not assemble arrays that might have bad blocks */
2989 if (imsm_bbm_log_size(super->anchor)) {
2990 fprintf(stderr, Name ": BBM log found in metadata. "
2991 "Cannot activate array(s).\n");
2992 return NULL;
2993 }
2994
2995 for (i = 0; i < mpb->num_raid_devs; i++) {
2996 struct imsm_dev *dev = get_imsm_dev(super, i);
2997 struct imsm_map *map = get_imsm_map(dev, 0);
2998 struct mdinfo *this;
2999 int slot;
3000
3001 this = malloc(sizeof(*this));
3002 memset(this, 0, sizeof(*this));
3003 this->next = rest;
3004
3005 super->current_vol = i;
3006 getinfo_super_imsm_volume(st, this);
3007 for (slot = 0 ; slot < map->num_members; slot++) {
3008 struct mdinfo *info_d;
3009 struct dl *d;
3010 int idx;
3011 int skip;
3012 __u32 s;
3013 __u32 ord;
3014
3015 skip = 0;
3016 idx = get_imsm_disk_idx(dev, slot);
3017 ord = get_imsm_ord_tbl_ent(dev, slot);
3018 for (d = super->disks; d ; d = d->next)
3019 if (d->index == idx)
3020 break;
3021
3022 if (d == NULL)
3023 skip = 1;
3024
3025 s = d ? d->disk.status : 0;
3026 if (s & FAILED_DISK)
3027 skip = 1;
3028 if (!(s & USABLE_DISK))
3029 skip = 1;
3030 if (ord & IMSM_ORD_REBUILD)
3031 skip = 1;
3032
3033 /*
3034 * if we skip some disks the array will be assmebled degraded;
3035 * reset resync start to avoid a dirty-degraded situation
3036 *
3037 * FIXME handle dirty degraded
3038 */
3039 if (skip && !dev->vol.dirty)
3040 this->resync_start = ~0ULL;
3041 if (skip)
3042 continue;
3043
3044 info_d = malloc(sizeof(*info_d));
3045 if (!info_d) {
3046 fprintf(stderr, Name ": failed to allocate disk"
3047 " for volume %s\n", (char *) dev->volume);
3048 free(this);
3049 this = rest;
3050 break;
3051 }
3052 memset(info_d, 0, sizeof(*info_d));
3053 info_d->next = this->devs;
3054 this->devs = info_d;
3055
3056 info_d->disk.number = d->index;
3057 info_d->disk.major = d->major;
3058 info_d->disk.minor = d->minor;
3059 info_d->disk.raid_disk = slot;
3060
3061 this->array.working_disks++;
3062
3063 info_d->events = __le32_to_cpu(mpb->generation_num);
3064 info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
3065 info_d->component_size = __le32_to_cpu(map->blocks_per_member);
3066 if (d->devname)
3067 strcpy(info_d->name, d->devname);
3068 }
3069 rest = this;
3070 }
3071
3072 return rest;
3073 }
3074
3075
3076 #ifndef MDASSEMBLE
3077 static int imsm_open_new(struct supertype *c, struct active_array *a,
3078 char *inst)
3079 {
3080 struct intel_super *super = c->sb;
3081 struct imsm_super *mpb = super->anchor;
3082
3083 if (atoi(inst) >= mpb->num_raid_devs) {
3084 fprintf(stderr, "%s: subarry index %d, out of range\n",
3085 __func__, atoi(inst));
3086 return -ENODEV;
3087 }
3088
3089 dprintf("imsm: open_new %s\n", inst);
3090 a->info.container_member = atoi(inst);
3091 return 0;
3092 }
3093
3094 static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed)
3095 {
3096 struct imsm_map *map = get_imsm_map(dev, 0);
3097
3098 if (!failed)
3099 return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
3100 IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;
3101
3102 switch (get_imsm_raid_level(map)) {
3103 case 0:
3104 return IMSM_T_STATE_FAILED;
3105 break;
3106 case 1:
3107 if (failed < map->num_members)
3108 return IMSM_T_STATE_DEGRADED;
3109 else
3110 return IMSM_T_STATE_FAILED;
3111 break;
3112 case 10:
3113 {
3114 /**
3115 * check to see if any mirrors have failed, otherwise we
3116 * are degraded. Even numbered slots are mirrored on
3117 * slot+1
3118 */
3119 int i;
3120 /* gcc -Os complains that this is unused */
3121 int insync = insync;
3122
3123 for (i = 0; i < map->num_members; i++) {
3124 __u32 ord = get_imsm_ord_tbl_ent(dev, i);
3125 int idx = ord_to_idx(ord);
3126 struct imsm_disk *disk;
3127
3128 /* reset the potential in-sync count on even-numbered
3129 * slots. num_copies is always 2 for imsm raid10
3130 */
3131 if ((i & 1) == 0)
3132 insync = 2;
3133
3134 disk = get_imsm_disk(super, idx);
3135 if (!disk || disk->status & FAILED_DISK ||
3136 ord & IMSM_ORD_REBUILD)
3137 insync--;
3138
3139 /* no in-sync disks left in this mirror the
3140 * array has failed
3141 */
3142 if (insync == 0)
3143 return IMSM_T_STATE_FAILED;
3144 }
3145
3146 return IMSM_T_STATE_DEGRADED;
3147 }
3148 case 5:
3149 if (failed < 2)
3150 return IMSM_T_STATE_DEGRADED;
3151 else
3152 return IMSM_T_STATE_FAILED;
3153 break;
3154 default:
3155 break;
3156 }
3157
3158 return map->map_state;
3159 }
3160
3161 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev)
3162 {
3163 int i;
3164 int failed = 0;
3165 struct imsm_disk *disk;
3166 struct imsm_map *map = get_imsm_map(dev, 0);
3167
3168 for (i = 0; i < map->num_members; i++) {
3169 __u32 ord = get_imsm_ord_tbl_ent(dev, i);
3170 int idx = ord_to_idx(ord);
3171
3172 disk = get_imsm_disk(super, idx);
3173 if (!disk || disk->status & FAILED_DISK ||
3174 ord & IMSM_ORD_REBUILD)
3175 failed++;
3176 }
3177
3178 return failed;
3179 }
3180
3181 static int is_resyncing(struct imsm_dev *dev)
3182 {
3183 struct imsm_map *migr_map;
3184
3185 if (!dev->vol.migr_state)
3186 return 0;
3187
3188 if (dev->vol.migr_type == MIGR_INIT)
3189 return 1;
3190
3191 migr_map = get_imsm_map(dev, 1);
3192
3193 if (migr_map->map_state == IMSM_T_STATE_NORMAL)
3194 return 1;
3195 else
3196 return 0;
3197 }
3198
3199 static int is_rebuilding(struct imsm_dev *dev)
3200 {
3201 struct imsm_map *migr_map;
3202
3203 if (!dev->vol.migr_state)
3204 return 0;
3205
3206 if (dev->vol.migr_type != MIGR_REBUILD)
3207 return 0;
3208
3209 migr_map = get_imsm_map(dev, 1);
3210
3211 if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
3212 return 1;
3213 else
3214 return 0;
3215 }
3216
/* Flag 'disk' as failed in the metadata; idempotent for already-failed
 * disks.
 */
static void mark_failure(struct imsm_disk *disk)
{
	if (disk->status & FAILED_DISK)
		return;
	disk->status |= FAILED_DISK;
	disk->scsi_id = __cpu_to_le32(~(__u32)0);
	/* shift the serial left one byte — NOTE(review): presumably this
	 * mangles the serial so the failed record no longer matches the
	 * live disk during serial lookup; confirm against the
	 * serial-matching code before relying on this
	 */
	memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
}
3225
/* Handle dirty -> clean transititions and resync. Degraded and rebuild
 * states are handled in imsm_set_disk() with one exception, when a
 * resync is stopped due to a new failure this routine will set the
 * 'degraded' state for the array.
 *
 * Returns the (possibly downgraded) 'consistent' value; sets
 * super->updates_pending for every metadata change made here.
 */
static int imsm_set_array_state(struct active_array *a, int consistent)
{
	int inst = a->info.container_member;
	struct intel_super *super = a->container->sb;
	struct imsm_dev *dev = get_imsm_dev(super, inst);
	struct imsm_map *map = get_imsm_map(dev, 0);
	int failed = imsm_count_failed(super, dev);
	__u8 map_state = imsm_check_degraded(super, dev, failed);

	/* before we activate this array handle any missing disks */
	if (consistent == 2 && super->missing) {
		struct dl *dl;

		dprintf("imsm: mark missing\n");
		end_migration(dev, map_state);
		for (dl = super->missing; dl; dl = dl->next)
			mark_failure(&dl->disk);
		super->updates_pending++;
	}

	/* consistent == 2 means "activation"; downgrade to dirty unless
	 * the array is fully synced, normal, and not migrating
	 */
	if (consistent == 2 &&
	    (!is_resync_complete(a) ||
	     map_state != IMSM_T_STATE_NORMAL ||
	     dev->vol.migr_state))
		consistent = 0;

	if (is_resync_complete(a)) {
		/* complete intialization / resync,
		 * recovery is completed in ->set_disk
		 */
		if (is_resyncing(dev)) {
			dprintf("imsm: mark resync done\n");
			end_migration(dev, map_state);
			super->updates_pending++;
		}
	} else if (!is_resyncing(dev) && !failed) {
		/* mark the start of the init process if nothing is failed */
		dprintf("imsm: mark resync start (%llu)\n", a->resync_start);
		if (map->map_state == IMSM_T_STATE_NORMAL)
			migrate(dev, IMSM_T_STATE_NORMAL, MIGR_REBUILD);
		else
			migrate(dev, IMSM_T_STATE_NORMAL, MIGR_INIT);
		super->updates_pending++;
	}

	/* check if we can update the migration checkpoint */
	if (dev->vol.migr_state &&
	    __le32_to_cpu(dev->vol.curr_migr_unit) != a->resync_start) {
		dprintf("imsm: checkpoint migration (%llu)\n", a->resync_start);
		dev->vol.curr_migr_unit = __cpu_to_le32(a->resync_start);
		super->updates_pending++;
	}

	/* mark dirty / clean */
	if (dev->vol.dirty != !consistent) {
		dprintf("imsm: mark '%s' (%llu)\n",
			consistent ? "clean" : "dirty", a->resync_start);
		if (consistent)
			dev->vol.dirty = 0;
		else
			dev->vol.dirty = 1;
		super->updates_pending++;
	}
	return consistent;
}
3296
3297 static void imsm_set_disk(struct active_array *a, int n, int state)
3298 {
3299 int inst = a->info.container_member;
3300 struct intel_super *super = a->container->sb;
3301 struct imsm_dev *dev = get_imsm_dev(super, inst);
3302 struct imsm_map *map = get_imsm_map(dev, 0);
3303 struct imsm_disk *disk;
3304 int failed;
3305 __u32 ord;
3306 __u8 map_state;
3307
3308 if (n > map->num_members)
3309 fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
3310 n, map->num_members - 1);
3311
3312 if (n < 0)
3313 return;
3314
3315 dprintf("imsm: set_disk %d:%x\n", n, state);
3316
3317 ord = get_imsm_ord_tbl_ent(dev, n);
3318 disk = get_imsm_disk(super, ord_to_idx(ord));
3319
3320 /* check for new failures */
3321 if ((state & DS_FAULTY) && !(disk->status & FAILED_DISK)) {
3322 mark_failure(disk);
3323 super->updates_pending++;
3324 }
3325
3326 /* check if in_sync */
3327 if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD) {
3328 struct imsm_map *migr_map = get_imsm_map(dev, 1);
3329
3330 set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
3331 super->updates_pending++;
3332 }
3333
3334 failed = imsm_count_failed(super, dev);
3335 map_state = imsm_check_degraded(super, dev, failed);
3336
3337 /* check if recovery complete, newly degraded, or failed */
3338 if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) {
3339 end_migration(dev, map_state);
3340 super->updates_pending++;
3341 } else if (map_state == IMSM_T_STATE_DEGRADED &&
3342 map->map_state != map_state &&
3343 !dev->vol.migr_state) {
3344 dprintf("imsm: mark degraded\n");
3345 map->map_state = map_state;
3346 super->updates_pending++;
3347 } else if (map_state == IMSM_T_STATE_FAILED &&
3348 map->map_state != map_state) {
3349 dprintf("imsm: mark failed\n");
3350 end_migration(dev, map_state);
3351 super->updates_pending++;
3352 }
3353 }
3354
3355 static int store_imsm_mpb(int fd, struct intel_super *super)
3356 {
3357 struct imsm_super *mpb = super->anchor;
3358 __u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
3359 unsigned long long dsize;
3360 unsigned long long sectors;
3361
3362 get_dev_size(fd, NULL, &dsize);
3363
3364 if (mpb_size > 512) {
3365 /* -1 to account for anchor */
3366 sectors = mpb_sectors(mpb) - 1;
3367
3368 /* write the extended mpb to the sectors preceeding the anchor */
3369 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
3370 return 1;
3371
3372 if (write(fd, super->buf + 512, 512 * sectors) != 512 * sectors)
3373 return 1;
3374 }
3375
3376 /* first block is stored on second to last sector of the disk */
3377 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
3378 return 1;
3379
3380 if (write(fd, super->buf, 512) != 512)
3381 return 1;
3382
3383 return 0;
3384 }
3385
3386 static void imsm_sync_metadata(struct supertype *container)
3387 {
3388 struct intel_super *super = container->sb;
3389
3390 if (!super->updates_pending)
3391 return;
3392
3393 write_super_imsm(super, 0);
3394
3395 super->updates_pending = 0;
3396 }
3397
3398 static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
3399 {
3400 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
3401 int i = get_imsm_disk_idx(dev, idx);
3402 struct dl *dl;
3403
3404 for (dl = super->disks; dl; dl = dl->next)
3405 if (dl->index == i)
3406 break;
3407
3408 if (dl && dl->disk.status & FAILED_DISK)
3409 dl = NULL;
3410
3411 if (dl)
3412 dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor);
3413
3414 return dl;
3415 }
3416
3417 static struct dl *imsm_add_spare(struct intel_super *super, int slot, struct active_array *a)
3418 {
3419 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
3420 int idx = get_imsm_disk_idx(dev, slot);
3421 struct imsm_map *map = get_imsm_map(dev, 0);
3422 unsigned long long esize;
3423 unsigned long long pos;
3424 struct mdinfo *d;
3425 struct extent *ex;
3426 int j;
3427 int found;
3428 __u32 array_start;
3429 struct dl *dl;
3430
3431 for (dl = super->disks; dl; dl = dl->next) {
3432 /* If in this array, skip */
3433 for (d = a->info.devs ; d ; d = d->next)
3434 if (d->state_fd >= 0 &&
3435 d->disk.major == dl->major &&
3436 d->disk.minor == dl->minor) {
3437 dprintf("%x:%x already in array\n", dl->major, dl->minor);
3438 break;
3439 }
3440 if (d)
3441 continue;
3442
3443 /* skip in use or failed drives */
3444 if (dl->disk.status & FAILED_DISK || idx == dl->index) {
3445 dprintf("%x:%x status ( %s%s)\n",
3446 dl->major, dl->minor,
3447 dl->disk.status & FAILED_DISK ? "failed " : "",
3448 idx == dl->index ? "in use " : "");
3449 continue;
3450 }
3451
3452 /* Does this unused device have the requisite free space?
3453 * We need a->info.component_size sectors
3454 */
3455 ex = get_extents(super, dl);
3456 if (!ex) {
3457 dprintf("cannot get extents\n");
3458 continue;
3459 }
3460 found = 0;
3461 j = 0;
3462 pos = 0;
3463 array_start = __le32_to_cpu(map->pba_of_lba0);
3464
3465 do {
3466 /* check that we can start at pba_of_lba0 with
3467 * a->info.component_size of space
3468 */
3469 esize = ex[j].start - pos;
3470 if (array_start >= pos &&
3471 array_start + a->info.component_size < ex[j].start) {
3472 found = 1;
3473 break;
3474 }
3475 pos = ex[j].start + ex[j].size;
3476 j++;
3477
3478 } while (ex[j-1].size);
3479
3480 free(ex);
3481 if (!found) {
3482 dprintf("%x:%x does not have %llu at %d\n",
3483 dl->major, dl->minor,
3484 a->info.component_size,
3485 __le32_to_cpu(map->pba_of_lba0));
3486 /* No room */
3487 continue;
3488 } else
3489 break;
3490 }
3491
3492 return dl;
3493 }
3494
static struct mdinfo *imsm_activate_spare(struct active_array *a,
					  struct metadata_update **updates)
{
	/**
	 * Find a device with unused free space and use it to replace a
	 * failed/vacant region in an array. We replace failed regions one
	 * array at a time. The result is that a new spare disk will be added
	 * to the first failed array and after the monitor has finished
	 * propagating failures the remainder will be consumed.
	 *
	 * Returns a list of mdinfo entries (one per repaired slot, caller
	 * owns them) and queues an imsm_update_activate_spare record on
	 * *updates; returns NULL when nothing can or need be done.
	 *
	 * FIXME add a capability for mdmon to request spares from another
	 * container.
	 */

	struct intel_super *super = a->container->sb;
	int inst = a->info.container_member;
	struct imsm_dev *dev = get_imsm_dev(super, inst);
	struct imsm_map *map = get_imsm_map(dev, 0);
	int failed = a->info.array.raid_disks;
	struct mdinfo *rv = NULL;
	struct mdinfo *d;
	struct mdinfo *di;
	struct metadata_update *mu;
	struct dl *dl;
	struct imsm_update_activate_spare *u;
	int num_spares = 0;
	int i;

	/* count failed slots: every member without an open state_fd.
	 * A faulty member whose fd is still open has not been removed by
	 * the kernel yet -- defer until that Removal completes.
	 */
	for (d = a->info.devs ; d ; d = d->next) {
		if ((d->curr_state & DS_FAULTY) &&
			d->state_fd >= 0)
			/* wait for Removal to happen */
			return NULL;
		if (d->state_fd >= 0)
			failed--;
	}

	dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
		inst, failed, a->info.array.raid_disks, a->info.array.level);
	/* only a degraded (not normal, not failed) array can take a spare */
	if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED)
		return NULL;

	/* For each slot, if it is not working, find a spare */
	for (i = 0; i < a->info.array.raid_disks; i++) {
		for (d = a->info.devs ; d ; d = d->next)
			if (d->disk.raid_disk == i)
				break;
		dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
		if (d && (d->state_fd >= 0))
			continue;

		/*
		 * OK, this device needs recovery. Try to re-add the previous
		 * occupant of this slot, if this fails add a new spare
		 */
		dl = imsm_readd(super, i, a);
		if (!dl)
			dl = imsm_add_spare(super, i, a);
		if (!dl)
			continue;
 
		/* found a usable disk with enough space */
		di = malloc(sizeof(*di));
		if (!di)
			continue;
		memset(di, 0, sizeof(*di));

		/* dl->index will be -1 in the case we are activating a
		 * pristine spare. imsm_process_update() will create a
		 * new index in this case. Once a disk is found to be
		 * failed in all member arrays it is kicked from the
		 * metadata
		 */
		di->disk.number = dl->index;

		/* (ab)use di->devs to store a pointer to the device
		 * we chose
		 */
		di->devs = (struct mdinfo *) dl;

		di->disk.raid_disk = i;
		di->disk.major = dl->major;
		di->disk.minor = dl->minor;
		di->disk.state = 0;
		di->data_offset = __le32_to_cpu(map->pba_of_lba0);
		di->component_size = a->info.component_size;
		di->container_member = inst;
		di->next = rv;
		rv = di;
		num_spares++;
		dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
			i, di->data_offset);

		/* one slot per invocation -- see the function comment */
		break;
	}

	if (!rv)
		/* No spares found */
		return rv;
	/* Now 'rv' has a list of devices to return.
	 * Create a metadata_update record to update the
	 * disk_ord_tbl for the array
	 */
	mu = malloc(sizeof(*mu));
	if (mu) {
		mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
		if (mu->buf == NULL) {
			free(mu);
			mu = NULL;
		}
	}
	/* on allocation failure release the whole rv list and give up */
	if (!mu) {
		while (rv) {
			struct mdinfo *n = rv->next;

			free(rv);
			rv = n;
		}
		return NULL;
	}
			
	mu->space = NULL;
	mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
	mu->next = *updates;
	u = (struct imsm_update_activate_spare *) mu->buf;

	/* one update record per chosen spare; records are chained via
	 * u->next inside the single mu->buf allocation
	 */
	for (di = rv ; di ; di = di->next) {
		u->type = update_activate_spare;
		u->dl = (struct dl *) di->devs;
		di->devs = NULL;
		u->slot = di->disk.raid_disk;
		u->array = inst;
		u->next = u + 1;
		u++;
	}
	(u-1)->next = NULL;
	*updates = mu;

	return rv;
}
3635
3636 static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_create_array *u)
3637 {
3638 struct imsm_dev *dev = get_imsm_dev(super, idx);
3639 struct imsm_map *map = get_imsm_map(dev, 0);
3640 struct imsm_map *new_map = get_imsm_map(&u->dev, 0);
3641 struct disk_info *inf = get_disk_info(u);
3642 struct imsm_disk *disk;
3643 int i;
3644 int j;
3645
3646 for (i = 0; i < map->num_members; i++) {
3647 disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i));
3648 for (j = 0; j < new_map->num_members; j++)
3649 if (serialcmp(disk->serial, inf[j].serial) == 0)
3650 return 1;
3651 }
3652
3653 return 0;
3654 }
3655
3656 static void imsm_delete(struct intel_super *super, struct dl **dlp, int index);
3657
static void imsm_process_update(struct supertype *st,
			        struct metadata_update *update)
{
	/**
	 * crack open the metadata_update envelope to find the update record
	 * update can be one of:
	 * 	update_activate_spare - a spare device has replaced a failed
	 * 	device in an array, update the disk_ord_tbl. If this disk is
	 * 	present in all member arrays then also clear the SPARE_DISK
	 * 	flag
	 *	update_create_array - add a new member array to the container
	 *	update_add_disk - fold pending spares from super->add into
	 *	the container disk list
	 *
	 * Runs in the monitor thread; allocations needed by an update were
	 * done beforehand in imsm_prepare_update() (manager thread).
	 */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb;
	enum imsm_update_type type = *(enum imsm_update_type *) update->buf;

	/* update requires a larger buf but the allocation failed */
	if (super->next_len && !super->next_buf) {
		super->next_len = 0;
		return;
	}

	/* swap in the enlarged mpb buffer prepared by imsm_prepare_update() */
	if (super->next_buf) {
		memcpy(super->next_buf, super->buf, super->len);
		free(super->buf);
		super->len = super->next_len;
		super->buf = super->next_buf;

		super->next_len = 0;
		super->next_buf = NULL;
	}

	mpb = super->anchor;

	switch (type) {
	case update_activate_spare: {
		struct imsm_update_activate_spare *u = (void *) update->buf; 
		struct imsm_dev *dev = get_imsm_dev(super, u->array);
		struct imsm_map *map = get_imsm_map(dev, 0);
		struct imsm_map *migr_map;
		struct active_array *a;
		struct imsm_disk *disk;
		__u8 to_state;
		struct dl *dl;
		unsigned int found;
		int failed;
		int victim = get_imsm_disk_idx(dev, u->slot);
		int i;

		/* validate u->dl against the live container disk list */
		for (dl = super->disks; dl; dl = dl->next)
			if (dl == u->dl)
				break;

		if (!dl) {
			fprintf(stderr, "error: imsm_activate_spare passed "
				"an unknown disk (index: %d)\n",
				u->dl->index);
			return;
		}

		super->updates_pending++;

		/* count failures (excluding rebuilds and the victim)
		 * to determine map[0] state
		 */
		failed = 0;
		for (i = 0; i < map->num_members; i++) {
			if (i == u->slot)
				continue;
			disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i));
			if (!disk || disk->status & FAILED_DISK)
				failed++;
		}

		/* adding a pristine spare, assign a new index */
		if (dl->index < 0) {
			dl->index = super->anchor->num_disks;
			super->anchor->num_disks++;
		}
		disk = &dl->disk;
		disk->status |= CONFIGURED_DISK;
		disk->status &= ~SPARE_DISK;

		/* mark rebuild: map[0] degraded with the new index, map[1]
		 * (the migration map) carries the IMSM_ORD_REBUILD flag
		 */
		to_state = imsm_check_degraded(super, dev, failed);
		map->map_state = IMSM_T_STATE_DEGRADED;
		migrate(dev, to_state, MIGR_REBUILD);
		migr_map = get_imsm_map(dev, 1);
		set_imsm_ord_tbl_ent(map, u->slot, dl->index);
		set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD);

		/* count arrays using the victim in the metadata */
		found = 0;
		for (a = st->arrays; a ; a = a->next) {
			dev = get_imsm_dev(super, a->info.container_member);
			/* NOTE(review): the bound here is 'map->num_members'
			 * from the array handled above, not from the 'dev'
			 * just looked up -- confirm all member arrays have
			 * the same member count, otherwise this mis-scans
			 * the ord table of differently-sized arrays
			 */
			for (i = 0; i < map->num_members; i++)
				if (victim == get_imsm_disk_idx(dev, i))
					found++;
		}

		/* delete the victim if it is no longer being
		 * utilized anywhere
		 */
		if (!found) {
			struct dl **dlp;

			/* We know that 'manager' isn't touching anything,
			 * so it is safe to delete
			 */
			for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
				if ((*dlp)->index == victim)
					break;

			/* victim may be on the missing list */
			if (!*dlp)
				for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next)
					if ((*dlp)->index == victim)
						break;
			imsm_delete(super, dlp, victim);
		}
		break;
	}
	case update_create_array: {
		/* someone wants to create a new array, we need to be aware of
		 * a few races/collisions:
		 * 1/ 'Create' called by two separate instances of mdadm
		 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
		 *    devices that have since been assimilated via
		 *    activate_spare.
		 * In the event this update can not be carried out mdadm will
		 * (FIX ME) notice that its update did not take hold.
		 */
		struct imsm_update_create_array *u = (void *) update->buf;
		struct imsm_dev *dev;
		struct imsm_map *map, *new_map;
		unsigned long long start, end;
		unsigned long long new_start, new_end;
		int i;
		struct disk_info *inf;
		struct dl *dl;

		/* handle racing creates: first come first serve */
		if (u->dev_idx < mpb->num_raid_devs) {
			dprintf("%s: subarray %d already defined\n",
				__func__, u->dev_idx);
			return;
		}

		/* check update is next in sequence */
		if (u->dev_idx != mpb->num_raid_devs) {
			dprintf("%s: can not create array %d expected index %d\n",
				__func__, u->dev_idx, mpb->num_raid_devs);
			return;
		}

		new_map = get_imsm_map(&u->dev, 0);
		new_start = __le32_to_cpu(new_map->pba_of_lba0);
		new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);
		inf = get_disk_info(u);

		/* handle activate_spare versus create race:
		 * check to make sure that overlapping arrays do not include
		 * overalpping disks
		 */
		for (i = 0; i < mpb->num_raid_devs; i++) {
			dev = get_imsm_dev(super, i);
			map = get_imsm_map(dev, 0);
			start = __le32_to_cpu(map->pba_of_lba0);
			end = start + __le32_to_cpu(map->blocks_per_member);
			if ((new_start >= start && new_start <= end) ||
			    (start >= new_start && start <= new_end))
				/* overlap */;
			else
				continue;

			/* LBA ranges overlap; the arrays must not share disks */
			if (disks_overlap(super, i, u)) {
				dprintf("%s: arrays overlap\n", __func__);
				return;
			}
		}

		/* check that prepare update was successful */
		if (!update->space) {
			dprintf("%s: prepare update failed\n", __func__);
			return;
		}

		/* check that all disks are still active before committing
		 * changes.  FIXME: could we instead handle this by creating a
		 * degraded array?  That's probably not what the user expects,
		 * so better to drop this update on the floor.
		 */
		for (i = 0; i < new_map->num_members; i++) {
			dl = serial_to_dl(inf[i].serial, super);
			if (!dl) {
				dprintf("%s: disk disappeared\n", __func__);
				return;
			}
		}

		super->updates_pending++;

		/* convert spares to members and fixup ord_tbl */
		for (i = 0; i < new_map->num_members; i++) {
			dl = serial_to_dl(inf[i].serial, super);
			if (dl->index == -1) {
				dl->index = mpb->num_disks;
				mpb->num_disks++;
				dl->disk.status |= CONFIGURED_DISK;
				dl->disk.status &= ~SPARE_DISK;
			}
			set_imsm_ord_tbl_ent(new_map, i, dl->index);
		}

		/* commit: take ownership of the buffer prepared by
		 * imsm_prepare_update() and install the new device
		 */
		dev = update->space;
		update->space = NULL;
		imsm_copy_dev(dev, &u->dev);
		super->dev_tbl[u->dev_idx] = dev;
		mpb->num_raid_devs++;

		imsm_update_version_info(super);
		break;
	}
	case update_add_disk:

		/* we may be able to repair some arrays if disks are
		 * being added */
		if (super->add) {
			struct active_array *a;

			super->updates_pending++;
			for (a = st->arrays; a; a = a->next)
				a->check_degraded = 1;
		}
		/* add some spares to the metadata */
		while (super->add) {
			struct dl *al;

			al = super->add;
			super->add = al->next;
			al->next = super->disks;
			super->disks = al;
			dprintf("%s: added %x:%x\n",
				__func__, al->major, al->minor);
		}

		break;
	}
}
3906
3907 static void imsm_prepare_update(struct supertype *st,
3908 struct metadata_update *update)
3909 {
3910 /**
3911 * Allocate space to hold new disk entries, raid-device entries or a new
3912 * mpb if necessary. The manager synchronously waits for updates to
3913 * complete in the monitor, so new mpb buffers allocated here can be
3914 * integrated by the monitor thread without worrying about live pointers
3915 * in the manager thread.
3916 */
3917 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
3918 struct intel_super *super = st->sb;
3919 struct imsm_super *mpb = super->anchor;
3920 size_t buf_len;
3921 size_t len = 0;
3922
3923 switch (type) {
3924 case update_create_array: {
3925 struct imsm_update_create_array *u = (void *) update->buf;
3926 struct imsm_dev *dev = &u->dev;
3927 struct imsm_map *map = get_imsm_map(dev, 0);
3928 struct dl *dl;
3929 struct disk_info *inf;
3930 int i;
3931 int activate = 0;
3932
3933 inf = get_disk_info(u);
3934 len = sizeof_imsm_dev(dev, 1);
3935 /* allocate a new super->dev_tbl entry */
3936 update->space = malloc(len);
3937
3938 /* count how many spares will be converted to members */
3939 for (i = 0; i < map->num_members; i++) {
3940 dl = serial_to_dl(inf[i].serial, super);
3941 if (!dl) {
3942 /* hmm maybe it failed?, nothing we can do about
3943 * it here
3944 */
3945 continue;
3946 }
3947 if (count_memberships(dl, super) == 0)
3948 activate++;
3949 }
3950 len += activate * sizeof(struct imsm_disk);
3951 break;
3952 default:
3953 break;
3954 }
3955 }
3956
3957 /* check if we need a larger metadata buffer */
3958 if (super->next_buf)
3959 buf_len = super->next_len;
3960 else
3961 buf_len = super->len;
3962
3963 if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
3964 /* ok we need a larger buf than what is currently allocated
3965 * if this allocation fails process_update will notice that
3966 * ->next_len is set and ->next_buf is NULL
3967 */
3968 buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
3969 if (super->next_buf)
3970 free(super->next_buf);
3971
3972 super->next_len = buf_len;
3973 if (posix_memalign(&super->next_buf, buf_len, 512) != 0)
3974 super->next_buf = NULL;
3975 }
3976 }
3977
3978 /* must be called while manager is quiesced */
/* must be called while manager is quiesced */
static void imsm_delete(struct intel_super *super, struct dl **dlp, int index)
{
	/* Remove disk 'index' from the metadata entirely: compact all disk
	 * indexes above it, fix every array's disk_ord_tbl accordingly,
	 * decrement num_disks, and free the dl entry that *dlp points at
	 * (unlinking it from whichever list - disks or missing - dlp came
	 * from).  *dlp may be NULL if the victim was not found on a list.
	 */
	struct imsm_super *mpb = super->anchor;
	struct dl *iter;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int i, j, num_members;
	__u32 ord;

	dprintf("%s: deleting device[%d] from imsm_super\n",
		__func__, index);

	/* shift all indexes down one */
	for (iter = super->disks; iter; iter = iter->next)
		if (iter->index > index)
			iter->index--;
	for (iter = super->missing; iter; iter = iter->next)
		if (iter->index > index)
			iter->index--;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		dev = get_imsm_dev(super, i);
		map = get_imsm_map(dev, 0);
		num_members = map->num_members;
		for (j = 0; j < num_members; j++) {
			/* update ord entries being careful not to propagate
			 * ord-flags to the first map
			 */
			ord = get_imsm_ord_tbl_ent(dev, j);

			/* entries at or below the deleted index keep their
			 * value; only higher indexes shift down by one
			 */
			if (ord_to_idx(ord) <= index)
				continue;

			/* map[0]: flags stripped via ord_to_idx; the index
			 * portion is > 0 here so 'ord - 1' only decrements it
			 */
			map = get_imsm_map(dev, 0);
			set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
			/* map[1] (migration map, if any) keeps the flags */
			map = get_imsm_map(dev, 1);
			if (map)
				set_imsm_ord_tbl_ent(map, j, ord - 1);
		}
	}

	mpb->num_disks--;
	super->updates_pending++;
	if (*dlp) {
		struct dl *dl = *dlp;

		*dlp = (*dlp)->next;
		__free_imsm_disk(dl);
	}
}
4029 #endif /* MDASSEMBLE */
4030
4031 struct superswitch super_imsm = {
4032 #ifndef MDASSEMBLE
4033 .examine_super = examine_super_imsm,
4034 .brief_examine_super = brief_examine_super_imsm,
4035 .detail_super = detail_super_imsm,
4036 .brief_detail_super = brief_detail_super_imsm,
4037 .write_init_super = write_init_super_imsm,
4038 .validate_geometry = validate_geometry_imsm,
4039 .add_to_super = add_to_super_imsm,
4040 .detail_platform = detail_platform_imsm,
4041 #endif
4042 .match_home = match_home_imsm,
4043 .uuid_from_super= uuid_from_super_imsm,
4044 .getinfo_super = getinfo_super_imsm,
4045 .update_super = update_super_imsm,
4046
4047 .avail_size = avail_size_imsm,
4048
4049 .compare_super = compare_super_imsm,
4050
4051 .load_super = load_super_imsm,
4052 .init_super = init_super_imsm,
4053 .store_super = store_zero_imsm,
4054 .free_super = free_super_imsm,
4055 .match_metadata_desc = match_metadata_desc_imsm,
4056 .container_content = container_content_imsm,
4057
4058 .external = 1,
4059 .name = "imsm",
4060
4061 #ifndef MDASSEMBLE
4062 /* for mdmon */
4063 .open_new = imsm_open_new,
4064 .load_super = load_super_imsm,
4065 .set_array_state= imsm_set_array_state,
4066 .set_disk = imsm_set_disk,
4067 .sync_metadata = imsm_sync_metadata,
4068 .activate_spare = imsm_activate_spare,
4069 .process_update = imsm_process_update,
4070 .prepare_update = imsm_prepare_update,
4071 #endif /* MDASSEMBLE */
4072 };