]> git.ipfire.org Git - thirdparty/mdadm.git/blame - super-intel.c
imsm: introduce set_imsm_ord_tbl_ent()
[thirdparty/mdadm.git] / super-intel.c
CommitLineData
cdddbdbc
DW
1/*
2 * mdadm - Intel(R) Matrix Storage Manager Support
3 *
4 * Copyright (C) 2002-2007 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20#include "mdadm.h"
c2a1e7da 21#include "mdmon.h"
cdddbdbc
DW
22#include <values.h>
23#include <scsi/sg.h>
24#include <ctype.h>
25
26/* MPB == Metadata Parameter Block */
27#define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
/* length of the signature prefix of imsm_super.sig[]; the version string
 * starts at this offset (see get_imsm_version())
 */
28#define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
29#define MPB_VERSION_RAID0 "1.0.00"
30#define MPB_VERSION_RAID1 "1.1.00"
31#define MPB_VERSION_RAID5 "1.2.02"
32#define MAX_SIGNATURE_LENGTH 32
33#define MAX_RAID_SERIAL_LEN 16
c2c087e6
DW
/* sector counts that avail_size_imsm() subtracts from a device's size to
 * obtain the space usable for data
 */
34#define MPB_SECTOR_CNT 418
35#define IMSM_RESERVED_SECTORS 4096
cdddbdbc
DW
36
37/* Disk configuration info.
 * One entry of the disk table embedded in struct imsm_super.  The 32-bit
 * fields are converted with __le32_to_cpu() wherever this file reads them.
 */
38#define IMSM_MAX_DEVICES 255
39struct imsm_disk {
40 __u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
41 __u32 total_blocks; /* 0xE8 - 0xEB total blocks */
42 __u32 scsi_id; /* 0xEC - 0xEF scsi ID */
43 __u32 status; /* 0xF0 - 0xF3 bitmask of the *_DISK flags below */
44#define SPARE_DISK 0x01 /* Spare */
45#define CONFIGURED_DISK 0x02 /* Member of some RaidDev */
46#define FAILED_DISK 0x04 /* Permanent failure */
47#define USABLE_DISK 0x08 /* Fully usable unless FAILED_DISK is set */
48
49#define IMSM_DISK_FILLERS 5
50 __u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
51};
52
53/* RAID map configuration infos.
 * Variable length: disk_ord_tbl[] really holds num_members entries, which is
 * what sizeof_imsm_map() accounts for.
 */
54struct imsm_map {
55 __u32 pba_of_lba0; /* start address of partition */
56 __u32 blocks_per_member;/* blocks per member */
57 __u32 num_data_stripes; /* number of data stripes */
58 __u16 blocks_per_strip; /* print_imsm_dev() reports this / 2 as the chunk size in KiB */
59 __u8 map_state; /* Normal, Uninitialized, Degraded, Failed */
60#define IMSM_T_STATE_NORMAL 0
61#define IMSM_T_STATE_UNINITIALIZED 1
62#define IMSM_T_STATE_DEGRADED 2 /* FIXME: is this correct? */
63#define IMSM_T_STATE_FAILED 3 /* FIXME: is this correct? */
64 __u8 raid_level; /* get_imsm_raid_level() reports level 1 with other than 2 members as 10 */
65#define IMSM_T_RAID0 0
66#define IMSM_T_RAID1 1
67#define IMSM_T_RAID5 5 /* since metadata version 1.2.02 ? */
68 __u8 num_members; /* number of member disks */
69 __u8 reserved[3];
70 __u32 filler[7]; /* expansion area */
7eef0453 71#define IMSM_ORD_REBUILD (1 << 24)
cdddbdbc 72 __u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members],
7eef0453
DW
73 * top byte contains some flags
74 */
cdddbdbc
DW
75} __attribute__ ((packed));
76
/* Volume state embedded in struct imsm_dev.  map[0] is always present; a
 * second, variable-length map follows it in memory while migr_state is
 * non-zero (see get_imsm_map()).
 */
77struct imsm_vol {
78 __u32 reserved[2];
79 __u8 migr_state; /* Normal or Migrating */
80 __u8 migr_type; /* Initializing, Rebuilding, ... */
81 __u8 dirty;
82 __u8 fill[1];
83 __u32 filler[5];
84 struct imsm_map map[1];
85 /* here comes another one if migr_state */
86} __attribute__ ((packed));
87
/* One raid device (volume) record.  Its real size depends on the maps held
 * in 'vol' and is computed by sizeof_imsm_dev().
 */
88struct imsm_dev {
89 __u8 volume[MAX_RAID_SERIAL_LEN];
90 __u32 size_low; /* low 32 bits of the array size; print_imsm_dev() multiplies the combined value by 512 */
91 __u32 size_high; /* high 32 bits of the array size */
92 __u32 status; /* Persistent RaidDev status */
93 __u32 reserved_blocks; /* Reserved blocks at beginning of volume */
94#define IMSM_DEV_FILLERS 12
95 __u32 filler[IMSM_DEV_FILLERS];
96 struct imsm_vol vol;
97} __attribute__ ((packed));
98
/* The MPB anchor.  The fixed part below is followed by the remaining disk
 * table entries, then the raid device records, then the BBM log.
 */
99struct imsm_super {
100 __u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */
101 __u32 check_sum; /* 0x20 - 0x23 MPB Checksum */
102 __u32 mpb_size; /* 0x24 - 0x27 Size of MPB */
103 __u32 family_num; /* 0x28 - 0x2B Checksum from first time this config was written */
104 __u32 generation_num; /* 0x2C - 0x2F Incremented each time this array's MPB is written */
604b746f
JD
105 __u32 error_log_size; /* 0x30 - 0x33 in bytes */
106 __u32 attributes; /* 0x34 - 0x37 */
cdddbdbc
DW
107 __u8 num_disks; /* 0x38 Number of configured disks */
108 __u8 num_raid_devs; /* 0x39 Number of configured volumes */
604b746f
JD
109 __u8 error_log_pos; /* 0x3A */
110 __u8 fill[1]; /* 0x3B */
111 __u32 cache_size; /* 0x3C - 0x3F in mb */
112 __u32 orig_family_num; /* 0x40 - 0x43 original family num */
113 __u32 pwr_cycle_count; /* 0x44 - 0x47 simulated power cycle count for array */
114 __u32 bbm_log_size; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
115#define IMSM_FILLERS 35
116 __u32 filler[IMSM_FILLERS]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */
cdddbdbc
DW
117 struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */
118 /* here comes imsm_dev[num_raid_devs] */
604b746f 119 /* here comes BBM logs */
cdddbdbc
DW
120} __attribute__ ((packed));
121
604b746f
JD
/* capacity of bbm_log.mapped_block_entries[] */
122#define BBM_LOG_MAX_ENTRIES 254
123
/* One record of the Bad Block Management log (packed layout). */
124struct bbm_log_entry {
125 __u64 defective_block_start;
126#define UNREADABLE 0xFFFFFFFF
127 __u32 spare_block_offset;
128 __u16 remapped_marked_count;
129 __u16 disk_ordinal;
130} __attribute__ ((__packed__));
131
/* Bad Block Management log; __get_imsm_bbm_log() locates it at the tail of
 * the MPB using imsm_super.bbm_log_size.
 */
132struct bbm_log {
133 __u32 signature; /* 0xABADB10C */
134 __u32 entry_count;
135 __u32 reserved_spare_block_count; /* 0 */
136 __u32 reserved; /* 0xFFFF */
137 __u64 first_spare_lba;
138 struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
139} __attribute__ ((__packed__));
140
141
cdddbdbc
DW
142#ifndef MDASSEMBLE
/* printable names, indexed by imsm_map.map_state (used by print_imsm_dev()) */
143static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
144#endif
145
87eb16df 146static unsigned int sector_count(__u32 bytes)
cdddbdbc 147{
87eb16df
DW
148 return ((bytes + (512-1)) & (~(512-1))) / 512;
149}
cdddbdbc 150
87eb16df
DW
151static unsigned int mpb_sectors(struct imsm_super *mpb)
152{
153 return sector_count(__le32_to_cpu(mpb->mpb_size));
cdddbdbc
DW
154}
155
156/* internal representation of IMSM metadata */
157struct intel_super {
 /* 'buf' and 'anchor' alias the same allocation: the raw buffer and the
  * MPB viewed through its on-disk layout
  */
158 union {
949c47a0
DW
159 void *buf; /* O_DIRECT buffer for reading/writing metadata */
160 struct imsm_super *anchor; /* immovable parameters */
cdddbdbc 161 };
949c47a0 162 size_t len; /* size of the 'buf' allocation */
4d7b1503
DW
163 void *next_buf; /* for realloc'ing buf from the manager */
164 size_t next_len;
c2c087e6
DW
165 int updates_pending; /* count of pending updates for mdmon */
166 int creating_imsm; /* flag to indicate container creation */
bf5a934a 167 int current_vol; /* index of raid device undergoing creation */
949c47a0
DW
168 #define IMSM_MAX_RAID_DEVS 2
 /* private copies of the raid device records, filled by parse_raid_devices() */
169 struct imsm_dev *dev_tbl[IMSM_MAX_RAID_DEVS];
cdddbdbc
DW
 /* one node per physical disk seen; load_imsm_disk() sets 'index' to the
  * disk's position in the anchor, -1 for a spare, -2 for failed/unknown
  */
170 struct dl {
171 struct dl *next;
172 int index;
173 __u8 serial[MAX_RAID_SERIAL_LEN];
174 int major, minor;
175 char *devname;
b9f594fe 176 struct imsm_disk disk;
cdddbdbc
DW
177 int fd;
178 } *disks;
43dad3d6
DW
179 struct dl *add; /* list of disks to add while mdmon active */
180 struct bbm_log *bbm_log;
cdddbdbc
DW
181};
182
c2c087e6
DW
/* A used region of a disk as built by get_extents(): 'start' and 'size' are
 * taken from a map's pba_of_lba0 and blocks_per_member.
 */
183struct extent {
184 unsigned long long start, size;
185};
186
88758e9d
DW
187/* definition of messages passed to imsm_process_update;
 * each value is the 'type' tag of the matching imsm_update_* structure below
 */
188enum imsm_update_type {
189 update_activate_spare,
8273f55e 190 update_create_array,
43dad3d6 191 update_add_disk,
88758e9d
DW
192};
193
/* Message carrying a disk ('dl') together with a 'slot' and an 'array'
 * number; 'next' allows several of these to be chained.
 * NOTE(review): presumably type == update_activate_spare - confirm in
 * imsm_process_update.
 */
194struct imsm_update_activate_spare {
195 enum imsm_update_type type;
d23fe947 196 struct dl *dl;
88758e9d
DW
197 int slot;
198 int array;
199 struct imsm_update_activate_spare *next;
200};
201
8273f55e
DW
/* Message carrying a raid device record and the index it refers to.
 * 'dev' is last because struct imsm_dev is variable length.
 * NOTE(review): presumably type == update_create_array - confirm.
 */
202struct imsm_update_create_array {
203 enum imsm_update_type type;
8273f55e 204 int dev_idx;
6a3e913e 205 struct imsm_dev dev;
8273f55e
DW
206};
207
43dad3d6
DW
/* Message with no payload beyond its type tag. */
208struct imsm_update_add_disk {
209 enum imsm_update_type type;
210};
211
0030e8d6
DW
/* Returns 1 when the IMSM_DEVNAME_AS_SERIAL environment variable is set to a
 * value that atoi() parses as 1, and 0 in every other case.
 */
static int imsm_env_devname_as_serial(void)
{
	const char *setting = getenv("IMSM_DEVNAME_AS_SERIAL");

	if (!setting)
		return 0;

	return atoi(setting) == 1 ? 1 : 0;
}
221
222
cdddbdbc
DW
223static struct supertype *match_metadata_desc_imsm(char *arg)
224{
225 struct supertype *st;
226
227 if (strcmp(arg, "imsm") != 0 &&
228 strcmp(arg, "default") != 0
229 )
230 return NULL;
231
232 st = malloc(sizeof(*st));
ef609477 233 memset(st, 0, sizeof(*st));
cdddbdbc
DW
234 st->ss = &super_imsm;
235 st->max_devs = IMSM_MAX_DEVICES;
236 st->minor_version = 0;
237 st->sb = NULL;
238 return st;
239}
240
cdddbdbc
DW
241static __u8 *get_imsm_version(struct imsm_super *mpb)
242{
243 return &mpb->sig[MPB_SIG_LEN];
244}
245
949c47a0
DW
246/* retrieve a disk directly from the anchor when the anchor is known to be
247 * up-to-date, currently only at load time
248 */
249static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
cdddbdbc 250{
949c47a0 251 if (index >= mpb->num_disks)
cdddbdbc
DW
252 return NULL;
253 return &mpb->disk[index];
254}
255
b9f594fe 256/* retrieve a disk from the parsed metadata */
949c47a0
DW
257static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
258{
b9f594fe
DW
259 struct dl *d;
260
261 for (d = super->disks; d; d = d->next)
262 if (d->index == index)
263 return &d->disk;
264
265 return NULL;
949c47a0
DW
266}
267
268/* generate a checksum directly from the anchor when the anchor is known to be
269 * up-to-date, currently only at load or write_super after coalescing
270 */
271static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
cdddbdbc
DW
272{
273 __u32 end = mpb->mpb_size / sizeof(end);
274 __u32 *p = (__u32 *) mpb;
275 __u32 sum = 0;
276
277 while (end--)
278 sum += __le32_to_cpu(*p++);
279
280 return sum - __le32_to_cpu(mpb->check_sum);
281}
282
a965f303
DW
283static size_t sizeof_imsm_map(struct imsm_map *map)
284{
285 return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
286}
287
288struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
cdddbdbc 289{
a965f303
DW
290 struct imsm_map *map = &dev->vol.map[0];
291
292 if (second_map && !dev->vol.migr_state)
293 return NULL;
294 else if (second_map) {
295 void *ptr = map;
296
297 return ptr + sizeof_imsm_map(map);
298 } else
299 return map;
300
301}
cdddbdbc 302
3393c6af
DW
303/* return the size of the device.
304 * migr_state increases the returned size if map[0] were to be duplicated
305 */
306static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
a965f303
DW
307{
308 size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
309 sizeof_imsm_map(get_imsm_map(dev, 0));
cdddbdbc
DW
310
311 /* migrating means an additional map */
a965f303
DW
312 if (dev->vol.migr_state)
313 size += sizeof_imsm_map(get_imsm_map(dev, 1));
3393c6af
DW
314 else if (migr_state)
315 size += sizeof_imsm_map(get_imsm_map(dev, 0));
cdddbdbc
DW
316
317 return size;
318}
319
949c47a0 320static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
cdddbdbc
DW
321{
322 int offset;
323 int i;
324 void *_mpb = mpb;
325
949c47a0 326 if (index >= mpb->num_raid_devs)
cdddbdbc
DW
327 return NULL;
328
329 /* devices start after all disks */
330 offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
331
332 for (i = 0; i <= index; i++)
333 if (i == index)
334 return _mpb + offset;
335 else
3393c6af 336 offset += sizeof_imsm_dev(_mpb + offset, 0);
cdddbdbc
DW
337
338 return NULL;
339}
340
949c47a0
DW
341static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
342{
343 if (index >= super->anchor->num_raid_devs)
344 return NULL;
345 return super->dev_tbl[index];
346}
347
7eef0453
DW
348static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev, int slot)
349{
350 struct imsm_map *map;
351
352 if (dev->vol.migr_state)
7eef0453 353 map = get_imsm_map(dev, 1);
fb9bf0d3
DW
354 else
355 map = get_imsm_map(dev, 0);
7eef0453 356
ff077194
DW
357 /* top byte identifies disk under rebuild */
358 return __le32_to_cpu(map->disk_ord_tbl[slot]);
359}
360
361#define ord_to_idx(ord) (((ord) << 8) >> 8)
362static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot)
363{
364 __u32 ord = get_imsm_ord_tbl_ent(dev, slot);
365
366 return ord_to_idx(ord);
7eef0453
DW
367}
368
be73972f
DW
369static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
370{
371 map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
372}
373
cdddbdbc
DW
374static int get_imsm_raid_level(struct imsm_map *map)
375{
376 if (map->raid_level == 1) {
377 if (map->num_members == 2)
378 return 1;
379 else
380 return 10;
381 }
382
383 return map->raid_level;
384}
385
c2c087e6
DW
386static int cmp_extent(const void *av, const void *bv)
387{
388 const struct extent *a = av;
389 const struct extent *b = bv;
390 if (a->start < b->start)
391 return -1;
392 if (a->start > b->start)
393 return 1;
394 return 0;
395}
396
397static struct extent *get_extents(struct intel_super *super, struct dl *dl)
398{
399 /* find a list of used extents on the given physical device */
c2c087e6
DW
400 struct extent *rv, *e;
401 int i, j;
402 int memberships = 0;
403
949c47a0
DW
404 for (i = 0; i < super->anchor->num_raid_devs; i++) {
405 struct imsm_dev *dev = get_imsm_dev(super, i);
a965f303 406 struct imsm_map *map = get_imsm_map(dev, 0);
c2c087e6
DW
407
408 for (j = 0; j < map->num_members; j++) {
ff077194 409 __u32 index = get_imsm_disk_idx(dev, j);
c2c087e6
DW
410
411 if (index == dl->index)
412 memberships++;
413 }
414 }
415 rv = malloc(sizeof(struct extent) * (memberships + 1));
416 if (!rv)
417 return NULL;
418 e = rv;
419
949c47a0
DW
420 for (i = 0; i < super->anchor->num_raid_devs; i++) {
421 struct imsm_dev *dev = get_imsm_dev(super, i);
a965f303 422 struct imsm_map *map = get_imsm_map(dev, 0);
c2c087e6
DW
423
424 for (j = 0; j < map->num_members; j++) {
ff077194 425 __u32 index = get_imsm_disk_idx(dev, j);
c2c087e6
DW
426
427 if (index == dl->index) {
428 e->start = __le32_to_cpu(map->pba_of_lba0);
429 e->size = __le32_to_cpu(map->blocks_per_member);
430 e++;
431 }
432 }
433 }
434 qsort(rv, memberships, sizeof(*rv), cmp_extent);
435
b9f594fe 436 e->start = __le32_to_cpu(dl->disk.total_blocks) -
c2c087e6
DW
437 (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
438 e->size = 0;
439 return rv;
440}
441
4f5bc454 442#ifndef MDASSEMBLE
cdddbdbc
DW
443static void print_imsm_dev(struct imsm_dev *dev, int index)
444{
445 __u64 sz;
446 int slot;
a965f303 447 struct imsm_map *map = get_imsm_map(dev, 0);
cdddbdbc
DW
448
449 printf("\n");
450 printf("[%s]:\n", dev->volume);
451 printf(" RAID Level : %d\n", get_imsm_raid_level(map));
452 printf(" Members : %d\n", map->num_members);
453 for (slot = 0; slot < map->num_members; slot++)
ff077194 454 if (index == get_imsm_disk_idx(dev, slot))
cdddbdbc
DW
455 break;
456 if (slot < map->num_members)
457 printf(" This Slot : %d\n", slot);
458 else
459 printf(" This Slot : ?\n");
460 sz = __le32_to_cpu(dev->size_high);
461 sz <<= 32;
462 sz += __le32_to_cpu(dev->size_low);
463 printf(" Array Size : %llu%s\n", (unsigned long long)sz,
464 human_size(sz * 512));
465 sz = __le32_to_cpu(map->blocks_per_member);
466 printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
467 human_size(sz * 512));
468 printf(" Sector Offset : %u\n",
469 __le32_to_cpu(map->pba_of_lba0));
470 printf(" Num Stripes : %u\n",
471 __le32_to_cpu(map->num_data_stripes));
472 printf(" Chunk Size : %u KiB\n",
473 __le16_to_cpu(map->blocks_per_strip) / 2);
474 printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
3393c6af
DW
475 printf(" Migrate State : %s", dev->vol.migr_state ? "migrating" : "idle");
476 if (dev->vol.migr_state)
477 printf(": %s", dev->vol.migr_type ? "rebuilding" : "initializing");
478 printf("\n");
479 printf(" Map State : %s", map_state_str[map->map_state]);
480 if (dev->vol.migr_state) {
481 struct imsm_map *map = get_imsm_map(dev, 1);
482 printf(", %s", map_state_str[map->map_state]);
483 }
484 printf("\n");
cdddbdbc 485 printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
cdddbdbc
DW
486}
487
488static void print_imsm_disk(struct imsm_super *mpb, int index)
489{
949c47a0 490 struct imsm_disk *disk = __get_imsm_disk(mpb, index);
cdddbdbc
DW
491 char str[MAX_RAID_SERIAL_LEN];
492 __u32 s;
493 __u64 sz;
494
e9d82038
DW
495 if (index < 0)
496 return;
497
cdddbdbc
DW
498 printf("\n");
499 snprintf(str, MAX_RAID_SERIAL_LEN, "%s", disk->serial);
500 printf(" Disk%02d Serial : %s\n", index, str);
501 s = __le32_to_cpu(disk->status);
502 printf(" State :%s%s%s%s\n", s&SPARE_DISK ? " spare" : "",
503 s&CONFIGURED_DISK ? " active" : "",
504 s&FAILED_DISK ? " failed" : "",
505 s&USABLE_DISK ? " usable" : "");
506 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
c2c087e6
DW
507 sz = __le32_to_cpu(disk->total_blocks) -
508 (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS * mpb->num_raid_devs);
cdddbdbc
DW
509 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
510 human_size(sz * 512));
511}
512
513static void examine_super_imsm(struct supertype *st, char *homehost)
514{
515 struct intel_super *super = st->sb;
949c47a0 516 struct imsm_super *mpb = super->anchor;
cdddbdbc
DW
517 char str[MAX_SIGNATURE_LENGTH];
518 int i;
519 __u32 sum;
520
521 snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
522 printf(" Magic : %s\n", str);
523 snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
524 printf(" Version : %s\n", get_imsm_version(mpb));
525 printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
526 printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
527 sum = __le32_to_cpu(mpb->check_sum);
528 printf(" Checksum : %08x %s\n", sum,
949c47a0 529 __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
87eb16df 530 printf(" MPB Sectors : %d\n", mpb_sectors(mpb));
cdddbdbc
DW
531 printf(" Disks : %d\n", mpb->num_disks);
532 printf(" RAID Devices : %d\n", mpb->num_raid_devs);
533 print_imsm_disk(mpb, super->disks->index);
604b746f
JD
534 if (super->bbm_log) {
535 struct bbm_log *log = super->bbm_log;
536
537 printf("\n");
538 printf("Bad Block Management Log:\n");
539 printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
540 printf(" Signature : %x\n", __le32_to_cpu(log->signature));
541 printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count));
542 printf(" Spare Blocks : %d\n", __le32_to_cpu(log->reserved_spare_block_count));
543 printf(" First Spare : %llx\n", __le64_to_cpu(log->first_spare_lba));
544 }
cdddbdbc 545 for (i = 0; i < mpb->num_raid_devs; i++)
949c47a0 546 print_imsm_dev(__get_imsm_dev(mpb, i), super->disks->index);
cdddbdbc
DW
547 for (i = 0; i < mpb->num_disks; i++) {
548 if (i == super->disks->index)
549 continue;
550 print_imsm_disk(mpb, i);
551 }
552}
553
/* Emit the fixed one-line ARRAY entry for an imsm container. */
static void brief_examine_super_imsm(struct supertype *st)
{
	fputs("ARRAY /dev/imsm metadata=imsm\n", stdout);
}
558
/* Placeholder: only prints its own function name. */
static void detail_super_imsm(struct supertype *st, char *homehost)
{
	const char *self = __func__;

	printf("%s\n", self);
}
563
/* Placeholder: only prints its own function name. */
static void brief_detail_super_imsm(struct supertype *st)
{
	const char *self = __func__;

	printf("%s\n", self);
}
568#endif
569
/* Placeholder: prints its own function name and always returns 0. */
static int match_home_imsm(struct supertype *st, char *homehost)
{
	const char *self = __func__;

	printf("%s\n", self);
	return 0;
}
576
/* imsm does not track uuids.  To break uuid matching in Manage_subdevs a
 * different value is handed out on every call: uuid[0] receives a counter
 * that starts at 0 and grows by one per call.  The other elements of 'uuid'
 * are left untouched.
 * FIXME what about the use of uuid's with bitmap's?
 */
static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
{
	static int dummy_id = 0;

	uuid[0] = dummy_id;
	dummy_id++;
}
587
0d481d37 588#if 0
4f5bc454
DW
/* Compiled out.  Splits the version string that follows the signature at
 * each '.' into two-character fields (major, minor, patch) and stores the
 * numeric value of the minor and patch fields in *m and *p.
 */
589static void
590get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
cdddbdbc 591{
cdddbdbc
DW
592 __u8 *v = get_imsm_version(mpb);
593 __u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
594 char major[] = { 0, 0, 0 };
595 char minor[] = { 0 ,0, 0 };
596 char patch[] = { 0, 0, 0 };
597 char *ver_parse[] = { major, minor, patch };
598 int i, j;
599
600 i = j = 0;
 /* NOTE(review): *v is read before 'v < end' is tested, and 'i' is not
  * bounded by the three entries of ver_parse[] - fix before re-enabling
  */
601 while (*v != '\0' && v < end) {
602 if (*v != '.' && j < 2)
603 ver_parse[i][j++] = *v;
604 else {
 /* a '.' or a third character in a field moves on to the next field */
605 i++;
606 j = 0;
607 }
608 v++;
609 }
610
4f5bc454
DW
611 *m = strtol(minor, NULL, 0);
612 *p = strtol(patch, NULL, 0);
613}
0d481d37 614#endif
4f5bc454 615
c2c087e6
DW
616static int imsm_level_to_layout(int level)
617{
618 switch (level) {
619 case 0:
620 case 1:
621 return 0;
622 case 5:
623 case 6:
a380c027 624 return ALGORITHM_LEFT_ASYMMETRIC;
c2c087e6
DW
625 case 10:
626 return 0x102; //FIXME is this correct?
627 }
628 return -1;
629}
630
bf5a934a
DW
631static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
632{
633 struct intel_super *super = st->sb;
949c47a0 634 struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
a965f303 635 struct imsm_map *map = get_imsm_map(dev, 0);
bf5a934a
DW
636
637 info->container_member = super->current_vol;
638 info->array.raid_disks = map->num_members;
639 info->array.level = get_imsm_raid_level(map);
640 info->array.layout = imsm_level_to_layout(info->array.level);
641 info->array.md_minor = -1;
642 info->array.ctime = 0;
643 info->array.utime = 0;
644 info->array.chunk_size = __le16_to_cpu(map->blocks_per_strip * 512);
645
646 info->data_offset = __le32_to_cpu(map->pba_of_lba0);
647 info->component_size = __le32_to_cpu(map->blocks_per_member);
648
649 info->disk.major = 0;
650 info->disk.minor = 0;
651
652 sprintf(info->text_version, "/%s/%d",
653 devnum2devname(st->container_dev),
654 info->container_member);
655}
656
657
4f5bc454
DW
658static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info)
659{
660 struct intel_super *super = st->sb;
4f5bc454
DW
661 struct imsm_disk *disk;
662 __u32 s;
4f5bc454 663
bf5a934a
DW
664 if (super->current_vol >= 0) {
665 getinfo_super_imsm_volume(st, info);
666 return;
667 }
d23fe947
DW
668
669 /* Set raid_disks to zero so that Assemble will always pull in valid
670 * spares
671 */
672 info->array.raid_disks = 0;
cdddbdbc
DW
673 info->array.level = LEVEL_CONTAINER;
674 info->array.layout = 0;
675 info->array.md_minor = -1;
c2c087e6 676 info->array.ctime = 0; /* N/A for imsm */
cdddbdbc
DW
677 info->array.utime = 0;
678 info->array.chunk_size = 0;
679
680 info->disk.major = 0;
681 info->disk.minor = 0;
cdddbdbc 682 info->disk.raid_disk = -1;
c2c087e6
DW
683 info->reshape_active = 0;
684 strcpy(info->text_version, "imsm");
685 info->disk.number = -1;
686 info->disk.state = 0;
687
4a04ec6c 688 if (super->disks) {
b9f594fe 689 disk = &super->disks->disk;
4a04ec6c
DW
690 info->disk.number = super->disks->index;
691 info->disk.raid_disk = super->disks->index;
bf5a934a
DW
692 info->data_offset = __le32_to_cpu(disk->total_blocks) -
693 (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
694 info->component_size = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
4a04ec6c
DW
695 s = __le32_to_cpu(disk->status);
696 info->disk.state = s & CONFIGURED_DISK ? (1 << MD_DISK_ACTIVE) : 0;
697 info->disk.state |= s & FAILED_DISK ? (1 << MD_DISK_FAULTY) : 0;
698 info->disk.state |= s & USABLE_DISK ? (1 << MD_DISK_SYNC) : 0;
cdddbdbc 699 }
cdddbdbc
DW
700}
701
cdddbdbc
DW
/* FIXME: not implemented yet - no update is applied and 0 is returned.
 *
 * For 'assemble' and 'force' a non-zero return is required if any change
 * was made; for all other updates the return value is ignored.
 *
 * Update options:
 *   force-one   : this device looks a bit old but needs to be included,
 *                 update age info appropriately
 *   assemble    : clear any 'faulty' flag to allow this device to be
 *                 assembled
 *   force-array : array is degraded but being forced, mark it clean if
 *                 that will be needed to assemble it
 *   newdev      : not used ????
 *   grow        : array has gained a new device - currently linear only
 *   resync      : mark as dirty so a resync will happen
 *   name        : update the name - preserving the homehost
 *
 * Not relevant for imsm (it has no concept of UUID or homehost):
 *   sparc2.2, super-minor, summaries, uuid, homehost, _reshape_progress
 */
static int update_super_imsm(struct supertype *st, struct mdinfo *info,
			     char *update, char *devname, int verbose,
			     int uuid_set, char *homehost)
{
	int is_grow = (strcmp(update, "grow") == 0);
	int is_resync = (strcmp(update, "resync") == 0);

	if (is_grow) {
		/* nothing to do yet */
	}
	if (is_resync) {
		/* dev->vol.dirty = 1; */
	}

	return 0;
}
746
c2c087e6 747static size_t disks_to_mpb_size(int disks)
cdddbdbc 748{
c2c087e6 749 size_t size;
cdddbdbc 750
c2c087e6
DW
751 size = sizeof(struct imsm_super);
752 size += (disks - 1) * sizeof(struct imsm_disk);
753 size += 2 * sizeof(struct imsm_dev);
754 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
755 size += (4 - 2) * sizeof(struct imsm_map);
756 /* 4 possible disk_ord_tbl's */
757 size += 4 * (disks - 1) * sizeof(__u32);
758
759 return size;
760}
761
762static __u64 avail_size_imsm(struct supertype *st, __u64 devsize)
763{
764 if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
765 return 0;
766
767 return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
cdddbdbc
DW
768}
769
770static int compare_super_imsm(struct supertype *st, struct supertype *tst)
771{
772 /*
773 * return:
774 * 0 same, or first was empty, and second was copied
775 * 1 second had wrong number
776 * 2 wrong uuid
777 * 3 wrong other info
778 */
779 struct intel_super *first = st->sb;
780 struct intel_super *sec = tst->sb;
781
782 if (!first) {
783 st->sb = tst->sb;
784 tst->sb = NULL;
785 return 0;
786 }
787
949c47a0 788 if (memcmp(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH) != 0)
cdddbdbc 789 return 3;
d23fe947
DW
790
791 /* if an anchor does not have num_raid_devs set then it is a free
792 * floating spare
793 */
794 if (first->anchor->num_raid_devs > 0 &&
795 sec->anchor->num_raid_devs > 0) {
796 if (first->anchor->family_num != sec->anchor->family_num)
797 return 3;
d23fe947 798 }
cdddbdbc 799
3e372e5a
DW
800 /* if 'first' is a spare promote it to a populated mpb with sec's
801 * family number
802 */
803 if (first->anchor->num_raid_devs == 0 &&
804 sec->anchor->num_raid_devs > 0) {
805 first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
806 first->anchor->family_num = sec->anchor->family_num;
807 }
808
cdddbdbc
DW
809 return 0;
810}
811
0030e8d6
DW
812static void fd2devname(int fd, char *name)
813{
814 struct stat st;
815 char path[256];
816 char dname[100];
817 char *nm;
818 int rv;
819
820 name[0] = '\0';
821 if (fstat(fd, &st) != 0)
822 return;
823 sprintf(path, "/sys/dev/block/%d:%d",
824 major(st.st_rdev), minor(st.st_rdev));
825
826 rv = readlink(path, dname, sizeof(dname));
827 if (rv <= 0)
828 return;
829
830 dname[rv] = '\0';
831 nm = strrchr(dname, '/');
832 nm++;
833 snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
834}
835
836
cdddbdbc
DW
837extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
838
839static int imsm_read_serial(int fd, char *devname,
840 __u8 serial[MAX_RAID_SERIAL_LEN])
841{
842 unsigned char scsi_serial[255];
cdddbdbc
DW
843 int rv;
844 int rsp_len;
845 int i, cnt;
846
847 memset(scsi_serial, 0, sizeof(scsi_serial));
cdddbdbc 848
0030e8d6
DW
849 if (imsm_env_devname_as_serial()) {
850 char name[MAX_RAID_SERIAL_LEN];
851
852 fd2devname(fd, name);
853 strcpy((char *) serial, name);
854 return 0;
855 }
856
755c99fa 857 rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));
cdddbdbc
DW
858
859 if (rv != 0) {
860 if (devname)
861 fprintf(stderr,
862 Name ": Failed to retrieve serial for %s\n",
863 devname);
864 return rv;
865 }
866
867 rsp_len = scsi_serial[3];
868 for (i = 0, cnt = 0; i < rsp_len; i++) {
869 if (!isspace(scsi_serial[4 + i]))
870 serial[cnt++] = scsi_serial[4 + i];
871 if (cnt == MAX_RAID_SERIAL_LEN)
872 break;
873 }
874
875 serial[MAX_RAID_SERIAL_LEN - 1] = '\0';
876
877 return 0;
878}
879
880static int
881load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
882{
cdddbdbc
DW
883 struct dl *dl;
884 struct stat stb;
cdddbdbc
DW
885 int rv;
886 int i;
d23fe947
DW
887 int alloc = 1;
888 __u8 serial[MAX_RAID_SERIAL_LEN];
889
890 rv = imsm_read_serial(fd, devname, serial);
891
892 if (rv != 0)
893 return 2;
894
895 /* check if this is a disk we have seen before. it may be a spare in
896 * super->disks while the current anchor believes it is a raid member,
897 * check if we need to update dl->index
898 */
899 for (dl = super->disks; dl; dl = dl->next)
900 if (memcmp(dl->serial, serial, MAX_RAID_SERIAL_LEN) == 0)
901 break;
902
903 if (!dl)
904 dl = malloc(sizeof(*dl));
905 else
906 alloc = 0;
cdddbdbc 907
b9f594fe 908 if (!dl) {
cdddbdbc
DW
909 if (devname)
910 fprintf(stderr,
911 Name ": failed to allocate disk buffer for %s\n",
912 devname);
913 return 2;
914 }
cdddbdbc 915
d23fe947
DW
916 if (alloc) {
917 fstat(fd, &stb);
918 dl->major = major(stb.st_rdev);
919 dl->minor = minor(stb.st_rdev);
920 dl->next = super->disks;
921 dl->fd = keep_fd ? fd : -1;
922 dl->devname = devname ? strdup(devname) : NULL;
923 strncpy((char *) dl->serial, (char *) serial, MAX_RAID_SERIAL_LEN);
8796fdc4 924 dl->index = -2;
d23fe947
DW
925 } else if (keep_fd) {
926 close(dl->fd);
927 dl->fd = fd;
928 }
cdddbdbc 929
d23fe947 930 /* look up this disk's index in the current anchor */
949c47a0
DW
931 for (i = 0; i < super->anchor->num_disks; i++) {
932 struct imsm_disk *disk_iter;
933
934 disk_iter = __get_imsm_disk(super->anchor, i);
cdddbdbc 935
949c47a0
DW
936 if (memcmp(disk_iter->serial, dl->serial,
937 MAX_RAID_SERIAL_LEN) == 0) {
d23fe947
DW
938 __u32 status;
939
b9f594fe 940 dl->disk = *disk_iter;
d23fe947
DW
941 status = __le32_to_cpu(dl->disk.status);
942 /* only set index on disks that are a member of a
943 * populated contianer, i.e. one with raid_devs
944 */
6c386dd3
DW
945 if (status & FAILED_DISK)
946 dl->index = -2;
947 else if (status & SPARE_DISK)
d23fe947
DW
948 dl->index = -1;
949 else
950 dl->index = i;
8796fdc4 951
cdddbdbc 952 break;
949c47a0 953 }
cdddbdbc
DW
954 }
955
d23fe947
DW
956 if (alloc)
957 super->disks = dl;
6c386dd3 958
949c47a0
DW
959 return 0;
960}
961
/* Copy a raid device record, with the maps it currently holds, to 'dest'. */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	size_t nbytes = sizeof_imsm_dev(src, 0);

	memcpy(dest, src, nbytes);
}
966
/* Copy the first map of 'dev' over its second map. */
static void dup_map(struct imsm_dev *dev)
{
	struct imsm_map *second = get_imsm_map(dev, 1);
	struct imsm_map *first = get_imsm_map(dev, 0);
	size_t nbytes = sizeof_imsm_map(first);

	memcpy(second, first, nbytes);
}
974
975static int parse_raid_devices(struct intel_super *super)
976{
977 int i;
978 struct imsm_dev *dev_new;
4d7b1503
DW
979 size_t len, len_migr;
980 size_t space_needed = 0;
981 struct imsm_super *mpb = super->anchor;
949c47a0
DW
982
983 for (i = 0; i < super->anchor->num_raid_devs; i++) {
984 struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
985
4d7b1503
DW
986 len = sizeof_imsm_dev(dev_iter, 0);
987 len_migr = sizeof_imsm_dev(dev_iter, 1);
988 if (len_migr > len)
989 space_needed += len_migr - len;
990
991 dev_new = malloc(len_migr);
949c47a0
DW
992 if (!dev_new)
993 return 1;
994 imsm_copy_dev(dev_new, dev_iter);
995 super->dev_tbl[i] = dev_new;
996 }
cdddbdbc 997
4d7b1503
DW
998 /* ensure that super->buf is large enough when all raid devices
999 * are migrating
1000 */
1001 if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
1002 void *buf;
1003
1004 len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512);
1005 if (posix_memalign(&buf, 512, len) != 0)
1006 return 1;
1007
1008 memcpy(buf, super->buf, len);
1009 free(super->buf);
1010 super->buf = buf;
1011 super->len = len;
1012 }
1013
cdddbdbc
DW
1014 return 0;
1015}
1016
604b746f
JD
1017/* retrieve a pointer to the bbm log which starts after all raid devices */
1018struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
1019{
1020 void *ptr = NULL;
1021
1022 if (__le32_to_cpu(mpb->bbm_log_size)) {
1023 ptr = mpb;
1024 ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size);
1025 }
1026
1027 return ptr;
1028}
1029
d23fe947 1030static void __free_imsm(struct intel_super *super, int free_disks);
9ca2c81c 1031
cdddbdbc
DW
1032/* load_imsm_mpb - read matrix metadata
1033 * allocates super->mpb to be freed by free_super
1034 */
1035static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
1036{
1037 unsigned long long dsize;
cdddbdbc
DW
1038 unsigned long long sectors;
1039 struct stat;
6416d527 1040 struct imsm_super *anchor;
cdddbdbc 1041 __u32 check_sum;
949c47a0 1042 int rc;
cdddbdbc 1043
cdddbdbc
DW
1044 get_dev_size(fd, NULL, &dsize);
1045
1046 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
1047 if (devname)
1048 fprintf(stderr,
1049 Name ": Cannot seek to anchor block on %s: %s\n",
1050 devname, strerror(errno));
1051 return 1;
1052 }
1053
949c47a0 1054 if (posix_memalign((void**)&anchor, 512, 512) != 0) {
ad97895e
DW
1055 if (devname)
1056 fprintf(stderr,
1057 Name ": Failed to allocate imsm anchor buffer"
1058 " on %s\n", devname);
1059 return 1;
1060 }
949c47a0 1061 if (read(fd, anchor, 512) != 512) {
cdddbdbc
DW
1062 if (devname)
1063 fprintf(stderr,
1064 Name ": Cannot read anchor block on %s: %s\n",
1065 devname, strerror(errno));
6416d527 1066 free(anchor);
cdddbdbc
DW
1067 return 1;
1068 }
1069
6416d527 1070 if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
cdddbdbc
DW
1071 if (devname)
1072 fprintf(stderr,
1073 Name ": no IMSM anchor on %s\n", devname);
6416d527 1074 free(anchor);
cdddbdbc
DW
1075 return 2;
1076 }
1077
d23fe947 1078 __free_imsm(super, 0);
949c47a0
DW
1079 super->len = ROUND_UP(anchor->mpb_size, 512);
1080 if (posix_memalign(&super->buf, 512, super->len) != 0) {
cdddbdbc
DW
1081 if (devname)
1082 fprintf(stderr,
1083 Name ": unable to allocate %zu byte mpb buffer\n",
949c47a0 1084 super->len);
6416d527 1085 free(anchor);
cdddbdbc
DW
1086 return 2;
1087 }
949c47a0 1088 memcpy(super->buf, anchor, 512);
cdddbdbc 1089
6416d527
NB
1090 sectors = mpb_sectors(anchor) - 1;
1091 free(anchor);
949c47a0
DW
1092 if (!sectors) {
1093 rc = load_imsm_disk(fd, super, devname, 0);
1094 if (rc == 0)
1095 rc = parse_raid_devices(super);
1096 return rc;
1097 }
cdddbdbc
DW
1098
1099 /* read the extended mpb */
1100 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
1101 if (devname)
1102 fprintf(stderr,
1103 Name ": Cannot seek to extended mpb on %s: %s\n",
1104 devname, strerror(errno));
1105 return 1;
1106 }
1107
949c47a0 1108 if (read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
cdddbdbc
DW
1109 if (devname)
1110 fprintf(stderr,
1111 Name ": Cannot read extended mpb on %s: %s\n",
1112 devname, strerror(errno));
1113 return 2;
1114 }
1115
949c47a0
DW
1116 check_sum = __gen_imsm_checksum(super->anchor);
1117 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
cdddbdbc
DW
1118 if (devname)
1119 fprintf(stderr,
1120 Name ": IMSM checksum %x != %x on %s\n",
949c47a0 1121 check_sum, __le32_to_cpu(super->anchor->check_sum),
cdddbdbc
DW
1122 devname);
1123 return 2;
1124 }
1125
604b746f
JD
1126 /* FIXME the BBM log is disk specific so we cannot use this global
1127 * buffer for all disks. Ok for now since we only look at the global
1128 * bbm_log_size parameter to gate assembly
1129 */
1130 super->bbm_log = __get_imsm_bbm_log(super->anchor);
1131
949c47a0
DW
1132 rc = load_imsm_disk(fd, super, devname, 0);
1133 if (rc == 0)
1134 rc = parse_raid_devices(super);
4d7b1503 1135
949c47a0 1136 return rc;
cdddbdbc
DW
1137}
1138
ae6aad82
DW
1139static void __free_imsm_disk(struct dl *d)
1140{
1141 if (d->fd >= 0)
1142 close(d->fd);
1143 if (d->devname)
1144 free(d->devname);
1145 free(d);
1146
1147}
cdddbdbc
DW
1148static void free_imsm_disks(struct intel_super *super)
1149{
1150 while (super->disks) {
1151 struct dl *d = super->disks;
1152
1153 super->disks = d->next;
ae6aad82 1154 __free_imsm_disk(d);
cdddbdbc
DW
1155 }
1156}
1157
9ca2c81c 1158/* free all the pieces hanging off of a super pointer */
d23fe947 1159static void __free_imsm(struct intel_super *super, int free_disks)
cdddbdbc 1160{
949c47a0
DW
1161 int i;
1162
9ca2c81c 1163 if (super->buf) {
949c47a0 1164 free(super->buf);
9ca2c81c
DW
1165 super->buf = NULL;
1166 }
d23fe947
DW
1167 if (free_disks)
1168 free_imsm_disks(super);
949c47a0 1169 for (i = 0; i < IMSM_MAX_RAID_DEVS; i++)
9ca2c81c 1170 if (super->dev_tbl[i]) {
949c47a0 1171 free(super->dev_tbl[i]);
9ca2c81c
DW
1172 super->dev_tbl[i] = NULL;
1173 }
cdddbdbc
DW
1174}
1175
9ca2c81c
DW
/* Release everything owned by 'super', including its disk list, and then
 * the structure itself.
 */
static void free_imsm(struct intel_super *super)
{
	const int with_disks = 1;

	__free_imsm(super, with_disks);
	free(super);
}
cdddbdbc
DW
1181
1182static void free_super_imsm(struct supertype *st)
1183{
1184 struct intel_super *super = st->sb;
1185
1186 if (!super)
1187 return;
1188
1189 free_imsm(super);
1190 st->sb = NULL;
1191}
1192
c2c087e6
DW
1193static struct intel_super *alloc_super(int creating_imsm)
1194{
1195 struct intel_super *super = malloc(sizeof(*super));
1196
1197 if (super) {
1198 memset(super, 0, sizeof(*super));
1199 super->creating_imsm = creating_imsm;
bf5a934a 1200 super->current_vol = -1;
c2c087e6
DW
1201 }
1202
1203 return super;
1204}
1205
cdddbdbc
DW
1206#ifndef MDASSEMBLE
1207static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
1208 char *devname, int keep_fd)
1209{
1210 struct mdinfo *sra;
1211 struct intel_super *super;
1212 struct mdinfo *sd, *best = NULL;
1213 __u32 bestgen = 0;
1214 __u32 gen;
1215 char nm[20];
1216 int dfd;
1217 int rv;
1218
1219 /* check if this disk is a member of an active array */
1220 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
1221 if (!sra)
1222 return 1;
1223
1224 if (sra->array.major_version != -1 ||
1225 sra->array.minor_version != -2 ||
1226 strcmp(sra->text_version, "imsm") != 0)
1227 return 1;
1228
c2c087e6 1229 super = alloc_super(0);
cdddbdbc
DW
1230 if (!super)
1231 return 1;
1232
d23fe947 1233 /* find the most up to date disk in this array, skipping spares */
cdddbdbc
DW
1234 for (sd = sra->devs; sd; sd = sd->next) {
1235 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
1236 dfd = dev_open(nm, keep_fd ? O_RDWR : O_RDONLY);
1237 if (!dfd) {
1238 free_imsm(super);
1239 return 2;
1240 }
1241 rv = load_imsm_mpb(dfd, super, NULL);
1242 if (!keep_fd)
1243 close(dfd);
1244 if (rv == 0) {
d23fe947
DW
1245 if (super->anchor->num_raid_devs == 0)
1246 gen = 0;
1247 else
1248 gen = __le32_to_cpu(super->anchor->generation_num);
cdddbdbc
DW
1249 if (!best || gen > bestgen) {
1250 bestgen = gen;
1251 best = sd;
1252 }
1253 } else {
1254 free_imsm(super);
1255 return 2;
1256 }
1257 }
1258
1259 if (!best) {
1260 free_imsm(super);
1261 return 1;
1262 }
1263
1264 /* load the most up to date anchor */
1265 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
1266 dfd = dev_open(nm, O_RDONLY);
1267 if (!dfd) {
1268 free_imsm(super);
1269 return 1;
1270 }
1271 rv = load_imsm_mpb(dfd, super, NULL);
1272 close(dfd);
1273 if (rv != 0) {
1274 free_imsm(super);
1275 return 2;
1276 }
1277
d23fe947 1278 /* re-parse the disk list with the current anchor */
cdddbdbc
DW
1279 for (sd = sra->devs ; sd ; sd = sd->next) {
1280 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
1281 dfd = dev_open(nm, keep_fd? O_RDWR : O_RDONLY);
1282 if (!dfd) {
1283 free_imsm(super);
1284 return 2;
1285 }
1286 load_imsm_disk(dfd, super, NULL, keep_fd);
1287 if (!keep_fd)
1288 close(dfd);
1289 }
1290
f7e7067b 1291 if (st->subarray[0]) {
949c47a0 1292 if (atoi(st->subarray) <= super->anchor->num_raid_devs)
bf5a934a
DW
1293 super->current_vol = atoi(st->subarray);
1294 else
1295 return 1;
f7e7067b
NB
1296 }
1297
cdddbdbc 1298 *sbp = super;
43dad3d6 1299 st->container_dev = fd2devnum(fd);
cdddbdbc 1300 if (st->ss == NULL) {
bf5a934a 1301 st->ss = &super_imsm;
cdddbdbc
DW
1302 st->minor_version = 0;
1303 st->max_devs = IMSM_MAX_DEVICES;
1304 }
1305
1306 return 0;
1307}
1308#endif
1309
1310static int load_super_imsm(struct supertype *st, int fd, char *devname)
1311{
1312 struct intel_super *super;
1313 int rv;
1314
1315#ifndef MDASSEMBLE
3dbccbcf 1316 if (load_super_imsm_all(st, fd, &st->sb, devname, 1) == 0)
cdddbdbc
DW
1317 return 0;
1318#endif
f7e7067b
NB
1319 if (st->subarray[0])
1320 return 1; /* FIXME */
cdddbdbc 1321
c2c087e6 1322 super = alloc_super(0);
cdddbdbc
DW
1323 if (!super) {
1324 fprintf(stderr,
1325 Name ": malloc of %zu failed.\n",
1326 sizeof(*super));
1327 return 1;
1328 }
1329
1330 rv = load_imsm_mpb(fd, super, devname);
1331
1332 if (rv) {
1333 if (devname)
1334 fprintf(stderr,
1335 Name ": Failed to load all information "
1336 "sections on %s\n", devname);
1337 free_imsm(super);
1338 return rv;
1339 }
1340
1341 st->sb = super;
1342 if (st->ss == NULL) {
1343 st->ss = &super_imsm;
1344 st->minor_version = 0;
1345 st->max_devs = IMSM_MAX_DEVICES;
1346 }
1347
1348 return 0;
1349}
1350
ef6ffade
DW
1351static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
1352{
1353 if (info->level == 1)
1354 return 128;
1355 return info->chunk_size >> 9;
1356}
1357
1358static __u32 info_to_num_data_stripes(mdu_array_info_t *info)
1359{
1360 __u32 num_stripes;
1361
1362 num_stripes = (info->size * 2) / info_to_blocks_per_strip(info);
1363 if (info->level == 1)
1364 num_stripes /= 2;
1365
1366 return num_stripes;
1367}
1368
fcfd9599
DW
1369static __u32 info_to_blocks_per_member(mdu_array_info_t *info)
1370{
1371 return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1);
1372}
1373
8b353278
DW
1374static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
1375 unsigned long long size, char *name,
1376 char *homehost, int *uuid)
cdddbdbc 1377{
c2c087e6
DW
1378 /* We are creating a volume inside a pre-existing container.
1379 * so st->sb is already set.
1380 */
1381 struct intel_super *super = st->sb;
949c47a0 1382 struct imsm_super *mpb = super->anchor;
c2c087e6
DW
1383 struct imsm_dev *dev;
1384 struct imsm_vol *vol;
1385 struct imsm_map *map;
1386 int idx = mpb->num_raid_devs;
1387 int i;
1388 unsigned long long array_blocks;
c2c087e6 1389 __u32 offset = 0;
2c092cad 1390 size_t size_old, size_new;
cdddbdbc 1391
c2c087e6
DW
1392 if (mpb->num_raid_devs >= 2) {
1393 fprintf(stderr, Name": This imsm-container already has the "
1394 "maximum of 2 volumes\n");
1395 return 0;
1396 }
1397
2c092cad
DW
1398 /* ensure the mpb is large enough for the new data */
1399 size_old = __le32_to_cpu(mpb->mpb_size);
1400 size_new = disks_to_mpb_size(info->nr_disks);
1401 if (size_new > size_old) {
1402 void *mpb_new;
1403 size_t size_round = ROUND_UP(size_new, 512);
1404
1405 if (posix_memalign(&mpb_new, 512, size_round) != 0) {
1406 fprintf(stderr, Name": could not allocate new mpb\n");
1407 return 0;
1408 }
1409 memcpy(mpb_new, mpb, size_old);
1410 free(mpb);
1411 mpb = mpb_new;
949c47a0 1412 super->anchor = mpb_new;
2c092cad
DW
1413 mpb->mpb_size = __cpu_to_le32(size_new);
1414 memset(mpb_new + size_old, 0, size_round - size_old);
1415 }
bf5a934a 1416 super->current_vol = idx;
d23fe947
DW
1417 /* when creating the first raid device in this container set num_disks
1418 * to zero, i.e. delete this spare and add raid member devices in
1419 * add_to_super_imsm_volume()
1420 */
1421 if (super->current_vol == 0)
1422 mpb->num_disks = 0;
bf5a934a 1423 sprintf(st->subarray, "%d", idx);
949c47a0
DW
1424 dev = malloc(sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
1425 if (!dev) {
1426 fprintf(stderr, Name": could not allocate raid device\n");
1427 return 0;
1428 }
c2c087e6
DW
1429 strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
1430 array_blocks = calc_array_size(info->level, info->raid_disks,
1431 info->layout, info->chunk_size,
1432 info->size*2);
1433 dev->size_low = __cpu_to_le32((__u32) array_blocks);
1434 dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
1435 dev->status = __cpu_to_le32(0);
1436 dev->reserved_blocks = __cpu_to_le32(0);
1437 vol = &dev->vol;
1438 vol->migr_state = 0;
1439 vol->migr_type = 0;
1440 vol->dirty = 0;
1441 for (i = 0; i < idx; i++) {
949c47a0 1442 struct imsm_dev *prev = get_imsm_dev(super, i);
a965f303 1443 struct imsm_map *pmap = get_imsm_map(prev, 0);
c2c087e6
DW
1444
1445 offset += __le32_to_cpu(pmap->blocks_per_member);
1446 offset += IMSM_RESERVED_SECTORS;
1447 }
a965f303 1448 map = get_imsm_map(dev, 0);
c2c087e6 1449 map->pba_of_lba0 = __cpu_to_le32(offset);
fcfd9599 1450 map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
ef6ffade
DW
1451 map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
1452 map->num_data_stripes = __cpu_to_le32(info_to_num_data_stripes(info));
c2c087e6
DW
1453 map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
1454 IMSM_T_STATE_NORMAL;
ef6ffade
DW
1455
1456 if (info->level == 1 && info->raid_disks > 2) {
1457 fprintf(stderr, Name": imsm does not support more than 2 disks"
1458 "in a raid1 volume\n");
1459 return 0;
1460 }
c2c087e6
DW
1461 if (info->level == 10)
1462 map->raid_level = 1;
1463 else
1464 map->raid_level = info->level;
ef6ffade 1465
c2c087e6
DW
1466 map->num_members = info->raid_disks;
1467 for (i = 0; i < map->num_members; i++) {
1468 /* initialized in add_to_super */
be73972f 1469 set_imsm_ord_tbl_ent(map, i, 0);
c2c087e6 1470 }
949c47a0
DW
1471 mpb->num_raid_devs++;
1472 super->dev_tbl[super->current_vol] = dev;
c2c087e6
DW
1473
1474 return 1;
cdddbdbc
DW
1475}
1476
bf5a934a
DW
1477static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
1478 unsigned long long size, char *name,
1479 char *homehost, int *uuid)
1480{
1481 /* This is primarily called by Create when creating a new array.
1482 * We will then get add_to_super called for each component, and then
1483 * write_init_super called to write it out to each device.
1484 * For IMSM, Create can create on fresh devices or on a pre-existing
1485 * array.
1486 * To create on a pre-existing array a different method will be called.
1487 * This one is just for fresh drives.
1488 */
1489 struct intel_super *super;
1490 struct imsm_super *mpb;
1491 size_t mpb_size;
1492
1493 if (!info) {
1494 st->sb = NULL;
1495 return 0;
1496 }
1497 if (st->sb)
1498 return init_super_imsm_volume(st, info, size, name, homehost,
1499 uuid);
1500
1501 super = alloc_super(1);
1502 if (!super)
1503 return 0;
1504 mpb_size = disks_to_mpb_size(info->nr_disks);
ef649044 1505 if (posix_memalign(&super->buf, 512, mpb_size) != 0) {
bf5a934a
DW
1506 free(super);
1507 return 0;
1508 }
ef649044 1509 mpb = super->buf;
bf5a934a
DW
1510 memset(mpb, 0, mpb_size);
1511
1512 memcpy(mpb->sig, MPB_SIGNATURE, strlen(MPB_SIGNATURE));
1513 memcpy(mpb->sig + strlen(MPB_SIGNATURE), MPB_VERSION_RAID5,
1514 strlen(MPB_VERSION_RAID5));
1515 mpb->mpb_size = mpb_size;
1516
bf5a934a
DW
1517 st->sb = super;
1518 return 1;
1519}
1520
1521static void add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
1522 int fd, char *devname)
1523{
1524 struct intel_super *super = st->sb;
d23fe947 1525 struct imsm_super *mpb = super->anchor;
bf5a934a
DW
1526 struct dl *dl;
1527 struct imsm_dev *dev;
1528 struct imsm_map *map;
bf5a934a
DW
1529 __u32 status;
1530
949c47a0 1531 dev = get_imsm_dev(super, super->current_vol);
a965f303 1532 map = get_imsm_map(dev, 0);
bf5a934a
DW
1533
1534 for (dl = super->disks; dl ; dl = dl->next)
1535 if (dl->major == dk->major &&
1536 dl->minor == dk->minor)
1537 break;
d23fe947 1538
bf5a934a
DW
1539 if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
1540 return;
1541
d23fe947
DW
1542 /* add a pristine spare to the metadata */
1543 if (dl->index < 0) {
1544 dl->index = super->anchor->num_disks;
1545 super->anchor->num_disks++;
1546 }
be73972f 1547 set_imsm_ord_tbl_ent(map, dk->number, dl->index);
bf5a934a 1548 status = CONFIGURED_DISK | USABLE_DISK;
d23fe947
DW
1549 dl->disk.status = __cpu_to_le32(status);
1550
1551 /* if we are creating the first raid device update the family number */
1552 if (super->current_vol == 0) {
1553 __u32 sum;
1554 struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
1555 struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index);
1556
1557 *_dev = *dev;
1558 *_disk = dl->disk;
1559 sum = __gen_imsm_checksum(mpb);
1560 mpb->family_num = __cpu_to_le32(sum);
1561 }
bf5a934a
DW
1562}
1563
c2c087e6 1564static void add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
cdddbdbc
DW
1565 int fd, char *devname)
1566{
c2c087e6 1567 struct intel_super *super = st->sb;
c2c087e6
DW
1568 struct dl *dd;
1569 unsigned long long size;
1570 __u32 status, id;
1571 int rv;
1572 struct stat stb;
1573
bf5a934a
DW
1574 if (super->current_vol >= 0) {
1575 add_to_super_imsm_volume(st, dk, fd, devname);
1576 return;
1577 }
1578
c2c087e6
DW
1579 fstat(fd, &stb);
1580 dd = malloc(sizeof(*dd));
b9f594fe 1581 if (!dd) {
c2c087e6
DW
1582 fprintf(stderr,
1583 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
1584 abort();
1585 }
1586 memset(dd, 0, sizeof(*dd));
1587 dd->major = major(stb.st_rdev);
1588 dd->minor = minor(stb.st_rdev);
b9f594fe 1589 dd->index = -1;
c2c087e6 1590 dd->devname = devname ? strdup(devname) : NULL;
c2c087e6
DW
1591 dd->fd = fd;
1592 rv = imsm_read_serial(fd, devname, dd->serial);
1593 if (rv) {
1594 fprintf(stderr,
0030e8d6 1595 Name ": failed to retrieve scsi serial, aborting\n");
949c47a0 1596 free(dd);
0030e8d6 1597 abort();
c2c087e6
DW
1598 }
1599
c2c087e6
DW
1600 get_dev_size(fd, NULL, &size);
1601 size /= 512;
1602 status = USABLE_DISK | SPARE_DISK;
b9f594fe
DW
1603 strcpy((char *) dd->disk.serial, (char *) dd->serial);
1604 dd->disk.total_blocks = __cpu_to_le32(size);
1605 dd->disk.status = __cpu_to_le32(status);
c2c087e6 1606 if (sysfs_disk_to_scsi_id(fd, &id) == 0)
b9f594fe 1607 dd->disk.scsi_id = __cpu_to_le32(id);
c2c087e6 1608 else
b9f594fe 1609 dd->disk.scsi_id = __cpu_to_le32(0);
43dad3d6
DW
1610
1611 if (st->update_tail) {
1612 dd->next = super->add;
1613 super->add = dd;
1614 } else {
1615 dd->next = super->disks;
1616 super->disks = dd;
1617 }
cdddbdbc
DW
1618}
1619
c2c087e6
DW
1620static int store_imsm_mpb(int fd, struct intel_super *super);
1621
d23fe947
DW
1622/* spare records have their own family number and do not have any defined raid
1623 * devices
1624 */
1625static int write_super_imsm_spares(struct intel_super *super, int doclose)
1626{
1627 struct imsm_super mpb_save;
1628 struct imsm_super *mpb = super->anchor;
1629 __u32 sum;
1630 struct dl *d;
1631
1632 mpb_save = *mpb;
1633 mpb->num_raid_devs = 0;
1634 mpb->num_disks = 1;
1635 mpb->mpb_size = sizeof(struct imsm_super);
1636 mpb->generation_num = __cpu_to_le32(1UL);
1637
1638 for (d = super->disks; d; d = d->next) {
8796fdc4 1639 if (d->index != -1)
d23fe947
DW
1640 continue;
1641
1642 mpb->disk[0] = d->disk;
1643 sum = __gen_imsm_checksum(mpb);
1644 mpb->family_num = __cpu_to_le32(sum);
1645 sum = __gen_imsm_checksum(mpb);
1646 mpb->check_sum = __cpu_to_le32(sum);
1647
1648 if (store_imsm_mpb(d->fd, super)) {
1649 fprintf(stderr, "%s: failed for device %d:%d %s\n",
1650 __func__, d->major, d->minor, strerror(errno));
1651 *mpb = mpb_save;
e74255d9 1652 return 1;
d23fe947
DW
1653 }
1654 if (doclose) {
1655 close(d->fd);
1656 d->fd = -1;
1657 }
1658 }
1659
1660 *mpb = mpb_save;
e74255d9 1661 return 0;
d23fe947
DW
1662}
1663
c2c087e6 1664static int write_super_imsm(struct intel_super *super, int doclose)
cdddbdbc 1665{
949c47a0 1666 struct imsm_super *mpb = super->anchor;
c2c087e6
DW
1667 struct dl *d;
1668 __u32 generation;
1669 __u32 sum;
d23fe947 1670 int spares = 0;
949c47a0 1671 int i;
a48ac0a8 1672 __u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
cdddbdbc 1673
c2c087e6
DW
1674 /* 'generation' is incremented everytime the metadata is written */
1675 generation = __le32_to_cpu(mpb->generation_num);
1676 generation++;
1677 mpb->generation_num = __cpu_to_le32(generation);
1678
d23fe947 1679 for (d = super->disks; d; d = d->next) {
8796fdc4 1680 if (d->index == -1)
d23fe947
DW
1681 spares++;
1682 else {
d23fe947 1683 mpb->disk[d->index] = d->disk;
a48ac0a8 1684 mpb_size += sizeof(struct imsm_disk);
d23fe947
DW
1685 }
1686 }
b9f594fe 1687
949c47a0
DW
1688 for (i = 0; i < mpb->num_raid_devs; i++) {
1689 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1690
1691 imsm_copy_dev(dev, super->dev_tbl[i]);
a48ac0a8 1692 mpb_size += sizeof_imsm_dev(dev, 0);
949c47a0 1693 }
a48ac0a8
DW
1694 mpb_size += __le32_to_cpu(mpb->bbm_log_size);
1695 mpb->mpb_size = __cpu_to_le32(mpb_size);
949c47a0 1696
c2c087e6 1697 /* recalculate checksum */
949c47a0 1698 sum = __gen_imsm_checksum(mpb);
c2c087e6
DW
1699 mpb->check_sum = __cpu_to_le32(sum);
1700
d23fe947 1701 /* write the mpb for disks that compose raid devices */
c2c087e6 1702 for (d = super->disks; d ; d = d->next) {
d23fe947
DW
1703 if (d->index < 0)
1704 continue;
8796fdc4 1705 if (store_imsm_mpb(d->fd, super))
c2c087e6
DW
1706 fprintf(stderr, "%s: failed for device %d:%d %s\n",
1707 __func__, d->major, d->minor, strerror(errno));
c2c087e6
DW
1708 if (doclose) {
1709 close(d->fd);
1710 d->fd = -1;
1711 }
1712 }
1713
d23fe947
DW
1714 if (spares)
1715 return write_super_imsm_spares(super, doclose);
1716
e74255d9 1717 return 0;
c2c087e6
DW
1718}
1719
43dad3d6
DW
1720static int create_array(struct supertype *st)
1721{
1722 size_t len;
1723 struct imsm_update_create_array *u;
1724 struct intel_super *super = st->sb;
1725 struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
1726
1727 len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0);
1728 u = malloc(len);
1729 if (!u) {
1730 fprintf(stderr, "%s: failed to allocate update buffer\n",
1731 __func__);
1732 return 1;
1733 }
1734
1735 u->type = update_create_array;
1736 u->dev_idx = super->current_vol;
1737 imsm_copy_dev(&u->dev, dev);
1738 append_metadata_update(st, u, len);
1739
1740 return 0;
1741}
1742
1743static int add_disk(struct supertype *st)
1744{
1745 struct intel_super *super = st->sb;
1746 size_t len;
1747 struct imsm_update_add_disk *u;
1748
1749 if (!super->add)
1750 return 0;
1751
1752 len = sizeof(*u);
1753 u = malloc(len);
1754 if (!u) {
1755 fprintf(stderr, "%s: failed to allocate update buffer\n",
1756 __func__);
1757 return 1;
1758 }
1759
1760 u->type = update_add_disk;
1761 append_metadata_update(st, u, len);
1762
1763 return 0;
1764}
1765
c2c087e6
DW
1766static int write_init_super_imsm(struct supertype *st)
1767{
8273f55e 1768 if (st->update_tail) {
43dad3d6
DW
1769 /* queue the recently created array / added disk
1770 * as a metadata update */
8273f55e 1771 struct intel_super *super = st->sb;
8273f55e 1772 struct dl *d;
43dad3d6 1773 int rv;
8273f55e 1774
43dad3d6
DW
1775 /* determine if we are creating a volume or adding a disk */
1776 if (super->current_vol < 0) {
1777 /* in the add disk case we are running in mdmon
1778 * context, so don't close fd's
1779 */
1780 return add_disk(st);
1781 } else
1782 rv = create_array(st);
8273f55e
DW
1783
1784 for (d = super->disks; d ; d = d->next) {
1785 close(d->fd);
1786 d->fd = -1;
1787 }
1788
43dad3d6 1789 return rv;
8273f55e
DW
1790 } else
1791 return write_super_imsm(st->sb, 1);
cdddbdbc
DW
1792}
1793
/* store_zero_imsm - wipe the imsm anchor of the device open on 'fd'
 *
 * Overwrites the second to last 512-byte sector with zeros.
 * Returns 0 on success and 1 on any failure.  'st' is not used.
 */
static int store_zero_imsm(struct supertype *st, int fd)
{
	unsigned long long dsize;
	void *buf;
	int failed;

	/* get_dev_size() reports failure with a zero return */
	if (!get_dev_size(fd, NULL, &dsize))
		return 1;

	/* first block is stored on second to last sector of the disk */
	if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
		return 1;

	if (posix_memalign(&buf, 512, 512) != 0)
		return 1;

	memset(buf, 0, 512);
	failed = write(fd, buf, 512) != 512;
	/* the buffer is only needed for the write itself */
	free(buf);

	return failed ? 1 : 0;
}
1813
cdddbdbc
DW
1814static int validate_geometry_imsm_container(struct supertype *st, int level,
1815 int layout, int raiddisks, int chunk,
c2c087e6 1816 unsigned long long size, char *dev,
2c514b71
NB
1817 unsigned long long *freesize,
1818 int verbose)
cdddbdbc 1819{
c2c087e6
DW
1820 int fd;
1821 unsigned long long ldsize;
cdddbdbc 1822
c2c087e6
DW
1823 if (level != LEVEL_CONTAINER)
1824 return 0;
1825 if (!dev)
1826 return 1;
1827
1828 fd = open(dev, O_RDONLY|O_EXCL, 0);
1829 if (fd < 0) {
2c514b71
NB
1830 if (verbose)
1831 fprintf(stderr, Name ": imsm: Cannot open %s: %s\n",
1832 dev, strerror(errno));
c2c087e6
DW
1833 return 0;
1834 }
1835 if (!get_dev_size(fd, dev, &ldsize)) {
1836 close(fd);
1837 return 0;
1838 }
1839 close(fd);
1840
1841 *freesize = avail_size_imsm(st, ldsize >> 9);
1842
1843 return 1;
cdddbdbc
DW
1844}
1845
c2c087e6
DW
1846/* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
1847 * FIX ME add ahci details
1848 */
8b353278
DW
1849static int validate_geometry_imsm_volume(struct supertype *st, int level,
1850 int layout, int raiddisks, int chunk,
c2c087e6 1851 unsigned long long size, char *dev,
2c514b71
NB
1852 unsigned long long *freesize,
1853 int verbose)
cdddbdbc 1854{
c2c087e6
DW
1855 struct stat stb;
1856 struct intel_super *super = st->sb;
1857 struct dl *dl;
1858 unsigned long long pos = 0;
1859 unsigned long long maxsize;
1860 struct extent *e;
1861 int i;
cdddbdbc 1862
c2c087e6
DW
1863 if (level == LEVEL_CONTAINER)
1864 return 0;
1865
1866 if (level == 1 && raiddisks > 2) {
2c514b71
NB
1867 if (verbose)
1868 fprintf(stderr, Name ": imsm does not support more "
1869 "than 2 in a raid1 configuration\n");
c2c087e6
DW
1870 return 0;
1871 }
1872
1873 /* We must have the container info already read in. */
1874 if (!super)
1875 return 0;
1876
1877 if (!dev) {
1878 /* General test: make sure there is space for
2da8544a
DW
1879 * 'raiddisks' device extents of size 'size' at a given
1880 * offset
c2c087e6
DW
1881 */
1882 unsigned long long minsize = size*2 /* convert to blocks */;
2da8544a 1883 unsigned long long start_offset = ~0ULL;
c2c087e6
DW
1884 int dcnt = 0;
1885 if (minsize == 0)
1886 minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
1887 for (dl = super->disks; dl ; dl = dl->next) {
1888 int found = 0;
1889
bf5a934a 1890 pos = 0;
c2c087e6
DW
1891 i = 0;
1892 e = get_extents(super, dl);
1893 if (!e) continue;
1894 do {
1895 unsigned long long esize;
1896 esize = e[i].start - pos;
1897 if (esize >= minsize)
1898 found = 1;
2da8544a
DW
1899 if (found && start_offset == ~0ULL) {
1900 start_offset = pos;
1901 break;
1902 } else if (found && pos != start_offset) {
1903 found = 0;
1904 break;
1905 }
c2c087e6
DW
1906 pos = e[i].start + e[i].size;
1907 i++;
1908 } while (e[i-1].size);
1909 if (found)
1910 dcnt++;
1911 free(e);
1912 }
1913 if (dcnt < raiddisks) {
2c514b71
NB
1914 if (verbose)
1915 fprintf(stderr, Name ": imsm: Not enough "
1916 "devices with space for this array "
1917 "(%d < %d)\n",
1918 dcnt, raiddisks);
c2c087e6
DW
1919 return 0;
1920 }
1921 return 1;
1922 }
1923 /* This device must be a member of the set */
1924 if (stat(dev, &stb) < 0)
1925 return 0;
1926 if ((S_IFMT & stb.st_mode) != S_IFBLK)
1927 return 0;
1928 for (dl = super->disks ; dl ; dl = dl->next) {
1929 if (dl->major == major(stb.st_rdev) &&
1930 dl->minor == minor(stb.st_rdev))
1931 break;
1932 }
1933 if (!dl) {
2c514b71
NB
1934 if (verbose)
1935 fprintf(stderr, Name ": %s is not in the "
1936 "same imsm set\n", dev);
c2c087e6
DW
1937 return 0;
1938 }
1939 e = get_extents(super, dl);
1940 maxsize = 0;
1941 i = 0;
1942 if (e) do {
1943 unsigned long long esize;
1944 esize = e[i].start - pos;
1945 if (esize >= maxsize)
1946 maxsize = esize;
1947 pos = e[i].start + e[i].size;
1948 i++;
1949 } while (e[i-1].size);
1950 *freesize = maxsize;
1951
1952 return 1;
cdddbdbc
DW
1953}
1954
604b746f
JD
1955int imsm_bbm_log_size(struct imsm_super *mpb)
1956{
1957 return __le32_to_cpu(mpb->bbm_log_size);
1958}
1959
bf5a934a
DW
1960static int validate_geometry_imsm(struct supertype *st, int level, int layout,
1961 int raiddisks, int chunk, unsigned long long size,
1962 char *dev, unsigned long long *freesize,
1963 int verbose)
1964{
1965 int fd, cfd;
1966 struct mdinfo *sra;
1967
1968 /* if given unused devices create a container
1969 * if given given devices in a container create a member volume
1970 */
1971 if (level == LEVEL_CONTAINER) {
1972 /* Must be a fresh device to add to a container */
1973 return validate_geometry_imsm_container(st, level, layout,
1974 raiddisks, chunk, size,
1975 dev, freesize,
1976 verbose);
1977 }
1978
1979 if (st->sb) {
1980 /* creating in a given container */
1981 return validate_geometry_imsm_volume(st, level, layout,
1982 raiddisks, chunk, size,
1983 dev, freesize, verbose);
1984 }
1985
1986 /* limit creation to the following levels */
1987 if (!dev)
1988 switch (level) {
1989 case 0:
1990 case 1:
1991 case 10:
1992 case 5:
1993 break;
1994 default:
1995 return 1;
1996 }
1997
1998 /* This device needs to be a device in an 'imsm' container */
1999 fd = open(dev, O_RDONLY|O_EXCL, 0);
2000 if (fd >= 0) {
2001 if (verbose)
2002 fprintf(stderr,
2003 Name ": Cannot create this array on device %s\n",
2004 dev);
2005 close(fd);
2006 return 0;
2007 }
2008 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
2009 if (verbose)
2010 fprintf(stderr, Name ": Cannot open %s: %s\n",
2011 dev, strerror(errno));
2012 return 0;
2013 }
2014 /* Well, it is in use by someone, maybe an 'imsm' container. */
2015 cfd = open_container(fd);
2016 if (cfd < 0) {
2017 close(fd);
2018 if (verbose)
2019 fprintf(stderr, Name ": Cannot use %s: It is busy\n",
2020 dev);
2021 return 0;
2022 }
2023 sra = sysfs_read(cfd, 0, GET_VERSION);
2024 close(fd);
2025 if (sra && sra->array.major_version == -1 &&
2026 strcmp(sra->text_version, "imsm") == 0) {
2027 /* This is a member of a imsm container. Load the container
2028 * and try to create a volume
2029 */
2030 struct intel_super *super;
2031
2032 if (load_super_imsm_all(st, cfd, (void **) &super, NULL, 1) == 0) {
2033 st->sb = super;
2034 st->container_dev = fd2devnum(cfd);
2035 close(cfd);
2036 return validate_geometry_imsm_volume(st, level, layout,
2037 raiddisks, chunk,
2038 size, dev,
2039 freesize, verbose);
2040 }
2041 close(cfd);
2042 } else /* may belong to another container */
2043 return 0;
2044
2045 return 1;
2046}
2047
cdddbdbc
DW
2048static struct mdinfo *container_content_imsm(struct supertype *st)
2049{
4f5bc454
DW
2050 /* Given a container loaded by load_super_imsm_all,
2051 * extract information about all the arrays into
2052 * an mdinfo tree.
2053 *
2054 * For each imsm_dev create an mdinfo, fill it in,
2055 * then look for matching devices in super->disks
2056 * and create appropriate device mdinfo.
2057 */
2058 struct intel_super *super = st->sb;
949c47a0 2059 struct imsm_super *mpb = super->anchor;
4f5bc454
DW
2060 struct mdinfo *rest = NULL;
2061 int i;
cdddbdbc 2062
604b746f
JD
2063 /* do not assemble arrays that might have bad blocks */
2064 if (imsm_bbm_log_size(super->anchor)) {
2065 fprintf(stderr, Name ": BBM log found in metadata. "
2066 "Cannot activate array(s).\n");
2067 return NULL;
2068 }
2069
4f5bc454 2070 for (i = 0; i < mpb->num_raid_devs; i++) {
949c47a0 2071 struct imsm_dev *dev = get_imsm_dev(super, i);
4f5bc454 2072 struct imsm_vol *vol = &dev->vol;
a965f303 2073 struct imsm_map *map = get_imsm_map(dev, 0);
4f5bc454 2074 struct mdinfo *this;
4f5bc454
DW
2075 int slot;
2076
2077 this = malloc(sizeof(*this));
2078 memset(this, 0, sizeof(*this));
2079 this->next = rest;
4f5bc454 2080
4f5bc454
DW
2081 this->array.level = get_imsm_raid_level(map);
2082 this->array.raid_disks = map->num_members;
c2c087e6 2083 this->array.layout = imsm_level_to_layout(this->array.level);
4f5bc454
DW
2084 this->array.md_minor = -1;
2085 this->array.ctime = 0;
2086 this->array.utime = 0;
2087 this->array.chunk_size = __le16_to_cpu(map->blocks_per_strip) << 9;
2088 this->array.state = !vol->dirty;
2089 this->container_member = i;
9a1608e5
DW
2090 if (map->map_state == IMSM_T_STATE_UNINITIALIZED ||
2091 dev->vol.dirty || dev->vol.migr_state)
0fd5c350
DW
2092 this->resync_start = 0;
2093 else
2094 this->resync_start = ~0ULL;
2095
4f5bc454
DW
2096 strncpy(this->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
2097 this->name[MAX_RAID_SERIAL_LEN] = 0;
2098
159c3a1a
NB
2099 sprintf(this->text_version, "/%s/%d",
2100 devnum2devname(st->container_dev),
2101 this->container_member);
2102
4f5bc454
DW
2103 memset(this->uuid, 0, sizeof(this->uuid));
2104
f54e6321 2105 this->component_size = __le32_to_cpu(map->blocks_per_member);
4f5bc454
DW
2106
2107 for (slot = 0 ; slot < map->num_members; slot++) {
4f5bc454
DW
2108 struct mdinfo *info_d;
2109 struct dl *d;
2110 int idx;
9a1608e5 2111 int skip;
4f5bc454 2112 __u32 s;
7eef0453 2113 __u32 ord;
4f5bc454 2114
9a1608e5 2115 skip = 0;
ff077194 2116 idx = get_imsm_disk_idx(dev, slot);
7eef0453 2117 ord = get_imsm_ord_tbl_ent(dev, slot);
4f5bc454
DW
2118 for (d = super->disks; d ; d = d->next)
2119 if (d->index == idx)
2120 break;
2121
2122 if (d == NULL)
9a1608e5
DW
2123 skip = 1;
2124
2125 s = d ? __le32_to_cpu(d->disk.status) : 0;
2126 if (s & FAILED_DISK)
2127 skip = 1;
2128 if (!(s & USABLE_DISK))
2129 skip = 1;
7eef0453
DW
2130 if (ord & IMSM_ORD_REBUILD)
2131 skip = 1;
9a1608e5
DW
2132
2133 /*
2134 * if we skip some disks the array will be assmebled degraded;
2135 * reset resync start to avoid a dirty-degraded situation
2136 *
2137 * FIXME handle dirty degraded
2138 */
2139 if (skip && !dev->vol.dirty)
2140 this->resync_start = ~0ULL;
2141 if (skip)
2142 continue;
4f5bc454
DW
2143
2144 info_d = malloc(sizeof(*info_d));
9a1608e5
DW
2145 if (!info_d) {
2146 fprintf(stderr, Name ": failed to allocate disk"
2147 " for volume %s\n", (char *) dev->volume);
2148 free(this);
2149 this = rest;
2150 break;
2151 }
4f5bc454
DW
2152 memset(info_d, 0, sizeof(*info_d));
2153 info_d->next = this->devs;
2154 this->devs = info_d;
2155
4f5bc454
DW
2156 info_d->disk.number = d->index;
2157 info_d->disk.major = d->major;
2158 info_d->disk.minor = d->minor;
2159 info_d->disk.raid_disk = slot;
4f5bc454
DW
2160
2161 this->array.working_disks++;
2162
2163 info_d->events = __le32_to_cpu(mpb->generation_num);
2164 info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
2165 info_d->component_size = __le32_to_cpu(map->blocks_per_member);
2166 if (d->devname)
2167 strcpy(info_d->name, d->devname);
2168 }
9a1608e5 2169 rest = this;
4f5bc454
DW
2170 }
2171
2172 return rest;
cdddbdbc
DW
2173}
2174
845dea95 2175
cba0191b
NB
2176static int imsm_open_new(struct supertype *c, struct active_array *a,
2177 char *inst)
845dea95 2178{
0372d5a2 2179 struct intel_super *super = c->sb;
949c47a0 2180 struct imsm_super *mpb = super->anchor;
0372d5a2 2181
949c47a0 2182 if (atoi(inst) >= mpb->num_raid_devs) {
0372d5a2
DW
2183 fprintf(stderr, "%s: subarry index %d, out of range\n",
2184 __func__, atoi(inst));
2185 return -ENODEV;
2186 }
2187
4e6e574a 2188 dprintf("imsm: open_new %s\n", inst);
cba0191b 2189 a->info.container_member = atoi(inst);
845dea95
NB
2190 return 0;
2191}
2192
fb49eef2 2193static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed)
c2a1e7da 2194{
a965f303 2195 struct imsm_map *map = get_imsm_map(dev, 0);
c2a1e7da
DW
2196
2197 if (!failed)
3393c6af
DW
2198 return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
2199 IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;
c2a1e7da
DW
2200
2201 switch (get_imsm_raid_level(map)) {
2202 case 0:
2203 return IMSM_T_STATE_FAILED;
2204 break;
2205 case 1:
2206 if (failed < map->num_members)
2207 return IMSM_T_STATE_DEGRADED;
2208 else
2209 return IMSM_T_STATE_FAILED;
2210 break;
2211 case 10:
2212 {
2213 /**
2214 * check to see if any mirrors have failed,
2215 * otherwise we are degraded
2216 */
2217 int device_per_mirror = 2; /* FIXME is this always the case?
2218 * and are they always adjacent?
2219 */
8796fdc4 2220 int r10fail = 0;
c2a1e7da
DW
2221 int i;
2222
2223 for (i = 0; i < map->num_members; i++) {
ff077194 2224 int idx = get_imsm_disk_idx(dev, i);
949c47a0 2225 struct imsm_disk *disk = get_imsm_disk(super, idx);
c2a1e7da 2226
8796fdc4
DW
2227 if (!disk)
2228 r10fail++;
2229 else if (__le32_to_cpu(disk->status) & FAILED_DISK)
2230 r10fail++;
c2a1e7da 2231
8796fdc4 2232 if (r10fail >= device_per_mirror)
c2a1e7da
DW
2233 return IMSM_T_STATE_FAILED;
2234
8796fdc4 2235 /* reset 'r10fail' for next mirror set */
c2a1e7da 2236 if (!((i + 1) % device_per_mirror))
8796fdc4 2237 r10fail = 0;
c2a1e7da
DW
2238 }
2239
2240 return IMSM_T_STATE_DEGRADED;
2241 }
2242 case 5:
2243 if (failed < 2)
2244 return IMSM_T_STATE_DEGRADED;
2245 else
2246 return IMSM_T_STATE_FAILED;
2247 break;
2248 default:
2249 break;
2250 }
2251
2252 return map->map_state;
2253}
2254
ff077194 2255static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev)
c2a1e7da
DW
2256{
2257 int i;
2258 int failed = 0;
2259 struct imsm_disk *disk;
ff077194 2260 struct imsm_map *map = get_imsm_map(dev, 0);
c2a1e7da
DW
2261
2262 for (i = 0; i < map->num_members; i++) {
ff077194 2263 int idx = get_imsm_disk_idx(dev, i);
c2a1e7da 2264
949c47a0 2265 disk = get_imsm_disk(super, idx);
8796fdc4
DW
2266 if (!disk)
2267 failed++;
2268 else if (__le32_to_cpu(disk->status) & FAILED_DISK)
c2a1e7da 2269 failed++;
fcb84475
DW
2270 else if (!(__le32_to_cpu(disk->status) & USABLE_DISK))
2271 failed++;
c2a1e7da
DW
2272 }
2273
2274 return failed;
845dea95
NB
2275}
2276
01f157d7 2277static int imsm_set_array_state(struct active_array *a, int consistent)
a862209d
DW
2278{
2279 int inst = a->info.container_member;
2280 struct intel_super *super = a->container->sb;
949c47a0 2281 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 2282 struct imsm_map *map = get_imsm_map(dev, 0);
a862209d
DW
2283 int dirty = !consistent;
2284 int failed;
2285 __u8 map_state;
2286
ff077194 2287 failed = imsm_count_failed(super, dev);
fb49eef2 2288 map_state = imsm_check_degraded(super, dev, failed);
3393c6af 2289
272906ef
DW
2290 if (consistent && !dev->vol.dirty &&
2291 (dev->vol.migr_state || map_state != IMSM_T_STATE_NORMAL))
2292 a->resync_start = 0ULL;
01f157d7
N
2293 if (consistent == 2 && a->resync_start != ~0ULL)
2294 consistent = 0;
272906ef 2295
a862209d 2296 if (a->resync_start == ~0ULL) {
115c3803 2297 /* complete recovery or initial resync */
a862209d 2298 if (map->map_state != map_state) {
4e6e574a 2299 dprintf("imsm: map_state %d: %d\n",
a862209d
DW
2300 inst, map_state);
2301 map->map_state = map_state;
2302 super->updates_pending++;
2303 }
3393c6af
DW
2304 if (dev->vol.migr_state) {
2305 dprintf("imsm: mark resync complete\n");
2306 dev->vol.migr_state = 0;
2307 dev->vol.migr_type = 0;
115c3803 2308 super->updates_pending++;
115c3803 2309 }
3393c6af
DW
2310 } else if (!dev->vol.migr_state) {
2311 dprintf("imsm: mark '%s' (%llu)\n",
2312 failed ? "rebuild" : "initializing", a->resync_start);
2313 /* mark that we are rebuilding */
2314 map->map_state = failed ? map_state : IMSM_T_STATE_NORMAL;
2315 dev->vol.migr_state = 1;
2316 dev->vol.migr_type = failed ? 1 : 0;
2317 dup_map(dev);
272906ef 2318 a->check_degraded = 1;
3393c6af 2319 super->updates_pending++;
115c3803 2320 }
a862209d 2321
3393c6af
DW
2322 /* mark dirty / clean */
2323 if (dirty != dev->vol.dirty) {
2324 dprintf("imsm: mark '%s' (%llu)\n",
2325 dirty ? "dirty" : "clean", a->resync_start);
2326 dev->vol.dirty = dirty;
a862209d
DW
2327 super->updates_pending++;
2328 }
01f157d7 2329 return consistent;
a862209d
DW
2330}
2331
8d45d196 2332static void imsm_set_disk(struct active_array *a, int n, int state)
845dea95 2333{
8d45d196
DW
2334 int inst = a->info.container_member;
2335 struct intel_super *super = a->container->sb;
949c47a0 2336 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 2337 struct imsm_map *map = get_imsm_map(dev, 0);
8d45d196
DW
2338 struct imsm_disk *disk;
2339 __u32 status;
2340 int failed = 0;
2341 int new_failure = 0;
2342
2343 if (n > map->num_members)
2344 fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
2345 n, map->num_members - 1);
2346
2347 if (n < 0)
2348 return;
2349
4e6e574a 2350 dprintf("imsm: set_disk %d:%x\n", n, state);
8d45d196 2351
ff077194 2352 disk = get_imsm_disk(super, get_imsm_disk_idx(dev, n));
8d45d196 2353
5802a811 2354 /* check for new failures */
8d45d196
DW
2355 status = __le32_to_cpu(disk->status);
2356 if ((state & DS_FAULTY) && !(status & FAILED_DISK)) {
2357 status |= FAILED_DISK;
2358 disk->status = __cpu_to_le32(status);
8796fdc4
DW
2359 disk->scsi_id = __cpu_to_le32(~0UL);
2360 memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
8d45d196 2361 new_failure = 1;
5802a811 2362 super->updates_pending++;
8d45d196 2363 }
19859edc
DW
2364 /* check if in_sync */
2365 if ((state & DS_INSYNC) && !(status & USABLE_DISK)) {
2366 status |= USABLE_DISK;
2367 disk->status = __cpu_to_le32(status);
2368 super->updates_pending++;
2369 }
8d45d196 2370
5802a811 2371 /* the number of failures have changed, count up 'failed' to determine
8d45d196
DW
2372 * degraded / failed status
2373 */
2374 if (new_failure && map->map_state != IMSM_T_STATE_FAILED)
ff077194 2375 failed = imsm_count_failed(super, dev);
8d45d196 2376
5802a811 2377 /* determine map_state based on failed or in_sync count */
8d45d196 2378 if (failed)
fb49eef2 2379 map->map_state = imsm_check_degraded(super, dev, failed);
5802a811
DW
2380 else if (map->map_state == IMSM_T_STATE_DEGRADED) {
2381 struct mdinfo *d;
2382 int working = 0;
8d45d196 2383
5802a811
DW
2384 for (d = a->info.devs ; d ; d = d->next)
2385 if (d->curr_state & DS_INSYNC)
2386 working++;
2387
2388 if (working == a->info.array.raid_disks) {
2389 map->map_state = IMSM_T_STATE_NORMAL;
3393c6af
DW
2390 dev->vol.migr_state = 0;
2391 dev->vol.migr_type = 0;
5802a811
DW
2392 super->updates_pending++;
2393 }
2394 }
845dea95
NB
2395}
2396
c2a1e7da
DW
2397static int store_imsm_mpb(int fd, struct intel_super *super)
2398{
949c47a0 2399 struct imsm_super *mpb = super->anchor;
c2a1e7da
DW
2400 __u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
2401 unsigned long long dsize;
2402 unsigned long long sectors;
2403
2404 get_dev_size(fd, NULL, &dsize);
2405
272f648f
DW
2406 if (mpb_size > 512) {
2407 /* -1 to account for anchor */
2408 sectors = mpb_sectors(mpb) - 1;
c2a1e7da 2409
272f648f
DW
2410 /* write the extended mpb to the sectors preceeding the anchor */
2411 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
2412 return 1;
c2a1e7da 2413
99e29264 2414 if (write(fd, super->buf + 512, 512 * sectors) != 512 * sectors)
272f648f
DW
2415 return 1;
2416 }
c2a1e7da 2417
272f648f
DW
2418 /* first block is stored on second to last sector of the disk */
2419 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
c2a1e7da
DW
2420 return 1;
2421
272f648f 2422 if (write(fd, super->buf, 512) != 512)
c2a1e7da
DW
2423 return 1;
2424
c2a1e7da
DW
2425 return 0;
2426}
2427
2e735d19 2428static void imsm_sync_metadata(struct supertype *container)
845dea95 2429{
2e735d19 2430 struct intel_super *super = container->sb;
c2a1e7da
DW
2431
2432 if (!super->updates_pending)
2433 return;
2434
c2c087e6 2435 write_super_imsm(super, 0);
c2a1e7da
DW
2436
2437 super->updates_pending = 0;
845dea95
NB
2438}
2439
272906ef
DW
2440static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
2441{
2442 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
ff077194 2443 int i = get_imsm_disk_idx(dev, idx);
272906ef
DW
2444 struct dl *dl;
2445
2446 for (dl = super->disks; dl; dl = dl->next)
2447 if (dl->index == i)
2448 break;
2449
8796fdc4 2450 if (dl && __le32_to_cpu(dl->disk.status) & FAILED_DISK)
272906ef
DW
2451 dl = NULL;
2452
2453 if (dl)
2454 dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor);
2455
2456 return dl;
2457}
2458
2459static struct dl *imsm_add_spare(struct intel_super *super, int idx, struct active_array *a)
2460{
2461 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
2462 struct imsm_map *map = get_imsm_map(dev, 0);
2463 unsigned long long esize;
2464 unsigned long long pos;
2465 struct mdinfo *d;
2466 struct extent *ex;
2467 int j;
2468 int found;
2469 __u32 array_start;
9a1608e5 2470 __u32 status;
272906ef
DW
2471 struct dl *dl;
2472
2473 for (dl = super->disks; dl; dl = dl->next) {
2474 /* If in this array, skip */
2475 for (d = a->info.devs ; d ; d = d->next)
2476 if (d->disk.major == dl->major &&
2477 d->disk.minor == dl->minor) {
2478 dprintf("%x:%x already in array\n", dl->major, dl->minor);
2479 break;
2480 }
2481 if (d)
2482 continue;
2483
9a1608e5
DW
2484 /* skip marked in use or failed drives */
2485 status = __le32_to_cpu(dl->disk.status);
2486 if (status & FAILED_DISK || status & CONFIGURED_DISK) {
2487 dprintf("%x:%x status ( %s%s)\n",
2488 dl->major, dl->minor,
2489 status & FAILED_DISK ? "failed " : "",
2490 status & CONFIGURED_DISK ? "configured " : "");
2491 continue;
2492 }
2493
272906ef
DW
2494 /* Does this unused device have the requisite free space?
2495 * We need a->info.component_size sectors
2496 */
2497 ex = get_extents(super, dl);
2498 if (!ex) {
2499 dprintf("cannot get extents\n");
2500 continue;
2501 }
2502 found = 0;
2503 j = 0;
2504 pos = 0;
2505 array_start = __le32_to_cpu(map->pba_of_lba0);
2506
2507 do {
2508 /* check that we can start at pba_of_lba0 with
2509 * a->info.component_size of space
2510 */
2511 esize = ex[j].start - pos;
2512 if (array_start >= pos &&
2513 array_start + a->info.component_size < ex[j].start) {
2514 found = 1;
2515 break;
2516 }
2517 pos = ex[j].start + ex[j].size;
2518 j++;
2519
2520 } while (ex[j-1].size);
2521
2522 free(ex);
2523 if (!found) {
2524 dprintf("%x:%x does not have %llu at %d\n",
2525 dl->major, dl->minor,
2526 a->info.component_size,
2527 __le32_to_cpu(map->pba_of_lba0));
2528 /* No room */
2529 continue;
2530 } else
2531 break;
2532 }
2533
2534 return dl;
2535}
2536
88758e9d
DW
2537static struct mdinfo *imsm_activate_spare(struct active_array *a,
2538 struct metadata_update **updates)
2539{
2540 /**
d23fe947
DW
2541 * Find a device with unused free space and use it to replace a
2542 * failed/vacant region in an array. We replace failed regions one a
2543 * array at a time. The result is that a new spare disk will be added
2544 * to the first failed array and after the monitor has finished
2545 * propagating failures the remainder will be consumed.
88758e9d 2546 *
d23fe947
DW
2547 * FIXME add a capability for mdmon to request spares from another
2548 * container.
88758e9d
DW
2549 */
2550
2551 struct intel_super *super = a->container->sb;
88758e9d 2552 int inst = a->info.container_member;
949c47a0 2553 struct imsm_dev *dev = get_imsm_dev(super, inst);
a965f303 2554 struct imsm_map *map = get_imsm_map(dev, 0);
88758e9d
DW
2555 int failed = a->info.array.raid_disks;
2556 struct mdinfo *rv = NULL;
2557 struct mdinfo *d;
2558 struct mdinfo *di;
2559 struct metadata_update *mu;
2560 struct dl *dl;
2561 struct imsm_update_activate_spare *u;
2562 int num_spares = 0;
2563 int i;
2564
2565 for (d = a->info.devs ; d ; d = d->next) {
2566 if ((d->curr_state & DS_FAULTY) &&
2567 d->state_fd >= 0)
2568 /* wait for Removal to happen */
2569 return NULL;
2570 if (d->state_fd >= 0)
2571 failed--;
2572 }
2573
2574 dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
2575 inst, failed, a->info.array.raid_disks, a->info.array.level);
fb49eef2 2576 if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED)
88758e9d
DW
2577 return NULL;
2578
2579 /* For each slot, if it is not working, find a spare */
88758e9d
DW
2580 for (i = 0; i < a->info.array.raid_disks; i++) {
2581 for (d = a->info.devs ; d ; d = d->next)
2582 if (d->disk.raid_disk == i)
2583 break;
2584 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
2585 if (d && (d->state_fd >= 0))
2586 continue;
2587
272906ef
DW
2588 /*
2589 * OK, this device needs recovery. Try to re-add the previous
2590 * occupant of this slot, if this fails add a new spare
2591 */
2592 dl = imsm_readd(super, i, a);
2593 if (!dl)
2594 dl = imsm_add_spare(super, i, a);
2595 if (!dl)
2596 continue;
2597
2598 /* found a usable disk with enough space */
2599 di = malloc(sizeof(*di));
2600 memset(di, 0, sizeof(*di));
2601
2602 /* dl->index will be -1 in the case we are activating a
2603 * pristine spare. imsm_process_update() will create a
2604 * new index in this case. Once a disk is found to be
2605 * failed in all member arrays it is kicked from the
2606 * metadata
2607 */
2608 di->disk.number = dl->index;
d23fe947 2609
272906ef
DW
2610 /* (ab)use di->devs to store a pointer to the device
2611 * we chose
2612 */
2613 di->devs = (struct mdinfo *) dl;
2614
2615 di->disk.raid_disk = i;
2616 di->disk.major = dl->major;
2617 di->disk.minor = dl->minor;
2618 di->disk.state = 0;
2619 di->data_offset = __le32_to_cpu(map->pba_of_lba0);
2620 di->component_size = a->info.component_size;
2621 di->container_member = inst;
2622 di->next = rv;
2623 rv = di;
2624 num_spares++;
2625 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
2626 i, di->data_offset);
88758e9d 2627
272906ef 2628 break;
88758e9d
DW
2629 }
2630
2631 if (!rv)
2632 /* No spares found */
2633 return rv;
2634 /* Now 'rv' has a list of devices to return.
2635 * Create a metadata_update record to update the
2636 * disk_ord_tbl for the array
2637 */
2638 mu = malloc(sizeof(*mu));
2639 mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
2640 mu->space = NULL;
2641 mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
2642 mu->next = *updates;
2643 u = (struct imsm_update_activate_spare *) mu->buf;
2644
2645 for (di = rv ; di ; di = di->next) {
2646 u->type = update_activate_spare;
d23fe947
DW
2647 u->dl = (struct dl *) di->devs;
2648 di->devs = NULL;
88758e9d
DW
2649 u->slot = di->disk.raid_disk;
2650 u->array = inst;
2651 u->next = u + 1;
2652 u++;
2653 }
2654 (u-1)->next = NULL;
2655 *updates = mu;
2656
2657 return rv;
2658}
2659
ff077194 2660static int disks_overlap(struct imsm_dev *d1, struct imsm_dev *d2)
8273f55e 2661{
ff077194
DW
2662 struct imsm_map *m1 = get_imsm_map(d1, 0);
2663 struct imsm_map *m2 = get_imsm_map(d2, 0);
8273f55e
DW
2664 int i;
2665 int j;
2666 int idx;
2667
2668 for (i = 0; i < m1->num_members; i++) {
ff077194 2669 idx = get_imsm_disk_idx(d1, i);
8273f55e 2670 for (j = 0; j < m2->num_members; j++)
ff077194 2671 if (idx == get_imsm_disk_idx(d2, j))
8273f55e
DW
2672 return 1;
2673 }
2674
2675 return 0;
2676}
2677
ae6aad82
DW
2678static void imsm_delete(struct intel_super *super, struct dl **dlp);
2679
e8319a19
DW
2680static void imsm_process_update(struct supertype *st,
2681 struct metadata_update *update)
2682{
2683 /**
2684 * crack open the metadata_update envelope to find the update record
2685 * update can be one of:
2686 * update_activate_spare - a spare device has replaced a failed
2687 * device in an array, update the disk_ord_tbl. If this disk is
2688 * present in all member arrays then also clear the SPARE_DISK
2689 * flag
2690 */
2691 struct intel_super *super = st->sb;
4d7b1503 2692 struct imsm_super *mpb;
e8319a19
DW
2693 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
2694
4d7b1503
DW
2695 /* update requires a larger buf but the allocation failed */
2696 if (super->next_len && !super->next_buf) {
2697 super->next_len = 0;
2698 return;
2699 }
2700
2701 if (super->next_buf) {
2702 memcpy(super->next_buf, super->buf, super->len);
2703 free(super->buf);
2704 super->len = super->next_len;
2705 super->buf = super->next_buf;
2706
2707 super->next_len = 0;
2708 super->next_buf = NULL;
2709 }
2710
2711 mpb = super->anchor;
2712
e8319a19
DW
2713 switch (type) {
2714 case update_activate_spare: {
2715 struct imsm_update_activate_spare *u = (void *) update->buf;
949c47a0 2716 struct imsm_dev *dev = get_imsm_dev(super, u->array);
a965f303 2717 struct imsm_map *map = get_imsm_map(dev, 0);
e8319a19
DW
2718 struct active_array *a;
2719 struct imsm_disk *disk;
2720 __u32 status;
2721 struct dl *dl;
e8319a19
DW
2722 unsigned int found;
2723 int victim;
2724 int i;
2725
2726 for (dl = super->disks; dl; dl = dl->next)
d23fe947 2727 if (dl == u->dl)
e8319a19
DW
2728 break;
2729
2730 if (!dl) {
2731 fprintf(stderr, "error: imsm_activate_spare passed "
d23fe947
DW
2732 "an unknown disk (index: %d serial: %s)\n",
2733 u->dl->index, u->dl->serial);
e8319a19
DW
2734 return;
2735 }
2736
2737 super->updates_pending++;
2738
d23fe947
DW
2739 /* adding a pristine spare, assign a new index */
2740 if (dl->index < 0) {
2741 dl->index = super->anchor->num_disks;
2742 super->anchor->num_disks++;
2743 }
ff077194 2744 victim = get_imsm_disk_idx(dev, u->slot);
be73972f 2745 set_imsm_ord_tbl_ent(map, u->slot, dl->index);
d23fe947 2746 disk = &dl->disk;
e8319a19
DW
2747 status = __le32_to_cpu(disk->status);
2748 status |= CONFIGURED_DISK;
19859edc 2749 status &= ~(SPARE_DISK | USABLE_DISK);
e8319a19
DW
2750 disk->status = __cpu_to_le32(status);
2751
e8319a19
DW
2752 /* count arrays using the victim in the metadata */
2753 found = 0;
2754 for (a = st->arrays; a ; a = a->next) {
949c47a0 2755 dev = get_imsm_dev(super, a->info.container_member);
e8319a19 2756 for (i = 0; i < map->num_members; i++)
ff077194 2757 if (victim == get_imsm_disk_idx(dev, i))
e8319a19
DW
2758 found++;
2759 }
2760
2761 /* clear some flags if the victim is no longer being
2762 * utilized anywhere
2763 */
e8319a19 2764 if (!found) {
ae6aad82
DW
2765 struct dl **dlp;
2766 for (dlp = &super->disks; *dlp; )
2767 if ((*dlp)->index == victim)
2768 break;
2769 disk = &(*dlp)->disk;
e8319a19
DW
2770 status = __le32_to_cpu(disk->status);
2771 status &= ~(CONFIGURED_DISK | USABLE_DISK);
2772 disk->status = __cpu_to_le32(status);
ae6aad82
DW
2773 /* We know that 'manager' isn't touching anything,
2774 * so it is safe to:
2775 */
2776 imsm_delete(super, dlp);
e8319a19 2777 }
8273f55e
DW
2778 break;
2779 }
2780 case update_create_array: {
2781 /* someone wants to create a new array, we need to be aware of
2782 * a few races/collisions:
2783 * 1/ 'Create' called by two separate instances of mdadm
2784 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
2785 * devices that have since been assimilated via
2786 * activate_spare.
2787 * In the event this update can not be carried out mdadm will
2788 * (FIX ME) notice that its update did not take hold.
2789 */
2790 struct imsm_update_create_array *u = (void *) update->buf;
2791 struct imsm_dev *dev;
2792 struct imsm_map *map, *new_map;
2793 unsigned long long start, end;
2794 unsigned long long new_start, new_end;
2795 int i;
2796 int overlap = 0;
2797
2798 /* handle racing creates: first come first serve */
2799 if (u->dev_idx < mpb->num_raid_devs) {
2800 dprintf("%s: subarray %d already defined\n",
2801 __func__, u->dev_idx);
2802 return;
2803 }
2804
2805 /* check update is next in sequence */
2806 if (u->dev_idx != mpb->num_raid_devs) {
6a3e913e
DW
2807 dprintf("%s: can not create array %d expected index %d\n",
2808 __func__, u->dev_idx, mpb->num_raid_devs);
8273f55e
DW
2809 return;
2810 }
2811
a965f303 2812 new_map = get_imsm_map(&u->dev, 0);
8273f55e
DW
2813 new_start = __le32_to_cpu(new_map->pba_of_lba0);
2814 new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);
2815
2816 /* handle activate_spare versus create race:
2817 * check to make sure that overlapping arrays do not include
2818 * overalpping disks
2819 */
2820 for (i = 0; i < mpb->num_raid_devs; i++) {
949c47a0 2821 dev = get_imsm_dev(super, i);
a965f303 2822 map = get_imsm_map(dev, 0);
8273f55e
DW
2823 start = __le32_to_cpu(map->pba_of_lba0);
2824 end = start + __le32_to_cpu(map->blocks_per_member);
2825 if ((new_start >= start && new_start <= end) ||
2826 (start >= new_start && start <= new_end))
2827 overlap = 1;
ff077194 2828 if (overlap && disks_overlap(dev, &u->dev)) {
8273f55e
DW
2829 dprintf("%s: arrays overlap\n", __func__);
2830 return;
2831 }
2832 }
2833 /* check num_members sanity */
2834 if (new_map->num_members > mpb->num_disks) {
2835 dprintf("%s: num_disks out of range\n", __func__);
2836 return;
2837 }
2838
949c47a0
DW
2839 /* check that prepare update was successful */
2840 if (!update->space) {
2841 dprintf("%s: prepare update failed\n", __func__);
2842 return;
2843 }
2844
8273f55e 2845 super->updates_pending++;
949c47a0 2846 dev = update->space;
ff077194 2847 map = get_imsm_map(dev, 0);
949c47a0
DW
2848 update->space = NULL;
2849 imsm_copy_dev(dev, &u->dev);
e0783b41 2850 map = get_imsm_map(dev, 0);
949c47a0 2851 super->dev_tbl[u->dev_idx] = dev;
8273f55e 2852 mpb->num_raid_devs++;
8273f55e 2853
e0783b41 2854 /* fix up flags */
8273f55e
DW
2855 for (i = 0; i < map->num_members; i++) {
2856 struct imsm_disk *disk;
2857 __u32 status;
2858
ff077194 2859 disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i));
8273f55e
DW
2860 status = __le32_to_cpu(disk->status);
2861 status |= CONFIGURED_DISK;
e0783b41 2862 status &= ~SPARE_DISK;
8273f55e
DW
2863 disk->status = __cpu_to_le32(status);
2864 }
2865 break;
e8319a19 2866 }
43dad3d6
DW
2867 case update_add_disk:
2868
2869 /* we may be able to repair some arrays if disks are
2870 * being added */
2871 if (super->add) {
2872 struct active_array *a;
2873 for (a = st->arrays; a; a = a->next)
2874 a->check_degraded = 1;
2875 }
2876 /* check if we can add / replace some disks in the
2877 * metadata */
2878 while (super->add) {
2879 struct dl **dlp, *dl, *al;
2880 al = super->add;
2881 super->add = al->next;
2882 for (dlp = &super->disks; *dlp ; ) {
2883 if (memcmp(al->serial, (*dlp)->serial,
2884 MAX_RAID_SERIAL_LEN) == 0) {
2885 dl = *dlp;
2886 *dlp = (*dlp)->next;
2887 __free_imsm_disk(dl);
2888 break;
2889 } else
2890 dlp = &(*dlp)->next;
2891 }
2892 al->next = super->disks;
2893 super->disks = al;
2894 }
2895
2896 break;
e8319a19
DW
2897 }
2898}
88758e9d 2899
8273f55e
DW
2900static void imsm_prepare_update(struct supertype *st,
2901 struct metadata_update *update)
2902{
949c47a0 2903 /**
4d7b1503
DW
2904 * Allocate space to hold new disk entries, raid-device entries or a new
2905 * mpb if necessary. The manager synchronously waits for updates to
2906 * complete in the monitor, so new mpb buffers allocated here can be
2907 * integrated by the monitor thread without worrying about live pointers
2908 * in the manager thread.
8273f55e 2909 */
949c47a0 2910 enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
4d7b1503
DW
2911 struct intel_super *super = st->sb;
2912 struct imsm_super *mpb = super->anchor;
2913 size_t buf_len;
2914 size_t len = 0;
949c47a0
DW
2915
2916 switch (type) {
2917 case update_create_array: {
2918 struct imsm_update_create_array *u = (void *) update->buf;
949c47a0 2919
4d7b1503 2920 len = sizeof_imsm_dev(&u->dev, 1);
949c47a0
DW
2921 update->space = malloc(len);
2922 break;
2923 default:
2924 break;
2925 }
2926 }
8273f55e 2927
4d7b1503
DW
2928 /* check if we need a larger metadata buffer */
2929 if (super->next_buf)
2930 buf_len = super->next_len;
2931 else
2932 buf_len = super->len;
2933
2934 if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
2935 /* ok we need a larger buf than what is currently allocated
2936 * if this allocation fails process_update will notice that
2937 * ->next_len is set and ->next_buf is NULL
2938 */
2939 buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
2940 if (super->next_buf)
2941 free(super->next_buf);
2942
2943 super->next_len = buf_len;
2944 if (posix_memalign(&super->next_buf, buf_len, 512) != 0)
2945 super->next_buf = NULL;
2946 }
8273f55e
DW
2947}
2948
ae6aad82
DW
2949/* must be called while manager is quiesced */
2950static void imsm_delete(struct intel_super *super, struct dl **dlp)
2951{
2952 struct imsm_super *mpb = super->anchor;
2953 struct dl *dl = *dlp;
2954 struct dl *iter;
2955 struct imsm_dev *dev;
2956 struct imsm_map *map;
2957 int i, j;
2958
2959 dprintf("%s: deleting device %x:%x from imsm_super\n",
2960 __func__, dl->major, dl->minor);
2961
2962 /* shift all indexes down one */
2963 for (iter = super->disks; iter; iter = iter->next)
2964 if (iter->index > dl->index)
2965 iter->index--;
2966
2967 for (i = 0; i < mpb->num_raid_devs; i++) {
2968 dev = get_imsm_dev(super, i);
2969 map = get_imsm_map(dev, 0);
2970
2971 for (j = 0; j < map->num_members; j++) {
ff077194 2972 int idx = get_imsm_disk_idx(dev, j);
ae6aad82
DW
2973
2974 if (idx > dl->index)
be73972f 2975 set_imsm_ord_tbl_ent(map, j, idx - 1);
ae6aad82
DW
2976 }
2977 }
2978
2979 mpb->num_disks--;
2980 super->updates_pending++;
2981 *dlp = (*dlp)->next;
2982 __free_imsm_disk(dl);
2983}
2984
cdddbdbc
DW
2985struct superswitch super_imsm = {
2986#ifndef MDASSEMBLE
2987 .examine_super = examine_super_imsm,
2988 .brief_examine_super = brief_examine_super_imsm,
2989 .detail_super = detail_super_imsm,
2990 .brief_detail_super = brief_detail_super_imsm,
bf5a934a 2991 .write_init_super = write_init_super_imsm,
cdddbdbc
DW
2992#endif
2993 .match_home = match_home_imsm,
2994 .uuid_from_super= uuid_from_super_imsm,
2995 .getinfo_super = getinfo_super_imsm,
2996 .update_super = update_super_imsm,
2997
2998 .avail_size = avail_size_imsm,
2999
3000 .compare_super = compare_super_imsm,
3001
3002 .load_super = load_super_imsm,
bf5a934a
DW
3003 .init_super = init_super_imsm,
3004 .add_to_super = add_to_super_imsm,
cdddbdbc
DW
3005 .store_super = store_zero_imsm,
3006 .free_super = free_super_imsm,
3007 .match_metadata_desc = match_metadata_desc_imsm,
bf5a934a 3008 .container_content = container_content_imsm,
cdddbdbc
DW
3009
3010 .validate_geometry = validate_geometry_imsm,
cdddbdbc 3011 .external = 1,
845dea95
NB
3012
3013/* for mdmon */
3014 .open_new = imsm_open_new,
3015 .load_super = load_super_imsm,
ed9d66aa 3016 .set_array_state= imsm_set_array_state,
845dea95
NB
3017 .set_disk = imsm_set_disk,
3018 .sync_metadata = imsm_sync_metadata,
88758e9d 3019 .activate_spare = imsm_activate_spare,
e8319a19 3020 .process_update = imsm_process_update,
8273f55e 3021 .prepare_update = imsm_prepare_update,
cdddbdbc 3022};