]> git.ipfire.org Git - thirdparty/mdadm.git/blob - super-ddf.c
DDF: __write_ddf_structure: Fix wrong reference to ddf->primary
[thirdparty/mdadm.git] / super-ddf.c
1 /*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neil@brown.name>
23 *
24 * Specifications for DDF takes from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
26 */
27
28 #define HAVE_STDINT_H 1
29 #include "mdadm.h"
30 #include "mdmon.h"
31 #include "sha1.h"
32 #include <values.h>
33
34 /* a non-official T10 name for creation GUIDs */
35 static char T10[] = "Linux-MD";
36
37 /* DDF timestamps are 1980 based, so we need to add
38 * second-in-decade-of-seventies to convert to linux timestamps.
39 * 10 years with 2 leap years.
40 */
41 #define DECADE (3600*24*(365*10+2))
42 unsigned long crc32(
43 unsigned long crc,
44 const unsigned char *buf,
45 unsigned len);
46
47 /* The DDF metadata handling.
48 * DDF metadata lives at the end of the device.
49 * The last 512 byte block provides an 'anchor' which is used to locate
50 * the rest of the metadata which usually lives immediately behind the anchor.
51 *
52 * Note:
53 * - all multibyte numeric fields are bigendian.
54 * - all strings are space padded.
55 *
56 */
57
58 /* Primary Raid Level (PRL) */
59 #define DDF_RAID0 0x00
60 #define DDF_RAID1 0x01
61 #define DDF_RAID3 0x03
62 #define DDF_RAID4 0x04
63 #define DDF_RAID5 0x05
64 #define DDF_RAID1E 0x11
65 #define DDF_JBOD 0x0f
66 #define DDF_CONCAT 0x1f
67 #define DDF_RAID5E 0x15
68 #define DDF_RAID5EE 0x25
69 #define DDF_RAID6 0x06
70
71 /* Raid Level Qualifier (RLQ) */
72 #define DDF_RAID0_SIMPLE 0x00
73 #define DDF_RAID1_SIMPLE 0x00 /* just 2 devices in this plex */
74 #define DDF_RAID1_MULTI 0x01 /* exactly 3 devices in this plex */
75 #define DDF_RAID3_0 0x00 /* parity in first extent */
76 #define DDF_RAID3_N 0x01 /* parity in last extent */
77 #define DDF_RAID4_0 0x00 /* parity in first extent */
78 #define DDF_RAID4_N 0x01 /* parity in last extent */
79 /* these apply to raid5e and raid5ee as well */
80 #define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */
81 #define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */
82 #define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */
83 #define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */
84
85 #define DDF_RAID1E_ADJACENT 0x00 /* raid10 nearcopies==2 */
86 #define DDF_RAID1E_OFFSET 0x01 /* raid10 offsetcopies==2 */
87
88 /* Secondary RAID Level (SRL) */
89 #define DDF_2STRIPED 0x00 /* This is weirder than RAID0 !! */
90 #define DDF_2MIRRORED 0x01
91 #define DDF_2CONCAT 0x02
92 #define DDF_2SPANNED 0x03 /* This is also weird - be careful */
93
94 /* Magic numbers */
95 #define DDF_HEADER_MAGIC __cpu_to_be32(0xDE11DE11)
96 #define DDF_CONTROLLER_MAGIC __cpu_to_be32(0xAD111111)
97 #define DDF_PHYS_RECORDS_MAGIC __cpu_to_be32(0x22222222)
98 #define DDF_PHYS_DATA_MAGIC __cpu_to_be32(0x33333333)
99 #define DDF_VIRT_RECORDS_MAGIC __cpu_to_be32(0xDDDDDDDD)
100 #define DDF_VD_CONF_MAGIC __cpu_to_be32(0xEEEEEEEE)
101 #define DDF_SPARE_ASSIGN_MAGIC __cpu_to_be32(0x55555555)
102 #define DDF_VU_CONF_MAGIC __cpu_to_be32(0x88888888)
103 #define DDF_VENDOR_LOG_MAGIC __cpu_to_be32(0x01dBEEF0)
104 #define DDF_BBM_LOG_MAGIC __cpu_to_be32(0xABADB10C)
105
106 #define DDF_GUID_LEN 24
107 #define DDF_REVISION_0 "01.00.00"
108 #define DDF_REVISION_2 "01.02.00"
109
/* On-disk DDF header.  The anchor (last sector of the device) and the
 * primary/secondary copies all share this layout; 'type' says which one
 * a given copy is.  Exactly one 512-byte block; all multi-byte fields
 * are big-endian.
 */
struct ddf_header {
	__u32	magic;		/* DDF_HEADER_MAGIC */
	__u32	crc;		/* CRC of this block, computed with this
				 * field preset to 0xffffffff (see calc_crc) */
	char	guid[DDF_GUID_LEN];
	char	revision[8];	/* 01.02.00 */
	__u32	seq;		/* starts at '1' */
	__u32	timestamp;
	__u8	openflag;
	__u8	foreignflag;
	__u8	enforcegroups;
	__u8	pad0;		/* 0xff */
	__u8	pad1[12];	/* 12 * 0xff */
	/* 64 bytes so far */
	__u8	header_ext[32];	/* reserved: fill with 0xff */
	__u64	primary_lba;
	__u64	secondary_lba;
	__u8	type;		/* DDF_HEADER_ANCHOR/PRIMARY/SECONDARY */
	__u8	pad2[3];	/* 0xff */
	__u32	workspace_len;	/* sectors for vendor space -
				 * at least 32768(sectors) */
	__u64	workspace_lba;
	__u16	max_pd_entries;	/* one of 15, 63, 255, 1023, 4095 */
	__u16	max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
	__u16	max_partitions; /* i.e. max num of configuration
				   record entries per disk */
	__u16	config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
				      *12/512) */
	__u16	max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
	__u8	pad3[54];	/* 0xff */
	/* 192 bytes so far */
	/* Section offsets below are in sectors, relative to
	 * primary_lba/secondary_lba (see load_section); lengths are in
	 * sectors too.
	 */
	__u32	controller_section_offset;
	__u32	controller_section_length;
	__u32	phys_section_offset;
	__u32	phys_section_length;
	__u32	virt_section_offset;
	__u32	virt_section_length;
	__u32	config_section_offset;
	__u32	config_section_length;
	__u32	data_section_offset;
	__u32	data_section_length;
	__u32	bbm_section_offset;
	__u32	bbm_section_length;
	__u32	diag_space_offset;
	__u32	diag_space_length;
	__u32	vendor_offset;
	__u32	vendor_length;
	/* 256 bytes so far */
	__u8	pad4[256];	/* 0xff */
};
159
160 /* type field */
161 #define DDF_HEADER_ANCHOR 0x00
162 #define DDF_HEADER_PRIMARY 0x01
163 #define DDF_HEADER_SECONDARY 0x02
164
/* The content of the 'controller section' - global scope.
 * Identifies the controller that owns/wrote the metadata; the IDs are
 * PCI-style but are not interpreted anywhere in this file.
 */
struct ddf_controller_data {
	__u32	magic;			/* DDF_CONTROLLER_MAGIC */
	__u32	crc;
	char	guid[DDF_GUID_LEN];
	struct controller_type {
		__u16 vendor_id;
		__u16 device_id;
		__u16 sub_vendor_id;
		__u16 sub_device_id;
	} type;
	char	product_id[16];
	__u8	pad[8];			/* 0xff */
	__u8	vendor_data[448];
};
180
/* The content of phys_section - global scope.
 * A table of all physical disks in the container, ending in a
 * variable-length entries[] array.
 */
struct phys_disk {
	__u32	magic;		/* DDF_PHYS_RECORDS_MAGIC */
	__u32	crc;
	__u16	used_pdes;	/* used as the count of valid entries */
	__u16	max_pdes;
	__u8	pad[52];
	struct phys_disk_entry {
		char	guid[DDF_GUID_LEN];
		__u32	refnum;	/* matched against vd_config.phys_refnum */
		__u16	type;	/* DDF_* type bitmap, bigendian */
		__u16	state;	/* DDF_* state bitmap, bigendian */
		__u64	config_size; /* DDF structures must be after here */
		char	path[18]; /* another horrible structure really */
		__u8	pad[6];
	} entries[0];
};
198
199 /* phys_disk_entry.type is a bitmap - bigendian remember */
200 #define DDF_Forced_PD_GUID 1
201 #define DDF_Active_in_VD 2
202 #define DDF_Global_Spare 4 /* VD_CONF records are ignored */
203 #define DDF_Spare 8 /* overrides Global_spare */
204 #define DDF_Foreign 16
205 #define DDF_Legacy 32 /* no DDF on this device */
206
207 #define DDF_Interface_mask 0xf00
208 #define DDF_Interface_SCSI 0x100
209 #define DDF_Interface_SAS 0x200
210 #define DDF_Interface_SATA 0x300
211 #define DDF_Interface_FC 0x400
212
213 /* phys_disk_entry.state is a bigendian bitmap */
214 #define DDF_Online 1
215 #define DDF_Failed 2 /* overrides 1,4,8 */
216 #define DDF_Rebuilding 4
217 #define DDF_Transition 8
218 #define DDF_SMART 16
219 #define DDF_ReadErrors 32
220 #define DDF_Missing 64
221
/* The content of the virt_section - global scope.
 * A table of all virtual disks (arrays) in the container.
 */
struct virtual_disk {
	__u32	magic;		/* DDF_VIRT_RECORDS_MAGIC */
	__u32	crc;
	__u16	populated_vdes;	/* used as the count of valid entries */
	__u16	max_vdes;
	__u8	pad[52];
	struct virtual_entry {
		char	guid[DDF_GUID_LEN];
		__u16	unit;
		__u16	pad0;		/* 0xffff */
		__u16	guid_crc;
		__u16	type;		/* DDF_Shared etc - bigendian bitmap */
		__u8	state;		/* DDF_state_* in low bits plus flags */
		__u8	init_state;	/* DDF_init* bits; access bits in top 2 */
		__u8	pad1[14];
		char	name[16];
	} entries[0];
};
241
242 /* virtual_entry.type is a bitmap - bigendian */
243 #define DDF_Shared 1
244 #define DDF_Enforce_Groups 2
245 #define DDF_Unicode 4
246 #define DDF_Owner_Valid 8
247
248 /* virtual_entry.state is a bigendian bitmap */
249 #define DDF_state_mask 0x7
250 #define DDF_state_optimal 0x0
251 #define DDF_state_degraded 0x1
252 #define DDF_state_deleted 0x2
253 #define DDF_state_missing 0x3
254 #define DDF_state_failed 0x4
255 #define DDF_state_part_optimal 0x5
256
257 #define DDF_state_morphing 0x8
258 #define DDF_state_inconsistent 0x10
259
260 /* virtual_entry.init_state is a bigendian bitmap */
261 #define DDF_initstate_mask 0x03
262 #define DDF_init_not 0x00
263 #define DDF_init_quick 0x01 /* initialisation is progress.
264 * i.e. 'state_inconsistent' */
265 #define DDF_init_full 0x02
266
267 #define DDF_access_mask 0xc0
268 #define DDF_access_rw 0x00
269 #define DDF_access_ro 0x80
270 #define DDF_access_blocked 0xc0
271
272 /* The content of the config_section - local scope
273 * It has multiple records each config_record_len sectors
274 * They can be vd_config or spare_assign
275 */
276
/* One virtual-disk configuration record (config_section - local scope).
 * Each record is config_record_len sectors; the fixed part is followed
 * by a phys_refnum table of 'mppe' entries and then (at the same
 * granularity) an LBA-offset table - see the trailing comment.
 */
struct vd_config {
	__u32	magic;		/* DDF_VD_CONF_MAGIC */
	__u32	crc;
	char	guid[DDF_GUID_LEN];
	__u32	timestamp;
	__u32	seqnum;		/* bigendian; the higher seqnum wins when
				 * merging duplicate records */
	__u8	pad0[24];
	__u16	prim_elmnt_count;
	__u8	chunk_shift;	/* 0 == 512, 1==1024 etc */
	__u8	prl;		/* primary RAID level - DDF_RAID* */
	__u8	rlq;		/* RAID level qualifier */
	__u8	sec_elmnt_count; /* >1 for secondary-level (hybrid) arrays */
	__u8	sec_elmnt_seq;
	__u8	srl;		/* secondary RAID level - DDF_2* */
	__u64	blocks;		/* blocks per component could be different
				 * on different component devices...(only
				 * for concat I hope) */
	__u64	array_blocks;	/* blocks in array */
	__u8	pad1[8];
	__u32	spare_refs[8];
	__u8	cache_pol[8];	/* cache_pol[7] is a DDF_cache_* bitmap */
	__u8	bg_rate;
	__u8	pad2[3];
	__u8	pad3[52];
	__u8	pad4[192];
	__u8	v0[32];		/* reserved- 0xff */
	__u8	v1[32];		/* reserved- 0xff */
	__u8	v2[16];		/* reserved- 0xff */
	__u8	v3[16];		/* reserved- 0xff */
	__u8	vendor[32];
	__u32	phys_refnum[0];	/* refnum of each disk in sequence */
	/*__u64 lba_offset[0];  LBA offset in each phys.  Note extents in a
				bvd are always the same size */
};
311
312 /* vd_config.cache_pol[7] is a bitmap */
313 #define DDF_cache_writeback 1 /* else writethrough */
314 #define DDF_cache_wadaptive 2 /* only applies if writeback */
315 #define DDF_cache_readahead 4
316 #define DDF_cache_radaptive 8 /* only if doing read-ahead */
317 #define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
318 #define DDF_cache_wallowed 32 /* enable write caching */
319 #define DDF_cache_rallowed 64 /* enable read caching */
320
/* A spare-assignment record, found in the config section alongside
 * vd_config records and distinguished by its magic.
 */
struct spare_assign {
	__u32	magic;		/* DDF_SPARE_ASSIGN_MAGIC */
	__u32	crc;
	__u32	timestamp;
	__u8	reserved[7];
	__u8	type;		/* DDF_spare_* bitmap */
	__u16	populated;	/* SAEs used */
	__u16	max;		/* max SAEs */
	__u8	pad[8];
	struct spare_assign_entry {
		char	guid[DDF_GUID_LEN];
		__u16	secondary_element;
		__u8	pad[6];
	} spare_ents[0];
};
336 /* spare_assign.type is a bitmap */
337 #define DDF_spare_dedicated 0x1 /* else global */
338 #define DDF_spare_revertible 0x2 /* else committable */
339 #define DDF_spare_active 0x4 /* else not active */
340 #define DDF_spare_affinity 0x8 /* enclosure affinity */
341
/* The data_section contents - local scope.
 * Per-disk identity block; 'refnum' is how other structures (e.g.
 * vd_config.phys_refnum) refer to this disk.
 */
struct disk_data {
	__u32	magic;		/* DDF_PHYS_DATA_MAGIC */
	__u32	crc;
	char	guid[DDF_GUID_LEN];
	__u32	refnum;		/* crc of some magic drive data ... */
	__u8	forced_ref;	/* set when above was not result of magic */
	__u8	forced_guid;	/* set if guid was forced rather than magic */
	__u8	vendor[32];
	__u8	pad[442];
};
353
/* bbm_section content - the bad block (re)mapping log.
 * NOTE(review): the __u16 entry_count / __u32 spare_count sequence
 * assumes no compiler-inserted padding matches the on-disk layout -
 * confirm against the DDF spec before relying on offsets here.
 */
struct bad_block_log {
	__u32	magic;		/* DDF_BBM_LOG_MAGIC */
	__u32	crc;
	__u16	entry_count;
	__u32	spare_count;
	__u8	pad[10];
	__u64	first_spare;
	struct mapped_block {
		__u64	defective_start;
		__u32	replacement_start;
		__u16	remap_count;
		__u8	pad[2];
	} entries[0];
};
369
370 /* Struct for internally holding ddf structures */
371 /* The DDF structure stored on each device is potentially
372 * quite different, as some data is global and some is local.
373 * The global data is:
374 * - ddf header
375 * - controller_data
376 * - Physical disk records
377 * - Virtual disk records
378 * The local data is:
379 * - Configuration records
380 * - Physical Disk data section
381 * ( and Bad block and vendor which I don't care about yet).
382 *
383 * The local data is parsed into separate lists as it is read
384 * and reconstructed for writing. This means that we only need
385 * to make config changes once and they are automatically
386 * propagated to all devices.
387 * Note that the ddf_super has space of the conf and disk data
388 * for this disk and also for a list of all such data.
389 * The list is only used for the superblock that is being
390 * built in Create or Assemble to describe the whole array.
391 */
/* In-core representation of a DDF container.
 * anchor/primary/secondary are verbatim on-disk header copies and
 * 'active' points at whichever of primary/secondary was chosen as
 * most trustworthy (see load_ddf_headers).
 */
struct ddf_super {
	struct ddf_header anchor, primary, secondary;
	struct ddf_controller_data controller;
	struct ddf_header *active;
	struct phys_disk	*phys;	/* malloced phys_section image */
	struct virtual_disk	*virt;	/* malloced virt_section image */
	int pdsize, vdsize;		/* byte sizes of the two above */
	unsigned int max_part, mppe, conf_rec_len; /* cached header fields */
	int currentdev;
	int updates_pending;
	/* One vcl per virtual disk.  The union pads the bookkeeping
	 * fields out to 512 bytes so that 'conf' (allocated with 512
	 * alignment) stays sector aligned.
	 */
	struct vcl {
		union {
			char space[512];
			struct {
				struct vcl	*next;
				__u64		*lba_offset; /* location in 'conf' of
							      * the lba table */
				unsigned int	vcnum; /* index into ->virt */
				struct vd_config **other_bvds;
				__u64		*block_sizes; /* NULL if all the same */
			};
		};
		struct vd_config conf;
	} *conflist, *currentconf;
	/* One dl per physical disk; same union-padding trick keeps
	 * 'disk' sector aligned. */
	struct dl {
		union {
			char space[512];
			struct {
				struct dl	*next;
				int major, minor;
				char		*devname;
				int fd;
				unsigned long long size; /* sectors */
				unsigned long long primary_lba; /* sectors */
				unsigned long long secondary_lba; /* sectors */
				unsigned long long workspace_lba; /* sectors */
				int pdnum;	/* index in ->phys */
				struct spare_assign *spare;
				void *mdupdate; /* hold metadata update */

				/* These fields used by auto-layout */
				int raiddisk; /* slot to fill in autolayout */
				__u64 esize;
			};
		};
		struct disk_data disk;
		struct vcl *vlist[0]; /* max_part in size */
	} *dlist, *add_list;
};
441
442 #ifndef offsetof
443 #define offsetof(t,f) ((size_t)&(((t*)0)->f))
444 #endif
445
446 static unsigned int calc_crc(void *buf, int len)
447 {
448 /* crcs are always at the same place as in the ddf_header */
449 struct ddf_header *ddf = buf;
450 __u32 oldcrc = ddf->crc;
451 __u32 newcrc;
452 ddf->crc = 0xffffffff;
453
454 newcrc = crc32(0, buf, len);
455 ddf->crc = oldcrc;
456 /* The crc is store (like everything) bigendian, so convert
457 * here for simplicity
458 */
459 return __cpu_to_be32(newcrc);
460 }
461
462 static int load_ddf_header(int fd, unsigned long long lba,
463 unsigned long long size,
464 int type,
465 struct ddf_header *hdr, struct ddf_header *anchor)
466 {
467 /* read a ddf header (primary or secondary) from fd/lba
468 * and check that it is consistent with anchor
469 * Need to check:
470 * magic, crc, guid, rev, and LBA's header_type, and
471 * everything after header_type must be the same
472 */
473 if (lba >= size-1)
474 return 0;
475
476 if (lseek64(fd, lba<<9, 0) < 0)
477 return 0;
478
479 if (read(fd, hdr, 512) != 512)
480 return 0;
481
482 if (hdr->magic != DDF_HEADER_MAGIC)
483 return 0;
484 if (calc_crc(hdr, 512) != hdr->crc)
485 return 0;
486 if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
487 memcmp(anchor->revision, hdr->revision, 8) != 0 ||
488 anchor->primary_lba != hdr->primary_lba ||
489 anchor->secondary_lba != hdr->secondary_lba ||
490 hdr->type != type ||
491 memcmp(anchor->pad2, hdr->pad2, 512 -
492 offsetof(struct ddf_header, pad2)) != 0)
493 return 0;
494
495 /* Looks good enough to me... */
496 return 1;
497 }
498
499 static void *load_section(int fd, struct ddf_super *super, void *buf,
500 __u32 offset_be, __u32 len_be, int check)
501 {
502 unsigned long long offset = __be32_to_cpu(offset_be);
503 unsigned long long len = __be32_to_cpu(len_be);
504 int dofree = (buf == NULL);
505
506 if (check)
507 if (len != 2 && len != 8 && len != 32
508 && len != 128 && len != 512)
509 return NULL;
510
511 if (len > 1024)
512 return NULL;
513 if (buf) {
514 /* All pre-allocated sections are a single block */
515 if (len != 1)
516 return NULL;
517 } else if (posix_memalign(&buf, 512, len<<9) != 0)
518 buf = NULL;
519
520 if (!buf)
521 return NULL;
522
523 if (super->active->type == 1)
524 offset += __be64_to_cpu(super->active->primary_lba);
525 else
526 offset += __be64_to_cpu(super->active->secondary_lba);
527
528 if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
529 if (dofree)
530 free(buf);
531 return NULL;
532 }
533 if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
534 if (dofree)
535 free(buf);
536 return NULL;
537 }
538 return buf;
539 }
540
541 static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
542 {
543 unsigned long long dsize;
544
545 get_dev_size(fd, NULL, &dsize);
546
547 if (lseek64(fd, dsize-512, 0) < 0) {
548 if (devname)
549 pr_err("Cannot seek to anchor block on %s: %s\n",
550 devname, strerror(errno));
551 return 1;
552 }
553 if (read(fd, &super->anchor, 512) != 512) {
554 if (devname)
555 pr_err("Cannot read anchor block on %s: %s\n",
556 devname, strerror(errno));
557 return 1;
558 }
559 if (super->anchor.magic != DDF_HEADER_MAGIC) {
560 if (devname)
561 pr_err("no DDF anchor found on %s\n",
562 devname);
563 return 2;
564 }
565 if (calc_crc(&super->anchor, 512) != super->anchor.crc) {
566 if (devname)
567 pr_err("bad CRC on anchor on %s\n",
568 devname);
569 return 2;
570 }
571 if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
572 memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
573 if (devname)
574 pr_err("can only support super revision"
575 " %.8s and earlier, not %.8s on %s\n",
576 DDF_REVISION_2, super->anchor.revision,devname);
577 return 2;
578 }
579 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
580 dsize >> 9, 1,
581 &super->primary, &super->anchor) == 0) {
582 if (devname)
583 pr_err("Failed to load primary DDF header "
584 "on %s\n", devname);
585 return 2;
586 }
587 super->active = &super->primary;
588 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
589 dsize >> 9, 2,
590 &super->secondary, &super->anchor)) {
591 if ((__be32_to_cpu(super->primary.seq)
592 < __be32_to_cpu(super->secondary.seq) &&
593 !super->secondary.openflag)
594 || (__be32_to_cpu(super->primary.seq)
595 == __be32_to_cpu(super->secondary.seq) &&
596 super->primary.openflag && !super->secondary.openflag)
597 )
598 super->active = &super->secondary;
599 }
600 return 0;
601 }
602
603 static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
604 {
605 void *ok;
606 ok = load_section(fd, super, &super->controller,
607 super->active->controller_section_offset,
608 super->active->controller_section_length,
609 0);
610 super->phys = load_section(fd, super, NULL,
611 super->active->phys_section_offset,
612 super->active->phys_section_length,
613 1);
614 super->pdsize = __be32_to_cpu(super->active->phys_section_length) * 512;
615
616 super->virt = load_section(fd, super, NULL,
617 super->active->virt_section_offset,
618 super->active->virt_section_length,
619 1);
620 super->vdsize = __be32_to_cpu(super->active->virt_section_length) * 512;
621 if (!ok ||
622 !super->phys ||
623 !super->virt) {
624 free(super->phys);
625 free(super->virt);
626 super->phys = NULL;
627 super->virt = NULL;
628 return 2;
629 }
630 super->conflist = NULL;
631 super->dlist = NULL;
632
633 super->max_part = __be16_to_cpu(super->active->max_partitions);
634 super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
635 super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
636 return 0;
637 }
638
639 static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
640 unsigned int len)
641 {
642 int i;
643 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
644 if (vcl->other_bvds[i] != NULL &&
645 vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
646 break;
647
648 if (i < vcl->conf.sec_elmnt_count-1) {
649 if (vd->seqnum <= vcl->other_bvds[i]->seqnum)
650 return;
651 } else {
652 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
653 if (vcl->other_bvds[i] == NULL)
654 break;
655 if (i == vcl->conf.sec_elmnt_count-1) {
656 pr_err("no space for sec level config %u, count is %u\n",
657 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
658 return;
659 }
660 if (posix_memalign((void **)&vcl->other_bvds[i], 512, len)
661 != 0) {
662 pr_err("%s could not allocate vd buf\n", __func__);
663 return;
664 }
665 }
666 memcpy(vcl->other_bvds[i], vd, len);
667 }
668
669 static int load_ddf_local(int fd, struct ddf_super *super,
670 char *devname, int keep)
671 {
672 struct dl *dl;
673 struct stat stb;
674 char *conf;
675 unsigned int i;
676 unsigned int confsec;
677 int vnum;
678 unsigned int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
679 unsigned long long dsize;
680
681 /* First the local disk info */
682 if (posix_memalign((void**)&dl, 512,
683 sizeof(*dl) +
684 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
685 pr_err("%s could not allocate disk info buffer\n",
686 __func__);
687 return 1;
688 }
689
690 load_section(fd, super, &dl->disk,
691 super->active->data_section_offset,
692 super->active->data_section_length,
693 0);
694 dl->devname = devname ? xstrdup(devname) : NULL;
695
696 fstat(fd, &stb);
697 dl->major = major(stb.st_rdev);
698 dl->minor = minor(stb.st_rdev);
699 dl->next = super->dlist;
700 dl->fd = keep ? fd : -1;
701
702 dl->size = 0;
703 if (get_dev_size(fd, devname, &dsize))
704 dl->size = dsize >> 9;
705 /* If the disks have different sizes, the LBAs will differ
706 * between phys disks.
707 * At this point here, the values in super->active must be valid
708 * for this phys disk. */
709 dl->primary_lba = super->active->primary_lba;
710 dl->secondary_lba = super->active->secondary_lba;
711 dl->workspace_lba = super->active->workspace_lba;
712 dl->spare = NULL;
713 for (i = 0 ; i < super->max_part ; i++)
714 dl->vlist[i] = NULL;
715 super->dlist = dl;
716 dl->pdnum = -1;
717 for (i = 0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
718 if (memcmp(super->phys->entries[i].guid,
719 dl->disk.guid, DDF_GUID_LEN) == 0)
720 dl->pdnum = i;
721
722 /* Now the config list. */
723 /* 'conf' is an array of config entries, some of which are
724 * probably invalid. Those which are good need to be copied into
725 * the conflist
726 */
727
728 conf = load_section(fd, super, NULL,
729 super->active->config_section_offset,
730 super->active->config_section_length,
731 0);
732
733 vnum = 0;
734 for (confsec = 0;
735 confsec < __be32_to_cpu(super->active->config_section_length);
736 confsec += super->conf_rec_len) {
737 struct vd_config *vd =
738 (struct vd_config *)((char*)conf + confsec*512);
739 struct vcl *vcl;
740
741 if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
742 if (dl->spare)
743 continue;
744 if (posix_memalign((void**)&dl->spare, 512,
745 super->conf_rec_len*512) != 0) {
746 pr_err("%s could not allocate spare info buf\n",
747 __func__);
748 return 1;
749 }
750
751 memcpy(dl->spare, vd, super->conf_rec_len*512);
752 continue;
753 }
754 if (vd->magic != DDF_VD_CONF_MAGIC)
755 continue;
756 for (vcl = super->conflist; vcl; vcl = vcl->next) {
757 if (memcmp(vcl->conf.guid,
758 vd->guid, DDF_GUID_LEN) == 0)
759 break;
760 }
761
762 if (vcl) {
763 dl->vlist[vnum++] = vcl;
764 if (vcl->other_bvds != NULL &&
765 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
766 add_other_bvd(vcl, vd, super->conf_rec_len*512);
767 continue;
768 }
769 if (__be32_to_cpu(vd->seqnum) <=
770 __be32_to_cpu(vcl->conf.seqnum))
771 continue;
772 } else {
773 if (posix_memalign((void**)&vcl, 512,
774 (super->conf_rec_len*512 +
775 offsetof(struct vcl, conf))) != 0) {
776 pr_err("%s could not allocate vcl buf\n",
777 __func__);
778 return 1;
779 }
780 vcl->next = super->conflist;
781 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
782 if (vd->sec_elmnt_count > 1)
783 vcl->other_bvds =
784 xcalloc(vd->sec_elmnt_count - 1,
785 sizeof(struct vd_config *));
786 else
787 vcl->other_bvds = NULL;
788 super->conflist = vcl;
789 dl->vlist[vnum++] = vcl;
790 }
791 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
792 vcl->lba_offset = (__u64*)
793 &vcl->conf.phys_refnum[super->mppe];
794
795 for (i=0; i < max_virt_disks ; i++)
796 if (memcmp(super->virt->entries[i].guid,
797 vcl->conf.guid, DDF_GUID_LEN)==0)
798 break;
799 if (i < max_virt_disks)
800 vcl->vcnum = i;
801 }
802 free(conf);
803
804 return 0;
805 }
806
807 #ifndef MDASSEMBLE
808 static int load_super_ddf_all(struct supertype *st, int fd,
809 void **sbp, char *devname);
810 #endif
811
812 static void free_super_ddf(struct supertype *st);
813
814 static int load_super_ddf(struct supertype *st, int fd,
815 char *devname)
816 {
817 unsigned long long dsize;
818 struct ddf_super *super;
819 int rv;
820
821 if (get_dev_size(fd, devname, &dsize) == 0)
822 return 1;
823
824 if (test_partition(fd))
825 /* DDF is not allowed on partitions */
826 return 1;
827
828 /* 32M is a lower bound */
829 if (dsize <= 32*1024*1024) {
830 if (devname)
831 pr_err("%s is too small for ddf: "
832 "size is %llu sectors.\n",
833 devname, dsize>>9);
834 return 1;
835 }
836 if (dsize & 511) {
837 if (devname)
838 pr_err("%s is an odd size for ddf: "
839 "size is %llu bytes.\n",
840 devname, dsize);
841 return 1;
842 }
843
844 free_super_ddf(st);
845
846 if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
847 pr_err("malloc of %zu failed.\n",
848 sizeof(*super));
849 return 1;
850 }
851 memset(super, 0, sizeof(*super));
852
853 rv = load_ddf_headers(fd, super, devname);
854 if (rv) {
855 free(super);
856 return rv;
857 }
858
859 /* Have valid headers and have chosen the best. Let's read in the rest*/
860
861 rv = load_ddf_global(fd, super, devname);
862
863 if (rv) {
864 if (devname)
865 pr_err("Failed to load all information "
866 "sections on %s\n", devname);
867 free(super);
868 return rv;
869 }
870
871 rv = load_ddf_local(fd, super, devname, 0);
872
873 if (rv) {
874 if (devname)
875 pr_err("Failed to load all information "
876 "sections on %s\n", devname);
877 free(super);
878 return rv;
879 }
880
881 /* Should possibly check the sections .... */
882
883 st->sb = super;
884 if (st->ss == NULL) {
885 st->ss = &super_ddf;
886 st->minor_version = 0;
887 st->max_devs = 512;
888 }
889 return 0;
890
891 }
892
893 static void free_super_ddf(struct supertype *st)
894 {
895 struct ddf_super *ddf = st->sb;
896 if (ddf == NULL)
897 return;
898 free(ddf->phys);
899 free(ddf->virt);
900 while (ddf->conflist) {
901 struct vcl *v = ddf->conflist;
902 ddf->conflist = v->next;
903 if (v->block_sizes)
904 free(v->block_sizes);
905 if (v->other_bvds) {
906 int i;
907 for (i = 0; i < v->conf.sec_elmnt_count-1; i++)
908 if (v->other_bvds[i] != NULL)
909 free(v->other_bvds[i]);
910 free(v->other_bvds);
911 }
912 free(v);
913 }
914 while (ddf->dlist) {
915 struct dl *d = ddf->dlist;
916 ddf->dlist = d->next;
917 if (d->fd >= 0)
918 close(d->fd);
919 if (d->spare)
920 free(d->spare);
921 free(d);
922 }
923 while (ddf->add_list) {
924 struct dl *d = ddf->add_list;
925 ddf->add_list = d->next;
926 if (d->fd >= 0)
927 close(d->fd);
928 if (d->spare)
929 free(d->spare);
930 free(d);
931 }
932 free(ddf);
933 st->sb = NULL;
934 }
935
936 static struct supertype *match_metadata_desc_ddf(char *arg)
937 {
938 /* 'ddf' only support containers */
939 struct supertype *st;
940 if (strcmp(arg, "ddf") != 0 &&
941 strcmp(arg, "default") != 0
942 )
943 return NULL;
944
945 st = xcalloc(1, sizeof(*st));
946 st->ss = &super_ddf;
947 st->max_devs = 512;
948 st->minor_version = 0;
949 st->sb = NULL;
950 return st;
951 }
952
953 #ifndef MDASSEMBLE
954
/* Names for the virtual_entry.state low three bits (see DDF_state_*). */
static mapping_t ddf_state[] = {
	{ "Optimal", 0},
	{ "Degraded", 1},
	{ "Deleted", 2},
	{ "Missing", 3},
	{ "Failed", 4},
	{ "Partially Optimal", 5},
	{ "-reserved-", 6},
	{ "-reserved-", 7},
	{ NULL, 0}
};

/* Names for virtual_entry.init_state & DDF_initstate_mask. */
static mapping_t ddf_init_state[] = {
	{ "Not Initialised", 0},
	{ "QuickInit in Progress", 1},
	{ "Fully Initialised", 2},
	{ "*UNKNOWN*", 3},
	{ NULL, 0}
};
/* Names for the access bits ((init_state >> 6) & 3). */
static mapping_t ddf_access[] = {
	{ "Read/Write", 0},
	{ "Reserved", 1},
	{ "Read Only", 2},
	{ "Blocked (no access)", 3},
	{ NULL ,0}
};

/* Primary RAID level (PRL) names. */
static mapping_t ddf_level[] = {
	{ "RAID0", DDF_RAID0},
	{ "RAID1", DDF_RAID1},
	{ "RAID3", DDF_RAID3},
	{ "RAID4", DDF_RAID4},
	{ "RAID5", DDF_RAID5},
	{ "RAID1E",DDF_RAID1E},
	{ "JBOD", DDF_JBOD},
	{ "CONCAT",DDF_CONCAT},
	{ "RAID5E",DDF_RAID5E},
	{ "RAID5EE",DDF_RAID5EE},
	{ "RAID6", DDF_RAID6},
	{ NULL, 0}
};
/* Secondary RAID level (SRL) names. */
static mapping_t ddf_sec_level[] = {
	{ "Striped", DDF_2STRIPED},
	{ "Mirrored", DDF_2MIRRORED},
	{ "Concat", DDF_2CONCAT},
	{ "Spanned", DDF_2SPANNED},
	{ NULL, 0}
};
1003 #endif
1004
/* A simple integer-to-integer mapping pair, used to translate DDF
 * primary RAID levels to md level numbers (see map_num1 below).
 */
struct num_mapping {
	int num1, num2;
};
/* DDF PRL -> md level; LEVEL_UNSUPPORTED where md has no equivalent.
 * Terminated by a MAXINT sentinel pair, which is also what a failed
 * lookup returns.
 */
static struct num_mapping ddf_level_num[] = {
	{ DDF_RAID0, 0 },
	{ DDF_RAID1, 1 },
	{ DDF_RAID3, LEVEL_UNSUPPORTED },
	{ DDF_RAID4, 4 },
	{ DDF_RAID5, 5 },
	{ DDF_RAID1E, LEVEL_UNSUPPORTED },
	{ DDF_JBOD, LEVEL_UNSUPPORTED },
	{ DDF_CONCAT, LEVEL_LINEAR },
	{ DDF_RAID5E, LEVEL_UNSUPPORTED },
	{ DDF_RAID5EE, LEVEL_UNSUPPORTED },
	{ DDF_RAID6, 6},
	{ MAXINT, MAXINT }
};
1022
1023 static int map_num1(struct num_mapping *map, int num)
1024 {
1025 int i;
1026 for (i=0 ; map[i].num1 != MAXINT; i++)
1027 if (map[i].num1 == num)
1028 break;
1029 return map[i].num2;
1030 }
1031
1032 static int all_ff(char *guid)
1033 {
1034 int i;
1035 for (i = 0; i < DDF_GUID_LEN; i++)
1036 if (guid[i] != (char)0xff)
1037 return 0;
1038 return 1;
1039 }
1040
1041 #ifndef MDASSEMBLE
static void print_guid(char *guid, int tstamp)
{
	/* A GUIDs are part (or all) ASCII and part binary.
	 * They tend to be space padded.
	 * We print the GUID in HEX, then in parentheses add
	 * any initial ASCII sequence, and a possible
	 * time stamp from bytes 16-19
	 */
	int l = DDF_GUID_LEN;
	int i;

	/* Hex dump with a ':' between each group of four bytes. */
	for (i=0 ; i<DDF_GUID_LEN ; i++) {
		if ((i&3)==0 && i != 0) printf(":");
		printf("%02X", guid[i]&255);
	}

	printf("\n (");
	/* Drop trailing space padding before scanning the ASCII prefix. */
	while (l && guid[l-1] == ' ')
		l--;
	/* Print the leading run of printable ASCII, stopping at the
	 * first binary byte. */
	for (i=0 ; i<l ; i++) {
		if (guid[i] >= 0x20 && guid[i] < 0x7f)
			fputc(guid[i], stdout);
		else
			break;
	}
	if (tstamp) {
		/* Bytes 16-19 hold a bigendian DDF timestamp (1980
		 * epoch), hence the DECADE correction to a Unix time_t.
		 */
		time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
		char tbuf[100];
		struct tm *tm;
		tm = localtime(&then);
		strftime(tbuf, 100, " %D %T",tm);
		fputs(tbuf, stdout);
	}
	printf(")");
}
1077
static void examine_vd(int n, struct ddf_super *sb, char *guid)
{
	/* Print details for virtual disk number 'n' by scanning the
	 * config list for records whose GUID matches 'guid'.
	 */
	int crl = sb->conf_rec_len;
	struct vcl *vcl;

	for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
		unsigned int i;
		struct vd_config *vc = &vcl->conf;

		/* Skip records that are corrupt or belong to other VDs. */
		if (calc_crc(vc, crl*512) != vc->crc)
			continue;
		if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
			continue;

		/* Ok, we know about this VD, let's give more details */
		printf(" Raid Devices[%d] : %d (", n,
		       __be16_to_cpu(vc->prim_elmnt_count));
		for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) {
			int j;
			int cnt = __be16_to_cpu(sb->phys->used_pdes);
			/* Map this member's refnum back to a phys-disk
			 * index; '--' means it is not in the phys table. */
			for (j=0; j<cnt; j++)
				if (vc->phys_refnum[i] == sb->phys->entries[j].refnum)
					break;
			if (i) printf(" ");
			if (j < cnt)
				printf("%d", j);
			else
				printf("--");
		}
		printf(")\n");
		if (vc->chunk_shift != 255)
			/* chunk_shift 0 means 512 bytes, 1 means 1024... */
			printf(" Chunk Size[%d] : %d sectors\n", n,
			       1 << vc->chunk_shift);
		printf(" Raid Level[%d] : %s\n", n,
		       map_num(ddf_level, vc->prl)?:"-unknown-");
		if (vc->sec_elmnt_count != 1) {
			printf(" Secondary Position[%d] : %d of %d\n", n,
			       vc->sec_elmnt_seq, vc->sec_elmnt_count);
			printf(" Secondary Level[%d] : %s\n", n,
			       map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
		}
		/* blocks/array_blocks are sectors; /2 converts to KiB */
		printf(" Device Size[%d] : %llu\n", n,
		       (unsigned long long)__be64_to_cpu(vc->blocks)/2);
		printf(" Array Size[%d] : %llu\n", n,
		       (unsigned long long)__be64_to_cpu(vc->array_blocks)/2);
	}
}
1125
1126 static void examine_vds(struct ddf_super *sb)
1127 {
1128 int cnt = __be16_to_cpu(sb->virt->populated_vdes);
1129 int i;
1130 printf(" Virtual Disks : %d\n", cnt);
1131
1132 for (i=0; i<cnt; i++) {
1133 struct virtual_entry *ve = &sb->virt->entries[i];
1134 printf("\n");
1135 printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
1136 printf("\n");
1137 printf(" unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
1138 printf(" state[%d] : %s, %s%s\n", i,
1139 map_num(ddf_state, ve->state & 7),
1140 (ve->state & 8) ? "Morphing, ": "",
1141 (ve->state & 16)? "Not Consistent" : "Consistent");
1142 printf(" init state[%d] : %s\n", i,
1143 map_num(ddf_init_state, ve->init_state&3));
1144 printf(" access[%d] : %s\n", i,
1145 map_num(ddf_access, (ve->init_state>>6) & 3));
1146 printf(" Name[%d] : %.16s\n", i, ve->name);
1147 examine_vd(i, sb, ve->guid);
1148 }
1149 if (cnt) printf("\n");
1150 }
1151
static void examine_pds(struct ddf_super *sb)
{
	/* Print a one-line summary (index, refnum, size, device node,
	 * type/state flags) for every used physical-disk entry.
	 */
	int cnt = __be16_to_cpu(sb->phys->used_pdes);
	int i;
	struct dl *dl;
	printf(" Physical Disks : %d\n", cnt);
	printf(" Number RefNo Size Device Type/State\n");

	for (i=0 ; i<cnt ; i++) {
		struct phys_disk_entry *pd = &sb->phys->entries[i];
		int type = __be16_to_cpu(pd->type);
		int state = __be16_to_cpu(pd->state);

		//printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
		//printf("\n");
		printf(" %3d %08x ", i,
		       __be32_to_cpu(pd->refnum));
		/* config_size is in sectors; >>1 converts to KiB */
		printf("%8lluK ",
		       (unsigned long long)__be64_to_cpu(pd->config_size)>>1);
		/* find the local device with this refnum, if we have one;
		 * note the break only fires when map_dev() found a name,
		 * so dl may end up NULL even for a known refnum */
		for (dl = sb->dlist; dl ; dl = dl->next) {
			if (dl->disk.refnum == pd->refnum) {
				char *dv = map_dev(dl->major, dl->minor, 0);
				if (dv) {
					printf("%-15s", dv);
					break;
				}
			}
		}
		if (!dl)
			printf("%15s","");
		printf(" %s%s%s%s%s",
		       (type&2) ? "active":"",
		       (type&4) ? "Global-Spare":"",
		       (type&8) ? "spare" : "",
		       (type&16)? ", foreign" : "",
		       (type&32)? "pass-through" : "");
		if (state & DDF_Failed)
			/* This over-rides these three */
			state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
		printf("/%s%s%s%s%s%s%s",
		       (state&1)? "Online": "Offline",
		       (state&2)? ", Failed": "",
		       (state&4)? ", Rebuilding": "",
		       (state&8)? ", in-transition": "",
		       (state&16)? ", SMART-errors": "",
		       (state&32)? ", Unrecovered-Read-Errors": "",
		       (state&64)? ", Missing" : "");
		printf("\n");
	}
}
1202
static void examine_super_ddf(struct supertype *st, char *homehost)
{
	/* --examine entry point for DDF: dump the header fields, then
	 * the virtual-disk and physical-disk tables.
	 * 'homehost' is unused here.
	 */
	struct ddf_super *sb = st->sb;

	printf(" Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
	printf(" Version : %.8s\n", sb->anchor.revision);
	printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
	printf("\n");
	printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
	printf("\n");
	printf(" Seq : %08x\n", __be32_to_cpu(sb->active->seq));
	/* the secondary header is optional; its magic is only valid
	 * when one was actually loaded */
	printf(" Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
	       ?"yes" : "no");
	examine_vds(sb);
	examine_pds(sb);
}
1219
1220 static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
1221
1222 static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
1223
1224 static void brief_examine_super_ddf(struct supertype *st, int verbose)
1225 {
1226 /* We just write a generic DDF ARRAY entry
1227 */
1228 struct mdinfo info;
1229 char nbuf[64];
1230 getinfo_super_ddf(st, &info, NULL);
1231 fname_from_uuid(st, &info, nbuf, ':');
1232
1233 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1234 }
1235
1236 static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
1237 {
1238 /* We just write a generic DDF ARRAY entry
1239 */
1240 struct ddf_super *ddf = st->sb;
1241 struct mdinfo info;
1242 unsigned int i;
1243 char nbuf[64];
1244 getinfo_super_ddf(st, &info, NULL);
1245 fname_from_uuid(st, &info, nbuf, ':');
1246
1247 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
1248 struct virtual_entry *ve = &ddf->virt->entries[i];
1249 struct vcl vcl;
1250 char nbuf1[64];
1251 if (all_ff(ve->guid))
1252 continue;
1253 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1254 ddf->currentconf =&vcl;
1255 uuid_from_super_ddf(st, info.uuid);
1256 fname_from_uuid(st, &info, nbuf1, ':');
1257 printf("ARRAY container=%s member=%d UUID=%s\n",
1258 nbuf+5, i, nbuf1+5);
1259 }
1260 }
1261
1262 static void export_examine_super_ddf(struct supertype *st)
1263 {
1264 struct mdinfo info;
1265 char nbuf[64];
1266 getinfo_super_ddf(st, &info, NULL);
1267 fname_from_uuid(st, &info, nbuf, ':');
1268 printf("MD_METADATA=ddf\n");
1269 printf("MD_LEVEL=container\n");
1270 printf("MD_UUID=%s\n", nbuf+5);
1271 }
1272
static void detail_super_ddf(struct supertype *st, char *homehost)
{
	/* --detail hook; deliberately a no-op for now.
	 * FIXME later
	 *  Could print DDF GUID
	 *  Need to find which array
	 *  If whole, briefly list all arrays
	 *  If one, give name
	 */
}
1282
1283 static void brief_detail_super_ddf(struct supertype *st)
1284 {
1285 /* FIXME I really need to know which array we are detailing.
1286 * Can that be stored in ddf_super??
1287 */
1288 // struct ddf_super *ddf = st->sb;
1289 struct mdinfo info;
1290 char nbuf[64];
1291 getinfo_super_ddf(st, &info, NULL);
1292 fname_from_uuid(st, &info, nbuf,':');
1293 printf(" UUID=%s", nbuf + 5);
1294 }
1295 #endif
1296
1297 static int match_home_ddf(struct supertype *st, char *homehost)
1298 {
1299 /* It matches 'this' host if the controller is a
1300 * Linux-MD controller with vendor_data matching
1301 * the hostname
1302 */
1303 struct ddf_super *ddf = st->sb;
1304 unsigned int len;
1305
1306 if (!homehost)
1307 return 0;
1308 len = strlen(homehost);
1309
1310 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1311 len < sizeof(ddf->controller.vendor_data) &&
1312 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1313 ddf->controller.vendor_data[len] == 0);
1314 }
1315
1316 #ifndef MDASSEMBLE
1317 static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst)
1318 {
1319 struct vcl *v;
1320
1321 for (v = ddf->conflist; v; v = v->next)
1322 if (inst == v->vcnum)
1323 return &v->conf;
1324 return NULL;
1325 }
1326 #endif
1327
1328 static int find_phys(struct ddf_super *ddf, __u32 phys_refnum)
1329 {
1330 /* Find the entry in phys_disk which has the given refnum
1331 * and return it's index
1332 */
1333 unsigned int i;
1334 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
1335 if (ddf->phys->entries[i].refnum == phys_refnum)
1336 return i;
1337 return -1;
1338 }
1339
1340 static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1341 {
1342 /* The uuid returned here is used for:
1343 * uuid to put into bitmap file (Create, Grow)
1344 * uuid for backup header when saving critical section (Grow)
1345 * comparing uuids when re-adding a device into an array
1346 * In these cases the uuid required is that of the data-array,
1347 * not the device-set.
1348 * uuid to recognise same set when adding a missing device back
1349 * to an array. This is a uuid for the device-set.
1350 *
1351 * For each of these we can make do with a truncated
1352 * or hashed uuid rather than the original, as long as
1353 * everyone agrees.
1354 * In the case of SVD we assume the BVD is of interest,
1355 * though that might be the case if a bitmap were made for
1356 * a mirrored SVD - worry about that later.
1357 * So we need to find the VD configuration record for the
1358 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1359 * The first 16 bytes of the sha1 of these is used.
1360 */
1361 struct ddf_super *ddf = st->sb;
1362 struct vcl *vcl = ddf->currentconf;
1363 char *guid;
1364 char buf[20];
1365 struct sha1_ctx ctx;
1366
1367 if (vcl)
1368 guid = vcl->conf.guid;
1369 else
1370 guid = ddf->anchor.guid;
1371
1372 sha1_init_ctx(&ctx);
1373 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
1374 sha1_finish_ctx(&ctx, buf);
1375 memcpy(uuid, buf, 4*4);
1376 }
1377
1378 static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
1379
static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
{
	/* Fill 'info' with container-level information for this DDF set.
	 * When a particular BVD is selected (ddf->currentconf) the work
	 * is delegated to getinfo_super_ddf_bvd() instead.
	 * 'map', if non-NULL, receives one byte per raid slot (sized by
	 * the caller via info->array.raid_disks on entry): 1 when the
	 * corresponding physical disk is Online and not Failed, else 0.
	 */
	struct ddf_super *ddf = st->sb;
	int map_disks = info->array.raid_disks;   /* caller-provided map size */
	__u32 *cptr;

	if (ddf->currentconf) {
		getinfo_super_ddf_bvd(st, info, map);
		return;
	}
	memset(info, 0, sizeof(*info));

	info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
	info->array.level = LEVEL_CONTAINER;
	info->array.layout = 0;
	info->array.md_minor = -1;
	/* bytes 16-19 of the container GUID hold the creation timestamp
	 * (big-endian, 1980 epoch) */
	cptr = (__u32 *)(ddf->anchor.guid + 16);
	info->array.ctime = DECADE + __be32_to_cpu(*cptr);

	info->array.utime = 0;
	info->array.chunk_size = 0;
	info->container_enough = 1;

	info->disk.major = 0;
	info->disk.minor = 0;
	if (ddf->dlist) {
		/* describe the first local member device */
		info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
		info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);

		info->data_offset = __be64_to_cpu(ddf->phys->
						  entries[info->disk.raid_disk].
						  config_size);
		info->component_size = ddf->dlist->size - info->data_offset;
	} else {
		info->disk.number = -1;
		info->disk.raid_disk = -1;
//		info->disk.raid_disk = find refnum in the table and use index;
	}
	info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);

	info->recovery_start = MaxSector;
	info->reshape_active = 0;
	info->recovery_blocked = 0;
	info->name[0] = 0;

	/* (-1,-2) plus text_version "ddf" is mdadm's convention for
	 * externally-managed metadata */
	info->array.major_version = -1;
	info->array.minor_version = -2;
	strcpy(info->text_version, "ddf");
	info->safe_mode_delay = 0;

	uuid_from_super_ddf(st, info->uuid);

	if (map) {
		int i;
		for (i = 0 ; i < map_disks; i++) {
			if (i < info->array.raid_disks &&
			    (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
			    !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
				map[i] = 1;
			else
				map[i] = 0;
		}
	}
}
1444
1445 static int rlq_to_layout(int rlq, int prl, int raiddisks);
1446
1447 static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
1448 {
1449 struct ddf_super *ddf = st->sb;
1450 struct vcl *vc = ddf->currentconf;
1451 int cd = ddf->currentdev;
1452 int j;
1453 struct dl *dl;
1454 int map_disks = info->array.raid_disks;
1455 __u32 *cptr;
1456
1457 memset(info, 0, sizeof(*info));
1458 /* FIXME this returns BVD info - what if we want SVD ?? */
1459
1460 info->array.raid_disks = __be16_to_cpu(vc->conf.prim_elmnt_count);
1461 info->array.level = map_num1(ddf_level_num, vc->conf.prl);
1462 info->array.layout = rlq_to_layout(vc->conf.rlq, vc->conf.prl,
1463 info->array.raid_disks);
1464 info->array.md_minor = -1;
1465 cptr = (__u32 *)(vc->conf.guid + 16);
1466 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1467 info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp);
1468 info->array.chunk_size = 512 << vc->conf.chunk_shift;
1469 info->custom_array_size = 0;
1470
1471 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
1472 info->data_offset = __be64_to_cpu(vc->lba_offset[cd]);
1473 if (vc->block_sizes)
1474 info->component_size = vc->block_sizes[cd];
1475 else
1476 info->component_size = __be64_to_cpu(vc->conf.blocks);
1477 }
1478
1479 for (dl = ddf->dlist; dl ; dl = dl->next)
1480 if (dl->raiddisk == ddf->currentdev)
1481 break;
1482
1483 info->disk.major = 0;
1484 info->disk.minor = 0;
1485 info->disk.state = 0;
1486 if (dl) {
1487 info->disk.major = dl->major;
1488 info->disk.minor = dl->minor;
1489 info->disk.raid_disk = dl->raiddisk;
1490 info->disk.number = dl->pdnum;
1491 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
1492 }
1493
1494 info->container_member = ddf->currentconf->vcnum;
1495
1496 info->recovery_start = MaxSector;
1497 info->resync_start = 0;
1498 info->reshape_active = 0;
1499 info->recovery_blocked = 0;
1500 if (!(ddf->virt->entries[info->container_member].state
1501 & DDF_state_inconsistent) &&
1502 (ddf->virt->entries[info->container_member].init_state
1503 & DDF_initstate_mask)
1504 == DDF_init_full)
1505 info->resync_start = MaxSector;
1506
1507 uuid_from_super_ddf(st, info->uuid);
1508
1509 info->array.major_version = -1;
1510 info->array.minor_version = -2;
1511 sprintf(info->text_version, "/%s/%d",
1512 st->container_devnm,
1513 info->container_member);
1514 info->safe_mode_delay = 200;
1515
1516 memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
1517 info->name[16]=0;
1518 for(j=0; j<16; j++)
1519 if (info->name[j] == ' ')
1520 info->name[j] = 0;
1521
1522 if (map)
1523 for (j = 0; j < map_disks; j++) {
1524 map[j] = 0;
1525 if (j < info->array.raid_disks) {
1526 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
1527 if (i >= 0 &&
1528 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1529 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1530 map[i] = 1;
1531 }
1532 }
1533 }
1534
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	/* Apply a named metadata update.  Mostly unimplemented for DDF.
	 *
	 * For 'assemble' and 'force' we need to return non-zero if any
	 * change was made.  For others, the return value is ignored.
	 * Update options are:
	 *  force-one : This device looks a bit old but needs to be included,
	 *        update age info appropriately.
	 *  assemble: clear any 'faulty' flag to allow this device to
	 *        be assembled.
	 *  force-array: Array is degraded but being forced, mark it clean
	 *        if that will be needed to assemble it.
	 *
	 *  newdev:  not used ????
	 *  grow:  Array has gained a new device - this is currently for
	 *        linear only
	 *  resync: mark as dirty so a resync will happen.
	 *  uuid:  Change the uuid of the array to match what is given
	 *  homehost:  update the recorded homehost
	 *  name:  update the name - preserving the homehost
	 *  _reshape_progress: record new reshape_progress position.
	 *
	 * Following are not relevant for this version:
	 *  sparc2.2 : update from old dodgey metadata
	 *  super-minor: change the preferred_minor number
	 *  summaries:  update redundant counters.
	 */
	int rv = 0;
//	struct ddf_super *ddf = st->sb;
//	struct vd_config *vd = find_vdcr(ddf, info->container_member);
//	struct virtual_entry *ve = find_ve(ddf);

	/* we don't need to handle "force-*" or "assemble" as
	 * there is no need to 'trick' the kernel.  Once the metadata is
	 * first updated to activate the array, all the implied modifications
	 * will just happen.
	 */

	if (strcmp(update, "grow") == 0) {
		/* FIXME */
	} else if (strcmp(update, "resync") == 0) {
//		info->resync_checkpoint = 0;
	} else if (strcmp(update, "homehost") == 0) {
		/* homehost is stored in controller->vendor_data,
		 * or it is when we are the vendor
		 */
//		if (info->vendor_is_local)
//			strcpy(ddf->controller.vendor_data, homehost);
		rv = -1;	/* not implemented yet */
	} else if (strcmp(update, "name") == 0) {
		/* name is stored in virtual_entry->name */
//		memset(ve->name, ' ', 16);
//		strncpy(ve->name, info->name, 16);
		rv = -1;	/* not implemented yet */
	} else if (strcmp(update, "_reshape_progress") == 0) {
		/* We don't support reshape yet */
	} else if (strcmp(update, "assemble") == 0 ) {
		/* Do nothing, just succeed */
		rv = 0;
	} else
		rv = -1;	/* unknown update type */

//	update_all_csum(ddf);

	return rv;
}
1603
1604 static void make_header_guid(char *guid)
1605 {
1606 __u32 stamp;
1607 /* Create a DDF Header of Virtual Disk GUID */
1608
1609 /* 24 bytes of fiction required.
1610 * first 8 are a 'vendor-id' - "Linux-MD"
1611 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
1612 * Remaining 8 random number plus timestamp
1613 */
1614 memcpy(guid, T10, sizeof(T10));
1615 stamp = __cpu_to_be32(0xdeadbeef);
1616 memcpy(guid+8, &stamp, 4);
1617 stamp = __cpu_to_be32(0);
1618 memcpy(guid+12, &stamp, 4);
1619 stamp = __cpu_to_be32(time(0) - DECADE);
1620 memcpy(guid+16, &stamp, 4);
1621 stamp = random32();
1622 memcpy(guid+20, &stamp, 4);
1623 }
1624
1625 static int init_super_ddf_bvd(struct supertype *st,
1626 mdu_array_info_t *info,
1627 unsigned long long size,
1628 char *name, char *homehost,
1629 int *uuid, unsigned long long data_offset);
1630
static int init_super_ddf(struct supertype *st,
			  mdu_array_info_t *info,
			  unsigned long long size, char *name, char *homehost,
			  int *uuid, unsigned long long data_offset)
{
	/* This is primarily called by Create when creating a new array.
	 * We will then get add_to_super called for each component, and then
	 * write_init_super called to write it out to each device.
	 * For DDF, Create can create on fresh devices or on a pre-existing
	 * array.
	 * To create on a pre-existing array a different method will be called.
	 * This one is just for fresh drives.
	 *
	 * We need to create the entire 'ddf' structure which includes:
	 *  DDF headers - these are easy.
	 *  Controller data - a Sector describing this controller .. not that
	 *                  this is a controller exactly.
	 *  Physical Disk Record - one entry per device, so
	 *			leave plenty of space.
	 *  Virtual Disk Records - again, just leave plenty of space.
	 *                   This just lists VDs, doesn't give details
	 *  Config records - describes the VDs that use this disk
	 *  DiskData  - describes 'this' device.
	 *  BadBlockManagement - empty
	 *  Diag Space - empty
	 *  Vendor Logs - Could we put bitmaps here?
	 *
	 * Returns 1 on success, 0 on failure (message already printed).
	 */
	struct ddf_super *ddf;
	char hostname[17];
	int hostlen;
	int max_phys_disks, max_virt_disks;
	unsigned long long sector;
	int clen;
	int i;
	int pdsize, vdsize;
	struct phys_disk *pd;
	struct virtual_disk *vd;

	if (data_offset != INVALID_SECTORS) {
		fprintf(stderr, Name ": data-offset not supported by DDF\n");
		return 0;
	}

	/* an existing superblock means we are adding a BVD inside a
	 * pre-existing container */
	if (st->sb)
		return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
					  data_offset);

	if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
		pr_err("%s could not allocate superblock\n", __func__);
		return 0;
	}
	memset(ddf, 0, sizeof(*ddf));
	ddf->dlist = NULL; /* no physical disks yet */
	ddf->conflist = NULL; /* No virtual disks yet */
	st->sb = ddf;

	if (info == NULL) {
		/* zeroing superblock */
		return 0;
	}

	/* At least 32MB *must* be reserved for the ddf.  So let's just
	 * start 32MB from the end, and put the primary header there.
	 * Don't do secondary for now.
	 * We don't know exactly where that will be yet as it could be
	 * different on each device.  To just set up the lengths.
	 *
	 */

	ddf->anchor.magic = DDF_HEADER_MAGIC;
	make_header_guid(ddf->anchor.guid);

	memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
	ddf->anchor.seq = __cpu_to_be32(1);
	ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
	ddf->anchor.openflag = 0xFF;
	ddf->anchor.foreignflag = 0;
	ddf->anchor.enforcegroups = 0; /* Is this best?? */
	ddf->anchor.pad0 = 0xff;
	memset(ddf->anchor.pad1, 0xff, 12);
	memset(ddf->anchor.header_ext, 0xff, 32);
	/* LBAs are unknown until the per-device geometry is known */
	ddf->anchor.primary_lba = ~(__u64)0;
	ddf->anchor.secondary_lba = ~(__u64)0;
	ddf->anchor.type = DDF_HEADER_ANCHOR;
	memset(ddf->anchor.pad2, 0xff, 3);
	ddf->anchor.workspace_len = __cpu_to_be32(32768); /* Must be reserved */
	ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
						  of 32M reserved.. */
	max_phys_disks = 1023;   /* Should be enough */
	ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
	max_virt_disks = 255;
	ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
	ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
	ddf->max_part = 64;
	ddf->mppe = 256;
	/* one header sector plus room for mppe refnums (4 bytes each)
	 * and lba_offsets (8 bytes each), rounded to whole sectors */
	ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
	ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
	ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
	memset(ddf->anchor.pad3, 0xff, 54);
	/* controller sections is one sector long immediately
	 * after the ddf header */
	sector = 1;
	ddf->anchor.controller_section_offset = __cpu_to_be32(sector);
	ddf->anchor.controller_section_length = __cpu_to_be32(1);
	sector += 1;

	/* phys is 8 sectors after that */
	pdsize = ROUND_UP(sizeof(struct phys_disk) +
			  sizeof(struct phys_disk_entry)*max_phys_disks,
			  512);
	/* the spec only allows these section sizes */
	switch(pdsize/512) {
	case 2: case 8: case 32: case 128: case 512: break;
	default: abort();
	}
	ddf->anchor.phys_section_offset = __cpu_to_be32(sector);
	ddf->anchor.phys_section_length =
		__cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
	sector += pdsize/512;

	/* virt is another 32 sectors */
	vdsize = ROUND_UP(sizeof(struct virtual_disk) +
			  sizeof(struct virtual_entry) * max_virt_disks,
			  512);
	switch(vdsize/512) {
	case 2: case 8: case 32: case 128: case 512: break;
	default: abort();
	}
	ddf->anchor.virt_section_offset = __cpu_to_be32(sector);
	ddf->anchor.virt_section_length =
		__cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
	sector += vdsize/512;

	/* one config record per partition, plus one spare-assignment slot */
	clen = ddf->conf_rec_len * (ddf->max_part+1);
	ddf->anchor.config_section_offset = __cpu_to_be32(sector);
	ddf->anchor.config_section_length = __cpu_to_be32(clen);
	sector += clen;

	ddf->anchor.data_section_offset = __cpu_to_be32(sector);
	ddf->anchor.data_section_length = __cpu_to_be32(1);
	sector += 1;

	/* unused optional sections: zero length, offset all-ones */
	ddf->anchor.bbm_section_length = __cpu_to_be32(0);
	ddf->anchor.bbm_section_offset = __cpu_to_be32(0xFFFFFFFF);
	ddf->anchor.diag_space_length = __cpu_to_be32(0);
	ddf->anchor.diag_space_offset = __cpu_to_be32(0xFFFFFFFF);
	ddf->anchor.vendor_length = __cpu_to_be32(0);
	ddf->anchor.vendor_offset = __cpu_to_be32(0xFFFFFFFF);

	memset(ddf->anchor.pad4, 0xff, 256);

	/* primary and secondary headers start as copies of the anchor */
	memcpy(&ddf->primary, &ddf->anchor, 512);
	memcpy(&ddf->secondary, &ddf->anchor, 512);

	ddf->primary.openflag = 1; /* I guess.. */
	ddf->primary.type = DDF_HEADER_PRIMARY;

	ddf->secondary.openflag = 1; /* I guess.. */
	ddf->secondary.type = DDF_HEADER_SECONDARY;

	ddf->active = &ddf->primary;

	ddf->controller.magic = DDF_CONTROLLER_MAGIC;

	/* 24 more bytes of fiction required.
	 * first 8 are a 'vendor-id'  - "Linux-MD"
	 * Remaining 16 are serial number.... maybe a hostname would do?
	 */
	memcpy(ddf->controller.guid, T10, sizeof(T10));
	gethostname(hostname, sizeof(hostname));
	hostname[sizeof(hostname) - 1] = 0;
	hostlen = strlen(hostname);
	/* right-align the hostname and space-pad the gap after T10 */
	memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
	for (i = strlen(T10) ; i+hostlen < 24; i++)
		ddf->controller.guid[i] = ' ';

	ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
	ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
	ddf->controller.type.sub_vendor_id = 0;
	ddf->controller.type.sub_device_id = 0;
	memcpy(ddf->controller.product_id, "What Is My PID??", 16);
	memset(ddf->controller.pad, 0xff, 8);
	memset(ddf->controller.vendor_data, 0xff, 448);
	if (homehost && strlen(homehost) < 440)
		strcpy((char*)ddf->controller.vendor_data, homehost);

	if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
		pr_err("%s could not allocate pd\n", __func__);
		return 0;
	}
	ddf->phys = pd;
	ddf->pdsize = pdsize;

	memset(pd, 0xff, pdsize);
	memset(pd, 0, sizeof(*pd));
	pd->magic = DDF_PHYS_RECORDS_MAGIC;
	pd->used_pdes = __cpu_to_be16(0);
	pd->max_pdes = __cpu_to_be16(max_phys_disks);
	memset(pd->pad, 0xff, 52);

	if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
		pr_err("%s could not allocate vd\n", __func__);
		return 0;
	}
	ddf->virt = vd;
	ddf->vdsize = vdsize;
	memset(vd, 0, vdsize);
	vd->magic = DDF_VIRT_RECORDS_MAGIC;
	vd->populated_vdes = __cpu_to_be16(0);
	vd->max_vdes = __cpu_to_be16(max_virt_disks);
	memset(vd->pad, 0xff, 52);

	/* all-0xff GUID marks a virtual-entry slot as unused */
	for (i=0; i<max_virt_disks; i++)
		memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));

	st->sb = ddf;
	ddf->updates_pending = 1;
	return 1;
}
1850
static int chunk_to_shift(int chunksize)
{
	/* Convert a chunk size in bytes to the DDF chunk-shift byte
	 * (log2 of the size in 512-byte sectors, via ffs on the
	 * sector count).
	 */
	int sectors = chunksize / 512;

	return ffs(sectors) - 1;
}
1855
static int level_to_prl(int level)
{
	/* Map an md RAID level to the DDF Primary RAID Level (PRL)
	 * byte; returns -1 for levels DDF cannot express.
	 */
	switch (level) {
	case LEVEL_LINEAR: return DDF_CONCAT;
	case 0: return DDF_RAID0;
	case 1: return DDF_RAID1;
	case 4: return DDF_RAID4;
	case 5: return DDF_RAID5;
	case 6: return DDF_RAID6;
	default: return -1;
	}
}
1868
1869 static int layout_to_rlq(int level, int layout, int raiddisks)
1870 {
1871 switch(level) {
1872 case 0:
1873 return DDF_RAID0_SIMPLE;
1874 case 1:
1875 switch(raiddisks) {
1876 case 2: return DDF_RAID1_SIMPLE;
1877 case 3: return DDF_RAID1_MULTI;
1878 default: return -1;
1879 }
1880 case 4:
1881 switch(layout) {
1882 case 0: return DDF_RAID4_N;
1883 }
1884 break;
1885 case 5:
1886 switch(layout) {
1887 case ALGORITHM_LEFT_ASYMMETRIC:
1888 return DDF_RAID5_N_RESTART;
1889 case ALGORITHM_RIGHT_ASYMMETRIC:
1890 return DDF_RAID5_0_RESTART;
1891 case ALGORITHM_LEFT_SYMMETRIC:
1892 return DDF_RAID5_N_CONTINUE;
1893 case ALGORITHM_RIGHT_SYMMETRIC:
1894 return -1; /* not mentioned in standard */
1895 }
1896 case 6:
1897 switch(layout) {
1898 case ALGORITHM_ROTATING_N_RESTART:
1899 return DDF_RAID5_N_RESTART;
1900 case ALGORITHM_ROTATING_ZERO_RESTART:
1901 return DDF_RAID6_0_RESTART;
1902 case ALGORITHM_ROTATING_N_CONTINUE:
1903 return DDF_RAID5_N_CONTINUE;
1904 }
1905 }
1906 return -1;
1907 }
1908
static int rlq_to_layout(int rlq, int prl, int raiddisks)
{
	/* Inverse of layout_to_rlq(): map a DDF (PRL, RLQ) pair back to
	 * an md layout number; -1 when the combination is unsupported.
	 * 'raiddisks' is currently unused (the RAID1 variants all map
	 * to layout 0).
	 */
	switch(prl) {
	case DDF_RAID0:
		return 0; /* hopefully rlq == DDF_RAID0_SIMPLE */
	case DDF_RAID1:
		return 0; /* hopefully rlq == SIMPLE or MULTI depending
			     on raiddisks*/
	case DDF_RAID4:
		switch(rlq) {
		case DDF_RAID4_N:
			return 0;
		default:
			/* not supported */
			return -1; /* FIXME this isn't checked */
		}
	case DDF_RAID5:
		switch(rlq) {
		case DDF_RAID5_N_RESTART:
			return ALGORITHM_LEFT_ASYMMETRIC;
		case DDF_RAID5_0_RESTART:
			return ALGORITHM_RIGHT_ASYMMETRIC;
		case DDF_RAID5_N_CONTINUE:
			return ALGORITHM_LEFT_SYMMETRIC;
		default:
			return -1;
		}
	case DDF_RAID6:
		switch(rlq) {
		case DDF_RAID5_N_RESTART:
			return ALGORITHM_ROTATING_N_RESTART;
		case DDF_RAID6_0_RESTART:
			return ALGORITHM_ROTATING_ZERO_RESTART;
		case DDF_RAID5_N_CONTINUE:
			return ALGORITHM_ROTATING_N_CONTINUE;
		default:
			return -1;
		}
	}
	return -1;
}
1950
1951 #ifndef MDASSEMBLE
struct extent {
	unsigned long long start, size;
};

/* qsort() comparator: order extents by ascending start sector. */
static int cmp_extent(const void *av, const void *bv)
{
	const struct extent *a = av;
	const struct extent *b = bv;

	/* yields -1, 0 or 1 without risking overflow on the
	 * unsigned 64-bit subtraction */
	return (a->start > b->start) - (a->start < b->start);
}
1965
1966 static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
1967 {
1968 /* find a list of used extents on the give physical device
1969 * (dnum) of the given ddf.
1970 * Return a malloced array of 'struct extent'
1971
1972 * FIXME ignore DDF_Legacy devices?
1973
1974 */
1975 struct extent *rv;
1976 int n = 0;
1977 unsigned int i, j;
1978
1979 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
1980
1981 for (i = 0; i < ddf->max_part; i++) {
1982 struct vcl *v = dl->vlist[i];
1983 if (v == NULL)
1984 continue;
1985 for (j = 0; j < v->conf.prim_elmnt_count; j++)
1986 if (v->conf.phys_refnum[j] == dl->disk.refnum) {
1987 /* This device plays role 'j' in 'v'. */
1988 rv[n].start = __be64_to_cpu(v->lba_offset[j]);
1989 rv[n].size = __be64_to_cpu(v->conf.blocks);
1990 n++;
1991 break;
1992 }
1993 }
1994 qsort(rv, n, sizeof(*rv), cmp_extent);
1995
1996 rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
1997 rv[n].size = 0;
1998 return rv;
1999 }
2000 #endif
2001
static int init_super_ddf_bvd(struct supertype *st,
			      mdu_array_info_t *info,
			      unsigned long long size,
			      char *name, char *homehost,
			      int *uuid, unsigned long long data_offset)
{
	/* We are creating a BVD inside a pre-existing container.
	 * so st->sb is already set.
	 * We need to create a new vd_config and a new virtual_entry
	 * Returns 1 on success, 0 on failure (message already printed).
	 */
	struct ddf_super *ddf = st->sb;
	unsigned int venum;
	struct virtual_entry *ve;
	struct vcl *vcl;
	struct vd_config *vc;

	if (__be16_to_cpu(ddf->virt->populated_vdes)
	    >= __be16_to_cpu(ddf->virt->max_vdes)) {
		pr_err("This ddf already has the "
		       "maximum of %d virtual devices\n",
		       __be16_to_cpu(ddf->virt->max_vdes));
		return 0;
	}

	/* reject a duplicate name among the already-defined VDs */
	if (name)
		for (venum = 0; venum < __be16_to_cpu(ddf->virt->max_vdes); venum++)
			if (!all_ff(ddf->virt->entries[venum].guid)) {
				char *n = ddf->virt->entries[venum].name;

				if (strncmp(name, n, 16) == 0) {
					pr_err("This ddf already"
					       " has an array called %s\n",
					       name);
					return 0;
				}
			}

	/* an all-0xff GUID marks a free virtual-entry slot */
	for (venum = 0; venum < __be16_to_cpu(ddf->virt->max_vdes); venum++)
		if (all_ff(ddf->virt->entries[venum].guid))
			break;
	if (venum == __be16_to_cpu(ddf->virt->max_vdes)) {
		pr_err("Cannot find spare slot for "
		       "virtual disk - DDF is corrupt\n");
		return 0;
	}
	ve = &ddf->virt->entries[venum];

	/* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
	 * timestamp, random number
	 */
	make_header_guid(ve->guid);
	ve->unit = __cpu_to_be16(info->md_minor);
	ve->pad0 = 0xFFFF;
	ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
	ve->type = 0;
	ve->state = DDF_state_degraded; /* Will be modified as devices are added */
	if (info->state & 1) /* clean */
		ve->init_state = DDF_init_full;
	else
		ve->init_state = DDF_init_not;

	memset(ve->pad1, 0xff, 14);
	/* name is a fixed 16-byte field, space padded, possibly
	 * without NUL termination */
	memset(ve->name, ' ', 16);
	if (name)
		strncpy(ve->name, name, 16);
	ddf->virt->populated_vdes =
		__cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);

	/* Now create a new vd_config */
	if (posix_memalign((void**)&vcl, 512,
			   (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
		pr_err("%s could not allocate vd_config\n", __func__);
		return 0;
	}
	/* lba_offsets live directly after the mppe refnums in the record */
	vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->mppe];
	vcl->vcnum = venum;
	vcl->block_sizes = NULL; /* FIXME not for CONCAT */
	vcl->other_bvds = NULL;

	vc = &vcl->conf;

	vc->magic = DDF_VD_CONF_MAGIC;
	memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
	vc->timestamp = __cpu_to_be32(time(0)-DECADE);
	vc->seqnum = __cpu_to_be32(1);
	memset(vc->pad0, 0xff, 24);
	vc->prim_elmnt_count = __cpu_to_be16(info->raid_disks);
	vc->chunk_shift = chunk_to_shift(info->chunk_size);
	vc->prl = level_to_prl(info->level);
	vc->rlq = layout_to_rlq(info->level, info->layout, info->raid_disks);
	/* single-element secondary: this is a plain BVD, not an SVD */
	vc->sec_elmnt_count = 1;
	vc->sec_elmnt_seq = 0;
	vc->srl = 0;
	/* info->size is in KiB; *2 converts to 512-byte sectors */
	vc->blocks = __cpu_to_be64(info->size * 2);
	vc->array_blocks = __cpu_to_be64(
		calc_array_size(info->level, info->raid_disks, info->layout,
				info->chunk_size, info->size*2));
	memset(vc->pad1, 0xff, 8);
	vc->spare_refs[0] = 0xffffffff;
	vc->spare_refs[1] = 0xffffffff;
	vc->spare_refs[2] = 0xffffffff;
	vc->spare_refs[3] = 0xffffffff;
	vc->spare_refs[4] = 0xffffffff;
	vc->spare_refs[5] = 0xffffffff;
	vc->spare_refs[6] = 0xffffffff;
	vc->spare_refs[7] = 0xffffffff;
	memset(vc->cache_pol, 0, 8);
	vc->bg_rate = 0x80;
	memset(vc->pad2, 0xff, 3);
	memset(vc->pad3, 0xff, 52);
	memset(vc->pad4, 0xff, 192);
	memset(vc->v0, 0xff, 32);
	memset(vc->v1, 0xff, 32);
	memset(vc->v2, 0xff, 16);
	memset(vc->v3, 0xff, 16);
	memset(vc->vendor, 0xff, 32);

	/* refnums start as all-0xff (unassigned), lba_offsets as zero */
	memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
	memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);

	vcl->next = ddf->conflist;
	ddf->conflist = vcl;
	ddf->currentconf = vcl;
	ddf->updates_pending = 1;
	return 1;
}
2128
2129 #ifndef MDASSEMBLE
static void add_to_super_ddf_bvd(struct supertype *st,
			mdu_disk_info_t *dk, int fd, char *devname)
{
	/* fd and devname identify a device with-in the ddf container (st).
	 * dk identifies a location in the new BVD.
	 * We need to find suitable free space in that device and update
	 * the phys_refnum and lba_offset for the newly created vd_config.
	 * We might also want to update the type in the phys_disk
	 * section.
	 *
	 * Alternately: fd == -1 and we have already chosen which device to
	 * use and recorded in dlist->raid_disk;
	 */
	struct dl *dl;
	struct ddf_super *ddf = st->sb;
	struct vd_config *vc;
	__u64 *lba_offset;
	unsigned int working;
	unsigned int i;
	unsigned long long blocks, pos, esize;
	struct extent *ex;

	/* Locate the 'dl' for this device: by pre-recorded raid_disk
	 * slot when fd == -1, otherwise by major/minor.
	 */
	if (fd == -1) {
		for (dl = ddf->dlist; dl ; dl = dl->next)
			if (dl->raiddisk == dk->raid_disk)
				break;
	} else {
		for (dl = ddf->dlist; dl ; dl = dl->next)
			if (dl->major == dk->major &&
			    dl->minor == dk->minor)
				break;
	}
	/* Only known, in-sync devices may join the BVD */
	if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
		return;

	vc = &ddf->currentconf->conf;
	lba_offset = ddf->currentconf->lba_offset;

	ex = get_extents(ddf, dl);
	if (!ex)
		return;

	/* First-fit search: walk the gaps between allocated extents
	 * until one is big enough for this component ('blocks' sectors).
	 */
	i = 0; pos = 0;
	blocks = __be64_to_cpu(vc->blocks);
	if (ddf->currentconf->block_sizes)
		blocks = ddf->currentconf->block_sizes[dk->raid_disk];

	do {
		esize = ex[i].start - pos;
		if (esize >= blocks)
			break;
		pos = ex[i].start + ex[i].size;
		i++;
	} while (ex[i-1].size);

	free(ex);
	if (esize < blocks)
		return;	/* no gap large enough on this device */

	/* Record where this component lives on the device */
	ddf->currentdev = dk->raid_disk;
	vc->phys_refnum[dk->raid_disk] = dl->disk.refnum;
	lba_offset[dk->raid_disk] = __cpu_to_be64(pos);

	/* Attach the new vd_config to a free slot in the device's
	 * vlist; give up silently if every slot is taken.
	 */
	for (i = 0; i < ddf->max_part ; i++)
		if (dl->vlist[i] == NULL)
			break;
	if (i == ddf->max_part)
		return;
	dl->vlist[i] = ddf->currentconf;

	if (fd >= 0)
		dl->fd = fd;
	if (devname)
		dl->devname = devname;

	/* Check how many working raid_disks, and if we can mark
	 * array as optimal yet
	 */
	working = 0;

	for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++)
		if (vc->phys_refnum[i] != 0xffffffff)
			working++;

	/* Find which virtual_entry */
	i = ddf->currentconf->vcnum;
	if (working == __be16_to_cpu(vc->prim_elmnt_count))
		/* every member is present: fully optimal */
		ddf->virt->entries[i].state =
			(ddf->virt->entries[i].state & ~DDF_state_mask)
			| DDF_state_optimal;

	if (vc->prl == DDF_RAID6 &&
	    working+1 == __be16_to_cpu(vc->prim_elmnt_count))
		/* RAID6 missing one member is still partially optimal */
		ddf->virt->entries[i].state =
			(ddf->virt->entries[i].state & ~DDF_state_mask)
			| DDF_state_part_optimal;

	/* The device is now an active VD member, not a global spare */
	ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
	ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
	ddf->updates_pending = 1;
}
2231
2232 /* add a device to a container, either while creating it or while
2233 * expanding a pre-existing container
2234 */
2235 static int add_to_super_ddf(struct supertype *st,
2236 mdu_disk_info_t *dk, int fd, char *devname,
2237 unsigned long long data_offset)
2238 {
2239 struct ddf_super *ddf = st->sb;
2240 struct dl *dd;
2241 time_t now;
2242 struct tm *tm;
2243 unsigned long long size;
2244 struct phys_disk_entry *pde;
2245 unsigned int n, i;
2246 struct stat stb;
2247 __u32 *tptr;
2248
2249 if (ddf->currentconf) {
2250 add_to_super_ddf_bvd(st, dk, fd, devname);
2251 return 0;
2252 }
2253
2254 /* This is device numbered dk->number. We need to create
2255 * a phys_disk entry and a more detailed disk_data entry.
2256 */
2257 fstat(fd, &stb);
2258 if (posix_memalign((void**)&dd, 512,
2259 sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
2260 pr_err("%s could allocate buffer for new disk, aborting\n",
2261 __func__);
2262 return 1;
2263 }
2264 dd->major = major(stb.st_rdev);
2265 dd->minor = minor(stb.st_rdev);
2266 dd->devname = devname;
2267 dd->fd = fd;
2268 dd->spare = NULL;
2269
2270 dd->disk.magic = DDF_PHYS_DATA_MAGIC;
2271 now = time(0);
2272 tm = localtime(&now);
2273 sprintf(dd->disk.guid, "%8s%04d%02d%02d",
2274 T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
2275 tptr = (__u32 *)(dd->disk.guid + 16);
2276 *tptr++ = random32();
2277 *tptr = random32();
2278
2279 do {
2280 /* Cannot be bothered finding a CRC of some irrelevant details*/
2281 dd->disk.refnum = random32();
2282 for (i = __be16_to_cpu(ddf->active->max_pd_entries);
2283 i > 0; i--)
2284 if (ddf->phys->entries[i-1].refnum == dd->disk.refnum)
2285 break;
2286 } while (i > 0);
2287
2288 dd->disk.forced_ref = 1;
2289 dd->disk.forced_guid = 1;
2290 memset(dd->disk.vendor, ' ', 32);
2291 memcpy(dd->disk.vendor, "Linux", 5);
2292 memset(dd->disk.pad, 0xff, 442);
2293 for (i = 0; i < ddf->max_part ; i++)
2294 dd->vlist[i] = NULL;
2295
2296 n = __be16_to_cpu(ddf->phys->used_pdes);
2297 pde = &ddf->phys->entries[n];
2298 dd->pdnum = n;
2299
2300 if (st->update_tail) {
2301 int len = (sizeof(struct phys_disk) +
2302 sizeof(struct phys_disk_entry));
2303 struct phys_disk *pd;
2304
2305 pd = xmalloc(len);
2306 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2307 pd->used_pdes = __cpu_to_be16(n);
2308 pde = &pd->entries[0];
2309 dd->mdupdate = pd;
2310 } else {
2311 n++;
2312 ddf->phys->used_pdes = __cpu_to_be16(n);
2313 }
2314
2315 memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
2316 pde->refnum = dd->disk.refnum;
2317 pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
2318 pde->state = __cpu_to_be16(DDF_Online);
2319 get_dev_size(fd, NULL, &size);
2320 /* We are required to reserve 32Meg, and record the size in sectors */
2321 pde->config_size = __cpu_to_be64( (size - 32*1024*1024) / 512);
2322 sprintf(pde->path, "%17.17s","Information: nil") ;
2323 memset(pde->pad, 0xff, 6);
2324
2325 dd->size = size >> 9;
2326 if (st->update_tail) {
2327 dd->next = ddf->add_list;
2328 ddf->add_list = dd;
2329 } else {
2330 dd->next = ddf->dlist;
2331 ddf->dlist = dd;
2332 ddf->updates_pending = 1;
2333 }
2334
2335 return 0;
2336 }
2337
static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
{
	struct ddf_super *ddf = st->sb;
	struct dl *dl;

	/* mdmon has noticed that this disk (dk->major/dk->minor) has
	 * disappeared from the container.
	 * We need to arrange that it disappears from the metadata and
	 * internal data structures too.
	 * Most of the work is done by ddf_process_update which edits
	 * the metadata and closes the file handle and attaches the memory
	 * where free_updates will free it.
	 */
	for (dl = ddf->dlist; dl ; dl = dl->next)
		if (dl->major == dk->major &&
		    dl->minor == dk->minor)
			break;
	if (!dl)
		return -1;	/* device not known to this container */

	if (st->update_tail) {
		/* Queue a phys_disk update marking the entry Missing.
		 * NOTE(review): pd comes from xmalloc and only magic,
		 * used_pdes and entries[0].state are initialised;
		 * presumably ddf_process_update reads nothing else
		 * from this record - confirm.
		 */
		int len = (sizeof(struct phys_disk) +
			   sizeof(struct phys_disk_entry));
		struct phys_disk *pd;

		pd = xmalloc(len);
		pd->magic = DDF_PHYS_RECORDS_MAGIC;
		pd->used_pdes = __cpu_to_be16(dl->pdnum);
		pd->entries[0].state = __cpu_to_be16(DDF_Missing);
		append_metadata_update(st, pd, len);
	}
	return 0;
}
2371
2372 /*
2373 * This is the write_init_super method for a ddf container. It is
2374 * called when creating a container or adding another device to a
2375 * container.
2376 */
2377 #define NULL_CONF_SZ 4096
2378
2379 static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
2380 __u32 refnum, unsigned int nmax,
2381 const struct vd_config **bvd,
2382 unsigned int *idx);
2383
static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type,
				 char *null_aligned)
{
	/* Write one complete copy of the DDF structure - primary or
	 * secondary, selected by 'type' - to the device described by 'd':
	 * header, controller section, phys-disk records, virt-disk
	 * records, one config record per partition slot (plus a final
	 * spare-assignment record), then the device-local disk_data
	 * block.  Sections are written consecutively starting at the LBA
	 * taken from the chosen header; unused config slots are filled
	 * with 0xff bytes from 'null_aligned'.
	 * Returns 1 on success, 0 on any write failure or unknown type.
	 */
	unsigned long long sector;
	struct ddf_header *header;
	int fd, i, n_config, conf_size;

	fd = d->fd;

	switch (type) {
	case DDF_HEADER_PRIMARY:
		header = &ddf->primary;
		sector = __be64_to_cpu(header->primary_lba);
		break;
	case DDF_HEADER_SECONDARY:
		header = &ddf->secondary;
		sector = __be64_to_cpu(header->secondary_lba);
		break;
	default:
		return 0;
	}

	/* Refresh header fields that depend on which copy this is,
	 * then recompute its CRC before writing.
	 */
	header->type = type;
	header->openflag = 0;
	header->crc = calc_crc(header, 512);

	lseek64(fd, sector<<9, 0);
	if (write(fd, header, 512) < 0)
		return 0;

	ddf->controller.crc = calc_crc(&ddf->controller, 512);
	if (write(fd, &ddf->controller, 512) < 0)
		return 0;

	ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
	if (write(fd, ddf->phys, ddf->pdsize) < 0)
		return 0;
	ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
	if (write(fd, ddf->virt, ddf->vdsize) < 0)
		return 0;

	/* Now write lots of config records. */
	n_config = ddf->max_part;
	conf_size = ddf->conf_rec_len * 512;
	for (i = 0 ; i <= n_config ; i++) {
		struct vcl *c;
		struct vd_config *vdc = NULL;
		if (i == n_config) {
			/* the extra slot carries the spare record */
			c = (struct vcl *)d->spare;
			if (c)
				vdc = &c->conf;
		} else {
			unsigned int dummy;
			c = d->vlist[i];
			if (c)
				/* select the BVD that references this
				 * physical disk */
				get_pd_index_from_refnum(
					c, d->disk.refnum,
					ddf->mppe,
					(const struct vd_config **)&vdc,
					&dummy);
		}
		if (c) {
			vdc->seqnum = header->seq;
			vdc->crc = calc_crc(vdc, conf_size);
			if (write(fd, vdc, conf_size) < 0)
				break;
		} else {
			/* empty slot: pad with 0xff, NULL_CONF_SZ at a time */
			unsigned int togo = conf_size;
			while (togo > NULL_CONF_SZ) {
				if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
					break;
				togo -= NULL_CONF_SZ;
			}
			if (write(fd, null_aligned, togo) < 0)
				break;
		}
	}
	/* a 'break' above means a write failed before all slots finished */
	if (i <= n_config)
		return 0;

	d->disk.crc = calc_crc(&d->disk, 512);
	if (write(fd, &d->disk, 512) < 0)
		return 0;

	return 1;
}
2470
static int __write_init_super_ddf(struct supertype *st)
{
	/* Write the in-memory DDF metadata to every disk in the
	 * container.  For each disk the anchor header is populated with
	 * per-device LBAs, copied into the primary and secondary
	 * headers, both full structures are written, and finally the
	 * anchor goes into the very last sector of the device.
	 * Returns 0 when every attempted disk succeeded, non-zero
	 * otherwise (or -ENOMEM if the scratch buffer cannot be made).
	 */
	struct ddf_super *ddf = st->sb;
	struct dl *d;
	int attempts = 0;
	int successes = 0;
	unsigned long long size;
	char *null_aligned;
	__u32 seq;

	/* 0xff-filled scratch buffer used to blank unused config slots */
	if (posix_memalign((void**)&null_aligned, 4096, NULL_CONF_SZ) != 0) {
		return -ENOMEM;
	}
	memset(null_aligned, 0xff, NULL_CONF_SZ);

	/* Advance the sequence number past whichever header holds a
	 * valid one; 0xffffffff marks "no sequence".
	 */
	if (ddf->primary.seq != 0xffffffff)
		seq = __cpu_to_be32(__be32_to_cpu(ddf->primary.seq)+1);
	else if (ddf->secondary.seq != 0xffffffff)
		seq = __cpu_to_be32(__be32_to_cpu(ddf->secondary.seq)+1);
	else
		seq = __cpu_to_be32(1);

	/* try to write updated metadata,
	 * if we catch a failure move on to the next disk
	 */
	for (d = ddf->dlist; d; d=d->next) {
		int fd = d->fd;

		if (fd < 0)
			continue;

		attempts++;
		/* We need to fill in the primary, (secondary) and workspace
		 * lba's in the headers, set their checksums,
		 * Also checksum phys, virt....
		 *
		 * Then write everything out, finally the anchor is written.
		 */
		get_dev_size(fd, NULL, &size);
		size /= 512;	/* bytes -> sectors */
		/* Default placement (in sectors): workspace and
		 * secondary copy 32MiB from the end, primary copy
		 * 16MiB from the end.
		 */
		if (d->workspace_lba != 0)
			ddf->anchor.workspace_lba = d->workspace_lba;
		else
			ddf->anchor.workspace_lba =
				__cpu_to_be64(size - 32*1024*2);
		if (d->primary_lba != 0)
			ddf->anchor.primary_lba = d->primary_lba;
		else
			ddf->anchor.primary_lba =
				__cpu_to_be64(size - 16*1024*2);
		if (d->secondary_lba != 0)
			ddf->anchor.secondary_lba = d->secondary_lba;
		else
			ddf->anchor.secondary_lba =
				__cpu_to_be64(size - 32*1024*2);
		ddf->anchor.seq = seq;
		memcpy(&ddf->primary, &ddf->anchor, 512);
		memcpy(&ddf->secondary, &ddf->anchor, 512);

		ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
		ddf->anchor.seq = 0xFFFFFFFF; /* no sequencing in anchor */
		ddf->anchor.crc = calc_crc(&ddf->anchor, 512);

		if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY,
					   null_aligned))
			continue;

		if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY,
					   null_aligned))
			continue;

		/* the anchor occupies the device's final sector */
		lseek64(fd, (size-1)*512, SEEK_SET);
		if (write(fd, &ddf->anchor, 512) < 0)
			continue;
		successes++;
	}
	free(null_aligned);

	return attempts != successes;
}
2551
static int write_init_super_ddf(struct supertype *st)
{
	/* Top-level write_init_super method for DDF.  Either queues the
	 * new metadata through mdmon (when st->update_tail is set) or
	 * writes it directly to the member devices.
	 * Returns 0 on success (queued case) or the result of
	 * __write_init_super_ddf() for the direct case.
	 */
	struct ddf_super *ddf = st->sb;
	struct vcl *currentconf = ddf->currentconf;

	/* we are done with currentconf reset it to point st at the container */
	ddf->currentconf = NULL;

	if (st->update_tail) {
		/* queue the virtual_disk and vd_config as metadata updates */
		struct virtual_disk *vd;
		struct vd_config *vc;
		int len;

		if (!currentconf) {
			int len = (sizeof(struct phys_disk) +
				   sizeof(struct phys_disk_entry));

			/* adding a disk to the container. */
			if (!ddf->add_list)
				return 0;

			append_metadata_update(st, ddf->add_list->mdupdate, len);
			ddf->add_list->mdupdate = NULL;
			return 0;
		}

		/* Newly created VD */

		/* First the virtual disk. We have a slightly fake header */
		len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
		vd = xmalloc(len);
		*vd = *ddf->virt;
		/* single-entry copy; populated_vdes doubles as the VD
		 * index for the update handler */
		vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
		vd->populated_vdes = __cpu_to_be16(currentconf->vcnum);
		append_metadata_update(st, vd, len);

		/* Then the vd_config */
		len = ddf->conf_rec_len * 512;
		vc = xmalloc(len);
		memcpy(vc, &currentconf->conf, len);
		append_metadata_update(st, vc, len);

		/* FIXME I need to close the fds! */
		return 0;
	} else {
		/* Direct write: first scrub any stale metadata from
		 * each device, then write the DDF structures.
		 */
		struct dl *d;
		for (d = ddf->dlist; d; d=d->next)
			while (Kill(d->devname, NULL, 0, -1, 1) == 0);
		return __write_init_super_ddf(st);
	}
}
2604
2605 #endif
2606
2607 static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
2608 unsigned long long data_offset)
2609 {
2610 /* We must reserve the last 32Meg */
2611 if (devsize <= 32*1024*2)
2612 return 0;
2613 return devsize - 32*1024*2;
2614 }
2615
2616 #ifndef MDASSEMBLE
2617
static int reserve_space(struct supertype *st, int raiddisks,
			 unsigned long long size, int chunk,
			 unsigned long long *freesize)
{
	/* Find 'raiddisks' spare extents at least 'size' big (but
	 * only caring about multiples of 'chunk') and remember
	 * them.
	 * If the cannot be found, fail.
	 *
	 * On success, chosen devices get dl->raiddisk set to their
	 * slot number; *freesize is updated only when size == 0
	 * ("use maximum").  Returns 1 on success, 0 on failure.
	 */
	struct dl *dl;
	struct ddf_super *ddf = st->sb;
	int cnt = 0;

	/* clear any previous reservation */
	for (dl = ddf->dlist; dl ; dl=dl->next) {
		dl->raiddisk = -1;
		dl->esize = 0;
	}
	/* Now find largest extent on each device */
	for (dl = ddf->dlist ; dl ; dl=dl->next) {
		struct extent *e = get_extents(ddf, dl);
		unsigned long long pos = 0;
		int i = 0;
		int found = 0;
		unsigned long long minsize = size;

		if (size == 0)
			minsize = chunk;

		if (!e)
			continue;
		/* Walk the gaps between allocated extents; 'minsize'
		 * is raised to each qualifying gap, so it ends up as
		 * the largest free extent >= the requested size.
		 */
		do {
			unsigned long long esize;
			esize = e[i].start - pos;
			if (esize >= minsize) {
				found = 1;
				minsize = esize;
			}
			pos = e[i].start + e[i].size;
			i++;
		} while (e[i-1].size);
		if (found) {
			cnt++;
			dl->esize = minsize;
		}
		free(e);
	}
	if (cnt < raiddisks) {
		pr_err("not enough devices with space to create array.\n");
		return 0; /* No enough free spaces large enough */
	}
	if (size == 0) {
		/* choose the largest size of which there are at least 'raiddisk' */
		for (dl = ddf->dlist ; dl ; dl=dl->next) {
			struct dl *dl2;
			if (dl->esize <= size)
				continue;
			/* This is bigger than 'size', see if there are enough */
			cnt = 0;
			for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
				if (dl2->esize >= dl->esize)
					cnt++;
			if (cnt >= raiddisks)
				size = dl->esize;
		}
		if (chunk) {
			/* round down to a whole number of chunks */
			size = size / chunk;
			size *= chunk;
		}
		*freesize = size;
		if (size < 32) {
			pr_err("not enough spare devices to create array.\n");
			return 0;
		}
	}
	/* We have a 'size' of which there are enough spaces.
	 * We simply do a first-fit */
	cnt = 0;
	for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
		if (dl->esize < size)
			continue;

		dl->raiddisk = cnt;
		cnt++;
	}
	return 1;
}
2704
2705 static int
2706 validate_geometry_ddf_container(struct supertype *st,
2707 int level, int layout, int raiddisks,
2708 int chunk, unsigned long long size,
2709 unsigned long long data_offset,
2710 char *dev, unsigned long long *freesize,
2711 int verbose);
2712
2713 static int validate_geometry_ddf_bvd(struct supertype *st,
2714 int level, int layout, int raiddisks,
2715 int *chunk, unsigned long long size,
2716 unsigned long long data_offset,
2717 char *dev, unsigned long long *freesize,
2718 int verbose);
2719
2720 static int validate_geometry_ddf(struct supertype *st,
2721 int level, int layout, int raiddisks,
2722 int *chunk, unsigned long long size,
2723 unsigned long long data_offset,
2724 char *dev, unsigned long long *freesize,
2725 int verbose)
2726 {
2727 int fd;
2728 struct mdinfo *sra;
2729 int cfd;
2730
2731 /* ddf potentially supports lots of things, but it depends on
2732 * what devices are offered (and maybe kernel version?)
2733 * If given unused devices, we will make a container.
2734 * If given devices in a container, we will make a BVD.
2735 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
2736 */
2737
2738 if (chunk && *chunk == UnSet)
2739 *chunk = DEFAULT_CHUNK;
2740
2741 if (level == -1000000) level = LEVEL_CONTAINER;
2742 if (level == LEVEL_CONTAINER) {
2743 /* Must be a fresh device to add to a container */
2744 return validate_geometry_ddf_container(st, level, layout,
2745 raiddisks, chunk?*chunk:0,
2746 size, data_offset, dev,
2747 freesize,
2748 verbose);
2749 }
2750
2751 if (!dev) {
2752 /* Initial sanity check. Exclude illegal levels. */
2753 int i;
2754 for (i=0; ddf_level_num[i].num1 != MAXINT; i++)
2755 if (ddf_level_num[i].num2 == level)
2756 break;
2757 if (ddf_level_num[i].num1 == MAXINT) {
2758 if (verbose)
2759 pr_err("DDF does not support level %d arrays\n",
2760 level);
2761 return 0;
2762 }
2763 /* Should check layout? etc */
2764
2765 if (st->sb && freesize) {
2766 /* --create was given a container to create in.
2767 * So we need to check that there are enough
2768 * free spaces and return the amount of space.
2769 * We may as well remember which drives were
2770 * chosen so that add_to_super/getinfo_super
2771 * can return them.
2772 */
2773 return reserve_space(st, raiddisks, size, chunk?*chunk:0, freesize);
2774 }
2775 return 1;
2776 }
2777
2778 if (st->sb) {
2779 /* A container has already been opened, so we are
2780 * creating in there. Maybe a BVD, maybe an SVD.
2781 * Should make a distinction one day.
2782 */
2783 return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
2784 chunk, size, data_offset, dev,
2785 freesize,
2786 verbose);
2787 }
2788 /* This is the first device for the array.
2789 * If it is a container, we read it in and do automagic allocations,
2790 * no other devices should be given.
2791 * Otherwise it must be a member device of a container, and we
2792 * do manual allocation.
2793 * Later we should check for a BVD and make an SVD.
2794 */
2795 fd = open(dev, O_RDONLY|O_EXCL, 0);
2796 if (fd >= 0) {
2797 sra = sysfs_read(fd, NULL, GET_VERSION);
2798 close(fd);
2799 if (sra && sra->array.major_version == -1 &&
2800 strcmp(sra->text_version, "ddf") == 0) {
2801
2802 /* load super */
2803 /* find space for 'n' devices. */
2804 /* remember the devices */
2805 /* Somehow return the fact that we have enough */
2806 }
2807
2808 if (verbose)
2809 pr_err("ddf: Cannot create this array "
2810 "on device %s - a container is required.\n",
2811 dev);
2812 return 0;
2813 }
2814 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
2815 if (verbose)
2816 pr_err("ddf: Cannot open %s: %s\n",
2817 dev, strerror(errno));
2818 return 0;
2819 }
2820 /* Well, it is in use by someone, maybe a 'ddf' container. */
2821 cfd = open_container(fd);
2822 if (cfd < 0) {
2823 close(fd);
2824 if (verbose)
2825 pr_err("ddf: Cannot use %s: %s\n",
2826 dev, strerror(EBUSY));
2827 return 0;
2828 }
2829 sra = sysfs_read(cfd, NULL, GET_VERSION);
2830 close(fd);
2831 if (sra && sra->array.major_version == -1 &&
2832 strcmp(sra->text_version, "ddf") == 0) {
2833 /* This is a member of a ddf container. Load the container
2834 * and try to create a bvd
2835 */
2836 struct ddf_super *ddf;
2837 if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
2838 st->sb = ddf;
2839 strcpy(st->container_devnm, fd2devnm(cfd));
2840 close(cfd);
2841 return validate_geometry_ddf_bvd(st, level, layout,
2842 raiddisks, chunk, size,
2843 data_offset,
2844 dev, freesize,
2845 verbose);
2846 }
2847 close(cfd);
2848 } else /* device may belong to a different container */
2849 return 0;
2850
2851 return 1;
2852 }
2853
2854 static int
2855 validate_geometry_ddf_container(struct supertype *st,
2856 int level, int layout, int raiddisks,
2857 int chunk, unsigned long long size,
2858 unsigned long long data_offset,
2859 char *dev, unsigned long long *freesize,
2860 int verbose)
2861 {
2862 int fd;
2863 unsigned long long ldsize;
2864
2865 if (level != LEVEL_CONTAINER)
2866 return 0;
2867 if (!dev)
2868 return 1;
2869
2870 fd = open(dev, O_RDONLY|O_EXCL, 0);
2871 if (fd < 0) {
2872 if (verbose)
2873 pr_err("ddf: Cannot open %s: %s\n",
2874 dev, strerror(errno));
2875 return 0;
2876 }
2877 if (!get_dev_size(fd, dev, &ldsize)) {
2878 close(fd);
2879 return 0;
2880 }
2881 close(fd);
2882
2883 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
2884 if (*freesize == 0)
2885 return 0;
2886
2887 return 1;
2888 }
2889
2890 static int validate_geometry_ddf_bvd(struct supertype *st,
2891 int level, int layout, int raiddisks,
2892 int *chunk, unsigned long long size,
2893 unsigned long long data_offset,
2894 char *dev, unsigned long long *freesize,
2895 int verbose)
2896 {
2897 struct stat stb;
2898 struct ddf_super *ddf = st->sb;
2899 struct dl *dl;
2900 unsigned long long pos = 0;
2901 unsigned long long maxsize;
2902 struct extent *e;
2903 int i;
2904 /* ddf/bvd supports lots of things, but not containers */
2905 if (level == LEVEL_CONTAINER) {
2906 if (verbose)
2907 pr_err("DDF cannot create a container within an container\n");
2908 return 0;
2909 }
2910 /* We must have the container info already read in. */
2911 if (!ddf)
2912 return 0;
2913
2914 if (!dev) {
2915 /* General test: make sure there is space for
2916 * 'raiddisks' device extents of size 'size'.
2917 */
2918 unsigned long long minsize = size;
2919 int dcnt = 0;
2920 if (minsize == 0)
2921 minsize = 8;
2922 for (dl = ddf->dlist; dl ; dl = dl->next)
2923 {
2924 int found = 0;
2925 pos = 0;
2926
2927 i = 0;
2928 e = get_extents(ddf, dl);
2929 if (!e) continue;
2930 do {
2931 unsigned long long esize;
2932 esize = e[i].start - pos;
2933 if (esize >= minsize)
2934 found = 1;
2935 pos = e[i].start + e[i].size;
2936 i++;
2937 } while (e[i-1].size);
2938 if (found)
2939 dcnt++;
2940 free(e);
2941 }
2942 if (dcnt < raiddisks) {
2943 if (verbose)
2944 pr_err("ddf: Not enough devices with "
2945 "space for this array (%d < %d)\n",
2946 dcnt, raiddisks);
2947 return 0;
2948 }
2949 return 1;
2950 }
2951 /* This device must be a member of the set */
2952 if (stat(dev, &stb) < 0)
2953 return 0;
2954 if ((S_IFMT & stb.st_mode) != S_IFBLK)
2955 return 0;
2956 for (dl = ddf->dlist ; dl ; dl = dl->next) {
2957 if (dl->major == (int)major(stb.st_rdev) &&
2958 dl->minor == (int)minor(stb.st_rdev))
2959 break;
2960 }
2961 if (!dl) {
2962 if (verbose)
2963 pr_err("ddf: %s is not in the "
2964 "same DDF set\n",
2965 dev);
2966 return 0;
2967 }
2968 e = get_extents(ddf, dl);
2969 maxsize = 0;
2970 i = 0;
2971 if (e) do {
2972 unsigned long long esize;
2973 esize = e[i].start - pos;
2974 if (esize >= maxsize)
2975 maxsize = esize;
2976 pos = e[i].start + e[i].size;
2977 i++;
2978 } while (e[i-1].size);
2979 *freesize = maxsize;
2980 // FIXME here I am
2981
2982 return 1;
2983 }
2984
static int load_super_ddf_all(struct supertype *st, int fd,
			      void **sbp, char *devname)
{
	/* Load DDF metadata for the whole container open on 'fd'.
	 * The member whose headers carry the highest sequence number
	 * supplies the authoritative global metadata; device-local
	 * sections are then read from every member.
	 * On success *sbp receives the new ddf_super and 0 is returned;
	 * non-zero indicates failure.
	 * NOTE(review): 'sra' is never freed, and 'super' is leaked on
	 * the error paths below - TODO confirm intent.
	 */
	struct mdinfo *sra;
	struct ddf_super *super;
	struct mdinfo *sd, *best = NULL;
	int bestseq = 0;
	int seq;
	char nm[20];
	int dfd;

	sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
	if (!sra)
		return 1;
	/* must really be a ddf container */
	if (sra->array.major_version != -1 ||
	    sra->array.minor_version != -2 ||
	    strcmp(sra->text_version, "ddf") != 0)
		return 1;

	if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
		return 1;
	memset(super, 0, sizeof(*super));

	/* first, try each device, and choose the best ddf */
	for (sd = sra->devs ; sd ; sd = sd->next) {
		int rv;
		sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
		dfd = dev_open(nm, O_RDONLY);
		if (dfd < 0)
			return 2;
		rv = load_ddf_headers(dfd, super, NULL);
		close(dfd);
		if (rv == 0) {
			seq = __be32_to_cpu(super->active->seq);
			/* a header left 'open' was mid-update: rank it
			 * one step lower */
			if (super->active->openflag)
				seq--;
			if (!best || seq > bestseq) {
				bestseq = seq;
				best = sd;
			}
		}
	}
	if (!best)
		return 1;
	/* OK, load this ddf */
	sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
	dfd = dev_open(nm, O_RDONLY);
	if (dfd < 0)
		return 1;
	load_ddf_headers(dfd, super, NULL);
	load_ddf_global(dfd, super, NULL);
	close(dfd);
	/* Now we need the device-local bits */
	for (sd = sra->devs ; sd ; sd = sd->next) {
		int rv;

		sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
		dfd = dev_open(nm, O_RDWR);
		if (dfd < 0)
			return 2;
		rv = load_ddf_headers(dfd, super, NULL);
		if (rv == 0)
			/* keep==1: presumably load_ddf_local retains
			 * dfd in the per-device structure, which is
			 * why it is not closed here - confirm.
			 */
			rv = load_ddf_local(dfd, super, NULL, 1);
		if (rv)
			return 1;
	}

	*sbp = super;
	if (st->ss == NULL) {
		/* caller had no supertype: fill in ddf defaults */
		st->ss = &super_ddf;
		st->minor_version = 0;
		st->max_devs = 512;
	}
	strcpy(st->container_devnm, fd2devnm(fd));
	return 0;
}
3061
3062 static int load_container_ddf(struct supertype *st, int fd,
3063 char *devname)
3064 {
3065 return load_super_ddf_all(st, fd, &st->sb, devname);
3066 }
3067
3068 #endif /* MDASSEMBLE */
3069
3070 static int check_secondary(const struct vcl *vc)
3071 {
3072 const struct vd_config *conf = &vc->conf;
3073 int i;
3074
3075 /* The only DDF secondary RAID level md can support is
3076 * RAID 10, if the stripe sizes and Basic volume sizes
3077 * are all equal.
3078 * Other configurations could in theory be supported by exposing
3079 * the BVDs to user space and using device mapper for the secondary
3080 * mapping. So far we don't support that.
3081 */
3082
3083 __u64 sec_elements[4] = {0, 0, 0, 0};
3084 #define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3085 #define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3086
3087 if (vc->other_bvds == NULL) {
3088 pr_err("No BVDs for secondary RAID found\n");
3089 return -1;
3090 }
3091 if (conf->prl != DDF_RAID1) {
3092 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3093 return -1;
3094 }
3095 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3096 pr_err("Secondary RAID level %d is unsupported\n",
3097 conf->srl);
3098 return -1;
3099 }
3100 __set_sec_seen(conf->sec_elmnt_seq);
3101 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3102 const struct vd_config *bvd = vc->other_bvds[i];
3103 if (bvd == NULL) {
3104 pr_err("BVD %d is missing\n", i+1);
3105 return -1;
3106 }
3107 if (bvd->srl != conf->srl) {
3108 pr_err("Inconsistent secondary RAID level across BVDs\n");
3109 return -1;
3110 }
3111 if (bvd->prl != conf->prl) {
3112 pr_err("Different RAID levels for BVDs are unsupported\n");
3113 return -1;
3114 }
3115 if (bvd->prim_elmnt_count != conf->prim_elmnt_count) {
3116 pr_err("All BVDs must have the same number of primary elements\n");
3117 return -1;
3118 }
3119 if (bvd->chunk_shift != conf->chunk_shift) {
3120 pr_err("Different strip sizes for BVDs are unsupported\n");
3121 return -1;
3122 }
3123 if (bvd->array_blocks != conf->array_blocks) {
3124 pr_err("Different BVD sizes are unsupported\n");
3125 return -1;
3126 }
3127 __set_sec_seen(bvd->sec_elmnt_seq);
3128 }
3129 for (i = 0; i < conf->sec_elmnt_count; i++) {
3130 if (!__was_sec_seen(i)) {
3131 pr_err("BVD %d is missing\n", i);
3132 return -1;
3133 }
3134 }
3135 return 0;
3136 }
3137
#define NO_SUCH_REFNUM (0xFFFFFFFF)
static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
					     __u32 refnum, unsigned int nmax,
					     const struct vd_config **bvd,
					     unsigned int *idx)
{
	/* Map a physical-disk reference number to its role within the
	 * virtual disk 'vc'.  The primary BVD is searched first, then
	 * any other BVDs of a secondary (RAID10-style) configuration.
	 * On success, *bvd is set to the BVD containing the refnum,
	 * *idx to its slot in that BVD's phys_refnum table, and the
	 * overall raid-disk number (sec * cnt + position) is returned.
	 * On failure, *bvd is NULL and NO_SUCH_REFNUM is returned.
	 */
	unsigned int i, j, n, sec, cnt;

	cnt = __be16_to_cpu(vc->conf.prim_elmnt_count);
	sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);

	for (i = 0, j = 0 ; i < nmax ; i++) {
		/* j counts valid entries for this BVD */
		if (vc->conf.phys_refnum[i] != 0xffffffff)
			j++;
		if (vc->conf.phys_refnum[i] == refnum) {
			*bvd = &vc->conf;
			*idx = i;
			return sec * cnt + j - 1;
		}
	}
	if (vc->other_bvds == NULL)
		goto bad;

	/* not in the primary BVD: search the remaining BVDs */
	for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
		struct vd_config *vd = vc->other_bvds[n-1];
		if (vd == NULL)
			continue;
		sec = vd->sec_elmnt_seq;
		for (i = 0, j = 0 ; i < nmax ; i++) {
			if (vd->phys_refnum[i] != 0xffffffff)
				j++;
			if (vd->phys_refnum[i] == refnum) {
				*bvd = vd;
				*idx = i;
				return sec * cnt + j - 1;
			}
		}
	}
bad:
	*bvd = NULL;
	return NO_SUCH_REFNUM;
}
3181
3182 static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
3183 {
3184 /* Given a container loaded by load_super_ddf_all,
3185 * extract information about all the arrays into
3186 * an mdinfo tree.
3187 *
3188 * For each vcl in conflist: create an mdinfo, fill it in,
3189 * then look for matching devices (phys_refnum) in dlist
3190 * and create appropriate device mdinfo.
3191 */
3192 struct ddf_super *ddf = st->sb;
3193 struct mdinfo *rest = NULL;
3194 struct vcl *vc;
3195
3196 for (vc = ddf->conflist ; vc ; vc=vc->next)
3197 {
3198 unsigned int i;
3199 unsigned int j;
3200 struct mdinfo *this;
3201 char *ep;
3202 __u32 *cptr;
3203 unsigned int pd;
3204
/* If a specific subarray was requested, skip every vcl whose vcnum
 * does not exactly match the decimal subarray string. */
3205 if (subarray &&
3206 (strtoul(subarray, &ep, 10) != vc->vcnum ||
3207 *ep != '\0'))
3208 continue;
3209
3210 if (vc->conf.sec_elmnt_count > 1) {
3211 if (check_secondary(vc) != 0)
3212 continue;
3213 }
3214
3215 this = xcalloc(1, sizeof(*this));
3216 this->next = rest;
3217 rest = this;
3218
/* Single secondary element: a plain BVD; map PRL/RLQ directly to
 * md level/layout. */
3219 if (vc->conf.sec_elmnt_count == 1) {
3220 this->array.level = map_num1(ddf_level_num,
3221 vc->conf.prl);
3222 this->array.raid_disks =
3223 __be16_to_cpu(vc->conf.prim_elmnt_count);
3224 this->array.layout =
3225 rlq_to_layout(vc->conf.rlq, vc->conf.prl,
3226 this->array.raid_disks);
3227 } else {
3228 /* The only supported layout is RAID 10.
3229 * Compatibility has been checked in check_secondary()
3230 * above.
3231 */
3232 this->array.level = 10;
3233 this->array.raid_disks =
3234 __be16_to_cpu(vc->conf.prim_elmnt_count)
3235 * vc->conf.sec_elmnt_count;
3236 this->array.layout = 0x100 |
3237 __be16_to_cpu(vc->conf.prim_elmnt_count);
3238 }
3239 this->array.md_minor = -1;
3240 this->array.major_version = -1;
3241 this->array.minor_version = -2;
/* Bytes 16..19 of the VD GUID hold the (1980-based) creation timestamp;
 * DECADE converts it to a Unix time. */
3242 cptr = (__u32 *)(vc->conf.guid + 16);
3243 this->array.ctime = DECADE + __be32_to_cpu(*cptr);
3244 this->array.utime = DECADE +
3245 __be32_to_cpu(vc->conf.timestamp);
3246 this->array.chunk_size = 512 << vc->conf.chunk_shift;
3247
3248 i = vc->vcnum;
/* Array is "clean" only if consistent and fully initialized. */
3249 if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
3250 (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
3251 DDF_init_full) {
3252 this->array.state = 0;
3253 this->resync_start = 0;
3254 } else {
3255 this->array.state = 1;
3256 this->resync_start = MaxSector;
3257 }
/* DDF names are space-padded, 16 bytes; NUL-terminate and strip pads. */
3258 memcpy(this->name, ddf->virt->entries[i].name, 16);
3259 this->name[16]=0;
3260 for(j=0; j<16; j++)
3261 if (this->name[j] == ' ')
3262 this->name[j] = 0;
3263
3264 memset(this->uuid, 0, sizeof(this->uuid));
3265 this->component_size = __be64_to_cpu(vc->conf.blocks);
3266 this->array.size = this->component_size / 2;
3267 this->container_member = i;
3268
/* uuid_from_super_ddf() reports on ddf->currentconf when set; point it
 * at this vcl just for the duration of the call. */
3269 ddf->currentconf = vc;
3270 uuid_from_super_ddf(st, this->uuid);
3271 ddf->currentconf = NULL;
3272
3273 sprintf(this->text_version, "/%s/%d",
3274 st->container_devnm, this->container_member);
3275
/* Walk the physical disk table, attaching an mdinfo device for every
 * online disk that belongs to this virtual disk. */
3276 for (pd = 0; pd < __be16_to_cpu(ddf->phys->used_pdes); pd++) {
3277 struct mdinfo *dev;
3278 struct dl *d;
3279 const struct vd_config *bvd;
3280 unsigned int iphys;
3281 __u64 *lba_offset;
3282 int stt;
3283
3284 if (ddf->phys->entries[pd].refnum == 0xFFFFFFFF)
3285 continue;
3286
3287 stt = __be16_to_cpu(ddf->phys->entries[pd].state);
3288 if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3289 != DDF_Online)
3290 continue;
3291
3292 i = get_pd_index_from_refnum(
3293 vc, ddf->phys->entries[pd].refnum,
3294 ddf->mppe, &bvd, &iphys);
3295 if (i == NO_SUCH_REFNUM)
3296 continue;
3297
3298 this->array.working_disks++;
3299
3300 for (d = ddf->dlist; d ; d=d->next)
3301 if (d->disk.refnum ==
3302 ddf->phys->entries[pd].refnum)
3303 break;
3304 if (d == NULL)
3305 /* Haven't found that one yet, maybe there are others */
3306 continue;
3307
3308 dev = xcalloc(1, sizeof(*dev));
3309 dev->next = this->devs;
3310 this->devs = dev;
3311
3312 dev->disk.number = __be32_to_cpu(d->disk.refnum);
3313 dev->disk.major = d->major;
3314 dev->disk.minor = d->minor;
3315 dev->disk.raid_disk = i;
3316 dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
3317 dev->recovery_start = MaxSector;
3318
/* The primary header's sequence number serves as the event count. */
3319 dev->events = __be32_to_cpu(ddf->primary.seq);
/* lba_offset array follows phys_refnum[mppe] in the vd_config. */
3320 lba_offset = (__u64 *)&bvd->phys_refnum[ddf->mppe];
3321 dev->data_offset = __be64_to_cpu(lba_offset[iphys]);
3322 dev->component_size = __be64_to_cpu(bvd->blocks);
3323 if (d->devname)
3324 strcpy(dev->name, d->devname);
3325 }
3326 }
3327 return rest;
3328 }
3329
3330 static int store_super_ddf(struct supertype *st, int fd)
3331 {
3332 struct ddf_super *ddf = st->sb;
3333 unsigned long long dsize;
3334 void *buf;
3335 int rc;
3336
3337 if (!ddf)
3338 return 1;
3339
3340 /* ->dlist and ->conflist will be set for updates, currently not
3341 * supported
3342 */
3343 if (ddf->dlist || ddf->conflist)
3344 return 1;
3345
3346 if (!get_dev_size(fd, NULL, &dsize))
3347 return 1;
3348
3349 if (posix_memalign(&buf, 512, 512) != 0)
3350 return 1;
3351 memset(buf, 0, 512);
3352
3353 lseek64(fd, dsize-512, 0);
3354 rc = write(fd, buf, 512);
3355 free(buf);
3356 if (rc < 0)
3357 return 1;
3358 return 0;
3359 }
3360
3361 static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3362 {
3363 /*
3364 * return:
3365 * 0 same, or first was empty, and second was copied
3366 * 1 second had wrong number
3367 * 2 wrong uuid
3368 * 3 wrong other info
3369 */
3370 struct ddf_super *first = st->sb;
3371 struct ddf_super *second = tst->sb;
3372 struct dl *dl1, *dl2;
3373 struct vcl *vl1, *vl2;
3374 unsigned int max_vds, max_pds, pd, vd;
3375
3376 if (!first) {
3377 st->sb = tst->sb;
3378 tst->sb = NULL;
3379 return 0;
3380 }
3381
3382 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3383 return 2;
3384
3385 if (first->anchor.seq != second->anchor.seq) {
3386 dprintf("%s: sequence number mismatch %u/%u\n", __func__,
3387 __be32_to_cpu(first->anchor.seq),
3388 __be32_to_cpu(second->anchor.seq));
3389 return 3;
3390 }
3391 if (first->max_part != second->max_part ||
3392 first->phys->used_pdes != second->phys->used_pdes ||
3393 first->virt->populated_vdes != second->virt->populated_vdes) {
3394 dprintf("%s: PD/VD number mismatch\n", __func__);
3395 return 3;
3396 }
3397
3398 max_pds = __be16_to_cpu(first->phys->used_pdes);
3399 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3400 for (pd = 0; pd < max_pds; pd++)
3401 if (first->phys->entries[pd].refnum == dl2->disk.refnum)
3402 break;
3403 if (pd == max_pds) {
3404 dprintf("%s: no match for disk %08x\n", __func__,
3405 __be32_to_cpu(dl2->disk.refnum));
3406 return 3;
3407 }
3408 }
3409
3410 max_vds = __be16_to_cpu(first->active->max_vd_entries);
3411 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3412 if (vl2->conf.magic != DDF_VD_CONF_MAGIC)
3413 continue;
3414 for (vd = 0; vd < max_vds; vd++)
3415 if (!memcmp(first->virt->entries[vd].guid,
3416 vl2->conf.guid, DDF_GUID_LEN))
3417 break;
3418 if (vd == max_vds) {
3419 dprintf("%s: no match for VD config\n", __func__);
3420 return 3;
3421 }
3422 }
3423 /* FIXME should I look at anything else? */
3424
3425 /*
3426 At this point we are fairly sure that the meta data matches.
3427 But the new disk may contain additional local data.
3428 Add it to the super block.
3429 */
3430 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3431 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3432 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3433 DDF_GUID_LEN))
3434 break;
3435 if (vl1) {
3436 if (vl1->other_bvds != NULL &&
3437 vl1->conf.sec_elmnt_seq !=
3438 vl2->conf.sec_elmnt_seq) {
3439 dprintf("%s: adding BVD %u\n", __func__,
3440 vl2->conf.sec_elmnt_seq);
3441 add_other_bvd(vl1, &vl2->conf,
3442 first->conf_rec_len*512);
3443 }
3444 continue;
3445 }
3446
3447 if (posix_memalign((void **)&vl1, 512,
3448 (first->conf_rec_len*512 +
3449 offsetof(struct vcl, conf))) != 0) {
3450 pr_err("%s could not allocate vcl buf\n",
3451 __func__);
3452 return 3;
3453 }
3454
3455 vl1->next = first->conflist;
3456 vl1->block_sizes = NULL;
3457 if (vl1->conf.sec_elmnt_count > 1) {
3458 vl1->other_bvds = xcalloc(vl2->conf.sec_elmnt_count - 1,
3459 sizeof(struct vd_config *));
3460 } else
3461 vl1->other_bvds = NULL;
3462 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3463 vl1->lba_offset = (__u64 *)
3464 &vl1->conf.phys_refnum[first->mppe];
3465 for (vd = 0; vd < max_vds; vd++)
3466 if (!memcmp(first->virt->entries[vd].guid,
3467 vl1->conf.guid, DDF_GUID_LEN))
3468 break;
3469 vl1->vcnum = vd;
3470 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
3471 first->conflist = vl1;
3472 }
3473
3474 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3475 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
3476 if (dl1->disk.refnum == dl2->disk.refnum)
3477 break;
3478 if (dl1)
3479 continue;
3480
3481 if (posix_memalign((void **)&dl1, 512,
3482 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
3483 != 0) {
3484 pr_err("%s could not allocate disk info buffer\n",
3485 __func__);
3486 return 3;
3487 }
3488 memcpy(dl1, dl2, sizeof(*dl1));
3489 dl1->mdupdate = NULL;
3490 dl1->next = first->dlist;
3491 dl1->fd = -1;
3492 for (pd = 0; pd < max_pds; pd++)
3493 if (first->phys->entries[pd].refnum == dl1->disk.refnum)
3494 break;
3495 dl1->pdnum = pd;
3496 if (dl2->spare) {
3497 if (posix_memalign((void **)&dl1->spare, 512,
3498 first->conf_rec_len*512) != 0) {
3499 pr_err("%s could not allocate spare info buf\n",
3500 __func__);
3501 return 3;
3502 }
3503 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
3504 }
3505 for (vd = 0 ; vd < first->max_part ; vd++) {
3506 if (!dl2->vlist[vd]) {
3507 dl1->vlist[vd] = NULL;
3508 continue;
3509 }
3510 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
3511 if (!memcmp(vl1->conf.guid,
3512 dl2->vlist[vd]->conf.guid,
3513 DDF_GUID_LEN))
3514 break;
3515 dl1->vlist[vd] = vl1;
3516 }
3517 }
3518 first->dlist = dl1;
3519 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
3520 dl1->disk.refnum);
3521 }
3522
3523 return 0;
3524 }
3525
3526 #ifndef MDASSEMBLE
3527 /*
3528 * A new array 'a' has been started which claims to be instance 'inst'
3529 * within container 'c'.
3530 * We need to confirm that the array matches the metadata in 'c' so
3531 * that we don't corrupt any metadata.
3532 */
3533 static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
3534 {
3535 dprintf("ddf: open_new %s\n", inst);
3536 a->info.container_member = atoi(inst);
3537 return 0;
3538 }
3539
3540 /*
3541 * The array 'a' is to be marked clean in the metadata.
3542 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
3543 * clean up to the point (in sectors). If that cannot be recorded in the
3544 * metadata, then leave it as dirty.
3545 *
3546 * For DDF, we need to clear the DDF_state_inconsistent bit in the
3547 * !global! virtual_disk.virtual_entry structure.
3548 */
3549 static int ddf_set_array_state(struct active_array *a, int consistent)
3550 {
3551 struct ddf_super *ddf = a->container->sb;
3552 int inst = a->info.container_member;
3553 int old = ddf->virt->entries[inst].state;
3554 if (consistent == 2) {
3555 /* Should check if a recovery should be started FIXME */
3556 consistent = 1;
3557 if (!is_resync_complete(&a->info))
3558 consistent = 0;
3559 }
3560 if (consistent)
3561 ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
3562 else
3563 ddf->virt->entries[inst].state |= DDF_state_inconsistent;
3564 if (old != ddf->virt->entries[inst].state)
3565 ddf->updates_pending = 1;
3566
3567 old = ddf->virt->entries[inst].init_state;
3568 ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
3569 if (is_resync_complete(&a->info))
3570 ddf->virt->entries[inst].init_state |= DDF_init_full;
3571 else if (a->info.resync_start == 0)
3572 ddf->virt->entries[inst].init_state |= DDF_init_not;
3573 else
3574 ddf->virt->entries[inst].init_state |= DDF_init_quick;
3575 if (old != ddf->virt->entries[inst].init_state)
3576 ddf->updates_pending = 1;
3577
3578 dprintf("ddf mark %d %s %llu\n", inst, consistent?"clean":"dirty",
3579 a->info.resync_start);
3580 return consistent;
3581 }
3582
/* container_of(): given a pointer to 'member' inside an object of 'type',
 * return the address of the enclosing object (same idiom as the Linux
 * kernel macro).  Used below to recover a struct vcl from its embedded
 * struct vd_config. */
3583 #define container_of(ptr, type, member) ({ \
3584 const typeof( ((type *)0)->member ) *__mptr = (ptr); \
3585 (type *)( (char *)__mptr - offsetof(type,member) );})
3586 /*
3587 * The state of each disk is stored in the global phys_disk structure
3588 * in phys_disk.entries[n].state.
3589 * This makes various combinations awkward.
3590 * - When a device fails in any array, it must be failed in all arrays
3591 * that include a part of this device.
3592 * - When a component is rebuilding, we cannot include it officially in the
3593 * array unless this is the only array that uses the device.
3594 *
3595 * So: when transitioning:
3596 * Online -> failed, just set failed flag. monitor will propagate
3597 * spare -> online, the device might need to be added to the array.
3598 * spare -> failed, just set failed. Don't worry if in array or not.
3599 */
3600 static void ddf_set_disk(struct active_array *a, int n, int state)
3601 {
3602 struct ddf_super *ddf = a->container->sb;
3603 unsigned int inst = a->info.container_member;
3604 struct vd_config *vc = find_vdcr(ddf, inst);
3605 int pd = find_phys(ddf, vc->phys_refnum[n]);
3606 int i, st, working;
3607 struct mdinfo *mdi;
3608 struct dl *dl;
3609
3610 if (vc == NULL) {
3611 dprintf("ddf: cannot find instance %d!!\n", inst);
3612 return;
3613 }
3614 /* Find the matching slot in 'info'. */
3615 for (mdi = a->info.devs; mdi; mdi = mdi->next)
3616 if (mdi->disk.raid_disk == n)
3617 break;
3618 if (!mdi)
3619 return;
3620
3621 /* and find the 'dl' entry corresponding to that. */
3622 for (dl = ddf->dlist; dl; dl = dl->next)
3623 if (mdi->state_fd >= 0 &&
3624 mdi->disk.major == dl->major &&
3625 mdi->disk.minor == dl->minor)
3626 break;
3627 if (!dl)
3628 return;
3629
3630 if (pd < 0 || pd != dl->pdnum) {
3631 /* disk doesn't currently exist or has changed.
3632 * If it is now in_sync, insert it. */
3633 if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
3634 struct vcl *vcl;
3635 pd = dl->pdnum;
3636 vc->phys_refnum[n] = dl->disk.refnum;
3637 vcl = container_of(vc, struct vcl, conf);
3638 vcl->lba_offset[n] = mdi->data_offset;
3639 ddf->phys->entries[pd].type &=
3640 ~__cpu_to_be16(DDF_Global_Spare);
3641 ddf->phys->entries[pd].type |=
3642 __cpu_to_be16(DDF_Active_in_VD);
3643 ddf->updates_pending = 1;
3644 }
3645 } else {
3646 int old = ddf->phys->entries[pd].state;
3647 if (state & DS_FAULTY)
3648 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed);
3649 if (state & DS_INSYNC) {
3650 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online);
3651 ddf->phys->entries[pd].state &= __cpu_to_be16(~DDF_Rebuilding);
3652 }
3653 if (old != ddf->phys->entries[pd].state)
3654 ddf->updates_pending = 1;
3655 }
3656
3657 dprintf("ddf: set_disk %d to %x\n", n, state);
3658
3659 /* Now we need to check the state of the array and update
3660 * virtual_disk.entries[n].state.
3661 * It needs to be one of "optimal", "degraded", "failed".
3662 * I don't understand 'deleted' or 'missing'.
3663 */
3664 working = 0;
3665 for (i=0; i < a->info.array.raid_disks; i++) {
3666 pd = find_phys(ddf, vc->phys_refnum[i]);
3667 if (pd < 0)
3668 continue;
3669 st = __be16_to_cpu(ddf->phys->entries[pd].state);
3670 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3671 == DDF_Online)
3672 working++;
3673 }
3674 state = DDF_state_degraded;
3675 if (working == a->info.array.raid_disks)
3676 state = DDF_state_optimal;
3677 else switch(vc->prl) {
3678 case DDF_RAID0:
3679 case DDF_CONCAT:
3680 case DDF_JBOD:
3681 state = DDF_state_failed;
3682 break;
3683 case DDF_RAID1:
3684 if (working == 0)
3685 state = DDF_state_failed;
3686 else if (working == 2 && state == DDF_state_degraded)
3687 state = DDF_state_part_optimal;
3688 break;
3689 case DDF_RAID4:
3690 case DDF_RAID5:
3691 if (working < a->info.array.raid_disks-1)
3692 state = DDF_state_failed;
3693 break;
3694 case DDF_RAID6:
3695 if (working < a->info.array.raid_disks-2)
3696 state = DDF_state_failed;
3697 else if (working == a->info.array.raid_disks-1)
3698 state = DDF_state_part_optimal;
3699 break;
3700 }
3701
3702 if (ddf->virt->entries[inst].state !=
3703 ((ddf->virt->entries[inst].state & ~DDF_state_mask)
3704 | state)) {
3705
3706 ddf->virt->entries[inst].state =
3707 (ddf->virt->entries[inst].state & ~DDF_state_mask)
3708 | state;
3709 ddf->updates_pending = 1;
3710 }
3711
3712 }
3713
3714 static void ddf_sync_metadata(struct supertype *st)
3715 {
3716
3717 /*
3718 * Write all data to all devices.
3719 * Later, we might be able to track whether only local changes
3720 * have been made, or whether any global data has been changed,
3721 * but ddf is sufficiently weird that it probably always
3722 * changes global data ....
3723 */
3724 struct ddf_super *ddf = st->sb;
3725 if (!ddf->updates_pending)
3726 return;
3727 ddf->updates_pending = 0;
3728 __write_init_super_ddf(st);
3729 dprintf("ddf: sync_metadata\n");
3730 }
3731
3732 static void ddf_process_update(struct supertype *st,
3733 struct metadata_update *update)
3734 {
3735 /* Apply this update to the metadata.
3736 * The first 4 bytes are a DDF_*_MAGIC which guides
3737 * our actions.
3738 * Possible update are:
3739 * DDF_PHYS_RECORDS_MAGIC
3740 * Add a new physical device or remove an old one.
3741 * Changes to this record only happen implicitly.
3742 * used_pdes is the device number.
3743 * DDF_VIRT_RECORDS_MAGIC
3744 * Add a new VD. Possibly also change the 'access' bits.
3745 * populated_vdes is the entry number.
3746 * DDF_VD_CONF_MAGIC
3747 * New or updated VD. the VIRT_RECORD must already
3748 * exist. For an update, phys_refnum and lba_offset
3749 * (at least) are updated, and the VD_CONF must
3750 * be written to precisely those devices listed with
3751 * a phys_refnum.
3752 * DDF_SPARE_ASSIGN_MAGIC
3753 * replacement Spare Assignment Record... but for which device?
3754 *
3755 * So, e.g.:
3756 * - to create a new array, we send a VIRT_RECORD and
3757 * a VD_CONF. Then assemble and start the array.
3758 * - to activate a spare we send a VD_CONF to add the phys_refnum
3759 * and offset. This will also mark the spare as active with
3760 * a spare-assignment record.
3761 */
3762 struct ddf_super *ddf = st->sb;
3763 __u32 *magic = (__u32*)update->buf;
3764 struct phys_disk *pd;
3765 struct virtual_disk *vd;
3766 struct vd_config *vc;
3767 struct vcl *vcl;
3768 struct dl *dl;
3769 unsigned int mppe;
3770 unsigned int ent;
3771 unsigned int pdnum, pd2;
3772
3773 dprintf("Process update %x\n", *magic);
3774
3775 switch (*magic) {
3776 case DDF_PHYS_RECORDS_MAGIC:
3777
/* The update carries exactly one phys_disk header plus one entry;
 * pd->used_pdes names the slot being added or removed. */
3778 if (update->len != (sizeof(struct phys_disk) +
3779 sizeof(struct phys_disk_entry)))
3780 return;
3781 pd = (struct phys_disk*)update->buf;
3782
3783 ent = __be16_to_cpu(pd->used_pdes);
3784 if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
3785 return;
3786 if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
3787 struct dl **dlp;
3788 /* removing this disk. */
3789 ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
3790 for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
3791 struct dl *dl = *dlp;
3792 if (dl->pdnum == (signed)ent) {
3793 close(dl->fd);
3794 dl->fd = -1;
3795 /* FIXME this doesn't free
3796 * dl->devname */
/* Hand the dl back via update->space so managemon frees it safely. */
3797 update->space = dl;
3798 *dlp = dl->next;
3799 break;
3800 }
3801 }
3802 ddf->updates_pending = 1;
3803 return;
3804 }
/* Adding: the target slot must still be vacant (all-0xff GUID). */
3805 if (!all_ff(ddf->phys->entries[ent].guid))
3806 return;
3807 ddf->phys->entries[ent] = pd->entries[0];
3808 ddf->phys->used_pdes = __cpu_to_be16(1 +
3809 __be16_to_cpu(ddf->phys->used_pdes));
3810 ddf->updates_pending = 1;
3811 if (ddf->add_list) {
3812 struct active_array *a;
3813 struct dl *al = ddf->add_list;
3814 ddf->add_list = al->next;
3815
3816 al->next = ddf->dlist;
3817 ddf->dlist = al;
3818
3819 /* As a device has been added, we should check
3820 * for any degraded devices that might make
3821 * use of this spare */
3822 for (a = st->arrays ; a; a=a->next)
3823 a->check_degraded = 1;
3824 }
3825 break;
3826
3827 case DDF_VIRT_RECORDS_MAGIC:
3828
/* One virtual_disk header plus one entry; vd->populated_vdes names
 * the (vacant) slot for the new VD. */
3829 if (update->len != (sizeof(struct virtual_disk) +
3830 sizeof(struct virtual_entry)))
3831 return;
3832 vd = (struct virtual_disk*)update->buf;
3833
3834 ent = __be16_to_cpu(vd->populated_vdes);
3835 if (ent >= __be16_to_cpu(ddf->virt->max_vdes))
3836 return;
3837 if (!all_ff(ddf->virt->entries[ent].guid))
3838 return;
3839 ddf->virt->entries[ent] = vd->entries[0];
3840 ddf->virt->populated_vdes = __cpu_to_be16(1 +
3841 __be16_to_cpu(ddf->virt->populated_vdes));
3842 ddf->updates_pending = 1;
3843 break;
3844
3845 case DDF_VD_CONF_MAGIC:
3846 dprintf("len %d %d\n", update->len, ddf->conf_rec_len);
3847
3848 mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
3849 if ((unsigned)update->len != ddf->conf_rec_len * 512)
3850 return;
3851 vc = (struct vd_config*)update->buf;
3852 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
3853 if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
3854 break;
3855 dprintf("vcl = %p\n", vcl);
3856 if (vcl) {
3857 /* An update, just copy the phys_refnum and lba_offset
3858 * fields
3859 */
/* phys_refnum[mppe] (__u32 each) and lba_offset[mppe] (__u64 each)
 * are laid out contiguously, so one memcpy covers both arrays. */
3860 memcpy(vcl->conf.phys_refnum, vc->phys_refnum,
3861 mppe * (sizeof(__u32) + sizeof(__u64)));
3862 } else {
3863 /* A new VD_CONF */
/* update->space was pre-allocated by ddf_prepare_update(); we may
 * not allocate here (monitor thread must not malloc). */
3864 if (!update->space)
3865 return;
3866 vcl = update->space;
3867 update->space = NULL;
3868 vcl->next = ddf->conflist;
3869 memcpy(&vcl->conf, vc, update->len);
3870 vcl->lba_offset = (__u64*)
3871 &vcl->conf.phys_refnum[mppe];
3872 for (ent = 0;
3873 ent < __be16_to_cpu(ddf->virt->populated_vdes);
3874 ent++)
3875 if (memcmp(vc->guid, ddf->virt->entries[ent].guid,
3876 DDF_GUID_LEN) == 0) {
3877 vcl->vcnum = ent;
3878 break;
3879 }
3880 ddf->conflist = vcl;
3881 }
3882 /* Set DDF_Transition on all Failed devices - to help
3883 * us detect those that are no longer in use
3884 */
3885 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
3886 if (ddf->phys->entries[pdnum].state
3887 & __be16_to_cpu(DDF_Failed))
3888 ddf->phys->entries[pdnum].state
3889 |= __be16_to_cpu(DDF_Transition);
3890 /* Now make sure vlist is correct for each dl. */
3891 for (dl = ddf->dlist; dl; dl = dl->next) {
3892 unsigned int dn;
3893 unsigned int vn = 0;
3894 int in_degraded = 0;
3895 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
3896 for (dn=0; dn < ddf->mppe ; dn++)
3897 if (vcl->conf.phys_refnum[dn] ==
3898 dl->disk.refnum) {
3899 int vstate;
3900 dprintf("dev %d has %p at %d\n",
3901 dl->pdnum, vcl, vn);
3902 /* Clear the Transition flag */
3903 if (ddf->phys->entries[dl->pdnum].state
3904 & __be16_to_cpu(DDF_Failed))
3905 ddf->phys->entries[dl->pdnum].state &=
3906 ~__be16_to_cpu(DDF_Transition);
3907
3908 dl->vlist[vn++] = vcl;
3909 vstate = ddf->virt->entries[vcl->vcnum].state
3910 & DDF_state_mask;
3911 if (vstate == DDF_state_degraded ||
3912 vstate == DDF_state_part_optimal)
3913 in_degraded = 1;
3914 break;
3915 }
3916 while (vn < ddf->max_part)
3917 dl->vlist[vn++] = NULL;
/* Reclassify the disk's type flags from its (new) membership. */
3918 if (dl->vlist[0]) {
3919 ddf->phys->entries[dl->pdnum].type &=
3920 ~__cpu_to_be16(DDF_Global_Spare);
3921 if (!(ddf->phys->entries[dl->pdnum].type &
3922 __cpu_to_be16(DDF_Active_in_VD))) {
3923 ddf->phys->entries[dl->pdnum].type |=
3924 __cpu_to_be16(DDF_Active_in_VD);
3925 if (in_degraded)
3926 ddf->phys->entries[dl->pdnum].state |=
3927 __cpu_to_be16(DDF_Rebuilding);
3928 }
3929 }
3930 if (dl->spare) {
3931 ddf->phys->entries[dl->pdnum].type &=
3932 ~__cpu_to_be16(DDF_Global_Spare);
3933 ddf->phys->entries[dl->pdnum].type |=
3934 __cpu_to_be16(DDF_Spare);
3935 }
3936 if (!dl->vlist[0] && !dl->spare) {
3937 ddf->phys->entries[dl->pdnum].type |=
3938 __cpu_to_be16(DDF_Global_Spare);
3939 ddf->phys->entries[dl->pdnum].type &=
3940 ~__cpu_to_be16(DDF_Spare |
3941 DDF_Active_in_VD);
3942 }
3943 }
3944
3945 /* Now remove any 'Failed' devices that are not part
3946 * of any VD. They will have the Transition flag set.
3947 * Once done, we need to update all dl->pdnum numbers.
3948 */
3949 pd2 = 0;
3950 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
3951 if ((ddf->phys->entries[pdnum].state
3952 & __be16_to_cpu(DDF_Failed))
3953 && (ddf->phys->entries[pdnum].state
3954 & __be16_to_cpu(DDF_Transition)))
3955 /* skip this one */;
3956 else if (pdnum == pd2)
3957 pd2++;
3958 else {
/* Compact the table: move entry down and fix dl->pdnum references. */
3959 ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
3960 for (dl = ddf->dlist; dl; dl = dl->next)
3961 if (dl->pdnum == (int)pdnum)
3962 dl->pdnum = pd2;
3963 pd2++;
3964 }
3965 ddf->phys->used_pdes = __cpu_to_be16(pd2);
/* Mark the now-unused tail entries vacant (all-0xff GUID). */
3966 while (pd2 < pdnum) {
3967 memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
3968 pd2++;
3969 }
3970
3971 ddf->updates_pending = 1;
3972 break;
3973 case DDF_SPARE_ASSIGN_MAGIC:
3974 default: break;
3975 }
3976 }
3977
3978 static void ddf_prepare_update(struct supertype *st,
3979 struct metadata_update *update)
3980 {
3981 /* This update arrived at managemon.
3982 * We are about to pass it to monitor.
3983 * If a malloc is needed, do it here.
3984 */
3985 struct ddf_super *ddf = st->sb;
3986 __u32 *magic = (__u32*)update->buf;
3987 if (*magic == DDF_VD_CONF_MAGIC)
3988 if (posix_memalign(&update->space, 512,
3989 offsetof(struct vcl, conf)
3990 + ddf->conf_rec_len * 512) != 0)
3991 update->space = NULL;
3992 }
3993
3994 /*
3995 * Check if the array 'a' is degraded but not failed.
3996 * If it is, find as many spares as are available and needed and
3997 * arrange for their inclusion.
3998 * We only choose devices which are not already in the array,
3999 * and prefer those with a spare-assignment to this array.
4000 * otherwise we choose global spares - assuming always that
4001 * there is enough room.
4002 * For each spare that we assign, we return an 'mdinfo' which
4003 * describes the position for the device in the array.
4004 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4005 * the new phys_refnum and lba_offset values.
4006 *
4007 * Only worry about BVDs at the moment.
4008 */
4009 static struct mdinfo *ddf_activate_spare(struct active_array *a,
4010 struct metadata_update **updates)
4011 {
4012 int working = 0;
4013 struct mdinfo *d;
4014 struct ddf_super *ddf = a->container->sb;
4015 int global_ok = 0;
4016 struct mdinfo *rv = NULL;
4017 struct mdinfo *di;
4018 struct metadata_update *mu;
4019 struct dl *dl;
4020 int i;
4021 struct vd_config *vc;
4022 __u64 *lba;
4023
4024 for (d = a->info.devs ; d ; d = d->next) {
4025 if ((d->curr_state & DS_FAULTY) &&
4026 d->state_fd >= 0)
4027 /* wait for Removal to happen */
4028 return NULL;
4029 if (d->state_fd >= 0)
4030 working ++;
4031 }
4032
4033 dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
4034 a->info.array.level);
4035 if (working == a->info.array.raid_disks)
4036 return NULL; /* array not degraded */
4037 switch (a->info.array.level) {
4038 case 1:
4039 if (working == 0)
4040 return NULL; /* failed */
4041 break;
4042 case 4:
4043 case 5:
4044 if (working < a->info.array.raid_disks - 1)
4045 return NULL; /* failed */
4046 break;
4047 case 6:
4048 if (working < a->info.array.raid_disks - 2)
4049 return NULL; /* failed */
4050 break;
4051 default: /* concat or stripe */
4052 return NULL; /* failed */
4053 }
4054
4055 /* For each slot, if it is not working, find a spare */
4056 dl = ddf->dlist;
4057 for (i = 0; i < a->info.array.raid_disks; i++) {
4058 for (d = a->info.devs ; d ; d = d->next)
4059 if (d->disk.raid_disk == i)
4060 break;
4061 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
4062 if (d && (d->state_fd >= 0))
4063 continue;
4064
4065 /* OK, this device needs recovery. Find a spare */
4066 again:
4067 for ( ; dl ; dl = dl->next) {
4068 unsigned long long esize;
4069 unsigned long long pos;
4070 struct mdinfo *d2;
4071 int is_global = 0;
4072 int is_dedicated = 0;
4073 struct extent *ex;
4074 unsigned int j;
4075 /* If in this array, skip */
4076 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
4077 if (d2->state_fd >= 0 &&
4078 d2->disk.major == dl->major &&
4079 d2->disk.minor == dl->minor) {
4080 dprintf("%x:%x already in array\n", dl->major, dl->minor);
4081 break;
4082 }
4083 if (d2)
4084 continue;
4085 if (ddf->phys->entries[dl->pdnum].type &
4086 __cpu_to_be16(DDF_Spare)) {
4087 /* Check spare assign record */
4088 if (dl->spare) {
4089 if (dl->spare->type & DDF_spare_dedicated) {
4090 /* check spare_ents for guid */
4091 for (j = 0 ;
4092 j < __be16_to_cpu(dl->spare->populated);
4093 j++) {
4094 if (memcmp(dl->spare->spare_ents[j].guid,
4095 ddf->virt->entries[a->info.container_member].guid,
4096 DDF_GUID_LEN) == 0)
4097 is_dedicated = 1;
4098 }
4099 } else
4100 is_global = 1;
4101 }
4102 } else if (ddf->phys->entries[dl->pdnum].type &
4103 __cpu_to_be16(DDF_Global_Spare)) {
4104 is_global = 1;
4105 } else if (!(ddf->phys->entries[dl->pdnum].state &
4106 __cpu_to_be16(DDF_Failed))) {
4107 /* we can possibly use some of this */
4108 is_global = 1;
4109 }
4110 if ( ! (is_dedicated ||
4111 (is_global && global_ok))) {
4112 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
4113 is_dedicated, is_global);
4114 continue;
4115 }
4116
4117 /* We are allowed to use this device - is there space?
4118 * We need a->info.component_size sectors */
4119 ex = get_extents(ddf, dl);
4120 if (!ex) {
4121 dprintf("cannot get extents\n");
4122 continue;
4123 }
4124 j = 0; pos = 0;
4125 esize = 0;
4126
4127 do {
4128 esize = ex[j].start - pos;
4129 if (esize >= a->info.component_size)
4130 break;
4131 pos = ex[j].start + ex[j].size;
4132 j++;
4133 } while (ex[j-1].size);
4134
4135 free(ex);
4136 if (esize < a->info.component_size) {
4137 dprintf("%x:%x has no room: %llu %llu\n",
4138 dl->major, dl->minor,
4139 esize, a->info.component_size);
4140 /* No room */
4141 continue;
4142 }
4143
4144 /* Cool, we have a device with some space at pos */
4145 di = xcalloc(1, sizeof(*di));
4146 di->disk.number = i;
4147 di->disk.raid_disk = i;
4148 di->disk.major = dl->major;
4149 di->disk.minor = dl->minor;
4150 di->disk.state = 0;
4151 di->recovery_start = 0;
4152 di->data_offset = pos;
4153 di->component_size = a->info.component_size;
4154 di->container_member = dl->pdnum;
4155 di->next = rv;
4156 rv = di;
4157 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
4158 i, pos);
4159
4160 break;
4161 }
4162 if (!dl && ! global_ok) {
4163 /* not enough dedicated spares, try global */
4164 global_ok = 1;
4165 dl = ddf->dlist;
4166 goto again;
4167 }
4168 }
4169
4170 if (!rv)
4171 /* No spares found */
4172 return rv;
4173 /* Now 'rv' has a list of devices to return.
4174 * Create a metadata_update record to update the
4175 * phys_refnum and lba_offset values
4176 */
4177 mu = xmalloc(sizeof(*mu));
4178 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
4179 free(mu);
4180 mu = NULL;
4181 }
4182 mu->buf = xmalloc(ddf->conf_rec_len * 512);
4183 mu->len = ddf->conf_rec_len * 512;
4184 mu->space = NULL;
4185 mu->space_list = NULL;
4186 mu->next = *updates;
4187 vc = find_vdcr(ddf, a->info.container_member);
4188 memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
4189
4190 vc = (struct vd_config*)mu->buf;
4191 lba = (__u64*)&vc->phys_refnum[ddf->mppe];
4192 for (di = rv ; di ; di = di->next) {
4193 vc->phys_refnum[di->disk.raid_disk] =
4194 ddf->phys->entries[dl->pdnum].refnum;
4195 lba[di->disk.raid_disk] = di->data_offset;
4196 }
4197 *updates = mu;
4198 return rv;
4199 }
4200 #endif /* MDASSEMBLE */
4201
4202 static int ddf_level_to_layout(int level)
4203 {
4204 switch(level) {
4205 case 0:
4206 case 1:
4207 return 0;
4208 case 5:
4209 return ALGORITHM_LEFT_SYMMETRIC;
4210 case 6:
4211 return ALGORITHM_ROTATING_N_CONTINUE;
4212 case 10:
4213 return 0x102;
4214 default:
4215 return UnSet;
4216 }
4217 }
4218
4219 static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
4220 {
4221 if (level && *level == UnSet)
4222 *level = LEVEL_CONTAINER;
4223
4224 if (level && layout && *layout == UnSet)
4225 *layout = ddf_level_to_layout(*level);
4226 }
4227
/* Method table registering the DDF format with mdadm's generic
 * superblock-handling machinery.  mdmon-only callbacks are compiled in
 * only when MDASSEMBLE is not defined. */
4228 struct superswitch super_ddf = {
4229 #ifndef MDASSEMBLE
4230 .examine_super = examine_super_ddf,
4231 .brief_examine_super = brief_examine_super_ddf,
4232 .brief_examine_subarrays = brief_examine_subarrays_ddf,
4233 .export_examine_super = export_examine_super_ddf,
4234 .detail_super = detail_super_ddf,
4235 .brief_detail_super = brief_detail_super_ddf,
4236 .validate_geometry = validate_geometry_ddf,
4237 .write_init_super = write_init_super_ddf,
4238 .add_to_super = add_to_super_ddf,
4239 .remove_from_super = remove_from_super_ddf,
4240 .load_container = load_container_ddf,
4241 #endif
4242 .match_home = match_home_ddf,
4243 .uuid_from_super= uuid_from_super_ddf,
4244 .getinfo_super = getinfo_super_ddf,
4245 .update_super = update_super_ddf,
4246
4247 .avail_size = avail_size_ddf,
4248
4249 .compare_super = compare_super_ddf,
4250
4251 .load_super = load_super_ddf,
4252 .init_super = init_super_ddf,
4253 .store_super = store_super_ddf,
4254 .free_super = free_super_ddf,
4255 .match_metadata_desc = match_metadata_desc_ddf,
4256 .container_content = container_content_ddf,
4257 .default_geometry = default_geometry_ddf,
4258
4259 .external = 1,
4260
4261 #ifndef MDASSEMBLE
4262 /* for mdmon */
4263 .open_new = ddf_open_new,
4264 .set_array_state= ddf_set_array_state,
4265 .set_disk = ddf_set_disk,
4266 .sync_metadata = ddf_sync_metadata,
4267 .process_update = ddf_process_update,
4268 .prepare_update = ddf_prepare_update,
4269 .activate_spare = ddf_activate_spare,
4270 #endif
4271 .name = "ddf",
4272 };