]> git.ipfire.org Git - thirdparty/mdadm.git/blame - super-ddf.c
DDF: layout_ddf2md: new DDF->md RAID layout conversion
[thirdparty/mdadm.git] / super-ddf.c
CommitLineData
a322f70c
DW
1/*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
e736b623 4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
a322f70c
DW
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neil@brown.name>
23 *
24 * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
26 */
27
28#define HAVE_STDINT_H 1
29#include "mdadm.h"
549e9569 30#include "mdmon.h"
a322f70c
DW
31#include "sha1.h"
32#include <values.h>
33
a322f70c
DW
34/* a non-official T10 name for creation GUIDs */
35static char T10[] = "Linux-MD";
36
37/* DDF timestamps are 1980 based, so we need to add
38 * seconds-in-decade-of-seventies to convert to linux timestamps.
39 * 10 years with 2 leap years.
40 */
41#define DECADE (3600*24*(365*10+2))
42unsigned long crc32(
43 unsigned long crc,
44 const unsigned char *buf,
45 unsigned len);
46
bedbf68a 47#define DDF_NOTFOUND (~0U)
48#define DDF_CONTAINER (DDF_NOTFOUND-1)
49
a322f70c
DW
50/* The DDF metadata handling.
51 * DDF metadata lives at the end of the device.
52 * The last 512 byte block provides an 'anchor' which is used to locate
53 * the rest of the metadata which usually lives immediately behind the anchor.
54 *
55 * Note:
56 * - all multibyte numeric fields are bigendian.
57 * - all strings are space padded.
58 *
59 */
60
61/* Primary Raid Level (PRL) */
62#define DDF_RAID0 0x00
63#define DDF_RAID1 0x01
64#define DDF_RAID3 0x03
65#define DDF_RAID4 0x04
66#define DDF_RAID5 0x05
67#define DDF_RAID1E 0x11
68#define DDF_JBOD 0x0f
69#define DDF_CONCAT 0x1f
70#define DDF_RAID5E 0x15
71#define DDF_RAID5EE 0x25
59e36268 72#define DDF_RAID6 0x06
a322f70c
DW
73
74/* Raid Level Qualifier (RLQ) */
75#define DDF_RAID0_SIMPLE 0x00
76#define DDF_RAID1_SIMPLE 0x00 /* just 2 devices in this plex */
77#define DDF_RAID1_MULTI 0x01 /* exactly 3 devices in this plex */
78#define DDF_RAID3_0 0x00 /* parity in first extent */
79#define DDF_RAID3_N 0x01 /* parity in last extent */
80#define DDF_RAID4_0 0x00 /* parity in first extent */
81#define DDF_RAID4_N 0x01 /* parity in last extent */
82/* these apply to raid5e and raid5ee as well */
83#define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */
59e36268 84#define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */
a322f70c
DW
85#define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */
86#define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */
87
88#define DDF_RAID1E_ADJACENT 0x00 /* raid10 nearcopies==2 */
89#define DDF_RAID1E_OFFSET 0x01 /* raid10 offsetcopies==2 */
90
91/* Secondary RAID Level (SRL) */
92#define DDF_2STRIPED 0x00 /* This is weirder than RAID0 !! */
93#define DDF_2MIRRORED 0x01
94#define DDF_2CONCAT 0x02
95#define DDF_2SPANNED 0x03 /* This is also weird - be careful */
96
97/* Magic numbers */
98#define DDF_HEADER_MAGIC __cpu_to_be32(0xDE11DE11)
99#define DDF_CONTROLLER_MAGIC __cpu_to_be32(0xAD111111)
100#define DDF_PHYS_RECORDS_MAGIC __cpu_to_be32(0x22222222)
101#define DDF_PHYS_DATA_MAGIC __cpu_to_be32(0x33333333)
102#define DDF_VIRT_RECORDS_MAGIC __cpu_to_be32(0xDDDDDDDD)
103#define DDF_VD_CONF_MAGIC __cpu_to_be32(0xEEEEEEEE)
104#define DDF_SPARE_ASSIGN_MAGIC __cpu_to_be32(0x55555555)
105#define DDF_VU_CONF_MAGIC __cpu_to_be32(0x88888888)
106#define DDF_VENDOR_LOG_MAGIC __cpu_to_be32(0x01dBEEF0)
107#define DDF_BBM_LOG_MAGIC __cpu_to_be32(0xABADB10C)
108
109#define DDF_GUID_LEN 24
59e36268
NB
110#define DDF_REVISION_0 "01.00.00"
111#define DDF_REVISION_2 "01.02.00"
a322f70c
DW
112
113struct ddf_header {
88c164f4 114 __u32 magic; /* DDF_HEADER_MAGIC */
a322f70c
DW
115 __u32 crc;
116 char guid[DDF_GUID_LEN];
59e36268 117 char revision[8]; /* 01.02.00 */
a322f70c
DW
118 __u32 seq; /* starts at '1' */
119 __u32 timestamp;
120 __u8 openflag;
121 __u8 foreignflag;
122 __u8 enforcegroups;
123 __u8 pad0; /* 0xff */
124 __u8 pad1[12]; /* 12 * 0xff */
125 /* 64 bytes so far */
126 __u8 header_ext[32]; /* reserved: fill with 0xff */
127 __u64 primary_lba;
128 __u64 secondary_lba;
129 __u8 type;
130 __u8 pad2[3]; /* 0xff */
131 __u32 workspace_len; /* sectors for vendor space -
132 * at least 32768(sectors) */
133 __u64 workspace_lba;
134 __u16 max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
135 __u16 max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
136 __u16 max_partitions; /* i.e. max num of configuration
137 record entries per disk */
138 __u16 config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
139 *12/512) */
140 __u16 max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
141 __u8 pad3[54]; /* 0xff */
142 /* 192 bytes so far */
143 __u32 controller_section_offset;
144 __u32 controller_section_length;
145 __u32 phys_section_offset;
146 __u32 phys_section_length;
147 __u32 virt_section_offset;
148 __u32 virt_section_length;
149 __u32 config_section_offset;
150 __u32 config_section_length;
151 __u32 data_section_offset;
152 __u32 data_section_length;
153 __u32 bbm_section_offset;
154 __u32 bbm_section_length;
155 __u32 diag_space_offset;
156 __u32 diag_space_length;
157 __u32 vendor_offset;
158 __u32 vendor_length;
159 /* 256 bytes so far */
160 __u8 pad4[256]; /* 0xff */
161};
162
163/* type field */
164#define DDF_HEADER_ANCHOR 0x00
165#define DDF_HEADER_PRIMARY 0x01
166#define DDF_HEADER_SECONDARY 0x02
167
168/* The content of the 'controller section' - global scope */
169struct ddf_controller_data {
88c164f4 170 __u32 magic; /* DDF_CONTROLLER_MAGIC */
a322f70c
DW
171 __u32 crc;
172 char guid[DDF_GUID_LEN];
173 struct controller_type {
174 __u16 vendor_id;
175 __u16 device_id;
176 __u16 sub_vendor_id;
177 __u16 sub_device_id;
178 } type;
179 char product_id[16];
180 __u8 pad[8]; /* 0xff */
181 __u8 vendor_data[448];
182};
183
184/* The content of phys_section - global scope */
185struct phys_disk {
88c164f4 186 __u32 magic; /* DDF_PHYS_RECORDS_MAGIC */
a322f70c
DW
187 __u32 crc;
188 __u16 used_pdes;
189 __u16 max_pdes;
190 __u8 pad[52];
191 struct phys_disk_entry {
192 char guid[DDF_GUID_LEN];
193 __u32 refnum;
194 __u16 type;
195 __u16 state;
196 __u64 config_size; /* DDF structures must be after here */
197 char path[18]; /* another horrible structure really */
198 __u8 pad[6];
199 } entries[0];
200};
201
202/* phys_disk_entry.type is a bitmap - bigendian remember */
203#define DDF_Forced_PD_GUID 1
204#define DDF_Active_in_VD 2
88c164f4 205#define DDF_Global_Spare 4 /* VD_CONF records are ignored */
a322f70c
DW
206#define DDF_Spare 8 /* overrides Global_spare */
207#define DDF_Foreign 16
208#define DDF_Legacy 32 /* no DDF on this device */
209
210#define DDF_Interface_mask 0xf00
211#define DDF_Interface_SCSI 0x100
212#define DDF_Interface_SAS 0x200
213#define DDF_Interface_SATA 0x300
214#define DDF_Interface_FC 0x400
215
216/* phys_disk_entry.state is a bigendian bitmap */
217#define DDF_Online 1
218#define DDF_Failed 2 /* overrides 1,4,8 */
219#define DDF_Rebuilding 4
220#define DDF_Transition 8
221#define DDF_SMART 16
222#define DDF_ReadErrors 32
223#define DDF_Missing 64
224
225/* The content of the virt_section global scope */
226struct virtual_disk {
88c164f4 227 __u32 magic; /* DDF_VIRT_RECORDS_MAGIC */
a322f70c
DW
228 __u32 crc;
229 __u16 populated_vdes;
230 __u16 max_vdes;
231 __u8 pad[52];
232 struct virtual_entry {
233 char guid[DDF_GUID_LEN];
234 __u16 unit;
235 __u16 pad0; /* 0xffff */
236 __u16 guid_crc;
237 __u16 type;
238 __u8 state;
239 __u8 init_state;
240 __u8 pad1[14];
241 char name[16];
242 } entries[0];
243};
244
245/* virtual_entry.type is a bitmap - bigendian */
246#define DDF_Shared 1
247#define DDF_Enforce_Groups 2
248#define DDF_Unicode 4
249#define DDF_Owner_Valid 8
250
251/* virtual_entry.state is a bigendian bitmap */
252#define DDF_state_mask 0x7
253#define DDF_state_optimal 0x0
254#define DDF_state_degraded 0x1
255#define DDF_state_deleted 0x2
256#define DDF_state_missing 0x3
257#define DDF_state_failed 0x4
7a7cc504 258#define DDF_state_part_optimal 0x5
a322f70c
DW
259
260#define DDF_state_morphing 0x8
261#define DDF_state_inconsistent 0x10
262
263/* virtual_entry.init_state is a bigendian bitmap */
264#define DDF_initstate_mask 0x03
265#define DDF_init_not 0x00
7a7cc504
NB
266#define DDF_init_quick 0x01 /* initialisation is progress.
267 * i.e. 'state_inconsistent' */
a322f70c
DW
268#define DDF_init_full 0x02
269
270#define DDF_access_mask 0xc0
271#define DDF_access_rw 0x00
272#define DDF_access_ro 0x80
273#define DDF_access_blocked 0xc0
274
275/* The content of the config_section - local scope
276 * It has multiple records each config_record_len sectors
277 * They can be vd_config or spare_assign
278 */
279
280struct vd_config {
88c164f4 281 __u32 magic; /* DDF_VD_CONF_MAGIC */
a322f70c
DW
282 __u32 crc;
283 char guid[DDF_GUID_LEN];
284 __u32 timestamp;
285 __u32 seqnum;
286 __u8 pad0[24];
287 __u16 prim_elmnt_count;
288 __u8 chunk_shift; /* 0 == 512, 1==1024 etc */
289 __u8 prl;
290 __u8 rlq;
291 __u8 sec_elmnt_count;
292 __u8 sec_elmnt_seq;
293 __u8 srl;
598f0d58
NB
294 __u64 blocks; /* blocks per component could be different
295 * on different component devices...(only
296 * for concat I hope) */
297 __u64 array_blocks; /* blocks in array */
a322f70c
DW
298 __u8 pad1[8];
299 __u32 spare_refs[8];
300 __u8 cache_pol[8];
301 __u8 bg_rate;
302 __u8 pad2[3];
303 __u8 pad3[52];
304 __u8 pad4[192];
305 __u8 v0[32]; /* reserved- 0xff */
306 __u8 v1[32]; /* reserved- 0xff */
307 __u8 v2[16]; /* reserved- 0xff */
308 __u8 v3[16]; /* reserved- 0xff */
309 __u8 vendor[32];
310 __u32 phys_refnum[0]; /* refnum of each disk in sequence */
311 /*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
312 bvd are always the same size */
313};
314
315/* vd_config.cache_pol[7] is a bitmap */
316#define DDF_cache_writeback 1 /* else writethrough */
317#define DDF_cache_wadaptive 2 /* only applies if writeback */
318#define DDF_cache_readahead 4
319#define DDF_cache_radaptive 8 /* only if doing read-ahead */
320#define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
321#define DDF_cache_wallowed 32 /* enable write caching */
322#define DDF_cache_rallowed 64 /* enable read caching */
323
324struct spare_assign {
88c164f4 325 __u32 magic; /* DDF_SPARE_ASSIGN_MAGIC */
a322f70c
DW
326 __u32 crc;
327 __u32 timestamp;
328 __u8 reserved[7];
329 __u8 type;
330 __u16 populated; /* SAEs used */
331 __u16 max; /* max SAEs */
332 __u8 pad[8];
333 struct spare_assign_entry {
334 char guid[DDF_GUID_LEN];
335 __u16 secondary_element;
336 __u8 pad[6];
337 } spare_ents[0];
338};
339/* spare_assign.type is a bitmap */
340#define DDF_spare_dedicated 0x1 /* else global */
341#define DDF_spare_revertible 0x2 /* else committable */
342#define DDF_spare_active 0x4 /* else not active */
343#define DDF_spare_affinity 0x8 /* enclosure affinity */
344
345/* The data_section contents - local scope */
346struct disk_data {
88c164f4 347 __u32 magic; /* DDF_PHYS_DATA_MAGIC */
a322f70c
DW
348 __u32 crc;
349 char guid[DDF_GUID_LEN];
350 __u32 refnum; /* crc of some magic drive data ... */
351 __u8 forced_ref; /* set when above was not result of magic */
352 __u8 forced_guid; /* set if guid was forced rather than magic */
353 __u8 vendor[32];
354 __u8 pad[442];
355};
356
357/* bbm_section content */
358struct bad_block_log {
359 __u32 magic;
360 __u32 crc;
361 __u16 entry_count;
362 __u32 spare_count;
363 __u8 pad[10];
364 __u64 first_spare;
365 struct mapped_block {
366 __u64 defective_start;
367 __u32 replacement_start;
368 __u16 remap_count;
369 __u8 pad[2];
370 } entries[0];
371};
372
373/* Struct for internally holding ddf structures */
374/* The DDF structure stored on each device is potentially
375 * quite different, as some data is global and some is local.
376 * The global data is:
377 * - ddf header
378 * - controller_data
379 * - Physical disk records
380 * - Virtual disk records
381 * The local data is:
382 * - Configuration records
383 * - Physical Disk data section
384 * ( and Bad block and vendor which I don't care about yet).
385 *
386 * The local data is parsed into separate lists as it is read
387 * and reconstructed for writing. This means that we only need
388 * to make config changes once and they are automatically
389 * propagated to all devices.
390 * Note that the ddf_super has space for the conf and disk data
391 * for this disk and also for a list of all such data.
392 * The list is only used for the superblock that is being
393 * built in Create or Assemble to describe the whole array.
394 */
395struct ddf_super {
6416d527 396 struct ddf_header anchor, primary, secondary;
a322f70c 397 struct ddf_controller_data controller;
6416d527 398 struct ddf_header *active;
a322f70c
DW
399 struct phys_disk *phys;
400 struct virtual_disk *virt;
401 int pdsize, vdsize;
f21e18ca 402 unsigned int max_part, mppe, conf_rec_len;
d2ca6449 403 int currentdev;
18a2f463 404 int updates_pending;
a322f70c 405 struct vcl {
6416d527
NB
406 union {
407 char space[512];
408 struct {
409 struct vcl *next;
410 __u64 *lba_offset; /* location in 'conf' of
411 * the lba table */
f21e18ca 412 unsigned int vcnum; /* index into ->virt */
8ec5d685 413 struct vd_config **other_bvds;
6416d527
NB
414 __u64 *block_sizes; /* NULL if all the same */
415 };
416 };
a322f70c 417 struct vd_config conf;
d2ca6449 418 } *conflist, *currentconf;
a322f70c 419 struct dl {
6416d527
NB
420 union {
421 char space[512];
422 struct {
423 struct dl *next;
424 int major, minor;
425 char *devname;
426 int fd;
427 unsigned long long size; /* sectors */
097bcf00 428 unsigned long long primary_lba; /* sectors */
429 unsigned long long secondary_lba; /* sectors */
430 unsigned long long workspace_lba; /* sectors */
6416d527
NB
431 int pdnum; /* index in ->phys */
432 struct spare_assign *spare;
8592f29d
N
433 void *mdupdate; /* hold metadata update */
434
435 /* These fields used by auto-layout */
436 int raiddisk; /* slot to fill in autolayout */
437 __u64 esize;
6416d527
NB
438 };
439 };
a322f70c 440 struct disk_data disk;
b2280677 441 struct vcl *vlist[0]; /* max_part in size */
2cc2983d 442 } *dlist, *add_list;
a322f70c
DW
443};
444
445#ifndef offsetof
446#define offsetof(t,f) ((size_t)&(((t*)0)->f))
447#endif
448
#if DEBUG
static int all_ff(const char *guid);

/* Debug helper: print the 'state' and 'init_state' bytes of every
 * virtual disk entry whose GUID is not reported as unused by all_ff().
 * The output is prefixed with this function's name and 'msg'.
 */
static void pr_state(struct ddf_super *ddf, const char *msg)
{
	unsigned int vd;

	dprintf("%s/%s: ", __func__, msg);
	for (vd = 0; vd < __be16_to_cpu(ddf->active->max_vd_entries); vd++) {
		const struct virtual_entry *ve = &ddf->virt->entries[vd];

		if (!all_ff(ve->guid))
			dprintf("%u(s=%02x i=%02x) ", vd,
				ve->state,
				ve->init_state);
	}
	dprintf("\n");
}
#else
/* Without DEBUG, state tracing compiles to nothing. */
static void pr_state(const struct ddf_super *ddf, const char *msg) {}
#endif
467
468#define ddf_set_updates_pending(x) \
469 do { (x)->updates_pending = 1; pr_state(x, __func__); } while (0)
470
f21e18ca 471static unsigned int calc_crc(void *buf, int len)
a322f70c
DW
472{
473 /* crcs are always at the same place as in the ddf_header */
474 struct ddf_header *ddf = buf;
475 __u32 oldcrc = ddf->crc;
476 __u32 newcrc;
477 ddf->crc = 0xffffffff;
478
479 newcrc = crc32(0, buf, len);
480 ddf->crc = oldcrc;
4abe6b70
N
481 /* The crc is store (like everything) bigendian, so convert
482 * here for simplicity
483 */
484 return __cpu_to_be32(newcrc);
a322f70c
DW
485}
486
8a2848a7 487static int err_bad_ddf_layout(const struct vd_config *conf)
488{
489 pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
490 conf->prl, conf->rlq, __be16_to_cpu(conf->prim_elmnt_count));
491 return -1;
492}
493
494static int layout_ddf2md(const struct vd_config *conf,
495 mdu_array_info_t *array)
496{
497 int level = LEVEL_UNSUPPORTED;
498 int layout = 0;
499 int raiddisks = __be16_to_cpu(conf->prim_elmnt_count);
500
501 if (conf->sec_elmnt_count > 1) {
502 /* see also check_secondary() */
503 if (conf->prl != DDF_RAID1 ||
504 (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED)) {
505 pr_err("Unsupported secondary RAID level %u/%u\n",
506 conf->prl, conf->srl);
507 return -1;
508 }
509 if (raiddisks == 2 && conf->rlq == DDF_RAID1_SIMPLE)
510 layout = 0x102;
511 else if (raiddisks == 3 && conf->rlq == DDF_RAID1_MULTI)
512 layout = 0x103;
513 else
514 return err_bad_ddf_layout(conf);
515 raiddisks *= conf->sec_elmnt_count;
516 level = 10;
517 goto good;
518 }
519
520 switch (conf->prl) {
521 case DDF_CONCAT:
522 level = LEVEL_LINEAR;
523 break;
524 case DDF_RAID0:
525 if (conf->rlq != DDF_RAID0_SIMPLE)
526 return err_bad_ddf_layout(conf);
527 level = 0;
528 break;
529 case DDF_RAID1:
530 if (!((conf->rlq == DDF_RAID1_SIMPLE && raiddisks == 2) ||
531 (conf->rlq == DDF_RAID1_MULTI && raiddisks == 3)))
532 return err_bad_ddf_layout(conf);
533 level = 1;
534 break;
535 case DDF_RAID4:
536 if (conf->rlq != DDF_RAID4_N)
537 return err_bad_ddf_layout(conf);
538 level = 4;
539 break;
540 case DDF_RAID5:
541 switch (conf->rlq) {
542 case DDF_RAID5_N_RESTART:
543 layout = ALGORITHM_LEFT_ASYMMETRIC;
544 break;
545 case DDF_RAID5_0_RESTART:
546 layout = ALGORITHM_RIGHT_ASYMMETRIC;
547 break;
548 case DDF_RAID5_N_CONTINUE:
549 layout = ALGORITHM_LEFT_SYMMETRIC;
550 break;
551 default:
552 return err_bad_ddf_layout(conf);
553 }
554 level = 5;
555 break;
556 case DDF_RAID6:
557 switch (conf->rlq) {
558 case DDF_RAID5_N_RESTART:
559 layout = ALGORITHM_ROTATING_N_RESTART;
560 break;
561 case DDF_RAID6_0_RESTART:
562 layout = ALGORITHM_ROTATING_ZERO_RESTART;
563 break;
564 case DDF_RAID5_N_CONTINUE:
565 layout = ALGORITHM_ROTATING_N_CONTINUE;
566 break;
567 default:
568 return err_bad_ddf_layout(conf);
569 }
570 level = 6;
571 break;
572 default:
573 return err_bad_ddf_layout(conf);
574 };
575
576good:
577 array->level = level;
578 array->layout = layout;
579 array->raid_disks = raiddisks;
580 return 0;
581}
582
a322f70c
DW
583static int load_ddf_header(int fd, unsigned long long lba,
584 unsigned long long size,
585 int type,
586 struct ddf_header *hdr, struct ddf_header *anchor)
587{
588 /* read a ddf header (primary or secondary) from fd/lba
589 * and check that it is consistent with anchor
590 * Need to check:
591 * magic, crc, guid, rev, and LBA's header_type, and
592 * everything after header_type must be the same
593 */
594 if (lba >= size-1)
595 return 0;
596
597 if (lseek64(fd, lba<<9, 0) < 0)
598 return 0;
599
600 if (read(fd, hdr, 512) != 512)
601 return 0;
602
603 if (hdr->magic != DDF_HEADER_MAGIC)
604 return 0;
605 if (calc_crc(hdr, 512) != hdr->crc)
606 return 0;
607 if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
608 memcmp(anchor->revision, hdr->revision, 8) != 0 ||
609 anchor->primary_lba != hdr->primary_lba ||
610 anchor->secondary_lba != hdr->secondary_lba ||
611 hdr->type != type ||
612 memcmp(anchor->pad2, hdr->pad2, 512 -
613 offsetof(struct ddf_header, pad2)) != 0)
614 return 0;
615
616 /* Looks good enough to me... */
617 return 1;
618}
619
620static void *load_section(int fd, struct ddf_super *super, void *buf,
621 __u32 offset_be, __u32 len_be, int check)
622{
623 unsigned long long offset = __be32_to_cpu(offset_be);
624 unsigned long long len = __be32_to_cpu(len_be);
625 int dofree = (buf == NULL);
626
627 if (check)
628 if (len != 2 && len != 8 && len != 32
629 && len != 128 && len != 512)
630 return NULL;
631
632 if (len > 1024)
633 return NULL;
634 if (buf) {
635 /* All pre-allocated sections are a single block */
636 if (len != 1)
637 return NULL;
3d2c4fc7
DW
638 } else if (posix_memalign(&buf, 512, len<<9) != 0)
639 buf = NULL;
6416d527 640
a322f70c
DW
641 if (!buf)
642 return NULL;
643
644 if (super->active->type == 1)
645 offset += __be64_to_cpu(super->active->primary_lba);
646 else
647 offset += __be64_to_cpu(super->active->secondary_lba);
648
f21e18ca 649 if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
a322f70c
DW
650 if (dofree)
651 free(buf);
652 return NULL;
653 }
f21e18ca 654 if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
a322f70c
DW
655 if (dofree)
656 free(buf);
657 return NULL;
658 }
659 return buf;
660}
661
662static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
663{
664 unsigned long long dsize;
665
666 get_dev_size(fd, NULL, &dsize);
667
668 if (lseek64(fd, dsize-512, 0) < 0) {
669 if (devname)
e7b84f9d
N
670 pr_err("Cannot seek to anchor block on %s: %s\n",
671 devname, strerror(errno));
a322f70c
DW
672 return 1;
673 }
674 if (read(fd, &super->anchor, 512) != 512) {
675 if (devname)
e7b84f9d
N
676 pr_err("Cannot read anchor block on %s: %s\n",
677 devname, strerror(errno));
a322f70c
DW
678 return 1;
679 }
680 if (super->anchor.magic != DDF_HEADER_MAGIC) {
681 if (devname)
e7b84f9d 682 pr_err("no DDF anchor found on %s\n",
a322f70c
DW
683 devname);
684 return 2;
685 }
686 if (calc_crc(&super->anchor, 512) != super->anchor.crc) {
687 if (devname)
e7b84f9d 688 pr_err("bad CRC on anchor on %s\n",
a322f70c
DW
689 devname);
690 return 2;
691 }
59e36268
NB
692 if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
693 memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
a322f70c 694 if (devname)
e7b84f9d 695 pr_err("can only support super revision"
59e36268
NB
696 " %.8s and earlier, not %.8s on %s\n",
697 DDF_REVISION_2, super->anchor.revision,devname);
a322f70c
DW
698 return 2;
699 }
dbeb699a 700 super->active = NULL;
a322f70c
DW
701 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
702 dsize >> 9, 1,
703 &super->primary, &super->anchor) == 0) {
704 if (devname)
e7b84f9d
N
705 pr_err("Failed to load primary DDF header "
706 "on %s\n", devname);
dbeb699a 707 } else
708 super->active = &super->primary;
a322f70c
DW
709 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
710 dsize >> 9, 2,
711 &super->secondary, &super->anchor)) {
712 if ((__be32_to_cpu(super->primary.seq)
713 < __be32_to_cpu(super->secondary.seq) &&
714 !super->secondary.openflag)
715 || (__be32_to_cpu(super->primary.seq)
716 == __be32_to_cpu(super->secondary.seq) &&
717 super->primary.openflag && !super->secondary.openflag)
dbeb699a 718 || super->active == NULL
a322f70c
DW
719 )
720 super->active = &super->secondary;
dbeb699a 721 } else if (devname)
722 pr_err("Failed to load secondary DDF header on %s\n",
723 devname);
724 if (super->active == NULL)
725 return 2;
a322f70c
DW
726 return 0;
727}
728
729static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
730{
731 void *ok;
732 ok = load_section(fd, super, &super->controller,
733 super->active->controller_section_offset,
734 super->active->controller_section_length,
735 0);
736 super->phys = load_section(fd, super, NULL,
737 super->active->phys_section_offset,
738 super->active->phys_section_length,
739 1);
740 super->pdsize = __be32_to_cpu(super->active->phys_section_length) * 512;
741
742 super->virt = load_section(fd, super, NULL,
743 super->active->virt_section_offset,
744 super->active->virt_section_length,
745 1);
746 super->vdsize = __be32_to_cpu(super->active->virt_section_length) * 512;
747 if (!ok ||
748 !super->phys ||
749 !super->virt) {
750 free(super->phys);
751 free(super->virt);
a2349791
NB
752 super->phys = NULL;
753 super->virt = NULL;
a322f70c
DW
754 return 2;
755 }
756 super->conflist = NULL;
757 super->dlist = NULL;
8c3b8c2c
NB
758
759 super->max_part = __be16_to_cpu(super->active->max_partitions);
760 super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
761 super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
a322f70c
DW
762 return 0;
763}
764
3dc821b0 765static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
766 unsigned int len)
767{
768 int i;
769 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
770 if (vcl->other_bvds[i] != NULL &&
771 vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
772 break;
773
774 if (i < vcl->conf.sec_elmnt_count-1) {
775 if (vd->seqnum <= vcl->other_bvds[i]->seqnum)
776 return;
777 } else {
778 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
779 if (vcl->other_bvds[i] == NULL)
780 break;
781 if (i == vcl->conf.sec_elmnt_count-1) {
782 pr_err("no space for sec level config %u, count is %u\n",
783 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
784 return;
785 }
786 if (posix_memalign((void **)&vcl->other_bvds[i], 512, len)
787 != 0) {
788 pr_err("%s could not allocate vd buf\n", __func__);
789 return;
790 }
791 }
792 memcpy(vcl->other_bvds[i], vd, len);
793}
794
a322f70c
DW
795static int load_ddf_local(int fd, struct ddf_super *super,
796 char *devname, int keep)
797{
798 struct dl *dl;
799 struct stat stb;
800 char *conf;
f21e18ca
N
801 unsigned int i;
802 unsigned int confsec;
b2280677 803 int vnum;
f21e18ca 804 unsigned int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
d2ca6449 805 unsigned long long dsize;
a322f70c
DW
806
807 /* First the local disk info */
3d2c4fc7 808 if (posix_memalign((void**)&dl, 512,
6416d527 809 sizeof(*dl) +
3d2c4fc7 810 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
e7b84f9d 811 pr_err("%s could not allocate disk info buffer\n",
3d2c4fc7
DW
812 __func__);
813 return 1;
814 }
a322f70c
DW
815
816 load_section(fd, super, &dl->disk,
817 super->active->data_section_offset,
818 super->active->data_section_length,
819 0);
503975b9 820 dl->devname = devname ? xstrdup(devname) : NULL;
598f0d58 821
a322f70c
DW
822 fstat(fd, &stb);
823 dl->major = major(stb.st_rdev);
824 dl->minor = minor(stb.st_rdev);
825 dl->next = super->dlist;
826 dl->fd = keep ? fd : -1;
d2ca6449
NB
827
828 dl->size = 0;
829 if (get_dev_size(fd, devname, &dsize))
830 dl->size = dsize >> 9;
097bcf00 831 /* If the disks have different sizes, the LBAs will differ
832 * between phys disks.
833 * At this point here, the values in super->active must be valid
834 * for this phys disk. */
835 dl->primary_lba = super->active->primary_lba;
836 dl->secondary_lba = super->active->secondary_lba;
837 dl->workspace_lba = super->active->workspace_lba;
b2280677 838 dl->spare = NULL;
f21e18ca 839 for (i = 0 ; i < super->max_part ; i++)
a322f70c
DW
840 dl->vlist[i] = NULL;
841 super->dlist = dl;
59e36268 842 dl->pdnum = -1;
f21e18ca 843 for (i = 0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
5575e7d9
NB
844 if (memcmp(super->phys->entries[i].guid,
845 dl->disk.guid, DDF_GUID_LEN) == 0)
846 dl->pdnum = i;
847
a322f70c
DW
848 /* Now the config list. */
849 /* 'conf' is an array of config entries, some of which are
850 * probably invalid. Those which are good need to be copied into
851 * the conflist
852 */
a322f70c
DW
853
854 conf = load_section(fd, super, NULL,
855 super->active->config_section_offset,
856 super->active->config_section_length,
857 0);
858
b2280677 859 vnum = 0;
e223334f
N
860 for (confsec = 0;
861 confsec < __be32_to_cpu(super->active->config_section_length);
862 confsec += super->conf_rec_len) {
a322f70c 863 struct vd_config *vd =
e223334f 864 (struct vd_config *)((char*)conf + confsec*512);
a322f70c
DW
865 struct vcl *vcl;
866
b2280677
NB
867 if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
868 if (dl->spare)
869 continue;
3d2c4fc7
DW
870 if (posix_memalign((void**)&dl->spare, 512,
871 super->conf_rec_len*512) != 0) {
e7b84f9d
N
872 pr_err("%s could not allocate spare info buf\n",
873 __func__);
3d2c4fc7
DW
874 return 1;
875 }
613b0d17 876
b2280677
NB
877 memcpy(dl->spare, vd, super->conf_rec_len*512);
878 continue;
879 }
a322f70c
DW
880 if (vd->magic != DDF_VD_CONF_MAGIC)
881 continue;
882 for (vcl = super->conflist; vcl; vcl = vcl->next) {
883 if (memcmp(vcl->conf.guid,
884 vd->guid, DDF_GUID_LEN) == 0)
885 break;
886 }
887
888 if (vcl) {
b2280677 889 dl->vlist[vnum++] = vcl;
3dc821b0 890 if (vcl->other_bvds != NULL &&
891 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
892 add_other_bvd(vcl, vd, super->conf_rec_len*512);
893 continue;
894 }
a322f70c
DW
895 if (__be32_to_cpu(vd->seqnum) <=
896 __be32_to_cpu(vcl->conf.seqnum))
897 continue;
59e36268 898 } else {
3d2c4fc7 899 if (posix_memalign((void**)&vcl, 512,
6416d527 900 (super->conf_rec_len*512 +
3d2c4fc7 901 offsetof(struct vcl, conf))) != 0) {
e7b84f9d
N
902 pr_err("%s could not allocate vcl buf\n",
903 __func__);
3d2c4fc7
DW
904 return 1;
905 }
a322f70c 906 vcl->next = super->conflist;
59e36268 907 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
8ec5d685 908 if (vd->sec_elmnt_count > 1)
909 vcl->other_bvds =
910 xcalloc(vd->sec_elmnt_count - 1,
911 sizeof(struct vd_config *));
912 else
913 vcl->other_bvds = NULL;
a322f70c 914 super->conflist = vcl;
b2280677 915 dl->vlist[vnum++] = vcl;
a322f70c 916 }
8c3b8c2c 917 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
a322f70c 918 vcl->lba_offset = (__u64*)
8c3b8c2c 919 &vcl->conf.phys_refnum[super->mppe];
59e36268
NB
920
921 for (i=0; i < max_virt_disks ; i++)
922 if (memcmp(super->virt->entries[i].guid,
923 vcl->conf.guid, DDF_GUID_LEN)==0)
924 break;
925 if (i < max_virt_disks)
926 vcl->vcnum = i;
a322f70c
DW
927 }
928 free(conf);
929
930 return 0;
931}
932
933#ifndef MDASSEMBLE
934static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 935 void **sbp, char *devname);
a322f70c 936#endif
37424f13
DW
937
938static void free_super_ddf(struct supertype *st);
939
a322f70c
DW
940static int load_super_ddf(struct supertype *st, int fd,
941 char *devname)
942{
943 unsigned long long dsize;
944 struct ddf_super *super;
945 int rv;
946
a322f70c
DW
947 if (get_dev_size(fd, devname, &dsize) == 0)
948 return 1;
949
b31df436 950 if (!st->ignore_hw_compat && test_partition(fd))
691c6ee1
N
951 /* DDF is not allowed on partitions */
952 return 1;
953
a322f70c
DW
954 /* 32M is a lower bound */
955 if (dsize <= 32*1024*1024) {
97320d7c 956 if (devname)
e7b84f9d
N
957 pr_err("%s is too small for ddf: "
958 "size is %llu sectors.\n",
959 devname, dsize>>9);
97320d7c 960 return 1;
a322f70c
DW
961 }
962 if (dsize & 511) {
97320d7c 963 if (devname)
e7b84f9d
N
964 pr_err("%s is an odd size for ddf: "
965 "size is %llu bytes.\n",
966 devname, dsize);
97320d7c 967 return 1;
a322f70c
DW
968 }
969
37424f13
DW
970 free_super_ddf(st);
971
6416d527 972 if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
e7b84f9d 973 pr_err("malloc of %zu failed.\n",
a322f70c
DW
974 sizeof(*super));
975 return 1;
976 }
a2349791 977 memset(super, 0, sizeof(*super));
a322f70c
DW
978
979 rv = load_ddf_headers(fd, super, devname);
980 if (rv) {
981 free(super);
982 return rv;
983 }
984
985 /* Have valid headers and have chosen the best. Let's read in the rest*/
986
987 rv = load_ddf_global(fd, super, devname);
988
989 if (rv) {
990 if (devname)
e7b84f9d
N
991 pr_err("Failed to load all information "
992 "sections on %s\n", devname);
a322f70c
DW
993 free(super);
994 return rv;
995 }
996
3d2c4fc7
DW
997 rv = load_ddf_local(fd, super, devname, 0);
998
999 if (rv) {
1000 if (devname)
e7b84f9d
N
1001 pr_err("Failed to load all information "
1002 "sections on %s\n", devname);
3d2c4fc7
DW
1003 free(super);
1004 return rv;
1005 }
a322f70c
DW
1006
1007 /* Should possibly check the sections .... */
1008
1009 st->sb = super;
1010 if (st->ss == NULL) {
1011 st->ss = &super_ddf;
1012 st->minor_version = 0;
1013 st->max_devs = 512;
1014 }
1015 return 0;
1016
1017}
1018
1019static void free_super_ddf(struct supertype *st)
1020{
1021 struct ddf_super *ddf = st->sb;
1022 if (ddf == NULL)
1023 return;
1024 free(ddf->phys);
1025 free(ddf->virt);
1026 while (ddf->conflist) {
1027 struct vcl *v = ddf->conflist;
1028 ddf->conflist = v->next;
59e36268
NB
1029 if (v->block_sizes)
1030 free(v->block_sizes);
3dc821b0 1031 if (v->other_bvds) {
1032 int i;
1033 for (i = 0; i < v->conf.sec_elmnt_count-1; i++)
1034 if (v->other_bvds[i] != NULL)
1035 free(v->other_bvds[i]);
8ec5d685 1036 free(v->other_bvds);
3dc821b0 1037 }
a322f70c
DW
1038 free(v);
1039 }
1040 while (ddf->dlist) {
1041 struct dl *d = ddf->dlist;
1042 ddf->dlist = d->next;
1043 if (d->fd >= 0)
1044 close(d->fd);
b2280677
NB
1045 if (d->spare)
1046 free(d->spare);
a322f70c
DW
1047 free(d);
1048 }
8a38cb04
N
1049 while (ddf->add_list) {
1050 struct dl *d = ddf->add_list;
1051 ddf->add_list = d->next;
1052 if (d->fd >= 0)
1053 close(d->fd);
1054 if (d->spare)
1055 free(d->spare);
1056 free(d);
1057 }
a322f70c
DW
1058 free(ddf);
1059 st->sb = NULL;
1060}
1061
1062static struct supertype *match_metadata_desc_ddf(char *arg)
1063{
1064 /* 'ddf' only support containers */
1065 struct supertype *st;
1066 if (strcmp(arg, "ddf") != 0 &&
1067 strcmp(arg, "default") != 0
1068 )
1069 return NULL;
1070
503975b9 1071 st = xcalloc(1, sizeof(*st));
a322f70c
DW
1072 st->ss = &super_ddf;
1073 st->max_devs = 512;
1074 st->minor_version = 0;
1075 st->sb = NULL;
1076 return st;
1077}
1078
a322f70c
DW
1079#ifndef MDASSEMBLE
1080
1081static mapping_t ddf_state[] = {
1082 { "Optimal", 0},
1083 { "Degraded", 1},
1084 { "Deleted", 2},
1085 { "Missing", 3},
1086 { "Failed", 4},
1087 { "Partially Optimal", 5},
1088 { "-reserved-", 6},
1089 { "-reserved-", 7},
1090 { NULL, 0}
1091};
1092
1093static mapping_t ddf_init_state[] = {
1094 { "Not Initialised", 0},
1095 { "QuickInit in Progress", 1},
1096 { "Fully Initialised", 2},
1097 { "*UNKNOWN*", 3},
1098 { NULL, 0}
1099};
1100static mapping_t ddf_access[] = {
1101 { "Read/Write", 0},
1102 { "Reserved", 1},
1103 { "Read Only", 2},
1104 { "Blocked (no access)", 3},
1105 { NULL ,0}
1106};
1107
1108static mapping_t ddf_level[] = {
1109 { "RAID0", DDF_RAID0},
1110 { "RAID1", DDF_RAID1},
1111 { "RAID3", DDF_RAID3},
1112 { "RAID4", DDF_RAID4},
1113 { "RAID5", DDF_RAID5},
1114 { "RAID1E",DDF_RAID1E},
1115 { "JBOD", DDF_JBOD},
1116 { "CONCAT",DDF_CONCAT},
1117 { "RAID5E",DDF_RAID5E},
1118 { "RAID5EE",DDF_RAID5EE},
1119 { "RAID6", DDF_RAID6},
1120 { NULL, 0}
1121};
1122static mapping_t ddf_sec_level[] = {
1123 { "Striped", DDF_2STRIPED},
1124 { "Mirrored", DDF_2MIRRORED},
1125 { "Concat", DDF_2CONCAT},
1126 { "Spanned", DDF_2SPANNED},
1127 { NULL, 0}
1128};
1129#endif
1130
1131struct num_mapping {
1132 int num1, num2;
1133};
1134static struct num_mapping ddf_level_num[] = {
1135 { DDF_RAID0, 0 },
1136 { DDF_RAID1, 1 },
1137 { DDF_RAID3, LEVEL_UNSUPPORTED },
60f18132
NB
1138 { DDF_RAID4, 4 },
1139 { DDF_RAID5, 5 },
a322f70c
DW
1140 { DDF_RAID1E, LEVEL_UNSUPPORTED },
1141 { DDF_JBOD, LEVEL_UNSUPPORTED },
1142 { DDF_CONCAT, LEVEL_LINEAR },
1143 { DDF_RAID5E, LEVEL_UNSUPPORTED },
1144 { DDF_RAID5EE, LEVEL_UNSUPPORTED },
1145 { DDF_RAID6, 6},
1146 { MAXINT, MAXINT }
1147};
1148
fb9d0acb 1149static int all_ff(const char *guid)
42dc2744
N
1150{
1151 int i;
1152 for (i = 0; i < DDF_GUID_LEN; i++)
1153 if (guid[i] != (char)0xff)
1154 return 0;
1155 return 1;
1156}
1157
a322f70c
DW
1158#ifndef MDASSEMBLE
1159static void print_guid(char *guid, int tstamp)
1160{
1161 /* A GUIDs are part (or all) ASCII and part binary.
1162 * They tend to be space padded.
59e36268
NB
1163 * We print the GUID in HEX, then in parentheses add
1164 * any initial ASCII sequence, and a possible
1165 * time stamp from bytes 16-19
a322f70c
DW
1166 */
1167 int l = DDF_GUID_LEN;
1168 int i;
59e36268
NB
1169
1170 for (i=0 ; i<DDF_GUID_LEN ; i++) {
1171 if ((i&3)==0 && i != 0) printf(":");
1172 printf("%02X", guid[i]&255);
1173 }
1174
cfccea8c 1175 printf("\n (");
a322f70c
DW
1176 while (l && guid[l-1] == ' ')
1177 l--;
1178 for (i=0 ; i<l ; i++) {
1179 if (guid[i] >= 0x20 && guid[i] < 0x7f)
1180 fputc(guid[i], stdout);
1181 else
59e36268 1182 break;
a322f70c
DW
1183 }
1184 if (tstamp) {
1185 time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
1186 char tbuf[100];
1187 struct tm *tm;
1188 tm = localtime(&then);
59e36268 1189 strftime(tbuf, 100, " %D %T",tm);
a322f70c
DW
1190 fputs(tbuf, stdout);
1191 }
59e36268 1192 printf(")");
a322f70c
DW
1193}
1194
1195static void examine_vd(int n, struct ddf_super *sb, char *guid)
1196{
8c3b8c2c 1197 int crl = sb->conf_rec_len;
a322f70c
DW
1198 struct vcl *vcl;
1199
1200 for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
f21e18ca 1201 unsigned int i;
a322f70c
DW
1202 struct vd_config *vc = &vcl->conf;
1203
1204 if (calc_crc(vc, crl*512) != vc->crc)
1205 continue;
1206 if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
1207 continue;
1208
1209 /* Ok, we know about this VD, let's give more details */
b06e3095 1210 printf(" Raid Devices[%d] : %d (", n,
a322f70c 1211 __be16_to_cpu(vc->prim_elmnt_count));
f21e18ca 1212 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) {
b06e3095
N
1213 int j;
1214 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1215 for (j=0; j<cnt; j++)
1216 if (vc->phys_refnum[i] == sb->phys->entries[j].refnum)
1217 break;
1218 if (i) printf(" ");
1219 if (j < cnt)
1220 printf("%d", j);
1221 else
1222 printf("--");
1223 }
1224 printf(")\n");
1225 if (vc->chunk_shift != 255)
613b0d17
N
1226 printf(" Chunk Size[%d] : %d sectors\n", n,
1227 1 << vc->chunk_shift);
a322f70c
DW
1228 printf(" Raid Level[%d] : %s\n", n,
1229 map_num(ddf_level, vc->prl)?:"-unknown-");
1230 if (vc->sec_elmnt_count != 1) {
1231 printf(" Secondary Position[%d] : %d of %d\n", n,
1232 vc->sec_elmnt_seq, vc->sec_elmnt_count);
1233 printf(" Secondary Level[%d] : %s\n", n,
1234 map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
1235 }
1236 printf(" Device Size[%d] : %llu\n", n,
c9b6907b 1237 (unsigned long long)__be64_to_cpu(vc->blocks)/2);
a322f70c 1238 printf(" Array Size[%d] : %llu\n", n,
c9b6907b 1239 (unsigned long long)__be64_to_cpu(vc->array_blocks)/2);
a322f70c
DW
1240 }
1241}
1242
1243static void examine_vds(struct ddf_super *sb)
1244{
1245 int cnt = __be16_to_cpu(sb->virt->populated_vdes);
fb9d0acb 1246 unsigned int i;
a322f70c
DW
1247 printf(" Virtual Disks : %d\n", cnt);
1248
fb9d0acb 1249 for (i = 0; i < __be16_to_cpu(sb->virt->max_vdes); i++) {
a322f70c 1250 struct virtual_entry *ve = &sb->virt->entries[i];
fb9d0acb 1251 if (all_ff(ve->guid))
1252 continue;
b06e3095 1253 printf("\n");
a322f70c
DW
1254 printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
1255 printf("\n");
1256 printf(" unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
1257 printf(" state[%d] : %s, %s%s\n", i,
1258 map_num(ddf_state, ve->state & 7),
1259 (ve->state & 8) ? "Morphing, ": "",
1260 (ve->state & 16)? "Not Consistent" : "Consistent");
1261 printf(" init state[%d] : %s\n", i,
1262 map_num(ddf_init_state, ve->init_state&3));
1263 printf(" access[%d] : %s\n", i,
1264 map_num(ddf_access, (ve->init_state>>6) & 3));
1265 printf(" Name[%d] : %.16s\n", i, ve->name);
1266 examine_vd(i, sb, ve->guid);
1267 }
1268 if (cnt) printf("\n");
1269}
1270
1271static void examine_pds(struct ddf_super *sb)
1272{
1273 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1274 int i;
1275 struct dl *dl;
1276 printf(" Physical Disks : %d\n", cnt);
962371a5 1277 printf(" Number RefNo Size Device Type/State\n");
a322f70c
DW
1278
1279 for (i=0 ; i<cnt ; i++) {
1280 struct phys_disk_entry *pd = &sb->phys->entries[i];
1281 int type = __be16_to_cpu(pd->type);
1282 int state = __be16_to_cpu(pd->state);
1283
b06e3095
N
1284 //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
1285 //printf("\n");
1286 printf(" %3d %08x ", i,
a322f70c 1287 __be32_to_cpu(pd->refnum));
613b0d17 1288 printf("%8lluK ",
c9b6907b 1289 (unsigned long long)__be64_to_cpu(pd->config_size)>>1);
b06e3095
N
1290 for (dl = sb->dlist; dl ; dl = dl->next) {
1291 if (dl->disk.refnum == pd->refnum) {
1292 char *dv = map_dev(dl->major, dl->minor, 0);
1293 if (dv) {
962371a5 1294 printf("%-15s", dv);
b06e3095
N
1295 break;
1296 }
1297 }
1298 }
1299 if (!dl)
962371a5 1300 printf("%15s","");
b06e3095 1301 printf(" %s%s%s%s%s",
a322f70c 1302 (type&2) ? "active":"",
b06e3095 1303 (type&4) ? "Global-Spare":"",
a322f70c
DW
1304 (type&8) ? "spare" : "",
1305 (type&16)? ", foreign" : "",
1306 (type&32)? "pass-through" : "");
18cb4496
N
1307 if (state & DDF_Failed)
1308 /* This over-rides these three */
1309 state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
b06e3095 1310 printf("/%s%s%s%s%s%s%s",
a322f70c
DW
1311 (state&1)? "Online": "Offline",
1312 (state&2)? ", Failed": "",
1313 (state&4)? ", Rebuilding": "",
1314 (state&8)? ", in-transition": "",
b06e3095
N
1315 (state&16)? ", SMART-errors": "",
1316 (state&32)? ", Unrecovered-Read-Errors": "",
a322f70c 1317 (state&64)? ", Missing" : "");
a322f70c
DW
1318 printf("\n");
1319 }
1320}
1321
1322static void examine_super_ddf(struct supertype *st, char *homehost)
1323{
1324 struct ddf_super *sb = st->sb;
1325
1326 printf(" Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
1327 printf(" Version : %.8s\n", sb->anchor.revision);
598f0d58
NB
1328 printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
1329 printf("\n");
1330 printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
a322f70c
DW
1331 printf("\n");
1332 printf(" Seq : %08x\n", __be32_to_cpu(sb->active->seq));
1333 printf(" Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
1334 ?"yes" : "no");
1335 examine_vds(sb);
1336 examine_pds(sb);
1337}
1338
a5d85af7 1339static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
ff54de6e 1340
bedbf68a 1341static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
42dc2744 1342static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
ff54de6e 1343
bedbf68a 1344static unsigned int get_vd_num_of_subarray(struct supertype *st)
1345{
1346 /*
1347 * Figure out the VD number for this supertype.
1348 * Returns DDF_CONTAINER for the container itself,
1349 * and DDF_NOTFOUND on error.
1350 */
1351 struct ddf_super *ddf = st->sb;
1352 struct mdinfo *sra;
1353 char *sub, *end;
1354 unsigned int vcnum;
1355
1356 if (*st->container_devnm == '\0')
1357 return DDF_CONTAINER;
1358
1359 sra = sysfs_read(-1, st->devnm, GET_VERSION);
1360 if (!sra || sra->array.major_version != -1 ||
1361 sra->array.minor_version != -2 ||
1362 !is_subarray(sra->text_version))
1363 return DDF_NOTFOUND;
1364
1365 sub = strchr(sra->text_version + 1, '/');
1366 if (sub != NULL)
1367 vcnum = strtoul(sub + 1, &end, 10);
1368 if (sub == NULL || *sub == '\0' || *end != '\0' ||
1369 vcnum >= __be16_to_cpu(ddf->active->max_vd_entries))
1370 return DDF_NOTFOUND;
1371
1372 return vcnum;
1373}
1374
061f2c6a 1375static void brief_examine_super_ddf(struct supertype *st, int verbose)
4737ae25
N
1376{
1377 /* We just write a generic DDF ARRAY entry
1378 */
1379 struct mdinfo info;
1380 char nbuf[64];
a5d85af7 1381 getinfo_super_ddf(st, &info, NULL);
4737ae25
N
1382 fname_from_uuid(st, &info, nbuf, ':');
1383
1384 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1385}
1386
1387static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
a322f70c
DW
1388{
1389 /* We just write a generic DDF ARRAY entry
a322f70c 1390 */
42dc2744 1391 struct ddf_super *ddf = st->sb;
ff54de6e 1392 struct mdinfo info;
f21e18ca 1393 unsigned int i;
ff54de6e 1394 char nbuf[64];
a5d85af7 1395 getinfo_super_ddf(st, &info, NULL);
ff54de6e 1396 fname_from_uuid(st, &info, nbuf, ':');
42dc2744 1397
f21e18ca 1398 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
42dc2744
N
1399 struct virtual_entry *ve = &ddf->virt->entries[i];
1400 struct vcl vcl;
1401 char nbuf1[64];
1402 if (all_ff(ve->guid))
1403 continue;
1404 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1405 ddf->currentconf =&vcl;
1406 uuid_from_super_ddf(st, info.uuid);
1407 fname_from_uuid(st, &info, nbuf1, ':');
1408 printf("ARRAY container=%s member=%d UUID=%s\n",
1409 nbuf+5, i, nbuf1+5);
1410 }
a322f70c
DW
1411}
1412
bceedeec
N
1413static void export_examine_super_ddf(struct supertype *st)
1414{
1415 struct mdinfo info;
1416 char nbuf[64];
a5d85af7 1417 getinfo_super_ddf(st, &info, NULL);
bceedeec
N
1418 fname_from_uuid(st, &info, nbuf, ':');
1419 printf("MD_METADATA=ddf\n");
1420 printf("MD_LEVEL=container\n");
1421 printf("MD_UUID=%s\n", nbuf+5);
1422}
bceedeec 1423
74db60b0
N
1424static int copy_metadata_ddf(struct supertype *st, int from, int to)
1425{
1426 void *buf;
1427 unsigned long long dsize, offset;
1428 int bytes;
1429 struct ddf_header *ddf;
1430 int written = 0;
1431
1432 /* The meta consists of an anchor, a primary, and a secondary.
1433 * This all lives at the end of the device.
1434 * So it is easiest to find the earliest of primary and
1435 * secondary, and copy everything from there.
1436 *
1437 * Anchor is 512 from end It contains primary_lba and secondary_lba
1438 * we choose one of those
1439 */
1440
1441 if (posix_memalign(&buf, 4096, 4096) != 0)
1442 return 1;
1443
1444 if (!get_dev_size(from, NULL, &dsize))
1445 goto err;
1446
1447 if (lseek64(from, dsize-512, 0) < 0)
1448 goto err;
1449 if (read(from, buf, 512) != 512)
1450 goto err;
1451 ddf = buf;
1452 if (ddf->magic != DDF_HEADER_MAGIC ||
1453 calc_crc(ddf, 512) != ddf->crc ||
1454 (memcmp(ddf->revision, DDF_REVISION_0, 8) != 0 &&
1455 memcmp(ddf->revision, DDF_REVISION_2, 8) != 0))
1456 goto err;
1457
1458 offset = dsize - 512;
1459 if ((__be64_to_cpu(ddf->primary_lba) << 9) < offset)
1460 offset = __be64_to_cpu(ddf->primary_lba) << 9;
1461 if ((__be64_to_cpu(ddf->secondary_lba) << 9) < offset)
1462 offset = __be64_to_cpu(ddf->secondary_lba) << 9;
1463
1464 bytes = dsize - offset;
1465
1466 if (lseek64(from, offset, 0) < 0 ||
1467 lseek64(to, offset, 0) < 0)
1468 goto err;
1469 while (written < bytes) {
1470 int n = bytes - written;
1471 if (n > 4096)
1472 n = 4096;
1473 if (read(from, buf, n) != n)
1474 goto err;
1475 if (write(to, buf, n) != n)
1476 goto err;
1477 written += n;
1478 }
1479 free(buf);
1480 return 0;
1481err:
1482 free(buf);
1483 return 1;
1484}
1485
a322f70c
DW
static void detail_super_ddf(struct supertype *st, char *homehost)
{
	/* Intentionally empty for now.
	 * FIXME later:
	 *  - could print the DDF GUID
	 *  - need to find which array this is
	 *  - if the whole container, briefly list all arrays
	 *  - if a single array, give its name
	 */
}
1495
1496static void brief_detail_super_ddf(struct supertype *st)
1497{
ff54de6e
N
1498 struct mdinfo info;
1499 char nbuf[64];
bedbf68a 1500 struct ddf_super *ddf = st->sb;
1501 unsigned int vcnum = get_vd_num_of_subarray(st);
1502 if (vcnum == DDF_CONTAINER)
1503 uuid_from_super_ddf(st, info.uuid);
1504 else if (vcnum == DDF_NOTFOUND)
1505 return;
1506 else
1507 uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, info.uuid);
ff54de6e
N
1508 fname_from_uuid(st, &info, nbuf,':');
1509 printf(" UUID=%s", nbuf + 5);
a322f70c 1510}
a322f70c
DW
1511#endif
1512
1513static int match_home_ddf(struct supertype *st, char *homehost)
1514{
1515 /* It matches 'this' host if the controller is a
1516 * Linux-MD controller with vendor_data matching
1517 * the hostname
1518 */
1519 struct ddf_super *ddf = st->sb;
f21e18ca 1520 unsigned int len;
d1d3482b
N
1521
1522 if (!homehost)
1523 return 0;
1524 len = strlen(homehost);
a322f70c
DW
1525
1526 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1527 len < sizeof(ddf->controller.vendor_data) &&
1528 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1529 ddf->controller.vendor_data[len] == 0);
1530}
1531
0e600426 1532#ifndef MDASSEMBLE
baba3f4e 1533static int find_index_in_bvd(const struct ddf_super *ddf,
1534 const struct vd_config *conf, unsigned int n,
1535 unsigned int *n_bvd)
1536{
1537 /*
1538 * Find the index of the n-th valid physical disk in this BVD
1539 */
1540 unsigned int i, j;
1541 for (i = 0, j = 0; i < ddf->mppe &&
1542 j < __be16_to_cpu(conf->prim_elmnt_count); i++) {
1543 if (conf->phys_refnum[i] != 0xffffffff) {
1544 if (n == j) {
1545 *n_bvd = i;
1546 return 1;
1547 }
1548 j++;
1549 }
1550 }
1551 dprintf("%s: couldn't find BVD member %u (total %u)\n",
1552 __func__, n, __be16_to_cpu(conf->prim_elmnt_count));
1553 return 0;
1554}
1555
1556static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
1557 unsigned int n,
1558 unsigned int *n_bvd, struct vcl **vcl)
a322f70c 1559{
7a7cc504 1560 struct vcl *v;
59e36268 1561
baba3f4e 1562 for (v = ddf->conflist; v; v = v->next) {
1563 unsigned int nsec, ibvd;
1564 struct vd_config *conf;
1565 if (inst != v->vcnum)
1566 continue;
1567 conf = &v->conf;
1568 if (conf->sec_elmnt_count == 1) {
1569 if (find_index_in_bvd(ddf, conf, n, n_bvd)) {
1570 *vcl = v;
1571 return conf;
1572 } else
1573 goto bad;
1574 }
1575 if (v->other_bvds == NULL) {
1576 pr_err("%s: BUG: other_bvds is NULL, nsec=%u\n",
1577 __func__, conf->sec_elmnt_count);
1578 goto bad;
1579 }
1580 nsec = n / __be16_to_cpu(conf->prim_elmnt_count);
1581 if (conf->sec_elmnt_seq != nsec) {
1582 for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
1583 if (v->other_bvds[ibvd-1] == NULL)
1584 continue;
1585 if (v->other_bvds[ibvd-1]->sec_elmnt_seq
1586 == nsec)
1587 break;
1588 }
1589 if (ibvd == conf->sec_elmnt_count)
1590 goto bad;
1591 conf = v->other_bvds[ibvd-1];
1592 }
1593 if (!find_index_in_bvd(ddf, conf,
1594 n - nsec*conf->sec_elmnt_count, n_bvd))
1595 goto bad;
1596 dprintf("%s: found disk %u as member %u in bvd %d of array %u\n"
1597 , __func__, n, *n_bvd, ibvd-1, inst);
1598 *vcl = v;
1599 return conf;
1600 }
1601bad:
1602 pr_err("%s: Could't find disk %d in array %u\n", __func__, n, inst);
7a7cc504
NB
1603 return NULL;
1604}
0e600426 1605#endif
7a7cc504 1606
5ec636b7 1607static int find_phys(const struct ddf_super *ddf, __u32 phys_refnum)
7a7cc504
NB
1608{
1609 /* Find the entry in phys_disk which has the given refnum
1610 * and return it's index
1611 */
f21e18ca
N
1612 unsigned int i;
1613 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
7a7cc504
NB
1614 if (ddf->phys->entries[i].refnum == phys_refnum)
1615 return i;
1616 return -1;
a322f70c
DW
1617}
1618
bedbf68a 1619static void uuid_from_ddf_guid(const char *guid, int uuid[4])
1620{
1621 char buf[20];
1622 struct sha1_ctx ctx;
1623 sha1_init_ctx(&ctx);
1624 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
1625 sha1_finish_ctx(&ctx, buf);
1626 memcpy(uuid, buf, 4*4);
1627}
1628
a322f70c
DW
1629static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1630{
1631 /* The uuid returned here is used for:
1632 * uuid to put into bitmap file (Create, Grow)
1633 * uuid for backup header when saving critical section (Grow)
1634 * comparing uuids when re-adding a device into an array
51006d85
N
1635 * In these cases the uuid required is that of the data-array,
1636 * not the device-set.
1637 * uuid to recognise same set when adding a missing device back
1638 * to an array. This is a uuid for the device-set.
613b0d17 1639 *
a322f70c
DW
1640 * For each of these we can make do with a truncated
1641 * or hashed uuid rather than the original, as long as
1642 * everyone agrees.
a322f70c
DW
1643 * In the case of SVD we assume the BVD is of interest,
1644 * though that might be the case if a bitmap were made for
1645 * a mirrored SVD - worry about that later.
1646 * So we need to find the VD configuration record for the
1647 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1648 * The first 16 bytes of the sha1 of these is used.
1649 */
1650 struct ddf_super *ddf = st->sb;
d2ca6449 1651 struct vcl *vcl = ddf->currentconf;
c5afc314 1652 char *guid;
a322f70c 1653
c5afc314
N
1654 if (vcl)
1655 guid = vcl->conf.guid;
1656 else
1657 guid = ddf->anchor.guid;
bedbf68a 1658 uuid_from_ddf_guid(guid, uuid);
a322f70c
DW
1659}
1660
a5d85af7 1661static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
78e44928 1662
a5d85af7 1663static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1664{
1665 struct ddf_super *ddf = st->sb;
a5d85af7 1666 int map_disks = info->array.raid_disks;
90fa1a29 1667 __u32 *cptr;
a322f70c 1668
78e44928 1669 if (ddf->currentconf) {
a5d85af7 1670 getinfo_super_ddf_bvd(st, info, map);
78e44928
NB
1671 return;
1672 }
95eeceeb 1673 memset(info, 0, sizeof(*info));
78e44928 1674
a322f70c
DW
1675 info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
1676 info->array.level = LEVEL_CONTAINER;
1677 info->array.layout = 0;
1678 info->array.md_minor = -1;
90fa1a29
JS
1679 cptr = (__u32 *)(ddf->anchor.guid + 16);
1680 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1681
a322f70c
DW
1682 info->array.utime = 0;
1683 info->array.chunk_size = 0;
510242aa 1684 info->container_enough = 1;
a322f70c 1685
a322f70c
DW
1686 info->disk.major = 0;
1687 info->disk.minor = 0;
cba0191b
NB
1688 if (ddf->dlist) {
1689 info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
59e36268 1690 info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
d2ca6449
NB
1691
1692 info->data_offset = __be64_to_cpu(ddf->phys->
613b0d17
N
1693 entries[info->disk.raid_disk].
1694 config_size);
d2ca6449 1695 info->component_size = ddf->dlist->size - info->data_offset;
cba0191b
NB
1696 } else {
1697 info->disk.number = -1;
661dce36 1698 info->disk.raid_disk = -1;
cba0191b
NB
1699// info->disk.raid_disk = find refnum in the table and use index;
1700 }
f22385f9 1701 info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
a19c88b8 1702
921d9e16 1703 info->recovery_start = MaxSector;
a19c88b8 1704 info->reshape_active = 0;
6e75048b 1705 info->recovery_blocked = 0;
c5afc314 1706 info->name[0] = 0;
a322f70c 1707
f35f2525
N
1708 info->array.major_version = -1;
1709 info->array.minor_version = -2;
159c3a1a 1710 strcpy(info->text_version, "ddf");
a67dd8cc 1711 info->safe_mode_delay = 0;
159c3a1a 1712
c5afc314 1713 uuid_from_super_ddf(st, info->uuid);
a322f70c 1714
a5d85af7
N
1715 if (map) {
1716 int i;
1717 for (i = 0 ; i < map_disks; i++) {
1718 if (i < info->array.raid_disks &&
1719 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1720 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1721 map[i] = 1;
1722 else
1723 map[i] = 0;
1724 }
1725 }
a322f70c
DW
1726}
1727
a5d85af7 1728static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1729{
1730 struct ddf_super *ddf = st->sb;
d2ca6449
NB
1731 struct vcl *vc = ddf->currentconf;
1732 int cd = ddf->currentdev;
db42fa9b 1733 int j;
8592f29d 1734 struct dl *dl;
a5d85af7 1735 int map_disks = info->array.raid_disks;
90fa1a29 1736 __u32 *cptr;
a322f70c 1737
95eeceeb 1738 memset(info, 0, sizeof(*info));
8a2848a7 1739 if (layout_ddf2md(&vc->conf, &info->array) == -1)
1740 return;
a322f70c 1741 info->array.md_minor = -1;
90fa1a29
JS
1742 cptr = (__u32 *)(vc->conf.guid + 16);
1743 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
d2ca6449
NB
1744 info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp);
1745 info->array.chunk_size = 512 << vc->conf.chunk_shift;
da9b4a62 1746 info->custom_array_size = 0;
d2ca6449 1747
f21e18ca 1748 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
d2ca6449
NB
1749 info->data_offset = __be64_to_cpu(vc->lba_offset[cd]);
1750 if (vc->block_sizes)
1751 info->component_size = vc->block_sizes[cd];
1752 else
1753 info->component_size = __be64_to_cpu(vc->conf.blocks);
1754 }
a322f70c 1755
fb204fb2
N
1756 for (dl = ddf->dlist; dl ; dl = dl->next)
1757 if (dl->raiddisk == ddf->currentdev)
1758 break;
1759
a322f70c
DW
1760 info->disk.major = 0;
1761 info->disk.minor = 0;
fb204fb2 1762 info->disk.state = 0;
8592f29d
N
1763 if (dl) {
1764 info->disk.major = dl->major;
1765 info->disk.minor = dl->minor;
fb204fb2
N
1766 info->disk.raid_disk = dl->raiddisk;
1767 info->disk.number = dl->pdnum;
1768 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
8592f29d 1769 }
a322f70c 1770
103f2410
NB
1771 info->container_member = ddf->currentconf->vcnum;
1772
921d9e16 1773 info->recovery_start = MaxSector;
80d26cb2 1774 info->resync_start = 0;
624c5ad4 1775 info->reshape_active = 0;
6e75048b 1776 info->recovery_blocked = 0;
80d26cb2
NB
1777 if (!(ddf->virt->entries[info->container_member].state
1778 & DDF_state_inconsistent) &&
1779 (ddf->virt->entries[info->container_member].init_state
1780 & DDF_initstate_mask)
1781 == DDF_init_full)
b7528a20 1782 info->resync_start = MaxSector;
80d26cb2 1783
a322f70c
DW
1784 uuid_from_super_ddf(st, info->uuid);
1785
f35f2525
N
1786 info->array.major_version = -1;
1787 info->array.minor_version = -2;
9b63e648 1788 sprintf(info->text_version, "/%s/%d",
4dd2df09 1789 st->container_devnm,
9b63e648 1790 info->container_member);
a67dd8cc 1791 info->safe_mode_delay = 200;
159c3a1a 1792
db42fa9b
N
1793 memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
1794 info->name[16]=0;
1795 for(j=0; j<16; j++)
1796 if (info->name[j] == ' ')
1797 info->name[j] = 0;
a5d85af7
N
1798
1799 if (map)
1800 for (j = 0; j < map_disks; j++) {
1801 map[j] = 0;
1802 if (j < info->array.raid_disks) {
1803 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
613b0d17 1804 if (i >= 0 &&
a5d85af7
N
1805 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1806 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1807 map[i] = 1;
1808 }
1809 }
a322f70c
DW
1810}
1811
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	/* For 'assemble' and 'force' we need to return non-zero if any
	 * change was made.  For others, the return value is ignored.
	 * Update options are:
	 *  force-one : This device looks a bit old but needs to be included,
	 *        update age info appropriately.
	 *  assemble: clear any 'faulty' flag to allow this device to
	 *		be assembled.
	 *  force-array: Array is degraded but being forced, mark it clean
	 *	   if that will be needed to assemble it.
	 *
	 *  newdev:  not used ????
	 *  grow:  Array has gained a new device - this is currently for
	 *		linear only
	 *  resync: mark as dirty so a resync will happen.
	 *  uuid:  Change the uuid of the array to match what is given
	 *  homehost:  update the recorded homehost
	 *  name:  update the name - preserving the homehost
	 *  _reshape_progress: record new reshape_progress position.
	 *
	 * Following are not relevant for this version:
	 *  sparc2.2 : update from old dodgey metadata
	 *  super-minor: change the preferred_minor number
	 *  summaries:  update redundant counters.
	 *
	 * We don't need to handle "force-*" or "assemble" as
	 * there is no need to 'trick' the kernel.  When the metadata is
	 * first updated to activate the array, all the implied modifications
	 * will just happen.
	 *
	 * As implemented, nothing is modified: the updates listed in
	 * accepted[] return 0 and everything else returns -1.
	 */
	static const char *const accepted[] = {
		"grow",			/* FIXME */
		"resync",
		"_reshape_progress",	/* We don't support reshape yet */
		"assemble",		/* Do nothing, just succeed */
	};
	unsigned int k;

	for (k = 0; k < sizeof(accepted)/sizeof(accepted[0]); k++)
		if (strcmp(update, accepted[k]) == 0)
			return 0;

	/* "homehost" (would live in controller->vendor_data) and "name"
	 * (would live in virtual_entry->name) are not implemented, and
	 * any other request is unknown.
	 */
	return -1;
}
1880
5f8097be
NB
1881static void make_header_guid(char *guid)
1882{
1883 __u32 stamp;
5f8097be
NB
1884 /* Create a DDF Header of Virtual Disk GUID */
1885
1886 /* 24 bytes of fiction required.
1887 * first 8 are a 'vendor-id' - "Linux-MD"
1888 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
1889 * Remaining 8 random number plus timestamp
1890 */
1891 memcpy(guid, T10, sizeof(T10));
1892 stamp = __cpu_to_be32(0xdeadbeef);
1893 memcpy(guid+8, &stamp, 4);
1894 stamp = __cpu_to_be32(0);
1895 memcpy(guid+12, &stamp, 4);
1896 stamp = __cpu_to_be32(time(0) - DECADE);
1897 memcpy(guid+16, &stamp, 4);
bfb7ea78 1898 stamp = random32();
5f8097be 1899 memcpy(guid+20, &stamp, 4);
5f8097be 1900}
59e36268 1901
fb9d0acb 1902static unsigned int find_unused_vde(const struct ddf_super *ddf)
1903{
1904 unsigned int i;
1905 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
1906 if (all_ff(ddf->virt->entries[i].guid))
1907 return i;
1908 }
1909 return DDF_NOTFOUND;
1910}
1911
1912static unsigned int find_vde_by_name(const struct ddf_super *ddf,
1913 const char *name)
1914{
1915 unsigned int i;
1916 if (name == NULL)
1917 return DDF_NOTFOUND;
1918 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
1919 if (all_ff(ddf->virt->entries[i].guid))
1920 continue;
1921 if (!strncmp(name, ddf->virt->entries[i].name,
1922 sizeof(ddf->virt->entries[i].name)))
1923 return i;
1924 }
1925 return DDF_NOTFOUND;
1926}
1927
1928static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
1929 const char *guid)
1930{
1931 unsigned int i;
1932 if (guid == NULL || all_ff(guid))
1933 return DDF_NOTFOUND;
1934 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++)
1935 if (!memcmp(ddf->virt->entries[i].guid, guid, DDF_GUID_LEN))
1936 return i;
1937 return DDF_NOTFOUND;
1938}
1939
78e44928
NB
1940static int init_super_ddf_bvd(struct supertype *st,
1941 mdu_array_info_t *info,
1942 unsigned long long size,
1943 char *name, char *homehost,
83cd1e97 1944 int *uuid, unsigned long long data_offset);
78e44928 1945
a322f70c
DW
1946static int init_super_ddf(struct supertype *st,
1947 mdu_array_info_t *info,
1948 unsigned long long size, char *name, char *homehost,
83cd1e97 1949 int *uuid, unsigned long long data_offset)
a322f70c
DW
1950{
1951 /* This is primarily called by Create when creating a new array.
1952 * We will then get add_to_super called for each component, and then
1953 * write_init_super called to write it out to each device.
1954 * For DDF, Create can create on fresh devices or on a pre-existing
1955 * array.
1956 * To create on a pre-existing array a different method will be called.
1957 * This one is just for fresh drives.
1958 *
1959 * We need to create the entire 'ddf' structure which includes:
1960 * DDF headers - these are easy.
1961 * Controller data - a Sector describing this controller .. not that
1962 * this is a controller exactly.
1963 * Physical Disk Record - one entry per device, so
1964 * leave plenty of space.
1965 * Virtual Disk Records - again, just leave plenty of space.
1966 * This just lists VDs, doesn't give details
1967 * Config records - describes the VDs that use this disk
1968 * DiskData - describes 'this' device.
1969 * BadBlockManagement - empty
1970 * Diag Space - empty
1971 * Vendor Logs - Could we put bitmaps here?
1972 *
1973 */
1974 struct ddf_super *ddf;
1975 char hostname[17];
1976 int hostlen;
a322f70c
DW
1977 int max_phys_disks, max_virt_disks;
1978 unsigned long long sector;
1979 int clen;
1980 int i;
1981 int pdsize, vdsize;
1982 struct phys_disk *pd;
1983 struct virtual_disk *vd;
1984
83cd1e97 1985 if (data_offset != INVALID_SECTORS) {
ed503f89 1986 pr_err("data-offset not supported by DDF\n");
83cd1e97
N
1987 return 0;
1988 }
1989
78e44928 1990 if (st->sb)
83cd1e97
N
1991 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
1992 data_offset);
ba7eb04f 1993
3d2c4fc7 1994 if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
e7b84f9d 1995 pr_err("%s could not allocate superblock\n", __func__);
3d2c4fc7
DW
1996 return 0;
1997 }
6264b437 1998 memset(ddf, 0, sizeof(*ddf));
a322f70c
DW
1999 ddf->dlist = NULL; /* no physical disks yet */
2000 ddf->conflist = NULL; /* No virtual disks yet */
955e9ea1
DW
2001 st->sb = ddf;
2002
2003 if (info == NULL) {
2004 /* zeroing superblock */
2005 return 0;
2006 }
a322f70c
DW
2007
2008 /* At least 32MB *must* be reserved for the ddf. So let's just
2009 * start 32MB from the end, and put the primary header there.
2010 * Don't do secondary for now.
2011 * We don't know exactly where that will be yet as it could be
2012 * different on each device. To just set up the lengths.
2013 *
2014 */
2015
2016 ddf->anchor.magic = DDF_HEADER_MAGIC;
5f8097be 2017 make_header_guid(ddf->anchor.guid);
a322f70c 2018
59e36268 2019 memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
a322f70c
DW
2020 ddf->anchor.seq = __cpu_to_be32(1);
2021 ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
2022 ddf->anchor.openflag = 0xFF;
2023 ddf->anchor.foreignflag = 0;
2024 ddf->anchor.enforcegroups = 0; /* Is this best?? */
2025 ddf->anchor.pad0 = 0xff;
2026 memset(ddf->anchor.pad1, 0xff, 12);
2027 memset(ddf->anchor.header_ext, 0xff, 32);
2028 ddf->anchor.primary_lba = ~(__u64)0;
2029 ddf->anchor.secondary_lba = ~(__u64)0;
2030 ddf->anchor.type = DDF_HEADER_ANCHOR;
2031 memset(ddf->anchor.pad2, 0xff, 3);
2032 ddf->anchor.workspace_len = __cpu_to_be32(32768); /* Must be reserved */
2033 ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
2034 of 32M reserved.. */
2035 max_phys_disks = 1023; /* Should be enough */
2036 ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
2037 max_virt_disks = 255;
2038 ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
2039 ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
2040 ddf->max_part = 64;
8c3b8c2c 2041 ddf->mppe = 256;
59e36268
NB
2042 ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
2043 ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
2044 ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
a322f70c 2045 memset(ddf->anchor.pad3, 0xff, 54);
a322f70c
DW
2046 /* controller sections is one sector long immediately
2047 * after the ddf header */
2048 sector = 1;
2049 ddf->anchor.controller_section_offset = __cpu_to_be32(sector);
2050 ddf->anchor.controller_section_length = __cpu_to_be32(1);
2051 sector += 1;
2052
2053 /* phys is 8 sectors after that */
2054 pdsize = ROUND_UP(sizeof(struct phys_disk) +
2055 sizeof(struct phys_disk_entry)*max_phys_disks,
2056 512);
2057 switch(pdsize/512) {
2058 case 2: case 8: case 32: case 128: case 512: break;
2059 default: abort();
2060 }
2061 ddf->anchor.phys_section_offset = __cpu_to_be32(sector);
2062 ddf->anchor.phys_section_length =
2063 __cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
2064 sector += pdsize/512;
2065
2066 /* virt is another 32 sectors */
2067 vdsize = ROUND_UP(sizeof(struct virtual_disk) +
2068 sizeof(struct virtual_entry) * max_virt_disks,
2069 512);
2070 switch(vdsize/512) {
2071 case 2: case 8: case 32: case 128: case 512: break;
2072 default: abort();
2073 }
2074 ddf->anchor.virt_section_offset = __cpu_to_be32(sector);
2075 ddf->anchor.virt_section_length =
2076 __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
2077 sector += vdsize/512;
2078
59e36268 2079 clen = ddf->conf_rec_len * (ddf->max_part+1);
a322f70c
DW
2080 ddf->anchor.config_section_offset = __cpu_to_be32(sector);
2081 ddf->anchor.config_section_length = __cpu_to_be32(clen);
2082 sector += clen;
2083
2084 ddf->anchor.data_section_offset = __cpu_to_be32(sector);
2085 ddf->anchor.data_section_length = __cpu_to_be32(1);
2086 sector += 1;
2087
2088 ddf->anchor.bbm_section_length = __cpu_to_be32(0);
2089 ddf->anchor.bbm_section_offset = __cpu_to_be32(0xFFFFFFFF);
2090 ddf->anchor.diag_space_length = __cpu_to_be32(0);
2091 ddf->anchor.diag_space_offset = __cpu_to_be32(0xFFFFFFFF);
2092 ddf->anchor.vendor_length = __cpu_to_be32(0);
2093 ddf->anchor.vendor_offset = __cpu_to_be32(0xFFFFFFFF);
2094
2095 memset(ddf->anchor.pad4, 0xff, 256);
2096
2097 memcpy(&ddf->primary, &ddf->anchor, 512);
2098 memcpy(&ddf->secondary, &ddf->anchor, 512);
2099
2100 ddf->primary.openflag = 1; /* I guess.. */
2101 ddf->primary.type = DDF_HEADER_PRIMARY;
2102
2103 ddf->secondary.openflag = 1; /* I guess.. */
2104 ddf->secondary.type = DDF_HEADER_SECONDARY;
2105
2106 ddf->active = &ddf->primary;
2107
2108 ddf->controller.magic = DDF_CONTROLLER_MAGIC;
2109
2110 /* 24 more bytes of fiction required.
2111 * first 8 are a 'vendor-id' - "Linux-MD"
2112 * Remaining 16 are serial number.... maybe a hostname would do?
2113 */
2114 memcpy(ddf->controller.guid, T10, sizeof(T10));
1ba6bff9
DW
2115 gethostname(hostname, sizeof(hostname));
2116 hostname[sizeof(hostname) - 1] = 0;
a322f70c
DW
2117 hostlen = strlen(hostname);
2118 memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
2119 for (i = strlen(T10) ; i+hostlen < 24; i++)
2120 ddf->controller.guid[i] = ' ';
2121
2122 ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
2123 ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
2124 ddf->controller.type.sub_vendor_id = 0;
2125 ddf->controller.type.sub_device_id = 0;
2126 memcpy(ddf->controller.product_id, "What Is My PID??", 16);
2127 memset(ddf->controller.pad, 0xff, 8);
2128 memset(ddf->controller.vendor_data, 0xff, 448);
a9e1c11d
N
2129 if (homehost && strlen(homehost) < 440)
2130 strcpy((char*)ddf->controller.vendor_data, homehost);
a322f70c 2131
3d2c4fc7 2132 if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
e7b84f9d 2133 pr_err("%s could not allocate pd\n", __func__);
3d2c4fc7
DW
2134 return 0;
2135 }
6416d527 2136 ddf->phys = pd;
a322f70c
DW
2137 ddf->pdsize = pdsize;
2138
2139 memset(pd, 0xff, pdsize);
2140 memset(pd, 0, sizeof(*pd));
076515ba 2141 pd->magic = DDF_PHYS_RECORDS_MAGIC;
a322f70c
DW
2142 pd->used_pdes = __cpu_to_be16(0);
2143 pd->max_pdes = __cpu_to_be16(max_phys_disks);
2144 memset(pd->pad, 0xff, 52);
2145
3d2c4fc7 2146 if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
e7b84f9d 2147 pr_err("%s could not allocate vd\n", __func__);
3d2c4fc7
DW
2148 return 0;
2149 }
6416d527 2150 ddf->virt = vd;
a322f70c
DW
2151 ddf->vdsize = vdsize;
2152 memset(vd, 0, vdsize);
2153 vd->magic = DDF_VIRT_RECORDS_MAGIC;
2154 vd->populated_vdes = __cpu_to_be16(0);
2155 vd->max_vdes = __cpu_to_be16(max_virt_disks);
2156 memset(vd->pad, 0xff, 52);
2157
5f8097be
NB
2158 for (i=0; i<max_virt_disks; i++)
2159 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
2160
a322f70c 2161 st->sb = ddf;
7d5a7ff3 2162 ddf_set_updates_pending(ddf);
a322f70c
DW
2163 return 1;
2164}
2165
5f8097be
NB
/*
 * Convert a chunk size in bytes into a shift count relative to 512-byte
 * sectors: the zero-based position of the lowest set bit of chunksize/512.
 * Yields -1 when chunksize/512 is zero.
 */
static int chunk_to_shift(int chunksize)
{
	int sectors = chunksize / 512;
	int lowest_bit = ffs(sectors);	/* 1-based, 0 if no bit is set */

	return lowest_bit - 1;
}
2170
2171static int level_to_prl(int level)
2172{
2173 switch (level) {
2174 case LEVEL_LINEAR: return DDF_CONCAT;
2175 case 0: return DDF_RAID0;
2176 case 1: return DDF_RAID1;
2177 case 4: return DDF_RAID4;
2178 case 5: return DDF_RAID5;
2179 case 6: return DDF_RAID6;
2180 default: return -1;
2181 }
2182}
613b0d17 2183
5f8097be
NB
2184static int layout_to_rlq(int level, int layout, int raiddisks)
2185{
2186 switch(level) {
2187 case 0:
2188 return DDF_RAID0_SIMPLE;
2189 case 1:
2190 switch(raiddisks) {
2191 case 2: return DDF_RAID1_SIMPLE;
2192 case 3: return DDF_RAID1_MULTI;
2193 default: return -1;
2194 }
2195 case 4:
2196 switch(layout) {
2197 case 0: return DDF_RAID4_N;
2198 }
2199 break;
2200 case 5:
5f8097be
NB
2201 switch(layout) {
2202 case ALGORITHM_LEFT_ASYMMETRIC:
2203 return DDF_RAID5_N_RESTART;
2204 case ALGORITHM_RIGHT_ASYMMETRIC:
b640a252 2205 return DDF_RAID5_0_RESTART;
5f8097be
NB
2206 case ALGORITHM_LEFT_SYMMETRIC:
2207 return DDF_RAID5_N_CONTINUE;
2208 case ALGORITHM_RIGHT_SYMMETRIC:
2209 return -1; /* not mentioned in standard */
2210 }
b640a252
N
2211 case 6:
2212 switch(layout) {
2213 case ALGORITHM_ROTATING_N_RESTART:
2214 return DDF_RAID5_N_RESTART;
2215 case ALGORITHM_ROTATING_ZERO_RESTART:
2216 return DDF_RAID6_0_RESTART;
2217 case ALGORITHM_ROTATING_N_CONTINUE:
2218 return DDF_RAID5_N_CONTINUE;
2219 }
5f8097be
NB
2220 }
2221 return -1;
2222}
2223
0e600426 2224#ifndef MDASSEMBLE
59e36268
NB
/* A region of a device: 'size' units beginning at 'start'. */
struct extent {
	unsigned long long start, size;
};

/*
 * qsort() comparator ordering extents by ascending 'start'.
 * Returns -1, 0 or 1; 'size' does not take part in the comparison.
 */
static int cmp_extent(const void *av, const void *bv)
{
	const struct extent *lhs = av;
	const struct extent *rhs = bv;

	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
}
2238
78e44928 2239static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
59e36268
NB
2240{
2241 /* find a list of used extents on the give physical device
2242 * (dnum) of the given ddf.
2243 * Return a malloced array of 'struct extent'
2244
613b0d17 2245 * FIXME ignore DDF_Legacy devices?
59e36268
NB
2246
2247 */
2248 struct extent *rv;
2249 int n = 0;
f21e18ca 2250 unsigned int i, j;
59e36268 2251
503975b9 2252 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
59e36268
NB
2253
2254 for (i = 0; i < ddf->max_part; i++) {
2255 struct vcl *v = dl->vlist[i];
2256 if (v == NULL)
2257 continue;
f21e18ca 2258 for (j = 0; j < v->conf.prim_elmnt_count; j++)
59e36268
NB
2259 if (v->conf.phys_refnum[j] == dl->disk.refnum) {
2260 /* This device plays role 'j' in 'v'. */
2261 rv[n].start = __be64_to_cpu(v->lba_offset[j]);
2262 rv[n].size = __be64_to_cpu(v->conf.blocks);
2263 n++;
2264 break;
2265 }
2266 }
2267 qsort(rv, n, sizeof(*rv), cmp_extent);
2268
2269 rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
2270 rv[n].size = 0;
2271 return rv;
2272}
0e600426 2273#endif
59e36268 2274
5f8097be
NB
2275static int init_super_ddf_bvd(struct supertype *st,
2276 mdu_array_info_t *info,
2277 unsigned long long size,
2278 char *name, char *homehost,
83cd1e97 2279 int *uuid, unsigned long long data_offset)
5f8097be
NB
2280{
2281 /* We are creating a BVD inside a pre-existing container.
2282 * so st->sb is already set.
2283 * We need to create a new vd_config and a new virtual_entry
2284 */
2285 struct ddf_super *ddf = st->sb;
f21e18ca 2286 unsigned int venum;
5f8097be
NB
2287 struct virtual_entry *ve;
2288 struct vcl *vcl;
2289 struct vd_config *vc;
5f8097be 2290
fb9d0acb 2291 if (find_vde_by_name(ddf, name) != DDF_NOTFOUND) {
2292 pr_err("This ddf already has an array called %s\n", name);
5f8097be
NB
2293 return 0;
2294 }
fb9d0acb 2295 venum = find_unused_vde(ddf);
2296 if (venum == DDF_NOTFOUND) {
2297 pr_err("Cannot find spare slot for virtual disk\n");
5f8097be
NB
2298 return 0;
2299 }
2300 ve = &ddf->virt->entries[venum];
2301
2302 /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
2303 * timestamp, random number
2304 */
2305 make_header_guid(ve->guid);
2306 ve->unit = __cpu_to_be16(info->md_minor);
2307 ve->pad0 = 0xFFFF;
2308 ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
2309 ve->type = 0;
7a7cc504
NB
2310 ve->state = DDF_state_degraded; /* Will be modified as devices are added */
2311 if (info->state & 1) /* clean */
2312 ve->init_state = DDF_init_full;
2313 else
2314 ve->init_state = DDF_init_not;
2315
5f8097be
NB
2316 memset(ve->pad1, 0xff, 14);
2317 memset(ve->name, ' ', 16);
2318 if (name)
2319 strncpy(ve->name, name, 16);
2320 ddf->virt->populated_vdes =
2321 __cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
2322
2323 /* Now create a new vd_config */
3d2c4fc7
DW
2324 if (posix_memalign((void**)&vcl, 512,
2325 (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
e7b84f9d 2326 pr_err("%s could not allocate vd_config\n", __func__);
3d2c4fc7
DW
2327 return 0;
2328 }
8c3b8c2c 2329 vcl->lba_offset = (__u64*) &vcl->conf.phys_refnum[ddf->mppe];
59e36268
NB
2330 vcl->vcnum = venum;
2331 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
8ec5d685 2332 vcl->other_bvds = NULL;
5f8097be
NB
2333
2334 vc = &vcl->conf;
2335
2336 vc->magic = DDF_VD_CONF_MAGIC;
2337 memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
2338 vc->timestamp = __cpu_to_be32(time(0)-DECADE);
2339 vc->seqnum = __cpu_to_be32(1);
2340 memset(vc->pad0, 0xff, 24);
2341 vc->prim_elmnt_count = __cpu_to_be16(info->raid_disks);
2342 vc->chunk_shift = chunk_to_shift(info->chunk_size);
2343 vc->prl = level_to_prl(info->level);
2344 vc->rlq = layout_to_rlq(info->level, info->layout, info->raid_disks);
2345 vc->sec_elmnt_count = 1;
2346 vc->sec_elmnt_seq = 0;
2347 vc->srl = 0;
2348 vc->blocks = __cpu_to_be64(info->size * 2);
2349 vc->array_blocks = __cpu_to_be64(
2350 calc_array_size(info->level, info->raid_disks, info->layout,
2351 info->chunk_size, info->size*2));
2352 memset(vc->pad1, 0xff, 8);
2353 vc->spare_refs[0] = 0xffffffff;
2354 vc->spare_refs[1] = 0xffffffff;
2355 vc->spare_refs[2] = 0xffffffff;
2356 vc->spare_refs[3] = 0xffffffff;
2357 vc->spare_refs[4] = 0xffffffff;
2358 vc->spare_refs[5] = 0xffffffff;
2359 vc->spare_refs[6] = 0xffffffff;
2360 vc->spare_refs[7] = 0xffffffff;
2361 memset(vc->cache_pol, 0, 8);
2362 vc->bg_rate = 0x80;
2363 memset(vc->pad2, 0xff, 3);
2364 memset(vc->pad3, 0xff, 52);
2365 memset(vc->pad4, 0xff, 192);
2366 memset(vc->v0, 0xff, 32);
2367 memset(vc->v1, 0xff, 32);
2368 memset(vc->v2, 0xff, 16);
2369 memset(vc->v3, 0xff, 16);
2370 memset(vc->vendor, 0xff, 32);
598f0d58 2371
8c3b8c2c 2372 memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
e5a2a3cf 2373 memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
5f8097be
NB
2374
2375 vcl->next = ddf->conflist;
2376 ddf->conflist = vcl;
d2ca6449 2377 ddf->currentconf = vcl;
7d5a7ff3 2378 ddf_set_updates_pending(ddf);
5f8097be
NB
2379 return 1;
2380}
2381
0e600426 2382#ifndef MDASSEMBLE
5f8097be
NB
2383static void add_to_super_ddf_bvd(struct supertype *st,
2384 mdu_disk_info_t *dk, int fd, char *devname)
2385{
2386 /* fd and devname identify a device with-in the ddf container (st).
2387 * dk identifies a location in the new BVD.
2388 * We need to find suitable free space in that device and update
2389 * the phys_refnum and lba_offset for the newly created vd_config.
2390 * We might also want to update the type in the phys_disk
5575e7d9 2391 * section.
8592f29d
N
2392 *
2393 * Alternately: fd == -1 and we have already chosen which device to
2394 * use and recorded in dlist->raid_disk;
5f8097be
NB
2395 */
2396 struct dl *dl;
2397 struct ddf_super *ddf = st->sb;
2398 struct vd_config *vc;
2399 __u64 *lba_offset;
f21e18ca
N
2400 unsigned int working;
2401 unsigned int i;
59e36268
NB
2402 unsigned long long blocks, pos, esize;
2403 struct extent *ex;
5f8097be 2404
8592f29d
N
2405 if (fd == -1) {
2406 for (dl = ddf->dlist; dl ; dl = dl->next)
2407 if (dl->raiddisk == dk->raid_disk)
2408 break;
2409 } else {
2410 for (dl = ddf->dlist; dl ; dl = dl->next)
2411 if (dl->major == dk->major &&
2412 dl->minor == dk->minor)
2413 break;
2414 }
5f8097be
NB
2415 if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
2416 return;
2417
d2ca6449
NB
2418 vc = &ddf->currentconf->conf;
2419 lba_offset = ddf->currentconf->lba_offset;
59e36268
NB
2420
2421 ex = get_extents(ddf, dl);
2422 if (!ex)
2423 return;
2424
2425 i = 0; pos = 0;
2426 blocks = __be64_to_cpu(vc->blocks);
d2ca6449
NB
2427 if (ddf->currentconf->block_sizes)
2428 blocks = ddf->currentconf->block_sizes[dk->raid_disk];
59e36268
NB
2429
2430 do {
2431 esize = ex[i].start - pos;
2432 if (esize >= blocks)
2433 break;
2434 pos = ex[i].start + ex[i].size;
2435 i++;
2436 } while (ex[i-1].size);
2437
2438 free(ex);
2439 if (esize < blocks)
2440 return;
2441
d2ca6449 2442 ddf->currentdev = dk->raid_disk;
5f8097be 2443 vc->phys_refnum[dk->raid_disk] = dl->disk.refnum;
59e36268 2444 lba_offset[dk->raid_disk] = __cpu_to_be64(pos);
5f8097be 2445
f21e18ca 2446 for (i = 0; i < ddf->max_part ; i++)
5575e7d9
NB
2447 if (dl->vlist[i] == NULL)
2448 break;
2449 if (i == ddf->max_part)
2450 return;
d2ca6449 2451 dl->vlist[i] = ddf->currentconf;
5f8097be 2452
8592f29d
N
2453 if (fd >= 0)
2454 dl->fd = fd;
2455 if (devname)
2456 dl->devname = devname;
7a7cc504
NB
2457
2458 /* Check how many working raid_disks, and if we can mark
2459 * array as optimal yet
2460 */
2461 working = 0;
5575e7d9 2462
f21e18ca 2463 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++)
7a7cc504
NB
2464 if (vc->phys_refnum[i] != 0xffffffff)
2465 working++;
59e36268 2466
5575e7d9 2467 /* Find which virtual_entry */
d2ca6449 2468 i = ddf->currentconf->vcnum;
7a7cc504 2469 if (working == __be16_to_cpu(vc->prim_elmnt_count))
5575e7d9
NB
2470 ddf->virt->entries[i].state =
2471 (ddf->virt->entries[i].state & ~DDF_state_mask)
7a7cc504
NB
2472 | DDF_state_optimal;
2473
2474 if (vc->prl == DDF_RAID6 &&
2475 working+1 == __be16_to_cpu(vc->prim_elmnt_count))
5575e7d9
NB
2476 ddf->virt->entries[i].state =
2477 (ddf->virt->entries[i].state & ~DDF_state_mask)
7a7cc504 2478 | DDF_state_part_optimal;
5575e7d9
NB
2479
2480 ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
2481 ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
7d5a7ff3 2482 ddf_set_updates_pending(ddf);
5f8097be
NB
2483}
2484
a322f70c
DW
2485/* add a device to a container, either while creating it or while
2486 * expanding a pre-existing container
2487 */
f20c3968 2488static int add_to_super_ddf(struct supertype *st,
72ca9bcf
N
2489 mdu_disk_info_t *dk, int fd, char *devname,
2490 unsigned long long data_offset)
a322f70c
DW
2491{
2492 struct ddf_super *ddf = st->sb;
2493 struct dl *dd;
2494 time_t now;
2495 struct tm *tm;
2496 unsigned long long size;
2497 struct phys_disk_entry *pde;
f21e18ca 2498 unsigned int n, i;
a322f70c 2499 struct stat stb;
90fa1a29 2500 __u32 *tptr;
a322f70c 2501
78e44928
NB
2502 if (ddf->currentconf) {
2503 add_to_super_ddf_bvd(st, dk, fd, devname);
f20c3968 2504 return 0;
78e44928
NB
2505 }
2506
a322f70c
DW
2507 /* This is device numbered dk->number. We need to create
2508 * a phys_disk entry and a more detailed disk_data entry.
2509 */
2510 fstat(fd, &stb);
3d2c4fc7
DW
2511 if (posix_memalign((void**)&dd, 512,
2512 sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
e7b84f9d
N
2513 pr_err("%s could allocate buffer for new disk, aborting\n",
2514 __func__);
f20c3968 2515 return 1;
3d2c4fc7 2516 }
a322f70c
DW
2517 dd->major = major(stb.st_rdev);
2518 dd->minor = minor(stb.st_rdev);
2519 dd->devname = devname;
a322f70c 2520 dd->fd = fd;
b2280677 2521 dd->spare = NULL;
a322f70c
DW
2522
2523 dd->disk.magic = DDF_PHYS_DATA_MAGIC;
2524 now = time(0);
2525 tm = localtime(&now);
2526 sprintf(dd->disk.guid, "%8s%04d%02d%02d",
2527 T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
90fa1a29
JS
2528 tptr = (__u32 *)(dd->disk.guid + 16);
2529 *tptr++ = random32();
2530 *tptr = random32();
a322f70c 2531
59e36268
NB
2532 do {
2533 /* Cannot be bothered finding a CRC of some irrelevant details*/
bfb7ea78 2534 dd->disk.refnum = random32();
f21e18ca
N
2535 for (i = __be16_to_cpu(ddf->active->max_pd_entries);
2536 i > 0; i--)
2537 if (ddf->phys->entries[i-1].refnum == dd->disk.refnum)
59e36268 2538 break;
f21e18ca 2539 } while (i > 0);
59e36268 2540
a322f70c
DW
2541 dd->disk.forced_ref = 1;
2542 dd->disk.forced_guid = 1;
2543 memset(dd->disk.vendor, ' ', 32);
2544 memcpy(dd->disk.vendor, "Linux", 5);
2545 memset(dd->disk.pad, 0xff, 442);
b2280677 2546 for (i = 0; i < ddf->max_part ; i++)
a322f70c
DW
2547 dd->vlist[i] = NULL;
2548
2549 n = __be16_to_cpu(ddf->phys->used_pdes);
2550 pde = &ddf->phys->entries[n];
5575e7d9
NB
2551 dd->pdnum = n;
2552
2cc2983d
N
2553 if (st->update_tail) {
2554 int len = (sizeof(struct phys_disk) +
2555 sizeof(struct phys_disk_entry));
2556 struct phys_disk *pd;
2557
503975b9 2558 pd = xmalloc(len);
2cc2983d
N
2559 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2560 pd->used_pdes = __cpu_to_be16(n);
2561 pde = &pd->entries[0];
2562 dd->mdupdate = pd;
2563 } else {
2564 n++;
2565 ddf->phys->used_pdes = __cpu_to_be16(n);
2566 }
a322f70c
DW
2567
2568 memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
2569 pde->refnum = dd->disk.refnum;
5575e7d9 2570 pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
a322f70c
DW
2571 pde->state = __cpu_to_be16(DDF_Online);
2572 get_dev_size(fd, NULL, &size);
2573 /* We are required to reserve 32Meg, and record the size in sectors */
2574 pde->config_size = __cpu_to_be64( (size - 32*1024*1024) / 512);
2575 sprintf(pde->path, "%17.17s","Information: nil") ;
2576 memset(pde->pad, 0xff, 6);
2577
d2ca6449 2578 dd->size = size >> 9;
2cc2983d
N
2579 if (st->update_tail) {
2580 dd->next = ddf->add_list;
2581 ddf->add_list = dd;
2582 } else {
2583 dd->next = ddf->dlist;
2584 ddf->dlist = dd;
7d5a7ff3 2585 ddf_set_updates_pending(ddf);
2cc2983d 2586 }
f20c3968
DW
2587
2588 return 0;
a322f70c
DW
2589}
2590
4dd968cc
N
2591static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2592{
2593 struct ddf_super *ddf = st->sb;
2594 struct dl *dl;
2595
2596 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2597 * disappeared from the container.
2598 * We need to arrange that it disappears from the metadata and
2599 * internal data structures too.
2600 * Most of the work is done by ddf_process_update which edits
2601 * the metadata and closes the file handle and attaches the memory
2602 * where free_updates will free it.
2603 */
2604 for (dl = ddf->dlist; dl ; dl = dl->next)
2605 if (dl->major == dk->major &&
2606 dl->minor == dk->minor)
2607 break;
2608 if (!dl)
2609 return -1;
2610
2611 if (st->update_tail) {
2612 int len = (sizeof(struct phys_disk) +
2613 sizeof(struct phys_disk_entry));
2614 struct phys_disk *pd;
2615
503975b9 2616 pd = xmalloc(len);
4dd968cc
N
2617 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2618 pd->used_pdes = __cpu_to_be16(dl->pdnum);
2619 pd->entries[0].state = __cpu_to_be16(DDF_Missing);
2620 append_metadata_update(st, pd, len);
2621 }
2622 return 0;
2623}
2624
a322f70c
DW
2625/*
2626 * This is the write_init_super method for a ddf container. It is
2627 * called when creating a container or adding another device to a
2628 * container.
2629 */
42d5dfd9 2630#define NULL_CONF_SZ 4096
18a2f463 2631
e3c2a365 2632static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
2633 __u32 refnum, unsigned int nmax,
2634 const struct vd_config **bvd,
2635 unsigned int *idx);
2636
7f798aca 2637static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type,
2638 char *null_aligned)
a322f70c 2639{
7f798aca 2640 unsigned long long sector;
2641 struct ddf_header *header;
2642 int fd, i, n_config, conf_size;
a4057a88 2643 int ret = 0;
7f798aca 2644
2645 fd = d->fd;
2646
2647 switch (type) {
2648 case DDF_HEADER_PRIMARY:
2649 header = &ddf->primary;
2650 sector = __be64_to_cpu(header->primary_lba);
2651 break;
2652 case DDF_HEADER_SECONDARY:
2653 header = &ddf->secondary;
2654 sector = __be64_to_cpu(header->secondary_lba);
2655 break;
2656 default:
2657 return 0;
2658 }
2659
2660 header->type = type;
a4057a88 2661 header->openflag = 1;
7f798aca 2662 header->crc = calc_crc(header, 512);
2663
2664 lseek64(fd, sector<<9, 0);
2665 if (write(fd, header, 512) < 0)
a4057a88 2666 goto out;
7f798aca 2667
2668 ddf->controller.crc = calc_crc(&ddf->controller, 512);
2669 if (write(fd, &ddf->controller, 512) < 0)
a4057a88 2670 goto out;
a322f70c 2671
7f798aca 2672 ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
2673 if (write(fd, ddf->phys, ddf->pdsize) < 0)
a4057a88 2674 goto out;
7f798aca 2675 ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
2676 if (write(fd, ddf->virt, ddf->vdsize) < 0)
a4057a88 2677 goto out;
7f798aca 2678
2679 /* Now write lots of config records. */
2680 n_config = ddf->max_part;
2681 conf_size = ddf->conf_rec_len * 512;
2682 for (i = 0 ; i <= n_config ; i++) {
e3c2a365 2683 struct vcl *c;
2684 struct vd_config *vdc = NULL;
2685 if (i == n_config) {
7f798aca 2686 c = (struct vcl *)d->spare;
e3c2a365 2687 if (c)
2688 vdc = &c->conf;
2689 } else {
2690 unsigned int dummy;
2691 c = d->vlist[i];
2692 if (c)
2693 get_pd_index_from_refnum(
2694 c, d->disk.refnum,
2695 ddf->mppe,
2696 (const struct vd_config **)&vdc,
2697 &dummy);
2698 }
7f798aca 2699 if (c) {
dacf3dc5 2700 vdc->seqnum = header->seq;
e3c2a365 2701 vdc->crc = calc_crc(vdc, conf_size);
2702 if (write(fd, vdc, conf_size) < 0)
7f798aca 2703 break;
2704 } else {
2705 unsigned int togo = conf_size;
2706 while (togo > NULL_CONF_SZ) {
2707 if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
2708 break;
2709 togo -= NULL_CONF_SZ;
2710 }
2711 if (write(fd, null_aligned, togo) < 0)
2712 break;
2713 }
2714 }
2715 if (i <= n_config)
a4057a88 2716 goto out;
7f798aca 2717
2718 d->disk.crc = calc_crc(&d->disk, 512);
2719 if (write(fd, &d->disk, 512) < 0)
a4057a88 2720 goto out;
7f798aca 2721
a4057a88 2722 ret = 1;
2723out:
2724 header->openflag = 0;
2725 header->crc = calc_crc(header, 512);
2726
2727 lseek64(fd, sector<<9, 0);
2728 if (write(fd, header, 512) < 0)
2729 ret = 0;
2730
2731 return ret;
7f798aca 2732}
2733
2734static int __write_init_super_ddf(struct supertype *st)
2735{
a322f70c 2736 struct ddf_super *ddf = st->sb;
a322f70c 2737 struct dl *d;
175593bf
DW
2738 int attempts = 0;
2739 int successes = 0;
7f798aca 2740 unsigned long long size;
42d5dfd9 2741 char *null_aligned;
0175cbf6 2742 __u32 seq;
42d5dfd9 2743
7d5a7ff3 2744 pr_state(ddf, __func__);
42d5dfd9
JS
2745 if (posix_memalign((void**)&null_aligned, 4096, NULL_CONF_SZ) != 0) {
2746 return -ENOMEM;
2747 }
2748 memset(null_aligned, 0xff, NULL_CONF_SZ);
a322f70c 2749
dc9e279c 2750 seq = ddf->active->seq + 1;
0175cbf6 2751
175593bf
DW
2752 /* try to write updated metadata,
2753 * if we catch a failure move on to the next disk
2754 */
a322f70c
DW
2755 for (d = ddf->dlist; d; d=d->next) {
2756 int fd = d->fd;
2757
2758 if (fd < 0)
2759 continue;
2760
175593bf 2761 attempts++;
a322f70c
DW
2762 /* We need to fill in the primary, (secondary) and workspace
2763 * lba's in the headers, set their checksums,
2764 * Also checksum phys, virt....
2765 *
2766 * Then write everything out, finally the anchor is written.
2767 */
2768 get_dev_size(fd, NULL, &size);
2769 size /= 512;
097bcf00 2770 if (d->workspace_lba != 0)
2771 ddf->anchor.workspace_lba = d->workspace_lba;
2772 else
2773 ddf->anchor.workspace_lba =
2774 __cpu_to_be64(size - 32*1024*2);
2775 if (d->primary_lba != 0)
2776 ddf->anchor.primary_lba = d->primary_lba;
2777 else
2778 ddf->anchor.primary_lba =
2779 __cpu_to_be64(size - 16*1024*2);
2780 if (d->secondary_lba != 0)
2781 ddf->anchor.secondary_lba = d->secondary_lba;
2782 else
2783 ddf->anchor.secondary_lba =
2784 __cpu_to_be64(size - 32*1024*2);
0175cbf6 2785 ddf->anchor.seq = seq;
a322f70c
DW
2786 memcpy(&ddf->primary, &ddf->anchor, 512);
2787 memcpy(&ddf->secondary, &ddf->anchor, 512);
2788
2789 ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
2790 ddf->anchor.seq = 0xFFFFFFFF; /* no sequencing in anchor */
2791 ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
2792
7f798aca 2793 if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY,
2794 null_aligned))
175593bf 2795 continue;
a322f70c 2796
7f798aca 2797 if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY,
2798 null_aligned))
175593bf 2799 continue;
a322f70c 2800
a322f70c 2801 lseek64(fd, (size-1)*512, SEEK_SET);
175593bf
DW
2802 if (write(fd, &ddf->anchor, 512) < 0)
2803 continue;
2804 successes++;
2805 }
42d5dfd9 2806 free(null_aligned);
175593bf 2807
175593bf 2808 return attempts != successes;
a322f70c 2809}
7a7cc504
NB
2810
2811static int write_init_super_ddf(struct supertype *st)
2812{
9b1fb677
DW
2813 struct ddf_super *ddf = st->sb;
2814 struct vcl *currentconf = ddf->currentconf;
2815
2816 /* we are done with currentconf reset it to point st at the container */
2817 ddf->currentconf = NULL;
edd8d13c
NB
2818
2819 if (st->update_tail) {
2820 /* queue the virtual_disk and vd_config as metadata updates */
2821 struct virtual_disk *vd;
2822 struct vd_config *vc;
edd8d13c
NB
2823 int len;
2824
9b1fb677 2825 if (!currentconf) {
2cc2983d
N
2826 int len = (sizeof(struct phys_disk) +
2827 sizeof(struct phys_disk_entry));
2828
2829 /* adding a disk to the container. */
2830 if (!ddf->add_list)
2831 return 0;
2832
2833 append_metadata_update(st, ddf->add_list->mdupdate, len);
2834 ddf->add_list->mdupdate = NULL;
2835 return 0;
2836 }
2837
2838 /* Newly created VD */
2839
edd8d13c
NB
2840 /* First the virtual disk. We have a slightly fake header */
2841 len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
503975b9 2842 vd = xmalloc(len);
edd8d13c 2843 *vd = *ddf->virt;
9b1fb677
DW
2844 vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
2845 vd->populated_vdes = __cpu_to_be16(currentconf->vcnum);
edd8d13c
NB
2846 append_metadata_update(st, vd, len);
2847
2848 /* Then the vd_config */
2849 len = ddf->conf_rec_len * 512;
503975b9 2850 vc = xmalloc(len);
9b1fb677 2851 memcpy(vc, &currentconf->conf, len);
edd8d13c
NB
2852 append_metadata_update(st, vc, len);
2853
2854 /* FIXME I need to close the fds! */
2855 return 0;
613b0d17 2856 } else {
d682f344
N
2857 struct dl *d;
2858 for (d = ddf->dlist; d; d=d->next)
ba728be7 2859 while (Kill(d->devname, NULL, 0, -1, 1) == 0);
1cc7f4fe 2860 return __write_init_super_ddf(st);
d682f344 2861 }
7a7cc504
NB
2862}
2863
a322f70c
DW
2864#endif
2865
387fcd59
N
2866static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
2867 unsigned long long data_offset)
a322f70c
DW
2868{
2869 /* We must reserve the last 32Meg */
2870 if (devsize <= 32*1024*2)
2871 return 0;
2872 return devsize - 32*1024*2;
2873}
2874
2875#ifndef MDASSEMBLE
8592f29d
N
2876
2877static int reserve_space(struct supertype *st, int raiddisks,
2878 unsigned long long size, int chunk,
2879 unsigned long long *freesize)
2880{
2881 /* Find 'raiddisks' spare extents at least 'size' big (but
2882 * only caring about multiples of 'chunk') and remember
2883 * them.
2884 * If the cannot be found, fail.
2885 */
2886 struct dl *dl;
2887 struct ddf_super *ddf = st->sb;
2888 int cnt = 0;
2889
2890 for (dl = ddf->dlist; dl ; dl=dl->next) {
613b0d17 2891 dl->raiddisk = -1;
8592f29d
N
2892 dl->esize = 0;
2893 }
2894 /* Now find largest extent on each device */
2895 for (dl = ddf->dlist ; dl ; dl=dl->next) {
2896 struct extent *e = get_extents(ddf, dl);
2897 unsigned long long pos = 0;
2898 int i = 0;
2899 int found = 0;
2900 unsigned long long minsize = size;
2901
2902 if (size == 0)
2903 minsize = chunk;
2904
2905 if (!e)
2906 continue;
2907 do {
2908 unsigned long long esize;
2909 esize = e[i].start - pos;
2910 if (esize >= minsize) {
2911 found = 1;
2912 minsize = esize;
2913 }
2914 pos = e[i].start + e[i].size;
2915 i++;
2916 } while (e[i-1].size);
2917 if (found) {
2918 cnt++;
2919 dl->esize = minsize;
2920 }
2921 free(e);
2922 }
2923 if (cnt < raiddisks) {
e7b84f9d 2924 pr_err("not enough devices with space to create array.\n");
8592f29d
N
2925 return 0; /* No enough free spaces large enough */
2926 }
2927 if (size == 0) {
2928 /* choose the largest size of which there are at least 'raiddisk' */
2929 for (dl = ddf->dlist ; dl ; dl=dl->next) {
2930 struct dl *dl2;
2931 if (dl->esize <= size)
2932 continue;
2933 /* This is bigger than 'size', see if there are enough */
2934 cnt = 0;
7b80ad6a 2935 for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
8592f29d
N
2936 if (dl2->esize >= dl->esize)
2937 cnt++;
2938 if (cnt >= raiddisks)
2939 size = dl->esize;
2940 }
2941 if (chunk) {
2942 size = size / chunk;
2943 size *= chunk;
2944 }
2945 *freesize = size;
2946 if (size < 32) {
e7b84f9d 2947 pr_err("not enough spare devices to create array.\n");
8592f29d
N
2948 return 0;
2949 }
2950 }
2951 /* We have a 'size' of which there are enough spaces.
2952 * We simply do a first-fit */
2953 cnt = 0;
2954 for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
2955 if (dl->esize < size)
2956 continue;
613b0d17 2957
8592f29d
N
2958 dl->raiddisk = cnt;
2959 cnt++;
2960 }
2961 return 1;
2962}
2963
2c514b71
NB
2964static int
2965validate_geometry_ddf_container(struct supertype *st,
2966 int level, int layout, int raiddisks,
2967 int chunk, unsigned long long size,
af4348dd 2968 unsigned long long data_offset,
2c514b71
NB
2969 char *dev, unsigned long long *freesize,
2970 int verbose);
78e44928
NB
2971
2972static int validate_geometry_ddf_bvd(struct supertype *st,
2973 int level, int layout, int raiddisks,
c21e737b 2974 int *chunk, unsigned long long size,
af4348dd 2975 unsigned long long data_offset,
2c514b71
NB
2976 char *dev, unsigned long long *freesize,
2977 int verbose);
78e44928
NB
2978
2979static int validate_geometry_ddf(struct supertype *st,
2c514b71 2980 int level, int layout, int raiddisks,
c21e737b 2981 int *chunk, unsigned long long size,
af4348dd 2982 unsigned long long data_offset,
2c514b71
NB
2983 char *dev, unsigned long long *freesize,
2984 int verbose)
a322f70c
DW
2985{
2986 int fd;
2987 struct mdinfo *sra;
2988 int cfd;
2989
2990 /* ddf potentially supports lots of things, but it depends on
2991 * what devices are offered (and maybe kernel version?)
2992 * If given unused devices, we will make a container.
2993 * If given devices in a container, we will make a BVD.
2994 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
2995 */
2996
bb7295f1
N
2997 if (chunk && *chunk == UnSet)
2998 *chunk = DEFAULT_CHUNK;
2999
542ef4ec 3000 if (level == -1000000) level = LEVEL_CONTAINER;
a322f70c 3001 if (level == LEVEL_CONTAINER) {
78e44928
NB
3002 /* Must be a fresh device to add to a container */
3003 return validate_geometry_ddf_container(st, level, layout,
c21e737b 3004 raiddisks, chunk?*chunk:0,
af4348dd
N
3005 size, data_offset, dev,
3006 freesize,
2c514b71 3007 verbose);
5f8097be
NB
3008 }
3009
78e44928
NB
3010 if (!dev) {
3011 /* Initial sanity check. Exclude illegal levels. */
3012 int i;
3013 for (i=0; ddf_level_num[i].num1 != MAXINT; i++)
3014 if (ddf_level_num[i].num2 == level)
3015 break;
b42f577a
N
3016 if (ddf_level_num[i].num1 == MAXINT) {
3017 if (verbose)
e7b84f9d 3018 pr_err("DDF does not support level %d arrays\n",
613b0d17 3019 level);
78e44928 3020 return 0;
b42f577a 3021 }
78e44928 3022 /* Should check layout? etc */
8592f29d
N
3023
3024 if (st->sb && freesize) {
3025 /* --create was given a container to create in.
3026 * So we need to check that there are enough
3027 * free spaces and return the amount of space.
3028 * We may as well remember which drives were
3029 * chosen so that add_to_super/getinfo_super
3030 * can return them.
3031 */
c21e737b 3032 return reserve_space(st, raiddisks, size, chunk?*chunk:0, freesize);
8592f29d 3033 }
a322f70c 3034 return 1;
78e44928 3035 }
a322f70c 3036
8592f29d
N
3037 if (st->sb) {
3038 /* A container has already been opened, so we are
3039 * creating in there. Maybe a BVD, maybe an SVD.
3040 * Should make a distinction one day.
3041 */
3042 return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
af4348dd
N
3043 chunk, size, data_offset, dev,
3044 freesize,
8592f29d
N
3045 verbose);
3046 }
78e44928
NB
3047 /* This is the first device for the array.
3048 * If it is a container, we read it in and do automagic allocations,
3049 * no other devices should be given.
3050 * Otherwise it must be a member device of a container, and we
3051 * do manual allocation.
3052 * Later we should check for a BVD and make an SVD.
a322f70c 3053 */
a322f70c
DW
3054 fd = open(dev, O_RDONLY|O_EXCL, 0);
3055 if (fd >= 0) {
4dd2df09 3056 sra = sysfs_read(fd, NULL, GET_VERSION);
a322f70c
DW
3057 close(fd);
3058 if (sra && sra->array.major_version == -1 &&
78e44928
NB
3059 strcmp(sra->text_version, "ddf") == 0) {
3060
3061 /* load super */
3062 /* find space for 'n' devices. */
3063 /* remember the devices */
3064 /* Somehow return the fact that we have enough */
a322f70c
DW
3065 }
3066
2c514b71 3067 if (verbose)
e7b84f9d
N
3068 pr_err("ddf: Cannot create this array "
3069 "on device %s - a container is required.\n",
3070 dev);
a322f70c
DW
3071 return 0;
3072 }
3073 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
2c514b71 3074 if (verbose)
e7b84f9d 3075 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3076 dev, strerror(errno));
a322f70c
DW
3077 return 0;
3078 }
3079 /* Well, it is in use by someone, maybe a 'ddf' container. */
3080 cfd = open_container(fd);
3081 if (cfd < 0) {
3082 close(fd);
2c514b71 3083 if (verbose)
e7b84f9d 3084 pr_err("ddf: Cannot use %s: %s\n",
613b0d17 3085 dev, strerror(EBUSY));
a322f70c
DW
3086 return 0;
3087 }
4dd2df09 3088 sra = sysfs_read(cfd, NULL, GET_VERSION);
a322f70c
DW
3089 close(fd);
3090 if (sra && sra->array.major_version == -1 &&
3091 strcmp(sra->text_version, "ddf") == 0) {
3092 /* This is a member of a ddf container. Load the container
3093 * and try to create a bvd
3094 */
3095 struct ddf_super *ddf;
e1902a7b 3096 if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
5f8097be 3097 st->sb = ddf;
4dd2df09 3098 strcpy(st->container_devnm, fd2devnm(cfd));
a322f70c 3099 close(cfd);
78e44928 3100 return validate_geometry_ddf_bvd(st, level, layout,
a322f70c 3101 raiddisks, chunk, size,
af4348dd 3102 data_offset,
2c514b71
NB
3103 dev, freesize,
3104 verbose);
a322f70c
DW
3105 }
3106 close(cfd);
c42ec1ed
DW
3107 } else /* device may belong to a different container */
3108 return 0;
3109
a322f70c
DW
3110 return 1;
3111}
3112
2c514b71
NB
3113static int
3114validate_geometry_ddf_container(struct supertype *st,
3115 int level, int layout, int raiddisks,
3116 int chunk, unsigned long long size,
af4348dd 3117 unsigned long long data_offset,
2c514b71
NB
3118 char *dev, unsigned long long *freesize,
3119 int verbose)
a322f70c
DW
3120{
3121 int fd;
3122 unsigned long long ldsize;
3123
3124 if (level != LEVEL_CONTAINER)
3125 return 0;
3126 if (!dev)
3127 return 1;
3128
3129 fd = open(dev, O_RDONLY|O_EXCL, 0);
3130 if (fd < 0) {
2c514b71 3131 if (verbose)
e7b84f9d 3132 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3133 dev, strerror(errno));
a322f70c
DW
3134 return 0;
3135 }
3136 if (!get_dev_size(fd, dev, &ldsize)) {
3137 close(fd);
3138 return 0;
3139 }
3140 close(fd);
3141
387fcd59 3142 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
ea17e7aa
N
3143 if (*freesize == 0)
3144 return 0;
a322f70c
DW
3145
3146 return 1;
3147}
3148
78e44928
NB
3149static int validate_geometry_ddf_bvd(struct supertype *st,
3150 int level, int layout, int raiddisks,
c21e737b 3151 int *chunk, unsigned long long size,
af4348dd 3152 unsigned long long data_offset,
2c514b71
NB
3153 char *dev, unsigned long long *freesize,
3154 int verbose)
a322f70c
DW
3155{
3156 struct stat stb;
3157 struct ddf_super *ddf = st->sb;
3158 struct dl *dl;
5f8097be
NB
3159 unsigned long long pos = 0;
3160 unsigned long long maxsize;
3161 struct extent *e;
3162 int i;
a322f70c 3163 /* ddf/bvd supports lots of things, but not containers */
b42f577a
N
3164 if (level == LEVEL_CONTAINER) {
3165 if (verbose)
e7b84f9d 3166 pr_err("DDF cannot create a container within an container\n");
a322f70c 3167 return 0;
b42f577a 3168 }
a322f70c
DW
3169 /* We must have the container info already read in. */
3170 if (!ddf)
3171 return 0;
3172
5f8097be
NB
3173 if (!dev) {
3174 /* General test: make sure there is space for
3175 * 'raiddisks' device extents of size 'size'.
3176 */
3177 unsigned long long minsize = size;
3178 int dcnt = 0;
3179 if (minsize == 0)
3180 minsize = 8;
3181 for (dl = ddf->dlist; dl ; dl = dl->next)
3182 {
3183 int found = 0;
7e1432fb 3184 pos = 0;
5f8097be
NB
3185
3186 i = 0;
3187 e = get_extents(ddf, dl);
3188 if (!e) continue;
3189 do {
3190 unsigned long long esize;
3191 esize = e[i].start - pos;
3192 if (esize >= minsize)
3193 found = 1;
3194 pos = e[i].start + e[i].size;
3195 i++;
3196 } while (e[i-1].size);
3197 if (found)
3198 dcnt++;
3199 free(e);
3200 }
3201 if (dcnt < raiddisks) {
2c514b71 3202 if (verbose)
e7b84f9d
N
3203 pr_err("ddf: Not enough devices with "
3204 "space for this array (%d < %d)\n",
3205 dcnt, raiddisks);
5f8097be
NB
3206 return 0;
3207 }
3208 return 1;
3209 }
a322f70c
DW
3210 /* This device must be a member of the set */
3211 if (stat(dev, &stb) < 0)
3212 return 0;
3213 if ((S_IFMT & stb.st_mode) != S_IFBLK)
3214 return 0;
3215 for (dl = ddf->dlist ; dl ; dl = dl->next) {
f21e18ca
N
3216 if (dl->major == (int)major(stb.st_rdev) &&
3217 dl->minor == (int)minor(stb.st_rdev))
a322f70c
DW
3218 break;
3219 }
5f8097be 3220 if (!dl) {
2c514b71 3221 if (verbose)
e7b84f9d 3222 pr_err("ddf: %s is not in the "
613b0d17
N
3223 "same DDF set\n",
3224 dev);
5f8097be
NB
3225 return 0;
3226 }
3227 e = get_extents(ddf, dl);
3228 maxsize = 0;
3229 i = 0;
3230 if (e) do {
613b0d17
N
3231 unsigned long long esize;
3232 esize = e[i].start - pos;
3233 if (esize >= maxsize)
3234 maxsize = esize;
3235 pos = e[i].start + e[i].size;
3236 i++;
3237 } while (e[i-1].size);
5f8097be 3238 *freesize = maxsize;
a322f70c
DW
3239 // FIXME here I am
3240
3241 return 1;
3242}
59e36268 3243
a322f70c 3244static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 3245 void **sbp, char *devname)
a322f70c
DW
3246{
3247 struct mdinfo *sra;
3248 struct ddf_super *super;
3249 struct mdinfo *sd, *best = NULL;
3250 int bestseq = 0;
3251 int seq;
3252 char nm[20];
3253 int dfd;
3254
b526e52d 3255 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
a322f70c
DW
3256 if (!sra)
3257 return 1;
3258 if (sra->array.major_version != -1 ||
3259 sra->array.minor_version != -2 ||
3260 strcmp(sra->text_version, "ddf") != 0)
3261 return 1;
3262
6416d527 3263 if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
a322f70c 3264 return 1;
a2349791 3265 memset(super, 0, sizeof(*super));
a322f70c
DW
3266
3267 /* first, try each device, and choose the best ddf */
3268 for (sd = sra->devs ; sd ; sd = sd->next) {
3269 int rv;
3270 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
7a7cc504
NB
3271 dfd = dev_open(nm, O_RDONLY);
3272 if (dfd < 0)
a322f70c
DW
3273 return 2;
3274 rv = load_ddf_headers(dfd, super, NULL);
7a7cc504 3275 close(dfd);
a322f70c
DW
3276 if (rv == 0) {
3277 seq = __be32_to_cpu(super->active->seq);
3278 if (super->active->openflag)
3279 seq--;
3280 if (!best || seq > bestseq) {
3281 bestseq = seq;
3282 best = sd;
3283 }
3284 }
3285 }
3286 if (!best)
3287 return 1;
3288 /* OK, load this ddf */
3289 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
3290 dfd = dev_open(nm, O_RDONLY);
7a7cc504 3291 if (dfd < 0)
a322f70c
DW
3292 return 1;
3293 load_ddf_headers(dfd, super, NULL);
3294 load_ddf_global(dfd, super, NULL);
3295 close(dfd);
3296 /* Now we need the device-local bits */
3297 for (sd = sra->devs ; sd ; sd = sd->next) {
3d2c4fc7
DW
3298 int rv;
3299
a322f70c 3300 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
e1902a7b 3301 dfd = dev_open(nm, O_RDWR);
7a7cc504 3302 if (dfd < 0)
a322f70c 3303 return 2;
3d2c4fc7
DW
3304 rv = load_ddf_headers(dfd, super, NULL);
3305 if (rv == 0)
e1902a7b 3306 rv = load_ddf_local(dfd, super, NULL, 1);
3d2c4fc7
DW
3307 if (rv)
3308 return 1;
a322f70c 3309 }
33414a01 3310
a322f70c
DW
3311 *sbp = super;
3312 if (st->ss == NULL) {
78e44928 3313 st->ss = &super_ddf;
a322f70c
DW
3314 st->minor_version = 0;
3315 st->max_devs = 512;
3316 }
4dd2df09 3317 strcpy(st->container_devnm, fd2devnm(fd));
a322f70c
DW
3318 return 0;
3319}
2b959fbf
N
3320
3321static int load_container_ddf(struct supertype *st, int fd,
3322 char *devname)
3323{
3324 return load_super_ddf_all(st, fd, &st->sb, devname);
3325}
3326
0e600426 3327#endif /* MDASSEMBLE */
a322f70c 3328
a5c7adb3 3329static int check_secondary(const struct vcl *vc)
3330{
3331 const struct vd_config *conf = &vc->conf;
3332 int i;
3333
3334 /* The only DDF secondary RAID level md can support is
3335 * RAID 10, if the stripe sizes and Basic volume sizes
3336 * are all equal.
3337 * Other configurations could in theory be supported by exposing
3338 * the BVDs to user space and using device mapper for the secondary
3339 * mapping. So far we don't support that.
3340 */
3341
3342 __u64 sec_elements[4] = {0, 0, 0, 0};
3343#define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3344#define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3345
3346 if (vc->other_bvds == NULL) {
3347 pr_err("No BVDs for secondary RAID found\n");
3348 return -1;
3349 }
3350 if (conf->prl != DDF_RAID1) {
3351 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3352 return -1;
3353 }
3354 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3355 pr_err("Secondary RAID level %d is unsupported\n",
3356 conf->srl);
3357 return -1;
3358 }
3359 __set_sec_seen(conf->sec_elmnt_seq);
3360 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3361 const struct vd_config *bvd = vc->other_bvds[i];
c98567ba 3362 if (bvd == NULL)
3363 continue;
a5c7adb3 3364 if (bvd->srl != conf->srl) {
3365 pr_err("Inconsistent secondary RAID level across BVDs\n");
3366 return -1;
3367 }
3368 if (bvd->prl != conf->prl) {
3369 pr_err("Different RAID levels for BVDs are unsupported\n");
3370 return -1;
3371 }
3372 if (bvd->prim_elmnt_count != conf->prim_elmnt_count) {
3373 pr_err("All BVDs must have the same number of primary elements\n");
3374 return -1;
3375 }
3376 if (bvd->chunk_shift != conf->chunk_shift) {
3377 pr_err("Different strip sizes for BVDs are unsupported\n");
3378 return -1;
3379 }
3380 if (bvd->array_blocks != conf->array_blocks) {
3381 pr_err("Different BVD sizes are unsupported\n");
3382 return -1;
3383 }
3384 __set_sec_seen(bvd->sec_elmnt_seq);
3385 }
3386 for (i = 0; i < conf->sec_elmnt_count; i++) {
3387 if (!__was_sec_seen(i)) {
3388 pr_err("BVD %d is missing\n", i);
3389 return -1;
3390 }
3391 }
3392 return 0;
3393}
3394
8a38db86 3395static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
4e587018 3396 __u32 refnum, unsigned int nmax,
3397 const struct vd_config **bvd,
3398 unsigned int *idx)
8a38db86 3399{
4e587018 3400 unsigned int i, j, n, sec, cnt;
3401
3402 cnt = __be16_to_cpu(vc->conf.prim_elmnt_count);
3403 sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
3404
3405 for (i = 0, j = 0 ; i < nmax ; i++) {
3406 /* j counts valid entries for this BVD */
3407 if (vc->conf.phys_refnum[i] != 0xffffffff)
3408 j++;
3409 if (vc->conf.phys_refnum[i] == refnum) {
3410 *bvd = &vc->conf;
3411 *idx = i;
3412 return sec * cnt + j - 1;
3413 }
3414 }
3415 if (vc->other_bvds == NULL)
3416 goto bad;
3417
3418 for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
3419 struct vd_config *vd = vc->other_bvds[n-1];
3420 if (vd == NULL)
3421 continue;
3422 sec = vd->sec_elmnt_seq;
3423 for (i = 0, j = 0 ; i < nmax ; i++) {
3424 if (vd->phys_refnum[i] != 0xffffffff)
3425 j++;
3426 if (vd->phys_refnum[i] == refnum) {
3427 *bvd = vd;
3428 *idx = i;
3429 return sec * cnt + j - 1;
3430 }
3431 }
3432 }
3433bad:
3434 *bvd = NULL;
d6e7b083 3435 return DDF_NOTFOUND;
8a38db86 3436}
3437
00bbdbda 3438static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
598f0d58
NB
3439{
3440 /* Given a container loaded by load_super_ddf_all,
3441 * extract information about all the arrays into
3442 * an mdinfo tree.
3443 *
3444 * For each vcl in conflist: create an mdinfo, fill it in,
3445 * then look for matching devices (phys_refnum) in dlist
3446 * and create appropriate device mdinfo.
3447 */
3448 struct ddf_super *ddf = st->sb;
3449 struct mdinfo *rest = NULL;
3450 struct vcl *vc;
3451
3452 for (vc = ddf->conflist ; vc ; vc=vc->next)
3453 {
f21e18ca
N
3454 unsigned int i;
3455 unsigned int j;
598f0d58 3456 struct mdinfo *this;
00bbdbda 3457 char *ep;
90fa1a29 3458 __u32 *cptr;
8a38db86 3459 unsigned int pd;
00bbdbda
N
3460
3461 if (subarray &&
3462 (strtoul(subarray, &ep, 10) != vc->vcnum ||
3463 *ep != '\0'))
3464 continue;
3465
a5c7adb3 3466 if (vc->conf.sec_elmnt_count > 1) {
3467 if (check_secondary(vc) != 0)
3468 continue;
3469 }
3470
503975b9 3471 this = xcalloc(1, sizeof(*this));
598f0d58
NB
3472 this->next = rest;
3473 rest = this;
3474
8a2848a7 3475 if (layout_ddf2md(&vc->conf, &this->array))
3476 continue;
598f0d58 3477 this->array.md_minor = -1;
f35f2525
N
3478 this->array.major_version = -1;
3479 this->array.minor_version = -2;
90fa1a29
JS
3480 cptr = (__u32 *)(vc->conf.guid + 16);
3481 this->array.ctime = DECADE + __be32_to_cpu(*cptr);
598f0d58
NB
3482 this->array.utime = DECADE +
3483 __be32_to_cpu(vc->conf.timestamp);
3484 this->array.chunk_size = 512 << vc->conf.chunk_shift;
3485
59e36268 3486 i = vc->vcnum;
7a7cc504
NB
3487 if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
3488 (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
ed9d66aa 3489 DDF_init_full) {
598f0d58 3490 this->array.state = 0;
ed9d66aa
NB
3491 this->resync_start = 0;
3492 } else {
598f0d58 3493 this->array.state = 1;
b7528a20 3494 this->resync_start = MaxSector;
ed9d66aa 3495 }
db42fa9b
N
3496 memcpy(this->name, ddf->virt->entries[i].name, 16);
3497 this->name[16]=0;
3498 for(j=0; j<16; j++)
3499 if (this->name[j] == ' ')
3500 this->name[j] = 0;
598f0d58
NB
3501
3502 memset(this->uuid, 0, sizeof(this->uuid));
3503 this->component_size = __be64_to_cpu(vc->conf.blocks);
3504 this->array.size = this->component_size / 2;
5f2aace8 3505 this->container_member = i;
598f0d58 3506
c5afc314
N
3507 ddf->currentconf = vc;
3508 uuid_from_super_ddf(st, this->uuid);
3509 ddf->currentconf = NULL;
3510
60f18132 3511 sprintf(this->text_version, "/%s/%d",
4dd2df09 3512 st->container_devnm, this->container_member);
60f18132 3513
8a38db86 3514 for (pd = 0; pd < __be16_to_cpu(ddf->phys->used_pdes); pd++) {
598f0d58
NB
3515 struct mdinfo *dev;
3516 struct dl *d;
4e587018 3517 const struct vd_config *bvd;
3518 unsigned int iphys;
3519 __u64 *lba_offset;
fa033bec 3520 int stt;
598f0d58 3521
8a38db86 3522 if (ddf->phys->entries[pd].refnum == 0xFFFFFFFF)
bc17324f 3523 continue;
0cf5ef67
N
3524
3525 stt = __be16_to_cpu(ddf->phys->entries[pd].state);
fa033bec
N
3526 if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3527 != DDF_Online)
3528 continue;
3529
8a38db86 3530 i = get_pd_index_from_refnum(
4e587018 3531 vc, ddf->phys->entries[pd].refnum,
3532 ddf->mppe, &bvd, &iphys);
d6e7b083 3533 if (i == DDF_NOTFOUND)
8a38db86 3534 continue;
3535
fa033bec 3536 this->array.working_disks++;
bc17324f 3537
0cf5ef67 3538 for (d = ddf->dlist; d ; d=d->next)
8a38db86 3539 if (d->disk.refnum ==
3540 ddf->phys->entries[pd].refnum)
0cf5ef67
N
3541 break;
3542 if (d == NULL)
3543 /* Haven't found that one yet, maybe there are others */
3544 continue;
3545
503975b9 3546 dev = xcalloc(1, sizeof(*dev));
598f0d58
NB
3547 dev->next = this->devs;
3548 this->devs = dev;
3549
3550 dev->disk.number = __be32_to_cpu(d->disk.refnum);
3551 dev->disk.major = d->major;
3552 dev->disk.minor = d->minor;
3553 dev->disk.raid_disk = i;
3554 dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
d23534e4 3555 dev->recovery_start = MaxSector;
598f0d58 3556
120f7677 3557 dev->events = __be32_to_cpu(ddf->primary.seq);
4e587018 3558 lba_offset = (__u64 *)&bvd->phys_refnum[ddf->mppe];
3559 dev->data_offset = __be64_to_cpu(lba_offset[iphys]);
3560 dev->component_size = __be64_to_cpu(bvd->blocks);
598f0d58
NB
3561 if (d->devname)
3562 strcpy(dev->name, d->devname);
3563 }
3564 }
3565 return rest;
3566}
3567
955e9ea1 3568static int store_super_ddf(struct supertype *st, int fd)
a322f70c 3569{
955e9ea1 3570 struct ddf_super *ddf = st->sb;
a322f70c 3571 unsigned long long dsize;
6416d527 3572 void *buf;
3d2c4fc7 3573 int rc;
a322f70c 3574
955e9ea1
DW
3575 if (!ddf)
3576 return 1;
3577
a322f70c
DW
3578 if (!get_dev_size(fd, NULL, &dsize))
3579 return 1;
3580
dbf98368 3581 if (ddf->dlist || ddf->conflist) {
3582 struct stat sta;
3583 struct dl *dl;
3584 int ofd, ret;
3585
3586 if (fstat(fd, &sta) == -1 || !S_ISBLK(sta.st_mode)) {
3587 pr_err("%s: file descriptor for invalid device\n",
3588 __func__);
3589 return 1;
3590 }
3591 for (dl = ddf->dlist; dl; dl = dl->next)
3592 if (dl->major == (int)major(sta.st_rdev) &&
3593 dl->minor == (int)minor(sta.st_rdev))
3594 break;
3595 if (!dl) {
3596 pr_err("%s: couldn't find disk %d/%d\n", __func__,
3597 (int)major(sta.st_rdev),
3598 (int)minor(sta.st_rdev));
3599 return 1;
3600 }
3601 /*
3602 For DDF, writing to just one disk makes no sense.
3603 We would run the risk of writing inconsistent meta data
3604 to the devices. So just call __write_init_super_ddf and
3605 write to all devices, including this one.
3606 Use the fd passed to this function, just in case dl->fd
3607 is invalid.
3608 */
3609 ofd = dl->fd;
3610 dl->fd = fd;
3611 ret = __write_init_super_ddf(st);
3612 dl->fd = ofd;
3613 return ret;
3614 }
3615
3d2c4fc7
DW
3616 if (posix_memalign(&buf, 512, 512) != 0)
3617 return 1;
6416d527
NB
3618 memset(buf, 0, 512);
3619
a322f70c 3620 lseek64(fd, dsize-512, 0);
3d2c4fc7 3621 rc = write(fd, buf, 512);
6416d527 3622 free(buf);
3d2c4fc7
DW
3623 if (rc < 0)
3624 return 1;
a322f70c
DW
3625 return 0;
3626}
3627
a19c88b8
NB
3628static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3629{
3630 /*
3631 * return:
3632 * 0 same, or first was empty, and second was copied
3633 * 1 second had wrong number
3634 * 2 wrong uuid
3635 * 3 wrong other info
3636 */
3637 struct ddf_super *first = st->sb;
3638 struct ddf_super *second = tst->sb;
4eefd651 3639 struct dl *dl1, *dl2;
3640 struct vcl *vl1, *vl2;
2d210697 3641 unsigned int max_vds, max_pds, pd, vd;
a19c88b8
NB
3642
3643 if (!first) {
3644 st->sb = tst->sb;
3645 tst->sb = NULL;
3646 return 0;
3647 }
3648
3649 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3650 return 2;
3651
2d210697 3652 if (first->anchor.seq != second->anchor.seq) {
3653 dprintf("%s: sequence number mismatch %u/%u\n", __func__,
3654 __be32_to_cpu(first->anchor.seq),
3655 __be32_to_cpu(second->anchor.seq));
3656 return 3;
3657 }
3658 if (first->max_part != second->max_part ||
3659 first->phys->used_pdes != second->phys->used_pdes ||
3660 first->virt->populated_vdes != second->virt->populated_vdes) {
3661 dprintf("%s: PD/VD number mismatch\n", __func__);
3662 return 3;
3663 }
3664
3665 max_pds = __be16_to_cpu(first->phys->used_pdes);
3666 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3667 for (pd = 0; pd < max_pds; pd++)
3668 if (first->phys->entries[pd].refnum == dl2->disk.refnum)
3669 break;
3670 if (pd == max_pds) {
3671 dprintf("%s: no match for disk %08x\n", __func__,
3672 __be32_to_cpu(dl2->disk.refnum));
3673 return 3;
3674 }
3675 }
3676
3677 max_vds = __be16_to_cpu(first->active->max_vd_entries);
3678 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3679 if (vl2->conf.magic != DDF_VD_CONF_MAGIC)
3680 continue;
3681 for (vd = 0; vd < max_vds; vd++)
3682 if (!memcmp(first->virt->entries[vd].guid,
3683 vl2->conf.guid, DDF_GUID_LEN))
3684 break;
3685 if (vd == max_vds) {
3686 dprintf("%s: no match for VD config\n", __func__);
3687 return 3;
3688 }
3689 }
a19c88b8 3690 /* FIXME should I look at anything else? */
2d210697 3691
4eefd651 3692 /*
3693 At this point we are fairly sure that the meta data matches.
3694 But the new disk may contain additional local data.
3695 Add it to the super block.
3696 */
3697 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3698 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3699 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3700 DDF_GUID_LEN))
3701 break;
3702 if (vl1) {
3703 if (vl1->other_bvds != NULL &&
3704 vl1->conf.sec_elmnt_seq !=
3705 vl2->conf.sec_elmnt_seq) {
3706 dprintf("%s: adding BVD %u\n", __func__,
3707 vl2->conf.sec_elmnt_seq);
3708 add_other_bvd(vl1, &vl2->conf,
3709 first->conf_rec_len*512);
3710 }
3711 continue;
3712 }
3713
3714 if (posix_memalign((void **)&vl1, 512,
3715 (first->conf_rec_len*512 +
3716 offsetof(struct vcl, conf))) != 0) {
3717 pr_err("%s could not allocate vcl buf\n",
3718 __func__);
3719 return 3;
3720 }
3721
3722 vl1->next = first->conflist;
3723 vl1->block_sizes = NULL;
3f188b10 3724 if (vl2->conf.sec_elmnt_count > 1) {
4eefd651 3725 vl1->other_bvds = xcalloc(vl2->conf.sec_elmnt_count - 1,
3726 sizeof(struct vd_config *));
3727 } else
3728 vl1->other_bvds = NULL;
3729 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3730 vl1->lba_offset = (__u64 *)
3731 &vl1->conf.phys_refnum[first->mppe];
3732 for (vd = 0; vd < max_vds; vd++)
3733 if (!memcmp(first->virt->entries[vd].guid,
3734 vl1->conf.guid, DDF_GUID_LEN))
3735 break;
3736 vl1->vcnum = vd;
3737 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
3738 first->conflist = vl1;
3739 }
3740
3741 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3742 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
3743 if (dl1->disk.refnum == dl2->disk.refnum)
3744 break;
3745 if (dl1)
3746 continue;
3747
3748 if (posix_memalign((void **)&dl1, 512,
3749 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
3750 != 0) {
3751 pr_err("%s could not allocate disk info buffer\n",
3752 __func__);
3753 return 3;
3754 }
3755 memcpy(dl1, dl2, sizeof(*dl1));
3756 dl1->mdupdate = NULL;
3757 dl1->next = first->dlist;
3758 dl1->fd = -1;
3759 for (pd = 0; pd < max_pds; pd++)
3760 if (first->phys->entries[pd].refnum == dl1->disk.refnum)
3761 break;
3762 dl1->pdnum = pd;
3763 if (dl2->spare) {
3764 if (posix_memalign((void **)&dl1->spare, 512,
3765 first->conf_rec_len*512) != 0) {
3766 pr_err("%s could not allocate spare info buf\n",
3767 __func__);
3768 return 3;
3769 }
3770 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
3771 }
3772 for (vd = 0 ; vd < first->max_part ; vd++) {
3773 if (!dl2->vlist[vd]) {
3774 dl1->vlist[vd] = NULL;
3775 continue;
3776 }
3777 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
3778 if (!memcmp(vl1->conf.guid,
3779 dl2->vlist[vd]->conf.guid,
3780 DDF_GUID_LEN))
3781 break;
3782 dl1->vlist[vd] = vl1;
3783 }
3784 }
3785 first->dlist = dl1;
3786 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
3787 dl1->disk.refnum);
3788 }
3789
a19c88b8
NB
3790 return 0;
3791}
3792
0e600426 3793#ifndef MDASSEMBLE
4e5528c6
NB
3794/*
3795 * A new array 'a' has been started which claims to be instance 'inst'
3796 * within container 'c'.
3797 * We need to confirm that the array matches the metadata in 'c' so
3798 * that we don't corrupt any metadata.
3799 */
cba0191b 3800static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
549e9569 3801{
a2aa439e 3802 struct ddf_super *ddf = c->sb;
3803 int n = atoi(inst);
fb9d0acb 3804 if (all_ff(ddf->virt->entries[n].guid)) {
3805 pr_err("%s: subarray %d doesn't exist\n", __func__, n);
a2aa439e 3806 return -ENODEV;
3807 }
3808 dprintf("ddf: open_new %d\n", n);
3809 a->info.container_member = n;
549e9569
NB
3810 return 0;
3811}
3812
4e5528c6
NB
3813/*
3814 * The array 'a' is to be marked clean in the metadata.
ed9d66aa 3815 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
4e5528c6
NB
3816 * clean up to the point (in sectors). If that cannot be recorded in the
3817 * metadata, then leave it as dirty.
3818 *
3819 * For DDF, we need to clear the DDF_state_inconsistent bit in the
3820 * !global! virtual_disk.virtual_entry structure.
3821 */
01f157d7 3822static int ddf_set_array_state(struct active_array *a, int consistent)
549e9569 3823{
4e5528c6
NB
3824 struct ddf_super *ddf = a->container->sb;
3825 int inst = a->info.container_member;
18a2f463 3826 int old = ddf->virt->entries[inst].state;
01f157d7
N
3827 if (consistent == 2) {
3828 /* Should check if a recovery should be started FIXME */
3829 consistent = 1;
b7941fd6 3830 if (!is_resync_complete(&a->info))
01f157d7
N
3831 consistent = 0;
3832 }
ed9d66aa
NB
3833 if (consistent)
3834 ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
3835 else
4e5528c6 3836 ddf->virt->entries[inst].state |= DDF_state_inconsistent;
18a2f463 3837 if (old != ddf->virt->entries[inst].state)
7d5a7ff3 3838 ddf_set_updates_pending(ddf);
18a2f463
NB
3839
3840 old = ddf->virt->entries[inst].init_state;
ed9d66aa 3841 ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
b7941fd6 3842 if (is_resync_complete(&a->info))
ed9d66aa 3843 ddf->virt->entries[inst].init_state |= DDF_init_full;
b7941fd6 3844 else if (a->info.resync_start == 0)
ed9d66aa 3845 ddf->virt->entries[inst].init_state |= DDF_init_not;
4e5528c6 3846 else
ed9d66aa 3847 ddf->virt->entries[inst].init_state |= DDF_init_quick;
18a2f463 3848 if (old != ddf->virt->entries[inst].init_state)
7d5a7ff3 3849 ddf_set_updates_pending(ddf);
ed9d66aa 3850
2c514b71 3851 dprintf("ddf mark %d %s %llu\n", inst, consistent?"clean":"dirty",
b7941fd6 3852 a->info.resync_start);
01f157d7 3853 return consistent;
fd7cde1b
DW
3854}
3855
5ec636b7 3856static int get_bvd_state(const struct ddf_super *ddf,
3857 const struct vd_config *vc)
3858{
3859 unsigned int i, n_bvd, working = 0;
3860 unsigned int n_prim = __be16_to_cpu(vc->prim_elmnt_count);
3861 int pd, st, state;
3862 for (i = 0; i < n_prim; i++) {
3863 if (!find_index_in_bvd(ddf, vc, i, &n_bvd))
3864 continue;
3865 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
3866 if (pd < 0)
3867 continue;
3868 st = __be16_to_cpu(ddf->phys->entries[pd].state);
3869 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3870 == DDF_Online)
3871 working++;
3872 }
3873
3874 state = DDF_state_degraded;
3875 if (working == n_prim)
3876 state = DDF_state_optimal;
3877 else
3878 switch (vc->prl) {
3879 case DDF_RAID0:
3880 case DDF_CONCAT:
3881 case DDF_JBOD:
3882 state = DDF_state_failed;
3883 break;
3884 case DDF_RAID1:
3885 if (working == 0)
3886 state = DDF_state_failed;
3887 else if (working >= 2)
3888 state = DDF_state_part_optimal;
3889 break;
3890 case DDF_RAID4:
3891 case DDF_RAID5:
3892 if (working < n_prim - 1)
3893 state = DDF_state_failed;
3894 break;
3895 case DDF_RAID6:
3896 if (working < n_prim - 2)
3897 state = DDF_state_failed;
3898 else if (working == n_prim - 1)
3899 state = DDF_state_part_optimal;
3900 break;
3901 }
3902 return state;
3903}
3904
0777d17d 3905static int secondary_state(int state, int other, int seclevel)
3906{
3907 if (state == DDF_state_optimal && other == DDF_state_optimal)
3908 return DDF_state_optimal;
3909 if (seclevel == DDF_2MIRRORED) {
3910 if (state == DDF_state_optimal || other == DDF_state_optimal)
3911 return DDF_state_part_optimal;
3912 if (state == DDF_state_failed && other == DDF_state_failed)
3913 return DDF_state_failed;
3914 return DDF_state_degraded;
3915 } else {
3916 if (state == DDF_state_failed || other == DDF_state_failed)
3917 return DDF_state_failed;
3918 if (state == DDF_state_degraded || other == DDF_state_degraded)
3919 return DDF_state_degraded;
3920 return DDF_state_part_optimal;
3921 }
3922}
3923
3924static int get_svd_state(const struct ddf_super *ddf, const struct vcl *vcl)
3925{
3926 int state = get_bvd_state(ddf, &vcl->conf);
3927 unsigned int i;
3928 for (i = 1; i < vcl->conf.sec_elmnt_count; i++) {
3929 state = secondary_state(
3930 state,
3931 get_bvd_state(ddf, vcl->other_bvds[i-1]),
3932 vcl->conf.srl);
3933 }
3934 return state;
3935}
3936
7a7cc504
NB
3937/*
3938 * The state of each disk is stored in the global phys_disk structure
3939 * in phys_disk.entries[n].state.
3940 * This makes various combinations awkward.
3941 * - When a device fails in any array, it must be failed in all arrays
3942 * that include a part of this device.
3943 * - When a component is rebuilding, we cannot include it officially in the
3944 * array unless this is the only array that uses the device.
3945 *
3946 * So: when transitioning:
3947 * Online -> failed, just set failed flag. monitor will propagate
3948 * spare -> online, the device might need to be added to the array.
3949 * spare -> failed, just set failed. Don't worry if in array or not.
3950 */
8d45d196 3951static void ddf_set_disk(struct active_array *a, int n, int state)
549e9569 3952{
7a7cc504 3953 struct ddf_super *ddf = a->container->sb;
baba3f4e 3954 unsigned int inst = a->info.container_member, n_bvd;
3955 struct vcl *vcl;
3956 struct vd_config *vc = find_vdcr(ddf, inst, (unsigned int)n,
3957 &n_bvd, &vcl);
3958 int pd;
e1316fab
N
3959 struct mdinfo *mdi;
3960 struct dl *dl;
7a7cc504
NB
3961
3962 if (vc == NULL) {
2c514b71 3963 dprintf("ddf: cannot find instance %d!!\n", inst);
7a7cc504
NB
3964 return;
3965 }
e1316fab
N
3966 /* Find the matching slot in 'info'. */
3967 for (mdi = a->info.devs; mdi; mdi = mdi->next)
3968 if (mdi->disk.raid_disk == n)
3969 break;
3970 if (!mdi)
3971 return;
3972
3973 /* and find the 'dl' entry corresponding to that. */
3974 for (dl = ddf->dlist; dl; dl = dl->next)
77632af9
N
3975 if (mdi->state_fd >= 0 &&
3976 mdi->disk.major == dl->major &&
e1316fab
N
3977 mdi->disk.minor == dl->minor)
3978 break;
3979 if (!dl)
3980 return;
3981
baba3f4e 3982 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
e1316fab
N
3983 if (pd < 0 || pd != dl->pdnum) {
3984 /* disk doesn't currently exist or has changed.
3985 * If it is now in_sync, insert it. */
baba3f4e 3986 dprintf("%s: phys disk not found for %d: %d/%d ref %08x\n",
3987 __func__, dl->pdnum, dl->major, dl->minor,
3988 dl->disk.refnum);
3989 dprintf("%s: array %u disk %u ref %08x pd %d\n",
3990 __func__, inst, n_bvd, vc->phys_refnum[n_bvd], pd);
7a7cc504 3991 if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
baba3f4e 3992 __u64 *lba_offset;
3993 pd = dl->pdnum; /* FIXME: is this really correct ? */
3994 vc->phys_refnum[n_bvd] = dl->disk.refnum;
3995 lba_offset = (__u64 *)&vc->phys_refnum[ddf->mppe];
3996 lba_offset[n_bvd] = mdi->data_offset;
e1316fab
N
3997 ddf->phys->entries[pd].type &=
3998 ~__cpu_to_be16(DDF_Global_Spare);
3999 ddf->phys->entries[pd].type |=
4000 __cpu_to_be16(DDF_Active_in_VD);
7d5a7ff3 4001 ddf_set_updates_pending(ddf);
7a7cc504
NB
4002 }
4003 } else {
18a2f463 4004 int old = ddf->phys->entries[pd].state;
7a7cc504
NB
4005 if (state & DS_FAULTY)
4006 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed);
4007 if (state & DS_INSYNC) {
4008 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online);
4009 ddf->phys->entries[pd].state &= __cpu_to_be16(~DDF_Rebuilding);
4010 }
18a2f463 4011 if (old != ddf->phys->entries[pd].state)
7d5a7ff3 4012 ddf_set_updates_pending(ddf);
7a7cc504
NB
4013 }
4014
2c514b71 4015 dprintf("ddf: set_disk %d to %x\n", n, state);
7e1432fb 4016
7a7cc504
NB
4017 /* Now we need to check the state of the array and update
4018 * virtual_disk.entries[n].state.
4019 * It needs to be one of "optimal", "degraded", "failed".
4020 * I don't understand 'deleted' or 'missing'.
4021 */
0777d17d 4022 state = get_svd_state(ddf, vcl);
7a7cc504 4023
18a2f463
NB
4024 if (ddf->virt->entries[inst].state !=
4025 ((ddf->virt->entries[inst].state & ~DDF_state_mask)
4026 | state)) {
4027
4028 ddf->virt->entries[inst].state =
4029 (ddf->virt->entries[inst].state & ~DDF_state_mask)
4030 | state;
7d5a7ff3 4031 ddf_set_updates_pending(ddf);
18a2f463 4032 }
7a7cc504 4033
549e9569
NB
4034}
4035
2e735d19 4036static void ddf_sync_metadata(struct supertype *st)
549e9569 4037{
7a7cc504
NB
4038
4039 /*
4040 * Write all data to all devices.
4041 * Later, we might be able to track whether only local changes
4042 * have been made, or whether any global data has been changed,
4043 * but ddf is sufficiently weird that it probably always
4044 * changes global data ....
4045 */
18a2f463
NB
4046 struct ddf_super *ddf = st->sb;
4047 if (!ddf->updates_pending)
4048 return;
4049 ddf->updates_pending = 0;
1cc7f4fe 4050 __write_init_super_ddf(st);
2c514b71 4051 dprintf("ddf: sync_metadata\n");
549e9569
NB
4052}
4053
88c164f4
NB
4054static void ddf_process_update(struct supertype *st,
4055 struct metadata_update *update)
4056{
4057 /* Apply this update to the metadata.
4058 * The first 4 bytes are a DDF_*_MAGIC which guides
4059 * our actions.
4060 * Possible update are:
4061 * DDF_PHYS_RECORDS_MAGIC
4dd968cc
N
4062 * Add a new physical device or remove an old one.
4063 * Changes to this record only happen implicitly.
88c164f4
NB
4064 * used_pdes is the device number.
4065 * DDF_VIRT_RECORDS_MAGIC
4066 * Add a new VD. Possibly also change the 'access' bits.
4067 * populated_vdes is the entry number.
4068 * DDF_VD_CONF_MAGIC
4069 * New or updated VD. the VIRT_RECORD must already
4070 * exist. For an update, phys_refnum and lba_offset
4071 * (at least) are updated, and the VD_CONF must
4072 * be written to precisely those devices listed with
4073 * a phys_refnum.
4074 * DDF_SPARE_ASSIGN_MAGIC
4075 * replacement Spare Assignment Record... but for which device?
4076 *
4077 * So, e.g.:
4078 * - to create a new array, we send a VIRT_RECORD and
4079 * a VD_CONF. Then assemble and start the array.
4080 * - to activate a spare we send a VD_CONF to add the phys_refnum
4081 * and offset. This will also mark the spare as active with
4082 * a spare-assignment record.
4083 */
4084 struct ddf_super *ddf = st->sb;
4085 __u32 *magic = (__u32*)update->buf;
4086 struct phys_disk *pd;
4087 struct virtual_disk *vd;
4088 struct vd_config *vc;
4089 struct vcl *vcl;
4090 struct dl *dl;
f21e18ca
N
4091 unsigned int mppe;
4092 unsigned int ent;
c7079c84 4093 unsigned int pdnum, pd2;
88c164f4 4094
2c514b71 4095 dprintf("Process update %x\n", *magic);
7e1432fb 4096
88c164f4
NB
4097 switch (*magic) {
4098 case DDF_PHYS_RECORDS_MAGIC:
4099
4100 if (update->len != (sizeof(struct phys_disk) +
4101 sizeof(struct phys_disk_entry)))
4102 return;
4103 pd = (struct phys_disk*)update->buf;
4104
4105 ent = __be16_to_cpu(pd->used_pdes);
4106 if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
4107 return;
4dd968cc
N
4108 if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
4109 struct dl **dlp;
4110 /* removing this disk. */
4111 ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
4112 for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
4113 struct dl *dl = *dlp;
4114 if (dl->pdnum == (signed)ent) {
4115 close(dl->fd);
4116 dl->fd = -1;
4117 /* FIXME this doesn't free
4118 * dl->devname */
4119 update->space = dl;
4120 *dlp = dl->next;
4121 break;
4122 }
4123 }
7d5a7ff3 4124 ddf_set_updates_pending(ddf);
4dd968cc
N
4125 return;
4126 }
88c164f4
NB
4127 if (!all_ff(ddf->phys->entries[ent].guid))
4128 return;
4129 ddf->phys->entries[ent] = pd->entries[0];
4130 ddf->phys->used_pdes = __cpu_to_be16(1 +
613b0d17 4131 __be16_to_cpu(ddf->phys->used_pdes));
7d5a7ff3 4132 ddf_set_updates_pending(ddf);
2cc2983d
N
4133 if (ddf->add_list) {
4134 struct active_array *a;
4135 struct dl *al = ddf->add_list;
4136 ddf->add_list = al->next;
4137
4138 al->next = ddf->dlist;
4139 ddf->dlist = al;
4140
4141 /* As a device has been added, we should check
4142 * for any degraded devices that might make
4143 * use of this spare */
4144 for (a = st->arrays ; a; a=a->next)
4145 a->check_degraded = 1;
4146 }
88c164f4
NB
4147 break;
4148
4149 case DDF_VIRT_RECORDS_MAGIC:
4150
4151 if (update->len != (sizeof(struct virtual_disk) +
4152 sizeof(struct virtual_entry)))
4153 return;
4154 vd = (struct virtual_disk*)update->buf;
4155
fb9d0acb 4156 ent = find_unused_vde(ddf);
4157 if (ent == DDF_NOTFOUND)
88c164f4
NB
4158 return;
4159 ddf->virt->entries[ent] = vd->entries[0];
4160 ddf->virt->populated_vdes = __cpu_to_be16(1 +
613b0d17 4161 __be16_to_cpu(ddf->virt->populated_vdes));
7d5a7ff3 4162 ddf_set_updates_pending(ddf);
88c164f4
NB
4163 break;
4164
4165 case DDF_VD_CONF_MAGIC:
2c514b71 4166 dprintf("len %d %d\n", update->len, ddf->conf_rec_len);
88c164f4
NB
4167
4168 mppe = __be16_to_cpu(ddf->anchor.max_primary_element_entries);
f21e18ca 4169 if ((unsigned)update->len != ddf->conf_rec_len * 512)
88c164f4
NB
4170 return;
4171 vc = (struct vd_config*)update->buf;
4172 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
4173 if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
4174 break;
2c514b71 4175 dprintf("vcl = %p\n", vcl);
88c164f4
NB
4176 if (vcl) {
4177 /* An update, just copy the phys_refnum and lba_offset
4178 * fields
4179 */
4180 memcpy(vcl->conf.phys_refnum, vc->phys_refnum,
4181 mppe * (sizeof(__u32) + sizeof(__u64)));
4182 } else {
4183 /* A new VD_CONF */
e6b9548d
DW
4184 if (!update->space)
4185 return;
88c164f4
NB
4186 vcl = update->space;
4187 update->space = NULL;
4188 vcl->next = ddf->conflist;
edd8d13c 4189 memcpy(&vcl->conf, vc, update->len);
88c164f4
NB
4190 vcl->lba_offset = (__u64*)
4191 &vcl->conf.phys_refnum[mppe];
fb9d0acb 4192 ent = find_vde_by_guid(ddf, vc->guid);
4193 if (ent == DDF_NOTFOUND)
4194 return;
4195 vcl->vcnum = ent;
88c164f4
NB
4196 ddf->conflist = vcl;
4197 }
c7079c84
N
4198 /* Set DDF_Transition on all Failed devices - to help
4199 * us detect those that are no longer in use
4200 */
4201 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4202 if (ddf->phys->entries[pdnum].state
4203 & __be16_to_cpu(DDF_Failed))
4204 ddf->phys->entries[pdnum].state
4205 |= __be16_to_cpu(DDF_Transition);
88c164f4
NB
4206 /* Now make sure vlist is correct for each dl. */
4207 for (dl = ddf->dlist; dl; dl = dl->next) {
f21e18ca
N
4208 unsigned int dn;
4209 unsigned int vn = 0;
8401644c 4210 int in_degraded = 0;
88c164f4
NB
4211 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
4212 for (dn=0; dn < ddf->mppe ; dn++)
4213 if (vcl->conf.phys_refnum[dn] ==
4214 dl->disk.refnum) {
8401644c 4215 int vstate;
2c514b71
NB
4216 dprintf("dev %d has %p at %d\n",
4217 dl->pdnum, vcl, vn);
c7079c84
N
4218 /* Clear the Transition flag */
4219 if (ddf->phys->entries[dl->pdnum].state
4220 & __be16_to_cpu(DDF_Failed))
4221 ddf->phys->entries[dl->pdnum].state &=
4222 ~__be16_to_cpu(DDF_Transition);
4223
88c164f4 4224 dl->vlist[vn++] = vcl;
8401644c
N
4225 vstate = ddf->virt->entries[vcl->vcnum].state
4226 & DDF_state_mask;
4227 if (vstate == DDF_state_degraded ||
4228 vstate == DDF_state_part_optimal)
4229 in_degraded = 1;
88c164f4
NB
4230 break;
4231 }
4232 while (vn < ddf->max_part)
4233 dl->vlist[vn++] = NULL;
7e1432fb
NB
4234 if (dl->vlist[0]) {
4235 ddf->phys->entries[dl->pdnum].type &=
4236 ~__cpu_to_be16(DDF_Global_Spare);
8401644c
N
4237 if (!(ddf->phys->entries[dl->pdnum].type &
4238 __cpu_to_be16(DDF_Active_in_VD))) {
613b0d17
N
4239 ddf->phys->entries[dl->pdnum].type |=
4240 __cpu_to_be16(DDF_Active_in_VD);
4241 if (in_degraded)
4242 ddf->phys->entries[dl->pdnum].state |=
4243 __cpu_to_be16(DDF_Rebuilding);
4244 }
7e1432fb
NB
4245 }
4246 if (dl->spare) {
4247 ddf->phys->entries[dl->pdnum].type &=
4248 ~__cpu_to_be16(DDF_Global_Spare);
4249 ddf->phys->entries[dl->pdnum].type |=
4250 __cpu_to_be16(DDF_Spare);
4251 }
4252 if (!dl->vlist[0] && !dl->spare) {
4253 ddf->phys->entries[dl->pdnum].type |=
4254 __cpu_to_be16(DDF_Global_Spare);
4255 ddf->phys->entries[dl->pdnum].type &=
4256 ~__cpu_to_be16(DDF_Spare |
4257 DDF_Active_in_VD);
4258 }
88c164f4 4259 }
c7079c84
N
4260
4261 /* Now remove any 'Failed' devices that are not part
4262 * of any VD. They will have the Transition flag set.
4263 * Once done, we need to update all dl->pdnum numbers.
4264 */
4265 pd2 = 0;
4266 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4267 if ((ddf->phys->entries[pdnum].state
4268 & __be16_to_cpu(DDF_Failed))
4269 && (ddf->phys->entries[pdnum].state
4270 & __be16_to_cpu(DDF_Transition)))
4271 /* skip this one */;
4272 else if (pdnum == pd2)
4273 pd2++;
4274 else {
4275 ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
4276 for (dl = ddf->dlist; dl; dl = dl->next)
4277 if (dl->pdnum == (int)pdnum)
4278 dl->pdnum = pd2;
4279 pd2++;
4280 }
4281 ddf->phys->used_pdes = __cpu_to_be16(pd2);
4282 while (pd2 < pdnum) {
4283 memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
4284 pd2++;
4285 }
4286
7d5a7ff3 4287 ddf_set_updates_pending(ddf);
88c164f4
NB
4288 break;
4289 case DDF_SPARE_ASSIGN_MAGIC:
4290 default: break;
4291 }
4292}
4293
edd8d13c
NB
4294static void ddf_prepare_update(struct supertype *st,
4295 struct metadata_update *update)
4296{
4297 /* This update arrived at managemon.
4298 * We are about to pass it to monitor.
4299 * If a malloc is needed, do it here.
4300 */
4301 struct ddf_super *ddf = st->sb;
4302 __u32 *magic = (__u32*)update->buf;
4303 if (*magic == DDF_VD_CONF_MAGIC)
e6b9548d 4304 if (posix_memalign(&update->space, 512,
613b0d17
N
4305 offsetof(struct vcl, conf)
4306 + ddf->conf_rec_len * 512) != 0)
e6b9548d 4307 update->space = NULL;
edd8d13c
NB
4308}
4309
7e1432fb
NB
4310/*
4311 * Check if the array 'a' is degraded but not failed.
4312 * If it is, find as many spares as are available and needed and
4313 * arrange for their inclusion.
4314 * We only choose devices which are not already in the array,
4315 * and prefer those with a spare-assignment to this array.
4316 * otherwise we choose global spares - assuming always that
4317 * there is enough room.
4318 * For each spare that we assign, we return an 'mdinfo' which
4319 * describes the position for the device in the array.
4320 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4321 * the new phys_refnum and lba_offset values.
4322 *
4323 * Only worry about BVDs at the moment.
4324 */
4325static struct mdinfo *ddf_activate_spare(struct active_array *a,
4326 struct metadata_update **updates)
4327{
4328 int working = 0;
4329 struct mdinfo *d;
4330 struct ddf_super *ddf = a->container->sb;
4331 int global_ok = 0;
4332 struct mdinfo *rv = NULL;
4333 struct mdinfo *di;
4334 struct metadata_update *mu;
4335 struct dl *dl;
4336 int i;
baba3f4e 4337 struct vcl *vcl;
7e1432fb
NB
4338 struct vd_config *vc;
4339 __u64 *lba;
baba3f4e 4340 unsigned int n_bvd;
7e1432fb 4341
7e1432fb
NB
4342 for (d = a->info.devs ; d ; d = d->next) {
4343 if ((d->curr_state & DS_FAULTY) &&
613b0d17 4344 d->state_fd >= 0)
7e1432fb
NB
4345 /* wait for Removal to happen */
4346 return NULL;
4347 if (d->state_fd >= 0)
4348 working ++;
4349 }
4350
2c514b71
NB
4351 dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
4352 a->info.array.level);
7e1432fb
NB
4353 if (working == a->info.array.raid_disks)
4354 return NULL; /* array not degraded */
4355 switch (a->info.array.level) {
4356 case 1:
4357 if (working == 0)
4358 return NULL; /* failed */
4359 break;
4360 case 4:
4361 case 5:
4362 if (working < a->info.array.raid_disks - 1)
4363 return NULL; /* failed */
4364 break;
4365 case 6:
4366 if (working < a->info.array.raid_disks - 2)
4367 return NULL; /* failed */
4368 break;
4369 default: /* concat or stripe */
4370 return NULL; /* failed */
4371 }
4372
4373 /* For each slot, if it is not working, find a spare */
4374 dl = ddf->dlist;
4375 for (i = 0; i < a->info.array.raid_disks; i++) {
4376 for (d = a->info.devs ; d ; d = d->next)
4377 if (d->disk.raid_disk == i)
4378 break;
2c514b71 4379 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
7e1432fb
NB
4380 if (d && (d->state_fd >= 0))
4381 continue;
4382
4383 /* OK, this device needs recovery. Find a spare */
4384 again:
4385 for ( ; dl ; dl = dl->next) {
4386 unsigned long long esize;
4387 unsigned long long pos;
4388 struct mdinfo *d2;
4389 int is_global = 0;
4390 int is_dedicated = 0;
4391 struct extent *ex;
f21e18ca 4392 unsigned int j;
7e1432fb
NB
4393 /* If in this array, skip */
4394 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
7590d562
N
4395 if (d2->state_fd >= 0 &&
4396 d2->disk.major == dl->major &&
7e1432fb 4397 d2->disk.minor == dl->minor) {
2c514b71 4398 dprintf("%x:%x already in array\n", dl->major, dl->minor);
7e1432fb
NB
4399 break;
4400 }
4401 if (d2)
4402 continue;
4403 if (ddf->phys->entries[dl->pdnum].type &
4404 __cpu_to_be16(DDF_Spare)) {
4405 /* Check spare assign record */
4406 if (dl->spare) {
4407 if (dl->spare->type & DDF_spare_dedicated) {
4408 /* check spare_ents for guid */
4409 for (j = 0 ;
4410 j < __be16_to_cpu(dl->spare->populated);
4411 j++) {
4412 if (memcmp(dl->spare->spare_ents[j].guid,
4413 ddf->virt->entries[a->info.container_member].guid,
4414 DDF_GUID_LEN) == 0)
4415 is_dedicated = 1;
4416 }
4417 } else
4418 is_global = 1;
4419 }
4420 } else if (ddf->phys->entries[dl->pdnum].type &
4421 __cpu_to_be16(DDF_Global_Spare)) {
4422 is_global = 1;
e0e7aeaa
N
4423 } else if (!(ddf->phys->entries[dl->pdnum].state &
4424 __cpu_to_be16(DDF_Failed))) {
4425 /* we can possibly use some of this */
4426 is_global = 1;
7e1432fb
NB
4427 }
4428 if ( ! (is_dedicated ||
4429 (is_global && global_ok))) {
2c514b71 4430 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
613b0d17 4431 is_dedicated, is_global);
7e1432fb
NB
4432 continue;
4433 }
4434
4435 /* We are allowed to use this device - is there space?
4436 * We need a->info.component_size sectors */
4437 ex = get_extents(ddf, dl);
4438 if (!ex) {
2c514b71 4439 dprintf("cannot get extents\n");
7e1432fb
NB
4440 continue;
4441 }
4442 j = 0; pos = 0;
4443 esize = 0;
4444
4445 do {
4446 esize = ex[j].start - pos;
4447 if (esize >= a->info.component_size)
4448 break;
e5cc7d46
N
4449 pos = ex[j].start + ex[j].size;
4450 j++;
4451 } while (ex[j-1].size);
7e1432fb
NB
4452
4453 free(ex);
4454 if (esize < a->info.component_size) {
e5cc7d46
N
4455 dprintf("%x:%x has no room: %llu %llu\n",
4456 dl->major, dl->minor,
2c514b71 4457 esize, a->info.component_size);
7e1432fb
NB
4458 /* No room */
4459 continue;
4460 }
4461
4462 /* Cool, we have a device with some space at pos */
503975b9 4463 di = xcalloc(1, sizeof(*di));
7e1432fb
NB
4464 di->disk.number = i;
4465 di->disk.raid_disk = i;
4466 di->disk.major = dl->major;
4467 di->disk.minor = dl->minor;
4468 di->disk.state = 0;
d23534e4 4469 di->recovery_start = 0;
7e1432fb
NB
4470 di->data_offset = pos;
4471 di->component_size = a->info.component_size;
4472 di->container_member = dl->pdnum;
4473 di->next = rv;
4474 rv = di;
2c514b71
NB
4475 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
4476 i, pos);
7e1432fb
NB
4477
4478 break;
4479 }
4480 if (!dl && ! global_ok) {
4481 /* not enough dedicated spares, try global */
4482 global_ok = 1;
4483 dl = ddf->dlist;
4484 goto again;
4485 }
4486 }
4487
4488 if (!rv)
4489 /* No spares found */
4490 return rv;
4491 /* Now 'rv' has a list of devices to return.
4492 * Create a metadata_update record to update the
4493 * phys_refnum and lba_offset values
4494 */
503975b9
N
4495 mu = xmalloc(sizeof(*mu));
4496 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
79244939
DW
4497 free(mu);
4498 mu = NULL;
4499 }
503975b9 4500 mu->buf = xmalloc(ddf->conf_rec_len * 512);
7590d562
N
4501 mu->len = ddf->conf_rec_len * 512;
4502 mu->space = NULL;
f50ae22e 4503 mu->space_list = NULL;
7e1432fb 4504 mu->next = *updates;
baba3f4e 4505 vc = find_vdcr(ddf, a->info.container_member, di->disk.raid_disk,
4506 &n_bvd, &vcl);
7e1432fb
NB
4507 memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
4508
4509 vc = (struct vd_config*)mu->buf;
4510 lba = (__u64*)&vc->phys_refnum[ddf->mppe];
4511 for (di = rv ; di ; di = di->next) {
4512 vc->phys_refnum[di->disk.raid_disk] =
4513 ddf->phys->entries[dl->pdnum].refnum;
4514 lba[di->disk.raid_disk] = di->data_offset;
4515 }
4516 *updates = mu;
4517 return rv;
4518}
0e600426 4519#endif /* MDASSEMBLE */
7e1432fb 4520
b640a252
N
4521static int ddf_level_to_layout(int level)
4522{
4523 switch(level) {
4524 case 0:
4525 case 1:
4526 return 0;
4527 case 5:
4528 return ALGORITHM_LEFT_SYMMETRIC;
4529 case 6:
4530 return ALGORITHM_ROTATING_N_CONTINUE;
4531 case 10:
4532 return 0x102;
4533 default:
4534 return UnSet;
4535 }
4536}
4537
30f58b22
DW
4538static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
4539{
4540 if (level && *level == UnSet)
4541 *level = LEVEL_CONTAINER;
4542
4543 if (level && layout && *layout == UnSet)
4544 *layout = ddf_level_to_layout(*level);
4545}
4546
a322f70c
DW
4547struct superswitch super_ddf = {
4548#ifndef MDASSEMBLE
4549 .examine_super = examine_super_ddf,
4550 .brief_examine_super = brief_examine_super_ddf,
4737ae25 4551 .brief_examine_subarrays = brief_examine_subarrays_ddf,
bceedeec 4552 .export_examine_super = export_examine_super_ddf,
a322f70c
DW
4553 .detail_super = detail_super_ddf,
4554 .brief_detail_super = brief_detail_super_ddf,
4555 .validate_geometry = validate_geometry_ddf,
78e44928 4556 .write_init_super = write_init_super_ddf,
0e600426 4557 .add_to_super = add_to_super_ddf,
4dd968cc 4558 .remove_from_super = remove_from_super_ddf,
2b959fbf 4559 .load_container = load_container_ddf,
74db60b0 4560 .copy_metadata = copy_metadata_ddf,
a322f70c
DW
4561#endif
4562 .match_home = match_home_ddf,
4563 .uuid_from_super= uuid_from_super_ddf,
4564 .getinfo_super = getinfo_super_ddf,
4565 .update_super = update_super_ddf,
4566
4567 .avail_size = avail_size_ddf,
4568
a19c88b8
NB
4569 .compare_super = compare_super_ddf,
4570
a322f70c 4571 .load_super = load_super_ddf,
ba7eb04f 4572 .init_super = init_super_ddf,
955e9ea1 4573 .store_super = store_super_ddf,
a322f70c
DW
4574 .free_super = free_super_ddf,
4575 .match_metadata_desc = match_metadata_desc_ddf,
78e44928 4576 .container_content = container_content_ddf,
30f58b22 4577 .default_geometry = default_geometry_ddf,
a322f70c 4578
a322f70c 4579 .external = 1,
549e9569 4580
0e600426 4581#ifndef MDASSEMBLE
549e9569
NB
4582/* for mdmon */
4583 .open_new = ddf_open_new,
ed9d66aa 4584 .set_array_state= ddf_set_array_state,
549e9569
NB
4585 .set_disk = ddf_set_disk,
4586 .sync_metadata = ddf_sync_metadata,
88c164f4 4587 .process_update = ddf_process_update,
edd8d13c 4588 .prepare_update = ddf_prepare_update,
7e1432fb 4589 .activate_spare = ddf_activate_spare,
0e600426 4590#endif
4cce4069 4591 .name = "ddf",
a322f70c 4592};