]> git.ipfire.org Git - thirdparty/mdadm.git/blame - super-ddf.c
DDF: add endian-safe typedefs
[thirdparty/mdadm.git] / super-ddf.c
CommitLineData
a322f70c
DW
1/*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
e736b623 4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
a322f70c
DW
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neil@brown.name>
23 *
 * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
26 */
27
28#define HAVE_STDINT_H 1
29#include "mdadm.h"
549e9569 30#include "mdmon.h"
a322f70c
DW
31#include "sha1.h"
32#include <values.h>
33
a322f70c
DW
/* Non-official T10 vendor name used when creating GUIDs */
static char T10[] = "Linux-MD";

/* DDF timestamps count seconds from 1980, Linux timestamps from 1970.
 * DECADE is the number of seconds in the seventies (10 years, 2 of
 * them leap years) and must be added to a DDF timestamp to obtain a
 * Linux one.
 */
#define DECADE (3600*24*(365*10+2))

/* CRC routine provided elsewhere in the program */
unsigned long crc32(
	unsigned long crc,
	const unsigned char *buf,
	unsigned len);

/* Sentinel values built from the all-ones unsigned int */
#define DDF_NOTFOUND (~0U)
#define DDF_CONTAINER (DDF_NOTFOUND-1)
49
a322f70c
DW
50/* The DDF metadata handling.
51 * DDF metadata lives at the end of the device.
52 * The last 512 byte block provides an 'anchor' which is used to locate
53 * the rest of the metadata which usually lives immediately behind the anchor.
54 *
55 * Note:
56 * - all multibyte numeric fields are bigendian.
57 * - all strings are space padded.
58 *
59 */
60
4d1bdc18 61typedef struct __be16 {
62 __u16 _v16;
63} be16;
64#define be16_eq(x, y) ((x)._v16 == (y)._v16)
65
66typedef struct __be32 {
67 __u32 _v32;
68} be32;
69#define be32_eq(x, y) ((x)._v32 == (y)._v32)
70
71typedef struct __be64 {
72 __u64 _v64;
73} be64;
74#define be64_eq(x, y) ((x)._v64 == (y)._v64)
75
76#define be16_to_cpu(be) __be16_to_cpu((be)._v16)
77static inline be16 cpu_to_be16(__u16 x)
78{
79 be16 be = { ._v16 = __cpu_to_be16(x) };
80 return be;
81}
82
83#define be32_to_cpu(be) __be32_to_cpu((be)._v32)
84static inline be32 cpu_to_be32(__u32 x)
85{
86 be32 be = { ._v32 = __cpu_to_be32(x) };
87 return be;
88}
89
90#define be64_to_cpu(be) __be64_to_cpu((be)._v64)
91static inline be64 cpu_to_be64(__u64 x)
92{
93 be64 be = { ._v64 = __cpu_to_be64(x) };
94 return be;
95}
96
a322f70c
DW
/* Primary Raid Level (PRL) */
#define DDF_RAID0	0x00
#define DDF_RAID1	0x01
#define DDF_RAID3	0x03
#define DDF_RAID4	0x04
#define DDF_RAID5	0x05
#define DDF_RAID1E	0x11
#define DDF_JBOD	0x0f
#define DDF_CONCAT	0x1f
#define DDF_RAID5E	0x15
#define DDF_RAID5EE	0x25
#define DDF_RAID6	0x06

/* Raid Level Qualifier (RLQ) */
#define DDF_RAID0_SIMPLE	0x00
#define DDF_RAID1_SIMPLE	0x00 /* just 2 devices in this plex */
#define DDF_RAID1_MULTI		0x01 /* exactly 3 devices in this plex */
#define DDF_RAID3_0		0x00 /* parity in first extent */
#define DDF_RAID3_N		0x01 /* parity in last extent */
#define DDF_RAID4_0		0x00 /* parity in first extent */
#define DDF_RAID4_N		0x01 /* parity in last extent */
/* these apply to raid5e and raid5ee as well */
#define DDF_RAID5_0_RESTART	0x00 /* same as 'right asymmetric' - layout 1 */
#define DDF_RAID6_0_RESTART	0x01 /* raid6 different from raid5 here!!! */
#define DDF_RAID5_N_RESTART	0x02 /* same as 'left asymmetric' - layout 0 */
#define DDF_RAID5_N_CONTINUE	0x03 /* same as 'left symmetric' - layout 2 */

#define DDF_RAID1E_ADJACENT	0x00 /* raid10 nearcopies==2 */
#define DDF_RAID1E_OFFSET	0x01 /* raid10 offsetcopies==2 */

/* Secondary RAID Level (SRL) */
#define DDF_2STRIPED	0x00 /* This is weirder than RAID0 !! */
#define DDF_2MIRRORED	0x01
#define DDF_2CONCAT	0x02
#define DDF_2SPANNED	0x03 /* This is also weird - be careful */
132
/* Magic numbers, already converted to big-endian so that they can be
 * compared directly with the 'magic' field of an on-disk structure.
 */
#define DDF_HEADER_MAGIC	__cpu_to_be32(0xDE11DE11)
#define DDF_CONTROLLER_MAGIC	__cpu_to_be32(0xAD111111)
#define DDF_PHYS_RECORDS_MAGIC	__cpu_to_be32(0x22222222)
#define DDF_PHYS_DATA_MAGIC	__cpu_to_be32(0x33333333)
#define DDF_VIRT_RECORDS_MAGIC	__cpu_to_be32(0xDDDDDDDD)
#define DDF_VD_CONF_MAGIC	__cpu_to_be32(0xEEEEEEEE)
#define DDF_SPARE_ASSIGN_MAGIC	__cpu_to_be32(0x55555555)
#define DDF_VU_CONF_MAGIC	__cpu_to_be32(0x88888888)
#define DDF_VENDOR_LOG_MAGIC	__cpu_to_be32(0x01dBEEF0)
#define DDF_BBM_LOG_MAGIC	__cpu_to_be32(0xABADB10C)

/* Length in bytes of every GUID field */
#define DDF_GUID_LEN	24
/* Revision strings accepted in the header 'revision' field */
#define DDF_REVISION_0	"01.00.00"
#define DDF_REVISION_2	"01.02.00"
a322f70c
DW
148
149struct ddf_header {
88c164f4 150 __u32 magic; /* DDF_HEADER_MAGIC */
a322f70c
DW
151 __u32 crc;
152 char guid[DDF_GUID_LEN];
59e36268 153 char revision[8]; /* 01.02.00 */
a322f70c
DW
154 __u32 seq; /* starts at '1' */
155 __u32 timestamp;
156 __u8 openflag;
157 __u8 foreignflag;
158 __u8 enforcegroups;
159 __u8 pad0; /* 0xff */
160 __u8 pad1[12]; /* 12 * 0xff */
161 /* 64 bytes so far */
162 __u8 header_ext[32]; /* reserved: fill with 0xff */
163 __u64 primary_lba;
164 __u64 secondary_lba;
165 __u8 type;
166 __u8 pad2[3]; /* 0xff */
167 __u32 workspace_len; /* sectors for vendor space -
168 * at least 32768(sectors) */
169 __u64 workspace_lba;
170 __u16 max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
171 __u16 max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
172 __u16 max_partitions; /* i.e. max num of configuration
173 record entries per disk */
174 __u16 config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
175 *12/512) */
176 __u16 max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
177 __u8 pad3[54]; /* 0xff */
178 /* 192 bytes so far */
179 __u32 controller_section_offset;
180 __u32 controller_section_length;
181 __u32 phys_section_offset;
182 __u32 phys_section_length;
183 __u32 virt_section_offset;
184 __u32 virt_section_length;
185 __u32 config_section_offset;
186 __u32 config_section_length;
187 __u32 data_section_offset;
188 __u32 data_section_length;
189 __u32 bbm_section_offset;
190 __u32 bbm_section_length;
191 __u32 diag_space_offset;
192 __u32 diag_space_length;
193 __u32 vendor_offset;
194 __u32 vendor_length;
195 /* 256 bytes so far */
196 __u8 pad4[256]; /* 0xff */
197};
198
199/* type field */
200#define DDF_HEADER_ANCHOR 0x00
201#define DDF_HEADER_PRIMARY 0x01
202#define DDF_HEADER_SECONDARY 0x02
203
204/* The content of the 'controller section' - global scope */
205struct ddf_controller_data {
88c164f4 206 __u32 magic; /* DDF_CONTROLLER_MAGIC */
a322f70c
DW
207 __u32 crc;
208 char guid[DDF_GUID_LEN];
209 struct controller_type {
210 __u16 vendor_id;
211 __u16 device_id;
212 __u16 sub_vendor_id;
213 __u16 sub_device_id;
214 } type;
215 char product_id[16];
216 __u8 pad[8]; /* 0xff */
217 __u8 vendor_data[448];
218};
219
220/* The content of phys_section - global scope */
221struct phys_disk {
88c164f4 222 __u32 magic; /* DDF_PHYS_RECORDS_MAGIC */
a322f70c
DW
223 __u32 crc;
224 __u16 used_pdes;
225 __u16 max_pdes;
226 __u8 pad[52];
227 struct phys_disk_entry {
228 char guid[DDF_GUID_LEN];
229 __u32 refnum;
230 __u16 type;
231 __u16 state;
232 __u64 config_size; /* DDF structures must be after here */
233 char path[18]; /* another horrible structure really */
234 __u8 pad[6];
235 } entries[0];
236};
237
238/* phys_disk_entry.type is a bitmap - bigendian remember */
239#define DDF_Forced_PD_GUID 1
240#define DDF_Active_in_VD 2
88c164f4 241#define DDF_Global_Spare 4 /* VD_CONF records are ignored */
a322f70c
DW
242#define DDF_Spare 8 /* overrides Global_spare */
243#define DDF_Foreign 16
244#define DDF_Legacy 32 /* no DDF on this device */
245
246#define DDF_Interface_mask 0xf00
247#define DDF_Interface_SCSI 0x100
248#define DDF_Interface_SAS 0x200
249#define DDF_Interface_SATA 0x300
250#define DDF_Interface_FC 0x400
251
252/* phys_disk_entry.state is a bigendian bitmap */
253#define DDF_Online 1
254#define DDF_Failed 2 /* overrides 1,4,8 */
255#define DDF_Rebuilding 4
256#define DDF_Transition 8
257#define DDF_SMART 16
258#define DDF_ReadErrors 32
259#define DDF_Missing 64
260
261/* The content of the virt_section global scope */
262struct virtual_disk {
88c164f4 263 __u32 magic; /* DDF_VIRT_RECORDS_MAGIC */
a322f70c
DW
264 __u32 crc;
265 __u16 populated_vdes;
266 __u16 max_vdes;
267 __u8 pad[52];
268 struct virtual_entry {
269 char guid[DDF_GUID_LEN];
270 __u16 unit;
271 __u16 pad0; /* 0xffff */
272 __u16 guid_crc;
273 __u16 type;
274 __u8 state;
275 __u8 init_state;
276 __u8 pad1[14];
277 char name[16];
278 } entries[0];
279};
280
281/* virtual_entry.type is a bitmap - bigendian */
282#define DDF_Shared 1
283#define DDF_Enforce_Groups 2
284#define DDF_Unicode 4
285#define DDF_Owner_Valid 8
286
287/* virtual_entry.state is a bigendian bitmap */
288#define DDF_state_mask 0x7
289#define DDF_state_optimal 0x0
290#define DDF_state_degraded 0x1
291#define DDF_state_deleted 0x2
292#define DDF_state_missing 0x3
293#define DDF_state_failed 0x4
7a7cc504 294#define DDF_state_part_optimal 0x5
a322f70c
DW
295
296#define DDF_state_morphing 0x8
297#define DDF_state_inconsistent 0x10
298
299/* virtual_entry.init_state is a bigendian bitmap */
300#define DDF_initstate_mask 0x03
301#define DDF_init_not 0x00
7a7cc504
NB
302#define DDF_init_quick 0x01 /* initialisation is progress.
303 * i.e. 'state_inconsistent' */
a322f70c
DW
304#define DDF_init_full 0x02
305
306#define DDF_access_mask 0xc0
307#define DDF_access_rw 0x00
308#define DDF_access_ro 0x80
309#define DDF_access_blocked 0xc0
310
311/* The content of the config_section - local scope
312 * It has multiple records each config_record_len sectors
313 * They can be vd_config or spare_assign
314 */
315
316struct vd_config {
88c164f4 317 __u32 magic; /* DDF_VD_CONF_MAGIC */
a322f70c
DW
318 __u32 crc;
319 char guid[DDF_GUID_LEN];
320 __u32 timestamp;
321 __u32 seqnum;
322 __u8 pad0[24];
323 __u16 prim_elmnt_count;
324 __u8 chunk_shift; /* 0 == 512, 1==1024 etc */
325 __u8 prl;
326 __u8 rlq;
327 __u8 sec_elmnt_count;
328 __u8 sec_elmnt_seq;
329 __u8 srl;
598f0d58
NB
330 __u64 blocks; /* blocks per component could be different
331 * on different component devices...(only
332 * for concat I hope) */
333 __u64 array_blocks; /* blocks in array */
a322f70c
DW
334 __u8 pad1[8];
335 __u32 spare_refs[8];
336 __u8 cache_pol[8];
337 __u8 bg_rate;
338 __u8 pad2[3];
339 __u8 pad3[52];
340 __u8 pad4[192];
341 __u8 v0[32]; /* reserved- 0xff */
342 __u8 v1[32]; /* reserved- 0xff */
343 __u8 v2[16]; /* reserved- 0xff */
344 __u8 v3[16]; /* reserved- 0xff */
345 __u8 vendor[32];
346 __u32 phys_refnum[0]; /* refnum of each disk in sequence */
347 /*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
348 bvd are always the same size */
349};
57a66662 350#define LBA_OFFSET(ddf, vd) ((__u64 *) &(vd)->phys_refnum[(ddf)->mppe])
a322f70c
DW
351
352/* vd_config.cache_pol[7] is a bitmap */
353#define DDF_cache_writeback 1 /* else writethrough */
354#define DDF_cache_wadaptive 2 /* only applies if writeback */
355#define DDF_cache_readahead 4
356#define DDF_cache_radaptive 8 /* only if doing read-ahead */
357#define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
358#define DDF_cache_wallowed 32 /* enable write caching */
359#define DDF_cache_rallowed 64 /* enable read caching */
360
361struct spare_assign {
88c164f4 362 __u32 magic; /* DDF_SPARE_ASSIGN_MAGIC */
a322f70c
DW
363 __u32 crc;
364 __u32 timestamp;
365 __u8 reserved[7];
366 __u8 type;
367 __u16 populated; /* SAEs used */
368 __u16 max; /* max SAEs */
369 __u8 pad[8];
370 struct spare_assign_entry {
371 char guid[DDF_GUID_LEN];
372 __u16 secondary_element;
373 __u8 pad[6];
374 } spare_ents[0];
375};
376/* spare_assign.type is a bitmap */
377#define DDF_spare_dedicated 0x1 /* else global */
378#define DDF_spare_revertible 0x2 /* else committable */
379#define DDF_spare_active 0x4 /* else not active */
380#define DDF_spare_affinity 0x8 /* enclosure affinity */
381
382/* The data_section contents - local scope */
383struct disk_data {
88c164f4 384 __u32 magic; /* DDF_PHYS_DATA_MAGIC */
a322f70c
DW
385 __u32 crc;
386 char guid[DDF_GUID_LEN];
387 __u32 refnum; /* crc of some magic drive data ... */
388 __u8 forced_ref; /* set when above was not result of magic */
389 __u8 forced_guid; /* set if guid was forced rather than magic */
390 __u8 vendor[32];
391 __u8 pad[442];
392};
393
394/* bbm_section content */
395struct bad_block_log {
396 __u32 magic;
397 __u32 crc;
398 __u16 entry_count;
399 __u32 spare_count;
400 __u8 pad[10];
401 __u64 first_spare;
402 struct mapped_block {
403 __u64 defective_start;
404 __u32 replacement_start;
405 __u16 remap_count;
406 __u8 pad[2];
407 } entries[0];
408};
409
410/* Struct for internally holding ddf structures */
411/* The DDF structure stored on each device is potentially
412 * quite different, as some data is global and some is local.
413 * The global data is:
414 * - ddf header
415 * - controller_data
416 * - Physical disk records
417 * - Virtual disk records
418 * The local data is:
419 * - Configuration records
420 * - Physical Disk data section
421 * ( and Bad block and vendor which I don't care about yet).
422 *
423 * The local data is parsed into separate lists as it is read
424 * and reconstructed for writing. This means that we only need
425 * to make config changes once and they are automatically
426 * propagated to all devices.
427 * Note that the ddf_super has space of the conf and disk data
428 * for this disk and also for a list of all such data.
429 * The list is only used for the superblock that is being
430 * built in Create or Assemble to describe the whole array.
431 */
432struct ddf_super {
6416d527 433 struct ddf_header anchor, primary, secondary;
a322f70c 434 struct ddf_controller_data controller;
6416d527 435 struct ddf_header *active;
a322f70c
DW
436 struct phys_disk *phys;
437 struct virtual_disk *virt;
438 int pdsize, vdsize;
f21e18ca 439 unsigned int max_part, mppe, conf_rec_len;
d2ca6449 440 int currentdev;
18a2f463 441 int updates_pending;
a322f70c 442 struct vcl {
6416d527
NB
443 union {
444 char space[512];
445 struct {
446 struct vcl *next;
f21e18ca 447 unsigned int vcnum; /* index into ->virt */
8ec5d685 448 struct vd_config **other_bvds;
6416d527
NB
449 __u64 *block_sizes; /* NULL if all the same */
450 };
451 };
a322f70c 452 struct vd_config conf;
d2ca6449 453 } *conflist, *currentconf;
a322f70c 454 struct dl {
6416d527
NB
455 union {
456 char space[512];
457 struct {
458 struct dl *next;
459 int major, minor;
460 char *devname;
461 int fd;
462 unsigned long long size; /* sectors */
097bcf00 463 unsigned long long primary_lba; /* sectors */
464 unsigned long long secondary_lba; /* sectors */
465 unsigned long long workspace_lba; /* sectors */
6416d527
NB
466 int pdnum; /* index in ->phys */
467 struct spare_assign *spare;
8592f29d
N
468 void *mdupdate; /* hold metadata update */
469
470 /* These fields used by auto-layout */
471 int raiddisk; /* slot to fill in autolayout */
472 __u64 esize;
6416d527
NB
473 };
474 };
a322f70c 475 struct disk_data disk;
b2280677 476 struct vcl *vlist[0]; /* max_part in size */
2cc2983d 477 } *dlist, *add_list;
a322f70c
DW
478};
479
/* Fallback for builds where no header has supplied offsetof():
 * byte offset of member f inside type t.
 */
#ifndef offsetof
#define offsetof(t,f) ((size_t)&(((t*)0)->f))
#endif
483
#if DEBUG
static int all_ff(const char *guid);

/* Debug helper: print, via dprintf(), the state and init_state bytes
 * of each virtual disk entry, prefixed by this function's name and
 * 'msg'.  Entries whose guid makes all_ff() return non-zero are
 * skipped.
 */
static void pr_state(struct ddf_super *ddf, const char *msg)
{
	const unsigned int n_entries =
		__be16_to_cpu(ddf->active->max_vd_entries);
	unsigned int vd;

	dprintf("%s/%s: ", __func__, msg);
	for (vd = 0; vd < n_entries; vd++) {
		const struct virtual_entry *ve = &ddf->virt->entries[vd];

		if (!all_ff(ve->guid))
			dprintf("%u(s=%02x i=%02x) ", vd,
				ve->state, ve->init_state);
	}
	dprintf("\n");
}
#else
/* Without DEBUG this is a no-op */
static void pr_state(const struct ddf_super *ddf, const char *msg) {}
#endif
502
35c3606d 503static void _ddf_set_updates_pending(struct ddf_super *ddf, const char *func)
504{
505 ddf->updates_pending = 1;
506 ddf->active->seq = __cpu_to_be32((__be32_to_cpu(ddf->active->seq)+1));
507 pr_state(ddf, func);
508}
509
510#define ddf_set_updates_pending(x) _ddf_set_updates_pending((x), __func__)
7d5a7ff3 511
fcc22180 512static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
513 __u32 refnum, unsigned int nmax,
514 const struct vd_config **bvd,
515 unsigned int *idx);
516
f21e18ca 517static unsigned int calc_crc(void *buf, int len)
a322f70c
DW
518{
519 /* crcs are always at the same place as in the ddf_header */
520 struct ddf_header *ddf = buf;
521 __u32 oldcrc = ddf->crc;
522 __u32 newcrc;
523 ddf->crc = 0xffffffff;
524
525 newcrc = crc32(0, buf, len);
526 ddf->crc = oldcrc;
4abe6b70
N
527 /* The crc is store (like everything) bigendian, so convert
528 * here for simplicity
529 */
530 return __cpu_to_be32(newcrc);
a322f70c
DW
531}
532
a3163bf0 533#define DDF_INVALID_LEVEL 0xff
534#define DDF_NO_SECONDARY 0xff
535static int err_bad_md_layout(const mdu_array_info_t *array)
536{
537 pr_err("RAID%d layout %x with %d disks is unsupported for DDF\n",
538 array->level, array->layout, array->raid_disks);
2aba583f 539 return -1;
a3163bf0 540}
541
542static int layout_md2ddf(const mdu_array_info_t *array,
543 struct vd_config *conf)
544{
545 __u16 prim_elmnt_count = __cpu_to_be16(array->raid_disks);
546 __u8 prl = DDF_INVALID_LEVEL, rlq = 0;
547 __u8 sec_elmnt_count = 1;
548 __u8 srl = DDF_NO_SECONDARY;
549
550 switch (array->level) {
551 case LEVEL_LINEAR:
552 prl = DDF_CONCAT;
553 break;
554 case 0:
555 rlq = DDF_RAID0_SIMPLE;
556 prl = DDF_RAID0;
557 break;
558 case 1:
559 switch (array->raid_disks) {
560 case 2:
561 rlq = DDF_RAID1_SIMPLE;
562 break;
563 case 3:
564 rlq = DDF_RAID1_MULTI;
565 break;
566 default:
567 return err_bad_md_layout(array);
568 }
569 prl = DDF_RAID1;
570 break;
571 case 4:
572 if (array->layout != 0)
573 return err_bad_md_layout(array);
574 rlq = DDF_RAID4_N;
575 prl = DDF_RAID4;
576 break;
577 case 5:
578 switch (array->layout) {
579 case ALGORITHM_LEFT_ASYMMETRIC:
580 rlq = DDF_RAID5_N_RESTART;
581 break;
582 case ALGORITHM_RIGHT_ASYMMETRIC:
583 rlq = DDF_RAID5_0_RESTART;
584 break;
585 case ALGORITHM_LEFT_SYMMETRIC:
586 rlq = DDF_RAID5_N_CONTINUE;
587 break;
588 case ALGORITHM_RIGHT_SYMMETRIC:
589 /* not mentioned in standard */
590 default:
591 return err_bad_md_layout(array);
592 }
593 prl = DDF_RAID5;
594 break;
595 case 6:
596 switch (array->layout) {
597 case ALGORITHM_ROTATING_N_RESTART:
598 rlq = DDF_RAID5_N_RESTART;
599 break;
600 case ALGORITHM_ROTATING_ZERO_RESTART:
601 rlq = DDF_RAID6_0_RESTART;
602 break;
603 case ALGORITHM_ROTATING_N_CONTINUE:
604 rlq = DDF_RAID5_N_CONTINUE;
605 break;
606 default:
607 return err_bad_md_layout(array);
608 }
609 prl = DDF_RAID6;
610 break;
611 case 10:
612 if (array->raid_disks % 2 == 0 && array->layout == 0x102) {
613 rlq = DDF_RAID1_SIMPLE;
614 prim_elmnt_count = __cpu_to_be16(2);
615 sec_elmnt_count = array->raid_disks / 2;
616 } else if (array->raid_disks % 3 == 0
617 && array->layout == 0x103) {
618 rlq = DDF_RAID1_MULTI;
619 prim_elmnt_count = __cpu_to_be16(3);
620 sec_elmnt_count = array->raid_disks / 3;
621 } else
622 return err_bad_md_layout(array);
623 srl = DDF_2SPANNED;
624 prl = DDF_RAID1;
625 break;
626 default:
627 return err_bad_md_layout(array);
628 }
629 conf->prl = prl;
630 conf->prim_elmnt_count = prim_elmnt_count;
631 conf->rlq = rlq;
632 conf->srl = srl;
633 conf->sec_elmnt_count = sec_elmnt_count;
634 return 0;
635}
636
8a2848a7 637static int err_bad_ddf_layout(const struct vd_config *conf)
638{
639 pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
640 conf->prl, conf->rlq, __be16_to_cpu(conf->prim_elmnt_count));
641 return -1;
642}
643
644static int layout_ddf2md(const struct vd_config *conf,
645 mdu_array_info_t *array)
646{
647 int level = LEVEL_UNSUPPORTED;
648 int layout = 0;
649 int raiddisks = __be16_to_cpu(conf->prim_elmnt_count);
650
651 if (conf->sec_elmnt_count > 1) {
652 /* see also check_secondary() */
653 if (conf->prl != DDF_RAID1 ||
654 (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED)) {
655 pr_err("Unsupported secondary RAID level %u/%u\n",
656 conf->prl, conf->srl);
657 return -1;
658 }
659 if (raiddisks == 2 && conf->rlq == DDF_RAID1_SIMPLE)
660 layout = 0x102;
661 else if (raiddisks == 3 && conf->rlq == DDF_RAID1_MULTI)
662 layout = 0x103;
663 else
664 return err_bad_ddf_layout(conf);
665 raiddisks *= conf->sec_elmnt_count;
666 level = 10;
667 goto good;
668 }
669
670 switch (conf->prl) {
671 case DDF_CONCAT:
672 level = LEVEL_LINEAR;
673 break;
674 case DDF_RAID0:
675 if (conf->rlq != DDF_RAID0_SIMPLE)
676 return err_bad_ddf_layout(conf);
677 level = 0;
678 break;
679 case DDF_RAID1:
680 if (!((conf->rlq == DDF_RAID1_SIMPLE && raiddisks == 2) ||
681 (conf->rlq == DDF_RAID1_MULTI && raiddisks == 3)))
682 return err_bad_ddf_layout(conf);
683 level = 1;
684 break;
685 case DDF_RAID4:
686 if (conf->rlq != DDF_RAID4_N)
687 return err_bad_ddf_layout(conf);
688 level = 4;
689 break;
690 case DDF_RAID5:
691 switch (conf->rlq) {
692 case DDF_RAID5_N_RESTART:
693 layout = ALGORITHM_LEFT_ASYMMETRIC;
694 break;
695 case DDF_RAID5_0_RESTART:
696 layout = ALGORITHM_RIGHT_ASYMMETRIC;
697 break;
698 case DDF_RAID5_N_CONTINUE:
699 layout = ALGORITHM_LEFT_SYMMETRIC;
700 break;
701 default:
702 return err_bad_ddf_layout(conf);
703 }
704 level = 5;
705 break;
706 case DDF_RAID6:
707 switch (conf->rlq) {
708 case DDF_RAID5_N_RESTART:
709 layout = ALGORITHM_ROTATING_N_RESTART;
710 break;
711 case DDF_RAID6_0_RESTART:
712 layout = ALGORITHM_ROTATING_ZERO_RESTART;
713 break;
714 case DDF_RAID5_N_CONTINUE:
715 layout = ALGORITHM_ROTATING_N_CONTINUE;
716 break;
717 default:
718 return err_bad_ddf_layout(conf);
719 }
720 level = 6;
721 break;
722 default:
723 return err_bad_ddf_layout(conf);
724 };
725
726good:
727 array->level = level;
728 array->layout = layout;
729 array->raid_disks = raiddisks;
730 return 0;
731}
732
a322f70c
DW
733static int load_ddf_header(int fd, unsigned long long lba,
734 unsigned long long size,
735 int type,
736 struct ddf_header *hdr, struct ddf_header *anchor)
737{
738 /* read a ddf header (primary or secondary) from fd/lba
739 * and check that it is consistent with anchor
740 * Need to check:
741 * magic, crc, guid, rev, and LBA's header_type, and
742 * everything after header_type must be the same
743 */
744 if (lba >= size-1)
745 return 0;
746
747 if (lseek64(fd, lba<<9, 0) < 0)
748 return 0;
749
750 if (read(fd, hdr, 512) != 512)
751 return 0;
752
753 if (hdr->magic != DDF_HEADER_MAGIC)
754 return 0;
755 if (calc_crc(hdr, 512) != hdr->crc)
756 return 0;
757 if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
758 memcmp(anchor->revision, hdr->revision, 8) != 0 ||
759 anchor->primary_lba != hdr->primary_lba ||
760 anchor->secondary_lba != hdr->secondary_lba ||
761 hdr->type != type ||
762 memcmp(anchor->pad2, hdr->pad2, 512 -
763 offsetof(struct ddf_header, pad2)) != 0)
764 return 0;
765
766 /* Looks good enough to me... */
767 return 1;
768}
769
770static void *load_section(int fd, struct ddf_super *super, void *buf,
771 __u32 offset_be, __u32 len_be, int check)
772{
773 unsigned long long offset = __be32_to_cpu(offset_be);
774 unsigned long long len = __be32_to_cpu(len_be);
775 int dofree = (buf == NULL);
776
777 if (check)
778 if (len != 2 && len != 8 && len != 32
779 && len != 128 && len != 512)
780 return NULL;
781
782 if (len > 1024)
783 return NULL;
784 if (buf) {
785 /* All pre-allocated sections are a single block */
786 if (len != 1)
787 return NULL;
3d2c4fc7
DW
788 } else if (posix_memalign(&buf, 512, len<<9) != 0)
789 buf = NULL;
6416d527 790
a322f70c
DW
791 if (!buf)
792 return NULL;
793
794 if (super->active->type == 1)
795 offset += __be64_to_cpu(super->active->primary_lba);
796 else
797 offset += __be64_to_cpu(super->active->secondary_lba);
798
f21e18ca 799 if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
a322f70c
DW
800 if (dofree)
801 free(buf);
802 return NULL;
803 }
f21e18ca 804 if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
a322f70c
DW
805 if (dofree)
806 free(buf);
807 return NULL;
808 }
809 return buf;
810}
811
812static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
813{
814 unsigned long long dsize;
815
816 get_dev_size(fd, NULL, &dsize);
817
818 if (lseek64(fd, dsize-512, 0) < 0) {
819 if (devname)
e7b84f9d
N
820 pr_err("Cannot seek to anchor block on %s: %s\n",
821 devname, strerror(errno));
a322f70c
DW
822 return 1;
823 }
824 if (read(fd, &super->anchor, 512) != 512) {
825 if (devname)
e7b84f9d
N
826 pr_err("Cannot read anchor block on %s: %s\n",
827 devname, strerror(errno));
a322f70c
DW
828 return 1;
829 }
830 if (super->anchor.magic != DDF_HEADER_MAGIC) {
831 if (devname)
e7b84f9d 832 pr_err("no DDF anchor found on %s\n",
a322f70c
DW
833 devname);
834 return 2;
835 }
836 if (calc_crc(&super->anchor, 512) != super->anchor.crc) {
837 if (devname)
e7b84f9d 838 pr_err("bad CRC on anchor on %s\n",
a322f70c
DW
839 devname);
840 return 2;
841 }
59e36268
NB
842 if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
843 memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
a322f70c 844 if (devname)
e7b84f9d 845 pr_err("can only support super revision"
59e36268
NB
846 " %.8s and earlier, not %.8s on %s\n",
847 DDF_REVISION_2, super->anchor.revision,devname);
a322f70c
DW
848 return 2;
849 }
dbeb699a 850 super->active = NULL;
a322f70c
DW
851 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.primary_lba),
852 dsize >> 9, 1,
853 &super->primary, &super->anchor) == 0) {
854 if (devname)
e7b84f9d
N
855 pr_err("Failed to load primary DDF header "
856 "on %s\n", devname);
dbeb699a 857 } else
858 super->active = &super->primary;
a322f70c
DW
859 if (load_ddf_header(fd, __be64_to_cpu(super->anchor.secondary_lba),
860 dsize >> 9, 2,
861 &super->secondary, &super->anchor)) {
3eff7c1d
N
862 if (super->active == NULL
863 || (__be32_to_cpu(super->primary.seq)
864 < __be32_to_cpu(super->secondary.seq) &&
865 !super->secondary.openflag)
a322f70c
DW
866 || (__be32_to_cpu(super->primary.seq)
867 == __be32_to_cpu(super->secondary.seq) &&
868 super->primary.openflag && !super->secondary.openflag)
869 )
870 super->active = &super->secondary;
dbeb699a 871 } else if (devname)
872 pr_err("Failed to load secondary DDF header on %s\n",
873 devname);
874 if (super->active == NULL)
875 return 2;
a322f70c
DW
876 return 0;
877}
878
879static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
880{
881 void *ok;
882 ok = load_section(fd, super, &super->controller,
883 super->active->controller_section_offset,
884 super->active->controller_section_length,
885 0);
886 super->phys = load_section(fd, super, NULL,
887 super->active->phys_section_offset,
888 super->active->phys_section_length,
889 1);
890 super->pdsize = __be32_to_cpu(super->active->phys_section_length) * 512;
891
892 super->virt = load_section(fd, super, NULL,
893 super->active->virt_section_offset,
894 super->active->virt_section_length,
895 1);
896 super->vdsize = __be32_to_cpu(super->active->virt_section_length) * 512;
897 if (!ok ||
898 !super->phys ||
899 !super->virt) {
900 free(super->phys);
901 free(super->virt);
a2349791
NB
902 super->phys = NULL;
903 super->virt = NULL;
a322f70c
DW
904 return 2;
905 }
906 super->conflist = NULL;
907 super->dlist = NULL;
8c3b8c2c
NB
908
909 super->max_part = __be16_to_cpu(super->active->max_partitions);
910 super->mppe = __be16_to_cpu(super->active->max_primary_element_entries);
911 super->conf_rec_len = __be16_to_cpu(super->active->config_record_len);
a322f70c
DW
912 return 0;
913}
914
3c48f7be 915#define DDF_UNUSED_BVD 0xff
916static int alloc_other_bvds(const struct ddf_super *ddf, struct vcl *vcl)
917{
918 unsigned int n_vds = vcl->conf.sec_elmnt_count - 1;
919 unsigned int i, vdsize;
920 void *p;
921 if (n_vds == 0) {
922 vcl->other_bvds = NULL;
923 return 0;
924 }
925 vdsize = ddf->conf_rec_len * 512;
926 if (posix_memalign(&p, 512, n_vds *
927 (vdsize + sizeof(struct vd_config *))) != 0)
928 return -1;
929 vcl->other_bvds = (struct vd_config **) (p + n_vds * vdsize);
930 for (i = 0; i < n_vds; i++) {
931 vcl->other_bvds[i] = p + i * vdsize;
932 memset(vcl->other_bvds[i], 0, vdsize);
933 vcl->other_bvds[i]->sec_elmnt_seq = DDF_UNUSED_BVD;
934 }
935 return 0;
936}
937
3dc821b0 938static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
939 unsigned int len)
940{
941 int i;
942 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
3c48f7be 943 if (vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
3dc821b0 944 break;
945
946 if (i < vcl->conf.sec_elmnt_count-1) {
947 if (vd->seqnum <= vcl->other_bvds[i]->seqnum)
948 return;
949 } else {
950 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
3c48f7be 951 if (vcl->other_bvds[i]->sec_elmnt_seq == DDF_UNUSED_BVD)
3dc821b0 952 break;
953 if (i == vcl->conf.sec_elmnt_count-1) {
954 pr_err("no space for sec level config %u, count is %u\n",
955 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
956 return;
957 }
3dc821b0 958 }
959 memcpy(vcl->other_bvds[i], vd, len);
960}
961
a322f70c
DW
962static int load_ddf_local(int fd, struct ddf_super *super,
963 char *devname, int keep)
964{
965 struct dl *dl;
966 struct stat stb;
967 char *conf;
f21e18ca
N
968 unsigned int i;
969 unsigned int confsec;
b2280677 970 int vnum;
f21e18ca 971 unsigned int max_virt_disks = __be16_to_cpu(super->active->max_vd_entries);
d2ca6449 972 unsigned long long dsize;
a322f70c
DW
973
974 /* First the local disk info */
3d2c4fc7 975 if (posix_memalign((void**)&dl, 512,
6416d527 976 sizeof(*dl) +
3d2c4fc7 977 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
e7b84f9d 978 pr_err("%s could not allocate disk info buffer\n",
3d2c4fc7
DW
979 __func__);
980 return 1;
981 }
a322f70c
DW
982
983 load_section(fd, super, &dl->disk,
984 super->active->data_section_offset,
985 super->active->data_section_length,
986 0);
503975b9 987 dl->devname = devname ? xstrdup(devname) : NULL;
598f0d58 988
a322f70c
DW
989 fstat(fd, &stb);
990 dl->major = major(stb.st_rdev);
991 dl->minor = minor(stb.st_rdev);
992 dl->next = super->dlist;
993 dl->fd = keep ? fd : -1;
d2ca6449
NB
994
995 dl->size = 0;
996 if (get_dev_size(fd, devname, &dsize))
997 dl->size = dsize >> 9;
097bcf00 998 /* If the disks have different sizes, the LBAs will differ
999 * between phys disks.
1000 * At this point here, the values in super->active must be valid
1001 * for this phys disk. */
1002 dl->primary_lba = super->active->primary_lba;
1003 dl->secondary_lba = super->active->secondary_lba;
1004 dl->workspace_lba = super->active->workspace_lba;
b2280677 1005 dl->spare = NULL;
f21e18ca 1006 for (i = 0 ; i < super->max_part ; i++)
a322f70c
DW
1007 dl->vlist[i] = NULL;
1008 super->dlist = dl;
59e36268 1009 dl->pdnum = -1;
f21e18ca 1010 for (i = 0; i < __be16_to_cpu(super->active->max_pd_entries); i++)
5575e7d9
NB
1011 if (memcmp(super->phys->entries[i].guid,
1012 dl->disk.guid, DDF_GUID_LEN) == 0)
1013 dl->pdnum = i;
1014
a322f70c
DW
1015 /* Now the config list. */
1016 /* 'conf' is an array of config entries, some of which are
1017 * probably invalid. Those which are good need to be copied into
1018 * the conflist
1019 */
a322f70c
DW
1020
1021 conf = load_section(fd, super, NULL,
1022 super->active->config_section_offset,
1023 super->active->config_section_length,
1024 0);
1025
b2280677 1026 vnum = 0;
e223334f
N
1027 for (confsec = 0;
1028 confsec < __be32_to_cpu(super->active->config_section_length);
1029 confsec += super->conf_rec_len) {
a322f70c 1030 struct vd_config *vd =
e223334f 1031 (struct vd_config *)((char*)conf + confsec*512);
a322f70c
DW
1032 struct vcl *vcl;
1033
b2280677
NB
1034 if (vd->magic == DDF_SPARE_ASSIGN_MAGIC) {
1035 if (dl->spare)
1036 continue;
3d2c4fc7
DW
1037 if (posix_memalign((void**)&dl->spare, 512,
1038 super->conf_rec_len*512) != 0) {
e7b84f9d
N
1039 pr_err("%s could not allocate spare info buf\n",
1040 __func__);
3d2c4fc7
DW
1041 return 1;
1042 }
613b0d17 1043
b2280677
NB
1044 memcpy(dl->spare, vd, super->conf_rec_len*512);
1045 continue;
1046 }
a322f70c
DW
1047 if (vd->magic != DDF_VD_CONF_MAGIC)
1048 continue;
1049 for (vcl = super->conflist; vcl; vcl = vcl->next) {
1050 if (memcmp(vcl->conf.guid,
1051 vd->guid, DDF_GUID_LEN) == 0)
1052 break;
1053 }
1054
1055 if (vcl) {
b2280677 1056 dl->vlist[vnum++] = vcl;
3dc821b0 1057 if (vcl->other_bvds != NULL &&
1058 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
1059 add_other_bvd(vcl, vd, super->conf_rec_len*512);
1060 continue;
1061 }
a322f70c
DW
1062 if (__be32_to_cpu(vd->seqnum) <=
1063 __be32_to_cpu(vcl->conf.seqnum))
1064 continue;
59e36268 1065 } else {
3d2c4fc7 1066 if (posix_memalign((void**)&vcl, 512,
6416d527 1067 (super->conf_rec_len*512 +
3d2c4fc7 1068 offsetof(struct vcl, conf))) != 0) {
e7b84f9d
N
1069 pr_err("%s could not allocate vcl buf\n",
1070 __func__);
3d2c4fc7
DW
1071 return 1;
1072 }
a322f70c 1073 vcl->next = super->conflist;
59e36268 1074 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
3c48f7be 1075 vcl->conf.sec_elmnt_count = vd->sec_elmnt_count;
1076 if (alloc_other_bvds(super, vcl) != 0) {
1077 pr_err("%s could not allocate other bvds\n",
1078 __func__);
1079 free(vcl);
1080 return 1;
1081 };
a322f70c 1082 super->conflist = vcl;
b2280677 1083 dl->vlist[vnum++] = vcl;
a322f70c 1084 }
8c3b8c2c 1085 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
59e36268
NB
1086 for (i=0; i < max_virt_disks ; i++)
1087 if (memcmp(super->virt->entries[i].guid,
1088 vcl->conf.guid, DDF_GUID_LEN)==0)
1089 break;
1090 if (i < max_virt_disks)
1091 vcl->vcnum = i;
a322f70c
DW
1092 }
1093 free(conf);
1094
1095 return 0;
1096}
1097
1098#ifndef MDASSEMBLE
1099static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 1100 void **sbp, char *devname);
a322f70c 1101#endif
37424f13
DW
1102
1103static void free_super_ddf(struct supertype *st);
1104
a322f70c
DW
1105static int load_super_ddf(struct supertype *st, int fd,
1106 char *devname)
1107{
1108 unsigned long long dsize;
1109 struct ddf_super *super;
1110 int rv;
1111
a322f70c
DW
1112 if (get_dev_size(fd, devname, &dsize) == 0)
1113 return 1;
1114
b31df436 1115 if (!st->ignore_hw_compat && test_partition(fd))
691c6ee1
N
1116 /* DDF is not allowed on partitions */
1117 return 1;
1118
a322f70c
DW
1119 /* 32M is a lower bound */
1120 if (dsize <= 32*1024*1024) {
97320d7c 1121 if (devname)
e7b84f9d
N
1122 pr_err("%s is too small for ddf: "
1123 "size is %llu sectors.\n",
1124 devname, dsize>>9);
97320d7c 1125 return 1;
a322f70c
DW
1126 }
1127 if (dsize & 511) {
97320d7c 1128 if (devname)
e7b84f9d
N
1129 pr_err("%s is an odd size for ddf: "
1130 "size is %llu bytes.\n",
1131 devname, dsize);
97320d7c 1132 return 1;
a322f70c
DW
1133 }
1134
37424f13
DW
1135 free_super_ddf(st);
1136
6416d527 1137 if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
e7b84f9d 1138 pr_err("malloc of %zu failed.\n",
a322f70c
DW
1139 sizeof(*super));
1140 return 1;
1141 }
a2349791 1142 memset(super, 0, sizeof(*super));
a322f70c
DW
1143
1144 rv = load_ddf_headers(fd, super, devname);
1145 if (rv) {
1146 free(super);
1147 return rv;
1148 }
1149
1150 /* Have valid headers and have chosen the best. Let's read in the rest*/
1151
1152 rv = load_ddf_global(fd, super, devname);
1153
1154 if (rv) {
1155 if (devname)
e7b84f9d
N
1156 pr_err("Failed to load all information "
1157 "sections on %s\n", devname);
a322f70c
DW
1158 free(super);
1159 return rv;
1160 }
1161
3d2c4fc7
DW
1162 rv = load_ddf_local(fd, super, devname, 0);
1163
1164 if (rv) {
1165 if (devname)
e7b84f9d
N
1166 pr_err("Failed to load all information "
1167 "sections on %s\n", devname);
3d2c4fc7
DW
1168 free(super);
1169 return rv;
1170 }
a322f70c
DW
1171
1172 /* Should possibly check the sections .... */
1173
1174 st->sb = super;
1175 if (st->ss == NULL) {
1176 st->ss = &super_ddf;
1177 st->minor_version = 0;
1178 st->max_devs = 512;
1179 }
1180 return 0;
1181
1182}
1183
1184static void free_super_ddf(struct supertype *st)
1185{
1186 struct ddf_super *ddf = st->sb;
1187 if (ddf == NULL)
1188 return;
1189 free(ddf->phys);
1190 free(ddf->virt);
1191 while (ddf->conflist) {
1192 struct vcl *v = ddf->conflist;
1193 ddf->conflist = v->next;
59e36268
NB
1194 if (v->block_sizes)
1195 free(v->block_sizes);
3c48f7be 1196 if (v->other_bvds)
1197 /*
1198 v->other_bvds[0] points to beginning of buffer,
1199 see alloc_other_bvds()
1200 */
1201 free(v->other_bvds[0]);
a322f70c
DW
1202 free(v);
1203 }
1204 while (ddf->dlist) {
1205 struct dl *d = ddf->dlist;
1206 ddf->dlist = d->next;
1207 if (d->fd >= 0)
1208 close(d->fd);
b2280677
NB
1209 if (d->spare)
1210 free(d->spare);
a322f70c
DW
1211 free(d);
1212 }
8a38cb04
N
1213 while (ddf->add_list) {
1214 struct dl *d = ddf->add_list;
1215 ddf->add_list = d->next;
1216 if (d->fd >= 0)
1217 close(d->fd);
1218 if (d->spare)
1219 free(d->spare);
1220 free(d);
1221 }
a322f70c
DW
1222 free(ddf);
1223 st->sb = NULL;
1224}
1225
1226static struct supertype *match_metadata_desc_ddf(char *arg)
1227{
1228 /* 'ddf' only support containers */
1229 struct supertype *st;
1230 if (strcmp(arg, "ddf") != 0 &&
1231 strcmp(arg, "default") != 0
1232 )
1233 return NULL;
1234
503975b9 1235 st = xcalloc(1, sizeof(*st));
a322f70c
DW
1236 st->ss = &super_ddf;
1237 st->max_devs = 512;
1238 st->minor_version = 0;
1239 st->sb = NULL;
1240 return st;
1241}
1242
a322f70c
DW
1243#ifndef MDASSEMBLE
1244
1245static mapping_t ddf_state[] = {
1246 { "Optimal", 0},
1247 { "Degraded", 1},
1248 { "Deleted", 2},
1249 { "Missing", 3},
1250 { "Failed", 4},
1251 { "Partially Optimal", 5},
1252 { "-reserved-", 6},
1253 { "-reserved-", 7},
1254 { NULL, 0}
1255};
1256
1257static mapping_t ddf_init_state[] = {
1258 { "Not Initialised", 0},
1259 { "QuickInit in Progress", 1},
1260 { "Fully Initialised", 2},
1261 { "*UNKNOWN*", 3},
1262 { NULL, 0}
1263};
1264static mapping_t ddf_access[] = {
1265 { "Read/Write", 0},
1266 { "Reserved", 1},
1267 { "Read Only", 2},
1268 { "Blocked (no access)", 3},
1269 { NULL ,0}
1270};
1271
1272static mapping_t ddf_level[] = {
1273 { "RAID0", DDF_RAID0},
1274 { "RAID1", DDF_RAID1},
1275 { "RAID3", DDF_RAID3},
1276 { "RAID4", DDF_RAID4},
1277 { "RAID5", DDF_RAID5},
1278 { "RAID1E",DDF_RAID1E},
1279 { "JBOD", DDF_JBOD},
1280 { "CONCAT",DDF_CONCAT},
1281 { "RAID5E",DDF_RAID5E},
1282 { "RAID5EE",DDF_RAID5EE},
1283 { "RAID6", DDF_RAID6},
1284 { NULL, 0}
1285};
1286static mapping_t ddf_sec_level[] = {
1287 { "Striped", DDF_2STRIPED},
1288 { "Mirrored", DDF_2MIRRORED},
1289 { "Concat", DDF_2CONCAT},
1290 { "Spanned", DDF_2SPANNED},
1291 { NULL, 0}
1292};
1293#endif
1294
fb9d0acb 1295static int all_ff(const char *guid)
42dc2744
N
1296{
1297 int i;
1298 for (i = 0; i < DDF_GUID_LEN; i++)
1299 if (guid[i] != (char)0xff)
1300 return 0;
1301 return 1;
1302}
1303
a322f70c
DW
1304#ifndef MDASSEMBLE
1305static void print_guid(char *guid, int tstamp)
1306{
1307 /* A GUIDs are part (or all) ASCII and part binary.
1308 * They tend to be space padded.
59e36268
NB
1309 * We print the GUID in HEX, then in parentheses add
1310 * any initial ASCII sequence, and a possible
1311 * time stamp from bytes 16-19
a322f70c
DW
1312 */
1313 int l = DDF_GUID_LEN;
1314 int i;
59e36268
NB
1315
1316 for (i=0 ; i<DDF_GUID_LEN ; i++) {
1317 if ((i&3)==0 && i != 0) printf(":");
1318 printf("%02X", guid[i]&255);
1319 }
1320
cfccea8c 1321 printf("\n (");
a322f70c
DW
1322 while (l && guid[l-1] == ' ')
1323 l--;
1324 for (i=0 ; i<l ; i++) {
1325 if (guid[i] >= 0x20 && guid[i] < 0x7f)
1326 fputc(guid[i], stdout);
1327 else
59e36268 1328 break;
a322f70c
DW
1329 }
1330 if (tstamp) {
1331 time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
1332 char tbuf[100];
1333 struct tm *tm;
1334 tm = localtime(&then);
59e36268 1335 strftime(tbuf, 100, " %D %T",tm);
a322f70c
DW
1336 fputs(tbuf, stdout);
1337 }
59e36268 1338 printf(")");
a322f70c
DW
1339}
1340
be9b9ef4 1341static const char *guid_str(const char *guid)
1342{
1343 static char buf[DDF_GUID_LEN*2+1];
1344 int i;
1345 char *p = buf;
4a03cbd1 1346 for (i = 0; i < DDF_GUID_LEN; i++) {
1347 unsigned char c = guid[i];
1348 if (c >= 32 && c < 127)
1349 p += sprintf(p, "%c", c);
1350 else
1351 p += sprintf(p, "%02x", c);
1352 }
be9b9ef4 1353 *p = '\0';
1354 return (const char *) buf;
1355}
1356
a322f70c
DW
1357static void examine_vd(int n, struct ddf_super *sb, char *guid)
1358{
8c3b8c2c 1359 int crl = sb->conf_rec_len;
a322f70c
DW
1360 struct vcl *vcl;
1361
1362 for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
f21e18ca 1363 unsigned int i;
a322f70c
DW
1364 struct vd_config *vc = &vcl->conf;
1365
1366 if (calc_crc(vc, crl*512) != vc->crc)
1367 continue;
1368 if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
1369 continue;
1370
1371 /* Ok, we know about this VD, let's give more details */
b06e3095 1372 printf(" Raid Devices[%d] : %d (", n,
a322f70c 1373 __be16_to_cpu(vc->prim_elmnt_count));
f21e18ca 1374 for (i = 0; i < __be16_to_cpu(vc->prim_elmnt_count); i++) {
b06e3095
N
1375 int j;
1376 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1377 for (j=0; j<cnt; j++)
1378 if (vc->phys_refnum[i] == sb->phys->entries[j].refnum)
1379 break;
1380 if (i) printf(" ");
1381 if (j < cnt)
1382 printf("%d", j);
1383 else
1384 printf("--");
1385 }
1386 printf(")\n");
1387 if (vc->chunk_shift != 255)
613b0d17
N
1388 printf(" Chunk Size[%d] : %d sectors\n", n,
1389 1 << vc->chunk_shift);
a322f70c
DW
1390 printf(" Raid Level[%d] : %s\n", n,
1391 map_num(ddf_level, vc->prl)?:"-unknown-");
1392 if (vc->sec_elmnt_count != 1) {
1393 printf(" Secondary Position[%d] : %d of %d\n", n,
1394 vc->sec_elmnt_seq, vc->sec_elmnt_count);
1395 printf(" Secondary Level[%d] : %s\n", n,
1396 map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
1397 }
1398 printf(" Device Size[%d] : %llu\n", n,
c9b6907b 1399 (unsigned long long)__be64_to_cpu(vc->blocks)/2);
a322f70c 1400 printf(" Array Size[%d] : %llu\n", n,
c9b6907b 1401 (unsigned long long)__be64_to_cpu(vc->array_blocks)/2);
a322f70c
DW
1402 }
1403}
1404
1405static void examine_vds(struct ddf_super *sb)
1406{
1407 int cnt = __be16_to_cpu(sb->virt->populated_vdes);
fb9d0acb 1408 unsigned int i;
a322f70c
DW
1409 printf(" Virtual Disks : %d\n", cnt);
1410
fb9d0acb 1411 for (i = 0; i < __be16_to_cpu(sb->virt->max_vdes); i++) {
a322f70c 1412 struct virtual_entry *ve = &sb->virt->entries[i];
fb9d0acb 1413 if (all_ff(ve->guid))
1414 continue;
b06e3095 1415 printf("\n");
a322f70c
DW
1416 printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
1417 printf("\n");
1418 printf(" unit[%d] : %d\n", i, __be16_to_cpu(ve->unit));
1419 printf(" state[%d] : %s, %s%s\n", i,
1420 map_num(ddf_state, ve->state & 7),
1421 (ve->state & 8) ? "Morphing, ": "",
1422 (ve->state & 16)? "Not Consistent" : "Consistent");
1423 printf(" init state[%d] : %s\n", i,
1424 map_num(ddf_init_state, ve->init_state&3));
1425 printf(" access[%d] : %s\n", i,
1426 map_num(ddf_access, (ve->init_state>>6) & 3));
1427 printf(" Name[%d] : %.16s\n", i, ve->name);
1428 examine_vd(i, sb, ve->guid);
1429 }
1430 if (cnt) printf("\n");
1431}
1432
1433static void examine_pds(struct ddf_super *sb)
1434{
1435 int cnt = __be16_to_cpu(sb->phys->used_pdes);
1436 int i;
1437 struct dl *dl;
1438 printf(" Physical Disks : %d\n", cnt);
962371a5 1439 printf(" Number RefNo Size Device Type/State\n");
a322f70c
DW
1440
1441 for (i=0 ; i<cnt ; i++) {
1442 struct phys_disk_entry *pd = &sb->phys->entries[i];
1443 int type = __be16_to_cpu(pd->type);
1444 int state = __be16_to_cpu(pd->state);
1445
b06e3095
N
1446 //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
1447 //printf("\n");
1448 printf(" %3d %08x ", i,
a322f70c 1449 __be32_to_cpu(pd->refnum));
613b0d17 1450 printf("%8lluK ",
c9b6907b 1451 (unsigned long long)__be64_to_cpu(pd->config_size)>>1);
b06e3095
N
1452 for (dl = sb->dlist; dl ; dl = dl->next) {
1453 if (dl->disk.refnum == pd->refnum) {
1454 char *dv = map_dev(dl->major, dl->minor, 0);
1455 if (dv) {
962371a5 1456 printf("%-15s", dv);
b06e3095
N
1457 break;
1458 }
1459 }
1460 }
1461 if (!dl)
962371a5 1462 printf("%15s","");
b06e3095 1463 printf(" %s%s%s%s%s",
a322f70c 1464 (type&2) ? "active":"",
b06e3095 1465 (type&4) ? "Global-Spare":"",
a322f70c
DW
1466 (type&8) ? "spare" : "",
1467 (type&16)? ", foreign" : "",
1468 (type&32)? "pass-through" : "");
18cb4496
N
1469 if (state & DDF_Failed)
1470 /* This over-rides these three */
1471 state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
b06e3095 1472 printf("/%s%s%s%s%s%s%s",
a322f70c
DW
1473 (state&1)? "Online": "Offline",
1474 (state&2)? ", Failed": "",
1475 (state&4)? ", Rebuilding": "",
1476 (state&8)? ", in-transition": "",
b06e3095
N
1477 (state&16)? ", SMART-errors": "",
1478 (state&32)? ", Unrecovered-Read-Errors": "",
a322f70c 1479 (state&64)? ", Missing" : "");
a322f70c
DW
1480 printf("\n");
1481 }
1482}
1483
1484static void examine_super_ddf(struct supertype *st, char *homehost)
1485{
1486 struct ddf_super *sb = st->sb;
1487
1488 printf(" Magic : %08x\n", __be32_to_cpu(sb->anchor.magic));
1489 printf(" Version : %.8s\n", sb->anchor.revision);
598f0d58
NB
1490 printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
1491 printf("\n");
1492 printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
a322f70c
DW
1493 printf("\n");
1494 printf(" Seq : %08x\n", __be32_to_cpu(sb->active->seq));
1495 printf(" Redundant hdr : %s\n", sb->secondary.magic == DDF_HEADER_MAGIC
1496 ?"yes" : "no");
1497 examine_vds(sb);
1498 examine_pds(sb);
1499}
1500
a5d85af7 1501static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
ff54de6e 1502
bedbf68a 1503static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
42dc2744 1504static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
ff54de6e 1505
bedbf68a 1506static unsigned int get_vd_num_of_subarray(struct supertype *st)
1507{
1508 /*
1509 * Figure out the VD number for this supertype.
1510 * Returns DDF_CONTAINER for the container itself,
1511 * and DDF_NOTFOUND on error.
1512 */
1513 struct ddf_super *ddf = st->sb;
1514 struct mdinfo *sra;
1515 char *sub, *end;
1516 unsigned int vcnum;
1517
1518 if (*st->container_devnm == '\0')
1519 return DDF_CONTAINER;
1520
1521 sra = sysfs_read(-1, st->devnm, GET_VERSION);
1522 if (!sra || sra->array.major_version != -1 ||
1523 sra->array.minor_version != -2 ||
1524 !is_subarray(sra->text_version))
1525 return DDF_NOTFOUND;
1526
1527 sub = strchr(sra->text_version + 1, '/');
1528 if (sub != NULL)
1529 vcnum = strtoul(sub + 1, &end, 10);
1530 if (sub == NULL || *sub == '\0' || *end != '\0' ||
1531 vcnum >= __be16_to_cpu(ddf->active->max_vd_entries))
1532 return DDF_NOTFOUND;
1533
1534 return vcnum;
1535}
1536
061f2c6a 1537static void brief_examine_super_ddf(struct supertype *st, int verbose)
4737ae25
N
1538{
1539 /* We just write a generic DDF ARRAY entry
1540 */
1541 struct mdinfo info;
1542 char nbuf[64];
a5d85af7 1543 getinfo_super_ddf(st, &info, NULL);
4737ae25
N
1544 fname_from_uuid(st, &info, nbuf, ':');
1545
1546 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1547}
1548
1549static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
a322f70c
DW
1550{
1551 /* We just write a generic DDF ARRAY entry
a322f70c 1552 */
42dc2744 1553 struct ddf_super *ddf = st->sb;
ff54de6e 1554 struct mdinfo info;
f21e18ca 1555 unsigned int i;
ff54de6e 1556 char nbuf[64];
a5d85af7 1557 getinfo_super_ddf(st, &info, NULL);
ff54de6e 1558 fname_from_uuid(st, &info, nbuf, ':');
42dc2744 1559
f21e18ca 1560 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
42dc2744
N
1561 struct virtual_entry *ve = &ddf->virt->entries[i];
1562 struct vcl vcl;
1563 char nbuf1[64];
1564 if (all_ff(ve->guid))
1565 continue;
1566 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1567 ddf->currentconf =&vcl;
1568 uuid_from_super_ddf(st, info.uuid);
1569 fname_from_uuid(st, &info, nbuf1, ':');
1570 printf("ARRAY container=%s member=%d UUID=%s\n",
1571 nbuf+5, i, nbuf1+5);
1572 }
a322f70c
DW
1573}
1574
bceedeec
N
1575static void export_examine_super_ddf(struct supertype *st)
1576{
1577 struct mdinfo info;
1578 char nbuf[64];
a5d85af7 1579 getinfo_super_ddf(st, &info, NULL);
bceedeec
N
1580 fname_from_uuid(st, &info, nbuf, ':');
1581 printf("MD_METADATA=ddf\n");
1582 printf("MD_LEVEL=container\n");
1583 printf("MD_UUID=%s\n", nbuf+5);
1584}
bceedeec 1585
74db60b0
N
1586static int copy_metadata_ddf(struct supertype *st, int from, int to)
1587{
1588 void *buf;
1589 unsigned long long dsize, offset;
1590 int bytes;
1591 struct ddf_header *ddf;
1592 int written = 0;
1593
1594 /* The meta consists of an anchor, a primary, and a secondary.
1595 * This all lives at the end of the device.
1596 * So it is easiest to find the earliest of primary and
1597 * secondary, and copy everything from there.
1598 *
1599 * Anchor is 512 from end It contains primary_lba and secondary_lba
1600 * we choose one of those
1601 */
1602
1603 if (posix_memalign(&buf, 4096, 4096) != 0)
1604 return 1;
1605
1606 if (!get_dev_size(from, NULL, &dsize))
1607 goto err;
1608
1609 if (lseek64(from, dsize-512, 0) < 0)
1610 goto err;
1611 if (read(from, buf, 512) != 512)
1612 goto err;
1613 ddf = buf;
1614 if (ddf->magic != DDF_HEADER_MAGIC ||
1615 calc_crc(ddf, 512) != ddf->crc ||
1616 (memcmp(ddf->revision, DDF_REVISION_0, 8) != 0 &&
1617 memcmp(ddf->revision, DDF_REVISION_2, 8) != 0))
1618 goto err;
1619
1620 offset = dsize - 512;
1621 if ((__be64_to_cpu(ddf->primary_lba) << 9) < offset)
1622 offset = __be64_to_cpu(ddf->primary_lba) << 9;
1623 if ((__be64_to_cpu(ddf->secondary_lba) << 9) < offset)
1624 offset = __be64_to_cpu(ddf->secondary_lba) << 9;
1625
1626 bytes = dsize - offset;
1627
1628 if (lseek64(from, offset, 0) < 0 ||
1629 lseek64(to, offset, 0) < 0)
1630 goto err;
1631 while (written < bytes) {
1632 int n = bytes - written;
1633 if (n > 4096)
1634 n = 4096;
1635 if (read(from, buf, n) != n)
1636 goto err;
1637 if (write(to, buf, n) != n)
1638 goto err;
1639 written += n;
1640 }
1641 free(buf);
1642 return 0;
1643err:
1644 free(buf);
1645 return 1;
1646}
1647
a322f70c
DW
/*
 * Placeholder: prints nothing yet.
 *
 * FIXME later:
 *  - could print the DDF GUID
 *  - need to find which array
 *  - if whole, briefly list all arrays
 *  - if one, give name
 */
static void detail_super_ddf(struct supertype *st, char *homehost)
{
}
1657
1658static void brief_detail_super_ddf(struct supertype *st)
1659{
ff54de6e
N
1660 struct mdinfo info;
1661 char nbuf[64];
bedbf68a 1662 struct ddf_super *ddf = st->sb;
1663 unsigned int vcnum = get_vd_num_of_subarray(st);
1664 if (vcnum == DDF_CONTAINER)
1665 uuid_from_super_ddf(st, info.uuid);
1666 else if (vcnum == DDF_NOTFOUND)
1667 return;
1668 else
1669 uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, info.uuid);
ff54de6e
N
1670 fname_from_uuid(st, &info, nbuf,':');
1671 printf(" UUID=%s", nbuf + 5);
a322f70c 1672}
a322f70c
DW
1673#endif
1674
1675static int match_home_ddf(struct supertype *st, char *homehost)
1676{
1677 /* It matches 'this' host if the controller is a
1678 * Linux-MD controller with vendor_data matching
1679 * the hostname
1680 */
1681 struct ddf_super *ddf = st->sb;
f21e18ca 1682 unsigned int len;
d1d3482b
N
1683
1684 if (!homehost)
1685 return 0;
1686 len = strlen(homehost);
a322f70c
DW
1687
1688 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1689 len < sizeof(ddf->controller.vendor_data) &&
1690 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1691 ddf->controller.vendor_data[len] == 0);
1692}
1693
0e600426 1694#ifndef MDASSEMBLE
baba3f4e 1695static int find_index_in_bvd(const struct ddf_super *ddf,
1696 const struct vd_config *conf, unsigned int n,
1697 unsigned int *n_bvd)
1698{
1699 /*
1700 * Find the index of the n-th valid physical disk in this BVD
1701 */
1702 unsigned int i, j;
1703 for (i = 0, j = 0; i < ddf->mppe &&
1704 j < __be16_to_cpu(conf->prim_elmnt_count); i++) {
1705 if (conf->phys_refnum[i] != 0xffffffff) {
1706 if (n == j) {
1707 *n_bvd = i;
1708 return 1;
1709 }
1710 j++;
1711 }
1712 }
1713 dprintf("%s: couldn't find BVD member %u (total %u)\n",
1714 __func__, n, __be16_to_cpu(conf->prim_elmnt_count));
1715 return 0;
1716}
1717
1718static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
1719 unsigned int n,
1720 unsigned int *n_bvd, struct vcl **vcl)
a322f70c 1721{
7a7cc504 1722 struct vcl *v;
59e36268 1723
baba3f4e 1724 for (v = ddf->conflist; v; v = v->next) {
1725 unsigned int nsec, ibvd;
1726 struct vd_config *conf;
1727 if (inst != v->vcnum)
1728 continue;
1729 conf = &v->conf;
1730 if (conf->sec_elmnt_count == 1) {
1731 if (find_index_in_bvd(ddf, conf, n, n_bvd)) {
1732 *vcl = v;
1733 return conf;
1734 } else
1735 goto bad;
1736 }
1737 if (v->other_bvds == NULL) {
1738 pr_err("%s: BUG: other_bvds is NULL, nsec=%u\n",
1739 __func__, conf->sec_elmnt_count);
1740 goto bad;
1741 }
1742 nsec = n / __be16_to_cpu(conf->prim_elmnt_count);
1743 if (conf->sec_elmnt_seq != nsec) {
1744 for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
baba3f4e 1745 if (v->other_bvds[ibvd-1]->sec_elmnt_seq
1746 == nsec)
1747 break;
1748 }
1749 if (ibvd == conf->sec_elmnt_count)
1750 goto bad;
1751 conf = v->other_bvds[ibvd-1];
1752 }
1753 if (!find_index_in_bvd(ddf, conf,
1754 n - nsec*conf->sec_elmnt_count, n_bvd))
1755 goto bad;
1756 dprintf("%s: found disk %u as member %u in bvd %d of array %u\n"
1757 , __func__, n, *n_bvd, ibvd-1, inst);
1758 *vcl = v;
1759 return conf;
1760 }
1761bad:
1762 pr_err("%s: Could't find disk %d in array %u\n", __func__, n, inst);
7a7cc504
NB
1763 return NULL;
1764}
0e600426 1765#endif
7a7cc504 1766
5ec636b7 1767static int find_phys(const struct ddf_super *ddf, __u32 phys_refnum)
7a7cc504
NB
1768{
1769 /* Find the entry in phys_disk which has the given refnum
1770 * and return it's index
1771 */
f21e18ca
N
1772 unsigned int i;
1773 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++)
7a7cc504
NB
1774 if (ddf->phys->entries[i].refnum == phys_refnum)
1775 return i;
1776 return -1;
a322f70c
DW
1777}
1778
bedbf68a 1779static void uuid_from_ddf_guid(const char *guid, int uuid[4])
1780{
1781 char buf[20];
1782 struct sha1_ctx ctx;
1783 sha1_init_ctx(&ctx);
1784 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
1785 sha1_finish_ctx(&ctx, buf);
1786 memcpy(uuid, buf, 4*4);
1787}
1788
a322f70c
DW
1789static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1790{
1791 /* The uuid returned here is used for:
1792 * uuid to put into bitmap file (Create, Grow)
1793 * uuid for backup header when saving critical section (Grow)
1794 * comparing uuids when re-adding a device into an array
51006d85
N
1795 * In these cases the uuid required is that of the data-array,
1796 * not the device-set.
1797 * uuid to recognise same set when adding a missing device back
1798 * to an array. This is a uuid for the device-set.
613b0d17 1799 *
a322f70c
DW
1800 * For each of these we can make do with a truncated
1801 * or hashed uuid rather than the original, as long as
1802 * everyone agrees.
a322f70c
DW
1803 * In the case of SVD we assume the BVD is of interest,
1804 * though that might be the case if a bitmap were made for
1805 * a mirrored SVD - worry about that later.
1806 * So we need to find the VD configuration record for the
1807 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1808 * The first 16 bytes of the sha1 of these is used.
1809 */
1810 struct ddf_super *ddf = st->sb;
d2ca6449 1811 struct vcl *vcl = ddf->currentconf;
c5afc314 1812 char *guid;
a322f70c 1813
c5afc314
N
1814 if (vcl)
1815 guid = vcl->conf.guid;
1816 else
1817 guid = ddf->anchor.guid;
bedbf68a 1818 uuid_from_ddf_guid(guid, uuid);
a322f70c
DW
1819}
1820
a5d85af7 1821static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
78e44928 1822
a5d85af7 1823static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1824{
1825 struct ddf_super *ddf = st->sb;
a5d85af7 1826 int map_disks = info->array.raid_disks;
90fa1a29 1827 __u32 *cptr;
a322f70c 1828
78e44928 1829 if (ddf->currentconf) {
a5d85af7 1830 getinfo_super_ddf_bvd(st, info, map);
78e44928
NB
1831 return;
1832 }
95eeceeb 1833 memset(info, 0, sizeof(*info));
78e44928 1834
a322f70c
DW
1835 info->array.raid_disks = __be16_to_cpu(ddf->phys->used_pdes);
1836 info->array.level = LEVEL_CONTAINER;
1837 info->array.layout = 0;
1838 info->array.md_minor = -1;
90fa1a29
JS
1839 cptr = (__u32 *)(ddf->anchor.guid + 16);
1840 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1841
a322f70c
DW
1842 info->array.utime = 0;
1843 info->array.chunk_size = 0;
510242aa 1844 info->container_enough = 1;
a322f70c 1845
a322f70c
DW
1846 info->disk.major = 0;
1847 info->disk.minor = 0;
cba0191b
NB
1848 if (ddf->dlist) {
1849 info->disk.number = __be32_to_cpu(ddf->dlist->disk.refnum);
59e36268 1850 info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
d2ca6449
NB
1851
1852 info->data_offset = __be64_to_cpu(ddf->phys->
613b0d17
N
1853 entries[info->disk.raid_disk].
1854 config_size);
d2ca6449 1855 info->component_size = ddf->dlist->size - info->data_offset;
cba0191b
NB
1856 } else {
1857 info->disk.number = -1;
661dce36 1858 info->disk.raid_disk = -1;
cba0191b
NB
1859// info->disk.raid_disk = find refnum in the table and use index;
1860 }
f22385f9 1861 info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
a19c88b8 1862
921d9e16 1863 info->recovery_start = MaxSector;
a19c88b8 1864 info->reshape_active = 0;
6e75048b 1865 info->recovery_blocked = 0;
c5afc314 1866 info->name[0] = 0;
a322f70c 1867
f35f2525
N
1868 info->array.major_version = -1;
1869 info->array.minor_version = -2;
159c3a1a 1870 strcpy(info->text_version, "ddf");
a67dd8cc 1871 info->safe_mode_delay = 0;
159c3a1a 1872
c5afc314 1873 uuid_from_super_ddf(st, info->uuid);
a322f70c 1874
a5d85af7
N
1875 if (map) {
1876 int i;
1877 for (i = 0 ; i < map_disks; i++) {
1878 if (i < info->array.raid_disks &&
1879 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1880 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1881 map[i] = 1;
1882 else
1883 map[i] = 0;
1884 }
1885 }
a322f70c
DW
1886}
1887
a5d85af7 1888static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1889{
1890 struct ddf_super *ddf = st->sb;
d2ca6449
NB
1891 struct vcl *vc = ddf->currentconf;
1892 int cd = ddf->currentdev;
ddf94a43 1893 int n_prim;
db42fa9b 1894 int j;
8592f29d 1895 struct dl *dl;
a5d85af7 1896 int map_disks = info->array.raid_disks;
90fa1a29 1897 __u32 *cptr;
ddf94a43 1898 struct vd_config *conf;
a322f70c 1899
95eeceeb 1900 memset(info, 0, sizeof(*info));
8a2848a7 1901 if (layout_ddf2md(&vc->conf, &info->array) == -1)
1902 return;
a322f70c 1903 info->array.md_minor = -1;
90fa1a29
JS
1904 cptr = (__u32 *)(vc->conf.guid + 16);
1905 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
d2ca6449
NB
1906 info->array.utime = DECADE + __be32_to_cpu(vc->conf.timestamp);
1907 info->array.chunk_size = 512 << vc->conf.chunk_shift;
da9b4a62 1908 info->custom_array_size = 0;
d2ca6449 1909
ddf94a43 1910 conf = &vc->conf;
1911 n_prim = __be16_to_cpu(conf->prim_elmnt_count);
1912 if (conf->sec_elmnt_count > 1 && cd >= n_prim) {
1913 int ibvd = cd / n_prim - 1;
1914 cd %= n_prim;
1915 conf = vc->other_bvds[ibvd];
1916 }
1917
f21e18ca 1918 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
57a66662 1919 info->data_offset =
fbf0c2a7 1920 __be64_to_cpu(LBA_OFFSET(ddf, conf)[cd]);
d2ca6449
NB
1921 if (vc->block_sizes)
1922 info->component_size = vc->block_sizes[cd];
1923 else
fbf0c2a7 1924 info->component_size = __be64_to_cpu(conf->blocks);
d2ca6449 1925 }
a322f70c 1926
fb204fb2 1927 for (dl = ddf->dlist; dl ; dl = dl->next)
f5ded787 1928 if (dl->disk.refnum == conf->phys_refnum[cd])
fb204fb2
N
1929 break;
1930
a322f70c
DW
1931 info->disk.major = 0;
1932 info->disk.minor = 0;
fb204fb2 1933 info->disk.state = 0;
8592f29d
N
1934 if (dl) {
1935 info->disk.major = dl->major;
1936 info->disk.minor = dl->minor;
7c3fb3ec 1937 info->disk.raid_disk = cd + conf->sec_elmnt_seq
1938 * __be16_to_cpu(conf->prim_elmnt_count);
fb204fb2
N
1939 info->disk.number = dl->pdnum;
1940 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
8592f29d 1941 }
a322f70c 1942
103f2410
NB
1943 info->container_member = ddf->currentconf->vcnum;
1944
921d9e16 1945 info->recovery_start = MaxSector;
80d26cb2 1946 info->resync_start = 0;
624c5ad4 1947 info->reshape_active = 0;
6e75048b 1948 info->recovery_blocked = 0;
80d26cb2
NB
1949 if (!(ddf->virt->entries[info->container_member].state
1950 & DDF_state_inconsistent) &&
1951 (ddf->virt->entries[info->container_member].init_state
1952 & DDF_initstate_mask)
1953 == DDF_init_full)
b7528a20 1954 info->resync_start = MaxSector;
80d26cb2 1955
a322f70c
DW
1956 uuid_from_super_ddf(st, info->uuid);
1957
f35f2525
N
1958 info->array.major_version = -1;
1959 info->array.minor_version = -2;
9b63e648 1960 sprintf(info->text_version, "/%s/%d",
4dd2df09 1961 st->container_devnm,
9b63e648 1962 info->container_member);
a67dd8cc 1963 info->safe_mode_delay = 200;
159c3a1a 1964
db42fa9b
N
1965 memcpy(info->name, ddf->virt->entries[info->container_member].name, 16);
1966 info->name[16]=0;
1967 for(j=0; j<16; j++)
1968 if (info->name[j] == ' ')
1969 info->name[j] = 0;
a5d85af7
N
1970
1971 if (map)
1972 for (j = 0; j < map_disks; j++) {
1973 map[j] = 0;
1974 if (j < info->array.raid_disks) {
1975 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
613b0d17 1976 if (i >= 0 &&
a5d85af7
N
1977 (__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Online) &&
1978 !(__be16_to_cpu(ddf->phys->entries[i].state) & DDF_Failed))
1979 map[i] = 1;
1980 }
1981 }
a322f70c
DW
1982}
1983
/*
 * Apply the metadata update named by 'update'.
 *
 * For 'assemble' and 'force' the caller needs a non-zero return if any
 * change was made.  For others, the return value is ignored.
 * Update options are:
 *  force-one : This device looks a bit old but needs to be included,
 *        update age info appropriately.
 *  assemble: clear any 'faulty' flag to allow this device to
 *        be assembled.
 *  force-array: Array is degraded but being forced, mark it clean
 *        if that will be needed to assemble it.
 *
 *  newdev:  not used ????
 *  grow:  Array has gained a new device - this is currently for
 *        linear only
 *  resync: mark as dirty so a resync will happen.
 *  uuid:  Change the uuid of the array to match what is given
 *  homehost:  update the recorded homehost
 *  name:  update the name - preserving the homehost
 *  _reshape_progress: record new reshape_progress position.
 *
 * Following are not relevant for this version:
 *  sparc2.2 : update from old dodgey metadata
 *  super-minor: change the preferred_minor number
 *  summaries:  update redundant counters.
 *
 * Nothing is actually modified here yet.  "force-*" needs no handling
 * as there is no need to 'trick' the kernel: when the metadata is
 * first updated to activate the array, all the implied modifications
 * will just happen.
 *
 * Returns 0 for "grow" (FIXME), "resync", "_reshape_progress" (reshape
 * is not supported yet) and "assemble" (nothing to do, just succeed).
 * Returns -1 for everything else, including "homehost" (stored in
 * controller->vendor_data when we are the vendor) and "name" (stored
 * in virtual_entry->name), neither of which is implemented.
 */
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	static const char *const accepted[] = {
		"grow",
		"resync",
		"_reshape_progress",
		"assemble",
	};
	unsigned int k;

	for (k = 0; k < sizeof(accepted) / sizeof(accepted[0]); k++)
		if (strcmp(update, accepted[k]) == 0)
			return 0;

	return -1;
}
2052
5f8097be
NB
2053static void make_header_guid(char *guid)
2054{
2055 __u32 stamp;
5f8097be
NB
2056 /* Create a DDF Header of Virtual Disk GUID */
2057
2058 /* 24 bytes of fiction required.
2059 * first 8 are a 'vendor-id' - "Linux-MD"
2060 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
2061 * Remaining 8 random number plus timestamp
2062 */
2063 memcpy(guid, T10, sizeof(T10));
2064 stamp = __cpu_to_be32(0xdeadbeef);
2065 memcpy(guid+8, &stamp, 4);
2066 stamp = __cpu_to_be32(0);
2067 memcpy(guid+12, &stamp, 4);
2068 stamp = __cpu_to_be32(time(0) - DECADE);
2069 memcpy(guid+16, &stamp, 4);
bfb7ea78 2070 stamp = random32();
5f8097be 2071 memcpy(guid+20, &stamp, 4);
5f8097be 2072}
59e36268 2073
fb9d0acb 2074static unsigned int find_unused_vde(const struct ddf_super *ddf)
2075{
2076 unsigned int i;
2077 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2078 if (all_ff(ddf->virt->entries[i].guid))
2079 return i;
2080 }
2081 return DDF_NOTFOUND;
2082}
2083
2084static unsigned int find_vde_by_name(const struct ddf_super *ddf,
2085 const char *name)
2086{
2087 unsigned int i;
2088 if (name == NULL)
2089 return DDF_NOTFOUND;
2090 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++) {
2091 if (all_ff(ddf->virt->entries[i].guid))
2092 continue;
2093 if (!strncmp(name, ddf->virt->entries[i].name,
2094 sizeof(ddf->virt->entries[i].name)))
2095 return i;
2096 }
2097 return DDF_NOTFOUND;
2098}
2099
2100static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
2101 const char *guid)
2102{
2103 unsigned int i;
2104 if (guid == NULL || all_ff(guid))
2105 return DDF_NOTFOUND;
2106 for (i = 0; i < __be16_to_cpu(ddf->virt->max_vdes); i++)
2107 if (!memcmp(ddf->virt->entries[i].guid, guid, DDF_GUID_LEN))
2108 return i;
2109 return DDF_NOTFOUND;
2110}
2111
78e44928
NB
2112static int init_super_ddf_bvd(struct supertype *st,
2113 mdu_array_info_t *info,
2114 unsigned long long size,
2115 char *name, char *homehost,
83cd1e97 2116 int *uuid, unsigned long long data_offset);
78e44928 2117
a322f70c
DW
2118static int init_super_ddf(struct supertype *st,
2119 mdu_array_info_t *info,
2120 unsigned long long size, char *name, char *homehost,
83cd1e97 2121 int *uuid, unsigned long long data_offset)
a322f70c
DW
2122{
2123 /* This is primarily called by Create when creating a new array.
2124 * We will then get add_to_super called for each component, and then
2125 * write_init_super called to write it out to each device.
2126 * For DDF, Create can create on fresh devices or on a pre-existing
2127 * array.
2128 * To create on a pre-existing array a different method will be called.
2129 * This one is just for fresh drives.
2130 *
2131 * We need to create the entire 'ddf' structure which includes:
2132 * DDF headers - these are easy.
2133 * Controller data - a Sector describing this controller .. not that
2134 * this is a controller exactly.
2135 * Physical Disk Record - one entry per device, so
2136 * leave plenty of space.
2137 * Virtual Disk Records - again, just leave plenty of space.
2138 * This just lists VDs, doesn't give details
2139 * Config records - describes the VDs that use this disk
2140 * DiskData - describes 'this' device.
2141 * BadBlockManagement - empty
2142 * Diag Space - empty
2143 * Vendor Logs - Could we put bitmaps here?
2144 *
2145 */
2146 struct ddf_super *ddf;
2147 char hostname[17];
2148 int hostlen;
a322f70c
DW
2149 int max_phys_disks, max_virt_disks;
2150 unsigned long long sector;
2151 int clen;
2152 int i;
2153 int pdsize, vdsize;
2154 struct phys_disk *pd;
2155 struct virtual_disk *vd;
2156
83cd1e97 2157 if (data_offset != INVALID_SECTORS) {
ed503f89 2158 pr_err("data-offset not supported by DDF\n");
83cd1e97
N
2159 return 0;
2160 }
2161
78e44928 2162 if (st->sb)
83cd1e97
N
2163 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
2164 data_offset);
ba7eb04f 2165
3d2c4fc7 2166 if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
e7b84f9d 2167 pr_err("%s could not allocate superblock\n", __func__);
3d2c4fc7
DW
2168 return 0;
2169 }
6264b437 2170 memset(ddf, 0, sizeof(*ddf));
a322f70c
DW
2171 ddf->dlist = NULL; /* no physical disks yet */
2172 ddf->conflist = NULL; /* No virtual disks yet */
955e9ea1
DW
2173 st->sb = ddf;
2174
2175 if (info == NULL) {
2176 /* zeroing superblock */
2177 return 0;
2178 }
a322f70c
DW
2179
2180 /* At least 32MB *must* be reserved for the ddf. So let's just
2181 * start 32MB from the end, and put the primary header there.
2182 * Don't do secondary for now.
2183 * We don't know exactly where that will be yet as it could be
2184 * different on each device. To just set up the lengths.
2185 *
2186 */
2187
2188 ddf->anchor.magic = DDF_HEADER_MAGIC;
5f8097be 2189 make_header_guid(ddf->anchor.guid);
a322f70c 2190
59e36268 2191 memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
a322f70c
DW
2192 ddf->anchor.seq = __cpu_to_be32(1);
2193 ddf->anchor.timestamp = __cpu_to_be32(time(0) - DECADE);
2194 ddf->anchor.openflag = 0xFF;
2195 ddf->anchor.foreignflag = 0;
2196 ddf->anchor.enforcegroups = 0; /* Is this best?? */
2197 ddf->anchor.pad0 = 0xff;
2198 memset(ddf->anchor.pad1, 0xff, 12);
2199 memset(ddf->anchor.header_ext, 0xff, 32);
2200 ddf->anchor.primary_lba = ~(__u64)0;
2201 ddf->anchor.secondary_lba = ~(__u64)0;
2202 ddf->anchor.type = DDF_HEADER_ANCHOR;
2203 memset(ddf->anchor.pad2, 0xff, 3);
2204 ddf->anchor.workspace_len = __cpu_to_be32(32768); /* Must be reserved */
2205 ddf->anchor.workspace_lba = ~(__u64)0; /* Put this at bottom
2206 of 32M reserved.. */
2207 max_phys_disks = 1023; /* Should be enough */
2208 ddf->anchor.max_pd_entries = __cpu_to_be16(max_phys_disks);
2209 max_virt_disks = 255;
2210 ddf->anchor.max_vd_entries = __cpu_to_be16(max_virt_disks); /* ?? */
2211 ddf->anchor.max_partitions = __cpu_to_be16(64); /* ?? */
2212 ddf->max_part = 64;
8c3b8c2c 2213 ddf->mppe = 256;
59e36268
NB
2214 ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
2215 ddf->anchor.config_record_len = __cpu_to_be16(ddf->conf_rec_len);
2216 ddf->anchor.max_primary_element_entries = __cpu_to_be16(ddf->mppe);
a322f70c 2217 memset(ddf->anchor.pad3, 0xff, 54);
a322f70c
DW
2218 /* controller sections is one sector long immediately
2219 * after the ddf header */
2220 sector = 1;
2221 ddf->anchor.controller_section_offset = __cpu_to_be32(sector);
2222 ddf->anchor.controller_section_length = __cpu_to_be32(1);
2223 sector += 1;
2224
2225 /* phys is 8 sectors after that */
2226 pdsize = ROUND_UP(sizeof(struct phys_disk) +
2227 sizeof(struct phys_disk_entry)*max_phys_disks,
2228 512);
2229 switch(pdsize/512) {
2230 case 2: case 8: case 32: case 128: case 512: break;
2231 default: abort();
2232 }
2233 ddf->anchor.phys_section_offset = __cpu_to_be32(sector);
2234 ddf->anchor.phys_section_length =
2235 __cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
2236 sector += pdsize/512;
2237
2238 /* virt is another 32 sectors */
2239 vdsize = ROUND_UP(sizeof(struct virtual_disk) +
2240 sizeof(struct virtual_entry) * max_virt_disks,
2241 512);
2242 switch(vdsize/512) {
2243 case 2: case 8: case 32: case 128: case 512: break;
2244 default: abort();
2245 }
2246 ddf->anchor.virt_section_offset = __cpu_to_be32(sector);
2247 ddf->anchor.virt_section_length =
2248 __cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
2249 sector += vdsize/512;
2250
59e36268 2251 clen = ddf->conf_rec_len * (ddf->max_part+1);
a322f70c
DW
2252 ddf->anchor.config_section_offset = __cpu_to_be32(sector);
2253 ddf->anchor.config_section_length = __cpu_to_be32(clen);
2254 sector += clen;
2255
2256 ddf->anchor.data_section_offset = __cpu_to_be32(sector);
2257 ddf->anchor.data_section_length = __cpu_to_be32(1);
2258 sector += 1;
2259
2260 ddf->anchor.bbm_section_length = __cpu_to_be32(0);
2261 ddf->anchor.bbm_section_offset = __cpu_to_be32(0xFFFFFFFF);
2262 ddf->anchor.diag_space_length = __cpu_to_be32(0);
2263 ddf->anchor.diag_space_offset = __cpu_to_be32(0xFFFFFFFF);
2264 ddf->anchor.vendor_length = __cpu_to_be32(0);
2265 ddf->anchor.vendor_offset = __cpu_to_be32(0xFFFFFFFF);
2266
2267 memset(ddf->anchor.pad4, 0xff, 256);
2268
2269 memcpy(&ddf->primary, &ddf->anchor, 512);
2270 memcpy(&ddf->secondary, &ddf->anchor, 512);
2271
2272 ddf->primary.openflag = 1; /* I guess.. */
2273 ddf->primary.type = DDF_HEADER_PRIMARY;
2274
2275 ddf->secondary.openflag = 1; /* I guess.. */
2276 ddf->secondary.type = DDF_HEADER_SECONDARY;
2277
2278 ddf->active = &ddf->primary;
2279
2280 ddf->controller.magic = DDF_CONTROLLER_MAGIC;
2281
2282 /* 24 more bytes of fiction required.
2283 * first 8 are a 'vendor-id' - "Linux-MD"
2284 * Remaining 16 are serial number.... maybe a hostname would do?
2285 */
2286 memcpy(ddf->controller.guid, T10, sizeof(T10));
1ba6bff9
DW
2287 gethostname(hostname, sizeof(hostname));
2288 hostname[sizeof(hostname) - 1] = 0;
a322f70c
DW
2289 hostlen = strlen(hostname);
2290 memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
2291 for (i = strlen(T10) ; i+hostlen < 24; i++)
2292 ddf->controller.guid[i] = ' ';
2293
2294 ddf->controller.type.vendor_id = __cpu_to_be16(0xDEAD);
2295 ddf->controller.type.device_id = __cpu_to_be16(0xBEEF);
2296 ddf->controller.type.sub_vendor_id = 0;
2297 ddf->controller.type.sub_device_id = 0;
2298 memcpy(ddf->controller.product_id, "What Is My PID??", 16);
2299 memset(ddf->controller.pad, 0xff, 8);
2300 memset(ddf->controller.vendor_data, 0xff, 448);
a9e1c11d
N
2301 if (homehost && strlen(homehost) < 440)
2302 strcpy((char*)ddf->controller.vendor_data, homehost);
a322f70c 2303
3d2c4fc7 2304 if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
e7b84f9d 2305 pr_err("%s could not allocate pd\n", __func__);
3d2c4fc7
DW
2306 return 0;
2307 }
6416d527 2308 ddf->phys = pd;
a322f70c
DW
2309 ddf->pdsize = pdsize;
2310
2311 memset(pd, 0xff, pdsize);
2312 memset(pd, 0, sizeof(*pd));
076515ba 2313 pd->magic = DDF_PHYS_RECORDS_MAGIC;
a322f70c
DW
2314 pd->used_pdes = __cpu_to_be16(0);
2315 pd->max_pdes = __cpu_to_be16(max_phys_disks);
2316 memset(pd->pad, 0xff, 52);
4a3ca8ac 2317 for (i = 0; i < max_phys_disks; i++)
2318 memset(pd->entries[i].guid, 0xff, DDF_GUID_LEN);
a322f70c 2319
3d2c4fc7 2320 if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
e7b84f9d 2321 pr_err("%s could not allocate vd\n", __func__);
3d2c4fc7
DW
2322 return 0;
2323 }
6416d527 2324 ddf->virt = vd;
a322f70c
DW
2325 ddf->vdsize = vdsize;
2326 memset(vd, 0, vdsize);
2327 vd->magic = DDF_VIRT_RECORDS_MAGIC;
2328 vd->populated_vdes = __cpu_to_be16(0);
2329 vd->max_vdes = __cpu_to_be16(max_virt_disks);
2330 memset(vd->pad, 0xff, 52);
2331
5f8097be
NB
2332 for (i=0; i<max_virt_disks; i++)
2333 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
2334
a322f70c 2335 st->sb = ddf;
7d5a7ff3 2336 ddf_set_updates_pending(ddf);
a322f70c
DW
2337 return 1;
2338}
2339
5f8097be
NB
/*
 * Convert a chunk size in bytes into a shift count relative to a
 * 512-byte sector: the (0-based) position of the lowest set bit of
 * chunksize/512.  Returns -1 when chunksize/512 is 0.
 */
static int chunk_to_shift(int chunksize)
{
	int sectors = chunksize / 512;
	int lowest_bit = ffs(sectors);	/* 1-based, 0 when no bit is set */

	return lowest_bit - 1;
}
2344
0e600426 2345#ifndef MDASSEMBLE
59e36268
NB
/* A used region of a device: first block and number of blocks. */
struct extent {
	unsigned long long start;
	unsigned long long size;
};

/*
 * qsort() comparator ordering 'struct extent' by ascending start.
 * Returns -1, 0 or 1; the size field is not looked at.
 */
static int cmp_extent(const void *av, const void *bv)
{
	const struct extent *lhs = av;
	const struct extent *rhs = bv;

	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
}
78e44928 2360static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
59e36268
NB
2361{
2362 /* find a list of used extents on the give physical device
2363 * (dnum) of the given ddf.
2364 * Return a malloced array of 'struct extent'
2365
613b0d17 2366 * FIXME ignore DDF_Legacy devices?
59e36268
NB
2367
2368 */
2369 struct extent *rv;
2370 int n = 0;
fcc22180 2371 unsigned int i;
59e36268 2372
503975b9 2373 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
59e36268
NB
2374
2375 for (i = 0; i < ddf->max_part; i++) {
fcc22180 2376 const struct vd_config *bvd;
2377 unsigned int ibvd;
59e36268 2378 struct vcl *v = dl->vlist[i];
fcc22180 2379 if (v == NULL ||
2380 get_pd_index_from_refnum(v, dl->disk.refnum, ddf->mppe,
2381 &bvd, &ibvd) == DDF_NOTFOUND)
59e36268 2382 continue;
fcc22180 2383 rv[n].start = __be64_to_cpu(LBA_OFFSET(ddf, bvd)[ibvd]);
2384 rv[n].size = __be64_to_cpu(bvd->blocks);
2385 n++;
59e36268
NB
2386 }
2387 qsort(rv, n, sizeof(*rv), cmp_extent);
2388
2389 rv[n].start = __be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
2390 rv[n].size = 0;
2391 return rv;
2392}
0e600426 2393#endif
59e36268 2394
5f8097be
NB
2395static int init_super_ddf_bvd(struct supertype *st,
2396 mdu_array_info_t *info,
2397 unsigned long long size,
2398 char *name, char *homehost,
83cd1e97 2399 int *uuid, unsigned long long data_offset)
5f8097be
NB
2400{
2401 /* We are creating a BVD inside a pre-existing container.
2402 * so st->sb is already set.
2403 * We need to create a new vd_config and a new virtual_entry
2404 */
2405 struct ddf_super *ddf = st->sb;
5aaf6c7b 2406 unsigned int venum, i;
5f8097be
NB
2407 struct virtual_entry *ve;
2408 struct vcl *vcl;
2409 struct vd_config *vc;
5f8097be 2410
fb9d0acb 2411 if (find_vde_by_name(ddf, name) != DDF_NOTFOUND) {
2412 pr_err("This ddf already has an array called %s\n", name);
5f8097be
NB
2413 return 0;
2414 }
fb9d0acb 2415 venum = find_unused_vde(ddf);
2416 if (venum == DDF_NOTFOUND) {
2417 pr_err("Cannot find spare slot for virtual disk\n");
5f8097be
NB
2418 return 0;
2419 }
2420 ve = &ddf->virt->entries[venum];
2421
2422 /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
2423 * timestamp, random number
2424 */
2425 make_header_guid(ve->guid);
2426 ve->unit = __cpu_to_be16(info->md_minor);
2427 ve->pad0 = 0xFFFF;
2428 ve->guid_crc = crc32(0, (unsigned char*)ddf->anchor.guid, DDF_GUID_LEN);
2429 ve->type = 0;
7a7cc504
NB
2430 ve->state = DDF_state_degraded; /* Will be modified as devices are added */
2431 if (info->state & 1) /* clean */
2432 ve->init_state = DDF_init_full;
2433 else
2434 ve->init_state = DDF_init_not;
2435
5f8097be
NB
2436 memset(ve->pad1, 0xff, 14);
2437 memset(ve->name, ' ', 16);
2438 if (name)
2439 strncpy(ve->name, name, 16);
2440 ddf->virt->populated_vdes =
2441 __cpu_to_be16(__be16_to_cpu(ddf->virt->populated_vdes)+1);
2442
2443 /* Now create a new vd_config */
3d2c4fc7
DW
2444 if (posix_memalign((void**)&vcl, 512,
2445 (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
e7b84f9d 2446 pr_err("%s could not allocate vd_config\n", __func__);
3d2c4fc7
DW
2447 return 0;
2448 }
59e36268
NB
2449 vcl->vcnum = venum;
2450 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
5f8097be
NB
2451 vc = &vcl->conf;
2452
2453 vc->magic = DDF_VD_CONF_MAGIC;
2454 memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
2455 vc->timestamp = __cpu_to_be32(time(0)-DECADE);
2456 vc->seqnum = __cpu_to_be32(1);
2457 memset(vc->pad0, 0xff, 24);
5f8097be 2458 vc->chunk_shift = chunk_to_shift(info->chunk_size);
a3163bf0 2459 if (layout_md2ddf(info, vc) == -1 ||
2460 __be16_to_cpu(vc->prim_elmnt_count) > ddf->mppe) {
2461 pr_err("%s: unsupported RAID level/layout %d/%d with %d disks\n",
2462 __func__, info->level, info->layout, info->raid_disks);
2463 free(vcl);
2464 return 0;
2465 }
5f8097be 2466 vc->sec_elmnt_seq = 0;
3c48f7be 2467 if (alloc_other_bvds(ddf, vcl) != 0) {
2468 pr_err("%s could not allocate other bvds\n",
2469 __func__);
2470 free(vcl);
2471 return 0;
2472 }
5f8097be
NB
2473 vc->blocks = __cpu_to_be64(info->size * 2);
2474 vc->array_blocks = __cpu_to_be64(
2475 calc_array_size(info->level, info->raid_disks, info->layout,
2476 info->chunk_size, info->size*2));
2477 memset(vc->pad1, 0xff, 8);
2478 vc->spare_refs[0] = 0xffffffff;
2479 vc->spare_refs[1] = 0xffffffff;
2480 vc->spare_refs[2] = 0xffffffff;
2481 vc->spare_refs[3] = 0xffffffff;
2482 vc->spare_refs[4] = 0xffffffff;
2483 vc->spare_refs[5] = 0xffffffff;
2484 vc->spare_refs[6] = 0xffffffff;
2485 vc->spare_refs[7] = 0xffffffff;
2486 memset(vc->cache_pol, 0, 8);
2487 vc->bg_rate = 0x80;
2488 memset(vc->pad2, 0xff, 3);
2489 memset(vc->pad3, 0xff, 52);
2490 memset(vc->pad4, 0xff, 192);
2491 memset(vc->v0, 0xff, 32);
2492 memset(vc->v1, 0xff, 32);
2493 memset(vc->v2, 0xff, 16);
2494 memset(vc->v3, 0xff, 16);
2495 memset(vc->vendor, 0xff, 32);
598f0d58 2496
8c3b8c2c 2497 memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
e5a2a3cf 2498 memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
5f8097be 2499
5aaf6c7b 2500 for (i = 1; i < vc->sec_elmnt_count; i++) {
2501 memcpy(vcl->other_bvds[i-1], vc, ddf->conf_rec_len * 512);
2502 vcl->other_bvds[i-1]->sec_elmnt_seq = i;
2503 }
2504
5f8097be
NB
2505 vcl->next = ddf->conflist;
2506 ddf->conflist = vcl;
d2ca6449 2507 ddf->currentconf = vcl;
7d5a7ff3 2508 ddf_set_updates_pending(ddf);
5f8097be
NB
2509 return 1;
2510}
2511
63eb2454 2512static int get_svd_state(const struct ddf_super *, const struct vcl *);
2513
0e600426 2514#ifndef MDASSEMBLE
5f8097be
NB
2515static void add_to_super_ddf_bvd(struct supertype *st,
2516 mdu_disk_info_t *dk, int fd, char *devname)
2517{
2518 /* fd and devname identify a device with-in the ddf container (st).
2519 * dk identifies a location in the new BVD.
2520 * We need to find suitable free space in that device and update
2521 * the phys_refnum and lba_offset for the newly created vd_config.
2522 * We might also want to update the type in the phys_disk
5575e7d9 2523 * section.
8592f29d
N
2524 *
2525 * Alternately: fd == -1 and we have already chosen which device to
2526 * use and recorded in dlist->raid_disk;
5f8097be
NB
2527 */
2528 struct dl *dl;
2529 struct ddf_super *ddf = st->sb;
2530 struct vd_config *vc;
f21e18ca 2531 unsigned int i;
59e36268
NB
2532 unsigned long long blocks, pos, esize;
2533 struct extent *ex;
475ccbdb 2534 unsigned int raid_disk = dk->raid_disk;
5f8097be 2535
8592f29d
N
2536 if (fd == -1) {
2537 for (dl = ddf->dlist; dl ; dl = dl->next)
2538 if (dl->raiddisk == dk->raid_disk)
2539 break;
2540 } else {
2541 for (dl = ddf->dlist; dl ; dl = dl->next)
2542 if (dl->major == dk->major &&
2543 dl->minor == dk->minor)
2544 break;
2545 }
5f8097be
NB
2546 if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
2547 return;
2548
d2ca6449 2549 vc = &ddf->currentconf->conf;
475ccbdb 2550 if (vc->sec_elmnt_count > 1) {
2551 unsigned int n = __be16_to_cpu(vc->prim_elmnt_count);
2552 if (raid_disk >= n)
2553 vc = ddf->currentconf->other_bvds[raid_disk / n - 1];
2554 raid_disk %= n;
2555 }
59e36268
NB
2556
2557 ex = get_extents(ddf, dl);
2558 if (!ex)
2559 return;
2560
2561 i = 0; pos = 0;
2562 blocks = __be64_to_cpu(vc->blocks);
d2ca6449
NB
2563 if (ddf->currentconf->block_sizes)
2564 blocks = ddf->currentconf->block_sizes[dk->raid_disk];
59e36268
NB
2565
2566 do {
2567 esize = ex[i].start - pos;
2568 if (esize >= blocks)
2569 break;
2570 pos = ex[i].start + ex[i].size;
2571 i++;
2572 } while (ex[i-1].size);
2573
2574 free(ex);
2575 if (esize < blocks)
2576 return;
2577
d2ca6449 2578 ddf->currentdev = dk->raid_disk;
475ccbdb 2579 vc->phys_refnum[raid_disk] = dl->disk.refnum;
2580 LBA_OFFSET(ddf, vc)[raid_disk] = __cpu_to_be64(pos);
5f8097be 2581
f21e18ca 2582 for (i = 0; i < ddf->max_part ; i++)
5575e7d9
NB
2583 if (dl->vlist[i] == NULL)
2584 break;
2585 if (i == ddf->max_part)
2586 return;
d2ca6449 2587 dl->vlist[i] = ddf->currentconf;
5f8097be 2588
8592f29d
N
2589 if (fd >= 0)
2590 dl->fd = fd;
2591 if (devname)
2592 dl->devname = devname;
7a7cc504 2593
63eb2454 2594 /* Check if we can mark array as optimal yet */
d2ca6449 2595 i = ddf->currentconf->vcnum;
63eb2454 2596 ddf->virt->entries[i].state =
2597 (ddf->virt->entries[i].state & ~DDF_state_mask)
2598 | get_svd_state(ddf, ddf->currentconf);
5575e7d9
NB
2599 ddf->phys->entries[dl->pdnum].type &= ~__cpu_to_be16(DDF_Global_Spare);
2600 ddf->phys->entries[dl->pdnum].type |= __cpu_to_be16(DDF_Active_in_VD);
4f9bbe63 2601 dprintf("%s: added disk %d/%08x to VD %d/%s as disk %d\n",
2602 __func__, dl->pdnum, __be32_to_cpu(dl->disk.refnum),
2603 ddf->currentconf->vcnum, guid_str(vc->guid),
2604 dk->raid_disk);
7d5a7ff3 2605 ddf_set_updates_pending(ddf);
5f8097be
NB
2606}
2607
4a3ca8ac 2608static unsigned int find_unused_pde(const struct ddf_super *ddf)
2609{
2610 unsigned int i;
2611 for (i = 0; i < __be16_to_cpu(ddf->phys->max_pdes); i++) {
2612 if (all_ff(ddf->phys->entries[i].guid))
2613 return i;
2614 }
2615 return DDF_NOTFOUND;
2616}
2617
a322f70c
DW
2618/* add a device to a container, either while creating it or while
2619 * expanding a pre-existing container
2620 */
f20c3968 2621static int add_to_super_ddf(struct supertype *st,
72ca9bcf
N
2622 mdu_disk_info_t *dk, int fd, char *devname,
2623 unsigned long long data_offset)
a322f70c
DW
2624{
2625 struct ddf_super *ddf = st->sb;
2626 struct dl *dd;
2627 time_t now;
2628 struct tm *tm;
2629 unsigned long long size;
2630 struct phys_disk_entry *pde;
f21e18ca 2631 unsigned int n, i;
a322f70c 2632 struct stat stb;
90fa1a29 2633 __u32 *tptr;
a322f70c 2634
78e44928
NB
2635 if (ddf->currentconf) {
2636 add_to_super_ddf_bvd(st, dk, fd, devname);
f20c3968 2637 return 0;
78e44928
NB
2638 }
2639
a322f70c
DW
2640 /* This is device numbered dk->number. We need to create
2641 * a phys_disk entry and a more detailed disk_data entry.
2642 */
2643 fstat(fd, &stb);
4a3ca8ac 2644 n = find_unused_pde(ddf);
2645 if (n == DDF_NOTFOUND) {
2646 pr_err("%s: No free slot in array, cannot add disk\n",
2647 __func__);
2648 return 1;
2649 }
2650 pde = &ddf->phys->entries[n];
4ee8cca9 2651 get_dev_size(fd, NULL, &size);
2652 if (size <= 32*1024*1024) {
2653 pr_err("%s: device size must be at least 32MB\n",
2654 __func__);
2655 return 1;
2656 }
2657 size >>= 9;
4a3ca8ac 2658
3d2c4fc7
DW
2659 if (posix_memalign((void**)&dd, 512,
2660 sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
e7b84f9d
N
2661 pr_err("%s could allocate buffer for new disk, aborting\n",
2662 __func__);
f20c3968 2663 return 1;
3d2c4fc7 2664 }
a322f70c
DW
2665 dd->major = major(stb.st_rdev);
2666 dd->minor = minor(stb.st_rdev);
2667 dd->devname = devname;
a322f70c 2668 dd->fd = fd;
b2280677 2669 dd->spare = NULL;
a322f70c
DW
2670
2671 dd->disk.magic = DDF_PHYS_DATA_MAGIC;
2672 now = time(0);
2673 tm = localtime(&now);
2674 sprintf(dd->disk.guid, "%8s%04d%02d%02d",
2675 T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
90fa1a29
JS
2676 tptr = (__u32 *)(dd->disk.guid + 16);
2677 *tptr++ = random32();
2678 *tptr = random32();
a322f70c 2679
59e36268
NB
2680 do {
2681 /* Cannot be bothered finding a CRC of some irrelevant details*/
bfb7ea78 2682 dd->disk.refnum = random32();
f21e18ca
N
2683 for (i = __be16_to_cpu(ddf->active->max_pd_entries);
2684 i > 0; i--)
2685 if (ddf->phys->entries[i-1].refnum == dd->disk.refnum)
59e36268 2686 break;
f21e18ca 2687 } while (i > 0);
59e36268 2688
a322f70c
DW
2689 dd->disk.forced_ref = 1;
2690 dd->disk.forced_guid = 1;
2691 memset(dd->disk.vendor, ' ', 32);
2692 memcpy(dd->disk.vendor, "Linux", 5);
2693 memset(dd->disk.pad, 0xff, 442);
b2280677 2694 for (i = 0; i < ddf->max_part ; i++)
a322f70c
DW
2695 dd->vlist[i] = NULL;
2696
5575e7d9
NB
2697 dd->pdnum = n;
2698
2cc2983d
N
2699 if (st->update_tail) {
2700 int len = (sizeof(struct phys_disk) +
2701 sizeof(struct phys_disk_entry));
2702 struct phys_disk *pd;
2703
503975b9 2704 pd = xmalloc(len);
2cc2983d
N
2705 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2706 pd->used_pdes = __cpu_to_be16(n);
2707 pde = &pd->entries[0];
2708 dd->mdupdate = pd;
4a3ca8ac 2709 } else
2710 ddf->phys->used_pdes = __cpu_to_be16(
2711 1 + __be16_to_cpu(ddf->phys->used_pdes));
a322f70c
DW
2712
2713 memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
2714 pde->refnum = dd->disk.refnum;
5575e7d9 2715 pde->type = __cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
a322f70c 2716 pde->state = __cpu_to_be16(DDF_Online);
4ee8cca9 2717 dd->size = size;
2718 /*
2719 * If there is already a device in dlist, try to reserve the same
2720 * amount of workspace. Otherwise, use 32MB.
2721 * We checked disk size above already.
2722 */
2723#define __calc_lba(new, old, lba, mb) do { \
2724 unsigned long long dif; \
2725 if ((old) != NULL) \
2726 dif = (old)->size - __be64_to_cpu((old)->lba); \
2727 else \
2728 dif = (new)->size; \
2729 if ((new)->size > dif) \
2730 (new)->lba = __cpu_to_be64((new)->size - dif); \
2731 else \
2732 (new)->lba = __cpu_to_be64((new)->size - (mb*1024*2)); \
2733 } while (0)
2734 __calc_lba(dd, ddf->dlist, workspace_lba, 32);
2735 __calc_lba(dd, ddf->dlist, primary_lba, 16);
2736 __calc_lba(dd, ddf->dlist, secondary_lba, 32);
2737 pde->config_size = dd->workspace_lba;
2738
a322f70c
DW
2739 sprintf(pde->path, "%17.17s","Information: nil") ;
2740 memset(pde->pad, 0xff, 6);
2741
2cc2983d
N
2742 if (st->update_tail) {
2743 dd->next = ddf->add_list;
2744 ddf->add_list = dd;
2745 } else {
2746 dd->next = ddf->dlist;
2747 ddf->dlist = dd;
7d5a7ff3 2748 ddf_set_updates_pending(ddf);
2cc2983d 2749 }
f20c3968
DW
2750
2751 return 0;
a322f70c
DW
2752}
2753
4dd968cc
N
2754static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2755{
2756 struct ddf_super *ddf = st->sb;
2757 struct dl *dl;
2758
2759 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2760 * disappeared from the container.
2761 * We need to arrange that it disappears from the metadata and
2762 * internal data structures too.
2763 * Most of the work is done by ddf_process_update which edits
2764 * the metadata and closes the file handle and attaches the memory
2765 * where free_updates will free it.
2766 */
2767 for (dl = ddf->dlist; dl ; dl = dl->next)
2768 if (dl->major == dk->major &&
2769 dl->minor == dk->minor)
2770 break;
2771 if (!dl)
2772 return -1;
2773
2774 if (st->update_tail) {
2775 int len = (sizeof(struct phys_disk) +
2776 sizeof(struct phys_disk_entry));
2777 struct phys_disk *pd;
2778
503975b9 2779 pd = xmalloc(len);
4dd968cc
N
2780 pd->magic = DDF_PHYS_RECORDS_MAGIC;
2781 pd->used_pdes = __cpu_to_be16(dl->pdnum);
2782 pd->entries[0].state = __cpu_to_be16(DDF_Missing);
2783 append_metadata_update(st, pd, len);
2784 }
2785 return 0;
2786}
2787
a322f70c
DW
2788/*
2789 * This is the write_init_super method for a ddf container. It is
2790 * called when creating a container or adding another device to a
2791 * container.
2792 */
42d5dfd9 2793#define NULL_CONF_SZ 4096
18a2f463 2794
8e9387ac 2795static char *null_aligned;
2796static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type)
a322f70c 2797{
7f798aca 2798 unsigned long long sector;
2799 struct ddf_header *header;
2800 int fd, i, n_config, conf_size;
a4057a88 2801 int ret = 0;
7f798aca 2802
8e9387ac 2803 if (null_aligned == NULL) {
2804 if (posix_memalign((void **)&null_aligned, 4096, NULL_CONF_SZ)
2805 != 0)
2806 return 0;
2807 memset(null_aligned, 0xff, NULL_CONF_SZ);
2808 }
2809
7f798aca 2810 fd = d->fd;
2811
2812 switch (type) {
2813 case DDF_HEADER_PRIMARY:
2814 header = &ddf->primary;
2815 sector = __be64_to_cpu(header->primary_lba);
2816 break;
2817 case DDF_HEADER_SECONDARY:
2818 header = &ddf->secondary;
2819 sector = __be64_to_cpu(header->secondary_lba);
2820 break;
2821 default:
2822 return 0;
2823 }
2824
2825 header->type = type;
a4057a88 2826 header->openflag = 1;
7f798aca 2827 header->crc = calc_crc(header, 512);
2828
2829 lseek64(fd, sector<<9, 0);
2830 if (write(fd, header, 512) < 0)
a4057a88 2831 goto out;
7f798aca 2832
2833 ddf->controller.crc = calc_crc(&ddf->controller, 512);
2834 if (write(fd, &ddf->controller, 512) < 0)
a4057a88 2835 goto out;
a322f70c 2836
7f798aca 2837 ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
2838 if (write(fd, ddf->phys, ddf->pdsize) < 0)
a4057a88 2839 goto out;
7f798aca 2840 ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
2841 if (write(fd, ddf->virt, ddf->vdsize) < 0)
a4057a88 2842 goto out;
7f798aca 2843
2844 /* Now write lots of config records. */
2845 n_config = ddf->max_part;
2846 conf_size = ddf->conf_rec_len * 512;
2847 for (i = 0 ; i <= n_config ; i++) {
e3c2a365 2848 struct vcl *c;
2849 struct vd_config *vdc = NULL;
2850 if (i == n_config) {
7f798aca 2851 c = (struct vcl *)d->spare;
e3c2a365 2852 if (c)
2853 vdc = &c->conf;
2854 } else {
2855 unsigned int dummy;
2856 c = d->vlist[i];
2857 if (c)
2858 get_pd_index_from_refnum(
2859 c, d->disk.refnum,
2860 ddf->mppe,
2861 (const struct vd_config **)&vdc,
2862 &dummy);
2863 }
7f798aca 2864 if (c) {
be9b9ef4 2865 dprintf("writing conf record %i on disk %08x for %s/%u\n",
ad60eea1 2866 i, __be32_to_cpu(d->disk.refnum),
2867 guid_str(vdc->guid),
be9b9ef4 2868 vdc->sec_elmnt_seq);
dacf3dc5 2869 vdc->seqnum = header->seq;
e3c2a365 2870 vdc->crc = calc_crc(vdc, conf_size);
2871 if (write(fd, vdc, conf_size) < 0)
7f798aca 2872 break;
2873 } else {
2874 unsigned int togo = conf_size;
2875 while (togo > NULL_CONF_SZ) {
2876 if (write(fd, null_aligned, NULL_CONF_SZ) < 0)
2877 break;
2878 togo -= NULL_CONF_SZ;
2879 }
2880 if (write(fd, null_aligned, togo) < 0)
2881 break;
2882 }
2883 }
2884 if (i <= n_config)
a4057a88 2885 goto out;
7f798aca 2886
2887 d->disk.crc = calc_crc(&d->disk, 512);
2888 if (write(fd, &d->disk, 512) < 0)
a4057a88 2889 goto out;
7f798aca 2890
a4057a88 2891 ret = 1;
2892out:
2893 header->openflag = 0;
2894 header->crc = calc_crc(header, 512);
2895
2896 lseek64(fd, sector<<9, 0);
2897 if (write(fd, header, 512) < 0)
2898 ret = 0;
2899
2900 return ret;
7f798aca 2901}
2902
9bf38704 2903static int _write_super_to_disk(struct ddf_super *ddf, struct dl *d)
2904{
2905 unsigned long long size;
2906 int fd = d->fd;
2907 if (fd < 0)
2908 return 0;
2909
2910 /* We need to fill in the primary, (secondary) and workspace
2911 * lba's in the headers, set their checksums,
2912 * Also checksum phys, virt....
2913 *
2914 * Then write everything out, finally the anchor is written.
2915 */
2916 get_dev_size(fd, NULL, &size);
2917 size /= 512;
2918 if (d->workspace_lba != 0)
2919 ddf->anchor.workspace_lba = d->workspace_lba;
2920 else
2921 ddf->anchor.workspace_lba =
2922 __cpu_to_be64(size - 32*1024*2);
2923 if (d->primary_lba != 0)
2924 ddf->anchor.primary_lba = d->primary_lba;
2925 else
2926 ddf->anchor.primary_lba =
2927 __cpu_to_be64(size - 16*1024*2);
2928 if (d->secondary_lba != 0)
2929 ddf->anchor.secondary_lba = d->secondary_lba;
2930 else
2931 ddf->anchor.secondary_lba =
2932 __cpu_to_be64(size - 32*1024*2);
2933 ddf->anchor.seq = ddf->active->seq;
2934 memcpy(&ddf->primary, &ddf->anchor, 512);
2935 memcpy(&ddf->secondary, &ddf->anchor, 512);
2936
2937 ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
2938 ddf->anchor.seq = 0xFFFFFFFF; /* no sequencing in anchor */
2939 ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
2940
2941 if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY))
2942 return 0;
2943
2944 if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY))
2945 return 0;
2946
2947 lseek64(fd, (size-1)*512, SEEK_SET);
2948 if (write(fd, &ddf->anchor, 512) < 0)
2949 return 0;
2950
2951 return 1;
2952}
2953
7f798aca 2954static int __write_init_super_ddf(struct supertype *st)
2955{
a322f70c 2956 struct ddf_super *ddf = st->sb;
a322f70c 2957 struct dl *d;
175593bf
DW
2958 int attempts = 0;
2959 int successes = 0;
42d5dfd9 2960
7d5a7ff3 2961 pr_state(ddf, __func__);
a322f70c 2962
175593bf
DW
2963 /* try to write updated metadata,
2964 * if we catch a failure move on to the next disk
2965 */
a322f70c 2966 for (d = ddf->dlist; d; d=d->next) {
175593bf 2967 attempts++;
9bf38704 2968 successes += _write_super_to_disk(ddf, d);
175593bf
DW
2969 }
2970
175593bf 2971 return attempts != successes;
a322f70c 2972}
7a7cc504
NB
2973
2974static int write_init_super_ddf(struct supertype *st)
2975{
9b1fb677
DW
2976 struct ddf_super *ddf = st->sb;
2977 struct vcl *currentconf = ddf->currentconf;
2978
2979 /* we are done with currentconf reset it to point st at the container */
2980 ddf->currentconf = NULL;
edd8d13c
NB
2981
2982 if (st->update_tail) {
2983 /* queue the virtual_disk and vd_config as metadata updates */
2984 struct virtual_disk *vd;
2985 struct vd_config *vc;
c5943560 2986 int len, tlen;
2987 unsigned int i;
edd8d13c 2988
9b1fb677 2989 if (!currentconf) {
2cc2983d
N
2990 int len = (sizeof(struct phys_disk) +
2991 sizeof(struct phys_disk_entry));
2992
2993 /* adding a disk to the container. */
2994 if (!ddf->add_list)
2995 return 0;
2996
2997 append_metadata_update(st, ddf->add_list->mdupdate, len);
2998 ddf->add_list->mdupdate = NULL;
2999 return 0;
3000 }
3001
3002 /* Newly created VD */
3003
edd8d13c
NB
3004 /* First the virtual disk. We have a slightly fake header */
3005 len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
503975b9 3006 vd = xmalloc(len);
edd8d13c 3007 *vd = *ddf->virt;
9b1fb677
DW
3008 vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
3009 vd->populated_vdes = __cpu_to_be16(currentconf->vcnum);
edd8d13c
NB
3010 append_metadata_update(st, vd, len);
3011
3012 /* Then the vd_config */
3013 len = ddf->conf_rec_len * 512;
c5943560 3014 tlen = len * currentconf->conf.sec_elmnt_count;
3015 vc = xmalloc(tlen);
9b1fb677 3016 memcpy(vc, &currentconf->conf, len);
c5943560 3017 for (i = 1; i < currentconf->conf.sec_elmnt_count; i++)
3018 memcpy((char *)vc + i*len, currentconf->other_bvds[i-1],
3019 len);
3020 append_metadata_update(st, vc, tlen);
edd8d13c
NB
3021
3022 /* FIXME I need to close the fds! */
3023 return 0;
613b0d17 3024 } else {
d682f344 3025 struct dl *d;
19041058 3026 if (!currentconf)
3027 for (d = ddf->dlist; d; d=d->next)
3028 while (Kill(d->devname, NULL, 0, -1, 1) == 0);
1cc7f4fe 3029 return __write_init_super_ddf(st);
d682f344 3030 }
7a7cc504
NB
3031}
3032
a322f70c
DW
3033#endif
3034
387fcd59
N
3035static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
3036 unsigned long long data_offset)
a322f70c
DW
3037{
3038 /* We must reserve the last 32Meg */
3039 if (devsize <= 32*1024*2)
3040 return 0;
3041 return devsize - 32*1024*2;
3042}
3043
3044#ifndef MDASSEMBLE
8592f29d
N
3045
3046static int reserve_space(struct supertype *st, int raiddisks,
3047 unsigned long long size, int chunk,
3048 unsigned long long *freesize)
3049{
3050 /* Find 'raiddisks' spare extents at least 'size' big (but
3051 * only caring about multiples of 'chunk') and remember
3052 * them.
3053 * If the cannot be found, fail.
3054 */
3055 struct dl *dl;
3056 struct ddf_super *ddf = st->sb;
3057 int cnt = 0;
3058
3059 for (dl = ddf->dlist; dl ; dl=dl->next) {
613b0d17 3060 dl->raiddisk = -1;
8592f29d
N
3061 dl->esize = 0;
3062 }
3063 /* Now find largest extent on each device */
3064 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3065 struct extent *e = get_extents(ddf, dl);
3066 unsigned long long pos = 0;
3067 int i = 0;
3068 int found = 0;
3069 unsigned long long minsize = size;
3070
3071 if (size == 0)
3072 minsize = chunk;
3073
3074 if (!e)
3075 continue;
3076 do {
3077 unsigned long long esize;
3078 esize = e[i].start - pos;
3079 if (esize >= minsize) {
3080 found = 1;
3081 minsize = esize;
3082 }
3083 pos = e[i].start + e[i].size;
3084 i++;
3085 } while (e[i-1].size);
3086 if (found) {
3087 cnt++;
3088 dl->esize = minsize;
3089 }
3090 free(e);
3091 }
3092 if (cnt < raiddisks) {
e7b84f9d 3093 pr_err("not enough devices with space to create array.\n");
8592f29d
N
3094 return 0; /* No enough free spaces large enough */
3095 }
3096 if (size == 0) {
3097 /* choose the largest size of which there are at least 'raiddisk' */
3098 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3099 struct dl *dl2;
3100 if (dl->esize <= size)
3101 continue;
3102 /* This is bigger than 'size', see if there are enough */
3103 cnt = 0;
7b80ad6a 3104 for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
8592f29d
N
3105 if (dl2->esize >= dl->esize)
3106 cnt++;
3107 if (cnt >= raiddisks)
3108 size = dl->esize;
3109 }
3110 if (chunk) {
3111 size = size / chunk;
3112 size *= chunk;
3113 }
3114 *freesize = size;
3115 if (size < 32) {
e7b84f9d 3116 pr_err("not enough spare devices to create array.\n");
8592f29d
N
3117 return 0;
3118 }
3119 }
3120 /* We have a 'size' of which there are enough spaces.
3121 * We simply do a first-fit */
3122 cnt = 0;
3123 for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
3124 if (dl->esize < size)
3125 continue;
613b0d17 3126
8592f29d
N
3127 dl->raiddisk = cnt;
3128 cnt++;
3129 }
3130 return 1;
3131}
3132
2c514b71
NB
3133static int
3134validate_geometry_ddf_container(struct supertype *st,
3135 int level, int layout, int raiddisks,
3136 int chunk, unsigned long long size,
af4348dd 3137 unsigned long long data_offset,
2c514b71
NB
3138 char *dev, unsigned long long *freesize,
3139 int verbose);
78e44928
NB
3140
3141static int validate_geometry_ddf_bvd(struct supertype *st,
3142 int level, int layout, int raiddisks,
c21e737b 3143 int *chunk, unsigned long long size,
af4348dd 3144 unsigned long long data_offset,
2c514b71
NB
3145 char *dev, unsigned long long *freesize,
3146 int verbose);
78e44928
NB
3147
3148static int validate_geometry_ddf(struct supertype *st,
2c514b71 3149 int level, int layout, int raiddisks,
c21e737b 3150 int *chunk, unsigned long long size,
af4348dd 3151 unsigned long long data_offset,
2c514b71
NB
3152 char *dev, unsigned long long *freesize,
3153 int verbose)
a322f70c
DW
3154{
3155 int fd;
3156 struct mdinfo *sra;
3157 int cfd;
3158
3159 /* ddf potentially supports lots of things, but it depends on
3160 * what devices are offered (and maybe kernel version?)
3161 * If given unused devices, we will make a container.
3162 * If given devices in a container, we will make a BVD.
3163 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
3164 */
3165
bb7295f1
N
3166 if (chunk && *chunk == UnSet)
3167 *chunk = DEFAULT_CHUNK;
3168
542ef4ec 3169 if (level == -1000000) level = LEVEL_CONTAINER;
a322f70c 3170 if (level == LEVEL_CONTAINER) {
78e44928
NB
3171 /* Must be a fresh device to add to a container */
3172 return validate_geometry_ddf_container(st, level, layout,
c21e737b 3173 raiddisks, chunk?*chunk:0,
af4348dd
N
3174 size, data_offset, dev,
3175 freesize,
2c514b71 3176 verbose);
5f8097be
NB
3177 }
3178
78e44928 3179 if (!dev) {
a3163bf0 3180 mdu_array_info_t array = {
3181 .level = level, .layout = layout,
3182 .raid_disks = raiddisks
3183 };
3184 struct vd_config conf;
3185 if (layout_md2ddf(&array, &conf) == -1) {
b42f577a 3186 if (verbose)
94b08b7c 3187 pr_err("DDF does not support level %d /layout %d arrays with %d disks\n",
3188 level, layout, raiddisks);
78e44928 3189 return 0;
b42f577a 3190 }
78e44928 3191 /* Should check layout? etc */
8592f29d
N
3192
3193 if (st->sb && freesize) {
3194 /* --create was given a container to create in.
3195 * So we need to check that there are enough
3196 * free spaces and return the amount of space.
3197 * We may as well remember which drives were
3198 * chosen so that add_to_super/getinfo_super
3199 * can return them.
3200 */
c21e737b 3201 return reserve_space(st, raiddisks, size, chunk?*chunk:0, freesize);
8592f29d 3202 }
a322f70c 3203 return 1;
78e44928 3204 }
a322f70c 3205
8592f29d
N
3206 if (st->sb) {
3207 /* A container has already been opened, so we are
3208 * creating in there. Maybe a BVD, maybe an SVD.
3209 * Should make a distinction one day.
3210 */
3211 return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
af4348dd
N
3212 chunk, size, data_offset, dev,
3213 freesize,
8592f29d
N
3214 verbose);
3215 }
78e44928
NB
3216 /* This is the first device for the array.
3217 * If it is a container, we read it in and do automagic allocations,
3218 * no other devices should be given.
3219 * Otherwise it must be a member device of a container, and we
3220 * do manual allocation.
3221 * Later we should check for a BVD and make an SVD.
a322f70c 3222 */
a322f70c
DW
3223 fd = open(dev, O_RDONLY|O_EXCL, 0);
3224 if (fd >= 0) {
4dd2df09 3225 sra = sysfs_read(fd, NULL, GET_VERSION);
a322f70c
DW
3226 close(fd);
3227 if (sra && sra->array.major_version == -1 &&
78e44928
NB
3228 strcmp(sra->text_version, "ddf") == 0) {
3229
3230 /* load super */
3231 /* find space for 'n' devices. */
3232 /* remember the devices */
3233 /* Somehow return the fact that we have enough */
a322f70c
DW
3234 }
3235
2c514b71 3236 if (verbose)
e7b84f9d
N
3237 pr_err("ddf: Cannot create this array "
3238 "on device %s - a container is required.\n",
3239 dev);
a322f70c
DW
3240 return 0;
3241 }
3242 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
2c514b71 3243 if (verbose)
e7b84f9d 3244 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3245 dev, strerror(errno));
a322f70c
DW
3246 return 0;
3247 }
3248 /* Well, it is in use by someone, maybe a 'ddf' container. */
3249 cfd = open_container(fd);
3250 if (cfd < 0) {
3251 close(fd);
2c514b71 3252 if (verbose)
e7b84f9d 3253 pr_err("ddf: Cannot use %s: %s\n",
613b0d17 3254 dev, strerror(EBUSY));
a322f70c
DW
3255 return 0;
3256 }
4dd2df09 3257 sra = sysfs_read(cfd, NULL, GET_VERSION);
a322f70c
DW
3258 close(fd);
3259 if (sra && sra->array.major_version == -1 &&
3260 strcmp(sra->text_version, "ddf") == 0) {
3261 /* This is a member of a ddf container. Load the container
3262 * and try to create a bvd
3263 */
3264 struct ddf_super *ddf;
e1902a7b 3265 if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
5f8097be 3266 st->sb = ddf;
4dd2df09 3267 strcpy(st->container_devnm, fd2devnm(cfd));
a322f70c 3268 close(cfd);
78e44928 3269 return validate_geometry_ddf_bvd(st, level, layout,
a322f70c 3270 raiddisks, chunk, size,
af4348dd 3271 data_offset,
2c514b71
NB
3272 dev, freesize,
3273 verbose);
a322f70c
DW
3274 }
3275 close(cfd);
c42ec1ed
DW
3276 } else /* device may belong to a different container */
3277 return 0;
3278
a322f70c
DW
3279 return 1;
3280}
3281
2c514b71
NB
3282static int
3283validate_geometry_ddf_container(struct supertype *st,
3284 int level, int layout, int raiddisks,
3285 int chunk, unsigned long long size,
af4348dd 3286 unsigned long long data_offset,
2c514b71
NB
3287 char *dev, unsigned long long *freesize,
3288 int verbose)
a322f70c
DW
3289{
3290 int fd;
3291 unsigned long long ldsize;
3292
3293 if (level != LEVEL_CONTAINER)
3294 return 0;
3295 if (!dev)
3296 return 1;
3297
3298 fd = open(dev, O_RDONLY|O_EXCL, 0);
3299 if (fd < 0) {
2c514b71 3300 if (verbose)
e7b84f9d 3301 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3302 dev, strerror(errno));
a322f70c
DW
3303 return 0;
3304 }
3305 if (!get_dev_size(fd, dev, &ldsize)) {
3306 close(fd);
3307 return 0;
3308 }
3309 close(fd);
3310
387fcd59 3311 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
ea17e7aa
N
3312 if (*freesize == 0)
3313 return 0;
a322f70c
DW
3314
3315 return 1;
3316}
3317
78e44928
NB
3318static int validate_geometry_ddf_bvd(struct supertype *st,
3319 int level, int layout, int raiddisks,
c21e737b 3320 int *chunk, unsigned long long size,
af4348dd 3321 unsigned long long data_offset,
2c514b71
NB
3322 char *dev, unsigned long long *freesize,
3323 int verbose)
a322f70c
DW
3324{
3325 struct stat stb;
3326 struct ddf_super *ddf = st->sb;
3327 struct dl *dl;
5f8097be
NB
3328 unsigned long long pos = 0;
3329 unsigned long long maxsize;
3330 struct extent *e;
3331 int i;
a322f70c 3332 /* ddf/bvd supports lots of things, but not containers */
b42f577a
N
3333 if (level == LEVEL_CONTAINER) {
3334 if (verbose)
e7b84f9d 3335 pr_err("DDF cannot create a container within an container\n");
a322f70c 3336 return 0;
b42f577a 3337 }
a322f70c
DW
3338 /* We must have the container info already read in. */
3339 if (!ddf)
3340 return 0;
3341
5f8097be
NB
3342 if (!dev) {
3343 /* General test: make sure there is space for
3344 * 'raiddisks' device extents of size 'size'.
3345 */
3346 unsigned long long minsize = size;
3347 int dcnt = 0;
3348 if (minsize == 0)
3349 minsize = 8;
3350 for (dl = ddf->dlist; dl ; dl = dl->next)
3351 {
3352 int found = 0;
7e1432fb 3353 pos = 0;
5f8097be
NB
3354
3355 i = 0;
3356 e = get_extents(ddf, dl);
3357 if (!e) continue;
3358 do {
3359 unsigned long long esize;
3360 esize = e[i].start - pos;
3361 if (esize >= minsize)
3362 found = 1;
3363 pos = e[i].start + e[i].size;
3364 i++;
3365 } while (e[i-1].size);
3366 if (found)
3367 dcnt++;
3368 free(e);
3369 }
3370 if (dcnt < raiddisks) {
2c514b71 3371 if (verbose)
e7b84f9d
N
3372 pr_err("ddf: Not enough devices with "
3373 "space for this array (%d < %d)\n",
3374 dcnt, raiddisks);
5f8097be
NB
3375 return 0;
3376 }
3377 return 1;
3378 }
a322f70c
DW
3379 /* This device must be a member of the set */
3380 if (stat(dev, &stb) < 0)
3381 return 0;
3382 if ((S_IFMT & stb.st_mode) != S_IFBLK)
3383 return 0;
3384 for (dl = ddf->dlist ; dl ; dl = dl->next) {
f21e18ca
N
3385 if (dl->major == (int)major(stb.st_rdev) &&
3386 dl->minor == (int)minor(stb.st_rdev))
a322f70c
DW
3387 break;
3388 }
5f8097be 3389 if (!dl) {
2c514b71 3390 if (verbose)
e7b84f9d 3391 pr_err("ddf: %s is not in the "
613b0d17
N
3392 "same DDF set\n",
3393 dev);
5f8097be
NB
3394 return 0;
3395 }
3396 e = get_extents(ddf, dl);
3397 maxsize = 0;
3398 i = 0;
3399 if (e) do {
613b0d17
N
3400 unsigned long long esize;
3401 esize = e[i].start - pos;
3402 if (esize >= maxsize)
3403 maxsize = esize;
3404 pos = e[i].start + e[i].size;
3405 i++;
3406 } while (e[i-1].size);
5f8097be 3407 *freesize = maxsize;
a322f70c
DW
3408 // FIXME here I am
3409
3410 return 1;
3411}
59e36268 3412
a322f70c 3413static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 3414 void **sbp, char *devname)
a322f70c
DW
3415{
3416 struct mdinfo *sra;
3417 struct ddf_super *super;
3418 struct mdinfo *sd, *best = NULL;
3419 int bestseq = 0;
3420 int seq;
3421 char nm[20];
3422 int dfd;
3423
b526e52d 3424 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
a322f70c
DW
3425 if (!sra)
3426 return 1;
3427 if (sra->array.major_version != -1 ||
3428 sra->array.minor_version != -2 ||
3429 strcmp(sra->text_version, "ddf") != 0)
3430 return 1;
3431
6416d527 3432 if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
a322f70c 3433 return 1;
a2349791 3434 memset(super, 0, sizeof(*super));
a322f70c
DW
3435
3436 /* first, try each device, and choose the best ddf */
3437 for (sd = sra->devs ; sd ; sd = sd->next) {
3438 int rv;
3439 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
7a7cc504
NB
3440 dfd = dev_open(nm, O_RDONLY);
3441 if (dfd < 0)
a322f70c
DW
3442 return 2;
3443 rv = load_ddf_headers(dfd, super, NULL);
7a7cc504 3444 close(dfd);
a322f70c
DW
3445 if (rv == 0) {
3446 seq = __be32_to_cpu(super->active->seq);
3447 if (super->active->openflag)
3448 seq--;
3449 if (!best || seq > bestseq) {
3450 bestseq = seq;
3451 best = sd;
3452 }
3453 }
3454 }
3455 if (!best)
3456 return 1;
3457 /* OK, load this ddf */
3458 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
3459 dfd = dev_open(nm, O_RDONLY);
7a7cc504 3460 if (dfd < 0)
a322f70c
DW
3461 return 1;
3462 load_ddf_headers(dfd, super, NULL);
3463 load_ddf_global(dfd, super, NULL);
3464 close(dfd);
3465 /* Now we need the device-local bits */
3466 for (sd = sra->devs ; sd ; sd = sd->next) {
3d2c4fc7
DW
3467 int rv;
3468
a322f70c 3469 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
e1902a7b 3470 dfd = dev_open(nm, O_RDWR);
7a7cc504 3471 if (dfd < 0)
a322f70c 3472 return 2;
3d2c4fc7
DW
3473 rv = load_ddf_headers(dfd, super, NULL);
3474 if (rv == 0)
e1902a7b 3475 rv = load_ddf_local(dfd, super, NULL, 1);
3d2c4fc7
DW
3476 if (rv)
3477 return 1;
a322f70c 3478 }
33414a01 3479
a322f70c
DW
3480 *sbp = super;
3481 if (st->ss == NULL) {
78e44928 3482 st->ss = &super_ddf;
a322f70c
DW
3483 st->minor_version = 0;
3484 st->max_devs = 512;
3485 }
4dd2df09 3486 strcpy(st->container_devnm, fd2devnm(fd));
a322f70c
DW
3487 return 0;
3488}
2b959fbf
N
3489
3490static int load_container_ddf(struct supertype *st, int fd,
3491 char *devname)
3492{
3493 return load_super_ddf_all(st, fd, &st->sb, devname);
3494}
3495
0e600426 3496#endif /* MDASSEMBLE */
a322f70c 3497
a5c7adb3 3498static int check_secondary(const struct vcl *vc)
3499{
3500 const struct vd_config *conf = &vc->conf;
3501 int i;
3502
3503 /* The only DDF secondary RAID level md can support is
3504 * RAID 10, if the stripe sizes and Basic volume sizes
3505 * are all equal.
3506 * Other configurations could in theory be supported by exposing
3507 * the BVDs to user space and using device mapper for the secondary
3508 * mapping. So far we don't support that.
3509 */
3510
3511 __u64 sec_elements[4] = {0, 0, 0, 0};
3512#define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3513#define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3514
3515 if (vc->other_bvds == NULL) {
3516 pr_err("No BVDs for secondary RAID found\n");
3517 return -1;
3518 }
3519 if (conf->prl != DDF_RAID1) {
3520 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3521 return -1;
3522 }
3523 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3524 pr_err("Secondary RAID level %d is unsupported\n",
3525 conf->srl);
3526 return -1;
3527 }
3528 __set_sec_seen(conf->sec_elmnt_seq);
3529 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3530 const struct vd_config *bvd = vc->other_bvds[i];
3c48f7be 3531 if (bvd->sec_elmnt_seq == DDF_UNUSED_BVD)
c98567ba 3532 continue;
a5c7adb3 3533 if (bvd->srl != conf->srl) {
3534 pr_err("Inconsistent secondary RAID level across BVDs\n");
3535 return -1;
3536 }
3537 if (bvd->prl != conf->prl) {
3538 pr_err("Different RAID levels for BVDs are unsupported\n");
3539 return -1;
3540 }
3541 if (bvd->prim_elmnt_count != conf->prim_elmnt_count) {
3542 pr_err("All BVDs must have the same number of primary elements\n");
3543 return -1;
3544 }
3545 if (bvd->chunk_shift != conf->chunk_shift) {
3546 pr_err("Different strip sizes for BVDs are unsupported\n");
3547 return -1;
3548 }
3549 if (bvd->array_blocks != conf->array_blocks) {
3550 pr_err("Different BVD sizes are unsupported\n");
3551 return -1;
3552 }
3553 __set_sec_seen(bvd->sec_elmnt_seq);
3554 }
3555 for (i = 0; i < conf->sec_elmnt_count; i++) {
3556 if (!__was_sec_seen(i)) {
3557 pr_err("BVD %d is missing\n", i);
3558 return -1;
3559 }
3560 }
3561 return 0;
3562}
3563
8a38db86 3564static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
4e587018 3565 __u32 refnum, unsigned int nmax,
3566 const struct vd_config **bvd,
3567 unsigned int *idx)
8a38db86 3568{
4e587018 3569 unsigned int i, j, n, sec, cnt;
3570
3571 cnt = __be16_to_cpu(vc->conf.prim_elmnt_count);
3572 sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
3573
3574 for (i = 0, j = 0 ; i < nmax ; i++) {
3575 /* j counts valid entries for this BVD */
3576 if (vc->conf.phys_refnum[i] != 0xffffffff)
3577 j++;
3578 if (vc->conf.phys_refnum[i] == refnum) {
3579 *bvd = &vc->conf;
3580 *idx = i;
3581 return sec * cnt + j - 1;
3582 }
3583 }
3584 if (vc->other_bvds == NULL)
3585 goto bad;
3586
3587 for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
3588 struct vd_config *vd = vc->other_bvds[n-1];
4e587018 3589 sec = vd->sec_elmnt_seq;
3c48f7be 3590 if (sec == DDF_UNUSED_BVD)
3591 continue;
4e587018 3592 for (i = 0, j = 0 ; i < nmax ; i++) {
3593 if (vd->phys_refnum[i] != 0xffffffff)
3594 j++;
3595 if (vd->phys_refnum[i] == refnum) {
3596 *bvd = vd;
3597 *idx = i;
3598 return sec * cnt + j - 1;
3599 }
3600 }
3601 }
3602bad:
3603 *bvd = NULL;
d6e7b083 3604 return DDF_NOTFOUND;
8a38db86 3605}
3606
00bbdbda 3607static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
598f0d58
NB
3608{
3609 /* Given a container loaded by load_super_ddf_all,
3610 * extract information about all the arrays into
3611 * an mdinfo tree.
3612 *
3613 * For each vcl in conflist: create an mdinfo, fill it in,
3614 * then look for matching devices (phys_refnum) in dlist
3615 * and create appropriate device mdinfo.
3616 */
3617 struct ddf_super *ddf = st->sb;
3618 struct mdinfo *rest = NULL;
3619 struct vcl *vc;
3620
3621 for (vc = ddf->conflist ; vc ; vc=vc->next)
3622 {
f21e18ca
N
3623 unsigned int i;
3624 unsigned int j;
598f0d58 3625 struct mdinfo *this;
00bbdbda 3626 char *ep;
90fa1a29 3627 __u32 *cptr;
8a38db86 3628 unsigned int pd;
00bbdbda
N
3629
3630 if (subarray &&
3631 (strtoul(subarray, &ep, 10) != vc->vcnum ||
3632 *ep != '\0'))
3633 continue;
3634
a5c7adb3 3635 if (vc->conf.sec_elmnt_count > 1) {
3636 if (check_secondary(vc) != 0)
3637 continue;
3638 }
3639
503975b9 3640 this = xcalloc(1, sizeof(*this));
598f0d58
NB
3641 this->next = rest;
3642 rest = this;
3643
8a2848a7 3644 if (layout_ddf2md(&vc->conf, &this->array))
3645 continue;
598f0d58 3646 this->array.md_minor = -1;
f35f2525
N
3647 this->array.major_version = -1;
3648 this->array.minor_version = -2;
90fa1a29
JS
3649 cptr = (__u32 *)(vc->conf.guid + 16);
3650 this->array.ctime = DECADE + __be32_to_cpu(*cptr);
598f0d58
NB
3651 this->array.utime = DECADE +
3652 __be32_to_cpu(vc->conf.timestamp);
3653 this->array.chunk_size = 512 << vc->conf.chunk_shift;
3654
59e36268 3655 i = vc->vcnum;
7a7cc504
NB
3656 if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
3657 (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
ed9d66aa 3658 DDF_init_full) {
598f0d58 3659 this->array.state = 0;
ed9d66aa
NB
3660 this->resync_start = 0;
3661 } else {
598f0d58 3662 this->array.state = 1;
b7528a20 3663 this->resync_start = MaxSector;
ed9d66aa 3664 }
db42fa9b
N
3665 memcpy(this->name, ddf->virt->entries[i].name, 16);
3666 this->name[16]=0;
3667 for(j=0; j<16; j++)
3668 if (this->name[j] == ' ')
3669 this->name[j] = 0;
598f0d58
NB
3670
3671 memset(this->uuid, 0, sizeof(this->uuid));
3672 this->component_size = __be64_to_cpu(vc->conf.blocks);
3673 this->array.size = this->component_size / 2;
5f2aace8 3674 this->container_member = i;
598f0d58 3675
c5afc314
N
3676 ddf->currentconf = vc;
3677 uuid_from_super_ddf(st, this->uuid);
f646805e 3678 if (!subarray)
3679 ddf->currentconf = NULL;
c5afc314 3680
60f18132 3681 sprintf(this->text_version, "/%s/%d",
4dd2df09 3682 st->container_devnm, this->container_member);
60f18132 3683
8a38db86 3684 for (pd = 0; pd < __be16_to_cpu(ddf->phys->used_pdes); pd++) {
598f0d58
NB
3685 struct mdinfo *dev;
3686 struct dl *d;
4e587018 3687 const struct vd_config *bvd;
3688 unsigned int iphys;
fa033bec 3689 int stt;
598f0d58 3690
8a38db86 3691 if (ddf->phys->entries[pd].refnum == 0xFFFFFFFF)
bc17324f 3692 continue;
0cf5ef67
N
3693
3694 stt = __be16_to_cpu(ddf->phys->entries[pd].state);
fa033bec
N
3695 if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3696 != DDF_Online)
3697 continue;
3698
8a38db86 3699 i = get_pd_index_from_refnum(
4e587018 3700 vc, ddf->phys->entries[pd].refnum,
3701 ddf->mppe, &bvd, &iphys);
d6e7b083 3702 if (i == DDF_NOTFOUND)
8a38db86 3703 continue;
3704
fa033bec 3705 this->array.working_disks++;
bc17324f 3706
0cf5ef67 3707 for (d = ddf->dlist; d ; d=d->next)
8a38db86 3708 if (d->disk.refnum ==
3709 ddf->phys->entries[pd].refnum)
0cf5ef67
N
3710 break;
3711 if (d == NULL)
3712 /* Haven't found that one yet, maybe there are others */
3713 continue;
3714
503975b9 3715 dev = xcalloc(1, sizeof(*dev));
598f0d58
NB
3716 dev->next = this->devs;
3717 this->devs = dev;
3718
3719 dev->disk.number = __be32_to_cpu(d->disk.refnum);
3720 dev->disk.major = d->major;
3721 dev->disk.minor = d->minor;
3722 dev->disk.raid_disk = i;
3723 dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
d23534e4 3724 dev->recovery_start = MaxSector;
598f0d58 3725
120f7677 3726 dev->events = __be32_to_cpu(ddf->primary.seq);
57a66662 3727 dev->data_offset =
3728 __be64_to_cpu(LBA_OFFSET(ddf, bvd)[iphys]);
4e587018 3729 dev->component_size = __be64_to_cpu(bvd->blocks);
598f0d58
NB
3730 if (d->devname)
3731 strcpy(dev->name, d->devname);
3732 }
3733 }
3734 return rest;
3735}
3736
955e9ea1 3737static int store_super_ddf(struct supertype *st, int fd)
a322f70c 3738{
955e9ea1 3739 struct ddf_super *ddf = st->sb;
a322f70c 3740 unsigned long long dsize;
6416d527 3741 void *buf;
3d2c4fc7 3742 int rc;
a322f70c 3743
955e9ea1
DW
3744 if (!ddf)
3745 return 1;
3746
a322f70c
DW
3747 if (!get_dev_size(fd, NULL, &dsize))
3748 return 1;
3749
dbf98368 3750 if (ddf->dlist || ddf->conflist) {
3751 struct stat sta;
3752 struct dl *dl;
3753 int ofd, ret;
3754
3755 if (fstat(fd, &sta) == -1 || !S_ISBLK(sta.st_mode)) {
3756 pr_err("%s: file descriptor for invalid device\n",
3757 __func__);
3758 return 1;
3759 }
3760 for (dl = ddf->dlist; dl; dl = dl->next)
3761 if (dl->major == (int)major(sta.st_rdev) &&
3762 dl->minor == (int)minor(sta.st_rdev))
3763 break;
3764 if (!dl) {
3765 pr_err("%s: couldn't find disk %d/%d\n", __func__,
3766 (int)major(sta.st_rdev),
3767 (int)minor(sta.st_rdev));
3768 return 1;
3769 }
dbf98368 3770 ofd = dl->fd;
3771 dl->fd = fd;
9bf38704 3772 ret = (_write_super_to_disk(ddf, dl) != 1);
dbf98368 3773 dl->fd = ofd;
3774 return ret;
3775 }
3776
3d2c4fc7
DW
3777 if (posix_memalign(&buf, 512, 512) != 0)
3778 return 1;
6416d527
NB
3779 memset(buf, 0, 512);
3780
a322f70c 3781 lseek64(fd, dsize-512, 0);
3d2c4fc7 3782 rc = write(fd, buf, 512);
6416d527 3783 free(buf);
3d2c4fc7
DW
3784 if (rc < 0)
3785 return 1;
a322f70c
DW
3786 return 0;
3787}
3788
a19c88b8
NB
3789static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3790{
3791 /*
3792 * return:
3793 * 0 same, or first was empty, and second was copied
3794 * 1 second had wrong number
3795 * 2 wrong uuid
3796 * 3 wrong other info
3797 */
3798 struct ddf_super *first = st->sb;
3799 struct ddf_super *second = tst->sb;
4eefd651 3800 struct dl *dl1, *dl2;
3801 struct vcl *vl1, *vl2;
2d210697 3802 unsigned int max_vds, max_pds, pd, vd;
a19c88b8
NB
3803
3804 if (!first) {
3805 st->sb = tst->sb;
3806 tst->sb = NULL;
3807 return 0;
3808 }
3809
3810 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3811 return 2;
3812
2d210697 3813 if (first->anchor.seq != second->anchor.seq) {
3814 dprintf("%s: sequence number mismatch %u/%u\n", __func__,
3815 __be32_to_cpu(first->anchor.seq),
3816 __be32_to_cpu(second->anchor.seq));
3817 return 3;
3818 }
3819 if (first->max_part != second->max_part ||
3820 first->phys->used_pdes != second->phys->used_pdes ||
3821 first->virt->populated_vdes != second->virt->populated_vdes) {
3822 dprintf("%s: PD/VD number mismatch\n", __func__);
3823 return 3;
3824 }
3825
3826 max_pds = __be16_to_cpu(first->phys->used_pdes);
3827 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3828 for (pd = 0; pd < max_pds; pd++)
3829 if (first->phys->entries[pd].refnum == dl2->disk.refnum)
3830 break;
3831 if (pd == max_pds) {
3832 dprintf("%s: no match for disk %08x\n", __func__,
3833 __be32_to_cpu(dl2->disk.refnum));
3834 return 3;
3835 }
3836 }
3837
3838 max_vds = __be16_to_cpu(first->active->max_vd_entries);
3839 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3840 if (vl2->conf.magic != DDF_VD_CONF_MAGIC)
3841 continue;
3842 for (vd = 0; vd < max_vds; vd++)
3843 if (!memcmp(first->virt->entries[vd].guid,
3844 vl2->conf.guid, DDF_GUID_LEN))
3845 break;
3846 if (vd == max_vds) {
3847 dprintf("%s: no match for VD config\n", __func__);
3848 return 3;
3849 }
3850 }
a19c88b8 3851 /* FIXME should I look at anything else? */
2d210697 3852
4eefd651 3853 /*
3854 At this point we are fairly sure that the meta data matches.
3855 But the new disk may contain additional local data.
3856 Add it to the super block.
3857 */
3858 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3859 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3860 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3861 DDF_GUID_LEN))
3862 break;
3863 if (vl1) {
3864 if (vl1->other_bvds != NULL &&
3865 vl1->conf.sec_elmnt_seq !=
3866 vl2->conf.sec_elmnt_seq) {
3867 dprintf("%s: adding BVD %u\n", __func__,
3868 vl2->conf.sec_elmnt_seq);
3869 add_other_bvd(vl1, &vl2->conf,
3870 first->conf_rec_len*512);
3871 }
3872 continue;
3873 }
3874
3875 if (posix_memalign((void **)&vl1, 512,
3876 (first->conf_rec_len*512 +
3877 offsetof(struct vcl, conf))) != 0) {
3878 pr_err("%s could not allocate vcl buf\n",
3879 __func__);
3880 return 3;
3881 }
3882
3883 vl1->next = first->conflist;
3884 vl1->block_sizes = NULL;
4eefd651 3885 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3c48f7be 3886 if (alloc_other_bvds(first, vl1) != 0) {
3887 pr_err("%s could not allocate other bvds\n",
3888 __func__);
3889 free(vl1);
3890 return 3;
3891 }
4eefd651 3892 for (vd = 0; vd < max_vds; vd++)
3893 if (!memcmp(first->virt->entries[vd].guid,
3894 vl1->conf.guid, DDF_GUID_LEN))
3895 break;
3896 vl1->vcnum = vd;
3897 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
3898 first->conflist = vl1;
3899 }
3900
3901 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3902 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
3903 if (dl1->disk.refnum == dl2->disk.refnum)
3904 break;
3905 if (dl1)
3906 continue;
3907
3908 if (posix_memalign((void **)&dl1, 512,
3909 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
3910 != 0) {
3911 pr_err("%s could not allocate disk info buffer\n",
3912 __func__);
3913 return 3;
3914 }
3915 memcpy(dl1, dl2, sizeof(*dl1));
3916 dl1->mdupdate = NULL;
3917 dl1->next = first->dlist;
3918 dl1->fd = -1;
3919 for (pd = 0; pd < max_pds; pd++)
3920 if (first->phys->entries[pd].refnum == dl1->disk.refnum)
3921 break;
3922 dl1->pdnum = pd;
3923 if (dl2->spare) {
3924 if (posix_memalign((void **)&dl1->spare, 512,
3925 first->conf_rec_len*512) != 0) {
3926 pr_err("%s could not allocate spare info buf\n",
3927 __func__);
3928 return 3;
3929 }
3930 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
3931 }
3932 for (vd = 0 ; vd < first->max_part ; vd++) {
3933 if (!dl2->vlist[vd]) {
3934 dl1->vlist[vd] = NULL;
3935 continue;
3936 }
3937 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
3938 if (!memcmp(vl1->conf.guid,
3939 dl2->vlist[vd]->conf.guid,
3940 DDF_GUID_LEN))
3941 break;
3942 dl1->vlist[vd] = vl1;
3943 }
3944 }
3945 first->dlist = dl1;
3946 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
ad60eea1 3947 __be32_to_cpu(dl1->disk.refnum));
4eefd651 3948 }
3949
a19c88b8
NB
3950 return 0;
3951}
3952
0e600426 3953#ifndef MDASSEMBLE
4e5528c6
NB
3954/*
3955 * A new array 'a' has been started which claims to be instance 'inst'
3956 * within container 'c'.
3957 * We need to confirm that the array matches the metadata in 'c' so
3958 * that we don't corrupt any metadata.
3959 */
cba0191b 3960static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
549e9569 3961{
a2aa439e 3962 struct ddf_super *ddf = c->sb;
3963 int n = atoi(inst);
fb9d0acb 3964 if (all_ff(ddf->virt->entries[n].guid)) {
3965 pr_err("%s: subarray %d doesn't exist\n", __func__, n);
a2aa439e 3966 return -ENODEV;
3967 }
3968 dprintf("ddf: open_new %d\n", n);
3969 a->info.container_member = n;
549e9569
NB
3970 return 0;
3971}
3972
4e5528c6
NB
3973/*
3974 * The array 'a' is to be marked clean in the metadata.
ed9d66aa 3975 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
4e5528c6
NB
3976 * clean up to the point (in sectors). If that cannot be recorded in the
3977 * metadata, then leave it as dirty.
3978 *
3979 * For DDF, we need to clear the DDF_state_inconsistent bit in the
3980 * !global! virtual_disk.virtual_entry structure.
3981 */
01f157d7 3982static int ddf_set_array_state(struct active_array *a, int consistent)
549e9569 3983{
4e5528c6
NB
3984 struct ddf_super *ddf = a->container->sb;
3985 int inst = a->info.container_member;
18a2f463 3986 int old = ddf->virt->entries[inst].state;
01f157d7
N
3987 if (consistent == 2) {
3988 /* Should check if a recovery should be started FIXME */
3989 consistent = 1;
b7941fd6 3990 if (!is_resync_complete(&a->info))
01f157d7
N
3991 consistent = 0;
3992 }
ed9d66aa
NB
3993 if (consistent)
3994 ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
3995 else
4e5528c6 3996 ddf->virt->entries[inst].state |= DDF_state_inconsistent;
18a2f463 3997 if (old != ddf->virt->entries[inst].state)
7d5a7ff3 3998 ddf_set_updates_pending(ddf);
18a2f463
NB
3999
4000 old = ddf->virt->entries[inst].init_state;
ed9d66aa 4001 ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
b7941fd6 4002 if (is_resync_complete(&a->info))
ed9d66aa 4003 ddf->virt->entries[inst].init_state |= DDF_init_full;
b7941fd6 4004 else if (a->info.resync_start == 0)
ed9d66aa 4005 ddf->virt->entries[inst].init_state |= DDF_init_not;
4e5528c6 4006 else
ed9d66aa 4007 ddf->virt->entries[inst].init_state |= DDF_init_quick;
18a2f463 4008 if (old != ddf->virt->entries[inst].init_state)
7d5a7ff3 4009 ddf_set_updates_pending(ddf);
ed9d66aa 4010
b27336a2 4011 dprintf("ddf mark %d/%s (%d) %s %llu\n", inst,
4012 guid_str(ddf->virt->entries[inst].guid), a->curr_state,
4013 consistent?"clean":"dirty",
b7941fd6 4014 a->info.resync_start);
01f157d7 4015 return consistent;
fd7cde1b
DW
4016}
4017
5ec636b7 4018static int get_bvd_state(const struct ddf_super *ddf,
4019 const struct vd_config *vc)
4020{
4021 unsigned int i, n_bvd, working = 0;
4022 unsigned int n_prim = __be16_to_cpu(vc->prim_elmnt_count);
4023 int pd, st, state;
4024 for (i = 0; i < n_prim; i++) {
4025 if (!find_index_in_bvd(ddf, vc, i, &n_bvd))
4026 continue;
4027 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
4028 if (pd < 0)
4029 continue;
4030 st = __be16_to_cpu(ddf->phys->entries[pd].state);
4031 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
4032 == DDF_Online)
4033 working++;
4034 }
4035
4036 state = DDF_state_degraded;
4037 if (working == n_prim)
4038 state = DDF_state_optimal;
4039 else
4040 switch (vc->prl) {
4041 case DDF_RAID0:
4042 case DDF_CONCAT:
4043 case DDF_JBOD:
4044 state = DDF_state_failed;
4045 break;
4046 case DDF_RAID1:
4047 if (working == 0)
4048 state = DDF_state_failed;
4049 else if (working >= 2)
4050 state = DDF_state_part_optimal;
4051 break;
4052 case DDF_RAID4:
4053 case DDF_RAID5:
4054 if (working < n_prim - 1)
4055 state = DDF_state_failed;
4056 break;
4057 case DDF_RAID6:
4058 if (working < n_prim - 2)
4059 state = DDF_state_failed;
4060 else if (working == n_prim - 1)
4061 state = DDF_state_part_optimal;
4062 break;
4063 }
4064 return state;
4065}
4066
0777d17d 4067static int secondary_state(int state, int other, int seclevel)
4068{
4069 if (state == DDF_state_optimal && other == DDF_state_optimal)
4070 return DDF_state_optimal;
4071 if (seclevel == DDF_2MIRRORED) {
4072 if (state == DDF_state_optimal || other == DDF_state_optimal)
4073 return DDF_state_part_optimal;
4074 if (state == DDF_state_failed && other == DDF_state_failed)
4075 return DDF_state_failed;
4076 return DDF_state_degraded;
4077 } else {
4078 if (state == DDF_state_failed || other == DDF_state_failed)
4079 return DDF_state_failed;
4080 if (state == DDF_state_degraded || other == DDF_state_degraded)
4081 return DDF_state_degraded;
4082 return DDF_state_part_optimal;
4083 }
4084}
4085
4086static int get_svd_state(const struct ddf_super *ddf, const struct vcl *vcl)
4087{
4088 int state = get_bvd_state(ddf, &vcl->conf);
4089 unsigned int i;
4090 for (i = 1; i < vcl->conf.sec_elmnt_count; i++) {
4091 state = secondary_state(
4092 state,
4093 get_bvd_state(ddf, vcl->other_bvds[i-1]),
4094 vcl->conf.srl);
4095 }
4096 return state;
4097}
4098
7a7cc504
NB
4099/*
4100 * The state of each disk is stored in the global phys_disk structure
4101 * in phys_disk.entries[n].state.
4102 * This makes various combinations awkward.
4103 * - When a device fails in any array, it must be failed in all arrays
4104 * that include a part of this device.
4105 * - When a component is rebuilding, we cannot include it officially in the
4106 * array unless this is the only array that uses the device.
4107 *
4108 * So: when transitioning:
4109 * Online -> failed, just set failed flag. monitor will propagate
4110 * spare -> online, the device might need to be added to the array.
4111 * spare -> failed, just set failed. Don't worry if in array or not.
4112 */
8d45d196 4113static void ddf_set_disk(struct active_array *a, int n, int state)
549e9569 4114{
7a7cc504 4115 struct ddf_super *ddf = a->container->sb;
baba3f4e 4116 unsigned int inst = a->info.container_member, n_bvd;
4117 struct vcl *vcl;
4118 struct vd_config *vc = find_vdcr(ddf, inst, (unsigned int)n,
4119 &n_bvd, &vcl);
4120 int pd;
e1316fab
N
4121 struct mdinfo *mdi;
4122 struct dl *dl;
7a7cc504
NB
4123
4124 if (vc == NULL) {
2c514b71 4125 dprintf("ddf: cannot find instance %d!!\n", inst);
7a7cc504
NB
4126 return;
4127 }
e1316fab
N
4128 /* Find the matching slot in 'info'. */
4129 for (mdi = a->info.devs; mdi; mdi = mdi->next)
4130 if (mdi->disk.raid_disk == n)
4131 break;
4132 if (!mdi)
4133 return;
4134
4135 /* and find the 'dl' entry corresponding to that. */
4136 for (dl = ddf->dlist; dl; dl = dl->next)
77632af9
N
4137 if (mdi->state_fd >= 0 &&
4138 mdi->disk.major == dl->major &&
e1316fab
N
4139 mdi->disk.minor == dl->minor)
4140 break;
4141 if (!dl)
4142 return;
4143
baba3f4e 4144 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
e1316fab
N
4145 if (pd < 0 || pd != dl->pdnum) {
4146 /* disk doesn't currently exist or has changed.
4147 * If it is now in_sync, insert it. */
baba3f4e 4148 dprintf("%s: phys disk not found for %d: %d/%d ref %08x\n",
4149 __func__, dl->pdnum, dl->major, dl->minor,
ad60eea1 4150 __be32_to_cpu(dl->disk.refnum));
baba3f4e 4151 dprintf("%s: array %u disk %u ref %08x pd %d\n",
4152 __func__, inst, n_bvd, vc->phys_refnum[n_bvd], pd);
7a7cc504 4153 if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
baba3f4e 4154 pd = dl->pdnum; /* FIXME: is this really correct ? */
4155 vc->phys_refnum[n_bvd] = dl->disk.refnum;
57a66662 4156 LBA_OFFSET(ddf, vc)[n_bvd] =
4157 __cpu_to_be64(mdi->data_offset);
e1316fab
N
4158 ddf->phys->entries[pd].type &=
4159 ~__cpu_to_be16(DDF_Global_Spare);
4160 ddf->phys->entries[pd].type |=
4161 __cpu_to_be16(DDF_Active_in_VD);
7d5a7ff3 4162 ddf_set_updates_pending(ddf);
7a7cc504
NB
4163 }
4164 } else {
18a2f463 4165 int old = ddf->phys->entries[pd].state;
7a7cc504
NB
4166 if (state & DS_FAULTY)
4167 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Failed);
4168 if (state & DS_INSYNC) {
4169 ddf->phys->entries[pd].state |= __cpu_to_be16(DDF_Online);
4170 ddf->phys->entries[pd].state &= __cpu_to_be16(~DDF_Rebuilding);
4171 }
18a2f463 4172 if (old != ddf->phys->entries[pd].state)
7d5a7ff3 4173 ddf_set_updates_pending(ddf);
7a7cc504
NB
4174 }
4175
2c514b71 4176 dprintf("ddf: set_disk %d to %x\n", n, state);
7e1432fb 4177
7a7cc504
NB
4178 /* Now we need to check the state of the array and update
4179 * virtual_disk.entries[n].state.
4180 * It needs to be one of "optimal", "degraded", "failed".
4181 * I don't understand 'deleted' or 'missing'.
4182 */
0777d17d 4183 state = get_svd_state(ddf, vcl);
7a7cc504 4184
18a2f463
NB
4185 if (ddf->virt->entries[inst].state !=
4186 ((ddf->virt->entries[inst].state & ~DDF_state_mask)
4187 | state)) {
4188
4189 ddf->virt->entries[inst].state =
4190 (ddf->virt->entries[inst].state & ~DDF_state_mask)
4191 | state;
7d5a7ff3 4192 ddf_set_updates_pending(ddf);
18a2f463 4193 }
7a7cc504 4194
549e9569
NB
4195}
4196
2e735d19 4197static void ddf_sync_metadata(struct supertype *st)
549e9569 4198{
7a7cc504
NB
4199
4200 /*
4201 * Write all data to all devices.
4202 * Later, we might be able to track whether only local changes
4203 * have been made, or whether any global data has been changed,
4204 * but ddf is sufficiently weird that it probably always
4205 * changes global data ....
4206 */
18a2f463
NB
4207 struct ddf_super *ddf = st->sb;
4208 if (!ddf->updates_pending)
4209 return;
4210 ddf->updates_pending = 0;
1cc7f4fe 4211 __write_init_super_ddf(st);
2c514b71 4212 dprintf("ddf: sync_metadata\n");
549e9569
NB
4213}
4214
f646805e 4215static int del_from_conflist(struct vcl **list, const char *guid)
4216{
4217 struct vcl **p;
4218 int found = 0;
4219 for (p = list; p && *p; p = &((*p)->next))
4220 if (!memcmp((*p)->conf.guid, guid, DDF_GUID_LEN)) {
4221 found = 1;
4222 *p = (*p)->next;
4223 }
4224 return found;
4225}
4226
4227static int _kill_subarray_ddf(struct ddf_super *ddf, const char *guid)
4228{
4229 struct dl *dl;
4230 unsigned int vdnum, i;
4231 vdnum = find_vde_by_guid(ddf, guid);
4232 if (vdnum == DDF_NOTFOUND) {
4233 pr_err("%s: could not find VD %s\n", __func__,
4234 guid_str(guid));
4235 return -1;
4236 }
4237 if (del_from_conflist(&ddf->conflist, guid) == 0) {
4238 pr_err("%s: could not find conf %s\n", __func__,
4239 guid_str(guid));
4240 return -1;
4241 }
4242 for (dl = ddf->dlist; dl; dl = dl->next)
4243 for (i = 0; i < ddf->max_part; i++)
4244 if (dl->vlist[i] != NULL &&
4245 !memcmp(dl->vlist[i]->conf.guid, guid,
4246 DDF_GUID_LEN))
4247 dl->vlist[i] = NULL;
4248 memset(ddf->virt->entries[vdnum].guid, 0xff, DDF_GUID_LEN);
4249 dprintf("%s: deleted %s\n", __func__, guid_str(guid));
4250 return 0;
4251}
4252
4253static int kill_subarray_ddf(struct supertype *st)
4254{
4255 struct ddf_super *ddf = st->sb;
4256 /*
4257 * currentconf is set in container_content_ddf,
4258 * called with subarray arg
4259 */
4260 struct vcl *victim = ddf->currentconf;
4261 struct vd_config *conf;
4262 ddf->currentconf = NULL;
4263 unsigned int vdnum;
4264 if (!victim) {
4265 pr_err("%s: nothing to kill\n", __func__);
4266 return -1;
4267 }
4268 conf = &victim->conf;
4269 vdnum = find_vde_by_guid(ddf, conf->guid);
4270 if (vdnum == DDF_NOTFOUND) {
4271 pr_err("%s: could not find VD %s\n", __func__,
4272 guid_str(conf->guid));
4273 return -1;
4274 }
4275 if (st->update_tail) {
4276 struct virtual_disk *vd;
4277 int len = sizeof(struct virtual_disk)
4278 + sizeof(struct virtual_entry);
4279 vd = xmalloc(len);
4280 if (vd == NULL) {
4281 pr_err("%s: failed to allocate %d bytes\n", __func__,
4282 len);
4283 return -1;
4284 }
4285 memset(vd, 0 , len);
4286 vd->magic = DDF_VIRT_RECORDS_MAGIC;
4287 vd->populated_vdes = 0;
4288 memcpy(vd->entries[0].guid, conf->guid, DDF_GUID_LEN);
4289 /* we use DDF_state_deleted as marker */
4290 vd->entries[0].state = DDF_state_deleted;
4291 append_metadata_update(st, vd, len);
6a350d82 4292 } else {
f646805e 4293 _kill_subarray_ddf(ddf, conf->guid);
6a350d82 4294 ddf_set_updates_pending(ddf);
4295 ddf_sync_metadata(st);
4296 }
f646805e 4297 return 0;
4298}
4299
c5943560 4300static void copy_matching_bvd(struct ddf_super *ddf,
4301 struct vd_config *conf,
4302 const struct metadata_update *update)
4303{
4304 unsigned int mppe =
4305 __be16_to_cpu(ddf->anchor.max_primary_element_entries);
4306 unsigned int len = ddf->conf_rec_len * 512;
4307 char *p;
4308 struct vd_config *vc;
4309 for (p = update->buf; p < update->buf + update->len; p += len) {
4310 vc = (struct vd_config *) p;
4311 if (vc->sec_elmnt_seq == conf->sec_elmnt_seq) {
4312 memcpy(conf->phys_refnum, vc->phys_refnum,
4313 mppe * (sizeof(__u32) + sizeof(__u64)));
4314 return;
4315 }
4316 }
4317 pr_err("%s: no match for BVD %d of %s in update\n", __func__,
4318 conf->sec_elmnt_seq, guid_str(conf->guid));
4319}
4320
88c164f4
NB
4321static void ddf_process_update(struct supertype *st,
4322 struct metadata_update *update)
4323{
4324 /* Apply this update to the metadata.
4325 * The first 4 bytes are a DDF_*_MAGIC which guides
4326 * our actions.
4327 * Possible update are:
4328 * DDF_PHYS_RECORDS_MAGIC
4dd968cc
N
4329 * Add a new physical device or remove an old one.
4330 * Changes to this record only happen implicitly.
88c164f4
NB
4331 * used_pdes is the device number.
4332 * DDF_VIRT_RECORDS_MAGIC
4333 * Add a new VD. Possibly also change the 'access' bits.
4334 * populated_vdes is the entry number.
4335 * DDF_VD_CONF_MAGIC
4336 * New or updated VD. the VIRT_RECORD must already
4337 * exist. For an update, phys_refnum and lba_offset
4338 * (at least) are updated, and the VD_CONF must
4339 * be written to precisely those devices listed with
4340 * a phys_refnum.
4341 * DDF_SPARE_ASSIGN_MAGIC
4342 * replacement Spare Assignment Record... but for which device?
4343 *
4344 * So, e.g.:
4345 * - to create a new array, we send a VIRT_RECORD and
4346 * a VD_CONF. Then assemble and start the array.
4347 * - to activate a spare we send a VD_CONF to add the phys_refnum
4348 * and offset. This will also mark the spare as active with
4349 * a spare-assignment record.
4350 */
4351 struct ddf_super *ddf = st->sb;
4352 __u32 *magic = (__u32*)update->buf;
4353 struct phys_disk *pd;
4354 struct virtual_disk *vd;
4355 struct vd_config *vc;
4356 struct vcl *vcl;
4357 struct dl *dl;
f21e18ca 4358 unsigned int ent;
c5943560 4359 unsigned int pdnum, pd2, len;
88c164f4 4360
2c514b71 4361 dprintf("Process update %x\n", *magic);
7e1432fb 4362
88c164f4
NB
4363 switch (*magic) {
4364 case DDF_PHYS_RECORDS_MAGIC:
4365
4366 if (update->len != (sizeof(struct phys_disk) +
4367 sizeof(struct phys_disk_entry)))
4368 return;
4369 pd = (struct phys_disk*)update->buf;
4370
4371 ent = __be16_to_cpu(pd->used_pdes);
4372 if (ent >= __be16_to_cpu(ddf->phys->max_pdes))
4373 return;
4dd968cc
N
4374 if (pd->entries[0].state & __cpu_to_be16(DDF_Missing)) {
4375 struct dl **dlp;
4376 /* removing this disk. */
4377 ddf->phys->entries[ent].state |= __cpu_to_be16(DDF_Missing);
4378 for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
4379 struct dl *dl = *dlp;
4380 if (dl->pdnum == (signed)ent) {
4381 close(dl->fd);
4382 dl->fd = -1;
4383 /* FIXME this doesn't free
4384 * dl->devname */
4385 update->space = dl;
4386 *dlp = dl->next;
4387 break;
4388 }
4389 }
7d5a7ff3 4390 ddf_set_updates_pending(ddf);
4dd968cc
N
4391 return;
4392 }
88c164f4
NB
4393 if (!all_ff(ddf->phys->entries[ent].guid))
4394 return;
4395 ddf->phys->entries[ent] = pd->entries[0];
4396 ddf->phys->used_pdes = __cpu_to_be16(1 +
613b0d17 4397 __be16_to_cpu(ddf->phys->used_pdes));
7d5a7ff3 4398 ddf_set_updates_pending(ddf);
2cc2983d
N
4399 if (ddf->add_list) {
4400 struct active_array *a;
4401 struct dl *al = ddf->add_list;
4402 ddf->add_list = al->next;
4403
4404 al->next = ddf->dlist;
4405 ddf->dlist = al;
4406
4407 /* As a device has been added, we should check
4408 * for any degraded devices that might make
4409 * use of this spare */
4410 for (a = st->arrays ; a; a=a->next)
4411 a->check_degraded = 1;
4412 }
88c164f4
NB
4413 break;
4414
4415 case DDF_VIRT_RECORDS_MAGIC:
4416
4417 if (update->len != (sizeof(struct virtual_disk) +
4418 sizeof(struct virtual_entry)))
4419 return;
4420 vd = (struct virtual_disk*)update->buf;
4421
f646805e 4422 if (vd->entries[0].state == DDF_state_deleted) {
4423 if (_kill_subarray_ddf(ddf, vd->entries[0].guid))
4424 return;
4425 } else {
4426
6a7e7ecc 4427 ent = find_vde_by_guid(ddf, vd->entries[0].guid);
4428 if (ent != DDF_NOTFOUND) {
4429 dprintf("%s: VD %s exists already in slot %d\n",
4430 __func__, guid_str(vd->entries[0].guid),
4431 ent);
4432 return;
4433 }
f646805e 4434 ent = find_unused_vde(ddf);
4435 if (ent == DDF_NOTFOUND)
4436 return;
4437 ddf->virt->entries[ent] = vd->entries[0];
4438 ddf->virt->populated_vdes =
4439 __cpu_to_be16(
4440 1 + __be16_to_cpu(
4441 ddf->virt->populated_vdes));
ed5ff7a2 4442 dprintf("%s: added VD %s in slot %d(s=%02x i=%02x)\n",
4443 __func__, guid_str(vd->entries[0].guid), ent,
4444 ddf->virt->entries[ent].state,
4445 ddf->virt->entries[ent].init_state);
f646805e 4446 }
7d5a7ff3 4447 ddf_set_updates_pending(ddf);
88c164f4
NB
4448 break;
4449
4450 case DDF_VD_CONF_MAGIC:
88c164f4 4451 vc = (struct vd_config*)update->buf;
c5943560 4452 len = ddf->conf_rec_len * 512;
4453 if ((unsigned int)update->len != len * vc->sec_elmnt_count) {
4454 pr_err("%s: %s: insufficient data (%d) for %u BVDs\n",
4455 __func__, guid_str(vc->guid), update->len,
4456 vc->sec_elmnt_count);
4457 return;
4458 }
88c164f4
NB
4459 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
4460 if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
4461 break;
ed5ff7a2 4462 dprintf("%s: conf update for %s (%s)\n", __func__,
4463 guid_str(vc->guid), (vcl ? "old" : "new"));
88c164f4
NB
4464 if (vcl) {
4465 /* An update, just copy the phys_refnum and lba_offset
4466 * fields
4467 */
c5943560 4468 unsigned int i;
4469 copy_matching_bvd(ddf, &vcl->conf, update);
4470 for (i = 1; i < vc->sec_elmnt_count; i++)
4471 copy_matching_bvd(ddf, vcl->other_bvds[i-1],
4472 update);
88c164f4
NB
4473 } else {
4474 /* A new VD_CONF */
c5943560 4475 unsigned int i;
e6b9548d
DW
4476 if (!update->space)
4477 return;
88c164f4
NB
4478 vcl = update->space;
4479 update->space = NULL;
4480 vcl->next = ddf->conflist;
c5943560 4481 memcpy(&vcl->conf, vc, len);
fb9d0acb 4482 ent = find_vde_by_guid(ddf, vc->guid);
4483 if (ent == DDF_NOTFOUND)
4484 return;
4485 vcl->vcnum = ent;
88c164f4 4486 ddf->conflist = vcl;
c5943560 4487 for (i = 1; i < vc->sec_elmnt_count; i++)
4488 memcpy(vcl->other_bvds[i-1],
4489 update->buf + len * i, len);
88c164f4 4490 }
c7079c84
N
4491 /* Set DDF_Transition on all Failed devices - to help
4492 * us detect those that are no longer in use
4493 */
4494 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4495 if (ddf->phys->entries[pdnum].state
4496 & __be16_to_cpu(DDF_Failed))
4497 ddf->phys->entries[pdnum].state
4498 |= __be16_to_cpu(DDF_Transition);
88c164f4
NB
4499 /* Now make sure vlist is correct for each dl. */
4500 for (dl = ddf->dlist; dl; dl = dl->next) {
f21e18ca 4501 unsigned int vn = 0;
8401644c 4502 int in_degraded = 0;
5838fccd 4503 for (vcl = ddf->conflist; vcl ; vcl = vcl->next) {
4504 unsigned int dn, ibvd;
4505 const struct vd_config *conf;
4506 int vstate;
4507 dn = get_pd_index_from_refnum(vcl,
4508 dl->disk.refnum,
4509 ddf->mppe,
4510 &conf, &ibvd);
4511 if (dn == DDF_NOTFOUND)
4512 continue;
4513 dprintf("dev %d/%08x has %s (sec=%u) at %d\n",
ad60eea1 4514 dl->pdnum,
4515 __be32_to_cpu(dl->disk.refnum),
5838fccd 4516 guid_str(conf->guid),
4517 conf->sec_elmnt_seq, vn);
4518 /* Clear the Transition flag */
4519 if (ddf->phys->entries[dl->pdnum].state
4520 & __be16_to_cpu(DDF_Failed))
4521 ddf->phys->entries[dl->pdnum].state &=
4522 ~__be16_to_cpu(DDF_Transition);
4523 dl->vlist[vn++] = vcl;
4524 vstate = ddf->virt->entries[vcl->vcnum].state
4525 & DDF_state_mask;
4526 if (vstate == DDF_state_degraded ||
4527 vstate == DDF_state_part_optimal)
4528 in_degraded = 1;
4529 }
88c164f4
NB
4530 while (vn < ddf->max_part)
4531 dl->vlist[vn++] = NULL;
7e1432fb
NB
4532 if (dl->vlist[0]) {
4533 ddf->phys->entries[dl->pdnum].type &=
4534 ~__cpu_to_be16(DDF_Global_Spare);
8401644c
N
4535 if (!(ddf->phys->entries[dl->pdnum].type &
4536 __cpu_to_be16(DDF_Active_in_VD))) {
613b0d17
N
4537 ddf->phys->entries[dl->pdnum].type |=
4538 __cpu_to_be16(DDF_Active_in_VD);
4539 if (in_degraded)
4540 ddf->phys->entries[dl->pdnum].state |=
4541 __cpu_to_be16(DDF_Rebuilding);
4542 }
7e1432fb
NB
4543 }
4544 if (dl->spare) {
4545 ddf->phys->entries[dl->pdnum].type &=
4546 ~__cpu_to_be16(DDF_Global_Spare);
4547 ddf->phys->entries[dl->pdnum].type |=
4548 __cpu_to_be16(DDF_Spare);
4549 }
4550 if (!dl->vlist[0] && !dl->spare) {
4551 ddf->phys->entries[dl->pdnum].type |=
4552 __cpu_to_be16(DDF_Global_Spare);
4553 ddf->phys->entries[dl->pdnum].type &=
4554 ~__cpu_to_be16(DDF_Spare |
4555 DDF_Active_in_VD);
4556 }
88c164f4 4557 }
c7079c84
N
4558
4559 /* Now remove any 'Failed' devices that are not part
4560 * of any VD. They will have the Transition flag set.
4561 * Once done, we need to update all dl->pdnum numbers.
4562 */
4563 pd2 = 0;
4564 for (pdnum = 0; pdnum < __be16_to_cpu(ddf->phys->used_pdes); pdnum++)
4565 if ((ddf->phys->entries[pdnum].state
4566 & __be16_to_cpu(DDF_Failed))
4567 && (ddf->phys->entries[pdnum].state
4568 & __be16_to_cpu(DDF_Transition)))
4569 /* skip this one */;
4570 else if (pdnum == pd2)
4571 pd2++;
4572 else {
4573 ddf->phys->entries[pd2] = ddf->phys->entries[pdnum];
4574 for (dl = ddf->dlist; dl; dl = dl->next)
4575 if (dl->pdnum == (int)pdnum)
4576 dl->pdnum = pd2;
4577 pd2++;
4578 }
4579 ddf->phys->used_pdes = __cpu_to_be16(pd2);
4580 while (pd2 < pdnum) {
4581 memset(ddf->phys->entries[pd2].guid, 0xff, DDF_GUID_LEN);
4582 pd2++;
4583 }
4584
7d5a7ff3 4585 ddf_set_updates_pending(ddf);
88c164f4
NB
4586 break;
4587 case DDF_SPARE_ASSIGN_MAGIC:
4588 default: break;
4589 }
4590}
4591
edd8d13c
NB
4592static void ddf_prepare_update(struct supertype *st,
4593 struct metadata_update *update)
4594{
4595 /* This update arrived at managemon.
4596 * We are about to pass it to monitor.
4597 * If a malloc is needed, do it here.
4598 */
4599 struct ddf_super *ddf = st->sb;
4600 __u32 *magic = (__u32*)update->buf;
c5943560 4601 if (*magic == DDF_VD_CONF_MAGIC) {
4602 struct vcl *vcl;
4603 struct vd_config *conf = (struct vd_config *) update->buf;
e6b9548d 4604 if (posix_memalign(&update->space, 512,
613b0d17 4605 offsetof(struct vcl, conf)
c5943560 4606 + ddf->conf_rec_len * 512) != 0) {
4607 update->space = NULL;
4608 return;
4609 }
4610 vcl = update->space;
4611 vcl->conf.sec_elmnt_count = conf->sec_elmnt_count;
4612 if (alloc_other_bvds(ddf, vcl) != 0) {
4613 free(update->space);
e6b9548d 4614 update->space = NULL;
c5943560 4615 }
4616 }
edd8d13c
NB
4617}
4618
7e1432fb
NB
4619/*
4620 * Check if the array 'a' is degraded but not failed.
4621 * If it is, find as many spares as are available and needed and
4622 * arrange for their inclusion.
4623 * We only choose devices which are not already in the array,
4624 * and prefer those with a spare-assignment to this array.
4625 * otherwise we choose global spares - assuming always that
4626 * there is enough room.
4627 * For each spare that we assign, we return an 'mdinfo' which
4628 * describes the position for the device in the array.
4629 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4630 * the new phys_refnum and lba_offset values.
4631 *
4632 * Only worry about BVDs at the moment.
4633 */
4634static struct mdinfo *ddf_activate_spare(struct active_array *a,
4635 struct metadata_update **updates)
4636{
4637 int working = 0;
4638 struct mdinfo *d;
4639 struct ddf_super *ddf = a->container->sb;
4640 int global_ok = 0;
4641 struct mdinfo *rv = NULL;
4642 struct mdinfo *di;
4643 struct metadata_update *mu;
4644 struct dl *dl;
4645 int i;
baba3f4e 4646 struct vcl *vcl;
7e1432fb 4647 struct vd_config *vc;
baba3f4e 4648 unsigned int n_bvd;
7e1432fb 4649
7e1432fb
NB
4650 for (d = a->info.devs ; d ; d = d->next) {
4651 if ((d->curr_state & DS_FAULTY) &&
613b0d17 4652 d->state_fd >= 0)
7e1432fb
NB
4653 /* wait for Removal to happen */
4654 return NULL;
4655 if (d->state_fd >= 0)
4656 working ++;
4657 }
4658
2c514b71
NB
4659 dprintf("ddf_activate: working=%d (%d) level=%d\n", working, a->info.array.raid_disks,
4660 a->info.array.level);
7e1432fb
NB
4661 if (working == a->info.array.raid_disks)
4662 return NULL; /* array not degraded */
4663 switch (a->info.array.level) {
4664 case 1:
4665 if (working == 0)
4666 return NULL; /* failed */
4667 break;
4668 case 4:
4669 case 5:
4670 if (working < a->info.array.raid_disks - 1)
4671 return NULL; /* failed */
4672 break;
4673 case 6:
4674 if (working < a->info.array.raid_disks - 2)
4675 return NULL; /* failed */
4676 break;
4677 default: /* concat or stripe */
4678 return NULL; /* failed */
4679 }
4680
4681 /* For each slot, if it is not working, find a spare */
4682 dl = ddf->dlist;
4683 for (i = 0; i < a->info.array.raid_disks; i++) {
4684 for (d = a->info.devs ; d ; d = d->next)
4685 if (d->disk.raid_disk == i)
4686 break;
2c514b71 4687 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
7e1432fb
NB
4688 if (d && (d->state_fd >= 0))
4689 continue;
4690
4691 /* OK, this device needs recovery. Find a spare */
4692 again:
4693 for ( ; dl ; dl = dl->next) {
4694 unsigned long long esize;
4695 unsigned long long pos;
4696 struct mdinfo *d2;
4697 int is_global = 0;
4698 int is_dedicated = 0;
4699 struct extent *ex;
f21e18ca 4700 unsigned int j;
7e1432fb
NB
4701 /* If in this array, skip */
4702 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
7590d562
N
4703 if (d2->state_fd >= 0 &&
4704 d2->disk.major == dl->major &&
7e1432fb 4705 d2->disk.minor == dl->minor) {
2c514b71 4706 dprintf("%x:%x already in array\n", dl->major, dl->minor);
7e1432fb
NB
4707 break;
4708 }
4709 if (d2)
4710 continue;
4711 if (ddf->phys->entries[dl->pdnum].type &
4712 __cpu_to_be16(DDF_Spare)) {
4713 /* Check spare assign record */
4714 if (dl->spare) {
4715 if (dl->spare->type & DDF_spare_dedicated) {
4716 /* check spare_ents for guid */
4717 for (j = 0 ;
4718 j < __be16_to_cpu(dl->spare->populated);
4719 j++) {
4720 if (memcmp(dl->spare->spare_ents[j].guid,
4721 ddf->virt->entries[a->info.container_member].guid,
4722 DDF_GUID_LEN) == 0)
4723 is_dedicated = 1;
4724 }
4725 } else
4726 is_global = 1;
4727 }
4728 } else if (ddf->phys->entries[dl->pdnum].type &
4729 __cpu_to_be16(DDF_Global_Spare)) {
4730 is_global = 1;
e0e7aeaa
N
4731 } else if (!(ddf->phys->entries[dl->pdnum].state &
4732 __cpu_to_be16(DDF_Failed))) {
4733 /* we can possibly use some of this */
4734 is_global = 1;
7e1432fb
NB
4735 }
4736 if ( ! (is_dedicated ||
4737 (is_global && global_ok))) {
2c514b71 4738 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
613b0d17 4739 is_dedicated, is_global);
7e1432fb
NB
4740 continue;
4741 }
4742
4743 /* We are allowed to use this device - is there space?
4744 * We need a->info.component_size sectors */
4745 ex = get_extents(ddf, dl);
4746 if (!ex) {
2c514b71 4747 dprintf("cannot get extents\n");
7e1432fb
NB
4748 continue;
4749 }
4750 j = 0; pos = 0;
4751 esize = 0;
4752
4753 do {
4754 esize = ex[j].start - pos;
4755 if (esize >= a->info.component_size)
4756 break;
e5cc7d46
N
4757 pos = ex[j].start + ex[j].size;
4758 j++;
4759 } while (ex[j-1].size);
7e1432fb
NB
4760
4761 free(ex);
4762 if (esize < a->info.component_size) {
e5cc7d46
N
4763 dprintf("%x:%x has no room: %llu %llu\n",
4764 dl->major, dl->minor,
2c514b71 4765 esize, a->info.component_size);
7e1432fb
NB
4766 /* No room */
4767 continue;
4768 }
4769
4770 /* Cool, we have a device with some space at pos */
503975b9 4771 di = xcalloc(1, sizeof(*di));
7e1432fb
NB
4772 di->disk.number = i;
4773 di->disk.raid_disk = i;
4774 di->disk.major = dl->major;
4775 di->disk.minor = dl->minor;
4776 di->disk.state = 0;
d23534e4 4777 di->recovery_start = 0;
7e1432fb
NB
4778 di->data_offset = pos;
4779 di->component_size = a->info.component_size;
4780 di->container_member = dl->pdnum;
4781 di->next = rv;
4782 rv = di;
2c514b71
NB
4783 dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
4784 i, pos);
7e1432fb
NB
4785
4786 break;
4787 }
4788 if (!dl && ! global_ok) {
4789 /* not enough dedicated spares, try global */
4790 global_ok = 1;
4791 dl = ddf->dlist;
4792 goto again;
4793 }
4794 }
4795
4796 if (!rv)
4797 /* No spares found */
4798 return rv;
4799 /* Now 'rv' has a list of devices to return.
4800 * Create a metadata_update record to update the
4801 * phys_refnum and lba_offset values
4802 */
503975b9
N
4803 mu = xmalloc(sizeof(*mu));
4804 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
79244939
DW
4805 free(mu);
4806 mu = NULL;
4807 }
503975b9 4808 mu->buf = xmalloc(ddf->conf_rec_len * 512);
7590d562
N
4809 mu->len = ddf->conf_rec_len * 512;
4810 mu->space = NULL;
f50ae22e 4811 mu->space_list = NULL;
7e1432fb 4812 mu->next = *updates;
baba3f4e 4813 vc = find_vdcr(ddf, a->info.container_member, di->disk.raid_disk,
4814 &n_bvd, &vcl);
7e1432fb
NB
4815 memcpy(mu->buf, vc, ddf->conf_rec_len * 512);
4816
4817 vc = (struct vd_config*)mu->buf;
7e1432fb
NB
4818 for (di = rv ; di ; di = di->next) {
4819 vc->phys_refnum[di->disk.raid_disk] =
4820 ddf->phys->entries[dl->pdnum].refnum;
57a66662 4821 LBA_OFFSET(ddf, vc)[di->disk.raid_disk]
4822 = __cpu_to_be64(di->data_offset);
7e1432fb
NB
4823 }
4824 *updates = mu;
4825 return rv;
4826}
0e600426 4827#endif /* MDASSEMBLE */
7e1432fb 4828
b640a252
N
4829static int ddf_level_to_layout(int level)
4830{
4831 switch(level) {
4832 case 0:
4833 case 1:
4834 return 0;
4835 case 5:
4836 return ALGORITHM_LEFT_SYMMETRIC;
4837 case 6:
4838 return ALGORITHM_ROTATING_N_CONTINUE;
4839 case 10:
4840 return 0x102;
4841 default:
4842 return UnSet;
4843 }
4844}
4845
30f58b22
DW
4846static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
4847{
4848 if (level && *level == UnSet)
4849 *level = LEVEL_CONTAINER;
4850
4851 if (level && layout && *layout == UnSet)
4852 *layout = ddf_level_to_layout(*level);
4853}
4854
a322f70c
DW
4855struct superswitch super_ddf = {
4856#ifndef MDASSEMBLE
4857 .examine_super = examine_super_ddf,
4858 .brief_examine_super = brief_examine_super_ddf,
4737ae25 4859 .brief_examine_subarrays = brief_examine_subarrays_ddf,
bceedeec 4860 .export_examine_super = export_examine_super_ddf,
a322f70c
DW
4861 .detail_super = detail_super_ddf,
4862 .brief_detail_super = brief_detail_super_ddf,
4863 .validate_geometry = validate_geometry_ddf,
78e44928 4864 .write_init_super = write_init_super_ddf,
0e600426 4865 .add_to_super = add_to_super_ddf,
4dd968cc 4866 .remove_from_super = remove_from_super_ddf,
2b959fbf 4867 .load_container = load_container_ddf,
74db60b0 4868 .copy_metadata = copy_metadata_ddf,
a322f70c
DW
4869#endif
4870 .match_home = match_home_ddf,
4871 .uuid_from_super= uuid_from_super_ddf,
4872 .getinfo_super = getinfo_super_ddf,
4873 .update_super = update_super_ddf,
4874
4875 .avail_size = avail_size_ddf,
4876
a19c88b8
NB
4877 .compare_super = compare_super_ddf,
4878
a322f70c 4879 .load_super = load_super_ddf,
ba7eb04f 4880 .init_super = init_super_ddf,
955e9ea1 4881 .store_super = store_super_ddf,
a322f70c
DW
4882 .free_super = free_super_ddf,
4883 .match_metadata_desc = match_metadata_desc_ddf,
78e44928 4884 .container_content = container_content_ddf,
30f58b22 4885 .default_geometry = default_geometry_ddf,
f646805e 4886 .kill_subarray = kill_subarray_ddf,
a322f70c 4887
a322f70c 4888 .external = 1,
549e9569 4889
0e600426 4890#ifndef MDASSEMBLE
549e9569
NB
4891/* for mdmon */
4892 .open_new = ddf_open_new,
ed9d66aa 4893 .set_array_state= ddf_set_array_state,
549e9569
NB
4894 .set_disk = ddf_set_disk,
4895 .sync_metadata = ddf_sync_metadata,
88c164f4 4896 .process_update = ddf_process_update,
edd8d13c 4897 .prepare_update = ddf_prepare_update,
7e1432fb 4898 .activate_spare = ddf_activate_spare,
0e600426 4899#endif
4cce4069 4900 .name = "ddf",
a322f70c 4901};