]> git.ipfire.org Git - thirdparty/mdadm.git/blame - super-ddf.c
DDF: report seq counter as events.
[thirdparty/mdadm.git] / super-ddf.c
CommitLineData
a322f70c
DW
1/*
2 * mdadm - manage Linux "md" devices aka RAID arrays.
3 *
e736b623 4 * Copyright (C) 2006-2009 Neil Brown <neilb@suse.de>
a322f70c
DW
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Author: Neil Brown
22 * Email: <neil@brown.name>
23 *
24 * Specifications for DDF taken from Common RAID DDF Specification Revision 1.2
25 * (July 28 2006). Reused by permission of SNIA.
26 */
27
28#define HAVE_STDINT_H 1
29#include "mdadm.h"
549e9569 30#include "mdmon.h"
a322f70c
DW
31#include "sha1.h"
32#include <values.h>
33
a322f70c
DW
34/* a non-official T10 name for creation GUIDs */
35static char T10[] = "Linux-MD";
36
37/* DDF timestamps are 1980 based, so we need to add
38 * second-in-decade-of-seventies to convert to linux timestamps.
39 * 10 years with 2 leap years.
40 */
41#define DECADE (3600*24*(365*10+2))
42unsigned long crc32(
43 unsigned long crc,
44 const unsigned char *buf,
45 unsigned len);
46
bedbf68a 47#define DDF_NOTFOUND (~0U)
48#define DDF_CONTAINER (DDF_NOTFOUND-1)
49
5684fff6 50/* Default for safe_mode_delay. Same value as for IMSM.
51 */
52static const int DDF_SAFE_MODE_DELAY = 4000;
53
a322f70c
DW
54/* The DDF metadata handling.
55 * DDF metadata lives at the end of the device.
56 * The last 512 byte block provides an 'anchor' which is used to locate
57 * the rest of the metadata which usually lives immediately behind the anchor.
58 *
59 * Note:
60 * - all multibyte numeric fields are bigendian.
61 * - all strings are space padded.
62 *
63 */
64
4d1bdc18 65typedef struct __be16 {
66 __u16 _v16;
67} be16;
68#define be16_eq(x, y) ((x)._v16 == (y)._v16)
a8173e43 69#define be16_and(x, y) ((x)._v16 & (y)._v16)
70#define be16_or(x, y) ((x)._v16 | (y)._v16)
71#define be16_clear(x, y) ((x)._v16 &= ~(y)._v16)
72#define be16_set(x, y) ((x)._v16 |= (y)._v16)
4d1bdc18 73
74typedef struct __be32 {
75 __u32 _v32;
76} be32;
77#define be32_eq(x, y) ((x)._v32 == (y)._v32)
78
79typedef struct __be64 {
80 __u64 _v64;
81} be64;
82#define be64_eq(x, y) ((x)._v64 == (y)._v64)
83
84#define be16_to_cpu(be) __be16_to_cpu((be)._v16)
85static inline be16 cpu_to_be16(__u16 x)
86{
87 be16 be = { ._v16 = __cpu_to_be16(x) };
88 return be;
89}
90
91#define be32_to_cpu(be) __be32_to_cpu((be)._v32)
92static inline be32 cpu_to_be32(__u32 x)
93{
94 be32 be = { ._v32 = __cpu_to_be32(x) };
95 return be;
96}
97
98#define be64_to_cpu(be) __be64_to_cpu((be)._v64)
99static inline be64 cpu_to_be64(__u64 x)
100{
101 be64 be = { ._v64 = __cpu_to_be64(x) };
102 return be;
103}
104
a322f70c
DW
105/* Primary Raid Level (PRL) */
106#define DDF_RAID0 0x00
107#define DDF_RAID1 0x01
108#define DDF_RAID3 0x03
109#define DDF_RAID4 0x04
110#define DDF_RAID5 0x05
111#define DDF_RAID1E 0x11
112#define DDF_JBOD 0x0f
113#define DDF_CONCAT 0x1f
114#define DDF_RAID5E 0x15
115#define DDF_RAID5EE 0x25
59e36268 116#define DDF_RAID6 0x06
a322f70c
DW
117
118/* Raid Level Qualifier (RLQ) */
119#define DDF_RAID0_SIMPLE 0x00
120#define DDF_RAID1_SIMPLE 0x00 /* just 2 devices in this plex */
121#define DDF_RAID1_MULTI 0x01 /* exactly 3 devices in this plex */
122#define DDF_RAID3_0 0x00 /* parity in first extent */
123#define DDF_RAID3_N 0x01 /* parity in last extent */
124#define DDF_RAID4_0 0x00 /* parity in first extent */
125#define DDF_RAID4_N 0x01 /* parity in last extent */
126/* these apply to raid5e and raid5ee as well */
127#define DDF_RAID5_0_RESTART 0x00 /* same as 'right asymmetric' - layout 1 */
59e36268 128#define DDF_RAID6_0_RESTART 0x01 /* raid6 different from raid5 here!!! */
a322f70c
DW
129#define DDF_RAID5_N_RESTART 0x02 /* same as 'left asymmetric' - layout 0 */
130#define DDF_RAID5_N_CONTINUE 0x03 /* same as 'left symmetric' - layout 2 */
131
132#define DDF_RAID1E_ADJACENT 0x00 /* raid10 nearcopies==2 */
133#define DDF_RAID1E_OFFSET 0x01 /* raid10 offsetcopies==2 */
134
135/* Secondary RAID Level (SRL) */
136#define DDF_2STRIPED 0x00 /* This is weirder than RAID0 !! */
137#define DDF_2MIRRORED 0x01
138#define DDF_2CONCAT 0x02
139#define DDF_2SPANNED 0x03 /* This is also weird - be careful */
140
141/* Magic numbers */
60931cf9 142#define DDF_HEADER_MAGIC cpu_to_be32(0xDE11DE11)
143#define DDF_CONTROLLER_MAGIC cpu_to_be32(0xAD111111)
144#define DDF_PHYS_RECORDS_MAGIC cpu_to_be32(0x22222222)
145#define DDF_PHYS_DATA_MAGIC cpu_to_be32(0x33333333)
146#define DDF_VIRT_RECORDS_MAGIC cpu_to_be32(0xDDDDDDDD)
147#define DDF_VD_CONF_MAGIC cpu_to_be32(0xEEEEEEEE)
148#define DDF_SPARE_ASSIGN_MAGIC cpu_to_be32(0x55555555)
149#define DDF_VU_CONF_MAGIC cpu_to_be32(0x88888888)
150#define DDF_VENDOR_LOG_MAGIC cpu_to_be32(0x01dBEEF0)
151#define DDF_BBM_LOG_MAGIC cpu_to_be32(0xABADB10C)
a322f70c
DW
152
153#define DDF_GUID_LEN 24
59e36268
NB
154#define DDF_REVISION_0 "01.00.00"
155#define DDF_REVISION_2 "01.02.00"
a322f70c
DW
156
157struct ddf_header {
60931cf9 158 be32 magic; /* DDF_HEADER_MAGIC */
159 be32 crc;
a322f70c 160 char guid[DDF_GUID_LEN];
59e36268 161 char revision[8]; /* 01.02.00 */
60931cf9 162 be32 seq; /* starts at '1' */
163 be32 timestamp;
a322f70c
DW
164 __u8 openflag;
165 __u8 foreignflag;
166 __u8 enforcegroups;
167 __u8 pad0; /* 0xff */
168 __u8 pad1[12]; /* 12 * 0xff */
169 /* 64 bytes so far */
170 __u8 header_ext[32]; /* reserved: fill with 0xff */
9d0c6b70 171 be64 primary_lba;
172 be64 secondary_lba;
a322f70c
DW
173 __u8 type;
174 __u8 pad2[3]; /* 0xff */
60931cf9 175 be32 workspace_len; /* sectors for vendor space -
a322f70c 176 * at least 32768(sectors) */
9d0c6b70 177 be64 workspace_lba;
a8173e43 178 be16 max_pd_entries; /* one of 15, 63, 255, 1023, 4095 */
179 be16 max_vd_entries; /* 2^(4,6,8,10,12)-1 : i.e. as above */
180 be16 max_partitions; /* i.e. max num of configuration
a322f70c 181 record entries per disk */
a8173e43 182 be16 config_record_len; /* 1 +ROUNDUP(max_primary_element_entries
a322f70c 183 *12/512) */
a8173e43 184 be16 max_primary_element_entries; /* 16, 64, 256, 1024, or 4096 */
a322f70c
DW
185 __u8 pad3[54]; /* 0xff */
186 /* 192 bytes so far */
60931cf9 187 be32 controller_section_offset;
188 be32 controller_section_length;
189 be32 phys_section_offset;
190 be32 phys_section_length;
191 be32 virt_section_offset;
192 be32 virt_section_length;
193 be32 config_section_offset;
194 be32 config_section_length;
195 be32 data_section_offset;
196 be32 data_section_length;
197 be32 bbm_section_offset;
198 be32 bbm_section_length;
199 be32 diag_space_offset;
200 be32 diag_space_length;
201 be32 vendor_offset;
202 be32 vendor_length;
a322f70c
DW
203 /* 256 bytes so far */
204 __u8 pad4[256]; /* 0xff */
205};
206
207/* type field */
208#define DDF_HEADER_ANCHOR 0x00
209#define DDF_HEADER_PRIMARY 0x01
210#define DDF_HEADER_SECONDARY 0x02
211
212/* The content of the 'controller section' - global scope */
213struct ddf_controller_data {
60931cf9 214 be32 magic; /* DDF_CONTROLLER_MAGIC */
215 be32 crc;
a322f70c
DW
216 char guid[DDF_GUID_LEN];
217 struct controller_type {
a8173e43 218 be16 vendor_id;
219 be16 device_id;
220 be16 sub_vendor_id;
221 be16 sub_device_id;
a322f70c
DW
222 } type;
223 char product_id[16];
224 __u8 pad[8]; /* 0xff */
225 __u8 vendor_data[448];
226};
227
228/* The content of phys_section - global scope */
229struct phys_disk {
60931cf9 230 be32 magic; /* DDF_PHYS_RECORDS_MAGIC */
231 be32 crc;
a8173e43 232 be16 used_pdes;
233 be16 max_pdes;
a322f70c
DW
234 __u8 pad[52];
235 struct phys_disk_entry {
236 char guid[DDF_GUID_LEN];
60931cf9 237 be32 refnum;
a8173e43 238 be16 type;
239 be16 state;
9d0c6b70 240 be64 config_size; /* DDF structures must be after here */
a322f70c
DW
241 char path[18]; /* another horrible structure really */
242 __u8 pad[6];
243 } entries[0];
244};
245
246/* phys_disk_entry.type is a bitmap - bigendian remember */
247#define DDF_Forced_PD_GUID 1
248#define DDF_Active_in_VD 2
88c164f4 249#define DDF_Global_Spare 4 /* VD_CONF records are ignored */
a322f70c
DW
250#define DDF_Spare 8 /* overrides Global_spare */
251#define DDF_Foreign 16
252#define DDF_Legacy 32 /* no DDF on this device */
253
254#define DDF_Interface_mask 0xf00
255#define DDF_Interface_SCSI 0x100
256#define DDF_Interface_SAS 0x200
257#define DDF_Interface_SATA 0x300
258#define DDF_Interface_FC 0x400
259
260/* phys_disk_entry.state is a bigendian bitmap */
261#define DDF_Online 1
262#define DDF_Failed 2 /* overrides 1,4,8 */
263#define DDF_Rebuilding 4
264#define DDF_Transition 8
265#define DDF_SMART 16
266#define DDF_ReadErrors 32
267#define DDF_Missing 64
268
269/* The content of the virt_section global scope */
270struct virtual_disk {
60931cf9 271 be32 magic; /* DDF_VIRT_RECORDS_MAGIC */
272 be32 crc;
a8173e43 273 be16 populated_vdes;
274 be16 max_vdes;
a322f70c
DW
275 __u8 pad[52];
276 struct virtual_entry {
277 char guid[DDF_GUID_LEN];
a8173e43 278 be16 unit;
a322f70c 279 __u16 pad0; /* 0xffff */
a8173e43 280 be16 guid_crc;
281 be16 type;
a322f70c
DW
282 __u8 state;
283 __u8 init_state;
284 __u8 pad1[14];
285 char name[16];
286 } entries[0];
287};
288
289/* virtual_entry.type is a bitmap - bigendian */
290#define DDF_Shared 1
291#define DDF_Enforce_Groups 2
292#define DDF_Unicode 4
293#define DDF_Owner_Valid 8
294
295/* virtual_entry.state is a bigendian bitmap */
296#define DDF_state_mask 0x7
297#define DDF_state_optimal 0x0
298#define DDF_state_degraded 0x1
299#define DDF_state_deleted 0x2
300#define DDF_state_missing 0x3
301#define DDF_state_failed 0x4
7a7cc504 302#define DDF_state_part_optimal 0x5
a322f70c
DW
303
304#define DDF_state_morphing 0x8
305#define DDF_state_inconsistent 0x10
306
307/* virtual_entry.init_state is a bigendian bitmap */
308#define DDF_initstate_mask 0x03
309#define DDF_init_not 0x00
7a7cc504
NB
310#define DDF_init_quick 0x01 /* initialisation is progress.
311 * i.e. 'state_inconsistent' */
a322f70c
DW
312#define DDF_init_full 0x02
313
314#define DDF_access_mask 0xc0
315#define DDF_access_rw 0x00
316#define DDF_access_ro 0x80
317#define DDF_access_blocked 0xc0
318
319/* The content of the config_section - local scope
320 * It has multiple records each config_record_len sectors
321 * They can be vd_config or spare_assign
322 */
323
324struct vd_config {
60931cf9 325 be32 magic; /* DDF_VD_CONF_MAGIC */
326 be32 crc;
a322f70c 327 char guid[DDF_GUID_LEN];
60931cf9 328 be32 timestamp;
329 be32 seqnum;
a322f70c 330 __u8 pad0[24];
a8173e43 331 be16 prim_elmnt_count;
a322f70c
DW
332 __u8 chunk_shift; /* 0 == 512, 1==1024 etc */
333 __u8 prl;
334 __u8 rlq;
335 __u8 sec_elmnt_count;
336 __u8 sec_elmnt_seq;
337 __u8 srl;
9d0c6b70 338 be64 blocks; /* blocks per component could be different
598f0d58
NB
339 * on different component devices...(only
340 * for concat I hope) */
9d0c6b70 341 be64 array_blocks; /* blocks in array */
a322f70c 342 __u8 pad1[8];
60931cf9 343 be32 spare_refs[8];
a322f70c
DW
344 __u8 cache_pol[8];
345 __u8 bg_rate;
346 __u8 pad2[3];
347 __u8 pad3[52];
348 __u8 pad4[192];
349 __u8 v0[32]; /* reserved- 0xff */
350 __u8 v1[32]; /* reserved- 0xff */
351 __u8 v2[16]; /* reserved- 0xff */
352 __u8 v3[16]; /* reserved- 0xff */
353 __u8 vendor[32];
60931cf9 354 be32 phys_refnum[0]; /* refnum of each disk in sequence */
a322f70c
DW
355 /*__u64 lba_offset[0]; LBA offset in each phys. Note extents in a
356 bvd are always the same size */
357};
9d0c6b70 358#define LBA_OFFSET(ddf, vd) ((be64 *) &(vd)->phys_refnum[(ddf)->mppe])
a322f70c
DW
359
360/* vd_config.cache_pol[7] is a bitmap */
361#define DDF_cache_writeback 1 /* else writethrough */
362#define DDF_cache_wadaptive 2 /* only applies if writeback */
363#define DDF_cache_readahead 4
364#define DDF_cache_radaptive 8 /* only if doing read-ahead */
365#define DDF_cache_ifnobatt 16 /* even to write cache if battery is poor */
366#define DDF_cache_wallowed 32 /* enable write caching */
367#define DDF_cache_rallowed 64 /* enable read caching */
368
369struct spare_assign {
60931cf9 370 be32 magic; /* DDF_SPARE_ASSIGN_MAGIC */
371 be32 crc;
372 be32 timestamp;
a322f70c
DW
373 __u8 reserved[7];
374 __u8 type;
a8173e43 375 be16 populated; /* SAEs used */
376 be16 max; /* max SAEs */
a322f70c
DW
377 __u8 pad[8];
378 struct spare_assign_entry {
379 char guid[DDF_GUID_LEN];
a8173e43 380 be16 secondary_element;
a322f70c
DW
381 __u8 pad[6];
382 } spare_ents[0];
383};
384/* spare_assign.type is a bitmap */
385#define DDF_spare_dedicated 0x1 /* else global */
386#define DDF_spare_revertible 0x2 /* else committable */
387#define DDF_spare_active 0x4 /* else not active */
388#define DDF_spare_affinity 0x8 /* enclosure affinity */
389
390/* The data_section contents - local scope */
391struct disk_data {
60931cf9 392 be32 magic; /* DDF_PHYS_DATA_MAGIC */
393 be32 crc;
a322f70c 394 char guid[DDF_GUID_LEN];
60931cf9 395 be32 refnum; /* crc of some magic drive data ... */
a322f70c
DW
396 __u8 forced_ref; /* set when above was not result of magic */
397 __u8 forced_guid; /* set if guid was forced rather than magic */
398 __u8 vendor[32];
399 __u8 pad[442];
400};
401
402/* bbm_section content */
403struct bad_block_log {
60931cf9 404 be32 magic;
405 be32 crc;
a8173e43 406 be16 entry_count;
60931cf9 407 be32 spare_count;
a322f70c 408 __u8 pad[10];
9d0c6b70 409 be64 first_spare;
a322f70c 410 struct mapped_block {
9d0c6b70 411 be64 defective_start;
60931cf9 412 be32 replacement_start;
a8173e43 413 be16 remap_count;
a322f70c
DW
414 __u8 pad[2];
415 } entries[0];
416};
417
418/* Struct for internally holding ddf structures */
419/* The DDF structure stored on each device is potentially
420 * quite different, as some data is global and some is local.
421 * The global data is:
422 * - ddf header
423 * - controller_data
424 * - Physical disk records
425 * - Virtual disk records
426 * The local data is:
427 * - Configuration records
428 * - Physical Disk data section
429 * ( and Bad block and vendor which I don't care about yet).
430 *
431 * The local data is parsed into separate lists as it is read
432 * and reconstructed for writing. This means that we only need
433 * to make config changes once and they are automatically
434 * propagated to all devices.
435 * Note that the ddf_super has space of the conf and disk data
436 * for this disk and also for a list of all such data.
437 * The list is only used for the superblock that is being
438 * built in Create or Assemble to describe the whole array.
439 */
440struct ddf_super {
6416d527 441 struct ddf_header anchor, primary, secondary;
a322f70c 442 struct ddf_controller_data controller;
6416d527 443 struct ddf_header *active;
a322f70c
DW
444 struct phys_disk *phys;
445 struct virtual_disk *virt;
3921e41a 446 char *conf;
a322f70c 447 int pdsize, vdsize;
f21e18ca 448 unsigned int max_part, mppe, conf_rec_len;
d2ca6449 449 int currentdev;
18a2f463 450 int updates_pending;
a322f70c 451 struct vcl {
6416d527
NB
452 union {
453 char space[512];
454 struct {
455 struct vcl *next;
f21e18ca 456 unsigned int vcnum; /* index into ->virt */
8ec5d685 457 struct vd_config **other_bvds;
6416d527
NB
458 __u64 *block_sizes; /* NULL if all the same */
459 };
460 };
a322f70c 461 struct vd_config conf;
d2ca6449 462 } *conflist, *currentconf;
a322f70c 463 struct dl {
6416d527
NB
464 union {
465 char space[512];
466 struct {
467 struct dl *next;
468 int major, minor;
469 char *devname;
470 int fd;
471 unsigned long long size; /* sectors */
9d0c6b70 472 be64 primary_lba; /* sectors */
473 be64 secondary_lba; /* sectors */
474 be64 workspace_lba; /* sectors */
6416d527
NB
475 int pdnum; /* index in ->phys */
476 struct spare_assign *spare;
8592f29d
N
477 void *mdupdate; /* hold metadata update */
478
479 /* These fields used by auto-layout */
480 int raiddisk; /* slot to fill in autolayout */
481 __u64 esize;
6416d527
NB
482 };
483 };
a322f70c 484 struct disk_data disk;
b2280677 485 struct vcl *vlist[0]; /* max_part in size */
2cc2983d 486 } *dlist, *add_list;
a322f70c
DW
487};
488
489#ifndef offsetof
490#define offsetof(t,f) ((size_t)&(((t*)0)->f))
491#endif
492
#if DEBUG
static int all_ff(const char *guid);

/*
 * Debug aid: print index, state and init_state of every virtual disk
 * entry, prefixed by this function's name and 'msg'.  Entries for which
 * all_ff() reports true on the GUID are skipped.
 */
static void pr_state(struct ddf_super *ddf, const char *msg)
{
	const unsigned int n_vd = be16_to_cpu(ddf->active->max_vd_entries);
	unsigned int vd;

	dprintf("%s/%s: ", __func__, msg);
	for (vd = 0; vd < n_vd; vd++) {
		const struct virtual_entry *ve = &ddf->virt->entries[vd];

		if (!all_ff(ve->guid))
			dprintf("%u(s=%02x i=%02x) ", vd,
				ve->state, ve->init_state);
	}
	dprintf("\n");
}
#else
/* Without DEBUG, pr_state() does nothing. */
static void pr_state(const struct ddf_super *ddf, const char *msg) {}
#endif
511
35c3606d 512static void _ddf_set_updates_pending(struct ddf_super *ddf, const char *func)
513{
514 ddf->updates_pending = 1;
60931cf9 515 ddf->active->seq = cpu_to_be32((be32_to_cpu(ddf->active->seq)+1));
35c3606d 516 pr_state(ddf, func);
517}
518
519#define ddf_set_updates_pending(x) _ddf_set_updates_pending((x), __func__)
7d5a7ff3 520
fcc22180 521static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
60931cf9 522 be32 refnum, unsigned int nmax,
fcc22180 523 const struct vd_config **bvd,
524 unsigned int *idx);
525
60931cf9 526static be32 calc_crc(void *buf, int len)
a322f70c
DW
527{
528 /* crcs are always at the same place as in the ddf_header */
529 struct ddf_header *ddf = buf;
60931cf9 530 be32 oldcrc = ddf->crc;
a322f70c 531 __u32 newcrc;
60931cf9 532 ddf->crc = cpu_to_be32(0xffffffff);
a322f70c
DW
533
534 newcrc = crc32(0, buf, len);
535 ddf->crc = oldcrc;
4abe6b70
N
536 /* The crc is store (like everything) bigendian, so convert
537 * here for simplicity
538 */
60931cf9 539 return cpu_to_be32(newcrc);
a322f70c
DW
540}
541
a3163bf0 542#define DDF_INVALID_LEVEL 0xff
543#define DDF_NO_SECONDARY 0xff
544static int err_bad_md_layout(const mdu_array_info_t *array)
545{
546 pr_err("RAID%d layout %x with %d disks is unsupported for DDF\n",
547 array->level, array->layout, array->raid_disks);
2aba583f 548 return -1;
a3163bf0 549}
550
551static int layout_md2ddf(const mdu_array_info_t *array,
552 struct vd_config *conf)
553{
a8173e43 554 be16 prim_elmnt_count = cpu_to_be16(array->raid_disks);
a3163bf0 555 __u8 prl = DDF_INVALID_LEVEL, rlq = 0;
556 __u8 sec_elmnt_count = 1;
557 __u8 srl = DDF_NO_SECONDARY;
558
559 switch (array->level) {
560 case LEVEL_LINEAR:
561 prl = DDF_CONCAT;
562 break;
563 case 0:
564 rlq = DDF_RAID0_SIMPLE;
565 prl = DDF_RAID0;
566 break;
567 case 1:
568 switch (array->raid_disks) {
569 case 2:
570 rlq = DDF_RAID1_SIMPLE;
571 break;
572 case 3:
573 rlq = DDF_RAID1_MULTI;
574 break;
575 default:
576 return err_bad_md_layout(array);
577 }
578 prl = DDF_RAID1;
579 break;
580 case 4:
581 if (array->layout != 0)
582 return err_bad_md_layout(array);
583 rlq = DDF_RAID4_N;
584 prl = DDF_RAID4;
585 break;
586 case 5:
587 switch (array->layout) {
588 case ALGORITHM_LEFT_ASYMMETRIC:
589 rlq = DDF_RAID5_N_RESTART;
590 break;
591 case ALGORITHM_RIGHT_ASYMMETRIC:
592 rlq = DDF_RAID5_0_RESTART;
593 break;
594 case ALGORITHM_LEFT_SYMMETRIC:
595 rlq = DDF_RAID5_N_CONTINUE;
596 break;
597 case ALGORITHM_RIGHT_SYMMETRIC:
598 /* not mentioned in standard */
599 default:
600 return err_bad_md_layout(array);
601 }
602 prl = DDF_RAID5;
603 break;
604 case 6:
605 switch (array->layout) {
606 case ALGORITHM_ROTATING_N_RESTART:
607 rlq = DDF_RAID5_N_RESTART;
608 break;
609 case ALGORITHM_ROTATING_ZERO_RESTART:
610 rlq = DDF_RAID6_0_RESTART;
611 break;
612 case ALGORITHM_ROTATING_N_CONTINUE:
613 rlq = DDF_RAID5_N_CONTINUE;
614 break;
615 default:
616 return err_bad_md_layout(array);
617 }
618 prl = DDF_RAID6;
619 break;
620 case 10:
621 if (array->raid_disks % 2 == 0 && array->layout == 0x102) {
622 rlq = DDF_RAID1_SIMPLE;
a8173e43 623 prim_elmnt_count = cpu_to_be16(2);
a3163bf0 624 sec_elmnt_count = array->raid_disks / 2;
625 } else if (array->raid_disks % 3 == 0
626 && array->layout == 0x103) {
627 rlq = DDF_RAID1_MULTI;
a8173e43 628 prim_elmnt_count = cpu_to_be16(3);
a3163bf0 629 sec_elmnt_count = array->raid_disks / 3;
630 } else
631 return err_bad_md_layout(array);
632 srl = DDF_2SPANNED;
633 prl = DDF_RAID1;
634 break;
635 default:
636 return err_bad_md_layout(array);
637 }
638 conf->prl = prl;
639 conf->prim_elmnt_count = prim_elmnt_count;
640 conf->rlq = rlq;
641 conf->srl = srl;
642 conf->sec_elmnt_count = sec_elmnt_count;
643 return 0;
644}
645
8a2848a7 646static int err_bad_ddf_layout(const struct vd_config *conf)
647{
648 pr_err("DDF RAID %u qualifier %u with %u disks is unsupported\n",
a8173e43 649 conf->prl, conf->rlq, be16_to_cpu(conf->prim_elmnt_count));
8a2848a7 650 return -1;
651}
652
653static int layout_ddf2md(const struct vd_config *conf,
654 mdu_array_info_t *array)
655{
656 int level = LEVEL_UNSUPPORTED;
657 int layout = 0;
a8173e43 658 int raiddisks = be16_to_cpu(conf->prim_elmnt_count);
8a2848a7 659
660 if (conf->sec_elmnt_count > 1) {
661 /* see also check_secondary() */
662 if (conf->prl != DDF_RAID1 ||
663 (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED)) {
664 pr_err("Unsupported secondary RAID level %u/%u\n",
665 conf->prl, conf->srl);
666 return -1;
667 }
668 if (raiddisks == 2 && conf->rlq == DDF_RAID1_SIMPLE)
669 layout = 0x102;
670 else if (raiddisks == 3 && conf->rlq == DDF_RAID1_MULTI)
671 layout = 0x103;
672 else
673 return err_bad_ddf_layout(conf);
674 raiddisks *= conf->sec_elmnt_count;
675 level = 10;
676 goto good;
677 }
678
679 switch (conf->prl) {
680 case DDF_CONCAT:
681 level = LEVEL_LINEAR;
682 break;
683 case DDF_RAID0:
684 if (conf->rlq != DDF_RAID0_SIMPLE)
685 return err_bad_ddf_layout(conf);
686 level = 0;
687 break;
688 case DDF_RAID1:
689 if (!((conf->rlq == DDF_RAID1_SIMPLE && raiddisks == 2) ||
690 (conf->rlq == DDF_RAID1_MULTI && raiddisks == 3)))
691 return err_bad_ddf_layout(conf);
692 level = 1;
693 break;
694 case DDF_RAID4:
695 if (conf->rlq != DDF_RAID4_N)
696 return err_bad_ddf_layout(conf);
697 level = 4;
698 break;
699 case DDF_RAID5:
700 switch (conf->rlq) {
701 case DDF_RAID5_N_RESTART:
702 layout = ALGORITHM_LEFT_ASYMMETRIC;
703 break;
704 case DDF_RAID5_0_RESTART:
705 layout = ALGORITHM_RIGHT_ASYMMETRIC;
706 break;
707 case DDF_RAID5_N_CONTINUE:
708 layout = ALGORITHM_LEFT_SYMMETRIC;
709 break;
710 default:
711 return err_bad_ddf_layout(conf);
712 }
713 level = 5;
714 break;
715 case DDF_RAID6:
716 switch (conf->rlq) {
717 case DDF_RAID5_N_RESTART:
718 layout = ALGORITHM_ROTATING_N_RESTART;
719 break;
720 case DDF_RAID6_0_RESTART:
721 layout = ALGORITHM_ROTATING_ZERO_RESTART;
722 break;
723 case DDF_RAID5_N_CONTINUE:
724 layout = ALGORITHM_ROTATING_N_CONTINUE;
725 break;
726 default:
727 return err_bad_ddf_layout(conf);
728 }
729 level = 6;
730 break;
731 default:
732 return err_bad_ddf_layout(conf);
733 };
734
735good:
736 array->level = level;
737 array->layout = layout;
738 array->raid_disks = raiddisks;
739 return 0;
740}
741
a322f70c
DW
742static int load_ddf_header(int fd, unsigned long long lba,
743 unsigned long long size,
744 int type,
745 struct ddf_header *hdr, struct ddf_header *anchor)
746{
747 /* read a ddf header (primary or secondary) from fd/lba
748 * and check that it is consistent with anchor
749 * Need to check:
750 * magic, crc, guid, rev, and LBA's header_type, and
751 * everything after header_type must be the same
752 */
753 if (lba >= size-1)
754 return 0;
755
756 if (lseek64(fd, lba<<9, 0) < 0)
757 return 0;
758
759 if (read(fd, hdr, 512) != 512)
760 return 0;
761
0e5fa862
MW
762 if (!be32_eq(hdr->magic, DDF_HEADER_MAGIC)) {
763 pr_err("%s: bad header magic\n", __func__);
a322f70c 764 return 0;
0e5fa862
MW
765 }
766 if (!be32_eq(calc_crc(hdr, 512), hdr->crc)) {
767 pr_err("%s: bad CRC\n", __func__);
a322f70c 768 return 0;
0e5fa862 769 }
a322f70c
DW
770 if (memcmp(anchor->guid, hdr->guid, DDF_GUID_LEN) != 0 ||
771 memcmp(anchor->revision, hdr->revision, 8) != 0 ||
9d0c6b70 772 !be64_eq(anchor->primary_lba, hdr->primary_lba) ||
773 !be64_eq(anchor->secondary_lba, hdr->secondary_lba) ||
a322f70c
DW
774 hdr->type != type ||
775 memcmp(anchor->pad2, hdr->pad2, 512 -
0e5fa862
MW
776 offsetof(struct ddf_header, pad2)) != 0) {
777 pr_err("%s: header mismatch\n", __func__);
a322f70c 778 return 0;
0e5fa862 779 }
a322f70c
DW
780
781 /* Looks good enough to me... */
782 return 1;
783}
784
785static void *load_section(int fd, struct ddf_super *super, void *buf,
60931cf9 786 be32 offset_be, be32 len_be, int check)
a322f70c 787{
60931cf9 788 unsigned long long offset = be32_to_cpu(offset_be);
789 unsigned long long len = be32_to_cpu(len_be);
a322f70c
DW
790 int dofree = (buf == NULL);
791
792 if (check)
793 if (len != 2 && len != 8 && len != 32
794 && len != 128 && len != 512)
795 return NULL;
796
797 if (len > 1024)
798 return NULL;
3921e41a 799 if (!buf && posix_memalign(&buf, 512, len<<9) != 0)
3d2c4fc7 800 buf = NULL;
6416d527 801
a322f70c
DW
802 if (!buf)
803 return NULL;
804
805 if (super->active->type == 1)
9d0c6b70 806 offset += be64_to_cpu(super->active->primary_lba);
a322f70c 807 else
9d0c6b70 808 offset += be64_to_cpu(super->active->secondary_lba);
a322f70c 809
f21e18ca 810 if ((unsigned long long)lseek64(fd, offset<<9, 0) != (offset<<9)) {
a322f70c
DW
811 if (dofree)
812 free(buf);
813 return NULL;
814 }
f21e18ca 815 if ((unsigned long long)read(fd, buf, len<<9) != (len<<9)) {
a322f70c
DW
816 if (dofree)
817 free(buf);
818 return NULL;
819 }
820 return buf;
821}
822
823static int load_ddf_headers(int fd, struct ddf_super *super, char *devname)
824{
825 unsigned long long dsize;
826
827 get_dev_size(fd, NULL, &dsize);
828
829 if (lseek64(fd, dsize-512, 0) < 0) {
830 if (devname)
e7b84f9d
N
831 pr_err("Cannot seek to anchor block on %s: %s\n",
832 devname, strerror(errno));
a322f70c
DW
833 return 1;
834 }
835 if (read(fd, &super->anchor, 512) != 512) {
836 if (devname)
e7b84f9d
N
837 pr_err("Cannot read anchor block on %s: %s\n",
838 devname, strerror(errno));
a322f70c
DW
839 return 1;
840 }
60931cf9 841 if (!be32_eq(super->anchor.magic, DDF_HEADER_MAGIC)) {
a322f70c 842 if (devname)
e7b84f9d 843 pr_err("no DDF anchor found on %s\n",
a322f70c
DW
844 devname);
845 return 2;
846 }
60931cf9 847 if (!be32_eq(calc_crc(&super->anchor, 512), super->anchor.crc)) {
a322f70c 848 if (devname)
e7b84f9d 849 pr_err("bad CRC on anchor on %s\n",
a322f70c
DW
850 devname);
851 return 2;
852 }
59e36268
NB
853 if (memcmp(super->anchor.revision, DDF_REVISION_0, 8) != 0 &&
854 memcmp(super->anchor.revision, DDF_REVISION_2, 8) != 0) {
a322f70c 855 if (devname)
e7b84f9d 856 pr_err("can only support super revision"
59e36268
NB
857 " %.8s and earlier, not %.8s on %s\n",
858 DDF_REVISION_2, super->anchor.revision,devname);
a322f70c
DW
859 return 2;
860 }
dbeb699a 861 super->active = NULL;
9d0c6b70 862 if (load_ddf_header(fd, be64_to_cpu(super->anchor.primary_lba),
a322f70c
DW
863 dsize >> 9, 1,
864 &super->primary, &super->anchor) == 0) {
865 if (devname)
e7b84f9d
N
866 pr_err("Failed to load primary DDF header "
867 "on %s\n", devname);
dbeb699a 868 } else
869 super->active = &super->primary;
60931cf9 870
9d0c6b70 871 if (load_ddf_header(fd, be64_to_cpu(super->anchor.secondary_lba),
a322f70c
DW
872 dsize >> 9, 2,
873 &super->secondary, &super->anchor)) {
3eff7c1d 874 if (super->active == NULL
60931cf9 875 || (be32_to_cpu(super->primary.seq)
876 < be32_to_cpu(super->secondary.seq) &&
3eff7c1d 877 !super->secondary.openflag)
60931cf9 878 || (be32_to_cpu(super->primary.seq)
879 == be32_to_cpu(super->secondary.seq) &&
a322f70c
DW
880 super->primary.openflag && !super->secondary.openflag)
881 )
882 super->active = &super->secondary;
b95cb4b9
N
883 } else if (devname &&
884 be64_to_cpu(super->anchor.secondary_lba) != ~(__u64)0)
dbeb699a 885 pr_err("Failed to load secondary DDF header on %s\n",
886 devname);
887 if (super->active == NULL)
888 return 2;
a322f70c
DW
889 return 0;
890}
891
892static int load_ddf_global(int fd, struct ddf_super *super, char *devname)
893{
894 void *ok;
895 ok = load_section(fd, super, &super->controller,
896 super->active->controller_section_offset,
897 super->active->controller_section_length,
898 0);
899 super->phys = load_section(fd, super, NULL,
900 super->active->phys_section_offset,
901 super->active->phys_section_length,
902 1);
60931cf9 903 super->pdsize = be32_to_cpu(super->active->phys_section_length) * 512;
a322f70c
DW
904
905 super->virt = load_section(fd, super, NULL,
906 super->active->virt_section_offset,
907 super->active->virt_section_length,
908 1);
60931cf9 909 super->vdsize = be32_to_cpu(super->active->virt_section_length) * 512;
a322f70c
DW
910 if (!ok ||
911 !super->phys ||
912 !super->virt) {
913 free(super->phys);
914 free(super->virt);
a2349791
NB
915 super->phys = NULL;
916 super->virt = NULL;
a322f70c
DW
917 return 2;
918 }
919 super->conflist = NULL;
920 super->dlist = NULL;
8c3b8c2c 921
a8173e43 922 super->max_part = be16_to_cpu(super->active->max_partitions);
923 super->mppe = be16_to_cpu(super->active->max_primary_element_entries);
924 super->conf_rec_len = be16_to_cpu(super->active->config_record_len);
a322f70c
DW
925 return 0;
926}
927
3c48f7be 928#define DDF_UNUSED_BVD 0xff
929static int alloc_other_bvds(const struct ddf_super *ddf, struct vcl *vcl)
930{
931 unsigned int n_vds = vcl->conf.sec_elmnt_count - 1;
932 unsigned int i, vdsize;
933 void *p;
934 if (n_vds == 0) {
935 vcl->other_bvds = NULL;
936 return 0;
937 }
938 vdsize = ddf->conf_rec_len * 512;
939 if (posix_memalign(&p, 512, n_vds *
940 (vdsize + sizeof(struct vd_config *))) != 0)
941 return -1;
942 vcl->other_bvds = (struct vd_config **) (p + n_vds * vdsize);
943 for (i = 0; i < n_vds; i++) {
944 vcl->other_bvds[i] = p + i * vdsize;
945 memset(vcl->other_bvds[i], 0, vdsize);
946 vcl->other_bvds[i]->sec_elmnt_seq = DDF_UNUSED_BVD;
947 }
948 return 0;
949}
950
3dc821b0 951static void add_other_bvd(struct vcl *vcl, struct vd_config *vd,
952 unsigned int len)
953{
954 int i;
955 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
3c48f7be 956 if (vcl->other_bvds[i]->sec_elmnt_seq == vd->sec_elmnt_seq)
3dc821b0 957 break;
958
959 if (i < vcl->conf.sec_elmnt_count-1) {
60931cf9 960 if (be32_to_cpu(vd->seqnum) <=
961 be32_to_cpu(vcl->other_bvds[i]->seqnum))
3dc821b0 962 return;
963 } else {
964 for (i = 0; i < vcl->conf.sec_elmnt_count-1; i++)
3c48f7be 965 if (vcl->other_bvds[i]->sec_elmnt_seq == DDF_UNUSED_BVD)
3dc821b0 966 break;
967 if (i == vcl->conf.sec_elmnt_count-1) {
968 pr_err("no space for sec level config %u, count is %u\n",
969 vd->sec_elmnt_seq, vcl->conf.sec_elmnt_count);
970 return;
971 }
3dc821b0 972 }
973 memcpy(vcl->other_bvds[i], vd, len);
974}
975
a322f70c
DW
976static int load_ddf_local(int fd, struct ddf_super *super,
977 char *devname, int keep)
978{
979 struct dl *dl;
980 struct stat stb;
981 char *conf;
f21e18ca
N
982 unsigned int i;
983 unsigned int confsec;
b2280677 984 int vnum;
a8173e43 985 unsigned int max_virt_disks = be16_to_cpu
986 (super->active->max_vd_entries);
d2ca6449 987 unsigned long long dsize;
a322f70c
DW
988
989 /* First the local disk info */
3d2c4fc7 990 if (posix_memalign((void**)&dl, 512,
6416d527 991 sizeof(*dl) +
3d2c4fc7 992 (super->max_part) * sizeof(dl->vlist[0])) != 0) {
e7b84f9d 993 pr_err("%s could not allocate disk info buffer\n",
3d2c4fc7
DW
994 __func__);
995 return 1;
996 }
a322f70c
DW
997
998 load_section(fd, super, &dl->disk,
999 super->active->data_section_offset,
1000 super->active->data_section_length,
1001 0);
503975b9 1002 dl->devname = devname ? xstrdup(devname) : NULL;
598f0d58 1003
a322f70c
DW
1004 fstat(fd, &stb);
1005 dl->major = major(stb.st_rdev);
1006 dl->minor = minor(stb.st_rdev);
1007 dl->next = super->dlist;
1008 dl->fd = keep ? fd : -1;
d2ca6449
NB
1009
1010 dl->size = 0;
1011 if (get_dev_size(fd, devname, &dsize))
1012 dl->size = dsize >> 9;
097bcf00 1013 /* If the disks have different sizes, the LBAs will differ
1014 * between phys disks.
1015 * At this point here, the values in super->active must be valid
1016 * for this phys disk. */
1017 dl->primary_lba = super->active->primary_lba;
1018 dl->secondary_lba = super->active->secondary_lba;
1019 dl->workspace_lba = super->active->workspace_lba;
b2280677 1020 dl->spare = NULL;
f21e18ca 1021 for (i = 0 ; i < super->max_part ; i++)
a322f70c
DW
1022 dl->vlist[i] = NULL;
1023 super->dlist = dl;
59e36268 1024 dl->pdnum = -1;
a8173e43 1025 for (i = 0; i < be16_to_cpu(super->active->max_pd_entries); i++)
5575e7d9
NB
1026 if (memcmp(super->phys->entries[i].guid,
1027 dl->disk.guid, DDF_GUID_LEN) == 0)
1028 dl->pdnum = i;
1029
a322f70c
DW
1030 /* Now the config list. */
1031 /* 'conf' is an array of config entries, some of which are
1032 * probably invalid. Those which are good need to be copied into
1033 * the conflist
1034 */
a322f70c 1035
3921e41a 1036 conf = load_section(fd, super, super->conf,
a322f70c
DW
1037 super->active->config_section_offset,
1038 super->active->config_section_length,
1039 0);
3921e41a 1040 super->conf = conf;
b2280677 1041 vnum = 0;
e223334f 1042 for (confsec = 0;
60931cf9 1043 confsec < be32_to_cpu(super->active->config_section_length);
e223334f 1044 confsec += super->conf_rec_len) {
a322f70c 1045 struct vd_config *vd =
e223334f 1046 (struct vd_config *)((char*)conf + confsec*512);
a322f70c
DW
1047 struct vcl *vcl;
1048
60931cf9 1049 if (be32_eq(vd->magic, DDF_SPARE_ASSIGN_MAGIC)) {
b2280677
NB
1050 if (dl->spare)
1051 continue;
3d2c4fc7
DW
1052 if (posix_memalign((void**)&dl->spare, 512,
1053 super->conf_rec_len*512) != 0) {
e7b84f9d
N
1054 pr_err("%s could not allocate spare info buf\n",
1055 __func__);
3d2c4fc7
DW
1056 return 1;
1057 }
613b0d17 1058
b2280677
NB
1059 memcpy(dl->spare, vd, super->conf_rec_len*512);
1060 continue;
1061 }
60931cf9 1062 if (!be32_eq(vd->magic, DDF_VD_CONF_MAGIC))
a322f70c
DW
1063 continue;
1064 for (vcl = super->conflist; vcl; vcl = vcl->next) {
1065 if (memcmp(vcl->conf.guid,
1066 vd->guid, DDF_GUID_LEN) == 0)
1067 break;
1068 }
1069
1070 if (vcl) {
b2280677 1071 dl->vlist[vnum++] = vcl;
3dc821b0 1072 if (vcl->other_bvds != NULL &&
1073 vcl->conf.sec_elmnt_seq != vd->sec_elmnt_seq) {
1074 add_other_bvd(vcl, vd, super->conf_rec_len*512);
1075 continue;
1076 }
60931cf9 1077 if (be32_to_cpu(vd->seqnum) <=
1078 be32_to_cpu(vcl->conf.seqnum))
a322f70c 1079 continue;
59e36268 1080 } else {
3d2c4fc7 1081 if (posix_memalign((void**)&vcl, 512,
6416d527 1082 (super->conf_rec_len*512 +
3d2c4fc7 1083 offsetof(struct vcl, conf))) != 0) {
e7b84f9d
N
1084 pr_err("%s could not allocate vcl buf\n",
1085 __func__);
3d2c4fc7
DW
1086 return 1;
1087 }
a322f70c 1088 vcl->next = super->conflist;
59e36268 1089 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
3c48f7be 1090 vcl->conf.sec_elmnt_count = vd->sec_elmnt_count;
1091 if (alloc_other_bvds(super, vcl) != 0) {
1092 pr_err("%s could not allocate other bvds\n",
1093 __func__);
1094 free(vcl);
1095 return 1;
1096 };
a322f70c 1097 super->conflist = vcl;
b2280677 1098 dl->vlist[vnum++] = vcl;
a322f70c 1099 }
8c3b8c2c 1100 memcpy(&vcl->conf, vd, super->conf_rec_len*512);
59e36268
NB
1101 for (i=0; i < max_virt_disks ; i++)
1102 if (memcmp(super->virt->entries[i].guid,
1103 vcl->conf.guid, DDF_GUID_LEN)==0)
1104 break;
1105 if (i < max_virt_disks)
1106 vcl->vcnum = i;
a322f70c 1107 }
a322f70c
DW
1108
1109 return 0;
1110}
1111
1112#ifndef MDASSEMBLE
1113static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 1114 void **sbp, char *devname);
a322f70c 1115#endif
37424f13
DW
1116
1117static void free_super_ddf(struct supertype *st);
1118
a322f70c
DW
1119static int load_super_ddf(struct supertype *st, int fd,
1120 char *devname)
1121{
1122 unsigned long long dsize;
1123 struct ddf_super *super;
1124 int rv;
1125
a322f70c
DW
1126 if (get_dev_size(fd, devname, &dsize) == 0)
1127 return 1;
1128
a34fea0e 1129 if (test_partition(fd))
691c6ee1
N
1130 /* DDF is not allowed on partitions */
1131 return 1;
1132
a322f70c
DW
1133 /* 32M is a lower bound */
1134 if (dsize <= 32*1024*1024) {
97320d7c 1135 if (devname)
e7b84f9d
N
1136 pr_err("%s is too small for ddf: "
1137 "size is %llu sectors.\n",
1138 devname, dsize>>9);
97320d7c 1139 return 1;
a322f70c
DW
1140 }
1141 if (dsize & 511) {
97320d7c 1142 if (devname)
e7b84f9d
N
1143 pr_err("%s is an odd size for ddf: "
1144 "size is %llu bytes.\n",
1145 devname, dsize);
97320d7c 1146 return 1;
a322f70c
DW
1147 }
1148
37424f13
DW
1149 free_super_ddf(st);
1150
6416d527 1151 if (posix_memalign((void**)&super, 512, sizeof(*super))!= 0) {
e7b84f9d 1152 pr_err("malloc of %zu failed.\n",
a322f70c
DW
1153 sizeof(*super));
1154 return 1;
1155 }
a2349791 1156 memset(super, 0, sizeof(*super));
a322f70c
DW
1157
1158 rv = load_ddf_headers(fd, super, devname);
1159 if (rv) {
1160 free(super);
1161 return rv;
1162 }
1163
1164 /* Have valid headers and have chosen the best. Let's read in the rest*/
1165
1166 rv = load_ddf_global(fd, super, devname);
1167
1168 if (rv) {
1169 if (devname)
e7b84f9d
N
1170 pr_err("Failed to load all information "
1171 "sections on %s\n", devname);
a322f70c
DW
1172 free(super);
1173 return rv;
1174 }
1175
3d2c4fc7
DW
1176 rv = load_ddf_local(fd, super, devname, 0);
1177
1178 if (rv) {
1179 if (devname)
e7b84f9d
N
1180 pr_err("Failed to load all information "
1181 "sections on %s\n", devname);
3d2c4fc7
DW
1182 free(super);
1183 return rv;
1184 }
a322f70c
DW
1185
1186 /* Should possibly check the sections .... */
1187
1188 st->sb = super;
1189 if (st->ss == NULL) {
1190 st->ss = &super_ddf;
1191 st->minor_version = 0;
1192 st->max_devs = 512;
1193 }
1194 return 0;
1195
1196}
1197
1198static void free_super_ddf(struct supertype *st)
1199{
1200 struct ddf_super *ddf = st->sb;
1201 if (ddf == NULL)
1202 return;
1203 free(ddf->phys);
1204 free(ddf->virt);
3921e41a 1205 free(ddf->conf);
a322f70c
DW
1206 while (ddf->conflist) {
1207 struct vcl *v = ddf->conflist;
1208 ddf->conflist = v->next;
59e36268
NB
1209 if (v->block_sizes)
1210 free(v->block_sizes);
3c48f7be 1211 if (v->other_bvds)
1212 /*
1213 v->other_bvds[0] points to beginning of buffer,
1214 see alloc_other_bvds()
1215 */
1216 free(v->other_bvds[0]);
a322f70c
DW
1217 free(v);
1218 }
1219 while (ddf->dlist) {
1220 struct dl *d = ddf->dlist;
1221 ddf->dlist = d->next;
1222 if (d->fd >= 0)
1223 close(d->fd);
b2280677
NB
1224 if (d->spare)
1225 free(d->spare);
a322f70c
DW
1226 free(d);
1227 }
8a38cb04
N
1228 while (ddf->add_list) {
1229 struct dl *d = ddf->add_list;
1230 ddf->add_list = d->next;
1231 if (d->fd >= 0)
1232 close(d->fd);
1233 if (d->spare)
1234 free(d->spare);
1235 free(d);
1236 }
a322f70c
DW
1237 free(ddf);
1238 st->sb = NULL;
1239}
1240
1241static struct supertype *match_metadata_desc_ddf(char *arg)
1242{
1243 /* 'ddf' only support containers */
1244 struct supertype *st;
1245 if (strcmp(arg, "ddf") != 0 &&
1246 strcmp(arg, "default") != 0
1247 )
1248 return NULL;
1249
503975b9 1250 st = xcalloc(1, sizeof(*st));
a322f70c
DW
1251 st->ss = &super_ddf;
1252 st->max_devs = 512;
1253 st->minor_version = 0;
1254 st->sb = NULL;
1255 return st;
1256}
1257
a322f70c
DW
1258#ifndef MDASSEMBLE
1259
1260static mapping_t ddf_state[] = {
1261 { "Optimal", 0},
1262 { "Degraded", 1},
1263 { "Deleted", 2},
1264 { "Missing", 3},
1265 { "Failed", 4},
1266 { "Partially Optimal", 5},
1267 { "-reserved-", 6},
1268 { "-reserved-", 7},
1269 { NULL, 0}
1270};
1271
1272static mapping_t ddf_init_state[] = {
1273 { "Not Initialised", 0},
1274 { "QuickInit in Progress", 1},
1275 { "Fully Initialised", 2},
1276 { "*UNKNOWN*", 3},
1277 { NULL, 0}
1278};
1279static mapping_t ddf_access[] = {
1280 { "Read/Write", 0},
1281 { "Reserved", 1},
1282 { "Read Only", 2},
1283 { "Blocked (no access)", 3},
1284 { NULL ,0}
1285};
1286
1287static mapping_t ddf_level[] = {
1288 { "RAID0", DDF_RAID0},
1289 { "RAID1", DDF_RAID1},
1290 { "RAID3", DDF_RAID3},
1291 { "RAID4", DDF_RAID4},
1292 { "RAID5", DDF_RAID5},
1293 { "RAID1E",DDF_RAID1E},
1294 { "JBOD", DDF_JBOD},
1295 { "CONCAT",DDF_CONCAT},
1296 { "RAID5E",DDF_RAID5E},
1297 { "RAID5EE",DDF_RAID5EE},
1298 { "RAID6", DDF_RAID6},
1299 { NULL, 0}
1300};
1301static mapping_t ddf_sec_level[] = {
1302 { "Striped", DDF_2STRIPED},
1303 { "Mirrored", DDF_2MIRRORED},
1304 { "Concat", DDF_2CONCAT},
1305 { "Spanned", DDF_2SPANNED},
1306 { NULL, 0}
1307};
1308#endif
1309
fb9d0acb 1310static int all_ff(const char *guid)
42dc2744
N
1311{
1312 int i;
1313 for (i = 0; i < DDF_GUID_LEN; i++)
1314 if (guid[i] != (char)0xff)
1315 return 0;
1316 return 1;
1317}
1318
4441541f
N
1319static const char *guid_str(const char *guid)
1320{
1321 static char buf[DDF_GUID_LEN*2+1];
1322 int i;
1323 char *p = buf;
1324 for (i = 0; i < DDF_GUID_LEN; i++) {
1325 unsigned char c = guid[i];
1326 if (c >= 32 && c < 127)
1327 p += sprintf(p, "%c", c);
1328 else
1329 p += sprintf(p, "%02x", c);
1330 }
1331 *p = '\0';
1332 return (const char *) buf;
1333}
1334
a322f70c
DW
1335#ifndef MDASSEMBLE
1336static void print_guid(char *guid, int tstamp)
1337{
1338 /* A GUIDs are part (or all) ASCII and part binary.
1339 * They tend to be space padded.
59e36268
NB
1340 * We print the GUID in HEX, then in parentheses add
1341 * any initial ASCII sequence, and a possible
1342 * time stamp from bytes 16-19
a322f70c
DW
1343 */
1344 int l = DDF_GUID_LEN;
1345 int i;
59e36268
NB
1346
1347 for (i=0 ; i<DDF_GUID_LEN ; i++) {
1348 if ((i&3)==0 && i != 0) printf(":");
1349 printf("%02X", guid[i]&255);
1350 }
1351
cfccea8c 1352 printf("\n (");
a322f70c
DW
1353 while (l && guid[l-1] == ' ')
1354 l--;
1355 for (i=0 ; i<l ; i++) {
1356 if (guid[i] >= 0x20 && guid[i] < 0x7f)
1357 fputc(guid[i], stdout);
1358 else
59e36268 1359 break;
a322f70c
DW
1360 }
1361 if (tstamp) {
1362 time_t then = __be32_to_cpu(*(__u32*)(guid+16)) + DECADE;
1363 char tbuf[100];
1364 struct tm *tm;
1365 tm = localtime(&then);
59e36268 1366 strftime(tbuf, 100, " %D %T",tm);
a322f70c
DW
1367 fputs(tbuf, stdout);
1368 }
59e36268 1369 printf(")");
a322f70c
DW
1370}
1371
1372static void examine_vd(int n, struct ddf_super *sb, char *guid)
1373{
8c3b8c2c 1374 int crl = sb->conf_rec_len;
a322f70c
DW
1375 struct vcl *vcl;
1376
1377 for (vcl = sb->conflist ; vcl ; vcl = vcl->next) {
f21e18ca 1378 unsigned int i;
a322f70c
DW
1379 struct vd_config *vc = &vcl->conf;
1380
60931cf9 1381 if (!be32_eq(calc_crc(vc, crl*512), vc->crc))
a322f70c
DW
1382 continue;
1383 if (memcmp(vc->guid, guid, DDF_GUID_LEN) != 0)
1384 continue;
1385
1386 /* Ok, we know about this VD, let's give more details */
b06e3095 1387 printf(" Raid Devices[%d] : %d (", n,
a8173e43 1388 be16_to_cpu(vc->prim_elmnt_count));
1389 for (i = 0; i < be16_to_cpu(vc->prim_elmnt_count); i++) {
b06e3095 1390 int j;
a8173e43 1391 int cnt = be16_to_cpu(sb->phys->used_pdes);
b06e3095 1392 for (j=0; j<cnt; j++)
60931cf9 1393 if (be32_eq(vc->phys_refnum[i],
1394 sb->phys->entries[j].refnum))
b06e3095
N
1395 break;
1396 if (i) printf(" ");
1397 if (j < cnt)
1398 printf("%d", j);
1399 else
1400 printf("--");
1401 }
1402 printf(")\n");
1403 if (vc->chunk_shift != 255)
613b0d17
N
1404 printf(" Chunk Size[%d] : %d sectors\n", n,
1405 1 << vc->chunk_shift);
a322f70c
DW
1406 printf(" Raid Level[%d] : %s\n", n,
1407 map_num(ddf_level, vc->prl)?:"-unknown-");
1408 if (vc->sec_elmnt_count != 1) {
1409 printf(" Secondary Position[%d] : %d of %d\n", n,
1410 vc->sec_elmnt_seq, vc->sec_elmnt_count);
1411 printf(" Secondary Level[%d] : %s\n", n,
1412 map_num(ddf_sec_level, vc->srl) ?: "-unknown-");
1413 }
1414 printf(" Device Size[%d] : %llu\n", n,
9d0c6b70 1415 be64_to_cpu(vc->blocks)/2);
a322f70c 1416 printf(" Array Size[%d] : %llu\n", n,
9d0c6b70 1417 be64_to_cpu(vc->array_blocks)/2);
a322f70c
DW
1418 }
1419}
1420
1421static void examine_vds(struct ddf_super *sb)
1422{
a8173e43 1423 int cnt = be16_to_cpu(sb->virt->populated_vdes);
fb9d0acb 1424 unsigned int i;
a322f70c
DW
1425 printf(" Virtual Disks : %d\n", cnt);
1426
a8173e43 1427 for (i = 0; i < be16_to_cpu(sb->virt->max_vdes); i++) {
a322f70c 1428 struct virtual_entry *ve = &sb->virt->entries[i];
fb9d0acb 1429 if (all_ff(ve->guid))
1430 continue;
b06e3095 1431 printf("\n");
a322f70c
DW
1432 printf(" VD GUID[%d] : ", i); print_guid(ve->guid, 1);
1433 printf("\n");
a8173e43 1434 printf(" unit[%d] : %d\n", i, be16_to_cpu(ve->unit));
a322f70c
DW
1435 printf(" state[%d] : %s, %s%s\n", i,
1436 map_num(ddf_state, ve->state & 7),
cc83a819
N
1437 (ve->state & DDF_state_morphing) ? "Morphing, ": "",
1438 (ve->state & DDF_state_inconsistent)? "Not Consistent" : "Consistent");
a322f70c 1439 printf(" init state[%d] : %s\n", i,
cc83a819 1440 map_num(ddf_init_state, ve->init_state&DDF_initstate_mask));
a322f70c 1441 printf(" access[%d] : %s\n", i,
cc83a819 1442 map_num(ddf_access, (ve->init_state & DDF_access_mask) >> 6));
a322f70c
DW
1443 printf(" Name[%d] : %.16s\n", i, ve->name);
1444 examine_vd(i, sb, ve->guid);
1445 }
1446 if (cnt) printf("\n");
1447}
1448
1449static void examine_pds(struct ddf_super *sb)
1450{
a8173e43 1451 int cnt = be16_to_cpu(sb->phys->used_pdes);
a322f70c
DW
1452 int i;
1453 struct dl *dl;
1454 printf(" Physical Disks : %d\n", cnt);
962371a5 1455 printf(" Number RefNo Size Device Type/State\n");
a322f70c
DW
1456
1457 for (i=0 ; i<cnt ; i++) {
1458 struct phys_disk_entry *pd = &sb->phys->entries[i];
a8173e43 1459 int type = be16_to_cpu(pd->type);
1460 int state = be16_to_cpu(pd->state);
a322f70c 1461
b06e3095
N
1462 //printf(" PD GUID[%d] : ", i); print_guid(pd->guid, 0);
1463 //printf("\n");
1464 printf(" %3d %08x ", i,
60931cf9 1465 be32_to_cpu(pd->refnum));
613b0d17 1466 printf("%8lluK ",
9d0c6b70 1467 be64_to_cpu(pd->config_size)>>1);
b06e3095 1468 for (dl = sb->dlist; dl ; dl = dl->next) {
60931cf9 1469 if (be32_eq(dl->disk.refnum, pd->refnum)) {
b06e3095
N
1470 char *dv = map_dev(dl->major, dl->minor, 0);
1471 if (dv) {
962371a5 1472 printf("%-15s", dv);
b06e3095
N
1473 break;
1474 }
1475 }
1476 }
1477 if (!dl)
962371a5 1478 printf("%15s","");
b06e3095 1479 printf(" %s%s%s%s%s",
a322f70c 1480 (type&2) ? "active":"",
b06e3095 1481 (type&4) ? "Global-Spare":"",
a322f70c
DW
1482 (type&8) ? "spare" : "",
1483 (type&16)? ", foreign" : "",
1484 (type&32)? "pass-through" : "");
18cb4496
N
1485 if (state & DDF_Failed)
1486 /* This over-rides these three */
1487 state &= ~(DDF_Online|DDF_Rebuilding|DDF_Transition);
b06e3095 1488 printf("/%s%s%s%s%s%s%s",
a322f70c
DW
1489 (state&1)? "Online": "Offline",
1490 (state&2)? ", Failed": "",
1491 (state&4)? ", Rebuilding": "",
1492 (state&8)? ", in-transition": "",
b06e3095
N
1493 (state&16)? ", SMART-errors": "",
1494 (state&32)? ", Unrecovered-Read-Errors": "",
a322f70c 1495 (state&64)? ", Missing" : "");
a322f70c
DW
1496 printf("\n");
1497 }
1498}
1499
1500static void examine_super_ddf(struct supertype *st, char *homehost)
1501{
1502 struct ddf_super *sb = st->sb;
1503
60931cf9 1504 printf(" Magic : %08x\n", be32_to_cpu(sb->anchor.magic));
a322f70c 1505 printf(" Version : %.8s\n", sb->anchor.revision);
598f0d58
NB
1506 printf("Controller GUID : "); print_guid(sb->controller.guid, 0);
1507 printf("\n");
1508 printf(" Container GUID : "); print_guid(sb->anchor.guid, 1);
a322f70c 1509 printf("\n");
60931cf9 1510 printf(" Seq : %08x\n", be32_to_cpu(sb->active->seq));
1511 printf(" Redundant hdr : %s\n", be32_eq(sb->secondary.magic,
1512 DDF_HEADER_MAGIC)
a322f70c
DW
1513 ?"yes" : "no");
1514 examine_vds(sb);
1515 examine_pds(sb);
1516}
1517
a5d85af7 1518static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map);
ff54de6e 1519
bedbf68a 1520static void uuid_from_ddf_guid(const char *guid, int uuid[4]);
42dc2744 1521static void uuid_from_super_ddf(struct supertype *st, int uuid[4]);
a8b25633 1522static void _ddf_array_name(char *name, const struct ddf_super *ddf, int i);
ff54de6e 1523
bedbf68a 1524static unsigned int get_vd_num_of_subarray(struct supertype *st)
1525{
1526 /*
1527 * Figure out the VD number for this supertype.
1528 * Returns DDF_CONTAINER for the container itself,
1529 * and DDF_NOTFOUND on error.
1530 */
1531 struct ddf_super *ddf = st->sb;
1532 struct mdinfo *sra;
1533 char *sub, *end;
1534 unsigned int vcnum;
1535
1536 if (*st->container_devnm == '\0')
1537 return DDF_CONTAINER;
1538
1539 sra = sysfs_read(-1, st->devnm, GET_VERSION);
1540 if (!sra || sra->array.major_version != -1 ||
1541 sra->array.minor_version != -2 ||
1542 !is_subarray(sra->text_version))
1543 return DDF_NOTFOUND;
1544
1545 sub = strchr(sra->text_version + 1, '/');
1546 if (sub != NULL)
1547 vcnum = strtoul(sub + 1, &end, 10);
1548 if (sub == NULL || *sub == '\0' || *end != '\0' ||
a8173e43 1549 vcnum >= be16_to_cpu(ddf->active->max_vd_entries))
bedbf68a 1550 return DDF_NOTFOUND;
1551
1552 return vcnum;
1553}
1554
061f2c6a 1555static void brief_examine_super_ddf(struct supertype *st, int verbose)
4737ae25
N
1556{
1557 /* We just write a generic DDF ARRAY entry
1558 */
1559 struct mdinfo info;
1560 char nbuf[64];
a5d85af7 1561 getinfo_super_ddf(st, &info, NULL);
4737ae25
N
1562 fname_from_uuid(st, &info, nbuf, ':');
1563
1564 printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5);
1565}
1566
1567static void brief_examine_subarrays_ddf(struct supertype *st, int verbose)
a322f70c
DW
1568{
1569 /* We just write a generic DDF ARRAY entry
a322f70c 1570 */
42dc2744 1571 struct ddf_super *ddf = st->sb;
ff54de6e 1572 struct mdinfo info;
f21e18ca 1573 unsigned int i;
ff54de6e 1574 char nbuf[64];
a5d85af7 1575 getinfo_super_ddf(st, &info, NULL);
ff54de6e 1576 fname_from_uuid(st, &info, nbuf, ':');
42dc2744 1577
a8173e43 1578 for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) {
42dc2744
N
1579 struct virtual_entry *ve = &ddf->virt->entries[i];
1580 struct vcl vcl;
1581 char nbuf1[64];
a8b25633 1582 char namebuf[17];
42dc2744
N
1583 if (all_ff(ve->guid))
1584 continue;
1585 memcpy(vcl.conf.guid, ve->guid, DDF_GUID_LEN);
1586 ddf->currentconf =&vcl;
7087f02b 1587 vcl.vcnum = i;
42dc2744
N
1588 uuid_from_super_ddf(st, info.uuid);
1589 fname_from_uuid(st, &info, nbuf1, ':');
a8b25633 1590 _ddf_array_name(namebuf, ddf, i);
1591 printf("ARRAY%s%s container=%s member=%d UUID=%s\n",
1592 namebuf[0] == '\0' ? "" : " /dev/md/", namebuf,
42dc2744
N
1593 nbuf+5, i, nbuf1+5);
1594 }
a322f70c
DW
1595}
1596
bceedeec
N
1597static void export_examine_super_ddf(struct supertype *st)
1598{
1599 struct mdinfo info;
1600 char nbuf[64];
a5d85af7 1601 getinfo_super_ddf(st, &info, NULL);
bceedeec
N
1602 fname_from_uuid(st, &info, nbuf, ':');
1603 printf("MD_METADATA=ddf\n");
1604 printf("MD_LEVEL=container\n");
1605 printf("MD_UUID=%s\n", nbuf+5);
cc9bfd9e 1606 printf("MD_DEVICES=%u\n",
1607 be16_to_cpu(((struct ddf_super *)st->sb)->phys->used_pdes));
bceedeec 1608}
bceedeec 1609
74db60b0
N
1610static int copy_metadata_ddf(struct supertype *st, int from, int to)
1611{
1612 void *buf;
1613 unsigned long long dsize, offset;
1614 int bytes;
1615 struct ddf_header *ddf;
1616 int written = 0;
1617
1618 /* The meta consists of an anchor, a primary, and a secondary.
1619 * This all lives at the end of the device.
1620 * So it is easiest to find the earliest of primary and
1621 * secondary, and copy everything from there.
1622 *
1623 * Anchor is 512 from end It contains primary_lba and secondary_lba
1624 * we choose one of those
1625 */
1626
1627 if (posix_memalign(&buf, 4096, 4096) != 0)
1628 return 1;
1629
1630 if (!get_dev_size(from, NULL, &dsize))
1631 goto err;
1632
1633 if (lseek64(from, dsize-512, 0) < 0)
1634 goto err;
1635 if (read(from, buf, 512) != 512)
1636 goto err;
1637 ddf = buf;
60931cf9 1638 if (!be32_eq(ddf->magic, DDF_HEADER_MAGIC) ||
1639 !be32_eq(calc_crc(ddf, 512), ddf->crc) ||
74db60b0
N
1640 (memcmp(ddf->revision, DDF_REVISION_0, 8) != 0 &&
1641 memcmp(ddf->revision, DDF_REVISION_2, 8) != 0))
1642 goto err;
1643
1644 offset = dsize - 512;
9d0c6b70 1645 if ((be64_to_cpu(ddf->primary_lba) << 9) < offset)
1646 offset = be64_to_cpu(ddf->primary_lba) << 9;
1647 if ((be64_to_cpu(ddf->secondary_lba) << 9) < offset)
1648 offset = be64_to_cpu(ddf->secondary_lba) << 9;
74db60b0
N
1649
1650 bytes = dsize - offset;
1651
1652 if (lseek64(from, offset, 0) < 0 ||
1653 lseek64(to, offset, 0) < 0)
1654 goto err;
1655 while (written < bytes) {
1656 int n = bytes - written;
1657 if (n > 4096)
1658 n = 4096;
1659 if (read(from, buf, n) != n)
1660 goto err;
1661 if (write(to, buf, n) != n)
1662 goto err;
1663 written += n;
1664 }
1665 free(buf);
1666 return 0;
1667err:
1668 free(buf);
1669 return 1;
1670}
1671
/*
 * Detailed description of a running DDF array: not implemented yet,
 * nothing is printed.
 */
static void detail_super_ddf(struct supertype *st, char *homehost)
{
	/* FIXME later:
	 *  - could print the DDF GUID
	 *  - need to find which array this is
	 *  - if it is the whole container, briefly list all arrays
	 *  - if it is a single array, give its name
	 */
}
1681
7087f02b 1682static const char *vendors_with_variable_volume_UUID[] = {
1683 "LSI ",
1684};
1685
1686static int volume_id_is_reliable(const struct ddf_super *ddf)
1687{
1c0aebc2 1688 int n = ARRAY_SIZE(vendors_with_variable_volume_UUID);
7087f02b 1689 int i;
1690 for (i = 0; i < n; i++)
1691 if (!memcmp(ddf->controller.guid,
1692 vendors_with_variable_volume_UUID[i], 8))
1693 return 0;
1694 return 1;
1695}
1696
1697static void uuid_of_ddf_subarray(const struct ddf_super *ddf,
1698 unsigned int vcnum, int uuid[4])
1699{
1700 char buf[DDF_GUID_LEN+18], sha[20], *p;
1701 struct sha1_ctx ctx;
1702 if (volume_id_is_reliable(ddf)) {
1703 uuid_from_ddf_guid(ddf->virt->entries[vcnum].guid, uuid);
1704 return;
1705 }
1706 /*
1707 * Some fake RAID BIOSes (in particular, LSI ones) change the
1708 * VD GUID at every boot. These GUIDs are not suitable for
1709 * identifying an array. Luckily the header GUID appears to
1710 * remain constant.
1711 * We construct a pseudo-UUID from the header GUID and those
1712 * properties of the subarray that we expect to remain constant.
1713 */
1714 memset(buf, 0, sizeof(buf));
1715 p = buf;
1716 memcpy(p, ddf->anchor.guid, DDF_GUID_LEN);
1717 p += DDF_GUID_LEN;
1718 memcpy(p, ddf->virt->entries[vcnum].name, 16);
1719 p += 16;
1720 *((__u16 *) p) = vcnum;
1721 sha1_init_ctx(&ctx);
1722 sha1_process_bytes(buf, sizeof(buf), &ctx);
1723 sha1_finish_ctx(&ctx, sha);
1724 memcpy(uuid, sha, 4*4);
1725}
1726
a322f70c
DW
1727static void brief_detail_super_ddf(struct supertype *st)
1728{
ff54de6e
N
1729 struct mdinfo info;
1730 char nbuf[64];
bedbf68a 1731 struct ddf_super *ddf = st->sb;
1732 unsigned int vcnum = get_vd_num_of_subarray(st);
1733 if (vcnum == DDF_CONTAINER)
1734 uuid_from_super_ddf(st, info.uuid);
1735 else if (vcnum == DDF_NOTFOUND)
1736 return;
1737 else
7087f02b 1738 uuid_of_ddf_subarray(ddf, vcnum, info.uuid);
ff54de6e
N
1739 fname_from_uuid(st, &info, nbuf,':');
1740 printf(" UUID=%s", nbuf + 5);
a322f70c 1741}
a322f70c
DW
1742#endif
1743
1744static int match_home_ddf(struct supertype *st, char *homehost)
1745{
1746 /* It matches 'this' host if the controller is a
1747 * Linux-MD controller with vendor_data matching
1748 * the hostname
1749 */
1750 struct ddf_super *ddf = st->sb;
f21e18ca 1751 unsigned int len;
d1d3482b
N
1752
1753 if (!homehost)
1754 return 0;
1755 len = strlen(homehost);
a322f70c
DW
1756
1757 return (memcmp(ddf->controller.guid, T10, 8) == 0 &&
1758 len < sizeof(ddf->controller.vendor_data) &&
1759 memcmp(ddf->controller.vendor_data, homehost,len) == 0 &&
1760 ddf->controller.vendor_data[len] == 0);
1761}
1762
0e600426 1763#ifndef MDASSEMBLE
baba3f4e 1764static int find_index_in_bvd(const struct ddf_super *ddf,
1765 const struct vd_config *conf, unsigned int n,
1766 unsigned int *n_bvd)
1767{
1768 /*
1769 * Find the index of the n-th valid physical disk in this BVD
1770 */
1771 unsigned int i, j;
1772 for (i = 0, j = 0; i < ddf->mppe &&
a8173e43 1773 j < be16_to_cpu(conf->prim_elmnt_count); i++) {
60931cf9 1774 if (be32_to_cpu(conf->phys_refnum[i]) != 0xffffffff) {
baba3f4e 1775 if (n == j) {
1776 *n_bvd = i;
1777 return 1;
1778 }
1779 j++;
1780 }
1781 }
1782 dprintf("%s: couldn't find BVD member %u (total %u)\n",
a8173e43 1783 __func__, n, be16_to_cpu(conf->prim_elmnt_count));
baba3f4e 1784 return 0;
1785}
1786
1787static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
1788 unsigned int n,
1789 unsigned int *n_bvd, struct vcl **vcl)
a322f70c 1790{
7a7cc504 1791 struct vcl *v;
59e36268 1792
baba3f4e 1793 for (v = ddf->conflist; v; v = v->next) {
84e32e19 1794 unsigned int nsec, ibvd = 0;
baba3f4e 1795 struct vd_config *conf;
1796 if (inst != v->vcnum)
1797 continue;
1798 conf = &v->conf;
1799 if (conf->sec_elmnt_count == 1) {
1800 if (find_index_in_bvd(ddf, conf, n, n_bvd)) {
1801 *vcl = v;
1802 return conf;
1803 } else
1804 goto bad;
1805 }
1806 if (v->other_bvds == NULL) {
1807 pr_err("%s: BUG: other_bvds is NULL, nsec=%u\n",
1808 __func__, conf->sec_elmnt_count);
1809 goto bad;
1810 }
a8173e43 1811 nsec = n / be16_to_cpu(conf->prim_elmnt_count);
baba3f4e 1812 if (conf->sec_elmnt_seq != nsec) {
1813 for (ibvd = 1; ibvd < conf->sec_elmnt_count; ibvd++) {
baba3f4e 1814 if (v->other_bvds[ibvd-1]->sec_elmnt_seq
1815 == nsec)
1816 break;
1817 }
1818 if (ibvd == conf->sec_elmnt_count)
1819 goto bad;
1820 conf = v->other_bvds[ibvd-1];
1821 }
1822 if (!find_index_in_bvd(ddf, conf,
1823 n - nsec*conf->sec_elmnt_count, n_bvd))
1824 goto bad;
1825 dprintf("%s: found disk %u as member %u in bvd %d of array %u\n"
84e32e19 1826 , __func__, n, *n_bvd, ibvd, inst);
baba3f4e 1827 *vcl = v;
1828 return conf;
1829 }
1830bad:
1831 pr_err("%s: Could't find disk %d in array %u\n", __func__, n, inst);
7a7cc504
NB
1832 return NULL;
1833}
0e600426 1834#endif
7a7cc504 1835
60931cf9 1836static int find_phys(const struct ddf_super *ddf, be32 phys_refnum)
7a7cc504
NB
1837{
1838 /* Find the entry in phys_disk which has the given refnum
1839 * and return it's index
1840 */
f21e18ca 1841 unsigned int i;
a8173e43 1842 for (i = 0; i < be16_to_cpu(ddf->phys->max_pdes); i++)
60931cf9 1843 if (be32_eq(ddf->phys->entries[i].refnum, phys_refnum))
7a7cc504
NB
1844 return i;
1845 return -1;
a322f70c
DW
1846}
1847
bedbf68a 1848static void uuid_from_ddf_guid(const char *guid, int uuid[4])
1849{
1850 char buf[20];
1851 struct sha1_ctx ctx;
1852 sha1_init_ctx(&ctx);
1853 sha1_process_bytes(guid, DDF_GUID_LEN, &ctx);
1854 sha1_finish_ctx(&ctx, buf);
1855 memcpy(uuid, buf, 4*4);
1856}
1857
a322f70c
DW
1858static void uuid_from_super_ddf(struct supertype *st, int uuid[4])
1859{
1860 /* The uuid returned here is used for:
1861 * uuid to put into bitmap file (Create, Grow)
1862 * uuid for backup header when saving critical section (Grow)
1863 * comparing uuids when re-adding a device into an array
51006d85
N
1864 * In these cases the uuid required is that of the data-array,
1865 * not the device-set.
1866 * uuid to recognise same set when adding a missing device back
1867 * to an array. This is a uuid for the device-set.
613b0d17 1868 *
a322f70c
DW
1869 * For each of these we can make do with a truncated
1870 * or hashed uuid rather than the original, as long as
1871 * everyone agrees.
a322f70c
DW
1872 * In the case of SVD we assume the BVD is of interest,
1873 * though that might be the case if a bitmap were made for
1874 * a mirrored SVD - worry about that later.
1875 * So we need to find the VD configuration record for the
1876 * relevant BVD and extract the GUID and Secondary_Element_Seq.
1877 * The first 16 bytes of the sha1 of these is used.
1878 */
1879 struct ddf_super *ddf = st->sb;
d2ca6449 1880 struct vcl *vcl = ddf->currentconf;
a322f70c 1881
c5afc314 1882 if (vcl)
7087f02b 1883 uuid_of_ddf_subarray(ddf, vcl->vcnum, uuid);
c5afc314 1884 else
7087f02b 1885 uuid_from_ddf_guid(ddf->anchor.guid, uuid);
a322f70c
DW
1886}
1887
a5d85af7 1888static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map);
78e44928 1889
a5d85af7 1890static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1891{
1892 struct ddf_super *ddf = st->sb;
a5d85af7 1893 int map_disks = info->array.raid_disks;
90fa1a29 1894 __u32 *cptr;
a322f70c 1895
78e44928 1896 if (ddf->currentconf) {
a5d85af7 1897 getinfo_super_ddf_bvd(st, info, map);
78e44928
NB
1898 return;
1899 }
95eeceeb 1900 memset(info, 0, sizeof(*info));
78e44928 1901
a8173e43 1902 info->array.raid_disks = be16_to_cpu(ddf->phys->used_pdes);
a322f70c
DW
1903 info->array.level = LEVEL_CONTAINER;
1904 info->array.layout = 0;
1905 info->array.md_minor = -1;
90fa1a29
JS
1906 cptr = (__u32 *)(ddf->anchor.guid + 16);
1907 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
1908
a322f70c
DW
1909 info->array.utime = 0;
1910 info->array.chunk_size = 0;
510242aa 1911 info->container_enough = 1;
a322f70c 1912
a322f70c
DW
1913 info->disk.major = 0;
1914 info->disk.minor = 0;
cba0191b 1915 if (ddf->dlist) {
f0e876ce 1916 struct phys_disk_entry *pde = NULL;
60931cf9 1917 info->disk.number = be32_to_cpu(ddf->dlist->disk.refnum);
59e36268 1918 info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum);
d2ca6449 1919
9d0c6b70 1920 info->data_offset = be64_to_cpu(ddf->phys->
613b0d17
N
1921 entries[info->disk.raid_disk].
1922 config_size);
d2ca6449 1923 info->component_size = ddf->dlist->size - info->data_offset;
f0e876ce
N
1924 if (info->disk.raid_disk >= 0)
1925 pde = ddf->phys->entries + info->disk.raid_disk;
1926 if (pde &&
1927 !(be16_to_cpu(pde->state) & DDF_Failed))
1928 info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
1929 else
1930 info->disk.state = 1 << MD_DISK_FAULTY;
eba2859f
N
1931
1932 info->events = be32_to_cpu(ddf->active->seq);
cba0191b
NB
1933 } else {
1934 info->disk.number = -1;
661dce36 1935 info->disk.raid_disk = -1;
cba0191b 1936// info->disk.raid_disk = find refnum in the table and use index;
f0e876ce 1937 info->disk.state = (1 << MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE);
cba0191b 1938 }
a19c88b8 1939
921d9e16 1940 info->recovery_start = MaxSector;
a19c88b8 1941 info->reshape_active = 0;
6e75048b 1942 info->recovery_blocked = 0;
c5afc314 1943 info->name[0] = 0;
a322f70c 1944
f35f2525
N
1945 info->array.major_version = -1;
1946 info->array.minor_version = -2;
159c3a1a 1947 strcpy(info->text_version, "ddf");
a67dd8cc 1948 info->safe_mode_delay = 0;
159c3a1a 1949
c5afc314 1950 uuid_from_super_ddf(st, info->uuid);
a322f70c 1951
a5d85af7
N
1952 if (map) {
1953 int i;
1954 for (i = 0 ; i < map_disks; i++) {
1955 if (i < info->array.raid_disks &&
a8173e43 1956 !(be16_to_cpu(ddf->phys->entries[i].state)
1957 & DDF_Failed))
a5d85af7
N
1958 map[i] = 1;
1959 else
1960 map[i] = 0;
1961 }
1962 }
a322f70c
DW
1963}
1964
8bf989d8 1965/* size of name must be at least 17 bytes! */
1966static void _ddf_array_name(char *name, const struct ddf_super *ddf, int i)
1967{
1968 int j;
1969 memcpy(name, ddf->virt->entries[i].name, 16);
1970 name[16] = 0;
1971 for(j = 0; j < 16; j++)
1972 if (name[j] == ' ')
1973 name[j] = 0;
1974}
1975
a5d85af7 1976static void getinfo_super_ddf_bvd(struct supertype *st, struct mdinfo *info, char *map)
a322f70c
DW
1977{
1978 struct ddf_super *ddf = st->sb;
d2ca6449
NB
1979 struct vcl *vc = ddf->currentconf;
1980 int cd = ddf->currentdev;
ddf94a43 1981 int n_prim;
db42fa9b 1982 int j;
8592f29d 1983 struct dl *dl;
a5d85af7 1984 int map_disks = info->array.raid_disks;
90fa1a29 1985 __u32 *cptr;
ddf94a43 1986 struct vd_config *conf;
a322f70c 1987
95eeceeb 1988 memset(info, 0, sizeof(*info));
8a2848a7 1989 if (layout_ddf2md(&vc->conf, &info->array) == -1)
1990 return;
a322f70c 1991 info->array.md_minor = -1;
90fa1a29
JS
1992 cptr = (__u32 *)(vc->conf.guid + 16);
1993 info->array.ctime = DECADE + __be32_to_cpu(*cptr);
60931cf9 1994 info->array.utime = DECADE + be32_to_cpu(vc->conf.timestamp);
d2ca6449 1995 info->array.chunk_size = 512 << vc->conf.chunk_shift;
da9b4a62 1996 info->custom_array_size = 0;
d2ca6449 1997
ddf94a43 1998 conf = &vc->conf;
a8173e43 1999 n_prim = be16_to_cpu(conf->prim_elmnt_count);
ddf94a43 2000 if (conf->sec_elmnt_count > 1 && cd >= n_prim) {
2001 int ibvd = cd / n_prim - 1;
2002 cd %= n_prim;
2003 conf = vc->other_bvds[ibvd];
2004 }
2005
f21e18ca 2006 if (cd >= 0 && (unsigned)cd < ddf->mppe) {
57a66662 2007 info->data_offset =
9d0c6b70 2008 be64_to_cpu(LBA_OFFSET(ddf, conf)[cd]);
d2ca6449
NB
2009 if (vc->block_sizes)
2010 info->component_size = vc->block_sizes[cd];
2011 else
9d0c6b70 2012 info->component_size = be64_to_cpu(conf->blocks);
d2ca6449 2013 }
a322f70c 2014
fb204fb2 2015 for (dl = ddf->dlist; dl ; dl = dl->next)
60931cf9 2016 if (be32_eq(dl->disk.refnum, conf->phys_refnum[cd]))
fb204fb2
N
2017 break;
2018
a322f70c
DW
2019 info->disk.major = 0;
2020 info->disk.minor = 0;
fb204fb2 2021 info->disk.state = 0;
8592f29d
N
2022 if (dl) {
2023 info->disk.major = dl->major;
2024 info->disk.minor = dl->minor;
7c3fb3ec 2025 info->disk.raid_disk = cd + conf->sec_elmnt_seq
a8173e43 2026 * be16_to_cpu(conf->prim_elmnt_count);
fb204fb2 2027 info->disk.number = dl->pdnum;
f0e876ce
N
2028 info->disk.state = 0;
2029 if (info->disk.number >= 0 &&
2030 (be16_to_cpu(ddf->phys->entries[info->disk.number].state) & DDF_Online) &&
2031 !(be16_to_cpu(ddf->phys->entries[info->disk.number].state) & DDF_Failed))
2032 info->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
eba2859f 2033 info->events = be32_to_cpu(ddf->active->seq);
8592f29d 2034 }
a322f70c 2035
103f2410
NB
2036 info->container_member = ddf->currentconf->vcnum;
2037
921d9e16 2038 info->recovery_start = MaxSector;
80d26cb2 2039 info->resync_start = 0;
624c5ad4 2040 info->reshape_active = 0;
6e75048b 2041 info->recovery_blocked = 0;
80d26cb2
NB
2042 if (!(ddf->virt->entries[info->container_member].state
2043 & DDF_state_inconsistent) &&
2044 (ddf->virt->entries[info->container_member].init_state
2045 & DDF_initstate_mask)
2046 == DDF_init_full)
b7528a20 2047 info->resync_start = MaxSector;
80d26cb2 2048
a322f70c
DW
2049 uuid_from_super_ddf(st, info->uuid);
2050
f35f2525
N
2051 info->array.major_version = -1;
2052 info->array.minor_version = -2;
9b63e648 2053 sprintf(info->text_version, "/%s/%d",
4dd2df09 2054 st->container_devnm,
9b63e648 2055 info->container_member);
5684fff6 2056 info->safe_mode_delay = DDF_SAFE_MODE_DELAY;
159c3a1a 2057
8bf989d8 2058 _ddf_array_name(info->name, ddf, info->container_member);
a5d85af7
N
2059
2060 if (map)
2061 for (j = 0; j < map_disks; j++) {
2062 map[j] = 0;
2063 if (j < info->array.raid_disks) {
2064 int i = find_phys(ddf, vc->conf.phys_refnum[j]);
613b0d17 2065 if (i >= 0 &&
a8173e43 2066 (be16_to_cpu(ddf->phys->entries[i].state)
2067 & DDF_Online) &&
2068 !(be16_to_cpu(ddf->phys->entries[i].state)
2069 & DDF_Failed))
a5d85af7
N
2070 map[i] = 1;
2071 }
2072 }
a322f70c
DW
2073}
2074
/*
 * Handle an "update" request for DDF metadata.
 *
 * For 'assemble' and 'force' the caller needs a non-zero return if any
 * change was made; for the others the return value is ignored.
 * Update options are:
 *  force-one : This device looks a bit old but needs to be included,
 *        update age info appropriately.
 *  assemble: clear any 'faulty' flag to allow this device to
 *		be assembled.
 *  force-array: Array is degraded but being forced, mark it clean
 *	   if that will be needed to assemble it.
 *
 *  newdev:  not used ????
 *  grow:  Array has gained a new device - this is currently for
 *		linear only
 *  resync: mark as dirty so a resync will happen.
 *  uuid:  Change the uuid of the array to match what is given
 *  homehost:  update the recorded homehost
 *  name:  update the name - preserving the homehost
 *  _reshape_progress: record new reshape_progress position.
 *
 *  Following are not relevant for this version:
 *  sparc2.2 : update from old dodgey metadata
 *  super-minor: change the preferred_minor number
 *  summaries:  update redundant counters.
 *
 * Nothing is actually modified here.  "force-*" and "assemble" need no
 * handling because there is no need to 'trick' the kernel: when the
 * metadata is first updated to activate the array, all the implied
 * modifications will just happen.  "grow", "resync" and
 * "_reshape_progress" are accepted but not implemented (reshape is not
 * supported yet).  "homehost" (which would live in controller vendor_data)
 * and "name" (which would live in the virtual entry name) are rejected.
 *
 * Returns 0 for "grow", "resync", "_reshape_progress" and "assemble",
 * and -1 for every other update string.
 */
static int update_super_ddf(struct supertype *st, struct mdinfo *info,
			    char *update,
			    char *devname, int verbose,
			    int uuid_set, char *homehost)
{
	/* updates that are accepted as successful no-ops */
	static const char *const accepted[] = {
		"grow",
		"resync",
		"_reshape_progress",
		"assemble",
	};
	unsigned int k;

	for (k = 0; k < sizeof(accepted) / sizeof(accepted[0]); k++)
		if (strcmp(update, accepted[k]) == 0)
			return 0;

	/* "homehost", "name" and anything unrecognised */
	return -1;
}
2143
5f8097be
NB
2144static void make_header_guid(char *guid)
2145{
60931cf9 2146 be32 stamp;
5f8097be
NB
2147 /* Create a DDF Header of Virtual Disk GUID */
2148
2149 /* 24 bytes of fiction required.
2150 * first 8 are a 'vendor-id' - "Linux-MD"
2151 * next 8 are controller type.. how about 0X DEAD BEEF 0000 0000
2152 * Remaining 8 random number plus timestamp
2153 */
2154 memcpy(guid, T10, sizeof(T10));
60931cf9 2155 stamp = cpu_to_be32(0xdeadbeef);
5f8097be 2156 memcpy(guid+8, &stamp, 4);
60931cf9 2157 stamp = cpu_to_be32(0);
5f8097be 2158 memcpy(guid+12, &stamp, 4);
60931cf9 2159 stamp = cpu_to_be32(time(0) - DECADE);
5f8097be 2160 memcpy(guid+16, &stamp, 4);
60931cf9 2161 stamp._v32 = random32();
5f8097be 2162 memcpy(guid+20, &stamp, 4);
5f8097be 2163}
59e36268 2164
fb9d0acb 2165static unsigned int find_unused_vde(const struct ddf_super *ddf)
2166{
2167 unsigned int i;
a8173e43 2168 for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) {
fb9d0acb 2169 if (all_ff(ddf->virt->entries[i].guid))
2170 return i;
2171 }
2172 return DDF_NOTFOUND;
2173}
2174
2175static unsigned int find_vde_by_name(const struct ddf_super *ddf,
2176 const char *name)
2177{
2178 unsigned int i;
2179 if (name == NULL)
2180 return DDF_NOTFOUND;
a8173e43 2181 for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) {
fb9d0acb 2182 if (all_ff(ddf->virt->entries[i].guid))
2183 continue;
2184 if (!strncmp(name, ddf->virt->entries[i].name,
2185 sizeof(ddf->virt->entries[i].name)))
2186 return i;
2187 }
2188 return DDF_NOTFOUND;
2189}
2190
4441541f 2191#ifndef MDASSEMBLE
fb9d0acb 2192static unsigned int find_vde_by_guid(const struct ddf_super *ddf,
2193 const char *guid)
2194{
2195 unsigned int i;
2196 if (guid == NULL || all_ff(guid))
2197 return DDF_NOTFOUND;
a8173e43 2198 for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++)
fb9d0acb 2199 if (!memcmp(ddf->virt->entries[i].guid, guid, DDF_GUID_LEN))
2200 return i;
2201 return DDF_NOTFOUND;
2202}
4441541f 2203#endif
fb9d0acb 2204
78e44928
NB
2205static int init_super_ddf_bvd(struct supertype *st,
2206 mdu_array_info_t *info,
2207 unsigned long long size,
2208 char *name, char *homehost,
83cd1e97 2209 int *uuid, unsigned long long data_offset);
78e44928 2210
a322f70c
DW
2211static int init_super_ddf(struct supertype *st,
2212 mdu_array_info_t *info,
2213 unsigned long long size, char *name, char *homehost,
83cd1e97 2214 int *uuid, unsigned long long data_offset)
a322f70c
DW
2215{
2216 /* This is primarily called by Create when creating a new array.
2217 * We will then get add_to_super called for each component, and then
2218 * write_init_super called to write it out to each device.
2219 * For DDF, Create can create on fresh devices or on a pre-existing
2220 * array.
2221 * To create on a pre-existing array a different method will be called.
2222 * This one is just for fresh drives.
2223 *
2224 * We need to create the entire 'ddf' structure which includes:
2225 * DDF headers - these are easy.
2226 * Controller data - a Sector describing this controller .. not that
2227 * this is a controller exactly.
2228 * Physical Disk Record - one entry per device, so
2229 * leave plenty of space.
2230 * Virtual Disk Records - again, just leave plenty of space.
2231 * This just lists VDs, doesn't give details
2232 * Config records - describes the VDs that use this disk
2233 * DiskData - describes 'this' device.
2234 * BadBlockManagement - empty
2235 * Diag Space - empty
2236 * Vendor Logs - Could we put bitmaps here?
2237 *
2238 */
2239 struct ddf_super *ddf;
2240 char hostname[17];
2241 int hostlen;
a322f70c
DW
2242 int max_phys_disks, max_virt_disks;
2243 unsigned long long sector;
2244 int clen;
2245 int i;
2246 int pdsize, vdsize;
2247 struct phys_disk *pd;
2248 struct virtual_disk *vd;
2249
83cd1e97 2250 if (data_offset != INVALID_SECTORS) {
ed503f89 2251 pr_err("data-offset not supported by DDF\n");
83cd1e97
N
2252 return 0;
2253 }
2254
78e44928 2255 if (st->sb)
83cd1e97
N
2256 return init_super_ddf_bvd(st, info, size, name, homehost, uuid,
2257 data_offset);
ba7eb04f 2258
3d2c4fc7 2259 if (posix_memalign((void**)&ddf, 512, sizeof(*ddf)) != 0) {
e7b84f9d 2260 pr_err("%s could not allocate superblock\n", __func__);
3d2c4fc7
DW
2261 return 0;
2262 }
6264b437 2263 memset(ddf, 0, sizeof(*ddf));
a322f70c
DW
2264 ddf->dlist = NULL; /* no physical disks yet */
2265 ddf->conflist = NULL; /* No virtual disks yet */
955e9ea1
DW
2266 st->sb = ddf;
2267
2268 if (info == NULL) {
2269 /* zeroing superblock */
2270 return 0;
2271 }
a322f70c
DW
2272
2273 /* At least 32MB *must* be reserved for the ddf. So let's just
2274 * start 32MB from the end, and put the primary header there.
2275 * Don't do secondary for now.
2276 * We don't know exactly where that will be yet as it could be
2277 * different on each device. To just set up the lengths.
2278 *
2279 */
2280
2281 ddf->anchor.magic = DDF_HEADER_MAGIC;
5f8097be 2282 make_header_guid(ddf->anchor.guid);
a322f70c 2283
59e36268 2284 memcpy(ddf->anchor.revision, DDF_REVISION_2, 8);
60931cf9 2285 ddf->anchor.seq = cpu_to_be32(1);
2286 ddf->anchor.timestamp = cpu_to_be32(time(0) - DECADE);
a322f70c
DW
2287 ddf->anchor.openflag = 0xFF;
2288 ddf->anchor.foreignflag = 0;
2289 ddf->anchor.enforcegroups = 0; /* Is this best?? */
2290 ddf->anchor.pad0 = 0xff;
2291 memset(ddf->anchor.pad1, 0xff, 12);
2292 memset(ddf->anchor.header_ext, 0xff, 32);
9d0c6b70 2293 ddf->anchor.primary_lba = cpu_to_be64(~(__u64)0);
2294 ddf->anchor.secondary_lba = cpu_to_be64(~(__u64)0);
a322f70c
DW
2295 ddf->anchor.type = DDF_HEADER_ANCHOR;
2296 memset(ddf->anchor.pad2, 0xff, 3);
60931cf9 2297 ddf->anchor.workspace_len = cpu_to_be32(32768); /* Must be reserved */
9d0c6b70 2298 /* Put this at bottom of 32M reserved.. */
2299 ddf->anchor.workspace_lba = cpu_to_be64(~(__u64)0);
a322f70c 2300 max_phys_disks = 1023; /* Should be enough */
a8173e43 2301 ddf->anchor.max_pd_entries = cpu_to_be16(max_phys_disks);
a322f70c 2302 max_virt_disks = 255;
a8173e43 2303 ddf->anchor.max_vd_entries = cpu_to_be16(max_virt_disks); /* ?? */
2304 ddf->anchor.max_partitions = cpu_to_be16(64); /* ?? */
a322f70c 2305 ddf->max_part = 64;
8c3b8c2c 2306 ddf->mppe = 256;
59e36268 2307 ddf->conf_rec_len = 1 + ROUND_UP(ddf->mppe * (4+8), 512)/512;
a8173e43 2308 ddf->anchor.config_record_len = cpu_to_be16(ddf->conf_rec_len);
2309 ddf->anchor.max_primary_element_entries = cpu_to_be16(ddf->mppe);
a322f70c 2310 memset(ddf->anchor.pad3, 0xff, 54);
a322f70c
DW
2311 /* controller sections is one sector long immediately
2312 * after the ddf header */
2313 sector = 1;
60931cf9 2314 ddf->anchor.controller_section_offset = cpu_to_be32(sector);
2315 ddf->anchor.controller_section_length = cpu_to_be32(1);
a322f70c
DW
2316 sector += 1;
2317
2318 /* phys is 8 sectors after that */
2319 pdsize = ROUND_UP(sizeof(struct phys_disk) +
2320 sizeof(struct phys_disk_entry)*max_phys_disks,
2321 512);
2322 switch(pdsize/512) {
2323 case 2: case 8: case 32: case 128: case 512: break;
2324 default: abort();
2325 }
60931cf9 2326 ddf->anchor.phys_section_offset = cpu_to_be32(sector);
a322f70c 2327 ddf->anchor.phys_section_length =
60931cf9 2328 cpu_to_be32(pdsize/512); /* max_primary_element_entries/8 */
a322f70c
DW
2329 sector += pdsize/512;
2330
2331 /* virt is another 32 sectors */
2332 vdsize = ROUND_UP(sizeof(struct virtual_disk) +
2333 sizeof(struct virtual_entry) * max_virt_disks,
2334 512);
2335 switch(vdsize/512) {
2336 case 2: case 8: case 32: case 128: case 512: break;
2337 default: abort();
2338 }
60931cf9 2339 ddf->anchor.virt_section_offset = cpu_to_be32(sector);
a322f70c 2340 ddf->anchor.virt_section_length =
60931cf9 2341 cpu_to_be32(vdsize/512); /* max_vd_entries/8 */
a322f70c
DW
2342 sector += vdsize/512;
2343
59e36268 2344 clen = ddf->conf_rec_len * (ddf->max_part+1);
60931cf9 2345 ddf->anchor.config_section_offset = cpu_to_be32(sector);
2346 ddf->anchor.config_section_length = cpu_to_be32(clen);
a322f70c
DW
2347 sector += clen;
2348
60931cf9 2349 ddf->anchor.data_section_offset = cpu_to_be32(sector);
2350 ddf->anchor.data_section_length = cpu_to_be32(1);
a322f70c
DW
2351 sector += 1;
2352
60931cf9 2353 ddf->anchor.bbm_section_length = cpu_to_be32(0);
2354 ddf->anchor.bbm_section_offset = cpu_to_be32(0xFFFFFFFF);
2355 ddf->anchor.diag_space_length = cpu_to_be32(0);
2356 ddf->anchor.diag_space_offset = cpu_to_be32(0xFFFFFFFF);
2357 ddf->anchor.vendor_length = cpu_to_be32(0);
2358 ddf->anchor.vendor_offset = cpu_to_be32(0xFFFFFFFF);
a322f70c
DW
2359
2360 memset(ddf->anchor.pad4, 0xff, 256);
2361
2362 memcpy(&ddf->primary, &ddf->anchor, 512);
2363 memcpy(&ddf->secondary, &ddf->anchor, 512);
2364
2365 ddf->primary.openflag = 1; /* I guess.. */
2366 ddf->primary.type = DDF_HEADER_PRIMARY;
2367
2368 ddf->secondary.openflag = 1; /* I guess.. */
2369 ddf->secondary.type = DDF_HEADER_SECONDARY;
2370
2371 ddf->active = &ddf->primary;
2372
2373 ddf->controller.magic = DDF_CONTROLLER_MAGIC;
2374
2375 /* 24 more bytes of fiction required.
2376 * first 8 are a 'vendor-id' - "Linux-MD"
2377 * Remaining 16 are serial number.... maybe a hostname would do?
2378 */
2379 memcpy(ddf->controller.guid, T10, sizeof(T10));
1ba6bff9
DW
2380 gethostname(hostname, sizeof(hostname));
2381 hostname[sizeof(hostname) - 1] = 0;
a322f70c
DW
2382 hostlen = strlen(hostname);
2383 memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen);
2384 for (i = strlen(T10) ; i+hostlen < 24; i++)
2385 ddf->controller.guid[i] = ' ';
2386
a8173e43 2387 ddf->controller.type.vendor_id = cpu_to_be16(0xDEAD);
2388 ddf->controller.type.device_id = cpu_to_be16(0xBEEF);
2389 ddf->controller.type.sub_vendor_id = cpu_to_be16(0);
2390 ddf->controller.type.sub_device_id = cpu_to_be16(0);
a322f70c
DW
2391 memcpy(ddf->controller.product_id, "What Is My PID??", 16);
2392 memset(ddf->controller.pad, 0xff, 8);
2393 memset(ddf->controller.vendor_data, 0xff, 448);
a9e1c11d
N
2394 if (homehost && strlen(homehost) < 440)
2395 strcpy((char*)ddf->controller.vendor_data, homehost);
a322f70c 2396
3d2c4fc7 2397 if (posix_memalign((void**)&pd, 512, pdsize) != 0) {
e7b84f9d 2398 pr_err("%s could not allocate pd\n", __func__);
3d2c4fc7
DW
2399 return 0;
2400 }
6416d527 2401 ddf->phys = pd;
a322f70c
DW
2402 ddf->pdsize = pdsize;
2403
2404 memset(pd, 0xff, pdsize);
2405 memset(pd, 0, sizeof(*pd));
076515ba 2406 pd->magic = DDF_PHYS_RECORDS_MAGIC;
a8173e43 2407 pd->used_pdes = cpu_to_be16(0);
2408 pd->max_pdes = cpu_to_be16(max_phys_disks);
a322f70c 2409 memset(pd->pad, 0xff, 52);
4a3ca8ac 2410 for (i = 0; i < max_phys_disks; i++)
2411 memset(pd->entries[i].guid, 0xff, DDF_GUID_LEN);
a322f70c 2412
3d2c4fc7 2413 if (posix_memalign((void**)&vd, 512, vdsize) != 0) {
e7b84f9d 2414 pr_err("%s could not allocate vd\n", __func__);
3d2c4fc7
DW
2415 return 0;
2416 }
6416d527 2417 ddf->virt = vd;
a322f70c
DW
2418 ddf->vdsize = vdsize;
2419 memset(vd, 0, vdsize);
2420 vd->magic = DDF_VIRT_RECORDS_MAGIC;
a8173e43 2421 vd->populated_vdes = cpu_to_be16(0);
2422 vd->max_vdes = cpu_to_be16(max_virt_disks);
a322f70c
DW
2423 memset(vd->pad, 0xff, 52);
2424
5f8097be
NB
2425 for (i=0; i<max_virt_disks; i++)
2426 memset(&vd->entries[i], 0xff, sizeof(struct virtual_entry));
2427
a322f70c 2428 st->sb = ddf;
7d5a7ff3 2429 ddf_set_updates_pending(ddf);
a322f70c
DW
2430 return 1;
2431}
2432
5f8097be
NB
/*
 * Convert a chunk size in bytes to the shift stored in the metadata:
 * the zero-based position of the lowest set bit of chunksize/512.
 * Yields -1 when chunksize/512 is zero.
 */
static int chunk_to_shift(int chunksize)
{
	int sectors = chunksize / 512;
	int lowest_bit = ffs(sectors);	/* 1-based, 0 if no bit is set */

	return lowest_bit - 1;
}
2437
0e600426 2438#ifndef MDASSEMBLE
59e36268
NB
/* A used region of a physical device. */
struct extent {
	unsigned long long start;	/* first block of the region */
	unsigned long long size;	/* length in blocks; 0 marks the end of a list */
};

/* qsort() comparator: orders extents by ascending start. */
static int cmp_extent(const void *av, const void *bv)
{
	const struct extent *lhs = av;
	const struct extent *rhs = bv;

	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
}
2452
78e44928 2453static struct extent *get_extents(struct ddf_super *ddf, struct dl *dl)
59e36268
NB
2454{
2455 /* find a list of used extents on the give physical device
2456 * (dnum) of the given ddf.
2457 * Return a malloced array of 'struct extent'
2458
613b0d17 2459 * FIXME ignore DDF_Legacy devices?
59e36268
NB
2460
2461 */
2462 struct extent *rv;
2463 int n = 0;
fcc22180 2464 unsigned int i;
60056e1c 2465 __u16 state = be16_to_cpu(ddf->phys->entries[dl->pdnum].state);
2466
2467 if ((state & (DDF_Online|DDF_Failed|DDF_Missing)) != DDF_Online)
2468 return NULL;
59e36268 2469
503975b9 2470 rv = xmalloc(sizeof(struct extent) * (ddf->max_part + 2));
59e36268
NB
2471
2472 for (i = 0; i < ddf->max_part; i++) {
fcc22180 2473 const struct vd_config *bvd;
2474 unsigned int ibvd;
59e36268 2475 struct vcl *v = dl->vlist[i];
fcc22180 2476 if (v == NULL ||
2477 get_pd_index_from_refnum(v, dl->disk.refnum, ddf->mppe,
2478 &bvd, &ibvd) == DDF_NOTFOUND)
59e36268 2479 continue;
9d0c6b70 2480 rv[n].start = be64_to_cpu(LBA_OFFSET(ddf, bvd)[ibvd]);
2481 rv[n].size = be64_to_cpu(bvd->blocks);
fcc22180 2482 n++;
59e36268
NB
2483 }
2484 qsort(rv, n, sizeof(*rv), cmp_extent);
2485
9d0c6b70 2486 rv[n].start = be64_to_cpu(ddf->phys->entries[dl->pdnum].config_size);
59e36268
NB
2487 rv[n].size = 0;
2488 return rv;
2489}
0e600426 2490#endif
59e36268 2491
5f8097be
NB
2492static int init_super_ddf_bvd(struct supertype *st,
2493 mdu_array_info_t *info,
2494 unsigned long long size,
2495 char *name, char *homehost,
83cd1e97 2496 int *uuid, unsigned long long data_offset)
5f8097be
NB
2497{
2498 /* We are creating a BVD inside a pre-existing container.
2499 * so st->sb is already set.
2500 * We need to create a new vd_config and a new virtual_entry
2501 */
2502 struct ddf_super *ddf = st->sb;
5aaf6c7b 2503 unsigned int venum, i;
5f8097be
NB
2504 struct virtual_entry *ve;
2505 struct vcl *vcl;
2506 struct vd_config *vc;
5f8097be 2507
fb9d0acb 2508 if (find_vde_by_name(ddf, name) != DDF_NOTFOUND) {
2509 pr_err("This ddf already has an array called %s\n", name);
5f8097be
NB
2510 return 0;
2511 }
fb9d0acb 2512 venum = find_unused_vde(ddf);
2513 if (venum == DDF_NOTFOUND) {
2514 pr_err("Cannot find spare slot for virtual disk\n");
5f8097be
NB
2515 return 0;
2516 }
2517 ve = &ddf->virt->entries[venum];
2518
2519 /* A Virtual Disk GUID contains the T10 Vendor ID, controller type,
2520 * timestamp, random number
2521 */
2522 make_header_guid(ve->guid);
a8173e43 2523 ve->unit = cpu_to_be16(info->md_minor);
5f8097be 2524 ve->pad0 = 0xFFFF;
a8173e43 2525 ve->guid_crc._v16 = crc32(0, (unsigned char *)ddf->anchor.guid,
2526 DDF_GUID_LEN);
2527 ve->type = cpu_to_be16(0);
7a7cc504
NB
2528 ve->state = DDF_state_degraded; /* Will be modified as devices are added */
2529 if (info->state & 1) /* clean */
2530 ve->init_state = DDF_init_full;
2531 else
2532 ve->init_state = DDF_init_not;
2533
5f8097be
NB
2534 memset(ve->pad1, 0xff, 14);
2535 memset(ve->name, ' ', 16);
2536 if (name)
2537 strncpy(ve->name, name, 16);
2538 ddf->virt->populated_vdes =
a8173e43 2539 cpu_to_be16(be16_to_cpu(ddf->virt->populated_vdes)+1);
5f8097be
NB
2540
2541 /* Now create a new vd_config */
3d2c4fc7
DW
2542 if (posix_memalign((void**)&vcl, 512,
2543 (offsetof(struct vcl, conf) + ddf->conf_rec_len * 512)) != 0) {
e7b84f9d 2544 pr_err("%s could not allocate vd_config\n", __func__);
3d2c4fc7
DW
2545 return 0;
2546 }
59e36268
NB
2547 vcl->vcnum = venum;
2548 vcl->block_sizes = NULL; /* FIXME not for CONCAT */
5f8097be
NB
2549 vc = &vcl->conf;
2550
2551 vc->magic = DDF_VD_CONF_MAGIC;
2552 memcpy(vc->guid, ve->guid, DDF_GUID_LEN);
60931cf9 2553 vc->timestamp = cpu_to_be32(time(0)-DECADE);
2554 vc->seqnum = cpu_to_be32(1);
5f8097be 2555 memset(vc->pad0, 0xff, 24);
5f8097be 2556 vc->chunk_shift = chunk_to_shift(info->chunk_size);
a3163bf0 2557 if (layout_md2ddf(info, vc) == -1 ||
a8173e43 2558 be16_to_cpu(vc->prim_elmnt_count) > ddf->mppe) {
a3163bf0 2559 pr_err("%s: unsupported RAID level/layout %d/%d with %d disks\n",
2560 __func__, info->level, info->layout, info->raid_disks);
2561 free(vcl);
2562 return 0;
2563 }
5f8097be 2564 vc->sec_elmnt_seq = 0;
3c48f7be 2565 if (alloc_other_bvds(ddf, vcl) != 0) {
2566 pr_err("%s could not allocate other bvds\n",
2567 __func__);
2568 free(vcl);
2569 return 0;
2570 }
9d0c6b70 2571 vc->blocks = cpu_to_be64(info->size * 2);
2572 vc->array_blocks = cpu_to_be64(
5f8097be
NB
2573 calc_array_size(info->level, info->raid_disks, info->layout,
2574 info->chunk_size, info->size*2));
2575 memset(vc->pad1, 0xff, 8);
60931cf9 2576 vc->spare_refs[0] = cpu_to_be32(0xffffffff);
2577 vc->spare_refs[1] = cpu_to_be32(0xffffffff);
2578 vc->spare_refs[2] = cpu_to_be32(0xffffffff);
2579 vc->spare_refs[3] = cpu_to_be32(0xffffffff);
2580 vc->spare_refs[4] = cpu_to_be32(0xffffffff);
2581 vc->spare_refs[5] = cpu_to_be32(0xffffffff);
2582 vc->spare_refs[6] = cpu_to_be32(0xffffffff);
2583 vc->spare_refs[7] = cpu_to_be32(0xffffffff);
5f8097be
NB
2584 memset(vc->cache_pol, 0, 8);
2585 vc->bg_rate = 0x80;
2586 memset(vc->pad2, 0xff, 3);
2587 memset(vc->pad3, 0xff, 52);
2588 memset(vc->pad4, 0xff, 192);
2589 memset(vc->v0, 0xff, 32);
2590 memset(vc->v1, 0xff, 32);
2591 memset(vc->v2, 0xff, 16);
2592 memset(vc->v3, 0xff, 16);
2593 memset(vc->vendor, 0xff, 32);
598f0d58 2594
8c3b8c2c 2595 memset(vc->phys_refnum, 0xff, 4*ddf->mppe);
e5a2a3cf 2596 memset(vc->phys_refnum+ddf->mppe, 0x00, 8*ddf->mppe);
5f8097be 2597
5aaf6c7b 2598 for (i = 1; i < vc->sec_elmnt_count; i++) {
2599 memcpy(vcl->other_bvds[i-1], vc, ddf->conf_rec_len * 512);
2600 vcl->other_bvds[i-1]->sec_elmnt_seq = i;
2601 }
2602
5f8097be
NB
2603 vcl->next = ddf->conflist;
2604 ddf->conflist = vcl;
d2ca6449 2605 ddf->currentconf = vcl;
7d5a7ff3 2606 ddf_set_updates_pending(ddf);
5f8097be
NB
2607 return 1;
2608}
2609
63eb2454 2610
0e600426 2611#ifndef MDASSEMBLE
4441541f
N
2612static int get_svd_state(const struct ddf_super *, const struct vcl *);
2613
5f8097be
NB
2614static void add_to_super_ddf_bvd(struct supertype *st,
2615 mdu_disk_info_t *dk, int fd, char *devname)
2616{
2617 /* fd and devname identify a device with-in the ddf container (st).
2618 * dk identifies a location in the new BVD.
2619 * We need to find suitable free space in that device and update
2620 * the phys_refnum and lba_offset for the newly created vd_config.
2621 * We might also want to update the type in the phys_disk
5575e7d9 2622 * section.
8592f29d
N
2623 *
2624 * Alternately: fd == -1 and we have already chosen which device to
2625 * use and recorded in dlist->raid_disk;
5f8097be
NB
2626 */
2627 struct dl *dl;
2628 struct ddf_super *ddf = st->sb;
2629 struct vd_config *vc;
f21e18ca 2630 unsigned int i;
59e36268
NB
2631 unsigned long long blocks, pos, esize;
2632 struct extent *ex;
475ccbdb 2633 unsigned int raid_disk = dk->raid_disk;
5f8097be 2634
8592f29d
N
2635 if (fd == -1) {
2636 for (dl = ddf->dlist; dl ; dl = dl->next)
2637 if (dl->raiddisk == dk->raid_disk)
2638 break;
2639 } else {
2640 for (dl = ddf->dlist; dl ; dl = dl->next)
2641 if (dl->major == dk->major &&
2642 dl->minor == dk->minor)
2643 break;
2644 }
5f8097be
NB
2645 if (!dl || ! (dk->state & (1<<MD_DISK_SYNC)))
2646 return;
2647
d2ca6449 2648 vc = &ddf->currentconf->conf;
475ccbdb 2649 if (vc->sec_elmnt_count > 1) {
a8173e43 2650 unsigned int n = be16_to_cpu(vc->prim_elmnt_count);
475ccbdb 2651 if (raid_disk >= n)
2652 vc = ddf->currentconf->other_bvds[raid_disk / n - 1];
2653 raid_disk %= n;
2654 }
59e36268
NB
2655
2656 ex = get_extents(ddf, dl);
2657 if (!ex)
2658 return;
2659
2660 i = 0; pos = 0;
9d0c6b70 2661 blocks = be64_to_cpu(vc->blocks);
d2ca6449
NB
2662 if (ddf->currentconf->block_sizes)
2663 blocks = ddf->currentconf->block_sizes[dk->raid_disk];
59e36268
NB
2664
2665 do {
2666 esize = ex[i].start - pos;
2667 if (esize >= blocks)
2668 break;
2669 pos = ex[i].start + ex[i].size;
2670 i++;
2671 } while (ex[i-1].size);
2672
2673 free(ex);
2674 if (esize < blocks)
2675 return;
2676
d2ca6449 2677 ddf->currentdev = dk->raid_disk;
475ccbdb 2678 vc->phys_refnum[raid_disk] = dl->disk.refnum;
9d0c6b70 2679 LBA_OFFSET(ddf, vc)[raid_disk] = cpu_to_be64(pos);
5f8097be 2680
f21e18ca 2681 for (i = 0; i < ddf->max_part ; i++)
5575e7d9
NB
2682 if (dl->vlist[i] == NULL)
2683 break;
2684 if (i == ddf->max_part)
2685 return;
d2ca6449 2686 dl->vlist[i] = ddf->currentconf;
5f8097be 2687
8592f29d
N
2688 if (fd >= 0)
2689 dl->fd = fd;
2690 if (devname)
2691 dl->devname = devname;
7a7cc504 2692
63eb2454 2693 /* Check if we can mark array as optimal yet */
d2ca6449 2694 i = ddf->currentconf->vcnum;
63eb2454 2695 ddf->virt->entries[i].state =
2696 (ddf->virt->entries[i].state & ~DDF_state_mask)
2697 | get_svd_state(ddf, ddf->currentconf);
a8173e43 2698 be16_clear(ddf->phys->entries[dl->pdnum].type,
2699 cpu_to_be16(DDF_Global_Spare));
2700 be16_set(ddf->phys->entries[dl->pdnum].type,
2701 cpu_to_be16(DDF_Active_in_VD));
4f9bbe63 2702 dprintf("%s: added disk %d/%08x to VD %d/%s as disk %d\n",
60931cf9 2703 __func__, dl->pdnum, be32_to_cpu(dl->disk.refnum),
4f9bbe63 2704 ddf->currentconf->vcnum, guid_str(vc->guid),
2705 dk->raid_disk);
7d5a7ff3 2706 ddf_set_updates_pending(ddf);
5f8097be
NB
2707}
2708
4a3ca8ac 2709static unsigned int find_unused_pde(const struct ddf_super *ddf)
2710{
2711 unsigned int i;
a8173e43 2712 for (i = 0; i < be16_to_cpu(ddf->phys->max_pdes); i++) {
4a3ca8ac 2713 if (all_ff(ddf->phys->entries[i].guid))
2714 return i;
2715 }
2716 return DDF_NOTFOUND;
2717}
2718
105e6e93 2719static void _set_config_size(struct phys_disk_entry *pde, const struct dl *dl)
2720{
2721 __u64 cfs, t;
2722 cfs = min(dl->size - 32*1024*2ULL, be64_to_cpu(dl->primary_lba));
2723 t = be64_to_cpu(dl->secondary_lba);
2724 if (t != ~(__u64)0)
2725 cfs = min(cfs, t);
2726 /*
2727 * Some vendor DDF structures interpret workspace_lba
2728 * very differently then us. Make a sanity check on the value.
2729 */
2730 t = be64_to_cpu(dl->workspace_lba);
2731 if (t < cfs) {
2732 __u64 wsp = cfs - t;
2733 if (wsp > 1024*1024*2ULL && wsp > dl->size / 16) {
2734 pr_err("%s: %x:%x: workspace size 0x%llx too big, ignoring\n",
2735 __func__, dl->major, dl->minor, wsp);
2736 } else
2737 cfs = t;
2738 }
2739 pde->config_size = cpu_to_be64(cfs);
2740 dprintf("%s: %x:%x config_size %llx, DDF structure is %llx blocks\n",
2741 __func__, dl->major, dl->minor, cfs, dl->size-cfs);
2742}
2743
a322f70c
DW
2744/* add a device to a container, either while creating it or while
2745 * expanding a pre-existing container
2746 */
f20c3968 2747static int add_to_super_ddf(struct supertype *st,
72ca9bcf
N
2748 mdu_disk_info_t *dk, int fd, char *devname,
2749 unsigned long long data_offset)
a322f70c
DW
2750{
2751 struct ddf_super *ddf = st->sb;
2752 struct dl *dd;
2753 time_t now;
2754 struct tm *tm;
2755 unsigned long long size;
2756 struct phys_disk_entry *pde;
f21e18ca 2757 unsigned int n, i;
a322f70c 2758 struct stat stb;
90fa1a29 2759 __u32 *tptr;
a322f70c 2760
78e44928
NB
2761 if (ddf->currentconf) {
2762 add_to_super_ddf_bvd(st, dk, fd, devname);
f20c3968 2763 return 0;
78e44928
NB
2764 }
2765
a322f70c
DW
2766 /* This is device numbered dk->number. We need to create
2767 * a phys_disk entry and a more detailed disk_data entry.
2768 */
2769 fstat(fd, &stb);
4a3ca8ac 2770 n = find_unused_pde(ddf);
2771 if (n == DDF_NOTFOUND) {
2772 pr_err("%s: No free slot in array, cannot add disk\n",
2773 __func__);
2774 return 1;
2775 }
2776 pde = &ddf->phys->entries[n];
4ee8cca9 2777 get_dev_size(fd, NULL, &size);
2778 if (size <= 32*1024*1024) {
2779 pr_err("%s: device size must be at least 32MB\n",
2780 __func__);
2781 return 1;
2782 }
2783 size >>= 9;
4a3ca8ac 2784
3d2c4fc7
DW
2785 if (posix_memalign((void**)&dd, 512,
2786 sizeof(*dd) + sizeof(dd->vlist[0]) * ddf->max_part) != 0) {
e7b84f9d
N
2787 pr_err("%s could allocate buffer for new disk, aborting\n",
2788 __func__);
f20c3968 2789 return 1;
3d2c4fc7 2790 }
a322f70c
DW
2791 dd->major = major(stb.st_rdev);
2792 dd->minor = minor(stb.st_rdev);
2793 dd->devname = devname;
a322f70c 2794 dd->fd = fd;
b2280677 2795 dd->spare = NULL;
a322f70c
DW
2796
2797 dd->disk.magic = DDF_PHYS_DATA_MAGIC;
2798 now = time(0);
2799 tm = localtime(&now);
2800 sprintf(dd->disk.guid, "%8s%04d%02d%02d",
2801 T10, tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday);
90fa1a29
JS
2802 tptr = (__u32 *)(dd->disk.guid + 16);
2803 *tptr++ = random32();
2804 *tptr = random32();
a322f70c 2805
59e36268
NB
2806 do {
2807 /* Cannot be bothered finding a CRC of some irrelevant details*/
60931cf9 2808 dd->disk.refnum._v32 = random32();
a8173e43 2809 for (i = be16_to_cpu(ddf->active->max_pd_entries);
f21e18ca 2810 i > 0; i--)
60931cf9 2811 if (be32_eq(ddf->phys->entries[i-1].refnum,
2812 dd->disk.refnum))
59e36268 2813 break;
f21e18ca 2814 } while (i > 0);
59e36268 2815
a322f70c
DW
2816 dd->disk.forced_ref = 1;
2817 dd->disk.forced_guid = 1;
2818 memset(dd->disk.vendor, ' ', 32);
2819 memcpy(dd->disk.vendor, "Linux", 5);
2820 memset(dd->disk.pad, 0xff, 442);
b2280677 2821 for (i = 0; i < ddf->max_part ; i++)
a322f70c
DW
2822 dd->vlist[i] = NULL;
2823
5575e7d9
NB
2824 dd->pdnum = n;
2825
2cc2983d
N
2826 if (st->update_tail) {
2827 int len = (sizeof(struct phys_disk) +
2828 sizeof(struct phys_disk_entry));
2829 struct phys_disk *pd;
2830
503975b9 2831 pd = xmalloc(len);
2cc2983d 2832 pd->magic = DDF_PHYS_RECORDS_MAGIC;
a8173e43 2833 pd->used_pdes = cpu_to_be16(n);
2cc2983d
N
2834 pde = &pd->entries[0];
2835 dd->mdupdate = pd;
4a3ca8ac 2836 } else
a8173e43 2837 ddf->phys->used_pdes = cpu_to_be16(
2838 1 + be16_to_cpu(ddf->phys->used_pdes));
a322f70c
DW
2839
2840 memcpy(pde->guid, dd->disk.guid, DDF_GUID_LEN);
2841 pde->refnum = dd->disk.refnum;
a8173e43 2842 pde->type = cpu_to_be16(DDF_Forced_PD_GUID | DDF_Global_Spare);
2843 pde->state = cpu_to_be16(DDF_Online);
4ee8cca9 2844 dd->size = size;
2845 /*
2846 * If there is already a device in dlist, try to reserve the same
2847 * amount of workspace. Otherwise, use 32MB.
2848 * We checked disk size above already.
2849 */
2850#define __calc_lba(new, old, lba, mb) do { \
2851 unsigned long long dif; \
2852 if ((old) != NULL) \
9d0c6b70 2853 dif = (old)->size - be64_to_cpu((old)->lba); \
4ee8cca9 2854 else \
2855 dif = (new)->size; \
2856 if ((new)->size > dif) \
9d0c6b70 2857 (new)->lba = cpu_to_be64((new)->size - dif); \
4ee8cca9 2858 else \
9d0c6b70 2859 (new)->lba = cpu_to_be64((new)->size - (mb*1024*2)); \
4ee8cca9 2860 } while (0)
2861 __calc_lba(dd, ddf->dlist, workspace_lba, 32);
2862 __calc_lba(dd, ddf->dlist, primary_lba, 16);
b95cb4b9
N
2863 if (ddf->dlist == NULL ||
2864 be64_to_cpu(ddf->dlist->secondary_lba) != ~(__u64)0)
2865 __calc_lba(dd, ddf->dlist, secondary_lba, 32);
105e6e93 2866 _set_config_size(pde, dd);
4ee8cca9 2867
a322f70c
DW
2868 sprintf(pde->path, "%17.17s","Information: nil") ;
2869 memset(pde->pad, 0xff, 6);
2870
2cc2983d
N
2871 if (st->update_tail) {
2872 dd->next = ddf->add_list;
2873 ddf->add_list = dd;
2874 } else {
2875 dd->next = ddf->dlist;
2876 ddf->dlist = dd;
7d5a7ff3 2877 ddf_set_updates_pending(ddf);
2cc2983d 2878 }
f20c3968
DW
2879
2880 return 0;
a322f70c
DW
2881}
2882
4dd968cc
N
2883static int remove_from_super_ddf(struct supertype *st, mdu_disk_info_t *dk)
2884{
2885 struct ddf_super *ddf = st->sb;
2886 struct dl *dl;
2887
2888 /* mdmon has noticed that this disk (dk->major/dk->minor) has
2889 * disappeared from the container.
2890 * We need to arrange that it disappears from the metadata and
2891 * internal data structures too.
2892 * Most of the work is done by ddf_process_update which edits
2893 * the metadata and closes the file handle and attaches the memory
2894 * where free_updates will free it.
2895 */
2896 for (dl = ddf->dlist; dl ; dl = dl->next)
2897 if (dl->major == dk->major &&
2898 dl->minor == dk->minor)
2899 break;
2900 if (!dl)
2901 return -1;
2902
2903 if (st->update_tail) {
2904 int len = (sizeof(struct phys_disk) +
2905 sizeof(struct phys_disk_entry));
2906 struct phys_disk *pd;
2907
503975b9 2908 pd = xmalloc(len);
4dd968cc 2909 pd->magic = DDF_PHYS_RECORDS_MAGIC;
a8173e43 2910 pd->used_pdes = cpu_to_be16(dl->pdnum);
2911 pd->entries[0].state = cpu_to_be16(DDF_Missing);
4dd968cc
N
2912 append_metadata_update(st, pd, len);
2913 }
2914 return 0;
2915}
4441541f 2916#endif
4dd968cc 2917
a322f70c
DW
2918/*
2919 * This is the write_init_super method for a ddf container. It is
2920 * called when creating a container or adding another device to a
2921 * container.
2922 */
42d5dfd9 2923#define NULL_CONF_SZ 4096
18a2f463 2924
3921e41a 2925static int __write_ddf_structure(struct dl *d, struct ddf_super *ddf, __u8 type)
a322f70c 2926{
7f798aca 2927 unsigned long long sector;
2928 struct ddf_header *header;
3921e41a 2929 int fd, i, n_config, conf_size, buf_size;
a4057a88 2930 int ret = 0;
3921e41a 2931 char *conf;
8e9387ac 2932
7f798aca 2933 fd = d->fd;
2934
2935 switch (type) {
2936 case DDF_HEADER_PRIMARY:
2937 header = &ddf->primary;
9d0c6b70 2938 sector = be64_to_cpu(header->primary_lba);
7f798aca 2939 break;
2940 case DDF_HEADER_SECONDARY:
2941 header = &ddf->secondary;
9d0c6b70 2942 sector = be64_to_cpu(header->secondary_lba);
7f798aca 2943 break;
2944 default:
2945 return 0;
2946 }
b95cb4b9
N
2947 if (sector == ~(__u64)0)
2948 return 0;
7f798aca 2949
2950 header->type = type;
a4057a88 2951 header->openflag = 1;
7f798aca 2952 header->crc = calc_crc(header, 512);
2953
2954 lseek64(fd, sector<<9, 0);
2955 if (write(fd, header, 512) < 0)
a4057a88 2956 goto out;
7f798aca 2957
2958 ddf->controller.crc = calc_crc(&ddf->controller, 512);
2959 if (write(fd, &ddf->controller, 512) < 0)
a4057a88 2960 goto out;
a322f70c 2961
7f798aca 2962 ddf->phys->crc = calc_crc(ddf->phys, ddf->pdsize);
2963 if (write(fd, ddf->phys, ddf->pdsize) < 0)
a4057a88 2964 goto out;
7f798aca 2965 ddf->virt->crc = calc_crc(ddf->virt, ddf->vdsize);
2966 if (write(fd, ddf->virt, ddf->vdsize) < 0)
a4057a88 2967 goto out;
7f798aca 2968
2969 /* Now write lots of config records. */
2970 n_config = ddf->max_part;
2971 conf_size = ddf->conf_rec_len * 512;
3921e41a
N
2972 conf = ddf->conf;
2973 buf_size = conf_size * (n_config + 1);
2974 if (!conf) {
2975 if (posix_memalign((void**)&conf, 512, buf_size) != 0)
2976 goto out;
2977 ddf->conf = conf;
2978 }
7f798aca 2979 for (i = 0 ; i <= n_config ; i++) {
e3c2a365 2980 struct vcl *c;
2981 struct vd_config *vdc = NULL;
2982 if (i == n_config) {
7f798aca 2983 c = (struct vcl *)d->spare;
e3c2a365 2984 if (c)
2985 vdc = &c->conf;
2986 } else {
2987 unsigned int dummy;
2988 c = d->vlist[i];
2989 if (c)
2990 get_pd_index_from_refnum(
2991 c, d->disk.refnum,
2992 ddf->mppe,
2993 (const struct vd_config **)&vdc,
2994 &dummy);
2995 }
7f798aca 2996 if (c) {
be9b9ef4 2997 dprintf("writing conf record %i on disk %08x for %s/%u\n",
60931cf9 2998 i, be32_to_cpu(d->disk.refnum),
ad60eea1 2999 guid_str(vdc->guid),
be9b9ef4 3000 vdc->sec_elmnt_seq);
dacf3dc5 3001 vdc->seqnum = header->seq;
e3c2a365 3002 vdc->crc = calc_crc(vdc, conf_size);
3921e41a 3003 memcpy(conf + i*conf_size, vdc, conf_size);
ce45c819 3004 } else
3921e41a 3005 memset(conf + i*conf_size, 0xff, conf_size);
7f798aca 3006 }
3921e41a 3007 if (write(fd, conf, buf_size) != buf_size)
a4057a88 3008 goto out;
7f798aca 3009
3010 d->disk.crc = calc_crc(&d->disk, 512);
3011 if (write(fd, &d->disk, 512) < 0)
a4057a88 3012 goto out;
7f798aca 3013
a4057a88 3014 ret = 1;
3015out:
3016 header->openflag = 0;
3017 header->crc = calc_crc(header, 512);
3018
3019 lseek64(fd, sector<<9, 0);
3020 if (write(fd, header, 512) < 0)
3021 ret = 0;
3022
3023 return ret;
7f798aca 3024}
3025
3921e41a 3026static int _write_super_to_disk(struct ddf_super *ddf, struct dl *d)
9bf38704 3027{
3028 unsigned long long size;
3029 int fd = d->fd;
3030 if (fd < 0)
3031 return 0;
3032
3033 /* We need to fill in the primary, (secondary) and workspace
3034 * lba's in the headers, set their checksums,
3035 * Also checksum phys, virt....
3036 *
3037 * Then write everything out, finally the anchor is written.
3038 */
3039 get_dev_size(fd, NULL, &size);
3040 size /= 512;
9d0c6b70 3041 if (be64_to_cpu(d->workspace_lba) != 0ULL)
9bf38704 3042 ddf->anchor.workspace_lba = d->workspace_lba;
3043 else
3044 ddf->anchor.workspace_lba =
9d0c6b70 3045 cpu_to_be64(size - 32*1024*2);
3046 if (be64_to_cpu(d->primary_lba) != 0ULL)
9bf38704 3047 ddf->anchor.primary_lba = d->primary_lba;
3048 else
3049 ddf->anchor.primary_lba =
9d0c6b70 3050 cpu_to_be64(size - 16*1024*2);
3051 if (be64_to_cpu(d->secondary_lba) != 0ULL)
9bf38704 3052 ddf->anchor.secondary_lba = d->secondary_lba;
3053 else
3054 ddf->anchor.secondary_lba =
9d0c6b70 3055 cpu_to_be64(size - 32*1024*2);
9bf38704 3056 ddf->anchor.seq = ddf->active->seq;
3057 memcpy(&ddf->primary, &ddf->anchor, 512);
3058 memcpy(&ddf->secondary, &ddf->anchor, 512);
3059
3060 ddf->anchor.openflag = 0xFF; /* 'open' means nothing */
60931cf9 3061 ddf->anchor.seq = cpu_to_be32(0xFFFFFFFF); /* no sequencing in anchor */
9bf38704 3062 ddf->anchor.crc = calc_crc(&ddf->anchor, 512);
3063
3921e41a 3064 if (!__write_ddf_structure(d, ddf, DDF_HEADER_PRIMARY))
9bf38704 3065 return 0;
3066
3921e41a 3067 if (!__write_ddf_structure(d, ddf, DDF_HEADER_SECONDARY))
9bf38704 3068 return 0;
3069
3070 lseek64(fd, (size-1)*512, SEEK_SET);
3071 if (write(fd, &ddf->anchor, 512) < 0)
3072 return 0;
3073
3074 return 1;
3075}
3076
4441541f 3077#ifndef MDASSEMBLE
3921e41a 3078static int __write_init_super_ddf(struct supertype *st)
7f798aca 3079{
a322f70c 3080 struct ddf_super *ddf = st->sb;
a322f70c 3081 struct dl *d;
175593bf
DW
3082 int attempts = 0;
3083 int successes = 0;
42d5dfd9 3084
7d5a7ff3 3085 pr_state(ddf, __func__);
a322f70c 3086
175593bf
DW
3087 /* try to write updated metadata,
3088 * if we catch a failure move on to the next disk
3089 */
a322f70c 3090 for (d = ddf->dlist; d; d=d->next) {
175593bf 3091 attempts++;
3921e41a 3092 successes += _write_super_to_disk(ddf, d);
175593bf
DW
3093 }
3094
175593bf 3095 return attempts != successes;
a322f70c 3096}
7a7cc504
NB
3097
3098static int write_init_super_ddf(struct supertype *st)
3099{
9b1fb677
DW
3100 struct ddf_super *ddf = st->sb;
3101 struct vcl *currentconf = ddf->currentconf;
3102
3103 /* we are done with currentconf reset it to point st at the container */
3104 ddf->currentconf = NULL;
edd8d13c
NB
3105
3106 if (st->update_tail) {
3107 /* queue the virtual_disk and vd_config as metadata updates */
3108 struct virtual_disk *vd;
3109 struct vd_config *vc;
c5943560 3110 int len, tlen;
3111 unsigned int i;
edd8d13c 3112
9b1fb677 3113 if (!currentconf) {
2cc2983d
N
3114 int len = (sizeof(struct phys_disk) +
3115 sizeof(struct phys_disk_entry));
3116
3117 /* adding a disk to the container. */
3118 if (!ddf->add_list)
3119 return 0;
3120
3121 append_metadata_update(st, ddf->add_list->mdupdate, len);
3122 ddf->add_list->mdupdate = NULL;
3123 return 0;
3124 }
3125
3126 /* Newly created VD */
3127
edd8d13c
NB
3128 /* First the virtual disk. We have a slightly fake header */
3129 len = sizeof(struct virtual_disk) + sizeof(struct virtual_entry);
503975b9 3130 vd = xmalloc(len);
edd8d13c 3131 *vd = *ddf->virt;
9b1fb677 3132 vd->entries[0] = ddf->virt->entries[currentconf->vcnum];
a8173e43 3133 vd->populated_vdes = cpu_to_be16(currentconf->vcnum);
edd8d13c
NB
3134 append_metadata_update(st, vd, len);
3135
3136 /* Then the vd_config */
3137 len = ddf->conf_rec_len * 512;
c5943560 3138 tlen = len * currentconf->conf.sec_elmnt_count;
3139 vc = xmalloc(tlen);
9b1fb677 3140 memcpy(vc, &currentconf->conf, len);
c5943560 3141 for (i = 1; i < currentconf->conf.sec_elmnt_count; i++)
3142 memcpy((char *)vc + i*len, currentconf->other_bvds[i-1],
3143 len);
3144 append_metadata_update(st, vc, tlen);
edd8d13c
NB
3145
3146 /* FIXME I need to close the fds! */
3147 return 0;
613b0d17 3148 } else {
d682f344 3149 struct dl *d;
19041058 3150 if (!currentconf)
3151 for (d = ddf->dlist; d; d=d->next)
3152 while (Kill(d->devname, NULL, 0, -1, 1) == 0);
3921e41a 3153 return __write_init_super_ddf(st);
d682f344 3154 }
7a7cc504
NB
3155}
3156
a322f70c
DW
3157#endif
3158
387fcd59
N
3159static __u64 avail_size_ddf(struct supertype *st, __u64 devsize,
3160 unsigned long long data_offset)
a322f70c
DW
3161{
3162 /* We must reserve the last 32Meg */
3163 if (devsize <= 32*1024*2)
3164 return 0;
3165 return devsize - 32*1024*2;
3166}
3167
3168#ifndef MDASSEMBLE
8592f29d
N
3169
3170static int reserve_space(struct supertype *st, int raiddisks,
3171 unsigned long long size, int chunk,
3172 unsigned long long *freesize)
3173{
3174 /* Find 'raiddisks' spare extents at least 'size' big (but
3175 * only caring about multiples of 'chunk') and remember
3176 * them.
3177 * If the cannot be found, fail.
3178 */
3179 struct dl *dl;
3180 struct ddf_super *ddf = st->sb;
3181 int cnt = 0;
3182
3183 for (dl = ddf->dlist; dl ; dl=dl->next) {
613b0d17 3184 dl->raiddisk = -1;
8592f29d
N
3185 dl->esize = 0;
3186 }
3187 /* Now find largest extent on each device */
3188 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3189 struct extent *e = get_extents(ddf, dl);
3190 unsigned long long pos = 0;
3191 int i = 0;
3192 int found = 0;
3193 unsigned long long minsize = size;
3194
3195 if (size == 0)
3196 minsize = chunk;
3197
3198 if (!e)
3199 continue;
3200 do {
3201 unsigned long long esize;
3202 esize = e[i].start - pos;
3203 if (esize >= minsize) {
3204 found = 1;
3205 minsize = esize;
3206 }
3207 pos = e[i].start + e[i].size;
3208 i++;
3209 } while (e[i-1].size);
3210 if (found) {
3211 cnt++;
3212 dl->esize = minsize;
3213 }
3214 free(e);
3215 }
3216 if (cnt < raiddisks) {
e7b84f9d 3217 pr_err("not enough devices with space to create array.\n");
8592f29d
N
3218 return 0; /* No enough free spaces large enough */
3219 }
3220 if (size == 0) {
3221 /* choose the largest size of which there are at least 'raiddisk' */
3222 for (dl = ddf->dlist ; dl ; dl=dl->next) {
3223 struct dl *dl2;
3224 if (dl->esize <= size)
3225 continue;
3226 /* This is bigger than 'size', see if there are enough */
3227 cnt = 0;
7b80ad6a 3228 for (dl2 = ddf->dlist; dl2 ; dl2=dl2->next)
8592f29d
N
3229 if (dl2->esize >= dl->esize)
3230 cnt++;
3231 if (cnt >= raiddisks)
3232 size = dl->esize;
3233 }
3234 if (chunk) {
3235 size = size / chunk;
3236 size *= chunk;
3237 }
3238 *freesize = size;
3239 if (size < 32) {
e7b84f9d 3240 pr_err("not enough spare devices to create array.\n");
8592f29d
N
3241 return 0;
3242 }
3243 }
3244 /* We have a 'size' of which there are enough spaces.
3245 * We simply do a first-fit */
3246 cnt = 0;
3247 for (dl = ddf->dlist ; dl && cnt < raiddisks ; dl=dl->next) {
3248 if (dl->esize < size)
3249 continue;
613b0d17 3250
8592f29d
N
3251 dl->raiddisk = cnt;
3252 cnt++;
3253 }
3254 return 1;
3255}
3256
2c514b71
NB
/*
 * Forward declarations for the two geometry helpers used by
 * validate_geometry_ddf().  Note that the container variant receives the
 * chunk size by value while the BVD variant receives a pointer to it.
 */
static int validate_geometry_ddf_container(struct supertype *st,
					   int level, int layout,
					   int raiddisks, int chunk,
					   unsigned long long size,
					   unsigned long long data_offset,
					   char *dev,
					   unsigned long long *freesize,
					   int verbose);

static int validate_geometry_ddf_bvd(struct supertype *st,
				     int level, int layout,
				     int raiddisks, int *chunk,
				     unsigned long long size,
				     unsigned long long data_offset,
				     char *dev,
				     unsigned long long *freesize,
				     int verbose);
78e44928
NB
3271
3272static int validate_geometry_ddf(struct supertype *st,
2c514b71 3273 int level, int layout, int raiddisks,
c21e737b 3274 int *chunk, unsigned long long size,
af4348dd 3275 unsigned long long data_offset,
2c514b71
NB
3276 char *dev, unsigned long long *freesize,
3277 int verbose)
a322f70c
DW
3278{
3279 int fd;
3280 struct mdinfo *sra;
3281 int cfd;
3282
3283 /* ddf potentially supports lots of things, but it depends on
3284 * what devices are offered (and maybe kernel version?)
3285 * If given unused devices, we will make a container.
3286 * If given devices in a container, we will make a BVD.
3287 * If given BVDs, we make an SVD, changing all the GUIDs in the process.
3288 */
3289
7ccc4cc4 3290 if (*chunk == UnSet)
bb7295f1
N
3291 *chunk = DEFAULT_CHUNK;
3292
542ef4ec 3293 if (level == -1000000) level = LEVEL_CONTAINER;
a322f70c 3294 if (level == LEVEL_CONTAINER) {
78e44928
NB
3295 /* Must be a fresh device to add to a container */
3296 return validate_geometry_ddf_container(st, level, layout,
7ccc4cc4 3297 raiddisks, *chunk,
af4348dd
N
3298 size, data_offset, dev,
3299 freesize,
2c514b71 3300 verbose);
5f8097be
NB
3301 }
3302
78e44928 3303 if (!dev) {
a3163bf0 3304 mdu_array_info_t array = {
3305 .level = level, .layout = layout,
3306 .raid_disks = raiddisks
3307 };
3308 struct vd_config conf;
3309 if (layout_md2ddf(&array, &conf) == -1) {
b42f577a 3310 if (verbose)
94b08b7c 3311 pr_err("DDF does not support level %d /layout %d arrays with %d disks\n",
3312 level, layout, raiddisks);
78e44928 3313 return 0;
b42f577a 3314 }
78e44928 3315 /* Should check layout? etc */
8592f29d
N
3316
3317 if (st->sb && freesize) {
3318 /* --create was given a container to create in.
3319 * So we need to check that there are enough
3320 * free spaces and return the amount of space.
3321 * We may as well remember which drives were
3322 * chosen so that add_to_super/getinfo_super
3323 * can return them.
3324 */
7ccc4cc4 3325 return reserve_space(st, raiddisks, size, *chunk, freesize);
8592f29d 3326 }
a322f70c 3327 return 1;
78e44928 3328 }
a322f70c 3329
8592f29d
N
3330 if (st->sb) {
3331 /* A container has already been opened, so we are
3332 * creating in there. Maybe a BVD, maybe an SVD.
3333 * Should make a distinction one day.
3334 */
3335 return validate_geometry_ddf_bvd(st, level, layout, raiddisks,
af4348dd
N
3336 chunk, size, data_offset, dev,
3337 freesize,
8592f29d
N
3338 verbose);
3339 }
78e44928
NB
3340 /* This is the first device for the array.
3341 * If it is a container, we read it in and do automagic allocations,
3342 * no other devices should be given.
3343 * Otherwise it must be a member device of a container, and we
3344 * do manual allocation.
3345 * Later we should check for a BVD and make an SVD.
a322f70c 3346 */
a322f70c
DW
3347 fd = open(dev, O_RDONLY|O_EXCL, 0);
3348 if (fd >= 0) {
4dd2df09 3349 sra = sysfs_read(fd, NULL, GET_VERSION);
a322f70c
DW
3350 close(fd);
3351 if (sra && sra->array.major_version == -1 &&
78e44928
NB
3352 strcmp(sra->text_version, "ddf") == 0) {
3353
3354 /* load super */
3355 /* find space for 'n' devices. */
3356 /* remember the devices */
3357 /* Somehow return the fact that we have enough */
a322f70c
DW
3358 }
3359
2c514b71 3360 if (verbose)
e7b84f9d
N
3361 pr_err("ddf: Cannot create this array "
3362 "on device %s - a container is required.\n",
3363 dev);
a322f70c
DW
3364 return 0;
3365 }
3366 if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
2c514b71 3367 if (verbose)
e7b84f9d 3368 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3369 dev, strerror(errno));
a322f70c
DW
3370 return 0;
3371 }
3372 /* Well, it is in use by someone, maybe a 'ddf' container. */
3373 cfd = open_container(fd);
3374 if (cfd < 0) {
3375 close(fd);
2c514b71 3376 if (verbose)
e7b84f9d 3377 pr_err("ddf: Cannot use %s: %s\n",
613b0d17 3378 dev, strerror(EBUSY));
a322f70c
DW
3379 return 0;
3380 }
4dd2df09 3381 sra = sysfs_read(cfd, NULL, GET_VERSION);
a322f70c
DW
3382 close(fd);
3383 if (sra && sra->array.major_version == -1 &&
3384 strcmp(sra->text_version, "ddf") == 0) {
3385 /* This is a member of a ddf container. Load the container
3386 * and try to create a bvd
3387 */
3388 struct ddf_super *ddf;
e1902a7b 3389 if (load_super_ddf_all(st, cfd, (void **)&ddf, NULL) == 0) {
5f8097be 3390 st->sb = ddf;
4dd2df09 3391 strcpy(st->container_devnm, fd2devnm(cfd));
a322f70c 3392 close(cfd);
78e44928 3393 return validate_geometry_ddf_bvd(st, level, layout,
a322f70c 3394 raiddisks, chunk, size,
af4348dd 3395 data_offset,
2c514b71
NB
3396 dev, freesize,
3397 verbose);
a322f70c
DW
3398 }
3399 close(cfd);
c42ec1ed
DW
3400 } else /* device may belong to a different container */
3401 return 0;
3402
a322f70c
DW
3403 return 1;
3404}
3405
2c514b71
NB
3406static int
3407validate_geometry_ddf_container(struct supertype *st,
3408 int level, int layout, int raiddisks,
3409 int chunk, unsigned long long size,
af4348dd 3410 unsigned long long data_offset,
2c514b71
NB
3411 char *dev, unsigned long long *freesize,
3412 int verbose)
a322f70c
DW
3413{
3414 int fd;
3415 unsigned long long ldsize;
3416
3417 if (level != LEVEL_CONTAINER)
3418 return 0;
3419 if (!dev)
3420 return 1;
3421
3422 fd = open(dev, O_RDONLY|O_EXCL, 0);
3423 if (fd < 0) {
2c514b71 3424 if (verbose)
e7b84f9d 3425 pr_err("ddf: Cannot open %s: %s\n",
613b0d17 3426 dev, strerror(errno));
a322f70c
DW
3427 return 0;
3428 }
3429 if (!get_dev_size(fd, dev, &ldsize)) {
3430 close(fd);
3431 return 0;
3432 }
3433 close(fd);
3434
387fcd59 3435 *freesize = avail_size_ddf(st, ldsize >> 9, INVALID_SECTORS);
ea17e7aa
N
3436 if (*freesize == 0)
3437 return 0;
a322f70c
DW
3438
3439 return 1;
3440}
3441
78e44928
NB
3442static int validate_geometry_ddf_bvd(struct supertype *st,
3443 int level, int layout, int raiddisks,
c21e737b 3444 int *chunk, unsigned long long size,
af4348dd 3445 unsigned long long data_offset,
2c514b71
NB
3446 char *dev, unsigned long long *freesize,
3447 int verbose)
a322f70c
DW
3448{
3449 struct stat stb;
3450 struct ddf_super *ddf = st->sb;
3451 struct dl *dl;
5f8097be
NB
3452 unsigned long long pos = 0;
3453 unsigned long long maxsize;
3454 struct extent *e;
3455 int i;
a322f70c 3456 /* ddf/bvd supports lots of things, but not containers */
b42f577a
N
3457 if (level == LEVEL_CONTAINER) {
3458 if (verbose)
e7b84f9d 3459 pr_err("DDF cannot create a container within an container\n");
a322f70c 3460 return 0;
b42f577a 3461 }
a322f70c
DW
3462 /* We must have the container info already read in. */
3463 if (!ddf)
3464 return 0;
3465
5f8097be
NB
3466 if (!dev) {
3467 /* General test: make sure there is space for
3468 * 'raiddisks' device extents of size 'size'.
3469 */
3470 unsigned long long minsize = size;
3471 int dcnt = 0;
3472 if (minsize == 0)
3473 minsize = 8;
3474 for (dl = ddf->dlist; dl ; dl = dl->next)
3475 {
3476 int found = 0;
7e1432fb 3477 pos = 0;
5f8097be
NB
3478
3479 i = 0;
3480 e = get_extents(ddf, dl);
3481 if (!e) continue;
3482 do {
3483 unsigned long long esize;
3484 esize = e[i].start - pos;
3485 if (esize >= minsize)
3486 found = 1;
3487 pos = e[i].start + e[i].size;
3488 i++;
3489 } while (e[i-1].size);
3490 if (found)
3491 dcnt++;
3492 free(e);
3493 }
3494 if (dcnt < raiddisks) {
2c514b71 3495 if (verbose)
e7b84f9d
N
3496 pr_err("ddf: Not enough devices with "
3497 "space for this array (%d < %d)\n",
3498 dcnt, raiddisks);
5f8097be
NB
3499 return 0;
3500 }
3501 return 1;
3502 }
a322f70c
DW
3503 /* This device must be a member of the set */
3504 if (stat(dev, &stb) < 0)
3505 return 0;
3506 if ((S_IFMT & stb.st_mode) != S_IFBLK)
3507 return 0;
3508 for (dl = ddf->dlist ; dl ; dl = dl->next) {
f21e18ca
N
3509 if (dl->major == (int)major(stb.st_rdev) &&
3510 dl->minor == (int)minor(stb.st_rdev))
a322f70c
DW
3511 break;
3512 }
5f8097be 3513 if (!dl) {
2c514b71 3514 if (verbose)
e7b84f9d 3515 pr_err("ddf: %s is not in the "
613b0d17
N
3516 "same DDF set\n",
3517 dev);
5f8097be
NB
3518 return 0;
3519 }
3520 e = get_extents(ddf, dl);
3521 maxsize = 0;
3522 i = 0;
3523 if (e) do {
613b0d17
N
3524 unsigned long long esize;
3525 esize = e[i].start - pos;
3526 if (esize >= maxsize)
3527 maxsize = esize;
3528 pos = e[i].start + e[i].size;
3529 i++;
3530 } while (e[i-1].size);
5f8097be 3531 *freesize = maxsize;
a322f70c
DW
3532 // FIXME here I am
3533
3534 return 1;
3535}
59e36268 3536
a322f70c 3537static int load_super_ddf_all(struct supertype *st, int fd,
e1902a7b 3538 void **sbp, char *devname)
a322f70c
DW
3539{
3540 struct mdinfo *sra;
3541 struct ddf_super *super;
3542 struct mdinfo *sd, *best = NULL;
3543 int bestseq = 0;
3544 int seq;
3545 char nm[20];
3546 int dfd;
3547
b526e52d 3548 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
a322f70c
DW
3549 if (!sra)
3550 return 1;
3551 if (sra->array.major_version != -1 ||
3552 sra->array.minor_version != -2 ||
3553 strcmp(sra->text_version, "ddf") != 0)
3554 return 1;
3555
6416d527 3556 if (posix_memalign((void**)&super, 512, sizeof(*super)) != 0)
a322f70c 3557 return 1;
a2349791 3558 memset(super, 0, sizeof(*super));
a322f70c
DW
3559
3560 /* first, try each device, and choose the best ddf */
3561 for (sd = sra->devs ; sd ; sd = sd->next) {
3562 int rv;
3563 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
7a7cc504
NB
3564 dfd = dev_open(nm, O_RDONLY);
3565 if (dfd < 0)
a322f70c
DW
3566 return 2;
3567 rv = load_ddf_headers(dfd, super, NULL);
7a7cc504 3568 close(dfd);
a322f70c 3569 if (rv == 0) {
60931cf9 3570 seq = be32_to_cpu(super->active->seq);
a322f70c
DW
3571 if (super->active->openflag)
3572 seq--;
3573 if (!best || seq > bestseq) {
3574 bestseq = seq;
3575 best = sd;
3576 }
3577 }
3578 }
3579 if (!best)
3580 return 1;
3581 /* OK, load this ddf */
3582 sprintf(nm, "%d:%d", best->disk.major, best->disk.minor);
3583 dfd = dev_open(nm, O_RDONLY);
7a7cc504 3584 if (dfd < 0)
a322f70c
DW
3585 return 1;
3586 load_ddf_headers(dfd, super, NULL);
3587 load_ddf_global(dfd, super, NULL);
3588 close(dfd);
3589 /* Now we need the device-local bits */
3590 for (sd = sra->devs ; sd ; sd = sd->next) {
3d2c4fc7
DW
3591 int rv;
3592
a322f70c 3593 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
e1902a7b 3594 dfd = dev_open(nm, O_RDWR);
7a7cc504 3595 if (dfd < 0)
a322f70c 3596 return 2;
3d2c4fc7
DW
3597 rv = load_ddf_headers(dfd, super, NULL);
3598 if (rv == 0)
e1902a7b 3599 rv = load_ddf_local(dfd, super, NULL, 1);
3d2c4fc7
DW
3600 if (rv)
3601 return 1;
a322f70c 3602 }
33414a01 3603
a322f70c
DW
3604 *sbp = super;
3605 if (st->ss == NULL) {
78e44928 3606 st->ss = &super_ddf;
a322f70c
DW
3607 st->minor_version = 0;
3608 st->max_devs = 512;
3609 }
4dd2df09 3610 strcpy(st->container_devnm, fd2devnm(fd));
a322f70c
DW
3611 return 0;
3612}
2b959fbf
N
3613
3614static int load_container_ddf(struct supertype *st, int fd,
3615 char *devname)
3616{
3617 return load_super_ddf_all(st, fd, &st->sb, devname);
3618}
3619
0e600426 3620#endif /* MDASSEMBLE */
a322f70c 3621
a5c7adb3 3622static int check_secondary(const struct vcl *vc)
3623{
3624 const struct vd_config *conf = &vc->conf;
3625 int i;
3626
3627 /* The only DDF secondary RAID level md can support is
3628 * RAID 10, if the stripe sizes and Basic volume sizes
3629 * are all equal.
3630 * Other configurations could in theory be supported by exposing
3631 * the BVDs to user space and using device mapper for the secondary
3632 * mapping. So far we don't support that.
3633 */
3634
3635 __u64 sec_elements[4] = {0, 0, 0, 0};
3636#define __set_sec_seen(n) (sec_elements[(n)>>6] |= (1<<((n)&63)))
3637#define __was_sec_seen(n) ((sec_elements[(n)>>6] & (1<<((n)&63))) != 0)
3638
3639 if (vc->other_bvds == NULL) {
3640 pr_err("No BVDs for secondary RAID found\n");
3641 return -1;
3642 }
3643 if (conf->prl != DDF_RAID1) {
3644 pr_err("Secondary RAID level only supported for mirrored BVD\n");
3645 return -1;
3646 }
3647 if (conf->srl != DDF_2STRIPED && conf->srl != DDF_2SPANNED) {
3648 pr_err("Secondary RAID level %d is unsupported\n",
3649 conf->srl);
3650 return -1;
3651 }
3652 __set_sec_seen(conf->sec_elmnt_seq);
3653 for (i = 0; i < conf->sec_elmnt_count-1; i++) {
3654 const struct vd_config *bvd = vc->other_bvds[i];
3c48f7be 3655 if (bvd->sec_elmnt_seq == DDF_UNUSED_BVD)
c98567ba 3656 continue;
a5c7adb3 3657 if (bvd->srl != conf->srl) {
3658 pr_err("Inconsistent secondary RAID level across BVDs\n");
3659 return -1;
3660 }
3661 if (bvd->prl != conf->prl) {
3662 pr_err("Different RAID levels for BVDs are unsupported\n");
3663 return -1;
3664 }
a8173e43 3665 if (!be16_eq(bvd->prim_elmnt_count, conf->prim_elmnt_count)) {
a5c7adb3 3666 pr_err("All BVDs must have the same number of primary elements\n");
3667 return -1;
3668 }
3669 if (bvd->chunk_shift != conf->chunk_shift) {
3670 pr_err("Different strip sizes for BVDs are unsupported\n");
3671 return -1;
3672 }
9d0c6b70 3673 if (!be64_eq(bvd->array_blocks, conf->array_blocks)) {
a5c7adb3 3674 pr_err("Different BVD sizes are unsupported\n");
3675 return -1;
3676 }
3677 __set_sec_seen(bvd->sec_elmnt_seq);
3678 }
3679 for (i = 0; i < conf->sec_elmnt_count; i++) {
3680 if (!__was_sec_seen(i)) {
3681 pr_err("BVD %d is missing\n", i);
3682 return -1;
3683 }
3684 }
3685 return 0;
3686}
3687
8a38db86 3688static unsigned int get_pd_index_from_refnum(const struct vcl *vc,
60931cf9 3689 be32 refnum, unsigned int nmax,
4e587018 3690 const struct vd_config **bvd,
3691 unsigned int *idx)
8a38db86 3692{
4e587018 3693 unsigned int i, j, n, sec, cnt;
3694
a8173e43 3695 cnt = be16_to_cpu(vc->conf.prim_elmnt_count);
4e587018 3696 sec = (vc->conf.sec_elmnt_count == 1 ? 0 : vc->conf.sec_elmnt_seq);
3697
3698 for (i = 0, j = 0 ; i < nmax ; i++) {
3699 /* j counts valid entries for this BVD */
60931cf9 3700 if (be32_to_cpu(vc->conf.phys_refnum[i]) != 0xffffffff)
4e587018 3701 j++;
60931cf9 3702 if (be32_eq(vc->conf.phys_refnum[i], refnum)) {
4e587018 3703 *bvd = &vc->conf;
3704 *idx = i;
3705 return sec * cnt + j - 1;
3706 }
3707 }
3708 if (vc->other_bvds == NULL)
3709 goto bad;
3710
3711 for (n = 1; n < vc->conf.sec_elmnt_count; n++) {
3712 struct vd_config *vd = vc->other_bvds[n-1];
4e587018 3713 sec = vd->sec_elmnt_seq;
3c48f7be 3714 if (sec == DDF_UNUSED_BVD)
3715 continue;
4e587018 3716 for (i = 0, j = 0 ; i < nmax ; i++) {
60931cf9 3717 if (be32_to_cpu(vd->phys_refnum[i]) != 0xffffffff)
4e587018 3718 j++;
60931cf9 3719 if (be32_eq(vd->phys_refnum[i], refnum)) {
4e587018 3720 *bvd = vd;
3721 *idx = i;
3722 return sec * cnt + j - 1;
3723 }
3724 }
3725 }
3726bad:
3727 *bvd = NULL;
d6e7b083 3728 return DDF_NOTFOUND;
8a38db86 3729}
3730
00bbdbda 3731static struct mdinfo *container_content_ddf(struct supertype *st, char *subarray)
598f0d58
NB
3732{
3733 /* Given a container loaded by load_super_ddf_all,
3734 * extract information about all the arrays into
3735 * an mdinfo tree.
3736 *
3737 * For each vcl in conflist: create an mdinfo, fill it in,
3738 * then look for matching devices (phys_refnum) in dlist
3739 * and create appropriate device mdinfo.
3740 */
3741 struct ddf_super *ddf = st->sb;
3742 struct mdinfo *rest = NULL;
3743 struct vcl *vc;
3744
3745 for (vc = ddf->conflist ; vc ; vc=vc->next)
3746 {
f21e18ca 3747 unsigned int i;
598f0d58 3748 struct mdinfo *this;
00bbdbda 3749 char *ep;
90fa1a29 3750 __u32 *cptr;
8a38db86 3751 unsigned int pd;
00bbdbda
N
3752
3753 if (subarray &&
3754 (strtoul(subarray, &ep, 10) != vc->vcnum ||
3755 *ep != '\0'))
3756 continue;
3757
a5c7adb3 3758 if (vc->conf.sec_elmnt_count > 1) {
3759 if (check_secondary(vc) != 0)
3760 continue;
3761 }
3762
503975b9 3763 this = xcalloc(1, sizeof(*this));
598f0d58
NB
3764 this->next = rest;
3765 rest = this;
3766
8a2848a7 3767 if (layout_ddf2md(&vc->conf, &this->array))
3768 continue;
598f0d58 3769 this->array.md_minor = -1;
f35f2525
N
3770 this->array.major_version = -1;
3771 this->array.minor_version = -2;
5684fff6 3772 this->safe_mode_delay = DDF_SAFE_MODE_DELAY;
90fa1a29
JS
3773 cptr = (__u32 *)(vc->conf.guid + 16);
3774 this->array.ctime = DECADE + __be32_to_cpu(*cptr);
598f0d58 3775 this->array.utime = DECADE +
60931cf9 3776 be32_to_cpu(vc->conf.timestamp);
598f0d58
NB
3777 this->array.chunk_size = 512 << vc->conf.chunk_shift;
3778
59e36268 3779 i = vc->vcnum;
7a7cc504
NB
3780 if ((ddf->virt->entries[i].state & DDF_state_inconsistent) ||
3781 (ddf->virt->entries[i].init_state & DDF_initstate_mask) !=
ed9d66aa 3782 DDF_init_full) {
598f0d58 3783 this->array.state = 0;
ed9d66aa
NB
3784 this->resync_start = 0;
3785 } else {
598f0d58 3786 this->array.state = 1;
b7528a20 3787 this->resync_start = MaxSector;
ed9d66aa 3788 }
8bf989d8 3789 _ddf_array_name(this->name, ddf, i);
598f0d58 3790 memset(this->uuid, 0, sizeof(this->uuid));
9d0c6b70 3791 this->component_size = be64_to_cpu(vc->conf.blocks);
598f0d58 3792 this->array.size = this->component_size / 2;
5f2aace8 3793 this->container_member = i;
598f0d58 3794
c5afc314
N
3795 ddf->currentconf = vc;
3796 uuid_from_super_ddf(st, this->uuid);
f646805e 3797 if (!subarray)
3798 ddf->currentconf = NULL;
c5afc314 3799
60f18132 3800 sprintf(this->text_version, "/%s/%d",
4dd2df09 3801 st->container_devnm, this->container_member);
60f18132 3802
a8173e43 3803 for (pd = 0; pd < be16_to_cpu(ddf->phys->used_pdes); pd++) {
598f0d58
NB
3804 struct mdinfo *dev;
3805 struct dl *d;
4e587018 3806 const struct vd_config *bvd;
3807 unsigned int iphys;
fa033bec 3808 int stt;
598f0d58 3809
60931cf9 3810 if (be32_to_cpu(ddf->phys->entries[pd].refnum)
3811 == 0xFFFFFFFF)
bc17324f 3812 continue;
0cf5ef67 3813
a8173e43 3814 stt = be16_to_cpu(ddf->phys->entries[pd].state);
fa033bec
N
3815 if ((stt & (DDF_Online|DDF_Failed|DDF_Rebuilding))
3816 != DDF_Online)
3817 continue;
3818
8a38db86 3819 i = get_pd_index_from_refnum(
4e587018 3820 vc, ddf->phys->entries[pd].refnum,
3821 ddf->mppe, &bvd, &iphys);
d6e7b083 3822 if (i == DDF_NOTFOUND)
8a38db86 3823 continue;
3824
fa033bec 3825 this->array.working_disks++;
bc17324f 3826
0cf5ef67 3827 for (d = ddf->dlist; d ; d=d->next)
60931cf9 3828 if (be32_eq(d->disk.refnum,
3829 ddf->phys->entries[pd].refnum))
0cf5ef67
N
3830 break;
3831 if (d == NULL)
3832 /* Haven't found that one yet, maybe there are others */
3833 continue;
3834
503975b9 3835 dev = xcalloc(1, sizeof(*dev));
598f0d58
NB
3836 dev->next = this->devs;
3837 this->devs = dev;
3838
60931cf9 3839 dev->disk.number = be32_to_cpu(d->disk.refnum);
598f0d58
NB
3840 dev->disk.major = d->major;
3841 dev->disk.minor = d->minor;
3842 dev->disk.raid_disk = i;
3843 dev->disk.state = (1<<MD_DISK_SYNC)|(1<<MD_DISK_ACTIVE);
d23534e4 3844 dev->recovery_start = MaxSector;
598f0d58 3845
eba2859f 3846 dev->events = be32_to_cpu(ddf->active->seq);
57a66662 3847 dev->data_offset =
9d0c6b70 3848 be64_to_cpu(LBA_OFFSET(ddf, bvd)[iphys]);
3849 dev->component_size = be64_to_cpu(bvd->blocks);
598f0d58
NB
3850 if (d->devname)
3851 strcpy(dev->name, d->devname);
3852 }
3853 }
3854 return rest;
3855}
3856
955e9ea1 3857static int store_super_ddf(struct supertype *st, int fd)
a322f70c 3858{
955e9ea1 3859 struct ddf_super *ddf = st->sb;
a322f70c 3860 unsigned long long dsize;
6416d527 3861 void *buf;
3d2c4fc7 3862 int rc;
a322f70c 3863
955e9ea1
DW
3864 if (!ddf)
3865 return 1;
3866
a322f70c
DW
3867 if (!get_dev_size(fd, NULL, &dsize))
3868 return 1;
3869
dbf98368 3870 if (ddf->dlist || ddf->conflist) {
3871 struct stat sta;
3872 struct dl *dl;
3873 int ofd, ret;
3874
3875 if (fstat(fd, &sta) == -1 || !S_ISBLK(sta.st_mode)) {
3876 pr_err("%s: file descriptor for invalid device\n",
3877 __func__);
3878 return 1;
3879 }
3880 for (dl = ddf->dlist; dl; dl = dl->next)
3881 if (dl->major == (int)major(sta.st_rdev) &&
3882 dl->minor == (int)minor(sta.st_rdev))
3883 break;
3884 if (!dl) {
3885 pr_err("%s: couldn't find disk %d/%d\n", __func__,
3886 (int)major(sta.st_rdev),
3887 (int)minor(sta.st_rdev));
3888 return 1;
3889 }
dbf98368 3890 ofd = dl->fd;
3891 dl->fd = fd;
3921e41a 3892 ret = (_write_super_to_disk(ddf, dl) != 1);
dbf98368 3893 dl->fd = ofd;
3894 return ret;
3895 }
3896
3d2c4fc7
DW
3897 if (posix_memalign(&buf, 512, 512) != 0)
3898 return 1;
6416d527
NB
3899 memset(buf, 0, 512);
3900
a322f70c 3901 lseek64(fd, dsize-512, 0);
3d2c4fc7 3902 rc = write(fd, buf, 512);
6416d527 3903 free(buf);
3d2c4fc7
DW
3904 if (rc < 0)
3905 return 1;
a322f70c
DW
3906 return 0;
3907}
3908
a19c88b8
NB
3909static int compare_super_ddf(struct supertype *st, struct supertype *tst)
3910{
3911 /*
3912 * return:
3913 * 0 same, or first was empty, and second was copied
3914 * 1 second had wrong number
3915 * 2 wrong uuid
3916 * 3 wrong other info
3917 */
3918 struct ddf_super *first = st->sb;
3919 struct ddf_super *second = tst->sb;
4eefd651 3920 struct dl *dl1, *dl2;
3921 struct vcl *vl1, *vl2;
2d210697 3922 unsigned int max_vds, max_pds, pd, vd;
a19c88b8
NB
3923
3924 if (!first) {
3925 st->sb = tst->sb;
3926 tst->sb = NULL;
3927 return 0;
3928 }
3929
3930 if (memcmp(first->anchor.guid, second->anchor.guid, DDF_GUID_LEN) != 0)
3931 return 2;
3932
2d210697 3933 if (first->max_part != second->max_part ||
a8173e43 3934 !be16_eq(first->phys->used_pdes, second->phys->used_pdes) ||
3935 !be16_eq(first->virt->populated_vdes,
3936 second->virt->populated_vdes)) {
2d210697 3937 dprintf("%s: PD/VD number mismatch\n", __func__);
3938 return 3;
3939 }
3940
a8173e43 3941 max_pds = be16_to_cpu(first->phys->used_pdes);
2d210697 3942 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
3943 for (pd = 0; pd < max_pds; pd++)
60931cf9 3944 if (be32_eq(first->phys->entries[pd].refnum,
3945 dl2->disk.refnum))
2d210697 3946 break;
3947 if (pd == max_pds) {
3948 dprintf("%s: no match for disk %08x\n", __func__,
60931cf9 3949 be32_to_cpu(dl2->disk.refnum));
2d210697 3950 return 3;
3951 }
3952 }
3953
a8173e43 3954 max_vds = be16_to_cpu(first->active->max_vd_entries);
2d210697 3955 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
60931cf9 3956 if (!be32_eq(vl2->conf.magic, DDF_VD_CONF_MAGIC))
2d210697 3957 continue;
3958 for (vd = 0; vd < max_vds; vd++)
3959 if (!memcmp(first->virt->entries[vd].guid,
3960 vl2->conf.guid, DDF_GUID_LEN))
3961 break;
3962 if (vd == max_vds) {
3963 dprintf("%s: no match for VD config\n", __func__);
3964 return 3;
3965 }
3966 }
a19c88b8 3967 /* FIXME should I look at anything else? */
2d210697 3968
4eefd651 3969 /*
3970 At this point we are fairly sure that the meta data matches.
3971 But the new disk may contain additional local data.
3972 Add it to the super block.
3973 */
3974 for (vl2 = second->conflist; vl2; vl2 = vl2->next) {
3975 for (vl1 = first->conflist; vl1; vl1 = vl1->next)
3976 if (!memcmp(vl1->conf.guid, vl2->conf.guid,
3977 DDF_GUID_LEN))
3978 break;
3979 if (vl1) {
3980 if (vl1->other_bvds != NULL &&
3981 vl1->conf.sec_elmnt_seq !=
3982 vl2->conf.sec_elmnt_seq) {
3983 dprintf("%s: adding BVD %u\n", __func__,
3984 vl2->conf.sec_elmnt_seq);
3985 add_other_bvd(vl1, &vl2->conf,
3986 first->conf_rec_len*512);
3987 }
3988 continue;
3989 }
3990
3991 if (posix_memalign((void **)&vl1, 512,
3992 (first->conf_rec_len*512 +
3993 offsetof(struct vcl, conf))) != 0) {
3994 pr_err("%s could not allocate vcl buf\n",
3995 __func__);
3996 return 3;
3997 }
3998
3999 vl1->next = first->conflist;
4000 vl1->block_sizes = NULL;
4eefd651 4001 memcpy(&vl1->conf, &vl2->conf, first->conf_rec_len*512);
3c48f7be 4002 if (alloc_other_bvds(first, vl1) != 0) {
4003 pr_err("%s could not allocate other bvds\n",
4004 __func__);
4005 free(vl1);
4006 return 3;
4007 }
4eefd651 4008 for (vd = 0; vd < max_vds; vd++)
4009 if (!memcmp(first->virt->entries[vd].guid,
4010 vl1->conf.guid, DDF_GUID_LEN))
4011 break;
4012 vl1->vcnum = vd;
4013 dprintf("%s: added config for VD %u\n", __func__, vl1->vcnum);
4014 first->conflist = vl1;
4015 }
4016
4017 for (dl2 = second->dlist; dl2; dl2 = dl2->next) {
4018 for (dl1 = first->dlist; dl1; dl1 = dl1->next)
60931cf9 4019 if (be32_eq(dl1->disk.refnum, dl2->disk.refnum))
4eefd651 4020 break;
4021 if (dl1)
4022 continue;
4023
4024 if (posix_memalign((void **)&dl1, 512,
4025 sizeof(*dl1) + (first->max_part) * sizeof(dl1->vlist[0]))
4026 != 0) {
4027 pr_err("%s could not allocate disk info buffer\n",
4028 __func__);
4029 return 3;
4030 }
4031 memcpy(dl1, dl2, sizeof(*dl1));
4032 dl1->mdupdate = NULL;
4033 dl1->next = first->dlist;
4034 dl1->fd = -1;
4035 for (pd = 0; pd < max_pds; pd++)
60931cf9 4036 if (be32_eq(first->phys->entries[pd].refnum,
4037 dl1->disk.refnum))
4eefd651 4038 break;
4039 dl1->pdnum = pd;
4040 if (dl2->spare) {
4041 if (posix_memalign((void **)&dl1->spare, 512,
4042 first->conf_rec_len*512) != 0) {
4043 pr_err("%s could not allocate spare info buf\n",
4044 __func__);
4045 return 3;
4046 }
4047 memcpy(dl1->spare, dl2->spare, first->conf_rec_len*512);
4048 }
4049 for (vd = 0 ; vd < first->max_part ; vd++) {
4050 if (!dl2->vlist[vd]) {
4051 dl1->vlist[vd] = NULL;
4052 continue;
4053 }
4054 for (vl1 = first->conflist; vl1; vl1 = vl1->next) {
4055 if (!memcmp(vl1->conf.guid,
4056 dl2->vlist[vd]->conf.guid,
4057 DDF_GUID_LEN))
4058 break;
4059 dl1->vlist[vd] = vl1;
4060 }
4061 }
4062 first->dlist = dl1;
4063 dprintf("%s: added disk %d: %08x\n", __func__, dl1->pdnum,
60931cf9 4064 be32_to_cpu(dl1->disk.refnum));
4eefd651 4065 }
4066
a19c88b8
NB
4067 return 0;
4068}
4069
0e600426 4070#ifndef MDASSEMBLE
4e5528c6
NB
4071/*
4072 * A new array 'a' has been started which claims to be instance 'inst'
4073 * within container 'c'.
4074 * We need to confirm that the array matches the metadata in 'c' so
4075 * that we don't corrupt any metadata.
4076 */
cba0191b 4077static int ddf_open_new(struct supertype *c, struct active_array *a, char *inst)
549e9569 4078{
a2aa439e 4079 struct ddf_super *ddf = c->sb;
4080 int n = atoi(inst);
5daa35ac 4081 struct mdinfo *dev;
4082 struct dl *dl;
4083 static const char faulty[] = "faulty";
4084
fb9d0acb 4085 if (all_ff(ddf->virt->entries[n].guid)) {
4086 pr_err("%s: subarray %d doesn't exist\n", __func__, n);
a2aa439e 4087 return -ENODEV;
4088 }
5daa35ac 4089 dprintf("%s: new subarray %d, GUID: %s\n", __func__, n,
4090 guid_str(ddf->virt->entries[n].guid));
4091 for (dev = a->info.devs; dev; dev = dev->next) {
4092 for (dl = ddf->dlist; dl; dl = dl->next)
4093 if (dl->major == dev->disk.major &&
4094 dl->minor == dev->disk.minor)
4095 break;
4096 if (!dl) {
4097 pr_err("%s: device %d/%d of subarray %d not found in meta data\n",
4098 __func__, dev->disk.major, dev->disk.minor, n);
4099 return -1;
4100 }
4101 if ((be16_to_cpu(ddf->phys->entries[dl->pdnum].state) &
4102 (DDF_Online|DDF_Missing|DDF_Failed)) != DDF_Online) {
4103 pr_err("%s: new subarray %d contains broken device %d/%d (%02x)\n",
4104 __func__, n, dl->major, dl->minor,
4105 be16_to_cpu(
4106 ddf->phys->entries[dl->pdnum].state));
4107 if (write(dev->state_fd, faulty, sizeof(faulty)-1) !=
4108 sizeof(faulty) - 1)
4109 pr_err("Write to state_fd failed\n");
4110 dev->curr_state = DS_FAULTY;
4111 }
4112 }
a2aa439e 4113 a->info.container_member = n;
549e9569
NB
4114 return 0;
4115}
4116
4e5528c6
NB
4117/*
4118 * The array 'a' is to be marked clean in the metadata.
ed9d66aa 4119 * If '->resync_start' is not ~(unsigned long long)0, then the array is only
4e5528c6
NB
4120 * clean up to the point (in sectors). If that cannot be recorded in the
4121 * metadata, then leave it as dirty.
4122 *
4123 * For DDF, we need to clear the DDF_state_inconsistent bit in the
4124 * !global! virtual_disk.virtual_entry structure.
4125 */
01f157d7 4126static int ddf_set_array_state(struct active_array *a, int consistent)
549e9569 4127{
4e5528c6
NB
4128 struct ddf_super *ddf = a->container->sb;
4129 int inst = a->info.container_member;
18a2f463 4130 int old = ddf->virt->entries[inst].state;
01f157d7
N
4131 if (consistent == 2) {
4132 /* Should check if a recovery should be started FIXME */
4133 consistent = 1;
b7941fd6 4134 if (!is_resync_complete(&a->info))
01f157d7
N
4135 consistent = 0;
4136 }
ed9d66aa
NB
4137 if (consistent)
4138 ddf->virt->entries[inst].state &= ~DDF_state_inconsistent;
4139 else
4e5528c6 4140 ddf->virt->entries[inst].state |= DDF_state_inconsistent;
18a2f463 4141 if (old != ddf->virt->entries[inst].state)
7d5a7ff3 4142 ddf_set_updates_pending(ddf);
18a2f463
NB
4143
4144 old = ddf->virt->entries[inst].init_state;
ed9d66aa 4145 ddf->virt->entries[inst].init_state &= ~DDF_initstate_mask;
b7941fd6 4146 if (is_resync_complete(&a->info))
ed9d66aa 4147 ddf->virt->entries[inst].init_state |= DDF_init_full;
b7941fd6 4148 else if (a->info.resync_start == 0)
ed9d66aa 4149 ddf->virt->entries[inst].init_state |= DDF_init_not;
4e5528c6 4150 else
ed9d66aa 4151 ddf->virt->entries[inst].init_state |= DDF_init_quick;
18a2f463 4152 if (old != ddf->virt->entries[inst].init_state)
7d5a7ff3 4153 ddf_set_updates_pending(ddf);
ed9d66aa 4154
b27336a2 4155 dprintf("ddf mark %d/%s (%d) %s %llu\n", inst,
4156 guid_str(ddf->virt->entries[inst].guid), a->curr_state,
4157 consistent?"clean":"dirty",
b7941fd6 4158 a->info.resync_start);
01f157d7 4159 return consistent;
fd7cde1b
DW
4160}
4161
5ec636b7 4162static int get_bvd_state(const struct ddf_super *ddf,
4163 const struct vd_config *vc)
4164{
4165 unsigned int i, n_bvd, working = 0;
a8173e43 4166 unsigned int n_prim = be16_to_cpu(vc->prim_elmnt_count);
5ec636b7 4167 int pd, st, state;
4168 for (i = 0; i < n_prim; i++) {
4169 if (!find_index_in_bvd(ddf, vc, i, &n_bvd))
4170 continue;
4171 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
4172 if (pd < 0)
4173 continue;
a8173e43 4174 st = be16_to_cpu(ddf->phys->entries[pd].state);
5ec636b7 4175 if ((st & (DDF_Online|DDF_Failed|DDF_Rebuilding))
4176 == DDF_Online)
4177 working++;
4178 }
4179
4180 state = DDF_state_degraded;
4181 if (working == n_prim)
4182 state = DDF_state_optimal;
4183 else
4184 switch (vc->prl) {
4185 case DDF_RAID0:
4186 case DDF_CONCAT:
4187 case DDF_JBOD:
4188 state = DDF_state_failed;
4189 break;
4190 case DDF_RAID1:
4191 if (working == 0)
4192 state = DDF_state_failed;
4193 else if (working >= 2)
4194 state = DDF_state_part_optimal;
4195 break;
4196 case DDF_RAID4:
4197 case DDF_RAID5:
4198 if (working < n_prim - 1)
4199 state = DDF_state_failed;
4200 break;
4201 case DDF_RAID6:
4202 if (working < n_prim - 2)
4203 state = DDF_state_failed;
4204 else if (working == n_prim - 1)
4205 state = DDF_state_part_optimal;
4206 break;
4207 }
4208 return state;
4209}
4210
0777d17d 4211static int secondary_state(int state, int other, int seclevel)
4212{
4213 if (state == DDF_state_optimal && other == DDF_state_optimal)
4214 return DDF_state_optimal;
4215 if (seclevel == DDF_2MIRRORED) {
4216 if (state == DDF_state_optimal || other == DDF_state_optimal)
4217 return DDF_state_part_optimal;
4218 if (state == DDF_state_failed && other == DDF_state_failed)
4219 return DDF_state_failed;
4220 return DDF_state_degraded;
4221 } else {
4222 if (state == DDF_state_failed || other == DDF_state_failed)
4223 return DDF_state_failed;
4224 if (state == DDF_state_degraded || other == DDF_state_degraded)
4225 return DDF_state_degraded;
4226 return DDF_state_part_optimal;
4227 }
4228}
4229
4230static int get_svd_state(const struct ddf_super *ddf, const struct vcl *vcl)
4231{
4232 int state = get_bvd_state(ddf, &vcl->conf);
4233 unsigned int i;
4234 for (i = 1; i < vcl->conf.sec_elmnt_count; i++) {
4235 state = secondary_state(
4236 state,
4237 get_bvd_state(ddf, vcl->other_bvds[i-1]),
4238 vcl->conf.srl);
4239 }
4240 return state;
4241}
4242
7a7cc504
NB
4243/*
4244 * The state of each disk is stored in the global phys_disk structure
4245 * in phys_disk.entries[n].state.
4246 * This makes various combinations awkward.
4247 * - When a device fails in any array, it must be failed in all arrays
4248 * that include a part of this device.
4249 * - When a component is rebuilding, we cannot include it officially in the
4250 * array unless this is the only array that uses the device.
4251 *
4252 * So: when transitioning:
4253 * Online -> failed, just set failed flag. monitor will propagate
4254 * spare -> online, the device might need to be added to the array.
4255 * spare -> failed, just set failed. Don't worry if in array or not.
4256 */
8d45d196 4257static void ddf_set_disk(struct active_array *a, int n, int state)
549e9569 4258{
7a7cc504 4259 struct ddf_super *ddf = a->container->sb;
baba3f4e 4260 unsigned int inst = a->info.container_member, n_bvd;
4261 struct vcl *vcl;
4262 struct vd_config *vc = find_vdcr(ddf, inst, (unsigned int)n,
4263 &n_bvd, &vcl);
4264 int pd;
e1316fab
N
4265 struct mdinfo *mdi;
4266 struct dl *dl;
7a7cc504 4267
ce6844b9 4268 dprintf("%s: %d to %x\n", __func__, n, state);
7a7cc504 4269 if (vc == NULL) {
2c514b71 4270 dprintf("ddf: cannot find instance %d!!\n", inst);
7a7cc504
NB
4271 return;
4272 }
e1316fab
N
4273 /* Find the matching slot in 'info'. */
4274 for (mdi = a->info.devs; mdi; mdi = mdi->next)
4275 if (mdi->disk.raid_disk == n)
4276 break;
ce6844b9
MW
4277 if (!mdi) {
4278 pr_err("%s: cannot find raid disk %d\n",
4279 __func__, n);
e1316fab 4280 return;
ce6844b9 4281 }
e1316fab
N
4282
4283 /* and find the 'dl' entry corresponding to that. */
4284 for (dl = ddf->dlist; dl; dl = dl->next)
77632af9
N
4285 if (mdi->state_fd >= 0 &&
4286 mdi->disk.major == dl->major &&
e1316fab
N
4287 mdi->disk.minor == dl->minor)
4288 break;
ce6844b9
MW
4289 if (!dl) {
4290 pr_err("%s: cannot find raid disk %d (%d/%d)\n",
4291 __func__, n,
4292 mdi->disk.major, mdi->disk.minor);
e1316fab 4293 return;
ce6844b9 4294 }
e1316fab 4295
baba3f4e 4296 pd = find_phys(ddf, vc->phys_refnum[n_bvd]);
e1316fab
N
4297 if (pd < 0 || pd != dl->pdnum) {
4298 /* disk doesn't currently exist or has changed.
4299 * If it is now in_sync, insert it. */
baba3f4e 4300 dprintf("%s: phys disk not found for %d: %d/%d ref %08x\n",
4301 __func__, dl->pdnum, dl->major, dl->minor,
60931cf9 4302 be32_to_cpu(dl->disk.refnum));
baba3f4e 4303 dprintf("%s: array %u disk %u ref %08x pd %d\n",
60931cf9 4304 __func__, inst, n_bvd,
4305 be32_to_cpu(vc->phys_refnum[n_bvd]), pd);
7a7cc504 4306 if ((state & DS_INSYNC) && ! (state & DS_FAULTY)) {
baba3f4e 4307 pd = dl->pdnum; /* FIXME: is this really correct ? */
4308 vc->phys_refnum[n_bvd] = dl->disk.refnum;
57a66662 4309 LBA_OFFSET(ddf, vc)[n_bvd] =
9d0c6b70 4310 cpu_to_be64(mdi->data_offset);
a8173e43 4311 be16_clear(ddf->phys->entries[pd].type,
4312 cpu_to_be16(DDF_Global_Spare));
4313 be16_set(ddf->phys->entries[pd].type,
4314 cpu_to_be16(DDF_Active_in_VD));
7d5a7ff3 4315 ddf_set_updates_pending(ddf);
7a7cc504
NB
4316 }
4317 } else {
a8173e43 4318 be16 old = ddf->phys->entries[pd].state;
7a7cc504 4319 if (state & DS_FAULTY)
a8173e43 4320 be16_set(ddf->phys->entries[pd].state,
4321 cpu_to_be16(DDF_Failed));
7a7cc504 4322 if (state & DS_INSYNC) {
a8173e43 4323 be16_set(ddf->phys->entries[pd].state,
4324 cpu_to_be16(DDF_Online));
4325 be16_clear(ddf->phys->entries[pd].state,
4326 cpu_to_be16(DDF_Rebuilding));
7a7cc504 4327 }
a8173e43 4328 if (!be16_eq(old, ddf->phys->entries[pd].state))
7d5a7ff3 4329 ddf_set_updates_pending(ddf);
7a7cc504
NB
4330 }
4331
ce6844b9
MW
4332 dprintf("ddf: set_disk %d (%08x) to %x->%02x\n", n,
4333 be32_to_cpu(dl->disk.refnum), state,
4334 be16_to_cpu(ddf->phys->entries[pd].state));
7e1432fb 4335
7a7cc504
NB
4336 /* Now we need to check the state of the array and update
4337 * virtual_disk.entries[n].state.
4338 * It needs to be one of "optimal", "degraded", "failed".
4339 * I don't understand 'deleted' or 'missing'.
4340 */
0777d17d 4341 state = get_svd_state(ddf, vcl);
7a7cc504 4342
18a2f463
NB
4343 if (ddf->virt->entries[inst].state !=
4344 ((ddf->virt->entries[inst].state & ~DDF_state_mask)
4345 | state)) {
4346
4347 ddf->virt->entries[inst].state =
4348 (ddf->virt->entries[inst].state & ~DDF_state_mask)
4349 | state;
7d5a7ff3 4350 ddf_set_updates_pending(ddf);
18a2f463 4351 }
7a7cc504 4352
549e9569
NB
4353}
4354
2e735d19 4355static void ddf_sync_metadata(struct supertype *st)
549e9569 4356{
7a7cc504
NB
4357
4358 /*
4359 * Write all data to all devices.
4360 * Later, we might be able to track whether only local changes
4361 * have been made, or whether any global data has been changed,
4362 * but ddf is sufficiently weird that it probably always
4363 * changes global data ....
4364 */
18a2f463
NB
4365 struct ddf_super *ddf = st->sb;
4366 if (!ddf->updates_pending)
4367 return;
4368 ddf->updates_pending = 0;
3921e41a 4369 __write_init_super_ddf(st);
2c514b71 4370 dprintf("ddf: sync_metadata\n");
549e9569
NB
4371}
4372
f646805e 4373static int del_from_conflist(struct vcl **list, const char *guid)
4374{
4375 struct vcl **p;
4376 int found = 0;
4377 for (p = list; p && *p; p = &((*p)->next))
4378 if (!memcmp((*p)->conf.guid, guid, DDF_GUID_LEN)) {
4379 found = 1;
4380 *p = (*p)->next;
4381 }
4382 return found;
4383}
4384
4385static int _kill_subarray_ddf(struct ddf_super *ddf, const char *guid)
4386{
4387 struct dl *dl;
4388 unsigned int vdnum, i;
4389 vdnum = find_vde_by_guid(ddf, guid);
4390 if (vdnum == DDF_NOTFOUND) {
4391 pr_err("%s: could not find VD %s\n", __func__,
4392 guid_str(guid));
4393 return -1;
4394 }
4395 if (del_from_conflist(&ddf->conflist, guid) == 0) {
4396 pr_err("%s: could not find conf %s\n", __func__,
4397 guid_str(guid));
4398 return -1;
4399 }
4400 for (dl = ddf->dlist; dl; dl = dl->next)
4401 for (i = 0; i < ddf->max_part; i++)
4402 if (dl->vlist[i] != NULL &&
4403 !memcmp(dl->vlist[i]->conf.guid, guid,
4404 DDF_GUID_LEN))
4405 dl->vlist[i] = NULL;
4406 memset(ddf->virt->entries[vdnum].guid, 0xff, DDF_GUID_LEN);
4407 dprintf("%s: deleted %s\n", __func__, guid_str(guid));
4408 return 0;
4409}
4410
4411static int kill_subarray_ddf(struct supertype *st)
4412{
4413 struct ddf_super *ddf = st->sb;
4414 /*
4415 * currentconf is set in container_content_ddf,
4416 * called with subarray arg
4417 */
4418 struct vcl *victim = ddf->currentconf;
4419 struct vd_config *conf;
4420 ddf->currentconf = NULL;
4421 unsigned int vdnum;
4422 if (!victim) {
4423 pr_err("%s: nothing to kill\n", __func__);
4424 return -1;
4425 }
4426 conf = &victim->conf;
4427 vdnum = find_vde_by_guid(ddf, conf->guid);
4428 if (vdnum == DDF_NOTFOUND) {
4429 pr_err("%s: could not find VD %s\n", __func__,
4430 guid_str(conf->guid));
4431 return -1;
4432 }
4433 if (st->update_tail) {
4434 struct virtual_disk *vd;
4435 int len = sizeof(struct virtual_disk)
4436 + sizeof(struct virtual_entry);
4437 vd = xmalloc(len);
4438 if (vd == NULL) {
4439 pr_err("%s: failed to allocate %d bytes\n", __func__,
4440 len);
4441 return -1;
4442 }
4443 memset(vd, 0 , len);
4444 vd->magic = DDF_VIRT_RECORDS_MAGIC;
a8173e43 4445 vd->populated_vdes = cpu_to_be16(0);
f646805e 4446 memcpy(vd->entries[0].guid, conf->guid, DDF_GUID_LEN);
4447 /* we use DDF_state_deleted as marker */
4448 vd->entries[0].state = DDF_state_deleted;
4449 append_metadata_update(st, vd, len);
6a350d82 4450 } else {
f646805e 4451 _kill_subarray_ddf(ddf, conf->guid);
6a350d82 4452 ddf_set_updates_pending(ddf);
4453 ddf_sync_metadata(st);
4454 }
f646805e 4455 return 0;
4456}
4457
c5943560 4458static void copy_matching_bvd(struct ddf_super *ddf,
4459 struct vd_config *conf,
4460 const struct metadata_update *update)
4461{
4462 unsigned int mppe =
a8173e43 4463 be16_to_cpu(ddf->anchor.max_primary_element_entries);
c5943560 4464 unsigned int len = ddf->conf_rec_len * 512;
4465 char *p;
4466 struct vd_config *vc;
4467 for (p = update->buf; p < update->buf + update->len; p += len) {
4468 vc = (struct vd_config *) p;
4469 if (vc->sec_elmnt_seq == conf->sec_elmnt_seq) {
4470 memcpy(conf->phys_refnum, vc->phys_refnum,
4471 mppe * (sizeof(__u32) + sizeof(__u64)));
4472 return;
4473 }
4474 }
4475 pr_err("%s: no match for BVD %d of %s in update\n", __func__,
4476 conf->sec_elmnt_seq, guid_str(conf->guid));
4477}
4478
88c164f4
NB
4479static void ddf_process_update(struct supertype *st,
4480 struct metadata_update *update)
4481{
4482 /* Apply this update to the metadata.
4483 * The first 4 bytes are a DDF_*_MAGIC which guides
4484 * our actions.
4485 * Possible update are:
4486 * DDF_PHYS_RECORDS_MAGIC
4dd968cc
N
4487 * Add a new physical device or remove an old one.
4488 * Changes to this record only happen implicitly.
88c164f4
NB
4489 * used_pdes is the device number.
4490 * DDF_VIRT_RECORDS_MAGIC
4491 * Add a new VD. Possibly also change the 'access' bits.
4492 * populated_vdes is the entry number.
4493 * DDF_VD_CONF_MAGIC
4494 * New or updated VD. the VIRT_RECORD must already
4495 * exist. For an update, phys_refnum and lba_offset
4496 * (at least) are updated, and the VD_CONF must
4497 * be written to precisely those devices listed with
4498 * a phys_refnum.
4499 * DDF_SPARE_ASSIGN_MAGIC
4500 * replacement Spare Assignment Record... but for which device?
4501 *
4502 * So, e.g.:
4503 * - to create a new array, we send a VIRT_RECORD and
4504 * a VD_CONF. Then assemble and start the array.
4505 * - to activate a spare we send a VD_CONF to add the phys_refnum
4506 * and offset. This will also mark the spare as active with
4507 * a spare-assignment record.
4508 */
4509 struct ddf_super *ddf = st->sb;
60931cf9 4510 be32 *magic = (be32 *)update->buf;
88c164f4
NB
4511 struct phys_disk *pd;
4512 struct virtual_disk *vd;
4513 struct vd_config *vc;
4514 struct vcl *vcl;
4515 struct dl *dl;
f21e18ca 4516 unsigned int ent;
c5943560 4517 unsigned int pdnum, pd2, len;
88c164f4 4518
60931cf9 4519 dprintf("Process update %x\n", be32_to_cpu(*magic));
7e1432fb 4520
60931cf9 4521 if (be32_eq(*magic, DDF_PHYS_RECORDS_MAGIC)) {
88c164f4
NB
4522
4523 if (update->len != (sizeof(struct phys_disk) +
4524 sizeof(struct phys_disk_entry)))
4525 return;
4526 pd = (struct phys_disk*)update->buf;
4527
a8173e43 4528 ent = be16_to_cpu(pd->used_pdes);
4529 if (ent >= be16_to_cpu(ddf->phys->max_pdes))
88c164f4 4530 return;
a8173e43 4531 if (be16_and(pd->entries[0].state, cpu_to_be16(DDF_Missing))) {
4dd968cc
N
4532 struct dl **dlp;
4533 /* removing this disk. */
a8173e43 4534 be16_set(ddf->phys->entries[ent].state,
4535 cpu_to_be16(DDF_Missing));
4dd968cc
N
4536 for (dlp = &ddf->dlist; *dlp; dlp = &(*dlp)->next) {
4537 struct dl *dl = *dlp;
4538 if (dl->pdnum == (signed)ent) {
4539 close(dl->fd);
4540 dl->fd = -1;
4541 /* FIXME this doesn't free
4542 * dl->devname */
4543 update->space = dl;
4544 *dlp = dl->next;
4545 break;
4546 }
4547 }
7d5a7ff3 4548 ddf_set_updates_pending(ddf);
4dd968cc
N
4549 return;
4550 }
88c164f4
NB
4551 if (!all_ff(ddf->phys->entries[ent].guid))
4552 return;
4553 ddf->phys->entries[ent] = pd->entries[0];
a8173e43 4554 ddf->phys->used_pdes = cpu_to_be16
4555 (1 + be16_to_cpu(ddf->phys->used_pdes));
7d5a7ff3 4556 ddf_set_updates_pending(ddf);
2cc2983d
N
4557 if (ddf->add_list) {
4558 struct active_array *a;
4559 struct dl *al = ddf->add_list;
4560 ddf->add_list = al->next;
4561
4562 al->next = ddf->dlist;
4563 ddf->dlist = al;
4564
4565 /* As a device has been added, we should check
4566 * for any degraded devices that might make
4567 * use of this spare */
4568 for (a = st->arrays ; a; a=a->next)
4569 a->check_degraded = 1;
4570 }
60931cf9 4571 } else if (be32_eq(*magic, DDF_VIRT_RECORDS_MAGIC)) {
88c164f4
NB
4572
4573 if (update->len != (sizeof(struct virtual_disk) +
4574 sizeof(struct virtual_entry)))
4575 return;
4576 vd = (struct virtual_disk*)update->buf;
4577
f646805e 4578 if (vd->entries[0].state == DDF_state_deleted) {
4579 if (_kill_subarray_ddf(ddf, vd->entries[0].guid))
4580 return;
4581 } else {
4582
6a7e7ecc 4583 ent = find_vde_by_guid(ddf, vd->entries[0].guid);
4584 if (ent != DDF_NOTFOUND) {
4585 dprintf("%s: VD %s exists already in slot %d\n",
4586 __func__, guid_str(vd->entries[0].guid),
4587 ent);
4588 return;
4589 }
f646805e 4590 ent = find_unused_vde(ddf);
4591 if (ent == DDF_NOTFOUND)
4592 return;
4593 ddf->virt->entries[ent] = vd->entries[0];
4594 ddf->virt->populated_vdes =
a8173e43 4595 cpu_to_be16(
4596 1 + be16_to_cpu(
f646805e 4597 ddf->virt->populated_vdes));
ed5ff7a2 4598 dprintf("%s: added VD %s in slot %d(s=%02x i=%02x)\n",
4599 __func__, guid_str(vd->entries[0].guid), ent,
4600 ddf->virt->entries[ent].state,
4601 ddf->virt->entries[ent].init_state);
f646805e 4602 }
7d5a7ff3 4603 ddf_set_updates_pending(ddf);
60931cf9 4604 }
88c164f4 4605
60931cf9 4606 else if (be32_eq(*magic, DDF_VD_CONF_MAGIC)) {
88c164f4 4607 vc = (struct vd_config*)update->buf;
c5943560 4608 len = ddf->conf_rec_len * 512;
4609 if ((unsigned int)update->len != len * vc->sec_elmnt_count) {
4610 pr_err("%s: %s: insufficient data (%d) for %u BVDs\n",
4611 __func__, guid_str(vc->guid), update->len,
4612 vc->sec_elmnt_count);
4613 return;
4614 }
88c164f4
NB
4615 for (vcl = ddf->conflist; vcl ; vcl = vcl->next)
4616 if (memcmp(vcl->conf.guid, vc->guid, DDF_GUID_LEN) == 0)
4617 break;
ed5ff7a2 4618 dprintf("%s: conf update for %s (%s)\n", __func__,
4619 guid_str(vc->guid), (vcl ? "old" : "new"));
88c164f4
NB
4620 if (vcl) {
4621 /* An update, just copy the phys_refnum and lba_offset
4622 * fields
4623 */
c5943560 4624 unsigned int i;
0847945b 4625 unsigned int k;
c5943560 4626 copy_matching_bvd(ddf, &vcl->conf, update);
0847945b
MW
4627 for (k = 0; k < be16_to_cpu(vc->prim_elmnt_count); k++)
4628 dprintf("BVD %u has %08x at %llu\n", 0,
4629 be32_to_cpu(vcl->conf.phys_refnum[k]),
4630 be64_to_cpu(LBA_OFFSET(ddf,
4631 &vcl->conf)[k]));
4632 for (i = 1; i < vc->sec_elmnt_count; i++) {
c5943560 4633 copy_matching_bvd(ddf, vcl->other_bvds[i-1],
4634 update);
0847945b
MW
4635 for (k = 0; k < be16_to_cpu(
4636 vc->prim_elmnt_count); k++)
4637 dprintf("BVD %u has %08x at %llu\n", i,
4638 be32_to_cpu
4639 (vcl->other_bvds[i-1]->
4640 phys_refnum[k]),
4641 be64_to_cpu
4642 (LBA_OFFSET
4643 (ddf,
4644 vcl->other_bvds[i-1])[k]));
4645 }
88c164f4
NB
4646 } else {
4647 /* A new VD_CONF */
c5943560 4648 unsigned int i;
e6b9548d
DW
4649 if (!update->space)
4650 return;
88c164f4
NB
4651 vcl = update->space;
4652 update->space = NULL;
4653 vcl->next = ddf->conflist;
c5943560 4654 memcpy(&vcl->conf, vc, len);
fb9d0acb 4655 ent = find_vde_by_guid(ddf, vc->guid);
4656 if (ent == DDF_NOTFOUND)
4657 return;
4658 vcl->vcnum = ent;
88c164f4 4659 ddf->conflist = vcl;
c5943560 4660 for (i = 1; i < vc->sec_elmnt_count; i++)
4661 memcpy(vcl->other_bvds[i-1],
4662 update->buf + len * i, len);
88c164f4 4663 }
c7079c84
N
4664 /* Set DDF_Transition on all Failed devices - to help
4665 * us detect those that are no longer in use
4666 */
a8173e43 4667 for (pdnum = 0; pdnum < be16_to_cpu(ddf->phys->used_pdes);
4668 pdnum++)
4669 if (be16_and(ddf->phys->entries[pdnum].state,
4670 cpu_to_be16(DDF_Failed)))
4671 be16_set(ddf->phys->entries[pdnum].state,
4672 cpu_to_be16(DDF_Transition));
88c164f4
NB
4673 /* Now make sure vlist is correct for each dl. */
4674 for (dl = ddf->dlist; dl; dl = dl->next) {
f21e18ca 4675 unsigned int vn = 0;
8401644c 4676 int in_degraded = 0;
5838fccd 4677 for (vcl = ddf->conflist; vcl ; vcl = vcl->next) {
4678 unsigned int dn, ibvd;
4679 const struct vd_config *conf;
4680 int vstate;
4681 dn = get_pd_index_from_refnum(vcl,
4682 dl->disk.refnum,
4683 ddf->mppe,
4684 &conf, &ibvd);
4685 if (dn == DDF_NOTFOUND)
4686 continue;
4687 dprintf("dev %d/%08x has %s (sec=%u) at %d\n",
ad60eea1 4688 dl->pdnum,
60931cf9 4689 be32_to_cpu(dl->disk.refnum),
5838fccd 4690 guid_str(conf->guid),
4691 conf->sec_elmnt_seq, vn);
4692 /* Clear the Transition flag */
a8173e43 4693 if (be16_and
4694 (ddf->phys->entries[dl->pdnum].state,
4695 cpu_to_be16(DDF_Failed)))
4696 be16_clear(ddf->phys
4697 ->entries[dl->pdnum].state,
4698 cpu_to_be16(DDF_Transition));
5838fccd 4699 dl->vlist[vn++] = vcl;
4700 vstate = ddf->virt->entries[vcl->vcnum].state
4701 & DDF_state_mask;
4702 if (vstate == DDF_state_degraded ||
4703 vstate == DDF_state_part_optimal)
4704 in_degraded = 1;
4705 }
88c164f4
NB
4706 while (vn < ddf->max_part)
4707 dl->vlist[vn++] = NULL;
7e1432fb 4708 if (dl->vlist[0]) {
a8173e43 4709 be16_clear(ddf->phys->entries[dl->pdnum].type,
4710 cpu_to_be16(DDF_Global_Spare));
4711 if (!be16_and(ddf->phys
4712 ->entries[dl->pdnum].type,
4713 cpu_to_be16(DDF_Active_in_VD))) {
4714 be16_set(ddf->phys
4715 ->entries[dl->pdnum].type,
4716 cpu_to_be16(DDF_Active_in_VD));
613b0d17 4717 if (in_degraded)
a8173e43 4718 be16_set(ddf->phys
4719 ->entries[dl->pdnum]
4720 .state,
4721 cpu_to_be16
4722 (DDF_Rebuilding));
613b0d17 4723 }
7e1432fb
NB
4724 }
4725 if (dl->spare) {
a8173e43 4726 be16_clear(ddf->phys->entries[dl->pdnum].type,
4727 cpu_to_be16(DDF_Global_Spare));
4728 be16_set(ddf->phys->entries[dl->pdnum].type,
4729 cpu_to_be16(DDF_Spare));
7e1432fb
NB
4730 }
4731 if (!dl->vlist[0] && !dl->spare) {
a8173e43 4732 be16_set(ddf->phys->entries[dl->pdnum].type,
4733 cpu_to_be16(DDF_Global_Spare));
4734 be16_clear(ddf->phys->entries[dl->pdnum].type,
4735 cpu_to_be16(DDF_Spare));
4736 be16_clear(ddf->phys->entries[dl->pdnum].type,
4737 cpu_to_be16(DDF_Active_in_VD));
7e1432fb 4738 }
88c164f4 4739 }
c7079c84
N
4740
4741 /* Now remove any 'Failed' devices that are not part
4742 * of any VD. They will have the Transition flag set.
4743 * Once done, we need to update all dl->pdnum numbers.
4744 */
4745 pd2 = 0;
a8173e43 4746 for (pdnum = 0; pdnum < be16_to_cpu(ddf->phys->used_pdes);
92939eb2 4747 pdnum++) {
a8173e43 4748 if (be16_and(ddf->phys->entries[pdnum].state,
4749 cpu_to_be16(DDF_Failed))
4750 && be16_and(ddf->phys->entries[pdnum].state,
92939eb2
N
4751 cpu_to_be16(DDF_Transition))) {
4752 /* skip this one unless in dlist*/
4753 for (dl = ddf->dlist; dl; dl = dl->next)
4754 if (dl->pdnum == (int)pdnum)
4755 break;
4756 if (!dl)
4757 continue;
4758 }
4759 if (pdnum == pd2)
c7079c84
N
4760 pd2++;
4761 else {
a8173e43 4762 ddf->phys->entries[pd2] =
4763 ddf->phys->entries[pdnum];
c7079c84
N
4764 for (dl = ddf->dlist; dl; dl = dl->next)
4765 if (dl->pdnum == (int)pdnum)
4766 dl->pdnum = pd2;
4767 pd2++;
4768 }
92939eb2 4769 }
a8173e43 4770 ddf->phys->used_pdes = cpu_to_be16(pd2);
c7079c84 4771 while (pd2 < pdnum) {
a8173e43 4772 memset(ddf->phys->entries[pd2].guid, 0xff,
4773 DDF_GUID_LEN);
c7079c84
N
4774 pd2++;
4775 }
4776
7d5a7ff3 4777 ddf_set_updates_pending(ddf);
88c164f4 4778 }
60931cf9 4779 /* case DDF_SPARE_ASSIGN_MAGIC */
88c164f4
NB
4780}
4781
edd8d13c
NB
4782static void ddf_prepare_update(struct supertype *st,
4783 struct metadata_update *update)
4784{
4785 /* This update arrived at managemon.
4786 * We are about to pass it to monitor.
4787 * If a malloc is needed, do it here.
4788 */
4789 struct ddf_super *ddf = st->sb;
60931cf9 4790 be32 *magic = (be32 *)update->buf;
4791 if (be32_eq(*magic, DDF_VD_CONF_MAGIC)) {
c5943560 4792 struct vcl *vcl;
4793 struct vd_config *conf = (struct vd_config *) update->buf;
e6b9548d 4794 if (posix_memalign(&update->space, 512,
613b0d17 4795 offsetof(struct vcl, conf)
c5943560 4796 + ddf->conf_rec_len * 512) != 0) {
4797 update->space = NULL;
4798 return;
4799 }
4800 vcl = update->space;
4801 vcl->conf.sec_elmnt_count = conf->sec_elmnt_count;
4802 if (alloc_other_bvds(ddf, vcl) != 0) {
4803 free(update->space);
e6b9548d 4804 update->space = NULL;
c5943560 4805 }
4806 }
edd8d13c
NB
4807}
4808
7733b91d 4809/*
4810 * Check degraded state of a RAID10.
4811 * returns 2 for good, 1 for degraded, 0 for failed, and -1 for error
4812 */
4813static int raid10_degraded(struct mdinfo *info)
4814{
4815 int n_prim, n_bvds;
4816 int i;
9591a2de 4817 struct mdinfo *d;
7733b91d 4818 char *found;
4819 int ret = -1;
4820
7733b91d 4821 n_prim = info->array.layout & ~0x100;
4822 n_bvds = info->array.raid_disks / n_prim;
4823 found = xmalloc(n_bvds);
4824 if (found == NULL)
4825 return ret;
4826 memset(found, 0, n_bvds);
4827 for (d = info->devs; d; d = d->next) {
4828 i = d->disk.raid_disk / n_prim;
4829 if (i >= n_bvds) {
4830 pr_err("%s: BUG: invalid raid disk\n", __func__);
4831 goto out;
4832 }
4833 if (d->state_fd > 0)
4834 found[i]++;
4835 }
4836 ret = 2;
4837 for (i = 0; i < n_bvds; i++)
4838 if (!found[i]) {
4839 dprintf("%s: BVD %d/%d failed\n", __func__, i, n_bvds);
4840 ret = 0;
4841 goto out;
4842 } else if (found[i] < n_prim) {
4843 dprintf("%s: BVD %d/%d degraded\n", __func__, i,
4844 n_bvds);
4845 ret = 1;
4846 }
4847out:
4848 free(found);
4849 return ret;
4850}
4851
7e1432fb
NB
4852/*
4853 * Check if the array 'a' is degraded but not failed.
4854 * If it is, find as many spares as are available and needed and
4855 * arrange for their inclusion.
4856 * We only choose devices which are not already in the array,
4857 * and prefer those with a spare-assignment to this array.
4858 * otherwise we choose global spares - assuming always that
4859 * there is enough room.
4860 * For each spare that we assign, we return an 'mdinfo' which
4861 * describes the position for the device in the array.
4862 * We also add to 'updates' a DDF_VD_CONF_MAGIC update with
4863 * the new phys_refnum and lba_offset values.
4864 *
4865 * Only worry about BVDs at the moment.
4866 */
4867static struct mdinfo *ddf_activate_spare(struct active_array *a,
4868 struct metadata_update **updates)
4869{
4870 int working = 0;
4871 struct mdinfo *d;
4872 struct ddf_super *ddf = a->container->sb;
4873 int global_ok = 0;
4874 struct mdinfo *rv = NULL;
4875 struct mdinfo *di;
4876 struct metadata_update *mu;
4877 struct dl *dl;
4878 int i;
0c78849f 4879 unsigned int j;
baba3f4e 4880 struct vcl *vcl;
7e1432fb 4881 struct vd_config *vc;
baba3f4e 4882 unsigned int n_bvd;
7e1432fb 4883
7e1432fb
NB
4884 for (d = a->info.devs ; d ; d = d->next) {
4885 if ((d->curr_state & DS_FAULTY) &&
613b0d17 4886 d->state_fd >= 0)
7e1432fb
NB
4887 /* wait for Removal to happen */
4888 return NULL;
4889 if (d->state_fd >= 0)
4890 working ++;
4891 }
4892
7733b91d 4893 dprintf("%s: working=%d (%d) level=%d\n", __func__, working,
a8173e43 4894 a->info.array.raid_disks,
2c514b71 4895 a->info.array.level);
7e1432fb
NB
4896 if (working == a->info.array.raid_disks)
4897 return NULL; /* array not degraded */
4898 switch (a->info.array.level) {
4899 case 1:
4900 if (working == 0)
4901 return NULL; /* failed */
4902 break;
4903 case 4:
4904 case 5:
4905 if (working < a->info.array.raid_disks - 1)
4906 return NULL; /* failed */
4907 break;
4908 case 6:
4909 if (working < a->info.array.raid_disks - 2)
4910 return NULL; /* failed */
4911 break;
7733b91d 4912 case 10:
4913 if (raid10_degraded(&a->info) < 1)
4914 return NULL;
4915 break;
7e1432fb
NB
4916 default: /* concat or stripe */
4917 return NULL; /* failed */
4918 }
4919
4920 /* For each slot, if it is not working, find a spare */
4921 dl = ddf->dlist;
4922 for (i = 0; i < a->info.array.raid_disks; i++) {
4923 for (d = a->info.devs ; d ; d = d->next)
4924 if (d->disk.raid_disk == i)
4925 break;
2c514b71 4926 dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
7e1432fb
NB
4927 if (d && (d->state_fd >= 0))
4928 continue;
4929
4930 /* OK, this device needs recovery. Find a spare */
4931 again:
4932 for ( ; dl ; dl = dl->next) {
4933 unsigned long long esize;
4934 unsigned long long pos;
4935 struct mdinfo *d2;
4936 int is_global = 0;
4937 int is_dedicated = 0;
4938 struct extent *ex;
f21e18ca 4939 unsigned int j;
6f56dbb9
MW
4940 be16 state = ddf->phys->entries[dl->pdnum].state;
4941 if (be16_and(state,
4942 cpu_to_be16(DDF_Failed|DDF_Missing)) ||
4943 !be16_and(state,
4944 cpu_to_be16(DDF_Online)))
4945 continue;
4946
7e1432fb
NB
4947 /* If in this array, skip */
4948 for (d2 = a->info.devs ; d2 ; d2 = d2->next)
7590d562
N
4949 if (d2->state_fd >= 0 &&
4950 d2->disk.major == dl->major &&
7e1432fb 4951 d2->disk.minor == dl->minor) {
2a645ee2
MW
4952 dprintf("%x:%x (%08x) already in array\n",
4953 dl->major, dl->minor,
4954 be32_to_cpu(dl->disk.refnum));
7e1432fb
NB
4955 break;
4956 }
4957 if (d2)
4958 continue;
a8173e43 4959 if (be16_and(ddf->phys->entries[dl->pdnum].type,
4960 cpu_to_be16(DDF_Spare))) {
7e1432fb
NB
4961 /* Check spare assign record */
4962 if (dl->spare) {
4963 if (dl->spare->type & DDF_spare_dedicated) {
4964 /* check spare_ents for guid */
4965 for (j = 0 ;
a8173e43 4966 j < be16_to_cpu
4967 (dl->spare
4968 ->populated);
7e1432fb
NB
4969 j++) {
4970 if (memcmp(dl->spare->spare_ents[j].guid,
4971 ddf->virt->entries[a->info.container_member].guid,
4972 DDF_GUID_LEN) == 0)
4973 is_dedicated = 1;
4974 }
4975 } else
4976 is_global = 1;
4977 }
a8173e43 4978 } else if (be16_and(ddf->phys->entries[dl->pdnum].type,
4979 cpu_to_be16(DDF_Global_Spare))) {
7e1432fb 4980 is_global = 1;
a8173e43 4981 } else if (!be16_and(ddf->phys
4982 ->entries[dl->pdnum].state,
4983 cpu_to_be16(DDF_Failed))) {
e0e7aeaa
N
4984 /* we can possibly use some of this */
4985 is_global = 1;
7e1432fb
NB
4986 }
4987 if ( ! (is_dedicated ||
4988 (is_global && global_ok))) {
2c514b71 4989 dprintf("%x:%x not suitable: %d %d\n", dl->major, dl->minor,
613b0d17 4990 is_dedicated, is_global);
7e1432fb
NB
4991 continue;
4992 }
4993
4994 /* We are allowed to use this device - is there space?
4995 * We need a->info.component_size sectors */
4996 ex = get_extents(ddf, dl);
4997 if (!ex) {
2c514b71 4998 dprintf("cannot get extents\n");
7e1432fb
NB
4999 continue;
5000 }
5001 j = 0; pos = 0;
5002 esize = 0;
5003
5004 do {
5005 esize = ex[j].start - pos;
5006 if (esize >= a->info.component_size)
5007 break;
e5cc7d46
N
5008 pos = ex[j].start + ex[j].size;
5009 j++;
5010 } while (ex[j-1].size);
7e1432fb
NB
5011
5012 free(ex);
5013 if (esize < a->info.component_size) {
e5cc7d46
N
5014 dprintf("%x:%x has no room: %llu %llu\n",
5015 dl->major, dl->minor,
2c514b71 5016 esize, a->info.component_size);
7e1432fb
NB
5017 /* No room */
5018 continue;
5019 }
5020
5021 /* Cool, we have a device with some space at pos */
503975b9 5022 di = xcalloc(1, sizeof(*di));
7e1432fb
NB
5023 di->disk.number = i;
5024 di->disk.raid_disk = i;
5025 di->disk.major = dl->major;
5026 di->disk.minor = dl->minor;
5027 di->disk.state = 0;
d23534e4 5028 di->recovery_start = 0;
7e1432fb
NB
5029 di->data_offset = pos;
5030 di->component_size = a->info.component_size;
5031 di->container_member = dl->pdnum;
5032 di->next = rv;
5033 rv = di;
2a645ee2
MW
5034 dprintf("%x:%x (%08x) to be %d at %llu\n",
5035 dl->major, dl->minor,
5036 be32_to_cpu(dl->disk.refnum), i, pos);
7e1432fb
NB
5037
5038 break;
5039 }
5040 if (!dl && ! global_ok) {
5041 /* not enough dedicated spares, try global */
5042 global_ok = 1;
5043 dl = ddf->dlist;
5044 goto again;
5045 }
5046 }
5047
5048 if (!rv)
5049 /* No spares found */
5050 return rv;
5051 /* Now 'rv' has a list of devices to return.
5052 * Create a metadata_update record to update the
5053 * phys_refnum and lba_offset values
5054 */
bb925ff0 5055 vc = find_vdcr(ddf, a->info.container_member, rv->disk.raid_disk,
0c78849f 5056 &n_bvd, &vcl);
5057 if (vc == NULL)
5058 return NULL;
5059
503975b9
N
5060 mu = xmalloc(sizeof(*mu));
5061 if (posix_memalign(&mu->space, 512, sizeof(struct vcl)) != 0) {
79244939
DW
5062 free(mu);
5063 mu = NULL;
5064 }
0c78849f 5065
5066 mu->len = ddf->conf_rec_len * 512 * vcl->conf.sec_elmnt_count;
5067 mu->buf = xmalloc(mu->len);
7590d562 5068 mu->space = NULL;
f50ae22e 5069 mu->space_list = NULL;
7e1432fb 5070 mu->next = *updates;
0c78849f 5071 memcpy(mu->buf, &vcl->conf, ddf->conf_rec_len * 512);
5072 for (j = 1; j < vcl->conf.sec_elmnt_count; j++)
5073 memcpy(mu->buf + j * ddf->conf_rec_len * 512,
5074 vcl->other_bvds[j-1], ddf->conf_rec_len * 512);
7e1432fb
NB
5075
5076 vc = (struct vd_config*)mu->buf;
7e1432fb 5077 for (di = rv ; di ; di = di->next) {
0c78849f 5078 unsigned int i_sec, i_prim;
5079 i_sec = di->disk.raid_disk
5080 / be16_to_cpu(vcl->conf.prim_elmnt_count);
5081 i_prim = di->disk.raid_disk
5082 % be16_to_cpu(vcl->conf.prim_elmnt_count);
5083 vc = (struct vd_config *)(mu->buf
5084 + i_sec * ddf->conf_rec_len * 512);
5085 for (dl = ddf->dlist; dl; dl = dl->next)
5086 if (dl->major == di->disk.major
5087 && dl->minor == di->disk.minor)
5088 break;
5089 if (!dl) {
5090 pr_err("%s: BUG: can't find disk %d (%d/%d)\n",
5091 __func__, di->disk.raid_disk,
5092 di->disk.major, di->disk.minor);
5093 return NULL;
5094 }
5095 vc->phys_refnum[i_prim] = ddf->phys->entries[dl->pdnum].refnum;
5096 LBA_OFFSET(ddf, vc)[i_prim] = cpu_to_be64(di->data_offset);
2a645ee2
MW
5097 dprintf("BVD %u gets %u: %08x at %llu\n", i_sec, i_prim,
5098 be32_to_cpu(vc->phys_refnum[i_prim]),
5099 be64_to_cpu(LBA_OFFSET(ddf, vc)[i_prim]));
7e1432fb
NB
5100 }
5101 *updates = mu;
5102 return rv;
5103}
0e600426 5104#endif /* MDASSEMBLE */
7e1432fb 5105
b640a252
N
5106static int ddf_level_to_layout(int level)
5107{
5108 switch(level) {
5109 case 0:
5110 case 1:
5111 return 0;
5112 case 5:
5113 return ALGORITHM_LEFT_SYMMETRIC;
5114 case 6:
5115 return ALGORITHM_ROTATING_N_CONTINUE;
5116 case 10:
5117 return 0x102;
5118 default:
5119 return UnSet;
5120 }
5121}
5122
30f58b22
DW
5123static void default_geometry_ddf(struct supertype *st, int *level, int *layout, int *chunk)
5124{
5125 if (level && *level == UnSet)
5126 *level = LEVEL_CONTAINER;
5127
5128 if (level && layout && *layout == UnSet)
5129 *layout = ddf_level_to_layout(*level);
5130}
5131
a322f70c
DW
5132struct superswitch super_ddf = {
5133#ifndef MDASSEMBLE
5134 .examine_super = examine_super_ddf,
5135 .brief_examine_super = brief_examine_super_ddf,
4737ae25 5136 .brief_examine_subarrays = brief_examine_subarrays_ddf,
bceedeec 5137 .export_examine_super = export_examine_super_ddf,
a322f70c
DW
5138 .detail_super = detail_super_ddf,
5139 .brief_detail_super = brief_detail_super_ddf,
5140 .validate_geometry = validate_geometry_ddf,
78e44928 5141 .write_init_super = write_init_super_ddf,
0e600426 5142 .add_to_super = add_to_super_ddf,
4dd968cc 5143 .remove_from_super = remove_from_super_ddf,
2b959fbf 5144 .load_container = load_container_ddf,
74db60b0 5145 .copy_metadata = copy_metadata_ddf,
4441541f 5146 .kill_subarray = kill_subarray_ddf,
a322f70c
DW
5147#endif
5148 .match_home = match_home_ddf,
5149 .uuid_from_super= uuid_from_super_ddf,
5150 .getinfo_super = getinfo_super_ddf,
5151 .update_super = update_super_ddf,
5152
5153 .avail_size = avail_size_ddf,
5154
a19c88b8
NB
5155 .compare_super = compare_super_ddf,
5156
a322f70c 5157 .load_super = load_super_ddf,
ba7eb04f 5158 .init_super = init_super_ddf,
955e9ea1 5159 .store_super = store_super_ddf,
a322f70c
DW
5160 .free_super = free_super_ddf,
5161 .match_metadata_desc = match_metadata_desc_ddf,
78e44928 5162 .container_content = container_content_ddf,
30f58b22 5163 .default_geometry = default_geometry_ddf,
a322f70c 5164
a322f70c 5165 .external = 1,
549e9569 5166
0e600426 5167#ifndef MDASSEMBLE
549e9569
NB
5168/* for mdmon */
5169 .open_new = ddf_open_new,
ed9d66aa 5170 .set_array_state= ddf_set_array_state,
549e9569
NB
5171 .set_disk = ddf_set_disk,
5172 .sync_metadata = ddf_sync_metadata,
88c164f4 5173 .process_update = ddf_process_update,
edd8d13c 5174 .prepare_update = ddf_prepare_update,
7e1432fb 5175 .activate_spare = ddf_activate_spare,
0e600426 5176#endif
4cce4069 5177 .name = "ddf",
a322f70c 5178};